daShangDao_psiServer/service/ocr.go
2026-06-15 13:47:39 +08:00

447 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package service
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"psi/config"
systemRes "psi/models/response"
"regexp"
"strings"
"time"
)
// OcrService groups the OCR-related operations of this package: calling the
// external OCR HTTP service and guessing book metadata from the recognized
// text. The type has no fields, so its zero value is ready to use.
type OcrService struct{}
// ocrServiceRequest is the JSON payload posted to the OCR service.
type ocrServiceRequest struct {
// ImageBase64 carries the image bytes encoded with standard base64.
ImageBase64 string `json:"image_base64"`
}
// ocrServiceResponse is the JSON document decoded from the OCR service reply.
type ocrServiceResponse struct {
// Error is a service-side failure message; a non-empty value is treated as
// a failed recognition by RecognizeText.
Error string `json:"error,omitempty"`
// Texts holds the recognized text fragments in the order the service
// returned them.
Texts []string `json:"texts,omitempty"`
}
// RecognizeText sends imageData to the configured OCR HTTP service and returns
// the recognized text fragments together with a heuristic guess of the book
// name, author and publisher.
//
// The image is base64-encoded (standard encoding), wrapped in a JSON body and
// posted to config.AppConfig.OCR.ServiceUrl with a 60 second client timeout.
//
// An error is returned when the request cannot be built or sent, when the
// body cannot be read, when the service reports an error message, when it
// answers with a non-2xx status, or when the reply is not valid JSON. Errors
// from lower layers are wrapped with %w so callers can inspect them with
// errors.Is / errors.As. On error the returned OcrResponse is the zero value.
func (s *OcrService) RecognizeText(imageData []byte) (systemRes.OcrResponse, error) {
	jsonData, err := json.Marshal(ocrServiceRequest{
		ImageBase64: base64.StdEncoding.EncodeToString(imageData),
	})
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("序列化请求数据失败: %w", err)
	}

	ocrServiceURL := config.AppConfig.OCR.ServiceUrl
	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Post(ocrServiceURL, "application/json", bytes.NewReader(jsonData))
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("调用OCR服务失败: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("读取OCR响应失败: %w", err)
	}

	var ocrResp ocrServiceResponse
	parseErr := json.Unmarshal(body, &ocrResp)

	// Prefer the service's own error message when it supplied one; it is
	// more specific than a bare status code.
	if parseErr == nil && ocrResp.Error != "" {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR识别错误: %s", ocrResp.Error)
	}
	// A non-2xx reply must not be reported as a successful recognition, even
	// if its body happens to be parseable JSON (for example a gateway page).
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
	}
	if parseErr != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("解析OCR响应失败: %w", parseErr)
	}

	return systemRes.OcrResponse{
		Success:       true,
		Texts:         ocrResp.Texts,
		GuessBookInfo: s.analyzeBookInfo(ocrResp.Texts),
	}, nil
}
// candidate is one possible value for a guessed book field (name, author or
// publisher) together with the data used to rank it.
type candidate struct {
// text is the candidate value itself.
text string
// priority is the heuristic score; selectBestCandidate keeps the highest.
priority int
// index is the position of the originating entry in the OCR texts slice.
index int
}
// analyzeBookInfo scans the OCR text fragments and picks the most plausible
// book name, author and publisher.
//
// Each fragment is whitespace-trimmed; empty fragments are skipped. A fragment
// may contribute a candidate to any of the three fields. For every field the
// highest-scoring candidate wins; a field without candidates keeps its zero
// value. An empty input yields the zero GuessBookInfo.
func (s *OcrService) analyzeBookInfo(texts []string) systemRes.GuessBookInfo {
	var info systemRes.GuessBookInfo
	if len(texts) == 0 {
		return info
	}

	var names, authors, publishers []candidate
	for idx, raw := range texts {
		line := strings.TrimSpace(raw)
		if line == "" {
			continue
		}

		if s.isBookName(line) {
			names = append(names, candidate{
				text:     line,
				priority: s.calculateBookNameScore(line),
				index:    idx,
			})
		}

		if author := s.extractAuthor(line); author != "" {
			authors = append(authors, candidate{
				text:     author,
				priority: s.calculateAuthorScore(author, line),
				index:    idx,
			})
		}

		if publisher := s.extractPublisher(line); publisher != "" {
			publishers = append(publishers, candidate{
				text:     publisher,
				priority: s.calculatePublisherScore(publisher),
				index:    idx,
			})
		}
	}

	if len(names) > 0 {
		info.BookName = s.selectBestCandidate(names).text
	}
	if len(authors) > 0 {
		info.Author = s.selectBestCandidate(authors).text
	}
	if len(publishers) > 0 {
		info.Publisher = s.selectBestCandidate(publishers).text
	}
	return info
}
// Patterns used by isBookName, compiled once instead of on every call.
var (
	// bookNameRejectRes match fragments that are structurally not a title:
	// pure digits, 1-3 capital letters, pure punctuation, numbered list
	// items and volume headings.
	bookNameRejectRes = []*regexp.Regexp{
		regexp.MustCompile(`^\d+$`),
		regexp.MustCompile(`^[A-Z]{1,3}$`),
		regexp.MustCompile(`^[\p{P}]+$`),
		regexp.MustCompile(`^\d+\.`),
		regexp.MustCompile(`^第\d+[卷册部辑]`),
	}
	// bookNameLongDigitsRe matches a run of four or more digits (years,
	// ISBN fragments, barcodes).
	bookNameLongDigitsRe = regexp.MustCompile(`\d{4,}`)
	// bookNamePriceRe matches a currency sign followed by digits. The yen
	// signs are written as escapes so the half-width (U+00A5) and full-width
	// (U+FFE5) forms cannot be confused or collapsed into one another.
	bookNamePriceRe = regexp.MustCompile(`[\x{00A5}\x{FFE5}$]\d+`)
	// bookNameHanRe matches a single Han character.
	bookNameHanRe = regexp.MustCompile(`\p{Han}`)
)

// bookNameListSeparators are the comma-like characters whose repetition marks
// a fragment as an enumeration: ASCII comma, full-width comma and ideographic
// comma. Escapes are used for the non-ASCII ones to keep them unambiguous.
var bookNameListSeparators = []string{",", "\uff0c", "\u3001"}

// bookNameDescriptionKeywords are substrings that indicate descriptive or
// promotional text rather than a title. Matching is case-insensitive.
var bookNameDescriptionKeywords = []string{
	"版本", "插图", "阅读", "经典", "名家", "开本",
	"精装", "平装", "修订版", "增补版", "全集", "选集",
	"注释版", "译本", "原版", "引进版", "推荐", "畅销",
	"全新", "最新", "权威", "完整版", "简体", "繁体",
	"定价", "售价", "元", "ISBN", "书号", "条码",
	"出版社", "出版", "发行", "印刷", "印次", "版次",
	"字数", "页数", "张", "册",
	"丛书", "系列", "文库", "书系", "读本", "教材",
	"教辅", "考试", "习题", "练习", "答案", "解析",
	"上册", "下册", "上卷", "下卷", "第一卷", "第二卷",
	"前言", "序言", "目录", "附录", "后记", "跋",
	"简介", "摘要", "概述", "导读", "书评", "推荐语",
	"获奖", "荣获", "提名", "榜单", "排行榜",
	"扫码", "二维码", "公众号", "微信", "关注",
	"配套", "资源", "下载", "音频", "视频", "课件",
	"适用", "适合", "读者", "对象", "年龄", "岁",
	"教育部", "新课标", "统编", "部编", "人教版",
}

// isBookName reports whether text looks like a book title.
//
// A fragment is rejected when any of the following holds:
//   - it is shorter than 2 or longer than 30 characters (runes, not bytes);
//   - it carries an author or publisher marker;
//   - it matches one of the structural reject patterns;
//   - it contains a descriptive keyword (case-insensitive);
//   - one comma-like separator occurs two or more times;
//   - it ends with a full stop;
//   - it contains four or more consecutive digits or a price;
//   - it has fewer than two Han characters, or Han characters make up less
//     than 30% of its characters;
//   - it consists of two or three whitespace-separated pieces that each have
//     2-4 Han characters (that shape is a list of personal names).
func (s *OcrService) isBookName(text string) bool {
	// Count characters, not bytes: a Han character is three bytes in UTF-8,
	// so a byte limit of 30 would reject every title over ten characters.
	runeCount := len([]rune(text))
	if runeCount < 2 || runeCount > 30 {
		return false
	}
	if s.hasAuthorMarker(text) || s.hasPublisherMarker(text) {
		return false
	}
	for _, re := range bookNameRejectRes {
		if re.MatchString(text) {
			return false
		}
	}

	// Lower-case both sides so that mixed-case keywords such as "ISBN"
	// actually match.
	lowered := strings.ToLower(text)
	for _, keyword := range bookNameDescriptionKeywords {
		if strings.Contains(lowered, strings.ToLower(keyword)) {
			return false
		}
	}

	// Never count the empty string here: strings.Count(s, "") is the rune
	// count plus one, which would reject every non-empty fragment.
	for _, sep := range bookNameListSeparators {
		if strings.Count(text, sep) >= 2 {
			return false
		}
	}
	if strings.HasSuffix(text, "。") || strings.HasSuffix(text, ".") {
		return false
	}
	if bookNameLongDigitsRe.MatchString(text) || bookNamePriceRe.MatchString(text) {
		return false
	}

	hanCount := len(bookNameHanRe.FindAllString(text, -1))
	if hanCount < 2 {
		return false
	}
	if float64(hanCount) < float64(runeCount)*0.3 {
		return false
	}

	// strings.Fields splits on any Unicode whitespace, so names separated by
	// a full-width space are recognized as well.
	if parts := strings.Fields(text); hanCount <= 10 && len(parts) >= 2 && len(parts) <= 3 {
		allShortNames := true
		for _, part := range parts {
			n := len(bookNameHanRe.FindAllString(part, -1))
			if n < 2 || n > 4 {
				allShortNames = false
				break
			}
		}
		if allShortNames {
			return false
		}
	}
	return true
}
// Patterns used by extractAuthor, compiled once instead of on every call.
var (
	// authorHanRe matches a single Han character.
	authorHanRe = regexp.MustCompile(`\p{Han}`)
	// authorNameSplitRe splits a line into name pieces on whitespace, the
	// full-width space (U+3000, not covered by \s in Go's regexp) and the
	// middle dot.
	authorNameSplitRe = regexp.MustCompile(`[\s\x{3000}·]+`)
)

// extractAuthor returns the author name found in text, or "" when the
// fragment does not look like an author line.
//
// Rules, in order:
//  1. If text carries an author marker, the marker is stripped; the remainder
//     is returned when it is at least 2 bytes and at most 20 characters long.
//  2. A bare role word ("主编", "副主编", "编著", "编译") is never an author.
//  3. A fragment with 2-15 Han characters that contains a space (ASCII or
//     full-width) or a middle dot is returned unchanged when it splits into
//     at least two pieces and every non-empty piece is Han-only.
//  4. A fragment with 2-4 Han characters and no separator is returned
//     unchanged unless it contains an editorial role word.
func (s *OcrService) extractAuthor(text string) string {
	if s.hasAuthorMarker(text) {
		cleaned := strings.TrimSpace(s.removeAuthorMarker(text))
		// The upper bound counts characters, not bytes, so that long
		// transliterated names are not rejected. The lower bound stays in
		// bytes so a single Han character (three bytes) still qualifies.
		if len(cleaned) >= 2 && len([]rune(cleaned)) <= 20 {
			return cleaned
		}
	}

	switch text {
	case "主编", "副主编", "编著", "编译":
		return ""
	}

	hanCount := len(authorHanRe.FindAllString(text, -1))
	if hanCount < 2 || hanCount > 15 {
		return ""
	}

	hasSpace := strings.Contains(text, " ") || strings.Contains(text, "\u3000")
	hasDot := strings.Contains(text, "·")

	if hasSpace || hasDot {
		parts := authorNameSplitRe.Split(text, -1)
		if len(parts) < 2 {
			return ""
		}
		for _, part := range parts {
			part = strings.TrimSpace(part)
			if part == "" {
				continue
			}
			if len(authorHanRe.FindAllString(part, -1)) != len([]rune(part)) {
				return ""
			}
		}
		return text
	}

	if hanCount > 4 {
		return ""
	}
	for _, word := range []string{"主编", "副主编", "编著", "编译", "责任编辑", "责任校对", "封面设计"} {
		if strings.Contains(text, word) {
			return ""
		}
	}
	return text
}
// extractPublisher returns text unchanged when it contains a publisher
// keyword ("出版社", "出版", "press" or "Publishing", compared
// case-insensitively), and "" otherwise.
func (s *OcrService) extractPublisher(text string) string {
	haystack := strings.ToLower(text)
	mentions := func(keyword string) bool {
		return strings.Contains(haystack, strings.ToLower(keyword))
	}

	switch {
	case mentions("出版社"), mentions("出版"), mentions("press"), mentions("Publishing"):
		return text
	default:
		return ""
	}
}
// hasAuthorMarker reports whether text contains any of the substrings that
// are treated as author markers: "著", "编", "作者", "/" or "·".
func (s *OcrService) hasAuthorMarker(text string) bool {
	switch {
	case strings.Contains(text, "著"),
		strings.Contains(text, "编"),
		strings.Contains(text, "作者"),
		strings.Contains(text, "/"),
		strings.Contains(text, "·"):
		return true
	}
	return false
}
// hasPublisherMarker reports whether text looks like a publisher line.
//
// It returns true when text contains "出版", "press" or "publishing"
// (case-insensitively), or when it contains "人民" together with either "出"
// or "版". The second rule presumably tolerates OCR output in which one
// character of "出版" was misread.
//
// Earlier revisions also ran ten regular expressions between these two
// checks. Nine of them required the literal "出版", which the substring test
// has already handled, and the tenth ("人民…出版/出报") is subsumed by the
// "人民" rule, so they could never change the result and were recompiled on
// every call. They have been removed; the outcome is the same for every
// input.
func (s *OcrService) hasPublisherMarker(text string) bool {
	lowered := strings.ToLower(text)
	// "出版社" needs no separate entry: it always contains "出版".
	for _, marker := range []string{"出版", "press", "publishing"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return strings.Contains(text, "人民") &&
		(strings.Contains(text, "出") || strings.Contains(text, "版"))
}
// Patterns used by removeAuthorMarker, compiled once instead of on every call.
var (
	// authorSlashTailRe matches the first "/" and everything after it.
	authorSlashTailRe = regexp.MustCompile(`/.*$`)
	// authorRoleWordRe matches a trailing multi-character role word such as
	// "主编" or "作者", with optional leading whitespace.
	authorRoleWordRe = regexp.MustCompile(`\s*(?:主编|副主编|编著|编译|编者|译者|著者|作者)[著编译者]*$`)
	// authorRoleCharRe matches a trailing run of single-character role
	// markers, with optional leading whitespace.
	authorRoleCharRe = regexp.MustCompile(`\s*[著编译者]+$`)
)

// removeAuthorMarker strips role markers from an author line and returns the
// remaining name, whitespace-trimmed.
//
// Everything from the first "/" onward is dropped first, then one trailing
// role marker is removed: a role word if present, otherwise a run of role
// characters. The slash must be handled first because the trailing-marker
// patterns would otherwise match "莫言/著" and leave a dangling "莫言/".
// Lines without a "/" are processed exactly as before.
func (s *OcrService) removeAuthorMarker(text string) string {
	result := authorSlashTailRe.ReplaceAllString(text, "")

	// Only the first matching trailing pattern is applied.
	switch {
	case authorRoleWordRe.MatchString(result):
		result = authorRoleWordRe.ReplaceAllString(result, "")
	case authorRoleCharRe.MatchString(result):
		result = authorRoleCharRe.ReplaceAllString(result, "")
	}
	return strings.TrimSpace(result)
}
// bookScoreHanRe matches a single Han character. It is compiled once instead
// of on every call.
var bookScoreHanRe = regexp.MustCompile(`\p{Han}`)

// calculateBookNameScore returns a heuristic score for how likely text is a
// book title; higher is better.
//
// Scoring:
//   - +2 per Han character;
//   - +10 for a length of 2-10 characters, +5 for 11-20 characters;
//   - +15 when the text contains a title mark (《 or 》);
//   - +10 for at least 4 Han characters, and a further +5 for at least 6;
//   - -5 when the text starts with an ASCII capital letter.
//
// Lengths are measured in characters (runes). Measuring bytes would shrink
// the bands to a third of their stated size for Chinese titles, because each
// Han character occupies three bytes in UTF-8.
func (s *OcrService) calculateBookNameScore(text string) int {
	hanCount := len(bookScoreHanRe.FindAllString(text, -1))
	score := hanCount * 2

	switch n := len([]rune(text)); {
	case n >= 2 && n <= 10:
		score += 10
	case n >= 11 && n <= 20:
		score += 5
	}

	if strings.ContainsAny(text, "《》") {
		score += 15
	}
	if hanCount >= 4 {
		score += 10
	}
	if text != "" && text[0] >= 'A' && text[0] <= 'Z' {
		score -= 5
	}
	if hanCount >= 6 {
		score += 5
	}
	return score
}
// authorScoreHanRe matches a single Han character. It is compiled once
// instead of on every call.
var authorScoreHanRe = regexp.MustCompile(`\p{Han}`)

// calculateAuthorScore returns a heuristic score for an extracted author
// name; higher is better. author is the cleaned name and originalText is the
// OCR fragment it was extracted from.
//
// Scoring:
//   - +10 when originalText contains "/";
//   - +8 when originalText contains "著", else +6 for "编", else +5 for "译";
//   - +3 per Han character in author;
//   - +10 when author is 2-5 characters long;
//   - +8 when author contains a space or a middle dot.
//
// The length bonus counts characters (runes). Counting bytes would give the
// bonus to a single Han character (three bytes) and deny it to every ordinary
// two- to four-character Chinese name.
func (s *OcrService) calculateAuthorScore(author, originalText string) int {
	score := 0
	if strings.Contains(originalText, "/") {
		score += 10
	}

	switch {
	case strings.Contains(originalText, "著"):
		score += 8
	case strings.Contains(originalText, "编"):
		score += 6
	case strings.Contains(originalText, "译"):
		score += 5
	}

	score += len(authorScoreHanRe.FindAllString(author, -1)) * 3

	if n := len([]rune(author)); n >= 2 && n <= 5 {
		score += 10
	}
	if strings.Contains(author, " ") || strings.Contains(author, "·") {
		score += 8
	}
	return score
}
// calculatePublisherScore returns a heuristic score for a publisher
// candidate; higher is better.
//
// The text earns 20 points for containing "出版社", or 10 points for
// containing only "出版". Containing "press" (case-insensitively) adds a
// further 15 points.
func (s *OcrService) calculatePublisherScore(text string) int {
	var total int

	switch {
	case strings.Contains(text, "出版社"):
		total = 20
	case strings.Contains(text, "出版"):
		total = 10
	}

	if lowered := strings.ToLower(text); strings.Contains(lowered, "press") {
		total += 15
	}
	return total
}
// selectBestCandidate returns the candidate with the highest priority. When
// several candidates share the highest priority the earliest one in the slice
// wins. An empty slice yields the zero candidate.
func (s *OcrService) selectBestCandidate(candidates []candidate) candidate {
	var winner candidate
	for i := range candidates {
		// The first element seeds the winner; later ones must strictly beat it.
		if i == 0 || candidates[i].priority > winner.priority {
			winner = candidates[i]
		}
	}
	return winner
}