// daShangDao_psiServer/service/ocr.go

package service

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"psi/config"
	systemRes "psi/models/response"
	"regexp"
	"strings"
	"time"
)

// OcrService groups the OCR operations of this package. It carries no state;
// RecognizeText is its exported entry point.
type OcrService struct{}

// ocrServiceRequest is the JSON body posted to the OCR service.
type ocrServiceRequest struct {
	// ImageBase64 is the image content, base64-encoded (standard encoding) by
	// RecognizeText.
	ImageBase64 string `json:"image_base64"`
}

// ocrServiceResponse is the JSON body decoded from the OCR service's reply.
type ocrServiceResponse struct {
	// Error is non-empty when the service reports a failure; RecognizeText
	// turns it into a Go error.
	Error string `json:"error,omitempty"`
	// Texts holds the recognized text entries passed on to analyzeBookInfo.
	Texts []string `json:"texts,omitempty"`
}

// RecognizeText sends imageData to the configured OCR service and returns the
// recognized text entries together with a best-effort guess of the book's
// title, author and publisher.
//
// The image is base64-encoded, wrapped in a JSON body and POSTed to
// config.AppConfig.OCR.ServiceUrl with a 60-second client timeout.
//
// An error is returned when the request cannot be serialized or sent, when the
// response body cannot be read or decoded, when the service reports an error
// in its payload, or when it answers with a non-2xx status code. Underlying
// errors are wrapped with %w so callers can inspect them with errors.Is/As.
func (s *OcrService) RecognizeText(imageData []byte) (systemRes.OcrResponse, error) {
	jsonData, err := json.Marshal(ocrServiceRequest{
		ImageBase64: base64.StdEncoding.EncodeToString(imageData),
	})
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("序列化请求数据失败: %w", err)
	}

	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Post(config.AppConfig.OCR.ServiceUrl, "application/json", bytes.NewReader(jsonData))
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("调用OCR服务失败: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("读取OCR响应失败: %w", err)
	}

	statusOK := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices

	var ocrResp ocrServiceResponse
	if err := json.Unmarshal(body, &ocrResp); err != nil {
		// A failing service often answers with a non-JSON error page; the
		// status code is then more informative than the decode error.
		if !statusOK {
			return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
		}
		return systemRes.OcrResponse{}, fmt.Errorf("解析OCR响应失败: %w", err)
	}

	// Prefer the service's own error message when it supplied one.
	if ocrResp.Error != "" {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR识别错误: %s", ocrResp.Error)
	}
	if !statusOK {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
	}

	return systemRes.OcrResponse{
		Success:       true,
		Texts:         ocrResp.Texts,
		GuessBookInfo: s.analyzeBookInfo(ocrResp.Texts),
	}, nil
}

// candidate is one possible value for a guessed field (title, author or
// publisher) collected by analyzeBookInfo.
type candidate struct {
	// text is the candidate value itself.
	text string
	// priority is the heuristic score; selectBestCandidate keeps the highest.
	priority int
	// index is the position of the originating entry in the OCR text slice.
	index int
}

// analyzeBookInfo scans the OCR text entries and guesses the book's title,
// author and publisher.
//
// Every non-blank entry (after trimming surrounding whitespace) is tested
// independently for each of the three roles and, when it qualifies, recorded
// as a scored candidate. For each role the highest-scoring candidate is
// written to the result; roles without candidates keep their zero value.
func (s *OcrService) analyzeBookInfo(texts []string) systemRes.GuessBookInfo {
	var (
		guess      systemRes.GuessBookInfo
		titles     []candidate
		authors    []candidate
		publishers []candidate
	)

	for idx, raw := range texts {
		line := strings.TrimSpace(raw)
		if line == "" {
			continue
		}

		if s.isBookName(line) {
			titles = append(titles, candidate{
				text:     line,
				priority: s.calculateBookNameScore(line),
				index:    idx,
			})
		}

		if name := s.extractAuthor(line); name != "" {
			authors = append(authors, candidate{
				text:     name,
				priority: s.calculateAuthorScore(name, line),
				index:    idx,
			})
		}

		if press := s.extractPublisher(line); press != "" {
			publishers = append(publishers, candidate{
				text:     press,
				priority: s.calculatePublisherScore(press),
				index:    idx,
			})
		}
	}

	if len(titles) > 0 {
		guess.BookName = s.selectBestCandidate(titles).text
	}
	if len(authors) > 0 {
		guess.Author = s.selectBestCandidate(authors).text
	}
	if len(publishers) > 0 {
		guess.Publisher = s.selectBestCandidate(publishers).text
	}

	return guess
}

// Tables used by isBookName. The regular expressions are compiled once at
// package initialisation rather than on every call.
var (
	// bookNameInvalidPatterns match lines that are rejected outright: digits
	// only, one to three capital letters, punctuation only, a leading
	// "<digits>." and a leading "第<digits>" volume marker.
	bookNameInvalidPatterns = []*regexp.Regexp{
		regexp.MustCompile(`^\d+$`),
		regexp.MustCompile(`^[A-Z]{1,3}$`),
		regexp.MustCompile(`^[\p{P}]+$`),
		regexp.MustCompile(`^\d+\.`),
		regexp.MustCompile(`^第\d+[卷册部辑]`),
	}

	// bookNameLongDigits matches a run of four or more digits.
	bookNameLongDigits = regexp.MustCompile(`\d{4,}`)

	// bookNamePrice matches a currency sign followed by digits.
	bookNamePrice = regexp.MustCompile(`[¥￥\$]\d+`)

	// bookNameHan matches a single Han character.
	bookNameHan = regexp.MustCompile(`[\p{Han}]`)

	// bookNameDescriptionKeywords are substrings that mark a line as
	// descriptive text rather than a title. They are matched against the
	// lower-cased line, so every entry must itself be lower case.
	bookNameDescriptionKeywords = []string{
		"版本", "插图", "阅读", "经典", "名家", "开本",
		"精装", "平装", "修订版", "增补版", "全集", "选集",
		"注释版", "译本", "原版", "引进版", "推荐", "畅销",
		"全新", "最新", "权威", "完整版", "简体", "繁体",
		"定价", "售价", "元", "isbn", "书号", "条码",
		"出版社", "出版", "发行", "印刷", "印次", "版次",
		"字数", "页数", "张", "册",
		"丛书", "系列", "文库", "书系", "读本", "教材",
		"教辅", "考试", "习题", "练习", "答案", "解析",
		"上册", "下册", "上卷", "下卷", "第一卷", "第二卷",
		"前言", "序言", "目录", "附录", "后记", "跋",
		"简介", "摘要", "概述", "导读", "书评", "推荐语",
		"获奖", "荣获", "提名", "榜单", "排行榜",
		"扫码", "二维码", "公众号", "微信", "关注",
		"配套", "资源", "下载", "音频", "视频", "课件",
		"适用", "适合", "读者", "对象", "年龄", "岁",
		"教育部", "新课标", "统编", "部编", "人教版",
	}
)

// isBookName reports whether text looks like a book title.
//
// A line is rejected when any of the following holds: its length is outside
// 2..30 bytes; it carries an author or publisher marker; it matches one of
// bookNameInvalidPatterns; it contains a description keyword
// (case-insensitively); it has two or more ASCII or two or more full-width
// commas; it ends with a full stop; it contains four or more consecutive
// digits or a price; it has fewer than two Han characters or Han characters
// make up less than 30% of its runes; or it consists of two or three
// space-separated parts that each hold 2..4 Han characters (which reads as a
// list of personal names).
func (s *OcrService) isBookName(text string) bool {
	// NOTE(review): len counts bytes, so for Han-only text this admits at most
	// 10 characters. Kept as-is because the scoring favours longer lines and
	// widening the bound would change which candidate wins — confirm intent.
	if len(text) < 2 || len(text) > 30 {
		return false
	}

	if s.hasAuthorMarker(text) || s.hasPublisherMarker(text) {
		return false
	}

	for _, re := range bookNameInvalidPatterns {
		if re.MatchString(text) {
			return false
		}
	}

	lowered := strings.ToLower(text)
	for _, keyword := range bookNameDescriptionKeywords {
		if strings.Contains(lowered, keyword) {
			return false
		}
	}

	if strings.Count(text, ",") >= 2 || strings.Count(text, "，") >= 2 {
		return false
	}

	if strings.HasSuffix(text, "。") || strings.HasSuffix(text, ".") {
		return false
	}

	if bookNameLongDigits.MatchString(text) || bookNamePrice.MatchString(text) {
		return false
	}

	hanCount := len(bookNameHan.FindAllString(text, -1))
	if hanCount < 2 {
		return false
	}
	if float64(hanCount) < float64(len([]rune(text)))*0.3 {
		return false
	}

	// Two or three short all-Han words separated by spaces look like a list of
	// personal names rather than a title.
	if strings.Contains(text, " ") && hanCount <= 10 {
		parts := strings.Fields(text)
		if len(parts) >= 2 && len(parts) <= 3 {
			allShortNames := true
			for _, part := range parts {
				if n := len(bookNameHan.FindAllString(part, -1)); n < 2 || n > 4 {
					allShortNames = false
					break
				}
			}
			if allShortNames {
				return false
			}
		}
	}

	return true
}

// extractAuthor returns the author name found in text, or "" when the line
// does not look like an author line.
//
// Three routes can produce a name, tried in order:
//  1. text carries an author marker and, once the marker is stripped, the
//     remainder is 2..20 bytes long: the stripped remainder is returned.
//  2. text has 2..15 Han characters, contains a space or "·", and every
//     non-empty piece between those separators is made of Han characters
//     only: text is returned unchanged.
//  3. text has 2..4 Han characters, no space or "·", and contains none of the
//     editorial role words: text is returned unchanged.
//
// NOTE(review): the 2..20 bound in route 1 is in bytes, i.e. at most six Han
// characters — confirm whether characters were intended.
func (s *OcrService) extractAuthor(text string) string {
	hanRe := regexp.MustCompile(`[\p{Han}]`)
	countHan := func(v string) int {
		return len(hanRe.FindAllString(v, -1))
	}

	if s.hasAuthorMarker(text) {
		name := strings.TrimSpace(s.removeAuthorMarker(text))
		if name != "" && len(name) >= 2 && len(name) <= 20 {
			return name
		}
	}

	// A bare role word is never a name.
	switch text {
	case "主编", "副主编", "编著", "编译":
		return ""
	}

	total := countHan(text)
	if total < 2 || total > 15 {
		return ""
	}

	spaced := strings.Contains(text, " ") || strings.Contains(text, "  ")
	dotted := strings.Contains(text, "·")

	if spaced || dotted {
		pieces := regexp.MustCompile(`[\s·]+`).Split(text, -1)
		pureHan := true
		for _, piece := range pieces {
			piece = strings.TrimSpace(piece)
			if piece == "" {
				continue
			}
			if countHan(piece) != len([]rune(piece)) {
				pureHan = false
				break
			}
		}
		if pureHan && len(pieces) >= 2 {
			return text
		}
		return ""
	}

	// Unseparated text: only short strings qualify as a single name.
	if total > 4 {
		return ""
	}
	for _, role := range []string{"主编", "副主编", "编著", "编译", "责任编辑", "责任校对", "封面设计"} {
		if strings.Contains(text, role) {
			return ""
		}
	}
	return text
}

// extractPublisher returns text unchanged when it contains one of the
// publisher keywords (compared case-insensitively), and "" otherwise.
func (s *OcrService) extractPublisher(text string) string {
	haystack := strings.ToLower(text)
	for _, kw := range [...]string{"出版社", "出版", "press", "Publishing"} {
		if needle := strings.ToLower(kw); strings.Index(haystack, needle) >= 0 {
			return text
		}
	}
	return ""
}

// hasAuthorMarker reports whether text contains any of the substrings treated
// as an author marker: "著", "编", "作者", "/" or "·".
func (s *OcrService) hasAuthorMarker(text string) bool {
	return strings.Contains(text, "著") ||
		strings.Contains(text, "编") ||
		strings.Contains(text, "作者") ||
		strings.Contains(text, "/") ||
		strings.Contains(text, "·")
}

// hasPublisherMarker reports whether text looks like it names a publisher.
//
// It returns true when text contains "出版", "press" or "publishing"
// (compared case-insensitively), or when it contains "人民" together with
// either "出" or "版".
//
// More specific shapes such as "…大学…出版", "…出版…集团" or
// "…人民…出版/出报" need no separate check: each of them contains "出版", or
// "人民" plus "出", and is therefore already covered by the tests below.
func (s *OcrService) hasPublisherMarker(text string) bool {
	lowered := strings.ToLower(text)
	for _, marker := range []string{"出版", "press", "publishing"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}

	return strings.Contains(text, "人民") &&
		(strings.Contains(text, "出") || strings.Contains(text, "版"))
}

// authorMarkerPatterns are tried in order by removeAuthorMarker; only the
// first one that matches is applied. The first two also swallow whitespace
// and "/" separators directly in front of the marker, so "张三/著" becomes
// "张三" rather than "张三/".
var authorMarkerPatterns = []*regexp.Regexp{
	// A trailing role word, optionally followed by further marker characters.
	regexp.MustCompile(`[\s/]*(?:主编|副主编|编著|编译|编者|译者|著者|作者)[著编译者]*$`),
	// A trailing run of single marker characters.
	regexp.MustCompile(`[\s/]*[著编译者]+$`),
	// A trailing "/" followed only by marker characters.
	regexp.MustCompile(`/[著编译编著译者作者]+$`),
	// Everything from the first "/" onwards.
	regexp.MustCompile(`/.*$`),
}

// removeAuthorMarker strips a trailing author-role marker (for example "著",
// "主编" or "/译 …") from text and returns the remainder with surrounding
// whitespace trimmed. When no pattern matches, the trimmed text is returned.
func (s *OcrService) removeAuthorMarker(text string) string {
	for _, re := range authorMarkerPatterns {
		if re.MatchString(text) {
			return strings.TrimSpace(re.ReplaceAllString(text, ""))
		}
	}
	return strings.TrimSpace(text)
}

// calculateBookNameScore rates how strongly text resembles a book title;
// higher is better.
//
// Scoring:
//   - +2 per Han character
//   - +10 when len(text) is 2..10, +5 when it is 11..20
//   - +15 when text contains "《" or "》"
//   - +10 when there are at least 4 Han characters, and a further +5 at 6
//   - -5 when text starts with an ASCII capital letter
//
// NOTE(review): the length tiers use len(text), which counts bytes rather
// than characters — confirm that this is intended.
func (s *OcrService) calculateBookNameScore(text string) int {
	hanCount := len(regexp.MustCompile(`[\p{Han}]`).FindAllString(text, -1))
	total := hanCount * 2

	switch size := len(text); {
	case size >= 2 && size <= 10:
		total += 10
	case size >= 11 && size <= 20:
		total += 5
	}

	if regexp.MustCompile(`[《》]`).MatchString(text) {
		total += 15
	}

	if regexp.MustCompile(`^[A-Z]`).MatchString(text) {
		total -= 5
	}

	if hanCount >= 4 {
		total += 10
	}
	if hanCount >= 6 {
		total += 5
	}

	return total
}

// authorScoreHan matches a single Han character; compiled once for
// calculateAuthorScore.
var authorScoreHan = regexp.MustCompile(`[\p{Han}]`)

// calculateAuthorScore rates how likely author, extracted from originalText,
// is a real author name; higher is better.
//
// Scoring:
//   - +10 when originalText contains "/"
//   - +8, +6 or +5 when originalText contains "著", "编" or "译" respectively
//     (only the first that applies)
//   - +3 per Han character in author
//   - +10 when author is 2..5 characters long
//   - +8 when author contains a space or "·"
//
// The short-name bonus counts characters (runes), not bytes, so that it also
// applies to Han names, which take three bytes per character in UTF-8.
func (s *OcrService) calculateAuthorScore(author, originalText string) int {
	score := 0

	if strings.Contains(originalText, "/") {
		score += 10
	}

	switch {
	case strings.Contains(originalText, "著"):
		score += 8
	case strings.Contains(originalText, "编"):
		score += 6
	case strings.Contains(originalText, "译"):
		score += 5
	}

	score += len(authorScoreHan.FindAllString(author, -1)) * 3

	if n := len([]rune(author)); n >= 2 && n <= 5 {
		score += 10
	}

	if strings.Contains(author, " ") || strings.Contains(author, "·") {
		score += 8
	}

	return score
}

// calculatePublisherScore rates how strongly text resembles a publisher name:
// 20 points for "出版社" (or 10 for just "出版"), plus 15 when it contains
// "press" in any letter case.
func (s *OcrService) calculatePublisherScore(text string) int {
	var total int

	switch {
	case strings.Contains(text, "出版社"):
		total = 20
	case strings.Contains(text, "出版"):
		total = 10
	}

	if lowered := strings.ToLower(text); strings.Contains(lowered, "press") {
		total += 15
	}

	return total
}

// selectBestCandidate returns the candidate with the highest priority. On a
// tie the earliest one in the slice wins. An empty slice yields the zero
// candidate.
func (s *OcrService) selectBestCandidate(candidates []candidate) candidate {
	var winner candidate
	for i, current := range candidates {
		if i == 0 || current.priority > winner.priority {
			winner = current
		}
	}
	return winner
}