daShangDao_psiServer/service/ocr.go
2026-06-18 13:01:56 +08:00

459 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package service
import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"psi/config"
systemRes "psi/models/response"
"regexp"
"strings"
"time"
)
// OcrService groups the OCR recognition call and the book-info heuristics
// defined in this file. It is an empty struct and carries no state.
type OcrService struct{}
// ocrServiceRequest is the JSON body that RecognizeText posts to the OCR service.
type ocrServiceRequest struct {
// ImageBase64 carries the image bytes encoded with standard base64.
ImageBase64 string `json:"image_base64"`
}
// ocrServiceResponse is the JSON payload that RecognizeText decodes from the
// OCR service reply.
type ocrServiceResponse struct {
// Error is non-empty when the service reports a failure; RecognizeText
// turns it into a Go error.
Error string `json:"error,omitempty"`
// Texts holds the recognized strings; RecognizeText passes them to
// analyzeBookInfo and returns them to the caller.
Texts []string `json:"texts,omitempty"`
}
// RecognizeText sends imageData to the configured OCR service and returns the
// recognized text lines together with a best-effort guess of the book info.
//
// The image is base64-encoded, wrapped in an ocrServiceRequest and POSTed as
// JSON to config.AppConfig.OCR.ServiceUrl with a 60-second client timeout.
//
// An error is returned when the request cannot be serialized or sent, the
// response body cannot be read or decoded, the service reports an error in its
// payload, or the HTTP status is not 2xx. Underlying errors are wrapped with
// %w so callers can inspect them with errors.Is / errors.As.
func (s *OcrService) RecognizeText(imageData []byte) (systemRes.OcrResponse, error) {
	payload, err := json.Marshal(ocrServiceRequest{
		ImageBase64: base64.StdEncoding.EncodeToString(imageData),
	})
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("序列化请求数据失败: %w", err)
	}

	client := &http.Client{Timeout: 60 * time.Second}
	resp, err := client.Post(config.AppConfig.OCR.ServiceUrl, "application/json", bytes.NewReader(payload))
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("调用OCR服务失败: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("读取OCR响应失败: %w", err)
	}

	statusOK := resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices

	var ocrResp ocrServiceResponse
	if err := json.Unmarshal(body, &ocrResp); err != nil {
		// A failing service often answers with a non-JSON error page; the
		// status code is then more informative than the decode error.
		if !statusOK {
			return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
		}
		return systemRes.OcrResponse{}, fmt.Errorf("解析OCR响应失败: %w", err)
	}

	// Prefer the service's own error message when it supplied one.
	if ocrResp.Error != "" {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR识别错误: %s", ocrResp.Error)
	}
	// Never report success for a non-2xx reply, even if its body decoded.
	if !statusOK {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
	}

	return systemRes.OcrResponse{
		Success:       true,
		Texts:         ocrResp.Texts,
		GuessBookInfo: s.analyzeBookInfo(ocrResp.Texts),
	}, nil
}
// candidate is one scored guess for a book field (title, author or publisher)
// collected by analyzeBookInfo.
type candidate struct {
// text is the candidate value.
text string
// priority is the heuristic score; selectBestCandidate keeps the highest.
priority int
// index is the position of the originating line in the OCR text slice.
index int
}
// analyzeBookInfo derives a best-guess title, author and publisher from the
// OCR text lines.
//
// Every line is trimmed and blank lines are skipped. A line may contribute a
// candidate to any of the three fields; each candidate is scored and the
// highest-scoring one per field (as chosen by selectBestCandidate) is used.
// Fields without any candidate keep their zero value.
func (s *OcrService) analyzeBookInfo(texts []string) systemRes.GuessBookInfo {
	var info systemRes.GuessBookInfo
	if len(texts) == 0 {
		return info
	}

	var titles, authors, publishers []candidate
	for idx := range texts {
		line := strings.TrimSpace(texts[idx])
		if line == "" {
			continue
		}

		if s.isBookName(line) {
			titles = append(titles, candidate{
				text:     line,
				priority: s.calculateBookNameScore(line),
				index:    idx,
			})
		}

		author := s.extractAuthor(line)
		if author != "" {
			authors = append(authors, candidate{
				text:     author,
				priority: s.calculateAuthorScore(author, line),
				index:    idx,
			})
		}

		publisher := s.extractPublisher(line)
		if publisher != "" {
			publishers = append(publishers, candidate{
				text:     publisher,
				priority: s.calculatePublisherScore(publisher),
				index:    idx,
			})
		}
	}

	if len(titles) != 0 {
		info.BookName = s.selectBestCandidate(titles).text
	}
	if len(authors) != 0 {
		info.Author = s.selectBestCandidate(authors).text
	}
	if len(publishers) != 0 {
		info.Publisher = s.selectBestCandidate(publishers).text
	}
	return info
}
// Patterns and keyword tables used by isBookName. They are built once at
// package initialisation instead of on every call.
var (
	// bookNameHanRe matches a single Han (Chinese) character.
	bookNameHanRe = regexp.MustCompile(`\p{Han}`)

	// bookNameLongDigitsRe matches a run of four or more digits.
	bookNameLongDigitsRe = regexp.MustCompile(`\d{4,}`)

	// bookNamePriceRe matches a currency sign followed by digits. The sign is
	// the half-width yen (U+00A5), the full-width yen (U+FFE5) or a dollar.
	bookNamePriceRe = regexp.MustCompile(`[\x{00A5}\x{FFE5}\$]\d+`)

	// bookNameInvalidRes lists line shapes that are never treated as a title:
	// digits only, one to three capital letters, punctuation only, a numbered
	// list item, and a volume marker.
	bookNameInvalidRes = []*regexp.Regexp{
		regexp.MustCompile(`^\d+$`),
		regexp.MustCompile(`^[A-Z]{1,3}$`),
		regexp.MustCompile(`^[\p{P}]+$`),
		regexp.MustCompile(`^\d+\.`),
		regexp.MustCompile(`^第\d+[卷册部辑]`),
	}

	// bookNameDescriptionKeywords are substrings that mark a line as cover
	// blurb, pricing or edition text rather than a title. They are matched
	// against the lower-cased line, so every entry must itself be lower-case.
	bookNameDescriptionKeywords = []string{
		"版本", "插图", "阅读", "经典", "名家", "开本",
		"精装", "平装", "修订版", "增补版", "全集", "选集",
		"注释版", "译本", "原版", "引进版", "推荐", "畅销",
		"全新", "最新", "权威", "完整版", "简体", "繁体",
		"定价", "售价", "元", "isbn", "书号", "条码",
		"出版社", "出版", "发行", "印刷", "印次", "版次",
		"字数", "页数", "张", "册",
		"丛书", "系列", "文库", "书系", "读本", "教材",
		"教辅", "考试", "习题", "练习", "答案", "解析",
		"上册", "下册", "上卷", "下卷", "第一卷", "第二卷",
		"前言", "序言", "目录", "附录", "后记", "跋",
		"简介", "摘要", "概述", "导读", "书评", "推荐语",
		"获奖", "荣获", "提名", "榜单", "排行榜",
		"扫码", "二维码", "公众号", "微信", "关注",
		"配套", "资源", "下载", "音频", "视频", "课件",
		"适用", "适合", "读者", "对象", "年龄", "岁",
		"教育部", "新课标", "统编", "部编", "人教版",
	}
)

// isBookName reports whether text looks like a book title.
//
// A line is rejected when any of the following holds:
//   - it is shorter than 2 or longer than 30 characters (runes, not bytes);
//   - it carries an author or publisher marker;
//   - it matches one of bookNameInvalidRes;
//   - it contains one of bookNameDescriptionKeywords (case-insensitive);
//   - it contains two or more half-width or two or more full-width commas;
//   - it ends with a full stop ("。" or ".");
//   - it contains four or more consecutive digits, or a price;
//   - it has fewer than two Han characters, or Han characters make up less
//     than 30% of its characters;
//   - it is two or three space-separated groups of 2–4 Han characters each,
//     which looks like a list of personal names.
func (s *OcrService) isBookName(text string) bool {
	// Lengths are measured in runes: a Han character is three bytes in UTF-8,
	// so a byte-based limit of 30 would reject titles longer than ten Chinese
	// characters.
	runeCount := len([]rune(text))
	if runeCount < 2 || runeCount > 30 {
		return false
	}
	if s.hasAuthorMarker(text) || s.hasPublisherMarker(text) {
		return false
	}
	for _, re := range bookNameInvalidRes {
		if re.MatchString(text) {
			return false
		}
	}

	lowered := strings.ToLower(text)
	for _, keyword := range bookNameDescriptionKeywords {
		if strings.Contains(lowered, keyword) {
			return false
		}
	}

	// "\uFF0C" is the full-width comma. It is written as an escape so it
	// cannot be lost or confused with the ASCII comma. Counting the empty
	// string here would be wrong: strings.Count(s, "") is 1 + the rune count,
	// which is always >= 2 for non-empty text.
	if strings.Count(text, ",") >= 2 || strings.Count(text, "\uFF0C") >= 2 {
		return false
	}
	if strings.HasSuffix(text, "。") || strings.HasSuffix(text, ".") {
		return false
	}
	if bookNameLongDigitsRe.MatchString(text) || bookNamePriceRe.MatchString(text) {
		return false
	}

	hanCount := len(bookNameHanRe.FindAllString(text, -1))
	if hanCount < 2 {
		return false
	}
	if float64(hanCount) < float64(runeCount)*0.3 {
		return false
	}

	// Two or three short all-Han groups separated by spaces read like a list
	// of personal names rather than a title.
	if strings.Contains(text, " ") && hanCount <= 10 {
		parts := strings.Fields(text)
		if len(parts) >= 2 && len(parts) <= 3 {
			allShortNames := true
			for _, part := range parts {
				n := len(bookNameHanRe.FindAllString(part, -1))
				if n < 2 || n > 4 {
					allShortNames = false
					break
				}
			}
			if allShortNames {
				return false
			}
		}
	}
	return true
}
// Patterns and word lists used by extractAuthor, built once at package
// initialisation.
var (
	// authorHanRe matches a single Han (Chinese) character.
	authorHanRe = regexp.MustCompile(`\p{Han}`)

	// authorSplitRe splits a line on runs of whitespace and middle dots.
	authorSplitRe = regexp.MustCompile(`[\s·]+`)

	// authorRoleWords are role or credit labels; a short line containing one
	// of them is not a personal name.
	authorRoleWords = []string{"主编", "副主编", "编著", "编译", "责任编辑", "责任校对", "封面设计"}
)

// extractAuthor returns the author name found in text, or "" when the line
// does not look like an author.
//
// Steps, in order:
//  1. If the line has an author marker, the marker is removed; the remainder
//     is returned when it is 2–20 characters (runes) long.
//  2. A line that is exactly a bare role word yields "".
//  3. Otherwise the line must contain 2–15 Han characters. If it contains a
//     space or a middle dot, it is returned unchanged when every
//     separator-delimited part consists solely of Han characters. If it has
//     no separator, it is returned unchanged when it has 2–4 Han characters
//     and contains none of authorRoleWords.
func (s *OcrService) extractAuthor(text string) string {
	if s.hasAuthorMarker(text) {
		cleaned := strings.TrimSpace(s.removeAuthorMarker(text))
		// Measured in runes: a byte limit of 20 would reject names longer
		// than six Han characters and let the raw line, marker included,
		// fall through to the heuristics below.
		if n := len([]rune(cleaned)); n >= 2 && n <= 20 {
			return cleaned
		}
	}

	switch text {
	case "主编", "副主编", "编著", "编译":
		return ""
	}

	hanCount := len(authorHanRe.FindAllString(text, -1))
	if hanCount < 2 || hanCount > 15 {
		return ""
	}

	// NOTE(review): the previous code tested for the ASCII space twice; a
	// full-width space may have been intended for the second test — confirm.
	hasSpace := strings.Contains(text, " ")
	hasDot := strings.Contains(text, "·")

	if hasSpace || hasDot {
		parts := authorSplitRe.Split(text, -1)
		if len(parts) < 2 {
			return ""
		}
		for _, part := range parts {
			part = strings.TrimSpace(part)
			if part == "" {
				continue
			}
			if len(authorHanRe.FindAllString(part, -1)) != len([]rune(part)) {
				return ""
			}
		}
		return text
	}

	if hanCount > 4 {
		return ""
	}
	for _, word := range authorRoleWords {
		if strings.Contains(text, word) {
			return ""
		}
	}
	return text
}
// extractPublisher returns text unchanged when it contains a publisher keyword
// (compared case-insensitively), and "" otherwise.
func (s *OcrService) extractPublisher(text string) string {
	haystack := strings.ToLower(text)
	keywords := [...]string{"出版社", "出版", "press", "Publishing"}
	for i := 0; i < len(keywords); i++ {
		needle := strings.ToLower(keywords[i])
		if !strings.Contains(haystack, needle) {
			continue
		}
		return text
	}
	return ""
}
// hasAuthorMarker reports whether text contains any of the substrings this
// file treats as an author marker: "著", "编", "作者", "/" or "·".
func (s *OcrService) hasAuthorMarker(text string) bool {
	markers := [...]string{"著", "编", "作者", "/", "·"}
	found := false
	for i := 0; i < len(markers) && !found; i++ {
		found = strings.Contains(text, markers[i])
	}
	return found
}
// hasPublisherMarker reports whether text looks like a publisher line.
//
// It returns true when the text contains "出版", "press" or "publishing"
// (the Latin keywords are compared case-insensitively), or when it contains
// "人民" together with either "出" or "版".
//
// The regular-expression list that used to sit between those two checks has
// been removed because it could never change the result: every pattern but
// one contained the literal "出版", which the substring check already
// catches, and the remaining one required "人民" followed by "出", which the
// final check already catches. "出版社" is likewise covered by "出版".
func (s *OcrService) hasPublisherMarker(text string) bool {
	lowered := strings.ToLower(text)
	for _, marker := range []string{"出版", "press", "publishing"} {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return strings.Contains(text, "人民") &&
		(strings.Contains(text, "出") || strings.Contains(text, "版"))
}
// authorMarkerSuffixRes are the trailing author-marker shapes stripped by
// removeAuthorMarker, tried in order; only the first one that matches is
// applied. They are compiled once at package initialisation.
var authorMarkerSuffixRes = []*regexp.Regexp{
	// A role word, optionally followed by more marker characters, together
	// with any whitespace or slashes in front of it.
	regexp.MustCompile(`[\s/]*(?:主编|副主编|编著|编译|编者|译者|著者|作者)[著编译者]*$`),
	// A run of single marker characters, together with any whitespace or
	// slashes in front of it.
	regexp.MustCompile(`[\s/]*[著编译者]+$`),
	// A slash followed only by marker characters.
	regexp.MustCompile(`/[著编译者作]+$`),
	// Anything from the first slash onwards.
	regexp.MustCompile(`/.*$`),
}

// removeAuthorMarker strips a trailing author marker from text and returns the
// remainder with surrounding whitespace trimmed. Text without a recognised
// marker is returned trimmed but otherwise unchanged.
//
// The first two patterns also consume a separator slash in front of the
// marker. Without that, "余华/著" was reduced to "余华/": the bare-suffix
// pattern matched before the slash patterns were tried and left the slash.
func (s *OcrService) removeAuthorMarker(text string) string {
	result := text
	for _, re := range authorMarkerSuffixRes {
		if re.MatchString(result) {
			result = re.ReplaceAllString(result, "")
			break
		}
	}
	return strings.TrimSpace(result)
}
// titleScoreHanRe matches a single Han (Chinese) character. It is compiled
// once at package initialisation for calculateBookNameScore.
var titleScoreHanRe = regexp.MustCompile(`\p{Han}`)

// calculateBookNameScore returns a heuristic score for how title-like text is;
// higher is better. The score is the sum of:
//   - 2 points per Han character;
//   - +10 when the text is 2–10 characters long, +5 when it is 11–20;
//   - +15 when it contains a Chinese title bracket ("《" or "》");
//   - +10 when it has at least 4 Han characters, and a further +5 for 6 or more;
//   - -5 when it starts with an ASCII capital letter.
//
// Length is measured in runes. Counting bytes would put a four-character
// Chinese title (12 bytes) in the 11–20 tier instead of the 2–10 one.
func (s *OcrService) calculateBookNameScore(text string) int {
	hanCount := len(titleScoreHanRe.FindAllString(text, -1))
	score := hanCount * 2

	switch n := len([]rune(text)); {
	case n >= 2 && n <= 10:
		score += 10
	case n >= 11 && n <= 20:
		score += 5
	}

	if strings.ContainsAny(text, "《》") {
		score += 15
	}
	if hanCount >= 4 {
		score += 10
	}
	if hanCount >= 6 {
		score += 5
	}
	if text != "" && text[0] >= 'A' && text[0] <= 'Z' {
		score -= 5
	}
	return score
}
// authorScoreHanRe matches a single Han (Chinese) character. It is compiled
// once at package initialisation for calculateAuthorScore.
var authorScoreHanRe = regexp.MustCompile(`\p{Han}`)

// calculateAuthorScore returns a heuristic score for an extracted author;
// higher is better. author is the extracted name and originalText is the line
// it came from. The score is the sum of:
//   - +10 when the line contains "/";
//   - +8 when the line contains "著", otherwise +6 for "编", otherwise +5 for "译";
//   - 3 points per Han character in the name;
//   - +10 when the name is 2–5 characters long;
//   - +8 when the name contains a space or a middle dot.
//
// Name length is measured in runes. Counting bytes would mean the 2–5 bonus
// never applies to a Chinese name of two or more characters (6+ bytes).
func (s *OcrService) calculateAuthorScore(author, originalText string) int {
	score := 0
	if strings.Contains(originalText, "/") {
		score += 10
	}

	switch {
	case strings.Contains(originalText, "著"):
		score += 8
	case strings.Contains(originalText, "编"):
		score += 6
	case strings.Contains(originalText, "译"):
		score += 5
	}

	score += len(authorScoreHanRe.FindAllString(author, -1)) * 3

	if n := len([]rune(author)); n >= 2 && n <= 5 {
		score += 10
	}
	if strings.Contains(author, " ") || strings.Contains(author, "·") {
		score += 8
	}
	return score
}
// calculatePublisherScore returns a heuristic score for a publisher candidate;
// higher is better. A line containing "出版社" earns 20 points, one containing
// only "出版" earns 10, and "press" (case-insensitive) adds 15 on top.
func (s *OcrService) calculatePublisherScore(text string) int {
	var total int
	switch {
	case strings.Contains(text, "出版社"):
		total = 20
	case strings.Contains(text, "出版"):
		total = 10
	}
	if lowered := strings.ToLower(text); strings.Contains(lowered, "press") {
		total += 15
	}
	return total
}
// selectBestCandidate returns the candidate with the highest priority. When
// several share the top priority the earliest one in the slice wins. An empty
// slice yields the zero candidate.
func (s *OcrService) selectBestCandidate(candidates []candidate) candidate {
	var winner candidate
	for i := range candidates {
		if i == 0 || candidates[i].priority > winner.priority {
			winner = candidates[i]
		}
	}
	return winner
}