package service

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"
	"time"

	"psi/config"
	systemRes "psi/models/response"
)

// ocrHTTPClient is shared by all OCR requests so the underlying transport can
// reuse connections instead of building a new client per call.
var ocrHTTPClient = &http.Client{Timeout: 60 * time.Second}

// Regular expressions used by the book-info heuristics. They are compiled
// once at package initialisation instead of on every call.
var (
	// ocrHanRe matches a single Han (Chinese) character.
	ocrHanRe = regexp.MustCompile(`[\p{Han}]`)

	// ocrInvalidBookNameRes are shapes that can never be a book title:
	// pure digits, 1-3 upper-case letters, pure punctuation, a numbered
	// list item ("1.") and a volume marker ("第3卷").
	ocrInvalidBookNameRes = []*regexp.Regexp{
		regexp.MustCompile(`^\d+$`),
		regexp.MustCompile(`^[A-Z]{1,3}$`),
		regexp.MustCompile(`^[\p{P}]+$`),
		regexp.MustCompile(`^\d+\.`),
		regexp.MustCompile(`^第\d+[卷册部辑]`),
	}

	// ocrLongDigitsRe matches a run of four or more digits.
	ocrLongDigitsRe = regexp.MustCompile(`\d{4,}`)

	// ocrPriceRe matches a currency sign followed by digits. \x{FFE5} is the
	// full-width yen sign; the original class listed the half-width sign
	// twice, which looks like a lost full-width variant.
	ocrPriceRe = regexp.MustCompile(`[¥\x{FFE5}\$]\d+`)

	// ocrNameSepRe splits a multi-part name on whitespace, the ideographic
	// space (\x{3000}; Go's \s is ASCII-only) and the middle dot.
	ocrNameSepRe = regexp.MustCompile(`[\s\x{3000}·]+`)

	// ocrAuthorMarkerRes are tried in order by removeAuthorMarker; only the
	// first one that matches is applied.
	ocrAuthorMarkerRes = []*regexp.Regexp{
		regexp.MustCompile(`\s*(?:主编|副主编|编著|编译|编者|译者|著者|作者)[著编译者]*$`),
		regexp.MustCompile(`\s*[著编译者]+$`),
		regexp.MustCompile(`/[著编译编著译者作者]+$`),
		regexp.MustCompile(`/.*$`),
	}

	// ocrTitleBracketRe matches Chinese title brackets.
	ocrTitleBracketRe = regexp.MustCompile(`[《》]`)

	// ocrLeadingUpperRe matches text that starts with an ASCII capital.
	ocrLeadingUpperRe = regexp.MustCompile(`^[A-Z]`)
)

// ocrPublisherKeywords are matched against lower-cased text, so every entry
// must itself be lower-case.
var ocrPublisherKeywords = []string{"出版社", "出版", "press", "publishing"}

// ocrAuthorMarkers are substrings that flag a line as carrying author
// information.
var ocrAuthorMarkers = []string{"著", "编", "作者", "/", "·"}

// ocrInvalidAuthorWords are role labels that must not be mistaken for a
// short author name.
var ocrInvalidAuthorWords = []string{"主编", "副主编", "编著", "编译", "责任编辑", "责任校对", "封面设计"}

// ocrDescriptionKeywords are words that indicate descriptive or marketing
// text rather than a title. They are matched against lower-cased text, so
// Latin entries are stored lower-case ("isbn"); the original compared the
// lower-cased text with "ISBN", which could never match.
var ocrDescriptionKeywords = []string{
	"版本", "插图", "阅读", "经典", "名家", "开本", "精装", "平装", "修订版", "增补版",
	"全集", "选集", "注释版", "译本", "原版", "引进版", "推荐", "畅销", "全新", "最新",
	"权威", "完整版", "简体", "繁体", "定价", "售价", "元", "isbn", "书号", "条码",
	"出版社", "出版", "发行", "印刷", "印次", "版次", "字数", "页数", "张", "册",
	"丛书", "系列", "文库", "书系", "读本", "教材", "教辅", "考试", "习题", "练习",
	"答案", "解析", "上册", "下册", "上卷", "下卷", "第一卷", "第二卷", "前言", "序言",
	"目录", "附录", "后记", "跋", "简介", "摘要", "概述", "导读", "书评", "推荐语",
	"获奖", "荣获", "提名", "榜单", "排行榜", "扫码", "二维码", "公众号", "微信", "关注",
	"配套", "资源", "下载", "音频", "视频", "课件", "适用", "适合", "读者", "对象",
	"年龄", "岁", "教育部", "新课标", "统编", "部编", "人教版",
}

// ocrCountHan returns the number of Han characters in s.
func ocrCountHan(s string) int {
	return len(ocrHanRe.FindAllString(s, -1))
}

// ocrContainsAny reports whether s contains at least one of subs.
func ocrContainsAny(s string, subs []string) bool {
	for _, sub := range subs {
		if strings.Contains(s, sub) {
			return true
		}
	}
	return false
}

// OcrService sends images to the configured OCR HTTP service and guesses
// book metadata from the recognised text.
type OcrService struct{}

// ocrServiceRequest is the JSON body posted to the OCR service.
type ocrServiceRequest struct {
	ImageBase64 string `json:"image_base64"`
}

// ocrServiceResponse is the JSON body returned by the OCR service.
type ocrServiceResponse struct {
	Error string   `json:"error,omitempty"`
	Texts []string `json:"texts,omitempty"`
}

// RecognizeText base64-encodes imageData, posts it as JSON to the URL in
// config.AppConfig.OCR.ServiceUrl and returns the recognised text lines
// together with a best-effort guess of book name, author and publisher.
//
// An error is returned when the request cannot be built or sent, when the
// response cannot be read or parsed, when the service reports an error in its
// payload, or when it answers with a non-2xx status.
func (s *OcrService) RecognizeText(imageData []byte) (systemRes.OcrResponse, error) {
	jsonData, err := json.Marshal(ocrServiceRequest{
		ImageBase64: base64.StdEncoding.EncodeToString(imageData),
	})
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("序列化请求数据失败: %w", err)
	}

	resp, err := ocrHTTPClient.Post(config.AppConfig.OCR.ServiceUrl, "application/json", bytes.NewReader(jsonData))
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("调用OCR服务失败: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return systemRes.OcrResponse{}, fmt.Errorf("读取OCR响应失败: %w", err)
	}

	statusOK := resp.StatusCode >= 200 && resp.StatusCode < 300

	var ocrResp ocrServiceResponse
	if err := json.Unmarshal(body, &ocrResp); err != nil {
		// A failing service often answers with a non-JSON error page; the
		// status code is then more useful than the parse error.
		if !statusOK {
			return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
		}
		return systemRes.OcrResponse{}, fmt.Errorf("解析OCR响应失败: %w", err)
	}

	// Prefer the service's own error message when it supplies one.
	if ocrResp.Error != "" {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR识别错误: %s", ocrResp.Error)
	}
	if !statusOK {
		return systemRes.OcrResponse{}, fmt.Errorf("OCR服务返回异常状态码: %d", resp.StatusCode)
	}

	return systemRes.OcrResponse{
		Success:       true,
		Texts:         ocrResp.Texts,
		GuessBookInfo: s.analyzeBookInfo(ocrResp.Texts),
	}, nil
}

// candidate is one possible value for a book field together with its score.
type candidate struct {
	text     string // candidate value
	priority int    // heuristic score; higher wins
	index    int    // position of the source line in the OCR output
}

// analyzeBookInfo scans the OCR lines and, for each of book name, author and
// publisher, keeps the highest-scoring candidate. Fields with no candidate
// are left at their zero value.
func (s *OcrService) analyzeBookInfo(texts []string) systemRes.GuessBookInfo {
	var result systemRes.GuessBookInfo
	if len(texts) == 0 {
		return result
	}

	var bookNames, authors, publishers []candidate
	for i, text := range texts {
		text = strings.TrimSpace(text)
		if text == "" {
			continue
		}

		// One line may contribute to several fields.
		if s.isBookName(text) {
			bookNames = append(bookNames, candidate{text: text, priority: s.calculateBookNameScore(text), index: i})
		}
		if author := s.extractAuthor(text); author != "" {
			authors = append(authors, candidate{text: author, priority: s.calculateAuthorScore(author, text), index: i})
		}
		if publisher := s.extractPublisher(text); publisher != "" {
			publishers = append(publishers, candidate{text: publisher, priority: s.calculatePublisherScore(publisher), index: i})
		}
	}

	if len(bookNames) > 0 {
		result.BookName = s.selectBestCandidate(bookNames).text
	}
	if len(authors) > 0 {
		result.Author = s.selectBestCandidate(authors).text
	}
	if len(publishers) > 0 {
		result.Publisher = s.selectBestCandidate(publishers).text
	}
	return result
}

// isBookName reports whether text looks like a book title. It rejects lines
// that carry author or publisher markers, match a known non-title shape,
// contain descriptive keywords, look like a sentence or a price, contain too
// few Chinese characters, or look like a short list of personal names.
func (s *OcrService) isBookName(text string) bool {
	// NOTE(review): len counts bytes, so with 3-byte Han characters the upper
	// bound is roughly 10 Chinese characters. Kept as-is because the scoring
	// favours longer text and relies on this filter; confirm before switching
	// to a rune count.
	if len(text) < 2 || len(text) > 30 {
		return false
	}
	if s.hasAuthorMarker(text) || s.hasPublisherMarker(text) {
		return false
	}
	for _, re := range ocrInvalidBookNameRes {
		if re.MatchString(text) {
			return false
		}
	}
	if ocrContainsAny(strings.ToLower(text), ocrDescriptionKeywords) {
		return false
	}

	// Two or more commas suggest a sentence or an enumeration. "\uFF0C" is
	// the full-width comma; the original tested the ASCII comma twice.
	if strings.Count(text, ",") >= 2 || strings.Count(text, "\uFF0C") >= 2 {
		return false
	}
	if strings.HasSuffix(text, "。") || strings.HasSuffix(text, ".") {
		return false
	}
	if ocrLongDigitsRe.MatchString(text) || ocrPriceRe.MatchString(text) {
		return false
	}

	chineseCount := ocrCountHan(text)
	if chineseCount < 2 {
		return false
	}
	// At least 30% of the characters must be Chinese.
	if float64(chineseCount) < float64(len([]rune(text)))*0.3 {
		return false
	}

	// Two or three space-separated chunks of 2-4 Chinese characters each
	// look like a list of personal names rather than a title.
	if strings.Contains(text, " ") && chineseCount <= 10 {
		parts := strings.Fields(text)
		if len(parts) >= 2 {
			allShortNames := true
			for _, part := range parts {
				if n := ocrCountHan(part); n < 2 || n > 4 {
					allShortNames = false
					break
				}
			}
			if allShortNames && len(parts) <= 3 {
				return false
			}
		}
	}
	return true
}

// extractAuthor returns the author name found in text, or "" when the line
// does not look like an author line.
//
// Lines with an author marker are returned with the marker stripped. Lines
// without one are accepted when they are either several all-Chinese parts
// separated by spaces or middle dots, or a bare 2-4 character Chinese name
// that is not a role label.
func (s *OcrService) extractAuthor(text string) string {
	if s.hasAuthorMarker(text) {
		cleaned := strings.TrimSpace(s.removeAuthorMarker(text))
		// NOTE(review): byte lengths, so at most 6 Chinese characters pass
		// here; longer names can still be accepted by the checks below.
		if len(cleaned) >= 2 && len(cleaned) <= 20 {
			return cleaned
		}
	}

	switch text {
	case "主编", "副主编", "编著", "编译":
		return ""
	}

	chineseCount := ocrCountHan(text)
	if chineseCount < 2 || chineseCount > 15 {
		return ""
	}

	// "\u3000" is the ideographic (full-width) space; the original tested the
	// ASCII space twice.
	hasSpace := strings.Contains(text, " ") || strings.Contains(text, "\u3000")
	hasDot := strings.Contains(text, "·")

	if hasSpace || hasDot {
		parts := ocrNameSepRe.Split(text, -1)
		allChinese := true
		for _, part := range parts {
			part = strings.TrimSpace(part)
			if part == "" {
				continue
			}
			// Every character of the part must be a Han character.
			if ocrCountHan(part) != len([]rune(part)) {
				allChinese = false
				break
			}
		}
		if allChinese && len(parts) >= 2 {
			return text
		}
		return ""
	}

	// No separator: accept only a short bare name that is not a role label.
	if chineseCount <= 4 {
		if ocrContainsAny(text, ocrInvalidAuthorWords) {
			return ""
		}
		return text
	}
	return ""
}

// extractPublisher returns text unchanged when it contains a publisher
// keyword (case-insensitive), and "" otherwise.
func (s *OcrService) extractPublisher(text string) string {
	if ocrContainsAny(strings.ToLower(text), ocrPublisherKeywords) {
		return text
	}
	return ""
}

// hasAuthorMarker reports whether text contains any author marker.
func (s *OcrService) hasAuthorMarker(text string) bool {
	return ocrContainsAny(text, ocrAuthorMarkers)
}

// hasPublisherMarker reports whether text looks like a publisher line: it
// contains a publisher keyword (case-insensitive), or it contains "人民"
// together with "出" or "版".
//
// The original also ran a list of regular expressions here. Each of them
// either required the literal "出版" (already covered by the keyword check)
// or "人民" followed by "出" (covered by the last check), so they could never
// change the result and have been dropped.
func (s *OcrService) hasPublisherMarker(text string) bool {
	if ocrContainsAny(strings.ToLower(text), ocrPublisherKeywords) {
		return true
	}
	// Loose match; presumably meant to tolerate OCR misreads of
	// "人民…出版社" - confirm.
	return strings.Contains(text, "人民") &&
		(strings.Contains(text, "出") || strings.Contains(text, "版"))
}

// removeAuthorMarker strips a trailing author-role marker (for example
// " 著", "主编" or "/译") from text. Only the first pattern that matches is
// applied; the result is trimmed of surrounding whitespace.
func (s *OcrService) removeAuthorMarker(text string) string {
	result := text
	for _, re := range ocrAuthorMarkerRes {
		if re.MatchString(result) {
			result = re.ReplaceAllString(result, "")
			break
		}
	}
	return strings.TrimSpace(result)
}

// calculateBookNameScore rates how title-like text is; higher is better.
func (s *OcrService) calculateBookNameScore(text string) int {
	chineseCount := ocrCountHan(text)
	score := chineseCount * 2

	// NOTE(review): the length bands are in bytes, not characters.
	switch n := len(text); {
	case n >= 2 && n <= 10:
		score += 10
	case n >= 11 && n <= 20:
		score += 5
	}

	if ocrTitleBracketRe.MatchString(text) {
		score += 15
	}
	if chineseCount >= 4 {
		score += 10
	}
	if ocrLeadingUpperRe.MatchString(text) {
		score -= 5
	}
	if chineseCount >= 6 {
		score += 5
	}
	return score
}

// calculateAuthorScore rates an extracted author; higher is better.
// originalText is the OCR line the author was extracted from and is
// inspected for role markers.
func (s *OcrService) calculateAuthorScore(author, originalText string) int {
	score := 0
	if strings.Contains(originalText, "/") {
		score += 10
	}

	// Only the strongest role marker on the line counts.
	switch {
	case strings.Contains(originalText, "著"):
		score += 8
	case strings.Contains(originalText, "编"):
		score += 6
	case strings.Contains(originalText, "译"):
		score += 5
	}

	score += ocrCountHan(author) * 3

	// NOTE(review): byte length, so a Chinese name of two or more characters
	// (6+ bytes) never earns this bonus - confirm whether a rune count was
	// intended.
	if len(author) >= 2 && len(author) <= 5 {
		score += 10
	}
	if strings.Contains(author, " ") || strings.Contains(author, "·") {
		score += 8
	}
	return score
}

// calculatePublisherScore rates a publisher candidate; higher is better.
func (s *OcrService) calculatePublisherScore(text string) int {
	score := 0
	switch {
	case strings.Contains(text, "出版社"):
		score += 20
	case strings.Contains(text, "出版"):
		score += 10
	}
	if strings.Contains(strings.ToLower(text), "press") {
		score += 15
	}
	return score
}

// selectBestCandidate returns the candidate with the highest priority. On a
// tie the earliest candidate wins. An empty slice yields the zero candidate.
func (s *OcrService) selectBestCandidate(candidates []candidate) candidate {
	if len(candidates) == 0 {
		return candidate{}
	}
	best := candidates[0]
	for _, c := range candidates[1:] {
		if c.priority > best.priority {
			best = c
		}
	}
	return best
}