daShangDao_miniProgram/修改后的main.txt
2025-11-24 10:25:20 +08:00

30 lines
1.8 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Book-number (书号) extraction. Skipped once a book number has already been
# recorded in `result`.
if not result["书号"]:
    # Strict form first: a run of digits/separators that ends in a separator
    # followed by 1-5 digits, so price text after the number is not captured.
    # The label may be followed by an ASCII or a fullwidth colon.
    if m := re.search(r'(?:书号|統一書號|统一[书書][号號]|普號|ISBN)[::]?\s*([\d·\-]{5,}[·\-][\d]{1,5})', line):
        result["书号"] = m.group(1).replace('·', '-')
    # Fallback: a looser, length-capped capture. \w also matches CJK
    # characters, so the capture can run into whatever text follows the
    # number and has to be trimmed afterwards.
    elif m := re.search(r'(?:书号|統一書號|统一[书書][号號]|普號|ISBN)[::]?\s*([\w·\-]{5,15})', line):
        # Keep only the part before the first price marker ("定价"/"定價")
        # or currency unit ("元").
        book_id = re.split(r'定[价價]|元', m.group(1), maxsplit=1)[0]
        # Drop any trailing run that is not a digit or an ASCII letter:
        # CJK text glued to the number, dangling separators, underscores.
        book_id = re.sub(r'[^\dA-Za-z]+$', '', book_id)
        # Store only a non-empty value so a failed cleanup leaves the field
        # as it was instead of overwriting it with an empty string.
        if book_id:
            result["书号"] = book_id.replace('·', '-')
# Price (定价) extraction. Skipped once a price has already been recorded in
# `result`. The stored value is always the decimal amount followed by "元".
if not result["定价"]:
    # 1. Preferred form: "定价xx.xx元" / "定價xx.xx元", with an optional ASCII
    #    or fullwidth colon. No \b after "元": under Unicode matching CJK
    #    characters are word characters, so a boundary there would reject
    #    prices directly followed by more Chinese text (e.g. "…元整").
    # 2. Otherwise: the same amount without "元". The lookahead rejects a
    #    following digit, ASCII letter or underscore (what \b was guarding
    #    against) while still allowing CJK text right after the amount.
    m = (
        re.search(r'定[价價][::]?\s*(\d+\.\d{1,2})元', line)
        or re.search(r'定[价價][::]?\s*(\d+\.\d{1,2})(?![\dA-Za-z_])', line)
    )
    if m:
        result["定价"] = f"{m.group(1)}元"