30 lines
1.8 KiB
Plaintext
30 lines
1.8 KiB
Plaintext
# Book-number (书号) extraction: runs only while no book number is stored yet.
if not result["书号"]:
    # Strict form first: a run of digits/separators that ends in a separator
    # followed by 1-5 digits, so the capture stops before any price text
    # that may follow the book number on the same line.
    strict_hit = re.search(r'(?:书号|統一書號|统一[书書][号號]|普號|ISBN)[::]?\s*([\d·\-]{5,}[·\-][\d]{1,5})', line)
    if strict_hit is not None:
        # Normalise the middle-dot separator to a hyphen.
        result["书号"] = strict_hit.group(1).replace('·', '-')
    else:
        # Fallback: a looser pattern, capped at 15 characters.
        loose_hit = re.search(r'(?:书号|統一書號|统一[书書][号號]|普號|ISBN)[::]?\s*([\w·\-]{5,15})', line)
        if loose_hit is not None:
            candidate = loose_hit.group(1)
            # \w also matches Chinese characters, so the capture can run into
            # the price field; cut at "定价" first, then at "元".
            for marker in ("定价", "元"):
                if marker in candidate:
                    candidate = candidate.split(marker)[0].strip()
            # Drop any trailing characters that are not word characters,
            # middle dots or hyphens.
            candidate = re.sub(r'[^\d·\-\w]+$', '', candidate)
            result["书号"] = candidate.replace('·', '-')
|
||
|
||
# Price (定价) extraction: runs only while no price is stored yet.
if not result["定价"]:
    # Two forms are accepted, tried in this order:
    #   1. "定价:xx.xx元" / "定價:xx.xx元"
    #   2. "定价:xx.xx" without the trailing "元"
    # The stored value always carries a trailing "元".
    #
    # Neither pattern uses \b. In a str pattern CJK characters such as "元"
    # count as \w, so "元\b" failed whenever more Chinese text followed the
    # price directly (e.g. "定价:0.35元印数"), and "\d\b" failed because the
    # digit is followed by "元"; the price was then silently dropped.
    # (?!\d) keeps the guard against cutting a longer number short
    # (e.g. "1.234" is still rejected).
    m = (
        re.search(r'定[价價][::]?\s*(\d+\.\d{1,2})元', line)
        or re.search(r'定[价價][::]?\s*(\d+\.\d{1,2})(?!\d)', line)
    )
    if m:
        result["定价"] = f"{m.group(1)}元"