|
|
@@ -1,5 +1,4 @@
|
|
|
from config import MODEL_PATH, INFERENCE_URL, INFERENCE_AUTH_TOKEN, INFERENCE_MODEL, PROMPT_EXTRACT_NAME, PROMPT_EXTRACT_COMPONENTS, PROMPT_EXTRACT_KEYWORD, PROMPT_EXTRACT_PREVENTION,PROMPT_EXTRACT_SUPPLIER,PROMPT_EXTRACT_ICON
|
|
|
-from model import QwenOcr
|
|
|
|
|
|
from io import BytesIO
|
|
|
import base64
|
|
|
@@ -78,6 +77,29 @@ class OcrAgent:
|
|
|
content = response.json()["choices"][0]["message"]["content"]
|
|
|
return index, content
|
|
|
|
|
|
@staticmethod
def _parse_json(text: str, step_name: str) -> dict:
    """Decode the JSON payload contained in a model reply.

    The reply is first parsed as-is. If that fails and the reply contains a
    Markdown code fence (three backticks, optionally followed by a language
    tag such as ``json``), the text inside the fence is parsed instead. This
    covers replies with a preamble before the fence, commentary after it, or
    the whole fenced block on a single line.

    Args:
        text: Raw reply text returned by the model.
        step_name: Name of the extraction step; only used in the error
            message so a failure can be traced to its prompt.

    Returns:
        The value produced by ``json.loads``. The annotation says ``dict``,
        but no type check is performed on the decoded value.

    Raises:
        RuntimeError: If ``text`` is not a string or no candidate parses as
            JSON. RuntimeError is used on purpose so that callers catching
            ValueError do not misreport it as a parameter-validation failure.
    """
    if not isinstance(text, str):
        # e.g. a null "content" field; keep the documented RuntimeError
        # contract instead of leaking an AttributeError from .strip().
        raise RuntimeError(
            f"步骤[{step_name}]模型返回内容无法解析为 JSON: "
            f"expected str, got {type(text).__name__}\n原始内容: {text!r}"
        )

    fence = "```"
    raw = text.strip()
    # Always try the untouched reply first so that valid JSON which merely
    # contains backticks inside a string value is never mangled.
    candidates = [raw]

    opening = raw.find(fence)
    if opening != -1:
        # Keep what follows the opening fence, up to the last closing fence
        # when there is one (a truncated reply may lack it).
        inner = raw[opening + len(fence):]
        closing = inner.rfind(fence)
        if closing != -1:
            inner = inner[:closing]
        # Skip a language tag glued to the opening fence ("json"), but only
        # if something remains afterwards, so a bare literal such as
        # "true" or "123" inside the fence is not mistaken for a tag.
        tag_end = 0
        while tag_end < len(inner) and (
            inner[tag_end].isalnum() or inner[tag_end] == "_"
        ):
            tag_end += 1
        if inner[tag_end:].strip():
            inner = inner[tag_end:]
        candidates.append(inner.strip())

    last_error = None
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as exc:
            # Remember the failure of the most thoroughly cleaned candidate.
            last_error = exc

    # Report the reply as received (truncated), matching the message label.
    raise RuntimeError(
        f"步骤[{step_name}]模型返回内容无法解析为 JSON: {last_error}\n原始内容: {raw[:200]}"
    ) from last_error
|
|
|
+
|
|
|
def agent_ocr(self, image):
|
|
|
"""qwen_ocr提取化学品安全标签信息"""
|
|
|
image = resize_image(image, max_size=512)
|
|
|
@@ -102,12 +124,13 @@ class OcrAgent:
|
|
|
results.append(content)
|
|
|
|
|
|
# 从结果中提取数据(顺序已由 index 保证)
|
|
|
- icon = json.loads(results[0])
|
|
|
- name = json.loads(results[1])
|
|
|
- tag = json.loads(results[2])
|
|
|
- risk_notice = json.loads(results[3])
|
|
|
- pre_notice = json.loads(results[4])
|
|
|
- suppliers = json.loads(results[5])
|
|
|
+ step_names = ["icon", "name", "components", "keyword", "prevention", "supplier"]
|
|
|
+ icon = self._parse_json(results[0], step_names[0])
|
|
|
+ name = self._parse_json(results[1], step_names[1])
|
|
|
+ tag = self._parse_json(results[2], step_names[2])
|
|
|
+ risk_notice = self._parse_json(results[3], step_names[3])
|
|
|
+ pre_notice = self._parse_json(results[4], step_names[4])
|
|
|
+ suppliers = self._parse_json(results[5], step_names[5])
|
|
|
|
|
|
end_time = time.perf_counter()
|
|
|
elapsed_time = end_time - start_time
|