Browse Source

修改异常抛出

Sherlock1011 2 months ago
parent
commit
efb5a961f6
5 changed files with 33 additions and 10 deletions
  1. 30 7
      agent/agent.py
  2. 3 3
      api/run_api.py
  3. BIN
      ocr/agent/agent.pyc
  4. BIN
      ocr/api/run_api.pyc
  5. BIN
      ocr/config/config.pyc

+ 30 - 7
agent/agent.py

@@ -1,5 +1,4 @@
 from config import MODEL_PATH, INFERENCE_URL, INFERENCE_AUTH_TOKEN, INFERENCE_MODEL, PROMPT_EXTRACT_NAME, PROMPT_EXTRACT_COMPONENTS, PROMPT_EXTRACT_KEYWORD, PROMPT_EXTRACT_PREVENTION,PROMPT_EXTRACT_SUPPLIER,PROMPT_EXTRACT_ICON
-from model import QwenOcr
 
 from io import BytesIO
 import base64
@@ -78,6 +77,29 @@ class OcrAgent:
         content = response.json()["choices"][0]["message"]["content"]
         return index, content
 
+    @staticmethod
+    def _parse_json(text: str, step_name: str) -> dict:
+        """
+        Parse the JSON text returned by the model for one extraction step.
+
+        Surrounding whitespace is stripped first. If the text starts with a
+        Markdown code fence (``` or ```json), the fence is removed before
+        parsing; both the multi-line form and the single-line form
+        (```json {...}```) are accepted. Prose before the opening fence or
+        after the closing fence is not handled.
+
+        Args:
+            text: Raw message content returned by the model.
+            step_name: Name of the extraction step; used only in error messages.
+
+        Returns:
+            Whatever ``json.loads`` yields for the cleaned text.
+
+        Raises:
+            RuntimeError: If ``text`` is not a string or cannot be parsed as
+                JSON. ``json.JSONDecodeError`` is a ``ValueError`` subclass, so
+                it is converted here to keep callers that treat ``ValueError``
+                as a request-validation failure from misreporting it. The
+                decode error is attached as ``__cause__``.
+        """
+        # A missing/None content would otherwise escape as AttributeError.
+        if not isinstance(text, str):
+            raise RuntimeError(
+                f"步骤[{step_name}]模型返回内容无法解析为 JSON: 期望 str,实际为 {type(text).__name__}"
+            )
+
+        raw = text.strip()
+        candidates = [raw]
+        if raw.startswith("```"):
+            # First attempt: drop every line that begins with a fence marker.
+            without_fence_lines = "\n".join(
+                line for line in raw.splitlines()
+                if not line.strip().startswith("```")
+            ).strip()
+            # Second attempt: peel the fence off the ends of the text, which
+            # also covers a fence that shares its line with the payload.
+            peeled = raw[3:]
+            if peeled.endswith("```"):
+                peeled = peeled[:-3]
+            peeled = peeled.strip()
+            if peeled[:4].lower() == "json":
+                peeled = peeled[4:].lstrip()
+            candidates = [without_fence_lines, peeled]
+
+        first_error = None
+        for candidate in candidates:
+            try:
+                return json.loads(candidate)
+            except json.JSONDecodeError as e:
+                if first_error is None:
+                    first_error = e
+
+        # Report the text as received (not the cleaned form, which may be
+        # empty) so the log shows what the model actually returned.
+        raise RuntimeError(
+            f"步骤[{step_name}]模型返回内容无法解析为 JSON: {first_error}\n原始内容: {raw[:200]}"
+        ) from first_error
+
     def agent_ocr(self, image):
         """qwen_ocr提取化学品安全标签信息"""
         image = resize_image(image, max_size=512)
@@ -102,12 +124,13 @@ class OcrAgent:
             results.append(content)
 
         # 从结果中提取数据(顺序已由 index 保证)
-        icon        = json.loads(results[0])
-        name        = json.loads(results[1])
-        tag         = json.loads(results[2])
-        risk_notice = json.loads(results[3])
-        pre_notice  = json.loads(results[4])
-        suppliers   = json.loads(results[5])
+        step_names = ["icon", "name", "components", "keyword", "prevention", "supplier"]
+        icon        = self._parse_json(results[0], step_names[0])
+        name        = self._parse_json(results[1], step_names[1])
+        tag         = self._parse_json(results[2], step_names[2])
+        risk_notice = self._parse_json(results[3], step_names[3])
+        pre_notice  = self._parse_json(results[4], step_names[4])
+        suppliers   = self._parse_json(results[5], step_names[5])
 
         end_time = time.perf_counter()
         elapsed_time = end_time - start_time

+ 3 - 3
api/run_api.py

@@ -304,8 +304,8 @@ async def agent_ocr_endpoint(request: AgentOCRRequest):
         )
 
     except ValueError as e:
-        # 参数验证错误
-        logger.warning(f"[{request_id}] 参数验证失败: {e}")
+        # 请求参数验证错误(如 base64 格式非法)
+        logger.warning(f"[{request_id}] 请求参数验证失败: {e}")
         return ErrorResponse(
             code="500",
             data={},
@@ -313,7 +313,7 @@ async def agent_ocr_endpoint(request: AgentOCRRequest):
         )
 
     except RuntimeError as e:
-        # 运行时错误
+        # 运行时错误(含模型返回 JSON 解析失败)
         logger.error(f"[{request_id}] 运行时错误: {e}")
         return ErrorResponse(
             code="500",

BIN
ocr/agent/agent.pyc


BIN
ocr/api/run_api.pyc


BIN
ocr/config/config.pyc