import base64
import json
import time
from io import BytesIO

import requests
from PIL import Image, ImageFilter, ImageEnhance

from config import (
    MODEL_PATH,
    INFERENCE_URL,
    INFERENCE_AUTH_TOKEN,
    INFERENCE_MODEL,
    PROMPT_EXTRACT_NAME,
    PROMPT_EXTRACT_COMPONENTS,
    PROMPT_EXTRACT_KEYWORD,
    PROMPT_EXTRACT_PREVENTION,
    PROMPT_EXTRACT_SUPPLIER,
    PROMPT_EXTRACT_ICON,
)

# Image modes Pillow's JPEG writer accepts; any other mode (RGBA, P, LA, ...)
# must be converted before saving as JPEG.
_JPEG_SAVE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def image_to_base64(pil_image, image_format="JPEG"):
    """Encode a PIL image as a Base64 string.

    Args:
        pil_image: The PIL image to encode.
        image_format: Format name passed to ``Image.save`` (default "JPEG").

    Returns:
        The encoded image bytes as a UTF-8 Base64 string.
    """
    # JPEG cannot store alpha or palette data; saving such an image raises
    # OSError, so convert to RGB first instead of crashing on PNG-style input.
    if image_format.upper() == "JPEG" and pil_image.mode not in _JPEG_SAVE_MODES:
        pil_image = pil_image.convert("RGB")

    buffered = BytesIO()
    pil_image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def resize_image(image, max_size=512):
    """Downscale an image so its longest side is at most ``max_size``.

    Images that already fit are returned unchanged. Downscaled images are
    sharpened and given a slight contrast boost to compensate for the detail
    lost by resampling.

    Args:
        image: The PIL image to resize.
        max_size: Maximum allowed length, in pixels, of the longest side.

    Returns:
        The original image if no downscaling is needed, otherwise a new
        resized and enhanced image.
    """
    width, height = image.size
    max_dim = max(width, height)

    # Nothing to shrink: return the input untouched.
    if max_dim <= max_size:
        return image

    scaling_factor = max_size / max_dim
    # Clamp to 1 px: int() truncation can yield 0 for extreme aspect ratios,
    # which Image.resize rejects.
    new_width = max(1, int(width * scaling_factor))
    new_height = max(1, int(height * scaling_factor))

    # High-quality LANCZOS resampling.
    resized = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    # Unsharp mask to compensate for the softening caused by downscaling.
    resized = resized.filter(
        ImageFilter.UnsharpMask(radius=1, percent=120, threshold=3)
    )

    # Slight contrast boost intended to help text recognition.
    enhancer = ImageEnhance.Contrast(resized)
    resized = enhancer.enhance(1.1)

    return resized


class OcrAgent:
    """Client that sends an image plus a prompt to the inference endpoint."""

    def __init__(self):
        self._url = INFERENCE_URL

    def extract_single(self, image_base64: str, prompt: str, index: int):
        """Send one image + prompt request to the inference endpoint.

        Args:
            image_base64: Base64-encoded image, embedded as a JPEG data URL.
            prompt: Text prompt sent alongside the image.
            index: Caller-supplied position, returned unchanged so results
                can be matched back to their prompt.

        Returns:
            A tuple ``(index, content)`` where ``content`` is the message
            text of the first choice in the response.

        Raises:
            requests.HTTPError: If the endpoint returns an error status.
        """
        response = requests.post(
            self._url,
            headers={
                "Authorization": INFERENCE_AUTH_TOKEN,
                "Content-Type": "application/json"
            },
            json={
                "model": INFERENCE_MODEL,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}
                            },
                            {"type": "text", "text": prompt}
                        ]
                    }
                ],
                "max_tokens": 4096,
                "stream": False,
                "temperature": 0
            },
            timeout=600
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        return index, content

    @staticmethod
    def _parse_json(text: str, step_name: str) -> dict:
        """Parse the model's JSON reply, stripping Markdown code fences.

        Args:
            text: Raw text returned by the model.
            step_name: Name of the extraction step, used in the error message.

        Returns:
            The decoded JSON value.

        Raises:
            RuntimeError: If the text is not valid JSON. RuntimeError is used
                rather than ValueError so callers do not mistake it for a
                parameter-validation failure; the original JSONDecodeError
                is attached as the cause.
        """
        text = text.strip()

        # The model occasionally wraps its answer in ```json ... ``` fences;
        # drop every fence line and keep the payload between them.
        if text.startswith("```"):
            lines = text.splitlines()
            text = "\n".join(
                line for line in lines
                if not line.strip().startswith("```")
            ).strip()

        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            raise RuntimeError(
                f"步骤[{step_name}]模型返回内容无法解析为 JSON: {e}\n原始内容: {text[:200]}"
            ) from e

    def agent_ocr(self, image):
        """Extract chemical safety label information from an image.

        The image is downscaled and Base64-encoded once, then one request per
        prompt is sent to the inference endpoint, and the JSON replies are
        merged into a single result dict.

        Args:
            image: The PIL image of the label.

        Returns:
            A dict with the keys ``tag`` (``name_cn``, ``name_en``,
            ``cf_list``), ``tag_images``, ``key_word``, ``risk_notice``,
            ``pre_notice``, ``supplier`` and ``acc_tel``.

        Raises:
            RuntimeError: If any reply cannot be parsed as JSON.
            KeyError: If a parsed reply lacks an expected key.
        """
        image = resize_image(image, max_size=512)
        image_base64 = image_to_base64(image)

        start_time = time.perf_counter()

        # Prompts in a fixed order; the position doubles as the result index.
        prompts = [
            PROMPT_EXTRACT_ICON,         # 0
            PROMPT_EXTRACT_NAME,         # 1
            PROMPT_EXTRACT_COMPONENTS,   # 2
            PROMPT_EXTRACT_KEYWORD,      # 3
            PROMPT_EXTRACT_PREVENTION,   # 4
            PROMPT_EXTRACT_SUPPLIER      # 5
        ]

        # The requests are sent one after another (serially), so the results
        # list is already in prompt order.
        results = []
        for idx, prompt in enumerate(prompts):
            _, content = self.extract_single(image_base64, prompt, idx)
            results.append(content)

        # Parse each reply; step names only label error messages.
        step_names = ["icon", "name", "components", "keyword", "prevention", "supplier"]
        icon = self._parse_json(results[0], step_names[0])
        name = self._parse_json(results[1], step_names[1])
        tag = self._parse_json(results[2], step_names[2])
        risk_notice = self._parse_json(results[3], step_names[3])
        pre_notice = self._parse_json(results[4], step_names[4])
        suppliers = self._parse_json(results[5], step_names[5])

        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print(f"推理时间: {elapsed_time:.6f} 秒")

        result = {
            "tag": {
                "name_cn": name["name_cn"],
                "name_en": name["name_en"],
                "cf_list": tag["cf_list"]
            },
            "tag_images": icon["tag_images"],
            "key_word": risk_notice["key_word"],
            "risk_notice": risk_notice["risk_notice"],
            "pre_notice": pre_notice["pre_notice"],
            "supplier": suppliers["supplier"],
            "acc_tel": suppliers["acc_tel"],
        }

        return result


if __name__ == "__main__":
    image = Image.open("./test1.jpg").convert("RGB")
    agent = OcrAgent()
    res = agent.agent_ocr(image)
    print(res)