import base64
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from io import BytesIO

import requests
from PIL import Image, ImageFilter, ImageEnhance

from config import (
    MODEL_PATH,
    PROMPT_EXTRACT_NAME,
    PROMPT_EXTRACT_COMPONENTS,
    PROMPT_EXTRACT_KEYWORD,
    PROMPT_EXTRACT_PREVENTION,
    PROMPT_EXTRACT_SUPPLIER,
    PROMPT_EXTRACT_ICON,
)
from model import QwenOcr

# Image modes that Pillow's JPEG writer can encode directly. Any other mode
# (RGBA, LA, P, ...) makes ``Image.save(..., format="JPEG")`` raise OSError.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def image_to_base64(pil_image, image_format="JPEG"):
    """Encode a PIL image as a Base64 string.

    Args:
        pil_image: The ``PIL.Image.Image`` to encode.
        image_format: Pillow format name used for serialization.

    Returns:
        The encoded image bytes as a UTF-8 Base64 ``str``.

    When the target format is JPEG and the image is in a mode JPEG cannot
    store (for example RGBA or palette images), the image is converted to RGB
    first instead of letting ``save`` fail.
    """
    wants_jpeg = isinstance(image_format, str) and image_format.upper() == "JPEG"
    if wants_jpeg and pil_image.mode not in _JPEG_WRITABLE_MODES:
        pil_image = pil_image.convert("RGB")

    buffered = BytesIO()
    pil_image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def resize_image(image, max_size=512):
    """Downscale an image so its longest side is at most ``max_size``.

    Args:
        image: The ``PIL.Image.Image`` to resize.
        max_size: Upper bound, in pixels, for the longest side.

    Returns:
        ``image`` itself (unmodified) when it already fits; otherwise a new
        image that was resized with LANCZOS, sharpened with an unsharp mask
        and given a slight contrast boost.
    """
    width, height = image.size
    longest_side = max(width, height)

    # Nothing to shrink: return the input untouched (no sharpening either).
    if longest_side <= max_size:
        return image

    scale = max_size / longest_side
    # Clamp to 1 px: truncation can yield 0 for extreme aspect ratios, and
    # Image.resize rejects a zero-sized target.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))

    resized = image.resize(new_size, Image.Resampling.LANCZOS)
    # Sharpen to compensate for detail lost while downscaling.
    resized = resized.filter(
        ImageFilter.UnsharpMask(radius=1, percent=120, threshold=3)
    )
    # Slight contrast boost, intended to help text recognition.
    return ImageEnhance.Contrast(resized).enhance(1.1)


class OcrAgent:
    """Client for the HTTP OCR endpoint used to read chemical safety labels."""

    def __init__(self, url="http://127.0.0.1:8000/api/v1/ocr", timeout=300):
        """Create the client.

        Args:
            url: Address of the OCR endpoint that receives the POST request.
            timeout: Value forwarded to ``requests.post(timeout=...)``, in
                seconds. The default is a generous bound chosen so that a
                dead or stalled service cannot block the caller forever.
        """
        self._url = url
        self._timeout = timeout

    def extract_part_info(self, image_base64, prompts):
        """Send the image and prompts to the OCR endpoint.

        Args:
            image_base64: Base64-encoded image string.
            prompts: Prompt text(s) sent under the ``"text"`` key.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: The service answered with a 4xx/5xx status.
            requests.Timeout: The service did not answer within the timeout.
        """
        response = requests.post(
            self._url,
            json={
                "image": image_base64,
                "text": prompts,
            },
            timeout=self._timeout,
        )
        # Fail here with a clear HTTP error rather than later with an opaque
        # KeyError / JSON decoding error on an error payload.
        response.raise_for_status()
        return response.json()

    def agent_ocr(self, image):
        """Extract chemical safety label information from an image.

        The image is resized (longest side at most 1024 px), Base64-encoded
        and sent together with all extraction prompts in one request. The
        ``"data"`` entry of the response is read positionally, in the same
        order as the prompts, and each item is parsed as a JSON document.

        Args:
            image: The ``PIL.Image.Image`` of the label.

        Returns:
            A dict with the keys ``tag`` (``name_cn``, ``name_en``,
            ``cf_list``), ``tag_images``, ``key_word``, ``risk_notice``,
            ``pre_notice``, ``supplier`` and ``acc_tel``.
        """
        image = resize_image(image, max_size=1024)
        image_base64 = image_to_base64(image)

        start_time = time.perf_counter()

        # Order matters: results are matched to prompts by index below.
        prompts = [
            PROMPT_EXTRACT_ICON,
            PROMPT_EXTRACT_NAME,
            PROMPT_EXTRACT_COMPONENTS,
            PROMPT_EXTRACT_KEYWORD,
            PROMPT_EXTRACT_PREVENTION,
            PROMPT_EXTRACT_SUPPLIER,
        ]

        results = self.extract_part_info(image_base64, prompts)["data"]

        # One JSON string per prompt; indexing (not slicing) keeps the
        # IndexError if the service returns fewer items than prompts.
        icon, name, tag, risk_notice, pre_notice, suppliers = [
            json.loads(results[index]) for index in range(len(prompts))
        ]

        elapsed_time = time.perf_counter() - start_time
        print(f"推理时间: {elapsed_time:.6f} 秒")

        return {
            "tag": {
                "name_cn": name["name_cn"],
                "name_en": name["name_en"],
                "cf_list": tag["cf_list"],
            },
            "tag_images": icon["tag_images"],
            "key_word": risk_notice["key_word"],
            "risk_notice": risk_notice["risk_notice"],
            "pre_notice": pre_notice["pre_notice"],
            "supplier": suppliers["supplier"],
            "acc_tel": suppliers["acc_tel"],
        }


if __name__ == "__main__":
    image = Image.open("./test1.jpg").convert("RGB")
    agent = OcrAgent()
    res = agent.agent_ocr(image)
    print(res)