| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- from config import MODEL_PATH, INFERENCE_URL, INFERENCE_AUTH_TOKEN, INFERENCE_MODEL, PROMPT_EXTRACT_NAME, PROMPT_EXTRACT_COMPONENTS, PROMPT_EXTRACT_KEYWORD, PROMPT_EXTRACT_PREVENTION,PROMPT_EXTRACT_SUPPLIER,PROMPT_EXTRACT_ICON
- from model import QwenOcr
- from io import BytesIO
- import base64
- import json
- from PIL import Image, ImageFilter, ImageEnhance
- import time
- import requests
def image_to_base64(pil_image, image_format="JPEG"):
    """Encode a PIL image as a Base64 string.

    Args:
        pil_image: Image to encode; it is written out through its ``save``
            method.
        image_format: Format name handed to ``save``. Defaults to ``"JPEG"``.

    Returns:
        The encoded image file as a UTF-8 Base64 string (no data-URL prefix).
    """
    # JPEG cannot store alpha or palette data and Pillow raises OSError when
    # asked to write such modes, so flatten them to RGB first. Transparency is
    # discarded by this conversion. Other formats are saved untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    mode = getattr(pil_image, "mode", None)
    if (
        str(image_format).upper() == "JPEG"
        and mode is not None
        and mode not in jpeg_modes
    ):
        pil_image = pil_image.convert("RGB")

    buffer = BytesIO()
    pil_image.save(buffer, format=image_format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def resize_image(image, max_size=512):
    """Shrink an image so its longer side is at most ``max_size`` pixels.

    Images that already fit are returned unchanged (the same object). Larger
    images are downscaled with the aspect ratio preserved, then sharpened and
    given a slight contrast boost to compensate for the detail lost in
    scaling.

    Args:
        image: PIL image to shrink.
        max_size: Upper bound, in pixels, for the longer side.

    Returns:
        The original image when no scaling is needed, otherwise a new
        processed image.

    Raises:
        ValueError: If scaling is required but ``max_size`` is not positive.
    """
    width, height = image.size
    longest_side = max(width, height)

    # Nothing to do when the image already fits.
    if longest_side <= max_size:
        return image

    if max_size < 1:
        raise ValueError("max_size must be a positive number of pixels")

    scale = max_size / longest_side
    # Truncation can yield 0 for the short side of a very elongated image
    # (e.g. 10000x1), which Image.resize rejects; keep at least one pixel.
    target_size = (
        max(1, int(width * scale)),
        max(1, int(height * scale)),
    )

    # High-quality LANCZOS resampling.
    resized = image.resize(target_size, Image.Resampling.LANCZOS)
    # Unsharp mask to recover edge definition lost while downscaling.
    resized = resized.filter(
        ImageFilter.UnsharpMask(radius=1, percent=120, threshold=3)
    )
    # Slight contrast boost intended to help text recognition.
    return ImageEnhance.Contrast(resized).enhance(1.1)
class OcrAgent:
    """Extract chemical safety-label fields from an image via a remote model.

    Each group of fields is requested with its own prompt by POSTing a
    ``messages`` payload to ``INFERENCE_URL``; the JSON replies are then
    merged into a single result dict.
    """

    def __init__(self):
        self._url = INFERENCE_URL

    @staticmethod
    def _parse_json_reply(text):
        """Parse a model reply as JSON, tolerating a markdown code fence.

        A reply wrapped as ```` ```json\\n...\\n``` ```` has the fence lines
        removed before parsing; plain JSON is parsed as-is.

        Raises:
            json.JSONDecodeError: If the remaining text is not valid JSON.
        """
        if isinstance(text, str):
            stripped = text.strip()
            if stripped.startswith("```"):
                # Drop the opening fence line (``` or ```json) ...
                _, _, stripped = stripped.partition("\n")
                stripped = stripped.rstrip()
                # ... and the closing fence, if present.
                if stripped.endswith("```"):
                    stripped = stripped[:-3]
            text = stripped
        return json.loads(text)

    def extract_single(self, image_base64: str, prompt: str, index: int):
        """Send one image + prompt request and return ``(index, reply_text)``.

        Args:
            image_base64: Base64-encoded image, embedded as a JPEG data URL.
            prompt: Instruction text sent alongside the image.
            index: Caller-chosen tag, returned unchanged so replies can be
                matched back to their prompts.

        Returns:
            A tuple of ``index`` and the content of the first choice's message.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        payload = {
            "model": INFERENCE_MODEL,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_base64}"
                            },
                        },
                        {"type": "text", "text": prompt},
                    ],
                },
            ],
            "max_tokens": 4096,
            "stream": False,
            "temperature": 0,
        }
        response = requests.post(
            self._url,
            headers={
                "Authorization": INFERENCE_AUTH_TOKEN,
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=600,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        return index, content

    def agent_ocr(self, image):
        """Extract chemical safety-label information from ``image``.

        The image is downscaled, Base64-encoded and sent once per prompt, one
        request after another. Each reply is parsed as JSON and the pieces are
        assembled into the result. The elapsed time is printed.

        Args:
            image: PIL image of the label.

        Returns:
            A dict with the keys ``tag`` (``name_cn``, ``name_en``,
            ``cf_list``), ``tag_images``, ``key_word``, ``risk_notice``,
            ``pre_notice``, ``supplier`` and ``acc_tel``.

        Raises:
            json.JSONDecodeError: If a reply is not valid JSON.
            KeyError: If a parsed reply lacks an expected key.
        """
        image = resize_image(image, max_size=512)
        image_base64 = image_to_base64(image)
        start_time = time.perf_counter()

        # Fixed prompt order; the parsed replies are unpacked positionally.
        prompts = [
            PROMPT_EXTRACT_ICON,        # 0
            PROMPT_EXTRACT_NAME,        # 1
            PROMPT_EXTRACT_COMPONENTS,  # 2
            PROMPT_EXTRACT_KEYWORD,     # 3
            PROMPT_EXTRACT_PREVENTION,  # 4
            PROMPT_EXTRACT_SUPPLIER,    # 5
        ]

        # The six requests are sent sequentially, not in parallel.
        replies = [
            self.extract_single(image_base64, prompt, idx)[1]
            for idx, prompt in enumerate(prompts)
        ]

        icon, name, tag, risk_notice, pre_notice, suppliers = (
            self._parse_json_reply(reply) for reply in replies
        )

        elapsed_time = time.perf_counter() - start_time
        print(f"推理时间: {elapsed_time:.6f} 秒")

        return {
            "tag": {
                "name_cn": name["name_cn"],
                "name_en": name["name_en"],
                "cf_list": tag["cf_list"],
            },
            "tag_images": icon["tag_images"],
            "key_word": risk_notice["key_word"],
            "risk_notice": risk_notice["risk_notice"],
            "pre_notice": pre_notice["pre_notice"],
            "supplier": suppliers["supplier"],
            "acc_tel": suppliers["acc_tel"],
        }
if __name__ == "__main__":
    # Manual smoke test: run the full extraction on a local sample image.
    # The context manager closes the file handle once the RGB copy exists;
    # convert() returns a new, fully loaded image that outlives the file.
    with Image.open("./test1.jpg") as source:
        image = source.convert("RGB")
    agent = OcrAgent()
    res = agent.agent_ocr(image)
    print(res)
|