Эх сурвалжийг харах

封装所有AI检测功能

Sherlock1011 7 сар өмнө
parent
commit
f35ba98b0f

+ 4 - 0
agent/agent.py

@@ -13,6 +13,10 @@ class Agent:
             self.glm = Glm()
             self._initialized = True
             
+    def judge_title_relation(self, basic_title, url_title):
+        """判断基础标题与链接标题的相关性,过滤错误数据的爬取"""
+        self.glm.set_modelname("glm-4-plus")
+            
     def brand_key_word_judgement(self, brandname, title):
         """判断是否为关键词引流"""
         self.glm.set_modelname("glm-4-plus")

+ 17 - 60
agent/glm.py

@@ -1,5 +1,5 @@
 from config import load_config
-from utils import image_to_base
+from utils import image_to_base, pil_image_to_base64, load_image_from_cos
 from zhipuai import ZhipuAI
 class Glm:
     _instance = None
@@ -39,6 +39,8 @@ class Glm:
             }
         ]
         for image_url in image_url_list:
+            # image = load_image_from_cos(image_url)
+            # image_base = pil_image_to_base64(image)
             content.append(
                 {
                     "type": "image_url",
@@ -70,10 +72,12 @@ class Glm:
         ]
         
         for image_url in image_url_list1:
+            image = load_image_from_cos(image_url)
+            image_base = pil_image_to_base64(image)
             content1.append(
                 {
                     "type": "image_url",
-                    "image_url": {"url": image_url}
+                    "image_url": {"url": image_base}
                 }
             )
         
@@ -85,10 +89,12 @@ class Glm:
         ]
         
         for image_url in image_url_list2:
+            image = load_image_from_cos(image_url)
+            image_base = pil_image_to_base64(image)
             content2.append(
                 {
                     "type": "image_url",
-                    "image_url": {"url": image_url}
+                    "image_url": {"url": image_base}
                 }
             )
             
@@ -113,6 +119,11 @@ class Glm:
     
     def image_response(self, query, image_url1, image_url2):
         """单图像问答"""
+        image1 = load_image_from_cos(image_url1)
+        image1_base = pil_image_to_base64(image1)
+        
+        image2 = load_image_from_cos(image_url2)
+        image2_base = pil_image_to_base64(image2)
         response = self.client.chat.completions.create(
             model="glm-4v-plus-0111",
             messages=[
@@ -125,11 +136,11 @@ class Glm:
                         },
                         {
                             "type": "image_url",
-                            "image_url": {"url": image_url1}
+                            "image_url": {"url": image1_base}
                         },
                         {
                             "type": "image_url",
-                            "image_url": {"url": image_url2}
+                            "image_url": {"url": image2_base}
                         }
                     ]
                 }
@@ -180,62 +191,8 @@ class Glm:
         )
         
         return response.choices[0].message
-    
-    def web_search_in_chat(self, search_prompt, content):
-        """网络搜索工具"""
-        tools = [{
-            "type": "web_search",
-            "web_search": {
-                "enable": True,
-                "search_engine": "search_pro_sogou", # 选择搜索引擎
-                "search_result": True,
-                "search_prompt": search_prompt,
-            }
-        }]
-        
-        messages = [{
-            "role": "user",
-            "content": content
-        }]
-        
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            messages=messages,
-            tools=tools,
-            response_format={
-                "type": "json_object"
-            }
-        )
-        
-        return response.choices[0].message
-        
         
     
     def set_modelname(self, modelname):
         self.model_name = modelname
-    
-if __name__ == '__main__':
-    glm = Glm()
-    glm.set_modelname("glm-4v-plus-0111")
-    query = f"""
-        你是一个产品图像分析助手,你的任务是判断第二张图像中的产品上是否包含logo,并与第一张logo图像做对比,判断第二张图像中的logo是否与第一张的一致。
-        判断思路:
-        1. 首先判断第二张图像中是否包含logo。
-        2. 如果包含logo的话,再进行判断,是否与第一张的logo一致。
-        3. 如果与第一张的logo不一致,请根据你的经验判断图像中的logo是什么品牌,如果不知道返回'未知'
-        最终结果返回为以下给出的json格式
-        输出结果示例:
-        
-        {{
-            "is_contain_logo": false
-        }}
-        
-        
-        {{
-            "is_contain_logo": true,
-            "is_jugement_logo": true,
-            "brand_name": "李宁"
-        }}
-    """
-    response = glm.multi_epoch_image_response("./logo/lining.jpg", "http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/88/9b9027dd-95b7-4024-b71e-fb7cbfde16a1.jpg", query)
-    print(response.content)
+    

+ 12 - 22
api_test.py

@@ -36,35 +36,25 @@ import json
 # print(response.json())
 
 url = "https://670813644644357-http-7860.northwest1.gpugeek.com:8443/brandanalysis/api/v1/infringe_judgement"
-# url = "http://172.18.1.189:7860/brandanalysis/api/v1/infringe_judgement"
+# url = "http://127.0.0.1:7860/brandanalysis/api/v1/infringe_judgement"
 url_data = {
-    'title': '李宁超轻21男跑鞋2024年新款反光䨻丝高回弹轻质透气缓震ARBU001',
-    'brand_name': '李宁',
+    'title': '全棉时代男女情侣款抗菌针织开襟家居服纯棉开空调房T恤式睡衣',
+    'brand_name': '全棉时代',
     'product_images': [
-            "https://gw.alicdn.com/imgextra/O1CN01DoWU8A1JdrhxBBsmP_!!3378851052.jpg_q95.jpg_.webp",
-            "https://img.alicdn.com/imgextra/i4/3378851052/O1CN01E1epnW1JdrgxCnPpR_!!3378851052.jpg_q75.jpg_.webp",
-            "https://img.alicdn.com/imgextra/i2/3378851052/O1CN01Kx7yq11JdrgEO6kSN_!!3378851052.jpg_q75.jpg_.webp",
-            "https://img.alicdn.com/imgextra/i1/3378851052/O1CN010sutlV1JdrgGP4GTI_!!3378851052.jpg_q75.jpg_.webp",
-            "https://img.alicdn.com/imgextra/i3/3378851052/O1CN0190Cvpw1JdriE6otol_!!3378851052.jpg_q75.jpg_.webp",
-            "https://img.alicdn.com/imgextra/i4/3378851052/O1CN01SsCo6a1JdriD00UuX_!!3378851052.jpg_q75.jpg_.webp"
+           "https://img.alicdn.com/imgextra/i1/430490406/O1CN01eqLhdb1ErzpfFkaM3_!!430490406.jpg_.webp"
         ],
-    'price': 19.9
+    'price': 287
 }
 
 basic_data = {
-    'product_name': '25李宁赤兔8PRO蛇年跑步鞋男回弹减震透气竞速训练专业比赛运动鞋',
-    'brand_name': '李宁',
-    'similarity_logos': ["李宇", "李I宁"],
+    'product_name': '全棉时代男女情侣款抗菌针织开襟家居服纯棉开空调房T恤式睡衣',
+    'brand_name': '全棉时代',
+    'similarity_logos': ["全棉時代", "全绵时代"],
     'product_images': [
-        "https://gw.alicdn.com/imgextra/O1CN01EFpxoy1JdrhyyD8Gp_!!3378851052.jpg_q95.jpg_.webp",
-        "https://img.alicdn.com/imgextra/i2/3378851052/O1CN01N1VBKz1JdriDQ7v3s_!!3378851052.jpg_q75.jpg_.webp",
-        "https://img.alicdn.com/imgextra/i4/3378851052/O1CN01d1T16h1JdriBxQW8j_!!3378851052.jpg_q75.jpg_.webp",
-        "https://img.alicdn.com/imgextra/i4/3378851052/O1CN01O5Gb861JdriAyGLo0_!!3378851052.jpg_q75.jpg_.webp",
-        "https://img.alicdn.com/imgextra/i1/3378851052/O1CN01WT8Kg81JdriDTZ4lq_!!3378851052.jpg_q75.jpg_.webp",
-        "https://img.alicdn.com/imgextra/i2/3378851052/O1CN01v4KL0D1JdriCrG3SH_!!3378851052.jpg_q75.jpg_.webp"
-    ],
-    'base_price': 0,
-    'price_percent': 0
+            "https://dev-govern-private-1251740668.cos.ap-guangzhou.myqcloud.com/private/20250723/b9c411c4c2514367afc4dd4ad199eb0d.webp?q-sign-algorithm=sha1&q-ak=AKIDIWXN4kqgpiMm0z4T5VgcKn4KSP8cZwnO&q-sign-time=1753320960%3B1753328160&q-key-time=1753320960%3B1753328160&q-header-list=host&q-url-param-list=&q-signature=f6223c1353c1c97c8e42355625dec94967c9c2f6", 
+        ],
+    'base_price': 70.5,
+    'price_percent':  0.7
         
 }
 

+ 3 - 2
model/__init__.py

@@ -1,5 +1,6 @@
-from .clip.inference import ClipModelInference
+from .clip.inference import ClipModelInference, ClipCompareModelInference
 
 __all__ = [
-    'ClipModelInference'
+    'ClipModelInference',
+    'ClipCompareModelInference'
 ]

+ 37 - 1
model/clip/inference.py

@@ -1,5 +1,6 @@
 from .train import ClipModelTrainer
-from utils.utils import load_image_from_url
+from utils.utils import load_image_from_url, load_image_from_cos
+from .clip_model import ClipModel
 class ClipModelInference:
     def __init__(self, brand_name):
         self._products_feat_map = self._load_model(brand_name)
@@ -25,7 +26,42 @@ class ClipModelInference:
         # 将列表按照similarity进行倒序排序
         similarity_map = sorted(similarity_map, key=lambda x: x['similarity'], reverse=True)
         return similarity_map
+
+class ClipCompareModelInference:
+    def __init__(self):
+        self._clip_model = ClipModel()
     
+    def calculate_similarity(self, product_image, base_products):
+        base_products_feats_map = self.get_base_products_feats_map(base_products)
+        product_image_feat = self._clip_model.extract_image_feature(product_image)
+        similarity_map = []
+        for product in base_products_feats_map:
+            similarity = product_image_feat @ product['image_feat'].t() * 100
+            similarity_map.append(
+                {
+                    'image': product['image'],
+                    'similarity': similarity.item()
+                }
+            )
+        
+        # 将列表按照similarity进行倒序排序
+        similarity_map = sorted(similarity_map, key=lambda x: x['similarity'], reverse=True)
+        return similarity_map  
+        
+    
+    def get_base_products_feats_map(self, base_products):
+        feats_map =  []
+        for url in base_products:
+            image = load_image_from_cos(url).resize((512, 512))
+            feat = self._clip_model.extract_image_feature(image)
+            feats_map.append(
+                {
+                    'image': url,
+                    'image_feat': feat
+                }
+            )     
+        return feats_map
+        
 if __name__ == '__main__':
     brand_name = '李宁'
     product_image_url = 'https://gw.alicdn.com/imgextra/O1CN015qx8Gw1Jdrhzk6y3v_!!3378851052.jpg_q95.jpg_.webp'

+ 2 - 2
model/clip/train.py

@@ -1,7 +1,7 @@
 from .clip_model import ClipModel
 from db import MongoDao
 from tqdm import tqdm
-from utils.utils import load_image_from_url
+from utils.utils import load_image_from_url,load_image_from_cos
 
 class ClipModelTrainer:
     def __init__(self, brand_name):
@@ -31,7 +31,7 @@ class ClipModelTrainer:
             if len(data['images']) == 0:
                 continue
             for image_url in data['images']:
-                image = load_image_from_url(image_url).resize((512, 512))
+                image = load_image_from_cos(image_url).resize((512, 512))
                 feat = self._model.extract_image_feature(image)
                 products_feature_map.append(
                     {

BIN
product.png


+ 3 - 1
utils/__init__.py

@@ -1,9 +1,11 @@
-from utils.utils import image_to_base, load_image_from_url
+from utils.utils import image_to_base, load_image_from_url, load_image_from_cos, pil_image_to_base64
 from utils.service import Service
 from utils.api_service import ApiService
 __all__ =[
     "image_to_base",
     "load_image_from_url",
+    "load_image_from_cos",
+    "pil_image_to_base64",
     "Service",
     "ApiService"
 ]

+ 36 - 27
utils/api_service.py

@@ -1,12 +1,15 @@
 from agent.agent import Agent
 from db import MongoDao
 import json5
-from model import ClipModelInference
+from model import ClipModelInference, ClipCompareModelInference
 
-from utils.utils import load_image_from_url
+from utils.utils import load_image_from_url, load_image_from_cos
+
+import pandas as pd
 
 license_dao = MongoDao("ProductStandard")
-license_infernece = ClipModelInference('李宁')
+# license_infernece = ClipModelInference('全棉时代')
+license_infernece = ClipCompareModelInference()
 
 class ApiService:
     agent = Agent()
@@ -27,7 +30,11 @@ class ApiService:
             logos_info = json5.loads(ApiService.agent.get_log_from_product_images(url_data['product_images']))
             if logos_info['is_contained_logo']:
                 for log_name in logos_info['logo_infos']:
-                    if log_name.strip() != basic_data['brand_name'] and log_name.strip() != 'LI-NING':
+                    # if log_name.strip() != basic_data['brand_name'] and log_name.strip() != 'LI-NING':
+                    #     return True
+                    if log_name.strip() != basic_data['brand_name'] and log_name.strip() != 'Purcotton':
+                        print(f"brand_name:{basic_data['brand_name']}")
+                        print(f"log_name:{log_name}")
                         return True
             
             # 与授权商品对比
@@ -39,8 +46,9 @@ class ApiService:
             
             if len(url_data['product_images']) != 0:
                 for image_url in url_data['product_images']:
+                    # product_image = load_image_from_cos(image_url).resize((512, 512))
                     product_image = load_image_from_url(image_url).resize((512, 512))
-                    similarity_map = license_infernece.calulate_similarity(product_image)
+                    similarity_map = license_infernece.calculate_similarity(product_image, basic_data['product_images'])
                     if similarity_map[0]['similarity'] >= 90.0:
                         return False
                     
@@ -139,30 +147,31 @@ class ApiService:
         result = json5.loads(ApiService.agent.license_product_judgement(url_data['title'], license_list))
         return result['in_list']
     
+    
 if __name__ == '__main__':
-    # url_data = {
-    #     'title': '休闲短裤女士运动潮流系列夏季女装裤子梭织运动裤',
-    #     'brand_name': '李宁',
-    #     'product_images': [
-    #         'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/0902/118/27466cf6-fb28-4580-9009-95a3763e06bf.jpg',
-    #         'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg'  
-    #     ],
-    #     'price': 199
-    # }
-    # basic_data = {
-    #     'product_name': '休闲短裤女士运动潮流系列夏季女装裤子梭织运动裤',
-    #     'brand_name': '李宁',
-    #     'similarity_logos': ['李宇', '李柠'],
-    #     'product_images': [
-    #         'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg',
-    #         'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg' 
-    #     ],
-    #     'base_price': 200,
-    #     'price_percent': 0.9
+    url_data = {
+        'title': '休闲短裤女士运动潮流系列夏季女装裤子梭织运动裤',
+        'brand_name': '李宁',
+        'product_images': [
+            'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/0902/118/27466cf6-fb28-4580-9009-95a3763e06bf.jpg',
+            'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg'  
+        ],
+        'price': 199
+    }
+    basic_data = {
+        'product_name': '休闲短裤女士运动潮流系列夏季女装裤子梭织运动裤',
+        'brand_name': '李宁',
+        'similarity_logos': ['李宇', '李柠'],
+        'product_images': [
+            'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg',
+            'http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/169/8ca15632-9cb9-40e7-8915-e6773e17a05e.jpg' 
+        ],
+        'base_price': 200,
+        'price_percent': 0.9
         
-    # }
-    # result = ApiService.low_price_judgement(url_data, basic_data)
-    # print(result)
+    }
+    result = ApiService.low_price_judgement(url_data, basic_data)
+    print(result)
     product_list = [
             "https://gw.alicdn.com/imgextra/O1CN01SRLgJ11JdrhxIPXGd_!!3378851052.jpg_q95.jpg_.webp",
             "https://img.alicdn.com/imgextra/i2/3378851052/O1CN01UpWwyY1JdrUq6MJvu_!!3378851052.jpg_q75.jpg_.webp",

+ 2 - 2
utils/service.py

@@ -4,7 +4,7 @@ from data import BrandInfo
 import gradio as gr
 import json
 import pandas as pd
-from utils import load_image_from_url
+from utils import load_image_from_url, load_image_from_cos
 
 product_dao = MongoDao("vbrand-ec")
 license_dao = MongoDao("ProductStandard")
@@ -48,7 +48,7 @@ class Service:
             return [], [["商品未找到", ""]]
         
         # 加载图片
-        images = [load_image_from_url(image_url) for image_url in product.images]
+        images = [load_image_from_cos(image_url) for image_url in product.images]
         
         # 产品信息
         info = [

+ 31 - 2
utils/utils.py

@@ -1,6 +1,7 @@
 import base64
 from io import BytesIO
-from PIL import Image
+from PIL import Image, features
+import pillow_avif
 import requests
 
 def image_to_base(image_path):
@@ -8,6 +9,12 @@ def image_to_base(image_path):
         image_base = base64.b64encode(image_file.read()).decode('utf-8')
     return image_base
 
+def pil_image_to_base64(image, format="JPEG"):
+    """将图像转换为Base64的编码字符串"""
+    buffered = BytesIO()
+    image.save(buffered, format=format)
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+
 def load_image_from_url(image_url):
     """根据url加载图像"""
     headers = {
@@ -17,5 +24,27 @@ def load_image_from_url(image_url):
     }
     response = requests.get(image_url, headers=headers)
     image = Image.open(BytesIO(response.content))
+    return image
+
+def load_image_from_cos(cos_url):
+    """下载腾讯云COS的AVIF文件,并转换为可处理的图像数据"""
+    try:
+        response = requests.get(cos_url)
+        response.raise_for_status() # 检查请求是否成功
+        
+        img = Image.open(BytesIO(response.content))
+        return img
+    except requests.exceptions.RequestException as e:
+        raise requests.exceptions.RequestException(f"下载失败:{e}")
+    except Exception as e:
+        raise ValueError(f"解析AVIF失败:{e}")
+            
+
+if __name__ == "__main__":
+    # url = 'https://img.alicdn.com/imgextra/i1/2212526294503/O1CN01P0qxZL1j8QU5cC4ed_!!2212526294503.jpg_q75.jpg_.webp'
+    # image = load_image_from_url(url)
+    # image.save('./product.png')  # 保存为PNG
     
-    return image
+    cos_url = "https://dev-govern-private-1251740668.cos.ap-guangzhou.myqcloud.com/private/20250723/b9c411c4c2514367afc4dd4ad199eb0d.webp?q-sign-algorithm=sha1&q-ak=AKIDIWXN4kqgpiMm0z4T5VgcKn4KSP8cZwnO&q-sign-time=1753320960%3B1753328160&q-key-time=1753320960%3B1753328160&q-header-list=host&q-url-param-list=&q-signature=f6223c1353c1c97c8e42355625dec94967c9c2f6"
+    image = load_image_from_cos(cos_url)
+    image.save('./product.png')