Przeglądaj źródła

文字关键词引流判定

Sherlock1011 1 rok temu
rodzic
commit
2ec3f7cb58
5 zmienionych plików z 167 dodań i 25 usunięć
  1. 9 0
      agent/__init__.py
  2. 28 0
      agent/agent.py
  3. 32 0
      agent/config.py
  4. 11 5
      agent/glm.py
  5. 87 20
      webui.py

+ 9 - 0
agent/__init__.py

@@ -0,0 +1,9 @@
+from agent.glm import Glm
+from agent.config import KeyWordPrompt
+from agent.agent import Agent
+
+__all__ = [
+    "Glm",
+    "KeyWordPrompt",
+    "Agent"
+]

+ 28 - 0
agent/agent.py

@@ -0,0 +1,28 @@
+from agent import Glm, KeyWordPrompt
+class Agent:
+    """Singleton that runs LLM-backed checks through a shared ``Glm`` client."""
+
+    _instance = None
+
+    def __new__(cls):
+        # Always hand back the same object; ``_initialized`` lets ``__init__``
+        # tell the first construction apart from later ``Agent()`` calls.
+        if not cls._instance:
+            cls._instance = super(Agent, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        # ``__init__`` runs on every ``Agent()`` call, so the client is only
+        # built the first time.
+        if not self._initialized:
+            self.glm = Glm()
+            self._initialized = True
+
+    def brand_key_word_judgement(self, brandname, title):
+        """Ask the model whether ``title`` uses ``brandname`` as a traffic keyword.
+
+        Args:
+            brandname: The brand suspected of being used as a lure.
+            title: The product title to analyse.
+
+        Returns:
+            The ``content`` attribute of the message returned by
+            ``Glm.text_response`` for the assembled prompt.
+        """
+        self.glm.set_modelname("glm-4-plus")
+        # The title fills the "商品标题" slot and the brand fills the
+        # "给定的引流品牌" slot; these two were previously swapped.
+        prompt = KeyWordPrompt.EXTRACT_INFO_FROM_TITLE + f"""
+            请根据上述逻辑,分析以下商品标题,并输出结果:
+            商品标题:{title}
+            给定的引流品牌:{brandname}"""
+
+        response = self.glm.text_response(prompt)
+        return response.content
+
+if __name__ == "__main__":
+    agent = Agent()
+    # Arguments follow the signature order: (brandname, title).
+    print(agent.brand_key_word_judgement("李宁", "【防泼水】荷叶风衣连帽加绒外套防风外套保暖户外运动服女外套"))

+ 32 - 0
agent/config.py

@@ -0,0 +1,32 @@
+class KeyWordPrompt:
+    """Prompt templates for the keyword-traffic (关键词引流) check."""
+
+    # Instruction text with a worked example. It contains no placeholders, so
+    # it is a plain string and the JSON braces are written literally.
+    EXTRACT_INFO_FROM_TITLE = """
+         你是一个电商数据分析助手,负责从商品标题中提取实际售卖品牌和产品款式等信息,并根据给定的引流品牌判断是否涉嫌关键词引流。关键词引流的定义是:商品标题中包含给定的引流品牌名称,但实际销售的是另一个品牌的产品。请根据以下步骤进行分析:
+
+        1. **提取实际售卖品牌**:从商品标题中提取出实际售卖的品牌名称。实际售卖品牌通常是标题中明确提到的品牌,或者是商品描述中明确指出的品牌。
+
+        2. **判断引流行为**:如果标题中提取的实际售卖品牌与给定的引流品牌不一致,且标题中包含给定的引流品牌名称,则判定为涉嫌关键词引流。
+
+        3. **输出结果**:请输出以下信息:
+            - 提取出的实际售卖品牌(title_brand_name)
+            - 给定的引流品牌(brand_name)
+            - 产品款式(product_style)
+            - 是否涉嫌关键词引流(是/否)(key_word_flag:true or false)
+            以json的格式输出
+
+        示例:
+            - 商品标题:今典66w快充头华为充电器
+            - 给定的引流品牌:华为
+        - 输出结果:
+        ```json
+        {
+            "title_brand_name": "今典",
+            "brand_name": "华为",
+            "product_style": "66w快充头",
+            "key_word_flag": true
+        }
+        ```
+    """
+
+
+if __name__ == "__main__":
+    print(KeyWordPrompt.EXTRACT_INFO_FROM_TITLE)

+ 11 - 5
agent/glm.py

@@ -2,30 +2,36 @@ from config import load_config
 from zhipuai import ZhipuAI
 class Glm:
+    """Singleton wrapper around a ``ZhipuAI`` client configured from ``load_config()['glm']``."""
     _instance = None
-    def __new__(cls, model_name):
+    def __new__(cls):
+        # Create the shared instance once and flag it as not yet initialised.
         if not cls._instance:
             cls._instance = super(Glm, cls).__new__(cls)
             cls._instance._initialized = False
         return cls._instance
     
-    def __init__(self, model_name):
+    def __init__(self):
+        # Read the config and build the API client only on first construction.
         if not self._initialized:
             self.cfg = load_config()['glm']
             self.client = ZhipuAI(api_key=self.cfg["api_key"])
             self._initialized = True
             
-        self.model_name = model_name
+        # NOTE(review): this assignment is outside the guard above, so every
+        # ``Glm()`` call resets the shared instance's model to this default,
+        # discarding an earlier ``set_modelname`` - confirm this is intended.
+        self.model_name = "glm-4-plus"
     
-    def response(self, query):
+    def text_response(self, query):
+        """Send ``query`` as a single user message and return the first choice's message.
+
+        The request is made with ``self.model_name`` and a ``response_format``
+        of type ``json_object``.
+        """
         resonse = self.client.chat.completions.create(
             model=self.model_name,
             messages=[
                 {"role": "user", "content": query}
             ],
+            response_format= {
+                'type': 'json_object'
+            }
         )
         return resonse.choices[0].message
     
+    def set_modelname(self, modelname):
+        """Set the model name used by subsequent requests."""
+        self.model_name = modelname
+    
 if __name__ == '__main__':
-    glm = Glm("glm-4-flash")
-    response = glm.response("请帮我编写一段快速排序的代码")
+    glm = Glm()
+    # ``response`` was renamed to ``text_response`` in this change; calling
+    # the old name would raise AttributeError.
+    response = glm.text_response("请帮我编写一段快速排序的代码")
     print(response)

+ 87 - 20
webui.py

@@ -1,34 +1,91 @@
 import gradio as gr
 from db import MongoDao
+import requests
+from PIL import Image
+from io import BytesIO
+from agent.agent import Agent
+import json
 
-dao = MongoDao("obrand-ec")
+# Shared Agent instance used by check_infringement.
+agent = Agent()
+
+# HTTP headers sent by load_image when downloading product images.
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
+    "Referer": "https://www.aliexpress.com/",
+    "Accept-Language": "en-US,en;q=0.9",
+}
+
+# Data-access object built with the name "vbrand-ec"
+# (presumably the database name - verify against MongoDao).
+dao = MongoDao("vbrand-ec")
 
 def get_merchant_list():
-    """ 返回商户列表,显示 title,存储 outId """
-    merchant_data = dao.get_fields_data(["outId", "title"])[:50]
-    merchant_dict = {m["title"]: m["outId"] for m in merchant_data}
-    return merchant_dict
+    """Return the ``title`` of every item yielded by ``dao.get_fields_data(["title"])``.
+
+    Returns:
+        list: Titles in the order the DAO returns them.
+    """
+    return [item["title"] for item in dao.get_fields_data(["title"])]
 
-def display_outid(title):
-    """ 根据商户名称返回对应的 outId """
-    merchant_dict = get_merchant_list()
-    return merchant_dict.get(title, "未找到对应 ID")
+# def display_cust_info(title):
+#     """ 根据商户名称返回对应的 outId """
+#     merchant_dict = get_merchant_list()
+#     cust_id = merchant_dict.get(title, "-1")
+#     if cust_id != '-1':
+#         return get_cust_info(cust_id)
+#     else:
+#         return "未找到对应 ID", None
+    
+def load_image(image_url):
+    """Download ``image_url`` with the module-level ``headers`` and open it with PIL.
+
+    Args:
+        image_url: URL of the image to fetch.
+
+    Returns:
+        The ``PIL.Image`` object opened from the response body.
+
+    Raises:
+        requests.RequestException: On connection failure, timeout, or an
+            HTTP error status.
+    """
+    # requests has no default timeout; without one a stalled server would
+    # block the caller indefinitely.
+    response = requests.get(image_url, headers=headers, timeout=10)
+    # Surface HTTP errors directly instead of handing an error page to PIL.
+    response.raise_for_status()
+    return Image.open(BytesIO(response.content))
 
-def check_infringement(merchant):
+def get_cust_info(title):
+    """Look up the record whose ``title`` matches and build its display data.
+
+    Args:
+        title: Value matched against the ``title`` field of the stored records.
+
+    Returns:
+        tuple: ``(text, image)``. ``text`` is a formatted summary of the
+        record, or ``"title不正确"`` when no record is found. ``image`` is the
+        image loaded from the record's first image URL, or ``None`` when the
+        record is missing or carries no image URL.
+    """
+    record = dao.get_one_record_by_query({"title": title})
+    if record is None:
+        return "title不正确", None
+    res = f"""
+            商品名称:\t{record["title"]}\n
+            平台:\t{record["platFormName"]}\n
+            品牌:\t{record["brandName"]}\n
+            价格:\t{record["price"]}\n
+            链接:\t{record["url"]}\n
+    """
+    try:
+        image_url = record["image"][0]
+    except (KeyError, IndexError, TypeError):
+        # A record without an image (missing key, empty list, or None) still
+        # has text worth showing; leave the image slot empty.
+        return res, None
+    return res, load_image(image_url)
+
+def check_infringement(title, brandname):
-    """ 模拟侵权检测逻辑 """
-    return f"商户 {merchant} 的侵权检测结果:未发现侵权"
+    """Ask the agent whether ``title`` uses ``brandname`` for keyword traffic.
+
+    Args:
+        title: Product title selected in the UI.
+        brandname: Brand to check the title against.
+
+    Returns:
+        str: Text for the result box: the ``key_word_flag`` value from the
+        model's JSON reply, or a failure notice when the reply cannot be
+        parsed or lacks that key.
+    """
+    # When the brand is absent from the title it is prepended before the
+    # title is handed to the model.
+    if brandname not in title:
+        title = brandname + title
+
+    raw_reply = agent.brand_key_word_judgement(brandname, title)
+    try:
+        flag = json.loads(raw_reply)["key_word_flag"]
+    except (json.JSONDecodeError, KeyError, TypeError):
+        # Model output may be empty, non-JSON, or missing the key; report
+        # that in the result box instead of raising inside the UI callback.
+        return "关键词引流: 无法解析模型返回结果"
+
+    # Same text the triple-quoted template produced, spelled with explicit
+    # escapes so the whitespace is visible.
+    return f"\n        关键词引流: {flag}\n    \n    "
+
+def search_by_cust_id(cust_id):
+    """Return ``get_cust_info(cust_id)``, or ``(None, None)`` for an empty string.
+
+    Args:
+        cust_id: Text from the search box; it is forwarded unchanged to
+            ``get_cust_info``.
+
+    Returns:
+        tuple: The ``(text, image)`` pair from ``get_cust_info``, or
+        ``(None, None)`` when ``cust_id`` is ``''``.
+    """
+    if cust_id != '':
+        return get_cust_info(cust_id)
+    return None, None
+
+# merchant_dict = get_merchant_list()
+# merchant_list_titles = list(merchant_dict.keys())
+# Titles offered in the merchant dropdown.
+merchant_list_titles = get_merchant_list()
 
-merchant_dict = get_merchant_list()
-merchant_list_titles = list(merchant_dict.keys())
+# Use the first title as the default selection, or None when the list is empty.
+default_merchant = merchant_list_titles[0] if merchant_list_titles else None
+# Runs at import time: looks up the default record (and loads its image) to
+# pre-fill the info box and image widget.
+default_cust_info, default_image = get_cust_info(default_merchant)
 
 with gr.Blocks() as demo:
     gr.Markdown("## 侵权识别系统", elem_id="header")
     
     with gr.Row():
-        # 左侧部分
         with gr.Column():
+            brand_state = gr.State(value="李宁")
             brand_dropdown = gr.Dropdown(
-                ["李宁", "耐克", "阿迪达斯", "彪马"],
+                ["李宁"],
                 label="品牌选择", 
                 value="李宁",
                 interactive=True)
@@ -39,21 +96,31 @@ with gr.Blocks() as demo:
             merchant_list = gr.Dropdown(
                 merchant_list_titles, 
                 label="商户列表", 
+                value=default_merchant,  # 设置默认值
                 interactive=True
             )
             
             check_button = gr.Button("查询侵权")
         
-        # 右侧部分
         with gr.Column():
             with gr.Row():
-                image_display = gr.Image(label="商品图片", interactive=False)
-                product_info = gr.Textbox(label="商品信息", interactive=False)
+                image_display = gr.Image(label="商品图片", interactive=False, type='pil', value=default_image)
+                product_info = gr.Textbox(
+                    label="商品信息",
+                    interactive=False,
+                    value=default_cust_info  # 预填充默认商户信息
+                )
             
             infringement_result = gr.Textbox(label="侵权识别结果", interactive=False)
     
     # 事件绑定
-    merchant_list.change(display_outid, inputs=merchant_list, outputs=product_info)
-    check_button.click(check_infringement, inputs=merchant_list, outputs=infringement_result)
+    brand_dropdown.change(
+        fn=lambda x: x,
+        inputs=brand_dropdown,
+        outputs=brand_state
+    )
+    search_button.click(search_by_cust_id, inputs=search_box, outputs=[product_info, image_display])
+    merchant_list.change(get_cust_info, inputs=merchant_list, outputs=[product_info, image_display])
+    check_button.click(check_infringement, inputs=[merchant_list, brand_state], outputs=infringement_result)
 
-demo.launch(share=True)
+demo.launch(server_name = "0.0.0.0", server_port = 7860)