Browse Source

logo识别功能测试

Sherlock1011 11 tháng trước
mục cha
commit
5285f1313f
4 tập tin đã thay đổi với 74 bổ sung và 1 xóa
  1. 63 1
      agent/glm.py
  2. BIN
      logo/lining.jpg
  3. 5 0
      utils/__init__.py
  4. 6 0
      utils/utils.py

+ 63 - 1
agent/glm.py

@@ -1,4 +1,5 @@
 from config import load_config
+from utils import image_to_base
 from zhipuai import ZhipuAI
 class Glm:
     _instance = None
@@ -17,6 +18,7 @@ class Glm:
         self.model_name = "glm-4-plus"
     
     def text_response(self, query):
+        """文字问答"""
         resonse = self.client.chat.completions.create(
             model=self.model_name,
             messages=[
@@ -28,10 +30,70 @@ class Glm:
         )
         return resonse.choices[0].message
     
+    def image_response(self, query):
+        """Single-image question answering.
+
+        Sends ``query`` as the text part of one user message, together with
+        one image URL, to the "glm-4v-plus-0111" model and returns the
+        ``message`` of the first choice in the response.
+
+        NOTE(review): the image URL below is hard-coded, so ``query`` is the
+        only caller-controlled input, and the model name is a literal rather
+        than ``self.model_name``.
+        """
+        response = self.client.chat.completions.create(
+            model="glm-4v-plus-0111",
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": query
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": "http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/613214/2024/1120/88/9b9027dd-95b7-4024-b71e-fb7cbfde16a1.jpg"}
+                        }
+                    ]
+                }
+            ]
+        )
+        
+        return response.choices[0].message
+    
+    def multi_epoch_image_response(self, logo_path):
+        """Multi-turn image question answering.
+
+        Encodes the file at ``logo_path`` with ``image_to_base`` and sends it
+        as the image of a first user message whose text states that it is the
+        Li-Ning logo. A second user message carries a hard-coded product image
+        URL and asks whether that product contains a logo and, if so, whether
+        it is the logo from the first image. Returns the ``message`` of the
+        first choice in the response.
+
+        NOTE(review): both messages use the "user" role with no assistant
+        turn in between, and the model name is a literal rather than
+        ``self.model_name``.
+        NOTE(review): ``image_to_base`` as added in this commit returns bare
+        base64 text without a ``data:`` prefix -- confirm the API accepts that
+        form in ``image_url.url``.
+        """
+        image_base = image_to_base(logo_path)
+        response = self.client.chat.completions.create(
+            model="glm-4v-plus-0111",
+            messages=[
+                {
+                    "content": [
+                        {
+                            "image_url": {"url": image_base},
+                            "type": "image_url"
+                        },
+                        {
+                            "text": "这是李宁的logo",
+                            "type": "text"
+                        }
+                    ],
+                    "role": "user"
+                },
+                {
+                    "content": [
+                        {
+                            "image_url": {"url": "http://h2.appsimg.com/a.appsimg.com/upload/merchandise/pdcvis/2023/04/14/79/d75fa3db-3bec-45c4-b7a6-54332d42e373.jpg"},
+                            "type": "image_url"
+                        },
+                        {
+                            "text": "第二张图像中的产品是否包含logo,如果包含的话是否是第一张的logo?",
+                            "type": "text"
+                        }
+                    ],
+                    "role": "user"
+                }
+            ]
+        )
+        
+        return response.choices[0].message
+    
     def set_modelname(self, modelname):
         self.model_name = modelname
     
 if __name__ == '__main__':
     glm = Glm()
-    response = glm.response("请帮我编写一段快速排序的代码")
+    response = glm.multi_epoch_image_response("./logo/lining.jpg")
     print(response)

BIN
logo/lining.jpg


+ 5 - 0
utils/__init__.py

@@ -0,0 +1,5 @@
+from utils.utils import image_to_base
+
+__all__ =[
+    "image_to_base"
+]

+ 6 - 0
utils/utils.py

@@ -0,0 +1,6 @@
+import base64
+
+def image_to_base(image_path):
+    """Return the content of the file at ``image_path`` as base64 text.
+
+    The file is opened in binary mode and read in full; the base64 output
+    is decoded as UTF-8 so the result is a ``str``. No ``data:`` URI prefix
+    is added to the returned value.
+    """
+    with open(image_path, 'rb') as image_file:
+        image_base = base64.b64encode(image_file.read()).decode('utf-8')
+    return image_base