Prechádzať zdrojové kódy

优化推荐错乱的问题

杨泽宇 3 hodín pred
rodič
commit
3ac302a1f4

+ 9 - 5
api_test.py

@@ -4,16 +4,20 @@ import json
 url = "http://127.0.0.1:7960/brandcultivation/api/v1/recommend"
 payload = {
     "city_uuid": "00000000000000000000000011440801",
-    "product_code": "430121",
-    "recall_cust_count": 30,
-    "delivery_count": 500,
+    "product_code": "440308",
+    "recall_cust_count": 200,
+    "delivery_count": 200,
     "cultivacation_id": "10000001",
-    "limit_cycle_name": "202502W2(02.10-02.16)"
+    "limit_cycle_name": "202603W4(03.21-03.29)"
 }
 headers = {'Content-Type': 'application/json'}
 
 response = requests.post(url, data=json.dumps(payload), headers=headers)
-print(response.json())
+result = response.json()
+print(result)
+
+with open("api_test_result.json", "w", encoding="utf-8") as f:
+    json.dump(result, f, ensure_ascii=False, indent=2)
 
 
 # url = "http://127.0.0.1:7960/brandcultivation/api/v1/report"

+ 2 - 1
database/dao/mysql_dao.py

@@ -152,6 +152,7 @@ class MySqlDao:
         FROM {self._product_tablename}
         WHERE city_uuid = :city_uuid
         AND product_code IN :ids
+        ORDER BY product_code
     """).bindparams(bindparam("ids", expanding=True))
         params = {"city_uuid": city_uuid, "ids": list(product_id_list)}
         data = pd.DataFrame(self.db_helper.fetch_all(query, params))
@@ -254,7 +255,7 @@ class MySqlDao:
         return data
     
     def get_product_from_order(self, city_uuid):
-        query = f"SELECT DISTINCT product_code FROM {self._order_tablename} WHERE city_uuid = :city_uuid"
+        query = f"SELECT DISTINCT product_code FROM {self._order_tablename} WHERE city_uuid = :city_uuid ORDER BY product_code"
         params = {"city_uuid": city_uuid}
         
         data = self.db_helper.load_data_with_page(query, params)

+ 2 - 1
models/item2vec/item2vec.py

@@ -28,7 +28,8 @@ class Item2Vec:
             window=3,
             min_count=1,
             sg=1, # skip-gram
-            workers=4,
+            workers=1,   # 固定为1,保证多线程不引入随机性
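+            seed=123456,     # Fixed random seed for reproducibility; gensim only guarantees identical results with workers=1 and, across interpreter launches, a fixed PYTHONHASHSEED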
             epochs=20,
             sample=0.0000001
         )

+ 10 - 3
models/recommend.py

@@ -62,13 +62,20 @@ class Recommend:
         # shop_data = self._dao.get_shop_by_ids(self._city_uuid, recall_cust_list)[ShopConfig.FEATURE_COLUMNS]
         cust_data = sample_data_clear(cust_data, CustConfig)
         # shop_data = sample_data_clear(shop_data, ShopConfig)
-        
+
         # cust_feats = shop_data.set_index("cust_code")
         # cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
-        
+
+        # 按 recall_cust_list 顺序对齐 cust_data,确保 feats_map 行顺序与 recall_list 一致
+        # 否则 get_recommend_list 中 zip(recall_list, scores) 会错配商户ID和分数
+        cust_codes_in_data = set(cust_data["cust_code"].tolist())
+        ordered_recall_list = [c for c in recall_cust_list if c in cust_codes_in_data]
+        cust_order = {code: i for i, code in enumerate(ordered_recall_list)}
+        cust_data = cust_data.sort_values("cust_code", key=lambda x: x.map(cust_order)).reset_index(drop=True)
+
         # 获取推理用的feats_map
         feats_map = generate_feats_map(product_data, cust_data)
-        recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
+        recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, ordered_recall_list)
         # recommend_list = self.filter_recommend_list(recommend_list)
         return recommend_list