Jelajahi Sumber

封装item2vec冷启动推理流程

yangzeyu 11 bulan lalu
induk
melakukan
d28d803f6c
6 mengubah file dengan 76 tambahan dan 16 penghapusan
  1. 0 3
      gnerate_report.py
  2. 0 3
      inference.py
  3. 3 1
      models/__init__.py
  4. 1 1
      models/recall/itemCF/score.py
  5. 42 8
      models/recommend.py
  6. 30 0
      report.py

+ 0 - 3
gnerate_report.py

@@ -75,9 +75,6 @@ def get_recommend_list_by_gbdt_lr(city_uuid, product_id):
     return recommend_list
     
 
-def gbdt_lr_inference(city_uuid, product_id):
-    pass
-
 def generate_features_shap(city_uuid, product_id, delivery_count):
     feats_sample, filter_dict, cust_list = generate_recommend_sample(city_uuid, product_id)
     

+ 0 - 3
inference.py

@@ -1,3 +0,0 @@
-from database.db.redis_db import RedisDatabaseHelper
-
-

+ 3 - 1
models/__init__.py

@@ -4,10 +4,12 @@ from models.recall.hot_recall import HotRecallModel
 from models.recall.itemCF.score import UserItemScore
 from models.recall.itemCF.similarity_matrix import SimilarityMatrix
 from models.recall.itemCF.ItemCF import ItemCFModel
+from models.recommend import Recommend
 
 __all__ = [
     "HotRecallModel",
     "UserItemScore",
     "SimilarityMatrix",
-    "ItemCFModel"
+    "ItemCFModel",
+    "Recommend"
 ]

+ 1 - 1
models/recall/itemCF/score.py

@@ -13,7 +13,7 @@ class UserItemScore:
         
         # 数据清洗
         self._order_data["sale_qty"] = self._order_data["sale_qty"].fillna(0)
-        self._order_data = self._order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
+        self._order_data = self._order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].mean()
         self._order_data = self._order_data[self._order_data["sale_qty"] != 0]
         
         # 归一化处理

+ 42 - 8
recommend.py → models/recommend.py

@@ -5,6 +5,7 @@ from models.item2vec.inference import Item2VecModel
 from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
 from models.rank.data.utils import sample_data_clear
 from models.rank.gbdt_lr_inference import GbdtLrModel
+import pandas as pd
 
 
 class Recommend:
@@ -33,7 +34,7 @@ class Recommend:
         recall_list = self._redis.zrevrange(key, 0, -1, withscores=False)
         return recall_list
     
-    def _get_recal_cust(self, product_id, recall_count):
+    def get_recal_cust(self, product_id, recall_count):
         """通过协同过滤和热度召回,召回待推荐商户列表"""
         itemcf_recall_list = self._get_itemcf_recall(product_id)
         hot_recall_list =  self._get_hot_recall()
@@ -48,11 +49,10 @@ class Recommend:
             
         return result[:recall_count]
     
-    def get_recommend_list_by_gbdtlr(self, product_id, recall_count=100, discovery_count=500):
+    def get_recommend_list_by_gbdtlr(self, product_id, recall_count=500):
         """根据gbdt_lr获取商户推荐列表"""
         # 获取召回的商户列表
-        recall_cust_list = self._get_recal_cust(product_id, recall_count)
-        print(len(recall_cust_list))
+        recall_cust_list = self.get_recal_cust(product_id, recall_count)
         # 获取卷烟数据
         product_data = self._dao.get_product_by_id(self._city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
         product_data = sample_data_clear(product_data, ProductConfig)
@@ -68,13 +68,47 @@ class Recommend:
         
         # 获取推理用的feats_map
         feats_map = self._gbdtlr_model.generate_feats_map(product_data, cust_data)
-        print(len(cust_data))
         recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
-        
         return recommend_list
     
+    def get_recommend_list_by_item2vec(self, product_id, recall_count=500):
+        """Return the item2vec-based merchant recommendations for ``product_id``.
+
+        Asks the item2vec model for its top ``recall_count`` merchants, removes
+        the ``sale_qty`` column from the result, and converts the remaining
+        columns into a list of per-merchant dicts capped at ``recall_count``
+        entries.
+        """
+        # NOTE(review): presumably the model returns a pandas DataFrame that
+        # contains a "sale_qty" column -- confirm against Item2VecModel.
+        scored_custs = self._item2vec_model.get_recommend_cust_list(product_id, top=recall_count)
+        records = scored_custs.drop(columns=["sale_qty"]).to_dict(orient='records')
+        return records[:recall_count]
+    
+    def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
+        """Split ``delivery_count`` units across merchants in proportion to their scores.
+
+        Uses the largest-remainder method: every merchant first receives the
+        integer part of its proportional share, then the units still
+        unassigned are handed out one each to the merchants with the largest
+        fractional remainders. For non-negative scores the allocations
+        therefore sum to ``delivery_count``.
+
+        Args:
+            recommend_list: Records (anything ``pd.DataFrame`` accepts) that
+                each carry a numeric ``recommend_score``.
+            delivery_count: Total number of units to distribute.
+
+        Returns:
+            A list of dicts, one per input record, each with an added integer
+            ``delivery_count`` key, ordered by ``recommend_score`` descending.
+            An empty ``recommend_list`` yields an empty list.
+        """
+        recommend_data = pd.DataFrame(recommend_list)
+        if recommend_data.empty:
+            # Nothing to allocate to; the score column would not even exist.
+            return []
+
+        # 1. Fractional share each merchant is entitled to.
+        scores = recommend_data["recommend_score"]
+        total_score = scores.sum()
+        if total_score == 0:
+            # All-zero scores would produce 0/0 = NaN shares, which cannot be
+            # cast to int below; fall back to an even split.
+            recommend_data["delivery_float"] = delivery_count / len(recommend_data)
+        else:
+            recommend_data["delivery_float"] = scores / total_score * delivery_count
+
+        # 2. Truncate to get the guaranteed base allocation.
+        recommend_data["delivery_count"] = recommend_data["delivery_float"].astype(int)
+
+        # 3. Rank merchants by the fractional part that was cut off. A stable
+        #    sort keeps the input order among equal remainders, so the result
+        #    is deterministic.
+        recommend_data["remainder"] = recommend_data["delivery_float"] - recommend_data["delivery_count"]
+        recommend_data = recommend_data.sort_values("remainder", ascending=False, kind="stable")
+
+        # 4. Hand the leftover units out, one each, from the largest remainder down.
+        remaining = int(delivery_count - recommend_data["delivery_count"].sum())
+        recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1
+
+        # Drop the helper columns and present the best-scored merchants first.
+        recommend_data = recommend_data.drop(columns=["delivery_float", "remainder"])
+        recommend_data = recommend_data.sort_values("recommend_score", ascending=False, kind="stable")
+
+        return recommend_data.to_dict(orient='records')
+        
+    
 if __name__ == "__main__":
     city_uuid = "00000000000000000000000011445301"
-    product_id = '110110'
+    product_id = '350139'
     recommend = Recommend(city_uuid)
-    recommend_list = recommend.get_recommend_list_by_gbdtlr(product_id)
+    recommend_list = recommend.get_recommend_list_by_item2vec(product_id)
+    recommend_data = recommend.get_recommend_and_delivery(recommend_list)
+    for i in recommend_data:
+        print(i)

+ 30 - 0
report.py

@@ -0,0 +1,30 @@
+from database.dao.mysql_dao import MySqlDao
+from models import Recommend
+
+class ReportUtils:
+    """Pick a recommendation strategy per product for a single city."""
+
+    def __init__(self, city_uuid):
+        """Create the recommender and the MySQL DAO used for ``city_uuid``."""
+        self._recommend_model = Recommend(city_uuid)
+        self._city_uuid = city_uuid
+        self._dao = MySqlDao()
+
+    def _get_recommend_cust_list(self, product_id, recall_count=1000):
+        """Return the ``cust_code`` of every merchant recommended for ``product_id``.
+
+        A product that already appears in the city's order data is ranked
+        with the GBDT+LR model; a product with no orders (a new
+        specification) falls back to the item2vec cold-start path.
+
+        Args:
+            product_id: Product code to recommend merchants for.
+            recall_count: Maximum number of merchants requested from the
+                underlying model; tunable, defaults to 1000.
+
+        Returns:
+            A list of ``cust_code`` values in the order the model returned them.
+        """
+        # Decide whether product_id is a new specification, i.e. has no order history.
+        products_in_order = self._dao.get_product_from_order(self._city_uuid)["product_code"].unique().tolist()
+        if product_id in products_in_order:
+            recommend_list = self._recommend_model.get_recommend_list_by_gbdtlr(product_id, recall_count=recall_count)
+        else:
+            recommend_list = self._recommend_model.get_recommend_list_by_item2vec(product_id, recall_count=recall_count)
+
+        return [item["cust_code"] for item in recommend_list]
+    
+    
+if __name__ == "__main__":
+    # Manual run: build the report helper for one city and fetch the
+    # recommended merchant codes for one product. The result is not printed.
+    city_uuid = "00000000000000000000000011445301"
+    product_id = '350139'
+    report = ReportUtils(city_uuid)
+    recommend_list = report._get_recommend_cust_list(product_id)
+    
+