Pārlūkot izejas kodu

refactor(models): remove hotness fallback, merge cust_code_list into ItemCF recall, remove delivery allocation

Sherlock 1 nedēļu atpakaļ
vecāks
revīzija
35c3d1a911
1 mainīts fails ar 16 papildinājumiem un 52 dzēšanām
  1. 16 52
      models/recommend.py

+ 16 - 52
models/recommend.py

@@ -32,33 +32,22 @@ class Recommend:
         recall_list = self._redis.zrevrange(key, 0, -1, withscores=False)
         return recall_list
     
-    def _get_hot_recall(self):
-        """热度召回"""
-        key = f"hot:{self._city_uuid}:sale_qty"
-        recall_list = self._redis.zrevrange(key, 0, -1, withscores=False)
-        return recall_list
-    
-    def get_recal_cust(self, product_id, recall_count):
-        """通过协同过滤和热度召回,召回待推荐商户列表"""
+    def get_recal_cust(self, product_id, cust_code_list):
+        """通过协同过滤召回与核心零售户列表取并集,得到待推荐商户列表"""
         itemcf_recall_list = self._get_itemcf_recall(product_id)
-        hot_recall_list =  self._get_hot_recall()
-        
-        result = list(dict.fromkeys(itemcf_recall_list))
-        # 如果结果不足,从hot_recall中补齐
-        if len(result) < recall_count:
-            hot_recall_set = set(hot_recall_list) - set(result)
-            additional_items = [item for item in hot_recall_list if item in hot_recall_set]
-            needed = recall_count - len(result)
-            result.extend(additional_items[:needed])
+        seen = set(itemcf_recall_list)
+        extra = [c for c in cust_code_list if c not in seen]
+        result = list(itemcf_recall_list) + extra
+        logger.info(f"Recall completed: {len(result)} customers (itemcf={len(itemcf_recall_list)}, core_extra={len(extra)}) for product {product_id}")
+        return result
 
-        logger.info(f"Recall completed: {len(result)} customers for product {product_id}")
-        return result[:recall_count]
-    
-    def get_recommend_list_by_gbdtlr(self, product_id, recall_count=500):
+    def get_recommend_list_by_gbdtlr(self, product_id, cust_code_list=None):
         """根据gbdt_lr获取商户推荐列表"""
         logger.info(f"GBDT-LR recommend started for product {product_id}")
         # 获取召回的商户列表
-        recall_cust_list = self.get_recal_cust(product_id, recall_count)
+        if cust_code_list is None:
+            cust_code_list = []
+        recall_cust_list = self.get_recal_cust(product_id, cust_code_list)
         # 获取卷烟数据
         product_data = self._dao.get_product_by_id(self._city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
         product_data = sample_data_clear(product_data, ProductConfig)
@@ -86,13 +75,14 @@ class Recommend:
         logger.info(f"GBDT-LR recommend completed: {len(recommend_list)} results")
         return recommend_list
     
-    def get_recommend_list_by_item2vec(self, product_id, recall_count=500):
-        """根据item2vec获取商户推荐列表"""
+    def get_recommend_list_by_item2vec(self, product_id, cust_code_list=None):
+        """根据item2vec获取商户推荐列表,核心商户并入候选集统一评分"""
+        if cust_code_list is None:
+            cust_code_list = []
         logger.info(f"Item2Vec recommend started for product {product_id}")
-        recommend_list = self._item2vec_model.get_recommend_cust_list(product_id, top=recall_count)
+        recommend_list = self._item2vec_model.get_recommend_cust_list(product_id, cust_code_list=cust_code_list)
         recommend_list = recommend_list.drop(columns=["sale_qty"])
         recommend_list = recommend_list.to_dict(orient='records')
-        recommend_list = recommend_list[:recall_count]
         # recommend_list = self.filter_recommend_list(recommend_list)
         logger.info(f"Item2Vec recommend completed: {len(recommend_list)} results")
         return recommend_list
@@ -105,34 +95,8 @@ class Recommend:
             if item["cust_code"] in cust_set
         ]
         return filter_recommend_list
-    
-    def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
-        """根据推荐列表生成投放分配"""
-        recommend_data = pd.DataFrame(recommend_list)
-        # 1. 计算每个商户的理论应得数量(带小数)
-        recommend_data["delivery_float"] = (
-            recommend_data["recommend_score"] / recommend_data["recommend_score"].sum() * delivery_count
-        )
 
-        # 2. 向下取整得到基础配额
-        recommend_data["delivery_count"] = recommend_data["delivery_float"].astype(int)
 
-        # 3. 计算余数并排序
-        recommend_data["remainder"] = recommend_data["delivery_float"] - recommend_data["delivery_count"]
-        recommend_data = recommend_data.sort_values(["remainder", "cust_code"], ascending=[False, True])
-
-        # 4. 将剩余配额按余数从大到小分配
-        remaining = delivery_count - recommend_data["delivery_count"].sum()
-        recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1
-        
-        recommend_data = recommend_data.drop(columns=["delivery_float", "remainder"])
-        recommend_data = recommend_data.sort_values(["recommend_score", "cust_code"], ascending=[False, True])
-        
-        recommend_data = recommend_data.to_dict(orient='records')
-        logger.info(f"Delivery allocation completed for {len(recommend_data)} customers, total={delivery_count}")
-        return recommend_data
-        
-    
 if __name__ == "__main__":
     city_uuid = "00000000000000000000000011445301"
     product_id = '350139'