Просмотр исходного кода

refactor(utils): remove delivery_count from recommend report, pass cust_code_list through report pipeline

Sherlock 1 неделя назад
Родитель
Коммит
0ebb18af4d
2 изменённых файла: 33 добавлено и 71 удалено
  1. +22 −38
      utils/report_utils.py
  2. +11 −33
      utils/reports_process.py

+ 22 - 38
utils/report_utils.py

@@ -7,7 +7,7 @@ from core import get_logger
 
 import os
 import pandas as pd
-from utils.reports_process import feats_relation_process, calculate_delivery_by_recommend_data, eval_report_process_pre, eval_report_process
+from utils.reports_process import feats_relation_process, build_recommend_report, eval_report_process_pre, eval_report_process
 
 logger = get_logger("utils.report")
 
@@ -23,41 +23,28 @@ class ReportUtils:
         if not os.path.exists(self._save_dir):
             os.makedirs(self._save_dir)
         
-    def _get_recommend_data(self, recall_count):
+    def _get_recommend_data(self, cust_code_list):
         """获取推荐商户列表"""
-        # 判断product_id是否是新品规
         products_in_order = self._dao.get_product_from_order(self._city_uuid)["product_code"].unique().tolist()
-        # recall_count = 100 # 参数调整
         if self._product_id in products_in_order:
-            recommend_data = self._recommend_model.get_recommend_list_by_gbdtlr(self._product_id, recall_count=recall_count)
+            recommend_data = self._recommend_model.get_recommend_list_by_gbdtlr(
+                self._product_id, cust_code_list=cust_code_list
+            )
         else:
-            recommend_data = self._recommend_model.get_recommend_list_by_item2vec(self._product_id, recall_count=recall_count)
-        
-        # # 根据推荐列表获取商户售卖卷烟的月均销量总和
-        # recommend_list = list(map(lambda x: x["cust_code"], recommend_list))
-        # order_data = self._dao.get_order_by_cust(self._city_uuid, )
+            recommend_data = self._recommend_model.get_recommend_list_by_item2vec(
+                self._product_id, cust_code_list=cust_code_list
+            )
         return recommend_data
-    
-    def _generate_feats_map(self, recall_count):
+
+    def _generate_feats_map(self, cust_code_list):
         """根据召回的推荐列表生成品规-商户features_map"""
-        recommend_data = self._get_recommend_data(recall_count)
+        recommend_data = self._get_recommend_data(cust_code_list)
         recommend_list = list(map(lambda x: x["cust_code"], recommend_data))
-        # 获取卷烟的信息
         product_data = self._product_data.copy()
-        
-        # 根据cust_lit获取商户信息和商圈信息
         cust_data = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)[CustConfig.FEATURE_COLUMNS]
-        # shop_data = self._dao.get_shop_by_ids(self._city_uuid, recommend_list)[ShopConfig.FEATURE_COLUMNS]
-        
         product_data = sample_data_clear(product_data, ProductConfig)
         cust_data = sample_data_clear(cust_data, CustConfig)
-        # shop_data = sample_data_clear(shop_data, ShopConfig)
-        
-        # cust_feats = shop_data.set_index("cust_code")
-        # cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
-        
         feats_map = generate_feats_map(product_data, cust_data)
-        
         return feats_map
     
     def _get_product_content(self):
@@ -66,15 +53,13 @@ class ReportUtils:
         filter_dict = product_data.to_dict('records')[0]
         return filter_dict
     
-    def generate_feats_ralation_report(self, recall_count):
+    def generate_feats_ralation_report(self, cust_code_list):
         """生成特征相关性分析报告"""
         logger.info("Generating feature relation report")
-        feats_map = self._generate_feats_map(recall_count)
+        feats_map = self._generate_feats_map(cust_code_list)
         product_content = self._get_product_content()
-        # 计算SHAP值
         shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(feats_map)
         report = feats_relation_process(shap_result, product_content)
-
         report.to_excel(os.path.join(self._save_dir, "品规商户特征关系表.xlsx"), index=False)
         logger.info("Feature relation report saved")
         
@@ -88,13 +73,13 @@ class ReportUtils:
                     file.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
         logger.info("Product report saved")
                     
-    def generate_recommend_report(self, recall_count, delivery_count):
-        """生成推荐报告,包括投放量"""
+    def generate_recommend_report(self, cust_code_list):
+        """生成推荐报告"""
         logger.info("Generating recommend report")
-        recommend_data = self._get_recommend_data(recall_count)
+        recommend_data = self._get_recommend_data(cust_code_list)
         recommend_list = list(map(lambda x: x["cust_code"], recommend_data))
         recommend_cust_infos = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)
-        report = calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, delivery_count)
+        report = build_recommend_report(recommend_data, recommend_cust_infos)
 
         report.to_excel(os.path.join(self._save_dir, "商户售卖推荐表.xlsx"), index=False)
         logger.info("Recommend report saved")
@@ -150,20 +135,19 @@ class ReportUtils:
         delivery_data = delivery_data[DeliveryConfig.FEATURE_COLUMNS]
         delivery_data = sample_data_clear(delivery_data, DeliveryConfig)
 
-        recommend_data = recommend_data.drop(columns=["建议投放量(条)"])
-        
+        recommend_data = recommend_data.drop(columns=["建议投放量(条)"], errors="ignore")
+
         report = eval_report_process(delivery_data, recommend_data)
         
         report.to_excel(os.path.join(self._save_dir, "投放验证报告.xlsx"), index=False)
         logger.info("Eval report saved")
     
-    def generate_all_data(self, recall_count, delivery_count):
+    def generate_all_data(self, cust_code_list):
         logger.info("Generating all reports")
-        self.generate_feats_ralation_report(recall_count)
+        self.generate_feats_ralation_report(cust_code_list)
         self.generate_product_report()
-        self.generate_recommend_report(recall_count, delivery_count)
+        self.generate_recommend_report(cust_code_list)
         self.generate_similarity_product_report()
-        # self.generate_eval_data()
         logger.info("All reports generated")
         
 if __name__ == "__main__":

+ 11 - 33
utils/reports_process.py

@@ -30,46 +30,24 @@ def feats_relation_process(shap_result, product_content):
     )
     return report
 
-def calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, delivery_count):
-    """根据推荐数据计算投放量,并生成推荐商户报告"""
+def build_recommend_report(recommend_data, recommend_cust_infos):
+    """根据推荐数据生成推荐商户报告(不含投放量)"""
     recommend_data = pd.DataFrame(recommend_data)
-    
+
     cust_ids = recommend_cust_infos.set_index("cust_code")
     recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
     recommend_data = recommend_data[["cust_code", "cust_name", "recommend_score"]]
-   # 1. 计算每个商户的理论应得数量(带小数)
-    recommend_data["delivery_float"] = (
-        recommend_data["recommend_score"] / recommend_data["recommend_score"].sum() * delivery_count
-    )
-
-    # 2. 向下取整得到基础配额
-    recommend_data["delivery_count"] = recommend_data["delivery_float"].astype(int)
-
-    # 3. 计算余数并排序
-    recommend_data["remainder"] = recommend_data["delivery_float"] - recommend_data["delivery_count"]
-    recommend_data = recommend_data.sort_values("remainder", ascending=False)
-
-    # 4. 将剩余配额按余数从大到小分配
-    remaining = delivery_count - recommend_data["delivery_count"].sum()
-    recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1
-    
-    recommend_data = recommend_data.drop(columns=["delivery_float", "remainder"])
+    recommend_data = recommend_data.reset_index(drop=True)
+    recommend_data.index = recommend_data.index + 1
     recommend_data = recommend_data.reset_index()
-    # 5. 按recommend_score从大到小重新排序
-    recommend_data = recommend_data.sort_values("index")
-    # recommend_data["sale_qty"] = recommend_data["sale_qty"].round(0).astype(int) # 将月均销量四舍五入取整
     recommend_data = recommend_data.rename(
         columns={
-            "index": "推荐序号", 
-            "cust_code": "商户编号", 
-            "cust_name": "商户名称", 
-            # "sale_qty": "历史月均销量", 
-            "recommend_score": "推荐系数", 
-            "delivery_count": "建议投放量(条)"
-            }
-        )
-    recommend_data["推荐序号"] = recommend_data["推荐序号"] + 1
-    
+            "index": "推荐序号",
+            "cust_code": "商户编号",
+            "cust_name": "商户名称",
+            "recommend_score": "推荐系数",
+        }
+    )
     return recommend_data
 
 def eval_report_process_pre(eval_order_data, recommend_data):