Procházet zdrojové kódy

重新封装report

yangzeyu před 11 měsíci
rodič
revize
158faac723
4 změnil soubory, kde provedl 141 přidání a 135 odebrání
  1. 4 0
      api.py
  2. 2 135
      report.py
  3. 135 0
      utils/report_utils.py
  4. 0 0
      utils/reports_process.py

+ 4 - 0
api.py

@@ -62,5 +62,9 @@ def recommend(request: RecommendRequest):
         
     return {"code": 200, "msg": "success", "data": {"recommendationInfo": request_data}}
 
+def generate_report(city_uuid, product_id, recall_count, delivery_count):
+    """Generate the report (placeholder: no implementation yet, returns None)."""
+    
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)

+ 2 - 135
report.py

@@ -1,135 +1,6 @@
-from database.dao.mysql_dao import MySqlDao
-from models import Recommend
-from models.rank.data.config import CustConfig, ImportanceFeaturesMap, ProductConfig, ShopConfig
-from models.rank.data.utils import sample_data_clear
-from models.rank import generate_feats_map
 import os
 import argparse
-import pandas as pd
-from utils.reports_utils import feats_relation_process, calculate_delivery_by_recommend_data, eval_report_process
-
-class ReportUtils:
-    def __init__(self, city_uuid, product_id):
-        self._recommend_model = Recommend(city_uuid)
-        self._city_uuid = city_uuid
-        self._product_id = product_id
-        self._dao = MySqlDao()
-        self._product_data =  self._dao.get_product_by_id(self._city_uuid, self._product_id)[ProductConfig.FEATURE_COLUMNS]
-        
-    def _get_recommend_data(self, args):
-        """获取推荐商户列表"""
-        # 判断product_id是否是新品规
-        products_in_order = self._dao.get_product_from_order(self._city_uuid)["product_code"].unique().tolist()
-        # recall_count = 100 # 参数调整
-        if self._product_id in products_in_order:
-            recommend_data = self._recommend_model.get_recommend_list_by_gbdtlr(self._product_id, recall_count=args.recall_count)
-        else:
-            recommend_data = self._recommend_model.get_recommend_list_by_item2vec(self._product_id, recall_count=args.recall_count)
-        
-        # # 根据推荐列表获取商户售卖卷烟的月均销量总和
-        # recommend_list = list(map(lambda x: x["cust_code"], recommend_list))
-        # order_data = self._dao.get_order_by_cust(self._city_uuid, )
-        return recommend_data
-    
-    def _generate_feats_map(self, args):
-        """根据召回的推荐列表生成品规-商户features_map"""
-        recommend_data = self._get_recommend_data(args)
-        recommend_list = list(map(lambda x: x["cust_code"], recommend_data))
-        # 获取卷烟的信息
-        product_data = self._product_data.copy()
-        
-        # 根据cust_lit获取商户信息和商圈信息
-        cust_data = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)[CustConfig.FEATURE_COLUMNS]
-        shop_data = self._dao.get_shop_by_ids(self._city_uuid, recommend_list)[ShopConfig.FEATURE_COLUMNS]
-        
-        product_data = sample_data_clear(product_data, ProductConfig)
-        cust_data = sample_data_clear(cust_data, CustConfig)
-        shop_data = sample_data_clear(shop_data, ShopConfig)
-        
-        cust_feats = shop_data.set_index("cust_code")
-        cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
-        
-        feats_map = generate_feats_map(product_data, cust_data)
-        
-        return feats_map
-    
-    def _get_product_content(self):
-        """获取品规的内容,并以字典的形式返回"""
-        product_data = self._product_data.copy()
-        filter_dict = product_data.to_dict('records')[0]
-        return filter_dict
-    
-    def generate_feats_ralation_report(self, args):
-        """生成特征相关性分析报告"""
-        feats_map = self._generate_feats_map(args)
-        product_content = self._get_product_content()
-        # 计算SHAP值
-        shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(feats_map)
-        report = feats_relation_process(shap_result, product_content)
-        
-        report.to_excel(os.path.join(args.report_dir, "品规商户特征关系表.xlsx"), index=False)
-        
-    def generate_product_report(self, args):
-        """生成推荐品规信息表"""
-        product_data = self._get_product_content()
-        with open(os.path.join(args.report_dir, "卷烟信息表.xlsx"), "w", encoding='utf-8-sig') as file:
-            for key, value in product_data.items():
-                if key != 'product_code':
-                    file.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
-                    
-    def generate_recommend_report(self, args):
-        """生成推荐报告,包括投放量"""
-        recommend_data = self._get_recommend_data(args)
-        recommend_list = list(map(lambda x: x["cust_code"], recommend_data))
-        recommend_cust_infos = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)
-        report = calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, args.delivery_count)
-        
-        report.to_excel(os.path.join(args.report_dir, "商户售卖推荐表.xlsx"), index=False)
-        
-    def generate_similarity_product_report(self, args):
-        """生成相似卷烟表"""
-        product_similarity_map = self._recommend_model._item2vec_model.generate_product_similarity_map(self._product_id)
-        product_similarity_map = product_similarity_map[["product_name", "similarity", "brand_name", "factory_name", "is_low_tar", "is_medium", "is_tiny", "is_coarse", "is_exploding_beads", "is_abnormity", "is_cig", "is_chuangxin", "direct_retail_price", "tbc_total_length", "product_style"]]
-        product_similarity_map = product_similarity_map.rename(
-            columns={
-                "product_name":                 "卷烟名称",
-                "similarity":                   "相似度",
-                "factory_name":                 "生产厂商",
-                "brand_name":                   "品牌名称",
-                "is_low_tar":                   "低焦油卷烟",
-                "is_medium":                    "中支烟",
-                "is_tiny":                      "细支烟",
-                "is_coarse":                    "粗支烟",
-                "is_exploding_beads":           "爆珠烟",
-                "is_abnormity":                 "异形包装",
-                "is_cig":                       "雪茄烟",
-                "is_chuangxin":                 "创新品类",
-                "direct_retail_price":          "卷烟建议零售价",
-                "tbc_total_length":             "烟支总长度",
-                "product_style":                "包装类型",
-            }
-        )
-        product_similarity_map.to_excel(os.path.join(args.report_dir, "相似卷烟表.xlsx"), index=False)
-        
-    def generate_eval_data(self, args):
-        if self._product_id == '350139':
-            eval_product_id = "350355"
-        else:
-            eval_product_id = self._product_id
-        eval_order_data = self._dao.get_eval_order_by_product(self._city_uuid, eval_product_id)
-        if not os.path.exists(os.path.join(args.report_dir, "商户售卖推荐表.xlsx")):
-            print("请先生成'商户售卖推荐表'")
-        recommend_data = pd.read_excel(os.path.join(args.report_dir, "商户售卖推荐表.xlsx"))
-        report = eval_report_process(eval_order_data, recommend_data)
-        
-        report.to_excel(os.path.join(args.report_dir, "效果验证表.xlsx"), index=False)
-
-def generate_all_data(args, report_utils):
-    report_utils.generate_feats_ralation_report(args)
-    report_utils.generate_product_report(args)
-    report_utils.generate_recommend_report(args)
-    report_utils.generate_similarity_product_report(args) 
-    report_utils.generate_eval_data(args)
+from utils.report_utils import ReportUtils
     
     
 def run():
@@ -147,19 +18,15 @@ def run():
     
     # 查找该城市的gbdt模型是否存在
     args.gbdtlr_model_path = os.path.join("./models/rank/weights/", args.city_uuid, "gbdtlr_model.pkl")
-    args.report_dir = os.path.join("./data/report", args.city_uuid, args.product_id)
     if not os.path.exists(args.gbdtlr_model_path):
         print("该城市的模型还未训练,请先启动训练!!!")
         
     # 初始化report生成工具
     report_utils = ReportUtils(args.city_uuid, args.product_id)
     
-    # 创建报告保存文件夹
-    if not os.path.exists(args.report_dir):
-        os.makedirs(args.report_dir)
         
     # 生成报告
-    generate_all_data(args, report_utils)
+    report_utils.generate_all_data(args.recall_count, args.delivery_count)
     
     
 if __name__ == "__main__":

+ 135 - 0
utils/report_utils.py

@@ -0,0 +1,135 @@
+from database.dao.mysql_dao import MySqlDao
+from models import Recommend
+from models.rank.data.config import CustConfig, ImportanceFeaturesMap, ProductConfig, ShopConfig
+from models.rank.data.utils import sample_data_clear
+from models.rank import generate_feats_map
+
+import os
+import pandas as pd
+from utils.reports_process import feats_relation_process, calculate_delivery_by_recommend_data, eval_report_process
+class ReportUtils:
+    """Generate the Excel reports for one product in one city.
+
+    Every report is written into ``./data/reports/<city_uuid>/<product_id>``;
+    that directory is created when the instance is constructed.
+    """
+
+    # File name of the recommendation sheet; generate_eval_data reads it back.
+    _RECOMMEND_REPORT_NAME = "商户售卖推荐表.xlsx"
+
+    def __init__(self, city_uuid, product_id):
+        """Load the model, the DAO and the product features, then create the output dir."""
+        self._recommend_model = Recommend(city_uuid)
+        self._city_uuid = city_uuid
+        self._product_id = product_id
+        self._dao = MySqlDao()
+        self._product_data = self._dao.get_product_by_id(self._city_uuid, self._product_id)[ProductConfig.FEATURE_COLUMNS]
+        self._save_dir = os.path.join("./data/reports", city_uuid, product_id)
+
+        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
+        os.makedirs(self._save_dir, exist_ok=True)
+
+    def _report_path(self, filename):
+        """Return the path of *filename* inside this product's report directory."""
+        return os.path.join(self._save_dir, filename)
+
+    def _get_recommend_data(self, recall_count):
+        """Return the recalled recommendation records for this product.
+
+        A product that already appears in the city's order data is ranked with
+        the GBDT-LR model; otherwise it is treated as a new product and item2vec
+        is used instead.
+        """
+        products_in_order = self._dao.get_product_from_order(self._city_uuid)["product_code"].unique().tolist()
+        if self._product_id in products_in_order:
+            return self._recommend_model.get_recommend_list_by_gbdtlr(self._product_id, recall_count=recall_count)
+        return self._recommend_model.get_recommend_list_by_item2vec(self._product_id, recall_count=recall_count)
+
+    def _generate_feats_map(self, recall_count):
+        """Build the product-customer feature map for the recalled customers."""
+        recommend_data = self._get_recommend_data(recall_count)
+        cust_codes = [item["cust_code"] for item in recommend_data]
+
+        # Customer and shop features of the recalled customers.
+        cust_data = self._dao.get_cust_by_ids(self._city_uuid, cust_codes)[CustConfig.FEATURE_COLUMNS]
+        shop_data = self._dao.get_shop_by_ids(self._city_uuid, cust_codes)[ShopConfig.FEATURE_COLUMNS]
+
+        product_data = sample_data_clear(self._product_data.copy(), ProductConfig)
+        cust_data = sample_data_clear(cust_data, CustConfig)
+        shop_data = sample_data_clear(shop_data, ShopConfig)
+
+        # Attach the shop features to each customer row; the inner join drops
+        # customers that have no matching shop record.
+        cust_data = cust_data.join(shop_data.set_index("cust_code"), on="BB_RETAIL_CUSTOMER_CODE", how="inner")
+
+        return generate_feats_map(product_data, cust_data)
+
+    def _get_product_content(self):
+        """Return the first row of the product data as a ``{column: value}`` dict."""
+        return self._product_data.to_dict('records')[0]
+
+    def generate_feats_ralation_report(self, recall_count):
+        """Write the product-customer feature relation sheet derived from SHAP values."""
+        feats_map = self._generate_feats_map(recall_count)
+        product_content = self._get_product_content()
+        shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(feats_map)
+        report = feats_relation_process(shap_result, product_content)
+        report.to_excel(self._report_path("品规商户特征关系表.xlsx"), index=False)
+
+    def generate_product_report(self):
+        """Write the product information sheet, one (label, value) row per feature.
+
+        The rows go through ``DataFrame.to_excel`` so that the ``.xlsx`` file is
+        a real workbook; plain text written under that extension is not a valid
+        workbook.
+        """
+        rows = [
+            (ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key], value)
+            for key, value in self._get_product_content().items()
+            if key != 'product_code'
+        ]
+        pd.DataFrame(rows).to_excel(self._report_path("卷烟信息表.xlsx"), index=False, header=False)
+
+    def generate_recommend_report(self, recall_count, delivery_count):
+        """Write the recommendation sheet, including the delivery quantities."""
+        recommend_data = self._get_recommend_data(recall_count)
+        cust_codes = [item["cust_code"] for item in recommend_data]
+        cust_infos = self._dao.get_cust_by_ids(self._city_uuid, cust_codes)
+        report = calculate_delivery_by_recommend_data(recommend_data, cust_infos, delivery_count)
+        report.to_excel(self._report_path(self._RECOMMEND_REPORT_NAME), index=False)
+
+    def generate_similarity_product_report(self):
+        """Write the similar-product sheet with translated column headers."""
+        # Source column -> header in the sheet; the key order is the column order.
+        column_titles = {
+            "product_name":                 "卷烟名称",
+            "similarity":                   "相似度",
+            "brand_name":                   "品牌名称",
+            "factory_name":                 "生产厂商",
+            "is_low_tar":                   "低焦油卷烟",
+            "is_medium":                    "中支烟",
+            "is_tiny":                      "细支烟",
+            "is_coarse":                    "粗支烟",
+            "is_exploding_beads":           "爆珠烟",
+            "is_abnormity":                 "异形包装",
+            "is_cig":                       "雪茄烟",
+            "is_chuangxin":                 "创新品类",
+            "direct_retail_price":          "卷烟建议零售价",
+            "tbc_total_length":             "烟支总长度",
+            "product_style":                "包装类型",
+        }
+        similarity_map = self._recommend_model._item2vec_model.generate_product_similarity_map(self._product_id)
+        similarity_map = similarity_map[list(column_titles)].rename(columns=column_titles)
+        similarity_map.to_excel(self._report_path("相似卷烟表.xlsx"), index=False)
+
+    def generate_eval_data(self):
+        """Write the effect-validation sheet from the saved recommendation sheet.
+
+        Does nothing except print a hint when the recommendation sheet has not
+        been generated yet.
+        """
+        recommend_path = self._report_path(self._RECOMMEND_REPORT_NAME)
+        if not os.path.exists(recommend_path):
+            # Reading the missing workbook would raise, so stop after the hint.
+            print("请先生成'商户售卖推荐表'")
+            return
+
+        # Product '350139' is evaluated against the orders of product '350355'.
+        eval_product_id = "350355" if self._product_id == '350139' else self._product_id
+        eval_order_data = self._dao.get_eval_order_by_product(self._city_uuid, eval_product_id)
+        report = eval_report_process(eval_order_data, pd.read_excel(recommend_path))
+        report.to_excel(self._report_path("效果验证表.xlsx"), index=False)
+
+    def generate_all_data(self, recall_count, delivery_count):
+        """Generate every report for this product.
+
+        The recommendation sheet is written before the evaluation step because
+        generate_eval_data reads that sheet back from disk.
+        """
+        self.generate_feats_ralation_report(recall_count)
+        self.generate_product_report()
+        self.generate_recommend_report(recall_count, delivery_count)
+        self.generate_similarity_product_report()
+        self.generate_eval_data()

+ 0 - 0
utils/reports_utils.py → utils/reports_process.py