report_utils.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. from database.dao.mysql_dao import MySqlDao
  2. from models import Recommend
  3. from models.rank.data.config import CustConfig, ImportanceFeaturesMap, ProductConfig, ShopConfig
  4. from models.rank.data.utils import sample_data_clear
  5. from models.rank import generate_feats_map
  6. import os
  7. import pandas as pd
  8. from utils.reports_process import feats_relation_process, calculate_delivery_by_recommend_data, eval_report_process
  class ReportUtils:
      """Generate the Excel reports for one product in one city.

      All files are written to ``./data/reports/<city_uuid>/<product_id>``.
      The recommendation model and the MySQL DAO are created once in
      ``__init__`` and reused by every report method.
      """

      # Product ids whose evaluation orders are looked up under another id.
      # NOTE(review): the reason for this single mapping is not visible in
      # this file -- confirm with the data owner before extending it.
      _EVAL_PRODUCT_ALIASES = {"350139": "350355"}

      # File produced by generate_recommend_report() and consumed by
      # generate_eval_data().
      _RECOMMEND_REPORT_NAME = "商户售卖推荐表.xlsx"

      # Source column -> header used in the similar-product report.
      # The key order is the column order of the exported sheet.
      _SIMILARITY_COLUMNS = {
          "product_name": "卷烟名称",
          "similarity": "相似度",
          "brand_name": "品牌名称",
          "factory_name": "生产厂商",
          "is_low_tar": "低焦油卷烟",
          "is_medium": "中支烟",
          "is_tiny": "细支烟",
          "is_coarse": "粗支烟",
          "is_exploding_beads": "爆珠烟",
          "is_abnormity": "异形包装",
          "is_cig": "雪茄烟",
          "is_chuangxin": "创新品类",
          "direct_retail_price": "卷烟建议零售价",
          "tbc_total_length": "烟支总长度",
          "product_style": "包装类型",
      }

      def __init__(self, city_uuid: str, product_id: str):
          """Load the product row and prepare the output directory.

          Args:
              city_uuid: City identifier; also used as a path component.
              product_id: Product identifier; also used as a path component.
          """
          self._recommend_model = Recommend(city_uuid)
          self._city_uuid = city_uuid
          self._product_id = product_id
          self._dao = MySqlDao()
          self._product_data = self._dao.get_product_by_id(
              self._city_uuid, self._product_id
          )[ProductConfig.FEATURE_COLUMNS]
          self._save_dir = os.path.join("./data/reports", city_uuid, product_id)
          # exist_ok avoids the check-then-create race of exists() + makedirs().
          os.makedirs(self._save_dir, exist_ok=True)

      def _get_recommend_data(self, recall_count):
          """Return the recommended-customer list for this product.

          A product that already appears in the city's order data is
          recalled with the GBDT-LR model; otherwise it is treated as a new
          product and recalled with item2vec.

          Args:
              recall_count: Forwarded to the model as ``recall_count``.
          """
          ordered_products = set(
              self._dao.get_product_from_order(self._city_uuid)["product_code"]
              .unique()
              .tolist()
          )
          if self._product_id in ordered_products:
              return self._recommend_model.get_recommend_list_by_gbdtlr(
                  self._product_id, recall_count=recall_count
              )
          return self._recommend_model.get_recommend_list_by_item2vec(
              self._product_id, recall_count=recall_count
          )

      def _generate_feats_map(self, recall_count):
          """Build the product-customer feature map from the recalled list.

          Args:
              recall_count: Forwarded to ``_get_recommend_data``.
          """
          recommend_data = self._get_recommend_data(recall_count)
          cust_codes = [item["cust_code"] for item in recommend_data]

          # Product information (copied so the cached frame is not mutated).
          product_data = self._product_data.copy()
          # Customer and shop information for the recommended customers.
          cust_data = self._dao.get_cust_by_ids(self._city_uuid, cust_codes)[
              CustConfig.FEATURE_COLUMNS
          ]
          shop_data = self._dao.get_shop_by_ids(self._city_uuid, cust_codes)[
              ShopConfig.FEATURE_COLUMNS
          ]

          product_data = sample_data_clear(product_data, ProductConfig)
          cust_data = sample_data_clear(cust_data, CustConfig)
          shop_data = sample_data_clear(shop_data, ShopConfig)

          # Attach shop features to each customer; the inner join drops
          # customers that have no shop row.
          shop_feats = shop_data.set_index("cust_code")
          cust_data = cust_data.join(
              shop_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner"
          )

          return generate_feats_map(product_data, cust_data)

      def _get_product_content(self):
          """Return the product's feature row as a ``{column: value}`` dict.

          Raises:
              IndexError: If the product frame loaded in ``__init__`` is empty.
          """
          return self._product_data.copy().to_dict("records")[0]

      def generate_feats_ralation_report(self, recall_count):
          """Write the feature-relation analysis report (品规商户特征关系表.xlsx).

          The misspelled method name is kept for backward compatibility.

          Args:
              recall_count: Forwarded to ``_generate_feats_map``.
          """
          feats_map = self._generate_feats_map(recall_count)
          product_content = self._get_product_content()
          # Compute SHAP values for the recalled product-customer pairs.
          shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(
              feats_map
          )
          report = feats_relation_process(shap_result, product_content)
          report.to_excel(
              os.path.join(self._save_dir, "品规商户特征关系表.xlsx"), index=False
          )

      def generate_product_report(self):
          """Write the product information sheet (卷烟信息表.xlsx).

          Each row holds a feature's display name and its value;
          ``product_code`` is skipped. The sheet is written with
          ``DataFrame.to_excel`` so the ``.xlsx`` file is a real workbook
          (plain text saved under an ``.xlsx`` name cannot be opened as one).

          Raises:
              KeyError: If a feature has no entry in
                  ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``.
          """
          product_content = self._get_product_content()
          rows = [
              (ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key], value)
              for key, value in product_content.items()
              if key != "product_code"
          ]
          pd.DataFrame(rows).to_excel(
              os.path.join(self._save_dir, "卷烟信息表.xlsx"),
              index=False,
              header=False,
          )

      def generate_recommend_report(self, recall_count, delivery_count):
          """Write the recommendation report, including delivery quantities.

          Args:
              recall_count: Forwarded to ``_get_recommend_data``.
              delivery_count: Forwarded to
                  ``calculate_delivery_by_recommend_data``.
          """
          recommend_data = self._get_recommend_data(recall_count)
          cust_codes = [item["cust_code"] for item in recommend_data]
          cust_infos = self._dao.get_cust_by_ids(self._city_uuid, cust_codes)
          report = calculate_delivery_by_recommend_data(
              recommend_data, cust_infos, delivery_count
          )
          report.to_excel(
              os.path.join(self._save_dir, self._RECOMMEND_REPORT_NAME), index=False
          )

      def generate_similarity_product_report(self):
          """Write the similar-product sheet (相似卷烟表.xlsx)."""
          similarity_map = (
              self._recommend_model._item2vec_model.generate_product_similarity_map(
                  self._product_id
              )
          )
          # Select the exported columns in their sheet order, then translate
          # the headers.
          similarity_map = similarity_map[list(self._SIMILARITY_COLUMNS)].rename(
              columns=self._SIMILARITY_COLUMNS
          )
          similarity_map.to_excel(
              os.path.join(self._save_dir, "相似卷烟表.xlsx"), index=False
          )

      def generate_eval_data(self):
          """Write the evaluation sheet (效果验证表.xlsx).

          Requires the recommendation report written by
          ``generate_recommend_report``.

          Raises:
              FileNotFoundError: If the recommendation report does not exist
                  yet. This is checked before any database query is issued.
          """
          recommend_path = os.path.join(self._save_dir, self._RECOMMEND_REPORT_NAME)
          if not os.path.exists(recommend_path):
              raise FileNotFoundError(f"请先生成'商户售卖推荐表': {recommend_path}")

          eval_product_id = self._EVAL_PRODUCT_ALIASES.get(
              self._product_id, self._product_id
          )
          eval_order_data = self._dao.get_eval_order_by_product(
              self._city_uuid, eval_product_id
          )
          recommend_data = pd.read_excel(recommend_path)
          report = eval_report_process(eval_order_data, recommend_data)
          report.to_excel(os.path.join(self._save_dir, "效果验证表.xlsx"), index=False)

      def generate_all_data(self, recall_count, delivery_count):
          """Generate the feature-relation, product, recommend and similarity reports.

          ``generate_eval_data`` is not invoked here (the call was disabled
          in the original code); run it separately when needed.

          Args:
              recall_count: Forwarded to the report methods that recall
                  customers.
              delivery_count: Forwarded to ``generate_recommend_report``.
          """
          self.generate_feats_ralation_report(recall_count)
          self.generate_product_report()
          self.generate_recommend_report(recall_count, delivery_count)
          self.generate_similarity_product_report()