# report_utils.py
  1. from database import RedisDatabaseHelper
  2. from database.dao.mysql_dao import MySqlDao
  3. from models import Recommend
  4. from models.rank.data.config import CustConfig, ImportanceFeaturesMap, ProductConfig, DeliveryConfig
  5. from models.rank.data.utils import sample_data_clear
  6. from models.rank import generate_feats_map
  7. from core import get_logger
  8. import os
  9. import pandas as pd
  10. from utils.reports_process import feats_relation_process, build_recommend_report, eval_report_process_pre, eval_report_process
  # Module-level logger used by every report helper in this file.
  logger = get_logger("utils.report")
  def _get_itemcf_key(city_uuid, product_code):
      """Build the redis key ``fc:<city_uuid>:<product_code>``.

      Args:
          city_uuid: City identifier placed in the second key segment.
          product_code: Product identifier placed in the third key segment.

      Returns:
          The key as a string.
      """
      return f"fc:{city_uuid}:{product_code}"
  class ReportUtils:
      """Generate and save the report files for one product in one city.

      Every report is written into ``./data/reports/<city_uuid>/<product_id>``,
      which is created on construction if it does not exist yet.
      """

      # File name of the recommend report. The evaluation reports read this
      # file back, so the writer and the readers must agree on it.
      _RECOMMEND_REPORT_NAME = "商户售卖推荐表.xlsx"

      # Product ids whose evaluation data is queried under another product id.
      # NOTE(review): the business reason for this alias is not visible in
      # this file -- confirm before adding entries.
      _EVAL_PRODUCT_ALIASES = {"350139": "350355"}

      # Source column -> header used in the similar-product report. The key
      # order is also the column order of the exported sheet.
      _SIMILARITY_COLUMNS = {
          "product_name": "卷烟名称",
          "similarity": "相似度",
          "brand_name": "品牌名称",
          "factory_name": "生产厂商",
          "is_low_tar": "低焦油卷烟",
          "is_medium": "中支烟",
          "is_tiny": "细支烟",
          "is_coarse": "粗支烟",
          "is_exploding_beads": "爆珠烟",
          "is_abnormity": "异形包装",
          "is_cig": "雪茄烟",
          "is_chuangxin": "创新品类",
          "direct_retail_price": "卷烟建议零售价",
          "tbc_total_length": "烟支总长度",
          "product_style": "包装类型",
      }

      def __init__(self, city_uuid, product_id):
          """Set up the model, the data-access helpers and the output directory.

          Args:
              city_uuid: Identifier of the city the reports are built for.
              product_id: Identifier of the product the reports are built for.
          """
          self._recommend_model = Recommend(city_uuid)
          self._city_uuid = city_uuid
          self._product_id = product_id
          self._dao = MySqlDao()
          self._redis = RedisDatabaseHelper().redis
          product = self._dao.get_product_by_id(self._city_uuid, self._product_id)
          self._product_data = product[ProductConfig.FEATURE_COLUMNS]
          self._save_dir = os.path.join("./data/reports", city_uuid, product_id)
          # exist_ok avoids the check-then-create race of a separate exists() test.
          os.makedirs(self._save_dir, exist_ok=True)

      def _get_eval_product_id(self):
          """Return the product id under which evaluation data is queried."""
          return self._EVAL_PRODUCT_ALIASES.get(self._product_id, self._product_id)

      def _load_recommend_report(self):
          """Read the previously saved recommend report from the output directory.

          Returns:
              The report as a DataFrame.

          Raises:
              FileNotFoundError: If the recommend report has not been saved yet.
          """
          path = os.path.join(self._save_dir, self._RECOMMEND_REPORT_NAME)
          if not os.path.exists(path):
              logger.error("商户售卖推荐表 not found")
              # Fail with a clear error instead of letting read_excel trip over
              # the missing file right after the log line.
              raise FileNotFoundError(path)
          return pd.read_excel(path)

      def _get_recommend_data(self, cust_code_list):
          """Fetch the recommended customers for this product.

          The GBDT-LR model is used when the redis key for this city/product
          exists; otherwise the Item2Vec model is used.

          Args:
              cust_code_list: Passed through to the model as ``cust_code_list``.

          Returns:
              Whatever the selected model method returns.
          """
          itemcf_key = _get_itemcf_key(self._city_uuid, self._product_id)
          if self._redis.exists(itemcf_key):
              logger.info(f"Using GBDT-LR model for report product {self._product_id}, itemcf_key={itemcf_key}")
              return self._recommend_model.get_recommend_list_by_gbdtlr(
                  self._product_id, cust_code_list=cust_code_list
              )
          logger.info(f"Using Item2Vec model for report product {self._product_id}, itemcf_key not found: {itemcf_key}")
          return self._recommend_model.get_recommend_list_by_item2vec(
              self._product_id, cust_code_list=cust_code_list
          )

      def _generate_feats_map(self, cust_code_list):
          """Build the product-customer features map from the recommend list.

          Args:
              cust_code_list: Passed through to ``_get_recommend_data``.

          Returns:
              The result of ``generate_feats_map`` for the cleaned product and
              customer data.
          """
          recommend_data = self._get_recommend_data(cust_code_list)
          recommend_list = [item["cust_code"] for item in recommend_data]
          cust_data = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)[CustConfig.FEATURE_COLUMNS]
          product_data = sample_data_clear(self._product_data.copy(), ProductConfig)
          cust_data = sample_data_clear(cust_data, CustConfig)
          return generate_feats_map(product_data, cust_data)

      def _get_product_content(self):
          """Return the first row of the product data as a dict."""
          return self._product_data.copy().to_dict('records')[0]

      def generate_feats_ralation_report(self, cust_code_list):
          """Generate and save the feature relation analysis report.

          The misspelled method name is kept so existing callers keep working.

          Args:
              cust_code_list: Passed through to ``_generate_feats_map``.
          """
          logger.info("Generating feature relation report")
          feats_map = self._generate_feats_map(cust_code_list)
          product_content = self._get_product_content()
          shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(feats_map)
          report = feats_relation_process(shap_result, product_content)
          report.to_excel(os.path.join(self._save_dir, "品规商户特征关系表.xlsx"), index=False)
          logger.info("Feature relation report saved")

      def generate_product_report(self):
          """Generate and save the product information sheet.

          Each row holds the display name of a product feature and its value;
          ``product_code`` is left out.
          """
          logger.info("Generating product report")
          product_data = self._get_product_content()
          rows = [
              (ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key], value)
              for key, value in product_data.items()
              if key != 'product_code'
          ]
          # The target is an .xlsx file, so write a real workbook; writing plain
          # text under that extension produces a file Excel cannot open cleanly.
          pd.DataFrame(rows).to_excel(
              os.path.join(self._save_dir, "卷烟信息表.xlsx"), index=False, header=False
          )
          logger.info("Product report saved")

      def generate_recommend_report(self, cust_code_list):
          """Generate and save the recommend report.

          Args:
              cust_code_list: Passed through to ``_get_recommend_data``.
          """
          logger.info("Generating recommend report")
          recommend_data = self._get_recommend_data(cust_code_list)
          recommend_list = [item["cust_code"] for item in recommend_data]
          recommend_cust_infos = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)
          report = build_recommend_report(recommend_data, recommend_cust_infos)
          report.to_excel(os.path.join(self._save_dir, self._RECOMMEND_REPORT_NAME), index=False)
          logger.info("Recommend report saved")

      def generate_similarity_product_report(self):
          """Generate and save the similar-product sheet."""
          logger.info("Generating similarity product report")
          similarity = self._recommend_model._item2vec_model.generate_product_similarity_map(self._product_id)
          # Select the exported columns in mapping order, then apply the headers.
          similarity = similarity[list(self._SIMILARITY_COLUMNS)].rename(columns=self._SIMILARITY_COLUMNS)
          similarity.to_excel(os.path.join(self._save_dir, "相似卷烟表.xlsx"), index=False)
          logger.info("Similarity product report saved")

      def generate_eval_data_pre(self):
          """Generate and save the effect verification sheet.

          Combines the evaluation order data of the product with the previously
          saved recommend report.

          Raises:
              FileNotFoundError: If the recommend report has not been saved yet.
          """
          eval_order_data = self._dao.get_eval_order_by_product(self._city_uuid, self._get_eval_product_id())
          recommend_data = self._load_recommend_report()
          report = eval_report_process_pre(eval_order_data, recommend_data)
          report.to_excel(os.path.join(self._save_dir, "效果验证表.xlsx"), index=False)

      def generate_eval_data(self, start_time, end_time, recommend_data=None):
          """Generate and save the delivery verification report.

          Args:
              start_time: Start of the delivery period passed to the DAO query.
              end_time: End of the delivery period passed to the DAO query.
              recommend_data: DataFrame of recommendations. When omitted, the
                  previously saved recommend report is loaded instead.

          Raises:
              FileNotFoundError: If ``recommend_data`` is omitted and the
                  recommend report has not been saved yet.
          """
          logger.info("Generating eval report")
          if recommend_data is None:
              recommend_data = self._load_recommend_report()
          delivery_data = self._dao.get_delivery_data_by_product(
              self._city_uuid, self._get_eval_product_id(), start_time, end_time
          )
          delivery_data = delivery_data[DeliveryConfig.FEATURE_COLUMNS]
          delivery_data = sample_data_clear(delivery_data, DeliveryConfig)
          recommend_data = recommend_data.drop(columns=["建议投放量(条)"], errors="ignore")
          report = eval_report_process(delivery_data, recommend_data)
          report.to_excel(os.path.join(self._save_dir, "投放验证报告.xlsx"), index=False)
          logger.info("Eval report saved")

      def generate_all_data(self, cust_code_list):
          """Generate the feature relation, product, recommend and similarity reports.

          Args:
              cust_code_list: Passed through to the reports that need it.
          """
          logger.info("Generating all reports")
          self.generate_feats_ralation_report(cust_code_list)
          self.generate_product_report()
          self.generate_recommend_report(cust_code_list)
          self.generate_similarity_product_report()
          logger.info("All reports generated")
  if __name__ == "__main__":
      # Manual entry point: build the delivery verification report for one
      # hard-coded city/product over a fixed period.
      city_uuid = "00000000000000000000000011445301"
      product_id = '440298'
      start_time = '2025/2/10'
      end_time = '2025/2/16'
      report = ReportUtils(city_uuid, product_id)
      # generate_eval_data needs the recommendation table as its third argument.
      # NOTE(review): this assumes the recommend report was generated and saved
      # earlier for this city/product -- confirm this is the intended input.
      recommend_data = pd.read_excel(os.path.join(report._save_dir, "商户售卖推荐表.xlsx"))
      report.generate_eval_data(start_time, end_time, recommend_data)