reports_process.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. from models.rank.data.config import ImportanceFeaturesMap, DeliveryConfig
  2. import os
  3. import pandas as pd
  4. def filter_data(data, filter_dict):
  5. """从shap结果中过滤特征相关性数据"""
  6. product_content = []
  7. for key, value in filter_dict.items():
  8. if key != 'product_code':
  9. product_content.append(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})")
  10. data = data[data['product_feat'].isin(product_content)]
  11. return data
  12. def feats_relation_process(shap_result, product_content):
  13. """生成特征相关性分析报告"""
  14. # 筛选商户相关性排序结果
  15. report = filter_data(shap_result, product_content).copy()
  16. cust_feats_sum = report.groupby("cust_feat")["relation"].sum()
  17. # 筛选出正相关性的cust_feat
  18. valid_cust_feats = cust_feats_sum[cust_feats_sum > 0].index.to_list()
  19. report = report[report["cust_feat"].isin(valid_cust_feats)]
  20. report = report.reset_index(drop=True)
  21. report = report.rename(
  22. columns = {
  23. "product_feat": "卷烟特征",
  24. "cust_feat": "商户特征",
  25. "relation": "相关性"
  26. }
  27. )
  28. return report
  29. def build_recommend_report(recommend_data, recommend_cust_infos):
  30. """根据推荐数据生成推荐商户报告(不含投放量)"""
  31. recommend_data = pd.DataFrame(recommend_data)
  32. cust_ids = recommend_cust_infos.set_index("cust_code")
  33. before_join = set(recommend_data["cust_code"])
  34. recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
  35. missing = before_join - set(recommend_data["cust_code"])
  36. if missing:
  37. import logging
  38. logging.getLogger("utils.report").warning(
  39. f"build_recommend_report: {len(missing)} cust_codes not found in customer master and dropped from report: {missing}"
  40. )
  41. recommend_data = recommend_data[["cust_code", "cust_name", "recommend_score"]]
  42. recommend_data = recommend_data.reset_index(drop=True)
  43. recommend_data.index = recommend_data.index + 1
  44. recommend_data = recommend_data.reset_index()
  45. recommend_data = recommend_data.rename(
  46. columns={
  47. "index": "推荐序号",
  48. "cust_code": "商户编号",
  49. "cust_name": "商户名称",
  50. "recommend_score": "推荐系数",
  51. }
  52. )
  53. return recommend_data
  54. def eval_report_process_pre(eval_order_data, recommend_data):
  55. # 获取订单数据并处理
  56. eval_order_data = eval_order_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "sale_amt"]]
  57. # 确保cust_code是字符串类型
  58. eval_order_data["cust_code"] = eval_order_data["cust_code"].astype(str)
  59. eval_order_data = eval_order_data.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]].mean().reset_index()
  60. eval_order_data["sale_qty"] = eval_order_data["sale_qty"].round(0).astype(int)
  61. eval_order_data = eval_order_data.sort_values("sale_qty", ascending=False)
  62. # recommend_data = recommend_data.drop(columns=["sale_qty"])
  63. # 确保recommend_data中的cust_code也是字符串类型
  64. recommend_data["商户编号"] = recommend_data["商户编号"].astype(str)
  65. cust_ids = recommend_data.set_index("商户编号")
  66. # 执行合并操作
  67. merge_data = eval_order_data.join(cust_ids, on="cust_code", how="left")
  68. merge_data = merge_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号", "推荐系数"]]
  69. merge_data = merge_data.rename(
  70. columns={
  71. "cust_code": "商户编号",
  72. "cust_name": "商户名称",
  73. "product_code": "卷烟编码",
  74. "product_name": "卷烟名称",
  75. "sale_qty": "月均销量"
  76. }
  77. )
  78. return merge_data
  79. def eval_report_process(delivery_data, recommend_data):
  80. report = recommend_data.merge(delivery_data, left_on="商户编号", right_on="customer_code", how="left")
  81. report = report.drop(columns=["customer_code", "goods_code"])
  82. report = report.rename(columns={
  83. "retail_index_week": DeliveryConfig.FEATURES_MAP["retail_index_week"],
  84. "turnover_rate_collpoint": DeliveryConfig.FEATURES_MAP["turnover_rate_collpoint"],
  85. "turnover_rate_terminal": DeliveryConfig.FEATURES_MAP["turnover_rate_terminal"],
  86. "sale_qty": DeliveryConfig.FEATURES_MAP["sale_qty"],
  87. })
  88. return report
  89. def split_relation_subtable(data, filter_dict, save_dir):
  90. """拆分卷烟商户特征相关性子表"""
  91. data = filter_data(data, filter_dict).copy()
  92. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  93. data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
  94. grouped = data.groupby('group_key')
  95. sub_tables = {
  96. name: group.drop(columns=['group_key']).sort_values('relation', ascending=False)
  97. for name, group in grouped
  98. }
  99. for name, sub_data in sub_tables.items():
  100. sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')