reports_process.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. from models.rank.data.config import ImportanceFeaturesMap
  2. import pandas as pd
  3. def filter_data(data, filter_dict):
  4. """从shap结果中过滤特征相关性数据"""
  5. product_content = []
  6. for key, value in filter_dict.items():
  7. if key != 'product_code':
  8. product_content.append(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})")
  9. data = data[data['product_feat'].isin(product_content)]
  10. return data
  11. def feats_relation_process(shap_result, product_content):
  12. """生成特征相关性分析报告"""
  13. # 筛选商户相关性排序结果
  14. report = filter_data(shap_result, product_content).copy()
  15. cust_feats_sum = report.groupby("cust_feat")["relation"].sum()
  16. # 筛选出正相关性的cust_feat
  17. valid_cust_feats = cust_feats_sum[cust_feats_sum > 0].index.to_list()
  18. report = report[report["cust_feat"].isin(valid_cust_feats)]
  19. report = report.reset_index(drop=True)
  20. report = report.rename(
  21. columns = {
  22. "product_feat": "卷烟特征",
  23. "cust_feat": "商户特征",
  24. "relation": "相关性"
  25. }
  26. )
  27. return report
  28. def calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, delivery_count):
  29. """根据推荐数据计算投放量,并生成推荐商户报告"""
  30. recommend_data = pd.DataFrame(recommend_data)
  31. cust_ids = recommend_cust_infos.set_index("BB_RETAIL_CUSTOMER_CODE")
  32. recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
  33. recommend_data = recommend_data[["cust_code", "BB_RETAIL_CUSTOMER_NAME", "recommend_score"]]
  34. # 1. 计算每个商户的理论应得数量(带小数)
  35. recommend_data["delivery_float"] = (
  36. recommend_data["recommend_score"] / recommend_data["recommend_score"].sum() * delivery_count
  37. )
  38. # 2. 向下取整得到基础配额
  39. recommend_data["delivery_count"] = recommend_data["delivery_float"].astype(int)
  40. # 3. 计算余数并排序
  41. recommend_data["remainder"] = recommend_data["delivery_float"] - recommend_data["delivery_count"]
  42. recommend_data = recommend_data.sort_values("remainder", ascending=False)
  43. # 4. 将剩余配额按余数从大到小分配
  44. remaining = delivery_count - recommend_data["delivery_count"].sum()
  45. recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1
  46. recommend_data = recommend_data.drop(columns=["delivery_float", "remainder"])
  47. recommend_data = recommend_data.reset_index()
  48. # 5. 按recommend_score从大到小重新排序
  49. recommend_data = recommend_data.sort_values("index")
  50. # recommend_data["sale_qty"] = recommend_data["sale_qty"].round(0).astype(int) # 将月均销量四舍五入取整
  51. recommend_data = recommend_data.rename(
  52. columns={
  53. "index": "推荐序号",
  54. "cust_code": "商户编号",
  55. "BB_RETAIL_CUSTOMER_NAME": "商户名称",
  56. # "sale_qty": "历史月均销量",
  57. "recommend_score": "推荐系数",
  58. "delivery_count": "建议投放量(条)"
  59. }
  60. )
  61. recommend_data["推荐序号"] = recommend_data["推荐序号"] + 1
  62. return recommend_data
  63. def eval_report_process(eval_order_data, recommend_data):
  64. # 获取订单数据并处理
  65. eval_order_data = eval_order_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "sale_amt"]]
  66. # 确保cust_code是字符串类型
  67. eval_order_data["cust_code"] = eval_order_data["cust_code"].astype(str)
  68. eval_order_data = eval_order_data.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]].mean().reset_index()
  69. eval_order_data["sale_qty"] = eval_order_data["sale_qty"].round(0).astype(int)
  70. eval_order_data = eval_order_data.sort_values("sale_qty", ascending=False)
  71. # recommend_data = recommend_data.drop(columns=["sale_qty"])
  72. # 确保recommend_data中的cust_code也是字符串类型
  73. recommend_data["商户编号"] = recommend_data["商户编号"].astype(str)
  74. cust_ids = recommend_data.set_index("商户编号")
  75. # 执行合并操作
  76. merge_data = eval_order_data.join(cust_ids, on="cust_code", how="left")
  77. merge_data = merge_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号", "推荐系数"]]
  78. merge_data = merge_data.rename(
  79. columns={
  80. "cust_code": "商户编号",
  81. "cust_name": "商户名称",
  82. "product_code": "卷烟编码",
  83. "product_name": "卷烟名称",
  84. "sale_qty": "月均销量"
  85. }
  86. )
  87. return merge_data