result_process.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. import os
  2. import pandas as pd
  3. from database import MySqlDao
  4. from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
  5. dao = MySqlDao()
  6. def filter_data(data, filter_dict):
  7. product_content = []
  8. for key, value in filter_dict.items():
  9. if key != 'product_code':
  10. product_content.append(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})")
  11. data = data[data['product_feat'].isin(product_content)]
  12. return data
  13. def split_relation_subtable(data, filter_dict, save_dir):
  14. """拆分卷烟商户特征相关性子表"""
  15. data = filter_data(data, filter_dict).copy()
  16. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  17. data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
  18. grouped = data.groupby('group_key')
  19. sub_tables = {
  20. name: group.drop(columns=['group_key']).sort_values('relation', ascending=False)
  21. for name, group in grouped
  22. }
  23. for name, sub_data in sub_tables.items():
  24. sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
  25. def generate_report(city_uuid, data, filter_dict, recommend_data, delivery_count, save_dir):
  26. """根据总表筛选结果"""
  27. # 1. 筛选商户相关性排序结果
  28. data = filter_data(data, filter_dict).copy()
  29. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  30. group_sums = data.groupby("cust_feat")["relation"].sum()
  31. # 筛选出总和非负的cust_feat
  32. valid_cust_feats = group_sums[group_sums > 0].index.tolist()
  33. cust_relation = data[data["cust_feat"].isin(valid_cust_feats)]
  34. cust_relation = cust_relation.reset_index(drop=True)
  35. cust_relation = cust_relation.rename(
  36. columns = {
  37. "product_feat": "卷烟特征",
  38. "cust_feat": "商户特征",
  39. "relation": "相关性"
  40. }
  41. )
  42. cust_relation.to_csv(os.path.join(save_dir, "品规商户特征关系表.csv"), index=False, encoding='utf-8-sig')
  43. # 2. 品规信息
  44. cust_relation[:20].to_csv(os.path.join(save_dir, "cust_relation.csv"), index=False, encoding='utf-8-sig')
  45. with open(os.path.join(save_dir, "卷烟信息表.csv"), "w", encoding='utf-8-sig') as f:
  46. for key, value in filter_dict.items():
  47. if key != 'product_code':
  48. f.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
  49. # 3. 生成推荐报告
  50. recommend_report = generate_recommend_report(city_uuid, recommend_data, delivery_count)
  51. recommend_report.to_csv(os.path.join(save_dir, "商户售卖推荐表.csv"), index=False, encoding="utf-8-sig")
  52. def generate_recommend_report(city_uuid, recommend_data, delivery_count):
  53. recommend_data = pd.DataFrame(recommend_data)
  54. recommend_list = recommend_data["cust_code"].to_list()
  55. recommend_cust_info = dao.get_cust_by_ids(city_uuid, recommend_list)
  56. cust_ids = recommend_cust_info.set_index("BB_RETAIL_CUSTOMER_CODE")
  57. recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
  58. recommend_data = recommend_data[["cust_code", "BB_RETAIL_CUSTOMER_NAME", "sale_qty", "recommend_score"]]
  59. # 1. 计算每个商户的理论应得数量(带小数)
  60. recommend_data["delivery_float"] = (
  61. recommend_data["recommend_score"] / recommend_data["recommend_score"].sum() * delivery_count
  62. )
  63. # 2. 向下取整得到基础配额
  64. recommend_data["delivery_count"] = recommend_data["delivery_float"].astype(int)
  65. # 3. 计算余数并排序
  66. recommend_data["remainder"] = recommend_data["delivery_float"] - recommend_data["delivery_count"]
  67. recommend_data = recommend_data.sort_values("remainder", ascending=False)
  68. # 4. 将剩余配额按余数从大到小分配
  69. remaining = delivery_count - recommend_data["delivery_count"].sum()
  70. recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1
  71. recommend_data = recommend_data.drop(columns=["delivery_float", "remainder"])
  72. recommend_data = recommend_data.reset_index()
  73. # 5. 按recommend_score从大到小重新排序
  74. recommend_data = recommend_data.sort_values("index")
  75. recommend_data["sale_qty"] = recommend_data["sale_qty"].round(0).astype(int) # 将月均销量四舍五入取整
  76. recommend_data = recommend_data.rename(
  77. columns={
  78. "index": "推荐序号",
  79. "cust_code": "商户编号",
  80. "BB_RETAIL_CUSTOMER_NAME": "商户名称",
  81. "sale_qty": "历史月均销量",
  82. "recommend_score": "推荐系数",
  83. "delivery_count": "建议投放量(条)"
  84. }
  85. )
  86. recommend_data["推荐序号"] = recommend_data["推荐序号"] + 1
  87. return recommend_data
  88. def get_cust_list_from_history_order(city_uuid, product_code):
  89. # 获取订单数据并处理
  90. order_data = dao.get_eval_order_by_product(city_uuid, product_code)
  91. order_data = order_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "sale_amt"]]
  92. # 确保cust_code是字符串类型
  93. order_data["cust_code"] = order_data["cust_code"].astype(str)
  94. order_data = order_data.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]].mean().reset_index()
  95. order_data["sale_qty"] = order_data["sale_qty"].round(0).astype(int)
  96. order_data = order_data.sort_values("sale_qty", ascending=False)
  97. # 读取推荐数据
  98. recommend_data = pd.read_csv('./data/商户售卖推荐表.csv')
  99. # recommend_data = recommend_data.drop(columns=["sale_qty"])
  100. # 确保recommend_data中的cust_code也是字符串类型
  101. recommend_data["商户编号"] = recommend_data["商户编号"].astype(str)
  102. cust_ids = recommend_data.set_index("商户编号")
  103. # 执行合并操作
  104. merge_data = order_data.join(cust_ids, on="cust_code", how="left")
  105. merge_data = merge_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号", "推荐系数", "历史月均销量"]]
  106. merge_data = merge_data.rename(
  107. columns={
  108. "cust_code": "商户编号",
  109. "cust_name": "商户名称",
  110. "product_code": "卷烟编码",
  111. "product_name": "卷烟名称",
  112. "sale_qty": "月均销量"
  113. }
  114. )
  115. return merge_data
  116. if __name__ == "__main__":
  117. order_data = get_cust_list_from_history_order("00000000000000000000000011445301", "350355")
  118. order_data.to_csv("./data/eval.csv", index=False)