result_process.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. import os
  2. import pandas as pd
  3. from database import MySqlDao
  4. from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
# Shared data-access object, instantiated once at import time and used by
# get_cust_list_from_history_order to query order records.
dao = MySqlDao()
  6. def filter_data(data, filter_dict):
  7. product_content = []
  8. for key, value in filter_dict.items():
  9. if key != 'product_code':
  10. product_content.append(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})")
  11. data = data[data['product_feat'].isin(product_content)]
  12. return data
  13. def split_relation_subtable(data, filter_dict, save_dir):
  14. """拆分卷烟商户特征相关性子表"""
  15. data = filter_data(data, filter_dict).copy()
  16. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  17. data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
  18. grouped = data.groupby('group_key')
  19. sub_tables = {
  20. name: group.drop(columns=['group_key']).sort_values('relation', ascending=False)
  21. for name, group in grouped
  22. }
  23. for name, sub_data in sub_tables.items():
  24. sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
  25. def generate_report(data, filter_dict, save_dir):
  26. """根据总表筛选结果"""
  27. # 1. 筛选商户相关性排序结果
  28. data = filter_data(data, filter_dict).copy()
  29. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  30. group_sums = data.groupby("cust_feat")["relation"].sum()
  31. # 筛选出总和非负的cust_feat
  32. valid_cust_feats = group_sums[group_sums > 0].index.tolist()
  33. cust_relation = data[data["cust_feat"].isin(valid_cust_feats)]
  34. cust_relation = cust_relation.reset_index(drop=True)
  35. # 2. 品规信息
  36. cust_relation[:20].to_csv(os.path.join(save_dir, "cust_relation.csv"), index=False, encoding='utf-8-sig')
  37. with open(os.path.join(save_dir, "product_info.csv"), "w", encoding='utf-8-sig') as f:
  38. for key, value in filter_dict.items():
  39. if key != 'product_code':
  40. f.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
  41. def get_cust_list_from_history_order(city_uuid, product_code):
  42. order_data = dao.get_order_by_product(city_uuid, product_code)
  43. return order_data
  44. if __name__ == "__main__":
  45. order_data = get_cust_list_from_history_order("00000000000000000000000011445301", "350139")
  46. order_data.to_csv("./data/history.csv", index=False)