result_process.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import os
  2. import pandas as pd
  3. from database import MySqlDao
  4. from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
  5. def filter_data(data, filter_dict):
  6. product_content = []
  7. for key, value in filter_dict.items():
  8. if key != 'product_code':
  9. product_content.append(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})")
  10. data = data[data['product_feat'].isin(product_content)]
  11. return data
  12. def split_relation_subtable(data, product_data, save_dir):
  13. """拆分卷烟商户特征相关性子表"""
  14. data = filter_data(data, product_data).copy()
  15. data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
  16. data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
  17. grouped = data.groupby('group_key')
  18. sub_tables = {
  19. name: group.drop(columns=['group_key']).sort_values('relation', ascending=False)
  20. for name, group in grouped
  21. }
  22. for name, sub_data in sub_tables.items():
  23. sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
  24. if __name__ == "__main__":
  25. dao = MySqlDao()
  26. save_dir = "./data"
  27. data = pd.read_csv("./data/feats_interaction.csv")
  28. product_data = dao.get_product_by_id("00000000000000000000000011445301", "430201")[ProductConfig.FEATURE_COLUMNS]
  29. filter_dict = product_data.to_dict("records")[0]
  30. split_relation_subtable(data, filter_dict, save_dir)