| 12345678910111213141516171819202122232425262728293031323334353637 |
- import os
- import pandas as pd
- from database import MySqlDao
- from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
def filter_data(data, filter_dict):
    """Keep only the rows whose ``product_feat`` is one of the wanted labels.

    One label of the form ``"<mapped name>(<value>)"`` is built for every
    entry of ``filter_dict`` except the ``'product_code'`` entry; the mapped
    name is looked up in ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``.

    Args:
        data: DataFrame that has a ``product_feat`` column.
        filter_dict: Mapping of feature key to feature value.

    Returns:
        The rows of ``data`` whose ``product_feat`` equals one of the labels.

    Raises:
        KeyError: If a key other than ``'product_code'`` is missing from
            ``PRODUCT_FEATRUES_MAP``.
    """
    # 'product_code' identifies the product itself and is not turned into a
    # feature label.
    wanted_labels = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat_key]}({feat_value})"
        for feat_key, feat_value in filter_dict.items()
        if feat_key != 'product_code'
    ]

    row_mask = data['product_feat'].isin(wanted_labels)
    return data[row_mask]
def split_relation_subtable(data, product_data, save_dir):
    """Split the cigarette/merchant feature-correlation table into sub-tables.

    The table is first narrowed with ``filter_data(data, product_data)`` and
    written to ``feats_interaction.csv`` inside ``save_dir``. The filtered rows
    are then grouped by the part of ``product_feat`` that precedes the first
    ``(``; each group is sorted by ``relation`` in descending order and written
    to ``<group name>.csv`` inside ``save_dir``.

    Args:
        data: DataFrame with at least ``product_feat`` and ``relation`` columns.
        product_data: Mapping of feature key to feature value, passed to
            ``filter_data`` as its filter dictionary.
        save_dir: Directory that receives the CSV files. It is created when it
            does not exist yet.
    """
    # Writing would fail on a missing directory. An empty string means
    # "current directory" for os.path.join, and os.makedirs("") would raise.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Copy so that adding the helper column below never touches a view of the
    # caller's frame.
    data = filter_data(data, product_data).copy()
    data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')

    # expand=False yields a Series for the single capture group. Rows whose
    # product_feat starts with "(" get NaN here and are dropped by groupby.
    data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)', expand=False)

    for name, group in data.groupby('group_key'):
        sub_data = group.drop(columns=['group_key']).sort_values('relation', ascending=False)
        sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
-
if __name__ == "__main__":
    # Script entry point: load the interaction table, fetch one product's
    # feature values from the database and write the per-feature sub-tables.
    product_dao = MySqlDao()

    # NOTE(review): split_relation_subtable writes "feats_interaction.csv"
    # into the output directory, which is the same path read below, so the
    # input file is replaced by its filtered version on every run.
    output_dir = "./data"
    interaction_table = pd.read_csv("./data/feats_interaction.csv")

    product_row = dao_result = product_dao.get_product_by_id(
        "00000000000000000000000011445301",
        "430201",
    )[ProductConfig.FEATURE_COLUMNS]
    # Only the first returned record is used as the filter dictionary.
    product_filter = product_row.to_dict("records")[0]

    split_relation_subtable(interaction_table, product_filter, output_dir)
|