import os

import pandas as pd

from database import MySqlDao
from models.rank.data.config import ImportanceFeaturesMap, ProductConfig


def filter_data(data: pd.DataFrame, filter_dict: dict) -> pd.DataFrame:
    """Keep only the rows whose ``product_feat`` matches the given product.

    Every entry of ``filter_dict`` except ``'product_code'`` is turned into a
    label of the form ``"<mapped name>(<value>)"``, where the mapped name is
    looked up in ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``. Rows of
    ``data`` whose ``product_feat`` column equals one of these labels are
    returned.

    Args:
        data: Table containing a ``product_feat`` column.
        filter_dict: Mapping of product feature column name to its value.

    Returns:
        The subset of ``data`` whose ``product_feat`` is among the labels.

    Raises:
        KeyError: If a key (other than ``'product_code'``) is missing from
            ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``.
    """
    product_content = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})"
        for key, value in filter_dict.items()
        if key != 'product_code'
    ]
    return data[data['product_feat'].isin(product_content)]


def split_relation_subtable(data: pd.DataFrame, product_data: dict, save_dir: str) -> None:
    """Split the cigarette/merchant feature-correlation table into sub-tables.

    The table is first filtered with :func:`filter_data` and written to
    ``<save_dir>/feats_interaction.csv``. It is then grouped by the part of
    ``product_feat`` that precedes the first ``"("``; each group is sorted by
    ``relation`` in descending order and written to ``<save_dir>/<group>.csv``.

    Args:
        data: Table with at least ``product_feat`` and ``relation`` columns.
        product_data: Mapping of product feature column name to its value,
            passed to :func:`filter_data`.
        save_dir: Directory that receives the CSV files. It is created if it
            does not exist yet.
    """
    # Work on a copy so that adding the helper column below neither touches
    # the caller's frame nor triggers a chained-assignment warning.
    data = filter_data(data, product_data).copy()

    # Make sure the target directory exists; an empty string means the
    # current working directory, which os.makedirs cannot be asked to create.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    data.to_csv(
        os.path.join(save_dir, "feats_interaction.csv"),
        index=False,
        encoding='utf-8-sig',
    )

    # The group key is everything before the first "(" in product_feat.
    # expand=False yields a Series, which is what a single column expects.
    data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)', expand=False)

    for name, group in data.groupby('group_key'):
        sub_data = group.drop(columns=['group_key']).sort_values('relation', ascending=False)
        sub_data.to_csv(
            os.path.join(save_dir, f"{name}.csv"),
            index=False,
            encoding='utf-8-sig',
        )


if __name__ == "__main__":
    dao = MySqlDao()
    save_dir = "./data"
    # NOTE(review): the input below and the "feats_interaction.csv" written by
    # split_relation_subtable into save_dir are the same path, so running this
    # script overwrites the input with the filtered table - confirm intended.
    data = pd.read_csv("./data/feats_interaction.csv")
    # Presumably the arguments are a product id and a region/city code - TODO confirm.
    product_data = dao.get_product_by_id(
        "00000000000000000000000011445301", "430201"
    )[ProductConfig.FEATURE_COLUMNS]
    # Use the first returned record as the product's feature values.
    filter_dict = product_data.to_dict("records")[0]
    split_relation_subtable(data, filter_dict, save_dir)