import os

import pandas as pd

from database import MySqlDao
from models.rank.data.config import ImportanceFeaturesMap, ProductConfig

# Shared DAO instance, created at import time and used by
# get_cust_list_from_history_order().
dao = MySqlDao()

# File name of the filtered interaction table written by both
# split_relation_subtable() and generate_report().
_FEATS_INTERACTION_FILE = "feats_interaction.csv"


def _ensure_dir(path):
    """Create ``path`` (and any missing parents) if it is a non-empty path."""
    # An empty string means "current directory" for os.path.join, and
    # os.makedirs("") would raise, so only create real paths.
    if path:
        os.makedirs(path, exist_ok=True)


def _filter_and_dump(data, filter_dict, save_dir):
    """Filter ``data``, write it to ``feats_interaction.csv`` and return a copy."""
    _ensure_dir(save_dir)
    # Work on a copy so that later column assignments never touch the caller's frame.
    filtered = filter_data(data, filter_dict).copy()
    filtered.to_csv(
        os.path.join(save_dir, _FEATS_INTERACTION_FILE),
        index=False,
        encoding='utf-8-sig',
    )
    return filtered


def filter_data(data: pd.DataFrame, filter_dict: dict) -> pd.DataFrame:
    """Keep only the rows whose ``product_feat`` matches a requested feature.

    Every entry of ``filter_dict`` except ``'product_code'`` is rendered as
    ``"<mapped feature name>(<value>)"`` using
    ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``; rows of ``data`` whose
    ``product_feat`` equals one of those strings are kept.

    Args:
        data: Frame with a ``product_feat`` column.
        filter_dict: Feature key -> value pairs describing one product.

    Returns:
        The matching rows of ``data``.

    Note:
        A key missing from ``PRODUCT_FEATRUES_MAP`` makes the lookup fail
        (presumably with ``KeyError`` -- the map is defined outside this file).
    """
    product_content = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})"
        for key, value in filter_dict.items()
        if key != 'product_code'
    ]
    return data[data['product_feat'].isin(product_content)]


def split_relation_subtable(data: pd.DataFrame, filter_dict: dict, save_dir: str) -> None:
    """Split the cigarette/merchant feature correlation table into sub-tables.

    Writes the filtered table to ``feats_interaction.csv`` and then one
    ``<feature name>.csv`` per product feature, each sorted by ``relation``
    in descending order. ``save_dir`` is created if it does not exist.

    Args:
        data: Frame with ``product_feat`` and ``relation`` columns.
        filter_dict: Feature key -> value pairs passed to ``filter_data``.
        save_dir: Directory that receives the CSV files.
    """
    data = _filter_and_dump(data, filter_dict, save_dir)

    # The group key is the text before the first "(", i.e. the feature name
    # without its "(value)" suffix. expand=False yields a Series for the new column.
    data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)', expand=False)

    for name, group in data.groupby('group_key'):
        sub_data = group.drop(columns=['group_key']).sort_values('relation', ascending=False)
        sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')


def generate_report(data: pd.DataFrame, filter_dict: dict, save_dir: str) -> None:
    """Filter the master table and write the report files for one product.

    Produces three files in ``save_dir`` (created if missing):
    ``feats_interaction.csv`` (the filtered table), ``cust_relation.csv``
    (first 20 rows belonging to customer features whose summed ``relation``
    is positive) and ``product_info.csv`` (the product's feature values).

    Args:
        data: Frame with ``product_feat``, ``cust_feat`` and ``relation`` columns.
        filter_dict: Feature key -> value pairs describing the product.
        save_dir: Directory that receives the CSV files.
    """
    # 1. Filter the merchant correlation ranking results.
    data = _filter_and_dump(data, filter_dict, save_dir)

    group_sums = data.groupby("cust_feat")["relation"].sum()
    # Keep cust_feat values whose summed relation is strictly positive
    # (groups summing to exactly zero are excluded as well).
    valid_cust_feats = group_sums[group_sums > 0].index.tolist()
    cust_relation = data[data["cust_feat"].isin(valid_cust_feats)].reset_index(drop=True)

    # Rows keep the order of the incoming table; only the first 20 are reported.
    cust_relation.head(20).to_csv(
        os.path.join(save_dir, "cust_relation.csv"), index=False, encoding='utf-8-sig'
    )

    # 2. Product specification info: one "<feature name>, <value>" line per feature.
    with open(os.path.join(save_dir, "product_info.csv"), "w", encoding='utf-8-sig') as f:
        f.writelines(
            f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n"
            for key, value in filter_dict.items()
            if key != 'product_code'
        )


def get_cust_list_from_history_order(city_uuid, product_code):
    """Return the order data for ``product_code`` in ``city_uuid``.

    Thin wrapper around ``dao.get_order_by_product``; the result is returned
    unchanged (the script entry point below calls ``to_csv`` on it, so it is
    presumably a DataFrame -- confirm against the DAO).
    """
    return dao.get_order_by_product(city_uuid, product_code)


if __name__ == "__main__":
    order_data = get_cust_list_from_history_order("00000000000000000000000011445301", "350139")
    # Make sure the output directory exists before writing the export.
    _ensure_dir("./data")
    order_data.to_csv("./data/history.csv", index=False)