| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- import os
- import pandas as pd
- from database import MySqlDao
- from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
# Module-level DAO instance; get_cust_list_from_history_order() queries
# order history through it. NOTE(review): it is constructed at import time.
dao = MySqlDao()
def filter_data(data, filter_dict):
    """Select the rows of ``data`` that match the requested product features.

    Each entry of ``filter_dict`` other than ``'product_code'`` is rendered as
    ``"<label>(<value>)"``, where ``<label>`` is looked up in
    ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP`` by the entry's key. Rows
    whose ``product_feat`` equals one of these rendered strings are kept.

    Args:
        data: DataFrame containing a ``product_feat`` column.
        filter_dict: Mapping of product feature key to the value to match.
            The ``'product_code'`` entry is ignored.

    Returns:
        The subset of ``data`` selected by the boolean mask; empty when no
        feature key is supplied.
    """
    wanted_labels = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat_key]}({feat_value})"
        for feat_key, feat_value in filter_dict.items()
        if feat_key != 'product_code'
    ]
    row_matches = data['product_feat'].isin(wanted_labels)
    return data[row_matches]
def split_relation_subtable(data, filter_dict, save_dir):
    """Split the cigarette/merchant feature-correlation table into sub-tables.

    The rows selected by ``filter_data`` are first written as
    ``feats_interaction.csv``. They are then grouped by the part of
    ``product_feat`` that precedes the first ``(``; each group is sorted by
    ``relation`` in descending order and written to ``<group name>.csv``.

    Args:
        data: DataFrame with ``product_feat`` and ``relation`` columns.
        filter_dict: Product feature filter forwarded to ``filter_data``.
        save_dir: Directory that receives the CSV files.
    """
    filtered = filter_data(data, filter_dict).copy()
    filtered.to_csv(
        os.path.join(save_dir, "feats_interaction.csv"),
        index=False,
        encoding='utf-8-sig',
    )

    # The grouping key is everything before the opening parenthesis.
    filtered['group_key'] = filtered["product_feat"].str.extract(r'^([^(]+)')

    # Build every sorted sub-table first, then write them out.
    pending_writes = []
    for feature_name, rows in filtered.groupby('group_key'):
        ranked = rows.drop(columns=['group_key']).sort_values('relation', ascending=False)
        pending_writes.append((os.path.join(save_dir, f"{feature_name}.csv"), ranked))

    for target_path, ranked in pending_writes:
        ranked.to_csv(target_path, index=False, encoding='utf-8-sig')
-
def generate_report(data, filter_dict, save_dir):
    """Filter the master table and write the report CSV files to ``save_dir``.

    Files written:
        feats_interaction.csv: every row selected by ``filter_data``.
        cust_relation.csv: the first 20 rows, in their existing order, whose
            ``cust_feat`` group has a total ``relation`` greater than zero.
        product_info.csv: one ``"<label>, <value>"`` line for each entry of
            ``filter_dict`` except ``'product_code'``.

    Args:
        data: DataFrame with ``product_feat``, ``cust_feat`` and ``relation``
            columns.
        filter_dict: Product feature filter forwarded to ``filter_data``.
        save_dir: Directory that receives the CSV files.
    """
    # 1. Merchant-side correlation results for the selected product features.
    selected = filter_data(data, filter_dict).copy()
    selected.to_csv(
        os.path.join(save_dir, "feats_interaction.csv"),
        index=False,
        encoding='utf-8-sig',
    )

    # Keep only cust_feat groups whose summed relation is strictly positive.
    # NOTE(review): the previous comment said "non-negative", but a total of
    # exactly zero is excluded by this comparison -- confirm which is intended.
    relation_totals = selected.groupby("cust_feat")["relation"].sum()
    positive_feats = relation_totals.loc[relation_totals > 0].index.tolist()
    keep_mask = selected["cust_feat"].isin(positive_feats)
    cust_relation = selected[keep_mask].reset_index(drop=True)
    cust_relation.head(20).to_csv(
        os.path.join(save_dir, "cust_relation.csv"),
        index=False,
        encoding='utf-8-sig',
    )

    # 2. Product specification info: one "<label>, <value>" line per feature.
    with open(os.path.join(save_dir, "product_info.csv"), "w", encoding='utf-8-sig') as f:
        f.writelines(
            f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat_key]}, {feat_value}\n"
            for feat_key, feat_value in filter_dict.items()
            if feat_key != 'product_code'
        )
-
-
def get_cust_list_from_history_order(city_uuid, product_code):
    """Fetch the order history for one product in one city.

    Args:
        city_uuid: City identifier passed to ``dao.get_order_by_product``.
        product_code: Product code passed to ``dao.get_order_by_product``.

    Returns:
        Whatever ``dao.get_order_by_product`` returns, unchanged.
    """
    return dao.get_order_by_product(city_uuid, product_code)
-
if __name__ == "__main__":
    # Ad-hoc export: dump the order history of one hard-coded city/product
    # pair to ./data/history.csv.
    history = get_cust_list_from_history_order(
        city_uuid="00000000000000000000000011445301",
        product_code="350139",
    )
    history.to_csv("./data/history.csv", index=False)
|