|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
|
from database import MySqlDao
|
|
from database import MySqlDao
|
|
|
from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
|
|
from models.rank.data.config import ImportanceFeaturesMap, ProductConfig
|
|
|
|
|
|
|
|
|
|
+# Module-level DAO, instantiated at import time; get_cust_list_from_history_order() reads it.
+dao = MySqlDao()
|
|
|
def filter_data(data, filter_dict):
|
|
def filter_data(data, filter_dict):
|
|
|
|
|
|
|
|
product_content = []
|
|
product_content = []
|
|
@@ -13,9 +14,9 @@ def filter_data(data, filter_dict):
|
|
|
data = data[data['product_feat'].isin(product_content)]
|
|
data = data[data['product_feat'].isin(product_content)]
|
|
|
return data
|
|
return data
|
|
|
|
|
|
|
|
-def split_relation_subtable(data, product_data, save_dir):
|
|
|
|
|
|
|
+def split_relation_subtable(data, filter_dict, save_dir):
|
|
|
"""拆分卷烟商户特征相关性子表"""
|
|
"""拆分卷烟商户特征相关性子表"""
|
|
|
- data = filter_data(data, product_data).copy()
|
|
|
|
|
|
|
+ data = filter_data(data, filter_dict).copy()
|
|
|
data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
|
|
data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
|
|
|
data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
|
|
data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)')
|
|
|
grouped = data.groupby('group_key')
|
|
grouped = data.groupby('group_key')
|
|
@@ -27,11 +28,29 @@ def split_relation_subtable(data, product_data, save_dir):
|
|
|
for name, sub_data in sub_tables.items():
|
|
for name, sub_data in sub_tables.items():
|
|
|
sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
|
|
sub_data.to_csv(os.path.join(save_dir, f"{name}.csv"), index=False, encoding='utf-8-sig')
|
|
|
|
|
|
|
|
-if __name__ == "__main__":
|
|
|
|
|
- dao = MySqlDao()
|
|
|
|
|
|
|
+def generate_report(data, filter_dict, save_dir):
|
|
|
|
|
+ """Filter the master table and write the report files into save_dir.
+
+ Three files are written (all utf-8-sig):
+ - feats_interaction.csv: the rows of data kept by filter_data(data, filter_dict).
+ - cust_relation.csv: the first 20 of those rows whose cust_feat group has a
+   strictly positive summed "relation".
+ - product_info.csv: one "<PRODUCT_FEATRUES_MAP entry>, <value>" line per
+   filter_dict key other than 'product_code'.
+
+ Nothing is returned.
+ """
|
|
|
|
|
+ # 1. Filter the merchant correlation ranking results
|
|
|
|
|
+ data = filter_data(data, filter_dict).copy()
|
|
|
|
|
+ data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
|
|
|
|
|
+ group_sums = data.groupby("cust_feat")["relation"].sum()
|
|
|
|
|
+ # Keep only the cust_feat values whose summed relation is strictly positive (> 0);
+ # groups summing to exactly zero are dropped as well.
|
|
|
|
|
+ valid_cust_feats = group_sums[group_sums > 0].index.tolist()
|
|
|
|
|
+ cust_relation = data[data["cust_feat"].isin(valid_cust_feats)]
|
|
|
|
|
+ cust_relation = cust_relation.reset_index(drop=True)
|
|
|
|
|
+
|
|
|
|
|
+ # 2. Product specification info
+ # The next line first saves the leading 20 rows of cust_relation; the product
+ # fields themselves are written by the product_info.csv block after it.
|
|
|
|
|
+ cust_relation[:20].to_csv(os.path.join(save_dir, "cust_relation.csv"), index=False, encoding='utf-8-sig')
|
|
|
|
|
+ with open(os.path.join(save_dir, "product_info.csv"), "w", encoding='utf-8-sig') as f:
|
|
|
|
|
+ for key, value in filter_dict.items():
|
|
|
|
|
+ # 'product_code' is skipped; every other key is looked up in PRODUCT_FEATRUES_MAP
+ # (a key missing from that map would raise here — NOTE(review): confirm coverage).
+ if key != 'product_code':
|
|
|
|
|
+ f.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
|
|
|
|
|
|
|
|
- save_dir = "./data"
|
|
|
|
|
- data = pd.read_csv("./data/feats_interaction.csv")
|
|
|
|
|
- product_data = dao.get_product_by_id("00000000000000000000000011445301", "430201")[ProductConfig.FEATURE_COLUMNS]
|
|
|
|
|
- filter_dict = product_data.to_dict("records")[0]
|
|
|
|
|
- split_relation_subtable(data, filter_dict, save_dir)
|
|
|
|
|
|
|
+
|
|
|
|
|
+def get_cust_list_from_history_order(city_uuid, product_code):
+ """Return the result of dao.get_order_by_product(city_uuid, product_code) unchanged.
+
+ NOTE(review): the result is presumably a DataFrame, since the __main__ block
+ calls .to_csv on it — confirm against MySqlDao. Despite the function name, no
+ customer list is extracted here; the order data is passed through as-is.
+ """
|
|
|
|
|
+ order_data = dao.get_order_by_product(city_uuid, product_code)
|
|
|
|
|
+ return order_data
|
|
|
|
|
+
|
|
|
|
|
+# Ad-hoc script entry: fetch the orders for one hard-coded city uuid / product code
+# pair and dump them to ./data/history.csv (the ./data directory must already exist).
+if __name__ == "__main__":
|
|
|
|
|
+ order_data = get_cust_list_from_history_order("00000000000000000000000011445301", "350139")
|
|
|
|
|
+ order_data.to_csv("./data/history.csv", index=False)
|