import logging
import os

import pandas as pd

from models.rank.data.config import ImportanceFeaturesMap, DeliveryConfig

# The logger name is the literal the original code used, so any logging
# configuration keyed on "utils.report" keeps working.
_logger = logging.getLogger("utils.report")


def filter_data(data, filter_dict):
    """Filter feature-relation rows out of the SHAP result.

    Every ``filter_dict`` entry except ``'product_code'`` is converted into a
    label of the form ``"<mapped name>(<value>)"`` using
    ``ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP``; only rows whose
    ``product_feat`` equals one of these labels are kept.

    Args:
        data: DataFrame with a ``product_feat`` column.
        filter_dict: Mapping of product feature key -> selected value.

    Returns:
        The matching subset of ``data`` (a selection, not an explicit copy).

    Raises:
        KeyError: If a key (other than ``'product_code'``) is missing from
            ``PRODUCT_FEATRUES_MAP``.
    """
    product_content = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})"
        for key, value in filter_dict.items()
        if key != 'product_code'
    ]
    return data[data['product_feat'].isin(product_content)]


def feats_relation_process(shap_result, product_content):
    """Build the feature-relation analysis report.

    Args:
        shap_result: DataFrame with ``product_feat``, ``cust_feat`` and
            ``relation`` columns.
        product_content: Filter mapping forwarded to :func:`filter_data`.

    Returns:
        DataFrame restricted to customer features whose summed ``relation``
        is strictly positive, with a fresh 0-based index and the three
        columns renamed to their Chinese report headers.
    """
    # Restrict to the product features selected by the filter.
    report = filter_data(shap_result, product_content).copy()

    # Keep only customer features whose total relation is positive.
    relation_totals = report.groupby("cust_feat")["relation"].sum()
    positive_cust_feats = relation_totals[relation_totals > 0].index.to_list()
    report = report[report["cust_feat"].isin(positive_cust_feats)]
    report = report.reset_index(drop=True)

    return report.rename(
        columns={
            "product_feat": "卷烟特征",
            "cust_feat": "商户特征",
            "relation": "相关性",
        }
    )


def build_recommend_report(recommend_data, recommend_cust_infos):
    """Build the recommended-customer report (without delivery quantities).

    Args:
        recommend_data: Anything ``pd.DataFrame`` accepts; must yield
            ``cust_code`` and ``recommend_score`` columns.
        recommend_cust_infos: DataFrame with a ``cust_code`` column and the
            customer attributes to attach (``cust_name`` is required by the
            column selection below).

    Returns:
        DataFrame with columns ``推荐序号`` (1-based position), ``商户编号``,
        ``商户名称`` and ``推荐系数``. Customers missing from
        ``recommend_cust_infos`` are dropped and reported via a warning.
    """
    recommend_data = pd.DataFrame(recommend_data)
    cust_ids = recommend_cust_infos.set_index("cust_code")

    # Remember the requested codes so rows lost by the inner join can be reported.
    before_join = set(recommend_data["cust_code"])
    recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
    missing = before_join - set(recommend_data["cust_code"])
    if missing:
        _logger.warning(
            f"build_recommend_report: {len(missing)} cust_codes not found in "
            f"customer master and dropped from report: {missing}"
        )

    recommend_data = recommend_data[["cust_code", "cust_name", "recommend_score"]]
    # Renumber from 1 and expose that numbering as a regular column.
    recommend_data = recommend_data.reset_index(drop=True)
    recommend_data.index = recommend_data.index + 1
    recommend_data = recommend_data.reset_index()

    return recommend_data.rename(
        columns={
            "index": "推荐序号",
            "cust_code": "商户编号",
            "cust_name": "商户名称",
            "recommend_score": "推荐系数",
        }
    )


def eval_report_process_pre(eval_order_data, recommend_data):
    """Aggregate order data per customer/product and attach recommendation info.

    Neither input DataFrame is modified: both are copied before the
    ``cust_code`` / ``商户编号`` columns are cast to ``str``.

    Args:
        eval_order_data: DataFrame with ``cust_code``, ``cust_name``,
            ``product_code``, ``product_name``, ``sale_qty`` and ``sale_amt``.
        recommend_data: DataFrame with ``商户编号``, ``推荐序号`` and
            ``推荐系数`` columns (the shape produced by
            ``build_recommend_report``).

    Returns:
        DataFrame sorted by mean ``sale_qty`` (descending) with columns
        ``商户编号``, ``商户名称``, ``卷烟编码``, ``卷烟名称``, ``月均销量``,
        ``推荐序号`` and ``推荐系数``. Orders without a matching
        recommendation keep NaN in the last two columns (left join).
    """
    order_columns = [
        "cust_code", "cust_name", "product_code", "product_name",
        "sale_qty", "sale_amt",
    ]
    # Copy the column subset so the cast below writes to our own frame rather
    # than to a slice of the caller's data (avoids SettingWithCopyWarning).
    orders = eval_order_data[order_columns].copy()
    # Make sure cust_code is a string so it matches the recommendation key.
    orders["cust_code"] = orders["cust_code"].astype(str)

    group_keys = ["cust_code", "cust_name", "product_code", "product_name"]
    orders = orders.groupby(group_keys)[["sale_qty", "sale_amt"]].mean().reset_index()
    orders["sale_qty"] = orders["sale_qty"].round(0).astype(int)
    orders = orders.sort_values("sale_qty", ascending=False)

    # Cast the recommendation key to string on a copy; the caller's
    # recommend_data must not be changed as a side effect.
    recommend = recommend_data.copy()
    recommend["商户编号"] = recommend["商户编号"].astype(str)
    cust_ids = recommend.set_index("商户编号")

    # Left join: every aggregated order row is kept.
    merge_data = orders.join(cust_ids, on="cust_code", how="left")
    merge_data = merge_data[
        ["cust_code", "cust_name", "product_code", "product_name",
         "sale_qty", "推荐序号", "推荐系数"]
    ]
    return merge_data.rename(
        columns={
            "cust_code": "商户编号",
            "cust_name": "商户名称",
            "product_code": "卷烟编码",
            "product_name": "卷烟名称",
            "sale_qty": "月均销量",
        }
    )


def eval_report_process(delivery_data, recommend_data):
    """Attach delivery features to the recommendation report.

    Args:
        delivery_data: DataFrame with ``customer_code`` and ``goods_code``
            columns plus the delivery feature columns.
        recommend_data: DataFrame keyed by ``商户编号``.

    Returns:
        ``recommend_data`` left-merged with ``delivery_data``, without the
        ``customer_code`` / ``goods_code`` columns, and with the delivery
        feature columns renamed through ``DeliveryConfig.FEATURES_MAP``.
    """
    report = recommend_data.merge(
        delivery_data, left_on="商户编号", right_on="customer_code", how="left"
    )
    report = report.drop(columns=["customer_code", "goods_code"])

    renamed_features = (
        "retail_index_week",
        "turnover_rate_collpoint",
        "turnover_rate_terminal",
        "sale_qty",
    )
    return report.rename(
        columns={name: DeliveryConfig.FEATURES_MAP[name] for name in renamed_features}
    )


def split_relation_subtable(data, filter_dict, save_dir):
    """Split the product/customer feature-relation table into per-feature CSVs.

    Writes ``feats_interaction.csv`` (the full filtered table) into
    ``save_dir``, then one ``<feature name>.csv`` per product feature, each
    sorted by ``relation`` in descending order. Files are written with the
    ``utf-8-sig`` encoding and without the index.

    Args:
        data: DataFrame with ``product_feat`` and ``relation`` columns.
        filter_dict: Filter mapping forwarded to :func:`filter_data`.
        save_dir: Directory the CSV files are written to.
    """
    data = filter_data(data, filter_dict).copy()
    data.to_csv(
        os.path.join(save_dir, "feats_interaction.csv"),
        index=False,
        encoding='utf-8-sig',
    )

    # The group key is the text before the first "(" of the feature label.
    # expand=False returns a Series, which is what a single-column assignment
    # expects.
    data['group_key'] = data["product_feat"].str.extract(r'^([^(]+)', expand=False)

    for name, group in data.groupby('group_key'):
        sub_data = group.drop(columns=['group_key']).sort_values('relation', ascending=False)
        sub_data.to_csv(
            os.path.join(save_dir, f"{name}.csv"),
            index=False,
            encoding='utf-8-sig',
        )