| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- from database import RedisDatabaseHelper, MySqlDao
- from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
- from models.rank.data.utils import sample_data_clear
- from models.rank.gbdt_lr_inference import GbdtLrModel
- from utils.result_process import split_relation_subtable, generate_report
- import pandas as pd
# Module-level handles, created once at import time and shared by every
# function in this file.
redis = RedisDatabaseHelper().redis  # client used by the recall lookups
dao = MySqlDao()  # loads product / customer / shop rows
# Ranking model, loaded from the pickled weights file at this path.
gbdtlr_model = GbdtLrModel(
    "./models/rank/weights/00000000000000000000000011445301/gbdtlr_model.pkl"
)
def get_itemcf_recall(city_uuid, product_id):
    """Collaborative-filtering (item-CF) recall.

    Returns every member of the Redis sorted set
    ``fc:{city_uuid}:{product_id}``, highest score first, without scores.
    """
    return redis.zrevrange(
        f"fc:{city_uuid}:{product_id}", 0, -1, withscores=False
    )
def get_hot_recall(city_uuid):
    """Popularity ("hot") recall.

    Returns every member of the Redis sorted set
    ``hot:{city_uuid}:sale_qty``, highest score first, without scores.
    """
    return redis.zrevrange(
        f"hot:{city_uuid}:sale_qty", 0, -1, withscores=False
    )
def get_recall_cust(city_uuid, product_id, recall_count):
    """Recall customers via item-CF, topped up from the popularity list.

    Args:
        city_uuid: City identifier used to build the Redis keys.
        product_id: Product whose item-CF recall list is read.
        recall_count: Maximum number of customers to return.

    Returns:
        A list of at most ``recall_count`` entries: the de-duplicated item-CF
        recall list first, followed (only if that list is too short) by
        entries of the hot recall list that are not already present, in
        their original order.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order.
    result = list(dict.fromkeys(get_itemcf_recall(city_uuid, product_id)))

    missing = recall_count - len(result)
    if missing > 0:
        # Fetch the hot list only when a top-up is actually required, so the
        # common "item-CF is enough" path costs a single Redis read.
        already_recalled = set(result)
        fillers = [
            item
            for item in get_hot_recall(city_uuid)
            if item not in already_recalled
        ]
        result.extend(fillers[:missing])

    return result[:recall_count]
def generate_recommend_sample(city_uuid, product_id):
    """Build the prediction data set for one product.

    Recalls up to 300 customers, loads the product, customer and shop feature
    columns, cleans each frame with ``sample_data_clear``, joins shop features
    onto the customers and asks the model to build its feature map.

    Returns:
        A tuple ``(feats_map, filter_dict, cust_list)`` where ``filter_dict``
        is the first product row (taken before cleaning) as a dict and
        ``cust_list`` is the recalled customer list.
    """
    cust_list = get_recall_cust(city_uuid, product_id, 300)

    product_rows = dao.get_product_by_id(city_uuid, product_id)
    product_data = product_rows[ProductConfig.FEATURE_COLUMNS]
    # Snapshot the raw product row before any cleaning is applied.
    filter_dict = product_data.to_dict("records")[0]

    cust_rows = dao.get_cust_by_ids(city_uuid, cust_list)
    cust_data = cust_rows[CustConfig.FEATURE_COLUMNS]
    shop_rows = dao.get_shop_by_ids(city_uuid, cust_list)
    shop_data = shop_rows[ShopConfig.FEATURE_COLUMNS]

    cleaned_product = sample_data_clear(product_data, ProductConfig)
    cleaned_cust = sample_data_clear(cust_data, CustConfig)
    cleaned_shop = sample_data_clear(shop_data, ShopConfig)

    # Attach shop features to each customer; the inner join keeps only
    # customers that have a matching shop row.
    # NOTE(review): cust_list is returned as recalled, so it may contain
    # customers the join dropped - confirm downstream code tolerates that.
    shop_by_cust = cleaned_shop.set_index("cust_code")
    merged_cust = cleaned_cust.join(
        shop_by_cust, on="BB_RETAIL_CUSTOMER_CODE", how="inner"
    )

    feats_map = gbdtlr_model.generate_feats_map(cleaned_product, merged_cust)
    return feats_map, filter_dict, cust_list
def get_recommend_list(city_uuid, product_id):
    """Return the model's recommendation list for one product in one city.

    Builds the prediction sample and passes its feature map and recalled
    customer list to ``gbdtlr_model.get_recommend_list``.
    """
    sample = generate_recommend_sample(city_uuid, product_id)
    feats_sample, _filter_dict, cust_list = sample
    return gbdtlr_model.get_recommend_list(feats_sample, cust_list)
def gbdt_lr_inference(city_uuid, product_id):
    """Placeholder, not implemented: ignores its arguments and returns None."""
    return None
def generate_features_shap(city_uuid, product_id, delivery_count):
    """Generate the SHAP-based report for one product.

    Builds the prediction sample, computes the model's SHAP result and its
    recommendation list, then hands everything to ``generate_report``, which
    is given ``"./data"`` as its last argument.
    """
    sample = generate_recommend_sample(city_uuid, product_id)
    feats_sample, filter_dict, cust_list = sample

    shap_result = gbdtlr_model.generate_shap_interance(feats_sample)
    recommend_data = gbdtlr_model.get_recommend_list(feats_sample, cust_list)

    generate_report(
        city_uuid,
        shap_result,
        filter_dict,
        recommend_data,
        delivery_count,
        "./data",
    )
def generate_delivery_strategy():
    """Placeholder, not implemented: does nothing and returns None."""
    return None
def run():
    """Placeholder, not implemented: does nothing and returns None."""
    return None
if __name__ == "__main__":
    # Manual entry point: build the SHAP report for one hard-coded
    # city/product pair.
    generate_features_shap(
        "00000000000000000000000011445301",
        "350139",
        delivery_count=5000,
    )
    # Scratch snippets kept for ad-hoc runs (disabled):
    # recommend_list = get_recommend_list("00000000000000000000000011445301", "420202")
    # recommend_list = pd.DataFrame(recommend_list)
    # recommend_list.to_csv("./data/recommend_list.csv", index=False, encoding="utf-8-sig")
    # data = dao.get_order_by_cust("00000000000000000000000011445301", "445381107139")
    # data = data.groupby(["cust_code", "product_code", "product_name"], as_index=False)["sale_qty"].sum()
    # data.to_csv("./data/cust.csv", index=False)
|