"""Recall, GBDT-LR scoring and report helpers for product-to-customer recommendation.

Importing this module has side effects: it instantiates the Redis helper, the
DAO and both models defined at module level below.
"""
import pandas as pd

from database import RedisDatabaseHelper, MySqlDao
from models.item2vec import Item2VecModel
from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
from models.rank.data.utils import sample_data_clear
from models.rank.gbdt_lr_inference import GbdtLrModel
from utils.result_process import get_cust_list_from_history_order, split_relation_subtable, generate_report

# Identifier shared by the weights directory name, the item2vec model argument
# and the city_uuid used in the __main__ examples.
_DEFAULT_CITY_UUID = "00000000000000000000000011445301"
# Number of customers recalled for a product that already appears in orders.
_DEFAULT_RECALL_COUNT = 1000

redis = RedisDatabaseHelper().redis
dao = MySqlDao()
gbdtlr_model = GbdtLrModel(f"./models/rank/weights/{_DEFAULT_CITY_UUID}/gbdtlr_model.pkl")
item2vec = Item2VecModel(_DEFAULT_CITY_UUID)


def get_itemcf_recall(city_uuid, product_id):
    """Collaborative-filtering recall.

    Returns every member of the Redis sorted set ``fc:{city_uuid}:{product_id}``
    ordered from highest to lowest score, without the scores.
    """
    key = f"fc:{city_uuid}:{product_id}"
    return redis.zrevrange(key, 0, -1, withscores=False)


def get_hot_recall(city_uuid):
    """Popularity ("hot") recall.

    Returns every member of the Redis sorted set ``hot:{city_uuid}:sale_qty``
    ordered from highest to lowest score, without the scores.
    """
    key = f"hot:{city_uuid}:sale_qty"
    return redis.zrevrange(key, 0, -1, withscores=False)


def get_recall_cust(city_uuid, product_id, recall_count):
    """Recall customers via collaborative filtering, topped up by hot recall.

    Args:
        city_uuid: City identifier used to build the Redis keys.
        product_id: Product whose collaborative-filtering recall list is read.
        recall_count: Maximum number of entries to return.

    Returns:
        A de-duplicated list of at most ``recall_count`` entries: the
        collaborative-filtering results first, followed by hot-recall entries
        that are not already present when the first list is too short.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order.
    result = list(dict.fromkeys(get_itemcf_recall(city_uuid, product_id)))

    # Only hit the hot-recall key when a top-up is actually needed.
    if len(result) < recall_count:
        seen = set(result)
        fillers = [item for item in get_hot_recall(city_uuid) if item not in seen]
        result.extend(fillers[: recall_count - len(result)])

    return result[:recall_count]


def _product_has_orders(city_uuid, product_id):
    """Return True when ``product_id`` is among the product codes of the city's orders."""
    product_codes = dao.get_product_from_order(city_uuid)["product_code"].unique().tolist()
    return product_id in product_codes


def _build_recommend_sample(city_uuid, product_id):
    """Build the prediction sample and report whether the product has order history.

    Returns:
        ``(feats_map, filter_dict, cust_list, has_orders)``; the first three
        items are exactly what ``generate_recommend_sample`` returns.

    Raises:
        IndexError: If no product row is found for ``product_id``.
    """
    has_orders = _product_has_orders(city_uuid, product_id)
    if has_orders:
        cust_list = get_recall_cust(city_uuid, product_id, _DEFAULT_RECALL_COUNT)
    else:
        # Product never ordered in this city: take candidates from item2vec.
        cust_list = item2vec.get_recommend_cust_list(product_id)["cust_code"].to_list()

    # Fetch the product (cigarette) information.
    product_data = dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
    product_records = product_data.to_dict("records")
    if not product_records:
        # Same exception type the bare [0] lookup raised, with a usable message.
        raise IndexError(f"no product {product_id!r} found for city {city_uuid!r}")
    filter_dict = product_records[0]

    cust_data = dao.get_cust_by_ids(city_uuid, cust_list)[CustConfig.FEATURE_COLUMNS]
    shop_data = dao.get_shop_by_ids(city_uuid, cust_list)[ShopConfig.FEATURE_COLUMNS]

    product_data = sample_data_clear(product_data, ProductConfig)
    cust_data = sample_data_clear(cust_data, CustConfig)
    shop_data = sample_data_clear(shop_data, ShopConfig)

    # Attach shop features to each customer row; the inner join keeps only
    # customers present in both tables.
    cust_feats = shop_data.set_index("cust_code")
    cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")

    feats_map = gbdtlr_model.generate_feats_map(product_data, cust_data)
    return feats_map, filter_dict, cust_list, has_orders


def generate_recommend_sample(city_uuid, product_id):
    """Generate the prediction dataset.

    Candidate customers come from ``get_recall_cust`` when the product appears
    in the city's orders, otherwise from the item2vec model.

    Returns:
        ``(feats_map, filter_dict, cust_list)`` where ``filter_dict`` is the
        first product record as a dict and ``cust_list`` the candidate list.

    Raises:
        IndexError: If no product row is found for ``product_id``.
    """
    feats_map, filter_dict, cust_list, _ = _build_recommend_sample(city_uuid, product_id)
    return feats_map, filter_dict, cust_list


def get_recommend_list_by_gbdt_lr(city_uuid, product_id):
    """Score candidates with GBDT-LR and return the recommendation list.

    Intended for products that exist in historical orders.
    """
    feats_sample, _, cust_list = generate_recommend_sample(city_uuid, product_id)
    return gbdtlr_model.get_recommend_list(feats_sample, cust_list)


def gbdt_lr_inference(city_uuid, product_id):
    """Placeholder; not implemented yet."""


def generate_features_shap(city_uuid, product_id, delivery_count):
    """Build the recommendation data and SHAP result, then write the report to ``./data``.

    Args:
        city_uuid: City identifier.
        product_id: Product to recommend.
        delivery_count: Passed through to ``generate_report``.
    """
    # The helper also reports the order-history flag, so the order table is
    # queried once instead of once here and once while building the sample.
    feats_sample, filter_dict, cust_list, has_orders = _build_recommend_sample(city_uuid, product_id)

    if has_orders:
        recommend_data = gbdtlr_model.get_recommend_list(feats_sample, cust_list)
    else:
        # The recommended product is a new cigarette: use item2vec.
        recommend_data = item2vec.get_recommend_cust_list(product_id).to_dict("records")

    result = gbdtlr_model.generate_shap_interance(feats_sample)
    generate_report(city_uuid, result, filter_dict, recommend_data, delivery_count, "./data")


# NOTE: this name shadows the builtin ``eval`` inside this module; it is kept
# because callers may already depend on it.
def eval(city_uuid, product_code):
    """Validate recommendation quality and write the result to ``./data/eval.csv``."""
    eval_report = get_cust_list_from_history_order(city_uuid, product_code)
    eval_report.to_csv("./data/eval.csv", index=False)


def generate_delivery_strategy():
    """Placeholder; not implemented yet."""


def run():
    """Placeholder; not implemented yet."""


if __name__ == '__main__':
    # generate_features_shap("00000000000000000000000011445301", "350139", delivery_count=5000)
    eval(_DEFAULT_CITY_UUID, "350355")
    # recommend_list = get_recommend_list_by_gbdt_lr("00000000000000000000000011445301", "350139")
    # recommend_list = pd.DataFrame(recommend_list)
    # recommend_list.to_csv("./data/recommend_list.csv", index=False, encoding="utf-8-sig")

    # Fetch Longjun's data
    # data = dao.get_order_by_cust("00000000000000000000000011445301", "445323105795")
    # data = data.groupby(["cust_code", "product_code", "product_name"], as_index=False)["sale_qty"].sum()
    # data.to_csv("./data/cust.csv", index=False)

    # city_uuid = "00000000000000000000000011445301"
    # order_data = dao.get_order_by_cust("00000000000000000000000011445301", "445323105795")
    # order_data["sale_qty"] = order_data["sale_qty"].fillna(0)
    # order_data = order_data.infer_objects(copy=False)
    # order_data = order_data.groupby(["cust_code", "product_code", "product_name"], as_index=False)["sale_qty"].sum()

    # cust_data = dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
    # sample_data_clear(cust_data, CustConfig)
    # shop_data = dao.load_shopping_data(city_uuid)[ShopConfig.FEATURE_COLUMNS]
    # sample_data_clear(shop_data, ShopConfig)
    # cust_ids = shop_data.set_index("cust_code")
    # cust_data = cust_data.join(cust_ids, on="BB_RETAIL_CUSTOMER_CODE", how="inner")

    # product_data = dao.load_product_data(city_uuid)[ProductConfig.FEATURE_COLUMNS]
    # sample_data_clear(product_data, ProductConfig)

    # order_data = order_data.merge(product_data, on="product_code", how="inner")
    # order_data = order_data.merge(cust_data, left_on='cust_code', right_on='BB_RETAIL_CUSTOMER_CODE', how="inner")
    # result = gbdtlr_model.inference_from_sample(order_data)
    # result.to_csv("./data/junlong.csv", index=False)