|
@@ -1,162 +1,3 @@
|
|
|
|
|
+from database.db.redis_db import RedisDatabaseHelper
|
|
|
|
|
|
|
|
-from database import RedisDatabaseHelper, MySqlDao
|
|
|
|
|
-from models.item2vec import Item2VecModel
|
|
|
|
|
-from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
|
|
|
|
|
-from models.rank.data.utils import sample_data_clear
|
|
|
|
|
-from models.rank.gbdt_lr_inference import GbdtLrModel
|
|
|
|
|
-from utils.result_process import get_cust_list_from_history_order, split_relation_subtable, generate_report
|
|
|
|
|
-import pandas as pd
|
|
|
|
|
|
|
|
|
|
# Shared module-level resources. They are constructed once, at import time,
# and used as globals by the functions in this module.

# Redis client backing the recall helpers (sorted-set reads).
redis = RedisDatabaseHelper().redis
# MySQL data-access object used for product / customer / shop / order lookups.
dao = MySqlDao()
# GBDT-LR ranking model, loaded from a fixed weights file on disk.
gbdtlr_model = GbdtLrModel("./models/rank/weights/00000000000000000000000011445301/gbdtlr_model.pkl")
# Item2Vec model. The id passed here is the same string as the directory name
# in the weights path above — presumably a city uuid; TODO confirm.
item2vec = Item2VecModel("00000000000000000000000011445301")
|
|
|
|
|
-
|
|
|
|
|
def get_itemcf_recall(city_uuid, product_id):
    """Collaborative-filtering recall.

    Reads the whole Redis sorted set stored under
    ``fc:{city_uuid}:{product_id}`` in descending score order.

    Args:
        city_uuid: City identifier embedded in the Redis key.
        product_id: Product identifier embedded in the Redis key.

    Returns:
        The members of the sorted set, without their scores.
    """
    return redis.zrevrange(f"fc:{city_uuid}:{product_id}", 0, -1, withscores=False)
|
|
|
|
|
-
|
|
|
|
|
def get_hot_recall(city_uuid):
    """Popularity ("hot") recall.

    Reads the whole Redis sorted set stored under
    ``hot:{city_uuid}:sale_qty`` in descending score order.

    Args:
        city_uuid: City identifier embedded in the Redis key.

    Returns:
        The members of the sorted set, without their scores.
    """
    return redis.zrevrange(f"hot:{city_uuid}:sale_qty", 0, -1, withscores=False)
|
|
|
|
|
-
|
|
|
|
|
def get_recall_cust(city_uuid, product_id, recall_count):
    """Recall up to ``recall_count`` merchants for a product.

    Item-CF recall results come first (de-duplicated, original order kept).
    If they do not fill ``recall_count`` slots, the remainder is topped up
    from the city-wide hot recall list, skipping entries already selected.

    Args:
        city_uuid: City identifier used in both Redis keys.
        product_id: Product identifier used in the item-CF Redis key.
        recall_count: Maximum number of entries to return.

    Returns:
        A list holding at most ``recall_count`` recalled members.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    result = list(dict.fromkeys(get_itemcf_recall(city_uuid, product_id)))

    missing = recall_count - len(result)
    if missing <= 0:
        # Item-CF alone is sufficient: skip the hot-list Redis read entirely.
        return result[:recall_count]

    # Top up from the hot list, keeping its order and skipping known entries.
    already_selected = set(result)
    fillers = [item for item in get_hot_recall(city_uuid) if item not in already_selected]
    result.extend(fillers[:missing])
    return result
|
|
|
|
|
-
|
|
|
|
|
def generate_recommend_sample(city_uuid, product_id, recall_count=1000):
    """Build the prediction dataset for one product.

    Candidate customers are chosen by one of two routes:

    * the product appears in the city's order history -> item-CF / hot
      recall via ``get_recall_cust``;
    * otherwise -> the Item2Vec model's recommended customer list.

    Product, customer and shop features are then loaded, cleaned with
    ``sample_data_clear`` and combined into the model's feature map.

    Args:
        city_uuid: City identifier passed to every DAO query.
        product_id: Product code to generate the sample for.
        recall_count: Maximum number of customers recalled on the
            order-history route. Defaults to 1000, the value that used to
            be hard-coded here.

    Returns:
        A tuple ``(feats_map, filter_dict, cust_list)`` where ``feats_map``
        is the output of ``gbdtlr_model.generate_feats_map``, ``filter_dict``
        is the first raw (pre-cleaning) product feature record as a dict, and
        ``cust_list`` is the list of candidate customer codes.

    Raises:
        IndexError: If the product lookup returns no rows.
    """
    product_in_order = dao.get_product_from_order(city_uuid)["product_code"].unique().tolist()
    if product_id in product_in_order:
        cust_list = get_recall_cust(city_uuid, product_id, recall_count)
    else:
        cust_list = item2vec.get_recommend_cust_list(product_id)["cust_code"].to_list()

    # Fetch the cigarette (product) information.
    product_data = dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
    if product_data.empty:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that says what is actually missing.
        raise IndexError(f"product {product_id!r} not found for city {city_uuid!r}")
    # Captured before cleaning, so the record holds the raw feature values.
    filter_dict = product_data.to_dict("records")[0]

    cust_data = dao.get_cust_by_ids(city_uuid, cust_list)[CustConfig.FEATURE_COLUMNS]
    shop_data = dao.get_shop_by_ids(city_uuid, cust_list)[ShopConfig.FEATURE_COLUMNS]

    product_data = sample_data_clear(product_data, ProductConfig)
    cust_data = sample_data_clear(cust_data, CustConfig)
    shop_data = sample_data_clear(shop_data, ShopConfig)

    # Attach shop features to each customer row; the inner join drops
    # customers that have no matching shop record.
    cust_feats = shop_data.set_index("cust_code")
    cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")

    feats_map = gbdtlr_model.generate_feats_map(product_data, cust_data)

    # NOTE(review): cust_list is returned as recalled, while feats_map only
    # covers customers that survived the lookups and the inner join — confirm
    # that consumers pairing the two handle a possible length mismatch.
    return feats_map, filter_dict, cust_list
|
|
|
|
|
-
|
|
|
|
|
def get_recommend_list_by_gbdt_lr(city_uuid, product_id):
    """Score candidates with GBDT-LR and return the recommendation list.

    Intended for cigarettes that already appear in the order history.

    Args:
        city_uuid: City identifier.
        product_id: Product code to recommend customers for.

    Returns:
        Whatever ``gbdtlr_model.get_recommend_list`` returns for the
        generated feature sample and candidate customer list.
    """
    sample = generate_recommend_sample(city_uuid, product_id)
    # The middle element (the product filter dict) is not needed here.
    features, candidates = sample[0], sample[2]
    return gbdtlr_model.get_recommend_list(features, candidates)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
def gbdt_lr_inference(city_uuid, product_id):
    """Placeholder for GBDT-LR inference; not implemented yet.

    Both arguments are accepted and currently ignored.
    """
    return None
|
|
|
|
|
-
|
|
|
|
|
def generate_features_shap(city_uuid, product_id, delivery_count):
    """Produce the SHAP-based recommendation report for one product.

    Builds the prediction sample, picks the recommendation source, computes
    the SHAP output with the GBDT-LR model and hands everything to
    ``generate_report``.

    Args:
        city_uuid: City identifier.
        product_id: Product code to analyse.
        delivery_count: Forwarded unchanged to ``generate_report``.
    """
    sample = generate_recommend_sample(city_uuid, product_id)
    feats_sample, filter_dict, cust_list = sample

    ordered_codes = dao.get_product_from_order(city_uuid)["product_code"].unique().tolist()
    if product_id not in ordered_codes:
        # New cigarette with no order history: take the Item2Vec route.
        recommend_data = item2vec.get_recommend_cust_list(product_id).to_dict("records")
    else:
        # Product already seen in orders: rank candidates with GBDT-LR.
        recommend_data = gbdtlr_model.get_recommend_list(feats_sample, cust_list)

    shap_result = gbdtlr_model.generate_shap_interance(feats_sample)
    # "./data" is presumably the report output directory — TODO confirm.
    generate_report(city_uuid, shap_result, filter_dict, recommend_data, delivery_count, "./data")
|
|
|
|
|
-
|
|
|
|
|
def eval(city_uuid, product_code):
    """Validate the recommendation effect and save it as a CSV.

    The report built by ``get_cust_list_from_history_order`` is written to
    ``./data/效果验证表.csv`` without the index column.

    Note: this name shadows the builtin ``eval`` inside this module; it is
    kept because callers use it.

    Args:
        city_uuid: City identifier.
        product_code: Product code to validate.
    """
    report = get_cust_list_from_history_order(city_uuid, product_code)
    report.to_csv("./data/效果验证表.csv", index=False)
|
|
|
|
|
-
|
|
|
|
|
def generate_similarity_product(product_code):
    """Export the products most similar to ``product_code`` as an Excel sheet.

    The similarity table comes from the Item2Vec model. A fixed set of
    columns is selected in a fixed order, renamed to Chinese headers and
    written to ``./data/相似卷烟表.xlsx`` without the index column.

    Args:
        product_code: Product code whose similar products are exported.
    """
    # Source column -> exported header. Insertion order is the column order
    # of the exported sheet.
    headers = {
        "product_name": "卷烟名称",  # cigarette name
        "similarity": "相似度",  # similarity
        "brand_name": "品牌名称",  # brand name
        "factory_name": "生产厂商",  # manufacturer
        "is_low_tar": "低焦油卷烟",  # low-tar cigarette
        "is_medium": "中支烟",  # medium-size cigarette
        "is_tiny": "细支烟",  # slim cigarette
        "is_coarse": "粗支烟",  # thick cigarette
        "is_exploding_beads": "爆珠烟",  # capsule ("exploding bead") cigarette
        "is_abnormity": "异形包装",  # irregular packaging
        "is_cig": "雪茄烟",  # cigar
        "is_chuangxin": "创新品类",  # innovative category
        "direct_retail_price": "卷烟建议零售价",  # suggested retail price
        "tbc_total_length": "烟支总长度",  # total cigarette length
        "product_style": "包装类型",  # package type
    }

    similarity_table = item2vec.generate_product_similarity_map(product_code)
    selected = similarity_table[list(headers)]
    renamed = selected.rename(columns=headers)
    renamed.to_excel("./data/相似卷烟表.xlsx", index=False)
|
|
|
|
|
-
|
|
|
|
|
def generate_delivery_strategy():
    """Placeholder for delivery-strategy generation; not implemented yet."""
    return None
|
|
|
|
|
-
|
|
|
|
|
def run():
    """Placeholder entry point; not implemented yet."""
    return None
|
|
|
|
|
-
|
|
|
|
|
if __name__ == '__main__':
    # Manual run against one fixed city: build the SHAP report and the
    # similar-product sheet for product 350139, then write the
    # recommendation-effect check for product 350355.
    generate_features_shap("00000000000000000000000011445301", "350139", delivery_count=5000)
    generate_similarity_product("350139")
    eval("00000000000000000000000011445301", "350355")

    # Disabled scratch code below, kept for reference.

    # recommend_list = get_recommend_list_by_gbdt_lr("00000000000000000000000011445301", "350139")
    # recommend_list = pd.DataFrame(recommend_list)
    # recommend_list.to_csv("./data/recommend_list.csv", index=False, encoding="utf-8-sig")

    # Fetch the order data for "Longjun" (presumably a person or customer
    # name, cf. junlong.csv below — TODO confirm).
    # data = dao.get_order_by_cust("00000000000000000000000011445301", "445323105795")
    # data = data.groupby(["cust_code", "product_code", "product_name"], as_index=False)["sale_qty"].sum()
    # data.to_csv("./data/cust.csv", index=False)

    # city_uuid = "00000000000000000000000011445301"
    # order_data = dao.get_order_by_cust("00000000000000000000000011445301", "445323105795")
    # order_data["sale_qty"] = order_data["sale_qty"].fillna(0)
    # order_data = order_data.infer_objects(copy=False)
    # order_data = order_data.groupby(["cust_code", "product_code", "product_name"], as_index=False)["sale_qty"].sum()

    # cust_data = dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
    # sample_data_clear(cust_data, CustConfig)
    # shop_data = dao.load_shopping_data(city_uuid)[ShopConfig.FEATURE_COLUMNS]
    # sample_data_clear(shop_data, ShopConfig)
    # cust_ids = shop_data.set_index("cust_code")
    # cust_data = cust_data.join(cust_ids, on="BB_RETAIL_CUSTOMER_CODE", how="inner")

    # product_data = dao.load_product_data(city_uuid)[ProductConfig.FEATURE_COLUMNS]
    # sample_data_clear(product_data, ProductConfig)

    # order_data = order_data.merge(product_data, on="product_code", how="inner")
    # order_data = order_data.merge(cust_data, left_on='cust_code', right_on='BB_RETAIL_CUSTOMER_CODE', how="inner")

    # result = gbdtlr_model.inference_from_sample(order_data)
    # result.to_csv("./data/junlong.csv", index=False)
|
|
|