|
@@ -1,14 +1,16 @@
|
|
|
|
|
|
|
|
from database import RedisDatabaseHelper, MySqlDao
|
|
from database import RedisDatabaseHelper, MySqlDao
|
|
|
|
|
+from models.item2vec import Item2VecModel
|
|
|
from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
|
|
from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
|
|
|
from models.rank.data.utils import sample_data_clear
|
|
from models.rank.data.utils import sample_data_clear
|
|
|
from models.rank.gbdt_lr_inference import GbdtLrModel
|
|
from models.rank.gbdt_lr_inference import GbdtLrModel
|
|
|
-from utils.result_process import split_relation_subtable, generate_report
|
|
|
|
|
|
|
+from utils.result_process import get_cust_list_from_history_order, split_relation_subtable, generate_report
|
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
|
|
|
|
|
# Shared handles, created once when the module is imported.
# NOTE(review): the same 32-character ID names the GBDT-LR weights directory
# and is passed to Item2VecModel; it also matches the city UUID used in the
# __main__ block, so it is presumably a city identifier -- confirm.
_MODEL_ID = "00000000000000000000000011445301"

redis = RedisDatabaseHelper().redis
dao = MySqlDao()
gbdtlr_model = GbdtLrModel(f"./models/rank/weights/{_MODEL_ID}/gbdtlr_model.pkl")
item2vec = Item2VecModel(_MODEL_ID)
|
|
|
|
|
|
|
|
def get_itemcf_recall(city_uuid, product_id):
|
|
def get_itemcf_recall(city_uuid, product_id):
|
|
|
"""协同召回"""
|
|
"""协同召回"""
|
|
@@ -23,7 +25,8 @@ def get_hot_recall(city_uuid):
|
|
|
return recall_list
|
|
return recall_list
|
|
|
|
|
|
|
|
def get_recall_cust(city_uuid, product_id, recall_count):
|
|
def get_recall_cust(city_uuid, product_id, recall_count):
|
|
|
- """根据协同过滤和热度召回召回商户"""
|
|
|
|
|
|
|
+ """根据协同过滤和热度召回召回商户
|
|
|
|
|
+ """
|
|
|
itemcf_recall_list = get_itemcf_recall(city_uuid, product_id)
|
|
itemcf_recall_list = get_itemcf_recall(city_uuid, product_id)
|
|
|
hot_recall_list = get_hot_recall(city_uuid)
|
|
hot_recall_list = get_hot_recall(city_uuid)
|
|
|
|
|
|
|
@@ -39,11 +42,18 @@ def get_recall_cust(city_uuid, product_id, recall_count):
|
|
|
|
|
|
|
|
def generate_recommend_sample(city_uuid, product_id):
|
|
def generate_recommend_sample(city_uuid, product_id):
|
|
|
"""生成预测数据集"""
|
|
"""生成预测数据集"""
|
|
|
- recall_count = 1000
|
|
|
|
|
- cust_list = get_recall_cust(city_uuid, product_id, recall_count)
|
|
|
|
|
|
|
+ product_in_order = dao.get_product_from_order(city_uuid)["product_code"].unique().tolist()
|
|
|
|
|
+ if product_id in product_in_order:
|
|
|
|
|
+ recall_count = 1000
|
|
|
|
|
+ cust_list = get_recall_cust(city_uuid, product_id, recall_count)
|
|
|
|
|
+ else:
|
|
|
|
|
+ cust_list = item2vec.get_recommend_cust_list(product_id)["cust_code"].to_list()
|
|
|
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 获取卷烟的信息
|
|
|
product_data = dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
|
|
product_data = dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
|
|
|
filter_dict = product_data.to_dict("records")[0]
|
|
filter_dict = product_data.to_dict("records")[0]
|
|
|
|
|
+
|
|
|
cust_data = dao.get_cust_by_ids(city_uuid, cust_list)[CustConfig.FEATURE_COLUMNS]
|
|
cust_data = dao.get_cust_by_ids(city_uuid, cust_list)[CustConfig.FEATURE_COLUMNS]
|
|
|
shop_data = dao.get_shop_by_ids(city_uuid, cust_list)[ShopConfig.FEATURE_COLUMNS]
|
|
shop_data = dao.get_shop_by_ids(city_uuid, cust_list)[ShopConfig.FEATURE_COLUMNS]
|
|
|
|
|
|
|
@@ -58,7 +68,8 @@ def generate_recommend_sample(city_uuid, product_id):
|
|
|
|
|
|
|
|
return feats_map, filter_dict, cust_list
|
|
return feats_map, filter_dict, cust_list
|
|
|
|
|
|
|
|
-def get_recommend_list(city_uuid, product_id):
|
|
|
|
|
|
|
def get_recommend_list_by_gbdt_lr(city_uuid, product_id):
    """Score candidate customers with the GBDT-LR model and return the result.

    Intended for products that already appear in the historical orders.

    Args:
        city_uuid: Identifier of the city whose data is queried.
        product_id: Code of the product to recommend.

    Returns:
        Whatever ``gbdtlr_model.get_recommend_list`` returns for the generated
        feature sample and customer list.
    """
    # The second element (the product filter dict) is not needed here.
    features, _unused_filter, customers = generate_recommend_sample(city_uuid, product_id)
    return gbdtlr_model.get_recommend_list(features, customers)
|
|
@@ -69,11 +80,19 @@ def gbdt_lr_inference(city_uuid, product_id):
|
|
|
|
|
|
|
|
def generate_features_shap(city_uuid, product_id, delivery_count):
    """Generate the feature/SHAP report for one product in one city.

    Builds the prediction sample, chooses the recommendation source depending
    on whether the product appears in the city's order history, runs
    ``gbdtlr_model.generate_shap_interance`` on the sample and forwards
    everything to ``generate_report``.

    Args:
        city_uuid: Identifier of the city whose data is queried.
        product_id: Code of the product to recommend.
        delivery_count: Forwarded unchanged to ``generate_report``.

    Returns:
        None. The outcome is whatever ``generate_report`` produces; it is
        given "./data" as its last argument (presumably the output directory
        -- confirm against ``utils.result_process``).
    """
    feats_sample, filter_dict, cust_list = generate_recommend_sample(city_uuid, product_id)

    ordered_products = dao.get_product_from_order(city_uuid)["product_code"].unique().tolist()
    if product_id not in ordered_products:
        # Product has no order history (new product): use item2vec.
        recommend_data = item2vec.get_recommend_cust_list(product_id).to_dict("records")
    else:
        # Product already appears in the order history: score with GBDT-LR.
        recommend_data = gbdtlr_model.get_recommend_list(feats_sample, cust_list)

    # The SHAP step runs on the sample regardless of which branch was taken.
    shap_result = gbdtlr_model.generate_shap_interance(feats_sample)
    generate_report(city_uuid, shap_result, filter_dict, recommend_data, delivery_count, "./data")
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
def eval(city_uuid, product_code):
    """Validate recommendation quality and dump the result to a CSV file.

    Calls ``get_cust_list_from_history_order`` for the given city and product
    and writes the returned frame to "./data/eval.csv" without the index.

    NOTE(review): this name shadows the builtin ``eval`` inside this module;
    it is kept unchanged so existing callers keep working.

    Args:
        city_uuid: Identifier of the city whose data is queried.
        product_code: Code of the product being evaluated.
    """
    get_cust_list_from_history_order(city_uuid, product_code).to_csv(
        "./data/eval.csv", index=False
    )
|
|
|
|
|
|
|
|
def generate_delivery_strategy():
|
|
def generate_delivery_strategy():
|
|
|
|
|
|
|
@@ -83,8 +102,9 @@ def run():
|
|
|
pass
|
|
pass
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Ad-hoc script entry point: only the evaluation run is active.
    eval(
        "00000000000000000000000011445301",
        "350355",
    )

    # Alternative runs, kept disabled:
    # generate_features_shap("00000000000000000000000011445301", "350139", delivery_count=5000)
    # recommend_list = get_recommend_list_by_gbdt_lr("00000000000000000000000011445301", "350139")
    # recommend_list = pd.DataFrame(recommend_list)
    # recommend_list.to_csv("./data/recommend_list.csv", index=False, encoding="utf-8-sig")
|
|
|
|
|
|