|
@@ -5,6 +5,7 @@ from models.item2vec.inference import Item2VecModel
|
|
|
from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
|
|
from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
|
|
|
from models.rank.data.utils import sample_data_clear
|
|
from models.rank.data.utils import sample_data_clear
|
|
|
from models.rank.gbdt_lr_inference import GbdtLrModel
|
|
from models.rank.gbdt_lr_inference import GbdtLrModel
|
|
|
|
|
+import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
class Recommend:
|
|
class Recommend:
|
|
@@ -33,7 +34,7 @@ class Recommend:
|
|
|
recall_list = self._redis.zrevrange(key, 0, -1, withscores=False)
|
|
recall_list = self._redis.zrevrange(key, 0, -1, withscores=False)
|
|
|
return recall_list
|
|
return recall_list
|
|
|
|
|
|
|
|
- def _get_recal_cust(self, product_id, recall_count):
|
|
|
|
|
|
|
+ def get_recal_cust(self, product_id, recall_count):
|
|
|
"""通过协同过滤和热度召回,召回待推荐商户列表"""
|
|
"""通过协同过滤和热度召回,召回待推荐商户列表"""
|
|
|
itemcf_recall_list = self._get_itemcf_recall(product_id)
|
|
itemcf_recall_list = self._get_itemcf_recall(product_id)
|
|
|
hot_recall_list = self._get_hot_recall()
|
|
hot_recall_list = self._get_hot_recall()
|
|
@@ -48,11 +49,10 @@ class Recommend:
|
|
|
|
|
|
|
|
return result[:recall_count]
|
|
return result[:recall_count]
|
|
|
|
|
|
|
|
- def get_recommend_list_by_gbdtlr(self, product_id, recall_count=100, discovery_count=500):
|
|
|
|
|
|
|
+ def get_recommend_list_by_gbdtlr(self, product_id, recall_count=500):
|
|
|
"""根据gbdt_lr获取商户推荐列表"""
|
|
"""根据gbdt_lr获取商户推荐列表"""
|
|
|
# 获取召回的商户列表
|
|
# 获取召回的商户列表
|
|
|
- recall_cust_list = self._get_recal_cust(product_id, recall_count)
|
|
|
|
|
- print(len(recall_cust_list))
|
|
|
|
|
|
|
+ recall_cust_list = self.get_recal_cust(product_id, recall_count)
|
|
|
# 获取卷烟数据
|
|
# 获取卷烟数据
|
|
|
product_data = self._dao.get_product_by_id(self._city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
|
|
product_data = self._dao.get_product_by_id(self._city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
|
|
|
product_data = sample_data_clear(product_data, ProductConfig)
|
|
product_data = sample_data_clear(product_data, ProductConfig)
|
|
@@ -68,13 +68,47 @@ class Recommend:
|
|
|
|
|
|
|
|
# 获取推理用的feats_map
|
|
# 获取推理用的feats_map
|
|
|
feats_map = self._gbdtlr_model.generate_feats_map(product_data, cust_data)
|
|
feats_map = self._gbdtlr_model.generate_feats_map(product_data, cust_data)
|
|
|
- print(len(cust_data))
|
|
|
|
|
recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
|
|
recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
|
|
|
-
|
|
|
|
|
return recommend_list
|
|
return recommend_list
|
|
|
|
|
|
|
|
|
|
def get_recommend_list_by_item2vec(self, product_id, recall_count=500):
    """Return the item2vec-based customer recommendation list for a product.

    The item2vec model is asked for the top ``recall_count`` customers for
    ``product_id``. The ``sale_qty`` column is removed from its result and
    the remaining rows are converted to a list of per-row dicts.

    Args:
        product_id: Identifier of the product to recommend customers for.
        recall_count: Passed to the model as ``top`` and also used as a
            hard cap on the length of the returned list.

    Returns:
        A list of dicts (one per row), at most ``recall_count`` long.
    """
    # NOTE(review): the model result is used like a pandas DataFrame
    # (``drop(columns=...)`` / ``to_dict(orient=...)``) - confirm its type.
    candidates = self._item2vec_model.get_recommend_cust_list(product_id, top=recall_count)
    records = candidates.drop(columns=["sale_qty"]).to_dict(orient='records')
    # Slice again so the cap holds whatever the model returned.
    return records[:recall_count]
|
|
|
|
|
+
|
|
|
|
|
def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
    """Split ``delivery_count`` units across the recommended customers.

    Each record receives a share proportional to its ``recommend_score``.
    Shares are truncated to whole units and the units left over are handed
    out one by one to the records with the largest fractional remainders
    (largest-remainder method).

    Args:
        recommend_list: List of dict records; every record must contain a
            numeric ``recommend_score`` key. Scores are assumed to be
            non-negative - TODO confirm against the scoring models.
        delivery_count: Total number of units to distribute.

    Returns:
        A list of dicts: the input records with an added integer
        ``delivery_count`` key, ordered by ``recommend_score`` descending.
        An empty input yields an empty list.

    Raises:
        KeyError: If the records do not contain ``recommend_score``.
    """
    recommend_data = pd.DataFrame(recommend_list)
    if recommend_data.empty:
        # Nothing to allocate to; avoids a KeyError on the missing column.
        return []

    scores = recommend_data["recommend_score"]
    total_score = scores.sum()

    # 1. Theoretical (fractional) quota of every customer.
    if total_score > 0:
        delivery_float = scores / total_score * delivery_count
    else:
        # A zero total would produce NaN quotas (0 / 0) that cannot be cast
        # to int, so fall back to an even split between all customers.
        delivery_float = pd.Series(
            delivery_count / len(recommend_data), index=recommend_data.index
        )

    # 2. Truncate to whole units to get the base quota.
    recommend_data["delivery_count"] = delivery_float.astype(int)

    # 3. Fractional remainders, largest first. A stable sort keeps the input
    #    order between equal remainders, so the result is deterministic.
    recommend_data["remainder"] = delivery_float - recommend_data["delivery_count"]
    recommend_data = recommend_data.sort_values(
        "remainder", ascending=False, kind="mergesort"
    )

    # 4. Hand out the leftover units by descending remainder. Clamp at zero:
    #    a negative value would make the slice below select "all but the
    #    last N rows" and silently over-allocate.
    remaining = max(int(delivery_count - recommend_data["delivery_count"].sum()), 0)
    recommend_data.iloc[:remaining, recommend_data.columns.get_loc("delivery_count")] += 1

    recommend_data = recommend_data.drop(columns=["remainder"])
    recommend_data = recommend_data.sort_values(
        "recommend_score", ascending=False, kind="mergesort"
    )
    return recommend_data.to_dict(orient="records")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Manual smoke run: recommend customers for one product with item2vec,
    # allocate the default delivery total across them, and print each row.
    city_uuid = "00000000000000000000000011445301"
    product_id = '350139'
    recommend = Recommend(city_uuid)
    allocation = recommend.get_recommend_and_delivery(
        recommend.get_recommend_list_by_item2vec(product_id)
    )
    for row in allocation:
        print(row)
|