|
@@ -19,7 +19,7 @@ class ItemCFModel:
|
|
|
|
|
|
|
|
def process_product(product_code, scores):
|
|
def process_product(product_code, scores):
|
|
|
# 获取热度最高的n个商户
|
|
# 获取热度最高的n个商户
|
|
|
- top_n_shops = scores.nlargest(n, "SCORE")["BB_RETAIL_CUSTOMER_CODE"].values
|
|
|
|
|
|
|
+ top_n_shops = scores.nlargest(n, "score")["cust_code"].values
|
|
|
top_n_indices = [self._shop_index[shop] for shop in top_n_shops]
|
|
top_n_indices = [self._shop_index[shop] for shop in top_n_shops]
|
|
|
|
|
|
|
|
# 找到每个商户最相似的k个商户
|
|
# 找到每个商户最相似的k个商户
|
|
@@ -40,7 +40,7 @@ class ItemCFModel:
|
|
|
interest_score = 0
|
|
interest_score = 0
|
|
|
for shop_idx in top_n_indices:
|
|
for shop_idx in top_n_indices:
|
|
|
if self._index_shop[candidate_idx] in similar_shops[self._index_shop[shop_idx]]:
|
|
if self._index_shop[candidate_idx] in similar_shops[self._index_shop[shop_idx]]:
|
|
|
- shop_score = scores[scores["BB_RETAIL_CUSTOMER_CODE"]==self._index_shop[shop_idx]]["SCORE"].values[0]
|
|
|
|
|
|
|
+ shop_score = scores[scores["cust_code"]==self._index_shop[shop_idx]]["score"].values[0]
|
|
|
interest_score += shop_score * self._similarity_matrix[shop_idx, candidate_idx]
|
|
interest_score += shop_score * self._similarity_matrix[shop_idx, candidate_idx]
|
|
|
interest_scores[self._index_shop[candidate_idx]] = interest_score
|
|
interest_scores[self._index_shop[candidate_idx]] = interest_score
|
|
|
|
|
|
|
@@ -52,7 +52,7 @@ class ItemCFModel:
|
|
|
|
|
|
|
|
# 并行处理每个品规
|
|
# 并行处理每个品规
|
|
|
results = Parallel(n_jobs=n_jobs)(delayed(process_product)(product_code, scores)
|
|
results = Parallel(n_jobs=n_jobs)(delayed(process_product)(product_code, scores)
|
|
|
- for product_code, scores in tqdm(self._score_df.groupby("PRODUCT_CODE"), desc="train:正在计算候选得分"))
|
|
|
|
|
|
|
+ for product_code, scores in tqdm(self._score_df.groupby("product_code"), desc="train:正在计算候选得分"))
|
|
|
print(len(results))
|
|
print(len(results))
|
|
|
# 存储结果
|
|
# 存储结果
|
|
|
self._recommendations = {product_code: sorted_candidates for product_code, sorted_candidates in results}
|
|
self._recommendations = {product_code: sorted_candidates for product_code, sorted_candidates in results}
|
|
@@ -85,10 +85,10 @@ class ItemCFModel:
|
|
|
redis_db.redis.zadd(redis_key, zset_data)
|
|
redis_db.redis.zadd(redis_key, zset_data)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
|
- score_path = "./models/recall/itemCF/matrix/score.csv"
|
|
|
|
|
- similarity_path = "./models/recall/itemCF/matrix/similarity.csv"
|
|
|
|
|
- # itemcf_model = ItemCFModel()
|
|
|
|
|
- # itemcf_model.train(score_path, similarity_path, n_jobs=4)
|
|
|
|
|
|
|
+ score_path = "./data/itemcf/scores.csv"
|
|
|
|
|
+ similarity_path = "./data/itemcf/similarity.csv"
|
|
|
|
|
+ itemcf_model = ItemCFModel()
|
|
|
|
|
+ itemcf_model.train(score_path, similarity_path, "00000000000000000000000011445301", n_jobs=4)
|
|
|
# recommend_list = itemcf_model.inference(110111)
|
|
# recommend_list = itemcf_model.inference(110111)
|
|
|
# itemcf_model.to_redis_zset()
|
|
# itemcf_model.to_redis_zset()
|
|
|
# print(len(recommend_list))
|
|
# print(len(recommend_list))
|
|
@@ -98,6 +98,4 @@ if __name__ == "__main__":
|
|
|
# model = joblib.load("./itemCF.model")
|
|
# model = joblib.load("./itemCF.model")
|
|
|
# recommend_list = model.inference(110102)
|
|
# recommend_list = model.inference(110102)
|
|
|
# print(len(recommend_list))
|
|
# print(len(recommend_list))
|
|
|
- # print(recommend_list)
|
|
|
|
|
- data = pd.read_csv(similarity_path, index_col=0)
|
|
|
|
|
- print(data)
|
|
|
|
|
|
|
+ # print(recommend_list)
|