|
|
@@ -3,6 +3,7 @@ import numpy as np
|
|
|
|
|
|
from itertools import combinations
|
|
|
from dao.mysql_client import Mysql
|
|
|
+from tqdm import tqdm
|
|
|
|
|
|
def load_data_from_dataset():
|
|
|
"""从数据库中读取数据"""
|
|
|
@@ -44,9 +45,31 @@ def build_co_occurence_matrix(order_data):
|
|
|
# 更新共现矩阵
|
|
|
co_occurrence_matrix[idx1, idx2] += 1
|
|
|
co_occurrence_matrix[idx2, idx1] += 1
|
|
|
- return co_occurrence_matrix, shops
|
|
|
+ return co_occurrence_matrix, shops, shops_to_index
|
|
|
|
|
|
-def save_co_occurrence_matrix(matrix, shops, save_path):
|
|
|
def calculate_similarity_matrix(co_occurrence_matrix, order_data, shops_to_index):
    """Compute a shop-to-shop similarity matrix from co-occurrence counts.

    Each entry is ``co_occurrence[i, j] / sqrt(count_i * count_j)``, where
    ``count_k`` is the number of rows in ``order_data`` whose
    ``BB_RETAIL_CUSTOMER_CODE`` equals shop ``k``. The diagonal is forced
    to 1.0.

    Args:
        co_occurrence_matrix: Square array of co-occurrence counts. Row and
            column ``i`` are expected to belong to the shop whose value in
            ``shops_to_index`` is ``i``.
        order_data: DataFrame with a ``BB_RETAIL_CUSTOMER_CODE`` column.
        shops_to_index: Mapping from shop code to its row/column index.
            Indices are assumed to cover ``0 .. len(shops_to_index) - 1``.

    Returns:
        A new float64 array with the same shape as the denominator matrix.
        The input matrix is not modified.
    """
    # Number of order rows per shop.
    shop_counts = order_data.groupby("BB_RETAIL_CUSTOMER_CODE").size()

    # Place every count at the position given by the mapping itself instead of
    # relying on the dict's iteration order matching the matrix layout.
    # A shop without any order row gets a count of 0 rather than a KeyError.
    counts = np.zeros(len(shops_to_index), dtype=np.float64)
    for shop, idx in shops_to_index.items():
        counts[idx] = shop_counts.get(shop, 0)

    # Denominator: sqrt(count_i * count_j) for every pair of shops.
    denominator = np.sqrt(np.outer(counts, counts))

    # Divide only where the denominator is non-zero; the remaining entries
    # stay at 0.0 so no nan/inf leaks into the result.
    similarity_matrix = np.zeros(denominator.shape, dtype=np.float64)
    np.divide(
        co_occurrence_matrix,
        denominator,
        out=similarity_matrix,
        where=denominator > 0,
    )

    # A shop is always fully similar to itself.
    np.fill_diagonal(similarity_matrix, 1.0)

    return similarity_matrix
|
|
|
+
|
|
|
+def save_matrix(matrix, shops, save_path):
|
|
|
"""
|
|
|
保存共现矩阵
|
|
|
"""
|
|
|
@@ -54,10 +77,13 @@ def save_co_occurrence_matrix(matrix, shops, save_path):
|
|
|
matrix_df.to_csv(save_path, index=True, encoding="utf-8")
|
|
|
|
|
|
if __name__ == "__main__":
    # Output locations for the matrices this script can produce.
    co_occurrence_save_path = "./models/recall/itemCF/matrix/occurrence.csv"
    similarity_matrix_save_path = "./models/recall/itemCF/matrix/similarity.csv"

    # Load the orders, then derive the co-occurrence matrix together with the
    # shop list and the shop -> index mapping.
    order_data = load_data_from_dataset()
    co_occurrence_matrix, shops, shops_to_index = build_co_occurence_matrix(order_data)

    # Writing the raw co-occurrence matrix is currently disabled:
    # save_matrix(co_occurrence_matrix, shops, co_occurrence_save_path)

    # Only the similarity matrix is written to disk.
    save_matrix(
        calculate_similarity_matrix(co_occurrence_matrix, order_data, shops_to_index),
        shops,
        similarity_matrix_save_path,
    )
|
|
|
|