|
@@ -0,0 +1,63 @@
|
|
|
|
|
+import pandas as pd
|
|
|
|
|
+import numpy as np
|
|
|
|
|
+
|
|
|
|
|
+from itertools import combinations
|
|
|
|
|
+from dao.mysql_client import Mysql
|
|
|
|
|
+
|
|
|
|
|
def load_data_from_dataset():
    """Read the order table from the database and clean it.

    Asks the project ``Mysql`` client for the ``mock_order`` table with the
    query text ``"*"`` (presumably "all columns" -- confirm against
    ``Mysql.load_data``), then removes duplicated rows and replaces missing
    values with ``0``.

    Returns:
        The cleaned object returned by ``Mysql.load_data`` (used as a pandas
        DataFrame by the rest of this file).
    """
    orders = Mysql().load_data("mock_order", "*")

    # Clean in place: drop fully duplicated rows first, then fill the
    # remaining missing values with 0.
    orders.drop_duplicates(inplace=True)
    orders.fillna(0, inplace=True)
    return orders
|
|
|
|
|
+
|
|
|
|
|
def build_co_occurence_matrix(order_data):
    """Build the shop-by-shop co-occurrence matrix from order rows.

    Two different shops co-occur once for every product code that both of
    them appear with in ``order_data``. Repeated rows for the same
    (product, shop) combination are counted only once, so a shop never
    co-occurs with itself and repeat orders do not inflate the counts.

    Args:
        order_data: DataFrame containing at least the columns
            ``BB_RETAIL_CUSTOMER_CODE`` (shop code) and ``PRODUCT_CODE``.

    Returns:
        tuple: ``(co_occurrence_matrix, shops)``. ``shops`` is the array of
        unique shop codes in order of first appearance.
        ``co_occurrence_matrix`` is a symmetric integer ndarray of shape
        ``(len(shops), len(shops))`` whose rows and columns follow ``shops``;
        its diagonal is zero.
    """
    # Unique shop codes; their position defines the matrix row/column.
    shops = order_data["BB_RETAIL_CUSTOMER_CODE"].unique()
    num_shops = len(shops)

    # Map every shop code to its row/column index.
    shops_to_index = {shop: idx for idx, shop in enumerate(shops)}

    co_occurrence_matrix = np.zeros((num_shops, num_shops), dtype=int)

    # For each product, the distinct shops that ordered it. Using unique()
    # instead of a plain list prevents self-pairs and double counting when a
    # shop has several order rows for the same product.
    shops_per_product = order_data.groupby("PRODUCT_CODE")[
        "BB_RETAIL_CUSTOMER_CODE"
    ].unique()

    for shops_in_product in shops_per_product:
        # A product ordered by a single shop yields no pairs.
        if len(shops_in_product) < 2:
            continue
        indices = np.array(
            [shops_to_index[shop] for shop in shops_in_product], dtype=np.intp
        )
        # Increment the whole block of shop pairs at once. The indices are
        # distinct, so each cell in the block is touched exactly once.
        co_occurrence_matrix[np.ix_(indices, indices)] += 1

    # The block updates also hit (shop, shop) cells; a shop does not co-occur
    # with itself, so clear the diagonal.
    np.fill_diagonal(co_occurrence_matrix, 0)
    return co_occurrence_matrix, shops
|
|
|
|
|
+
|
|
|
|
|
def save_co_occurrence_matrix(matrix, shops, save_path):
    """Write the co-occurrence matrix to a UTF-8 CSV file.

    The matrix is labelled with ``shops`` on both axes, so the CSV has the
    shop codes as its header row and as its first (index) column.

    Args:
        matrix: 2-D array of co-occurrence counts.
        shops: Shop codes labelling the rows and columns of ``matrix``.
        save_path: Destination passed to ``DataFrame.to_csv``. When it is a
            filesystem path, missing parent directories are created first.
    """
    import os

    # Create the target directory up front so to_csv does not fail on a
    # fresh checkout; non-path targets (e.g. open file handles) are left
    # untouched.
    if isinstance(save_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(save_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    matrix_df = pd.DataFrame(matrix, index=shops, columns=shops)
    matrix_df.to_csv(save_path, index=True, encoding="utf-8")
|
|
|
|
|
+
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the cleaned orders, build the shop
    # co-occurrence matrix, and write it to the itemCF matrix CSV.
    output_csv = "./models/recall/itemCF/matrix/occurrence.csv"
    matrix, shop_codes = build_co_occurence_matrix(load_data_from_dataset())
    save_co_occurrence_matrix(matrix, shop_codes, output_csv)
|
|
|
|
|
+
|