Browse Source

ShopScore更名为user_item_score并增加共现矩阵计算代码

Sherlock1011 1 year ago
parent
commit
821a7d8079

BIN
models/recall/itemCF/__pycache__/ShopScore.cpython-310.pyc


BIN
models/recall/itemCF/__pycache__/itemCF.cpython-310.pyc


+ 63 - 0
models/recall/itemCF/calculate_co_occurrence_matrix.py

@@ -0,0 +1,63 @@
+import pandas as pd
+import numpy as np
+
+from itertools import combinations
+from dao.mysql_client import Mysql
+
+def load_data_from_dataset():
+    """Load the order data from the database and apply basic cleaning.
+
+    Queries the "mock_order" table through the project's ``Mysql`` client,
+    passing "*" as the query text (presumably "all columns" -- confirm
+    against ``Mysql.load_data``), then cleans the result in place.
+
+    Returns:
+        The object returned by ``Mysql.load_data`` after duplicate rows
+        have been dropped and missing values replaced with 0.
+    """
+    orders = Mysql().load_data("mock_order", "*")
+
+    # Clean in place: drop duplicate rows, then fill missing values with 0.
+    orders.drop_duplicates(inplace=True)
+    orders.fillna(0, inplace=True)
+    return orders
+
+def build_co_occurence_matrix(order_data):
+    """Build the shop-by-shop co-occurrence matrix.
+
+    Two shops co-occur once for every product (``PRODUCT_CODE``) that both
+    of them appear with in ``order_data``. Each shop is counted at most once
+    per product, so repeated order rows for the same shop/product pair
+    neither inflate the counts nor create self-pairs on the diagonal.
+
+    Args:
+        order_data: DataFrame with the columns "BB_RETAIL_CUSTOMER_CODE"
+            (shop code) and "PRODUCT_CODE" (product code).
+
+    Returns:
+        A tuple ``(matrix, shops)``. ``shops`` holds the unique shop codes
+        in order of first appearance; ``matrix`` is a symmetric integer
+        array of shape ``(len(shops), len(shops))`` whose row/column ``i``
+        corresponds to ``shops[i]``.
+    """
+    shop_column = "BB_RETAIL_CUSTOMER_CODE"
+
+    # Unique shop codes; their position defines the matrix row/column index.
+    shops = order_data[shop_column].unique()
+    num_shops = len(shops)
+    shops_to_index = {shop: idx for idx, shop in enumerate(shops)}
+
+    # Full symmetric matrix (both triangles are filled below).
+    co_occurrence_matrix = np.zeros((num_shops, num_shops), dtype=int)
+
+    # Distinct shops per product. De-duplicating here is what prevents
+    # (shop, shop) self-pairs and double counting of the same shop pair.
+    shops_per_product = order_data.groupby("PRODUCT_CODE")[shop_column].unique()
+
+    for shops_in_product in shops_per_product:
+        for shop1, shop2 in combinations(shops_in_product, 2):
+            idx1 = shops_to_index[shop1]
+            idx2 = shops_to_index[shop2]
+            # Update both halves so the matrix stays symmetric.
+            co_occurrence_matrix[idx1, idx2] += 1
+            co_occurrence_matrix[idx2, idx1] += 1
+    return co_occurrence_matrix, shops
+
+def save_co_occurrence_matrix(matrix, shops, save_path):
+    """Write the co-occurrence matrix to a UTF-8 CSV labelled by shop code.
+
+    Args:
+        matrix: Square 2-D array of co-occurrence counts.
+        shops: Shop codes used as both the row index and the column header;
+            must match the matrix dimensions.
+        save_path: Destination file path, or any other target accepted by
+            ``DataFrame.to_csv``.
+    """
+    import os
+
+    # to_csv does not create missing directories, so make sure the parent
+    # folder exists when the target is a filesystem path. Other targets
+    # (e.g. file-like buffers) are passed through untouched.
+    if isinstance(save_path, (str, os.PathLike)):
+        parent_dir = os.path.dirname(os.fspath(save_path))
+        if parent_dir:
+            os.makedirs(parent_dir, exist_ok=True)
+
+    matrix_df = pd.DataFrame(matrix, index=shops, columns=shops)
+    matrix_df.to_csv(save_path, index=True, encoding="utf-8")
+    
+if __name__ == "__main__":
+    # Script entry point: load the orders, build the shop co-occurrence
+    # matrix and write it to a CSV file (path is relative to the current
+    # working directory).
+    output_csv = "./models/recall/itemCF/matrix/occurrence.csv"
+
+    matrix, shop_codes = build_co_occurence_matrix(load_data_from_dataset())
+    save_co_occurrence_matrix(matrix, shop_codes, output_csv)
+    

+ 3 - 5
models/recall/itemCF/ShopScore.py → models/recall/itemCF/user_item_score.py

@@ -8,14 +8,12 @@
 @Version     : 1.0
 '''
 import joblib
-import pandas as pd
-import numpy as np
-from sqlalchemy import create_engine, text
+
 from dao.mysql_client import Mysql
 from decimal import Decimal
 
 # 算法封装成一个类
-class ShopScore:
+class UserItemScore:
     """TODO 1. 将结果保存到redis数据库中"""
     def __init__(self):
         self.weights = {
@@ -81,7 +79,7 @@ def load_data_from_dataset():
  
 if __name__ == "__main__":
     # 创建一个 ItemCF 类的实例
-    item_cf_algorithm = ShopScore()
+    item_cf_algorithm = UserItemScore()
     
     # 读取数据
     order_data = load_data_from_dataset()