|
|
@@ -1,9 +1,9 @@
|
|
|
#!/usr/bin/env python
|
|
|
# -*- encoding: utf-8 -*-
|
|
|
'''
|
|
|
-@filename : itemCF.py
|
|
|
-@description : 基于物品的协同过滤算法
|
|
|
-@time : 2025/01/21/00
|
|
|
+@filename : ShopScore.py
|
|
|
+@description : 品规-商户-评分矩阵:品规(用户)对商铺(物品)的评分矩阵,将结果保存在score.csv文件中
|
|
|
+@time : 2025/01/31/02
|
|
|
@author : Sherlock1011 & Min1027
|
|
|
@Version : 1.0
|
|
|
'''
|
|
|
@@ -42,66 +42,53 @@ class ItemCFModel:
|
|
|
else:
|
|
|
return (column - column.min()) / (column.max() - column.min())
|
|
|
|
|
|
- # 按照品规分组归一化和计算热度值
|
|
|
+ # 按照品规分组归一化并计算评分
|
|
|
def calculate_heart_per_product(self, group):
    """Normalise the weighted columns of one group and compute its scores.

    Every column named in ``self.weights`` is rescaled with
    ``self.standardize_column``. ``OUT_STOCK_DAYS`` is inverted
    (``1 - value``) after rescaling, so a larger raw value yields a smaller
    contribution. The per-row ``SCORE`` is the weighted sum of the rescaled
    columns multiplied by 100.

    Args:
        group: DataFrame containing at least the columns listed in
            ``self.weights``. It is not modified.

    Returns:
        A copy of ``group`` whose weighted columns are rescaled and which
        has an additional ``SCORE`` column. Scores are ``Decimal`` values
        when the weights are ``Decimal`` (the weights must support
        multiplication with ``Decimal``).
    """
    # Work on a copy: mutating the frame handed to a groupby UDF is
    # explicitly unsupported by pandas and can corrupt the caller's data.
    scored = group.copy()

    for column in self.weights:
        normalized = self.standardize_column(scored[column])
        if column == "OUT_STOCK_DAYS":
            normalized = 1 - normalized
        scored[column] = normalized

    scored["SCORE"] = scored.apply(
        lambda row: sum(Decimal(row[col]) * weight for col, weight in self.weights.items()) * 100,
        axis=1,
    )
    return scored
|
|
|
|
|
|
- # 主算法函数:计算推荐结果
|
|
|
def score(self, order_data):
    """Build the product-to-shop score table.

    Rows are grouped by ``PRODUCT_CODE``; each group is scored with
    ``self.calculate_heart_per_product``; the scored groups are then
    concatenated and ordered by product code (ascending) and score
    (descending).

    Args:
        order_data: DataFrame with a ``PRODUCT_CODE`` column, a
            ``BB_RETAIL_CUSTOMER_CODE`` column and whatever columns the
            per-group scoring step requires.

    Returns:
        DataFrame with exactly the columns ``PRODUCT_CODE``,
        ``BB_RETAIL_CUSTOMER_CODE`` and ``SCORE``. An input with no groups
        yields an empty frame with those three columns.
    """
    # Local import keeps this method self-contained.
    import pandas as pd

    result_columns = ["PRODUCT_CODE", "BB_RETAIL_CUSTOMER_CODE", "SCORE"]

    # Iterate the groups explicitly instead of using groupby.apply: the
    # sub-frames produced by iteration always contain the grouping column,
    # whereas apply's handling of it is deprecated/changed in recent pandas.
    # Each group is copied so the scoring step can never touch order_data.
    scored_groups = [
        self.calculate_heart_per_product(group.copy())
        for _, group in order_data.groupby("PRODUCT_CODE")
    ]

    # No groups means no SCORE column would exist; return an empty table
    # instead of failing in sort_values.
    if not scored_groups:
        return pd.DataFrame(columns=result_columns)

    df_result = pd.concat(scored_groups, ignore_index=True)
    df_result = df_result.sort_values(by=["PRODUCT_CODE", "SCORE"], ascending=[True, False])

    return df_result[result_columns]
|
|
|
|
|
|
def load_data_from_dataset(tablename="mock_order", query_text="*"):
    """Read order data from the database and clean it.

    Args:
        tablename: Table name forwarded to ``Mysql.load_data``. Defaults to
            ``"mock_order"``, the table previously hard-coded here.
        query_text: Second argument forwarded to ``Mysql.load_data``.
            Defaults to ``"*"`` (presumably the column selection; confirm
            against the ``Mysql`` client).

    Returns:
        The loaded DataFrame with duplicate rows removed and missing values
        replaced by 0.
    """
    client = Mysql()
    df = client.load_data(tablename, query_text)

    # Drop duplicate rows and fill missing values before scoring.
    df.drop_duplicates(inplace=True)
    df.fillna(0, inplace=True)
    return df
|
|
|
|
|
|
if __name__ == "__main__":
    from pathlib import Path

    # Create the scoring model instance.
    item_cf_algorithm = ItemCFModel()

    # Load the order data from the database.
    order_data = load_data_from_dataset()

    # Compute the product-to-shop score table.
    scores = item_cf_algorithm.score(order_data)

    # Save the scores as CSV; create the target directory first so a fresh
    # checkout does not fail with OSError on the write.
    output_path = Path("./models/recall/itemCF/matrix/score.csv")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    scores.to_csv(output_path, index=False, encoding="utf-8")
|
|
|
+
|