|
@@ -1,2 +1,79 @@
|
|
|
-#!/usr/bin/env python3
|
|
|
|
|
-# -*- coding:utf-8 -*-
|
|
|
|
|
|
|
+#!/usr/bin/env python
|
|
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
|
|
+'''
|
|
|
|
|
+@filename : itemCF.py
|
|
|
|
|
+@description : 基于物品的协同过滤算法
|
|
|
|
|
+@time : 2025/01/21/00
|
|
|
|
|
+@author : Sherlock1011 & Min1027
|
|
|
|
|
+@Version : 1.0
|
|
|
|
|
+'''
|
|
|
|
|
+import joblib
|
|
|
|
|
+import pandas as pd
|
|
|
|
|
+import numpy as np
|
|
|
|
|
+
|
|
|
|
|
+# 算法封装成一个类
|
|
|
|
|
class ItemCFModel:
    """Rank retail customers per product by a weighted indicator score.

    For every ``PRODUCT_CODE`` the indicator columns listed in
    ``self.weights`` are min-max scaled across the customers that ordered
    that product, combined into a weighted sum and multiplied by 100 to
    give ``FC_SCORE``.

    TODO 1: save the results to a Redis database.
    """

    def __init__(self):
        # Weight of every indicator column in the final score.
        # NOTE(review): the weights add up to 0.9, so the best possible
        # FC_SCORE is 90 rather than 100 -- confirm this is intended.
        self.weights = {
            "MONTH6_SALE_QTY": 0.1,
            "MONTH6_SALE_AMT": 0.1,
            "MONTH6_GROSS_PROFIT_RATE": 0.03,
            "MONTH6_SALE_QTY_YOY": 0.1,
            "MONTH6_SALE_QTY_MOM": 0.1,
            "MONTH6_SALE_AMT_YOY": 0.1,
            "MONTH6_SALE_AMT_MOM": 0.1,
            "ORDER_FULLORDR_RATE": 0.1,
            "NEW_PRODUCT_ORDER_QTY_OCC": 0.03,
            "LISTING_RATE": 0.1,
            "OUT_STOCK_DAYS": 0.02,
            "RETAIL_PRICE_INDEX": 0.02,
        }

    def standardize_column(self, column):
        """Min-max scale a one-dimensional numeric column into ``[0, 1]``.

        Args:
            column: pandas Series (or NumPy array) of numeric values.

        Returns:
            An object of the same type and length as ``column`` holding
            ``(x - min) / (max - min)``. When the column is constant (or
            empty) the range is zero, so every value maps to ``0.0``
            instead of the NaN a plain division would produce.
        """
        lowest = column.min()
        span = column.max() - lowest
        # ``not span > 0`` is also true when span is NaN (empty column).
        if not span > 0:
            return (column - lowest) * 0.0
        return (column - lowest) / span

    def calculate_heart_per_product(self, group):
        """Normalize the indicators of one product group and score its rows.

        Args:
            group: DataFrame holding the rows of a single product; it must
                contain every column named in ``self.weights``.

        Returns:
            A copy of ``group`` whose indicator columns are replaced by
            their min-max scaled values (``OUT_STOCK_DAYS`` is inverted so
            that fewer out-of-stock days score higher) plus an
            ``FC_SCORE`` column equal to the weighted sum times 100.
        """
        # Work on a copy so the slice handed in by the caller is not mutated.
        scored = group.copy()
        total = pd.Series(0.0, index=scored.index)
        for column, weight in self.weights.items():
            normalized = self.standardize_column(scored[column])
            if column == "OUT_STOCK_DAYS":
                normalized = 1 - normalized
            scored[column] = normalized
            # Accumulate column-wise instead of a row-wise ``apply``.
            total = total + normalized * weight
        scored["FC_SCORE"] = total * 100
        return scored

    def recommend(self, order_data):
        """Compute the per-product customer ranking.

        Args:
            order_data: DataFrame with ``PRODUCT_CODE``,
                ``BB_RETAIL_CUSTOMER_CODE`` and every column named in
                ``self.weights``. It is not modified.

        Returns:
            A list with one entry per product, ordered by product code:
            ``{"keys": product_code, "value": [{customer_code: score}, ...]}``
            where the customers are ordered by descending ``FC_SCORE``.
        """
        # Drop duplicate rows and fill missing values without touching the
        # caller's frame.
        cleaned = order_data.drop_duplicates().fillna(0)

        recommend_list = []
        # Iterating the groups directly (sorted by key) avoids relying on
        # ``groupby.apply`` keeping the grouping column in its result.
        for product_code, group in cleaned.groupby("PRODUCT_CODE"):
            ranked = self.calculate_heart_per_product(group).sort_values(
                by="FC_SCORE", ascending=False, kind="mergesort"
            )
            customers = ranked["BB_RETAIL_CUSTOMER_CODE"].tolist()
            scores = ranked["FC_SCORE"].tolist()
            group_values = [
                {customer: score} for customer, score in zip(customers, scores)
            ]
            recommend_list.append({"keys": product_code, "value": group_values})

        return recommend_list
|
|
|
|
|
+
|
|
|
|
|
if __name__ == "__main__":
    # Load the order records from the CSV file in the working directory.
    orders = pd.read_csv('order.csv')

    # Build the model and run the scoring once on the loaded data.
    model = ItemCFModel()
    recommendations = model.recommend(orders)

    # Serialize the model object to disk with joblib.
    joblib.dump(model, "item_cf.model")
|