Bladeren bron

热度召回和ItemCF算法实现

Sherlock 1 jaar geleden
bovenliggende
commit
dd062a4160
2 gewijzigde bestanden met toevoegingen van 134 en 4 verwijderingen
  1. 55 2
      models/recall/hot_recall.py
  2. 79 2
      models/recall/itemCF.py

+ 55 - 2
models/recall/hot_recall.py

@@ -1,2 +1,55 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : hot_recall.py
+@description     : 热度召回算法   
+@time     : 2025/01/21/00
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+import pandas as pd
+import redis
+import random
+import joblib
+random.seed(12345)
+class HotRecallModel:
+    """Popularity ("hot") recall: scores retail customers by averaged metrics.
+
+    TODO 1. Load the data from a database instead of a spreadsheet file.
+         2. Save the results to a Redis database.
+    """
+
+    # Column whose values identify the entities being scored.
+    _GROUP_KEY = "BB_RETAIL_CUSTOMER_CODE"
+
+    def __init__(self):
+        # Filled in by load_dataset(); stays None until a dataset is loaded.
+        self._order_data = None
+
+    def load_dataset(self, data_path):
+        """Read the order table from the Excel file at ``data_path``."""
+        self._order_data = pd.read_excel(data_path)
+
+    def _calculate_hot_score(self, hot_name):
+        """
+        Compute the hot score of every customer for one metric.
+
+        The metric is averaged per ``BB_RETAIL_CUSTOMER_CODE`` and the means
+        are ranked in descending order. The best customer receives a mocked
+        maximum score (a random value between 85 and 100) and every other
+        customer is scaled proportionally to its mean.
+
+        :param hot_name: name of the metric column
+        :type hot_name: string
+        :return: ``{"key": "hot:<hot_name>", "value": [{customer_code: score}, ...]}``
+                 with the entries ordered from highest to lowest mean
+        :rtype: dict
+        """
+        means = self._order_data.groupby(self._GROUP_KEY)[hot_name].mean().reset_index()
+        ranked = means.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
+        # Mocked ceiling for the best entry, as a fraction (0.85 .. 1.00).
+        max_score = random.randint(85, 100) * 0.01
+
+        # An empty table has no row 0 to read the top mean from.
+        if ranked.empty:
+            return {"key": f"hot:{hot_name}", "value": []}
+
+        top_mean = ranked.loc[0, hot_name]
+        if pd.isna(top_mean) or top_mean == 0:
+            # Scaling by a zero/NaN top mean would produce inf/NaN scores;
+            # report a flat zero score instead.
+            scores = [0.0] * len(ranked)
+        else:
+            total_score = top_mean / max_score
+            scores = ((ranked[hot_name] / total_score) * 100).tolist()
+
+        codes = ranked[self._GROUP_KEY].tolist()
+        item_hot_score = [{code: score} for code, score in zip(codes, scores)]
+        return {"key": f"hot:{hot_name}", "value": item_hot_score}
+
+    def calculate_all_hot_score(self):
+        """
+        Compute the hot scores for every metric column of the loaded dataset.
+
+        Metric columns are taken to be all columns from the third one onwards
+        (presumably the first two columns are identifiers -- confirm against
+        the real spreadsheet layout); the group-key column is never scored.
+
+        :return: one ``_calculate_hot_score`` result per metric column
+        :rtype: list
+        :raises RuntimeError: if no dataset has been loaded yet
+        """
+        if getattr(self, "_order_data", None) is None:
+            raise RuntimeError("load_dataset() must be called before computing hot scores")
+
+        # ``.columns[2:]`` selects column labels; slicing the frame itself
+        # (``frame[2:]``) would slice rows and still yield every column name.
+        metric_columns = [
+            col for col in self._order_data.columns[2:] if col != self._GROUP_KEY
+        ]
+        return [self._calculate_hot_score(col) for col in metric_columns]
+if __name__ == "__main__":
+    # Serialize a freshly constructed model (no dataset loaded) to disk.
+    hot_recall_model = HotRecallModel()
+    joblib.dump(hot_recall_model, "hot_recall.model")

+ 79 - 2
models/recall/itemCF.py

@@ -1,2 +1,79 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : itemCF.py
+@description     : 基于物品的协同过滤算法
+@time     : 2025/01/21/00
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+import joblib
+import pandas as pd
+import numpy as np
+
+# 算法封装成一个类
+class ItemCFModel:
+    """Per-product scoring of retail customers from weighted order metrics.
+
+    TODO 1. Save the results to a Redis database.
+    """
+
+    def __init__(self):
+        # Weight of each metric column in the final FC_SCORE.
+        # NOTE(review): the weights add up to 0.9, so the highest reachable
+        # FC_SCORE is 90 rather than 100 -- confirm this is intended.
+        self.weights = {
+            "MONTH6_SALE_QTY": 0.1,
+            "MONTH6_SALE_AMT": 0.1,
+            "MONTH6_GROSS_PROFIT_RATE": 0.03,
+            "MONTH6_SALE_QTY_YOY": 0.1,
+            "MONTH6_SALE_QTY_MOM": 0.1,
+            "MONTH6_SALE_AMT_YOY": 0.1,
+            "MONTH6_SALE_AMT_MOM": 0.1,
+            "ORDER_FULLORDR_RATE": 0.1,
+            "NEW_PRODUCT_ORDER_QTY_OCC": 0.03,
+            "LISTING_RATE": 0.1,
+            "OUT_STOCK_DAYS": 0.02,
+            "RETAIL_PRICE_INDEX": 0.02
+        }
+
+    def standardize_column(self, column):
+        """Min-max normalize ``column`` into the range [0, 1].
+
+        A constant column has no spread, so ``(x - min) / (max - min)`` would
+        be 0/0 = NaN; such a column is mapped to all zeros instead.
+
+        :param column: numeric values (pandas Series or numpy array)
+        :return: the normalized values, same shape as ``column``
+        """
+        low = column.min()
+        span = column.max() - low
+        if span == 0:
+            # Every value equals ``low``, so the difference is already zero.
+            return (column - low).astype(float)
+        return (column - low) / span
+
+    def calculate_heart_per_product(self, group):
+        """Normalize the weighted columns of one product group and score it.
+
+        Each column named in ``self.weights`` is min-max normalized within
+        the group; ``OUT_STOCK_DAYS`` is inverted (``1 - normalized``) so that
+        fewer out-of-stock days score higher. ``FC_SCORE`` is the weighted
+        sum of the normalized columns multiplied by 100.
+
+        :param group: rows belonging to a single product
+        :return: a copy of ``group`` with normalized columns and ``FC_SCORE``
+        :raises KeyError: if a weighted column is missing from ``group``
+        """
+        # Work on a copy so neither the caller's frame nor a groupby slice
+        # is modified.
+        group = group.copy()
+        for column in self.weights:
+            normalized = self.standardize_column(group[column])
+            group[column] = 1 - normalized if column == "OUT_STOCK_DAYS" else normalized
+        # Column-wise weighted sum; same result as a row-by-row apply.
+        group["FC_SCORE"] = sum(
+            group[col] * weight for col, weight in self.weights.items()
+        ) * 100
+        return group
+
+    def recommend(self, order_data):
+        """Score every customer per product and return the ranked lists.
+
+        Duplicate rows are dropped and missing values are replaced by 0 on a
+        copy; ``order_data`` itself is left untouched.
+
+        :param order_data: DataFrame with ``PRODUCT_CODE``,
+            ``BB_RETAIL_CUSTOMER_CODE`` and every column in ``self.weights``
+        :return: ``[{"keys": product_code, "value": [{customer_code: score}, ...]}, ...]``
+            ordered by product code, each value list ordered by descending score
+        :rtype: list
+        """
+        cleaned = order_data.drop_duplicates().fillna(0)
+
+        # NOTE(review): the result dict uses "keys" while HotRecallModel uses
+        # "key"; kept as-is because consumers may rely on it.
+        recommend_list = []
+        # Iterating the groupby (instead of groupby.apply) always hands over
+        # the full group, grouping column included, on every pandas version.
+        for product_code, group in cleaned.groupby("PRODUCT_CODE"):
+            scored = self.calculate_heart_per_product(group)
+            # Stable sort keeps tied customers in their original row order.
+            ranked = scored.sort_values(by="FC_SCORE", ascending=False, kind="mergesort")
+            customers = ranked["BB_RETAIL_CUSTOMER_CODE"].tolist()
+            scores = ranked["FC_SCORE"].tolist()
+            group_values = [{customer: score} for customer, score in zip(customers, scores)]
+            recommend_list.append({"keys": product_code, "value": group_values})
+
+        return recommend_list
+    
+if __name__ == "__main__":
+    # Load the order records, run the scoring once, then persist the model.
+    orders = pd.read_csv('order.csv')
+    model = ItemCFModel()
+    recommendations = model.recommend(orders)
+    joblib.dump(model, "item_cf.model")