itemCF.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @filename : itemCF.py
  5. @description : Item-based collaborative filtering algorithm
  6. @time : 2025/01/21/00
  7. @author : Sherlock1011 & Min1027
  8. @Version : 1.0
  9. '''
  10. import joblib
  11. import pandas as pd
  12. import numpy as np
  13. # 算法封装成一个类
  14. class ItemCFModel:
  15. """TODO 1. 将结果保存到redis数据库中"""
  16. def __init__(self):
  17. self.weights = {
  18. "MONTH6_SALE_QTY": 0.1,
  19. "MONTH6_SALE_AMT": 0.1,
  20. "MONTH6_GROSS_PROFIT_RATE": 0.03,
  21. "MONTH6_SALE_QTY_YOY": 0.1,
  22. "MONTH6_SALE_QTY_MOM": 0.1,
  23. "MONTH6_SALE_AMT_YOY": 0.1,
  24. "MONTH6_SALE_AMT_MOM": 0.1,
  25. "ORDER_FULLORDR_RATE": 0.1,
  26. "NEW_PRODUCT_ORDER_QTY_OCC": 0.03,
  27. "LISTING_RATE": 0.1,
  28. "OUT_STOCK_DAYS": 0.02,
  29. "RETAIL_PRICE_INDEX": 0.02
  30. }
  31. # 均值方差归一化函数
  32. def standardize_column(self, column):
  33. return (column - column.min()) / (column.max() - column.min())
  34. # 按照品规分组归一化和计算热度值
  35. def calculate_heart_per_product(self, group):
  36. for column in self.weights.keys():
  37. if column == "OUT_STOCK_DAYS":
  38. group[column] = 1 - self.standardize_column(group[column])
  39. else:
  40. group[column] = self.standardize_column(group[column])
  41. group["FC_SCORE"] = group.apply(
  42. lambda row: sum(row[col] * weight for col, weight in self.weights.items()) * 100, axis=1
  43. )
  44. return group
  45. # 主算法函数:计算推荐结果
  46. def recommend(self, order_data):
  47. # 去除重复值和填补缺失值
  48. order_data.drop_duplicates(inplace=True)
  49. order_data.fillna(0, inplace=True)
  50. # 应用分组计算
  51. df_result = order_data.groupby("PRODUCT_CODE").apply(self.calculate_heart_per_product).reset_index(drop=True)
  52. df_result = df_result.sort_values(by=["PRODUCT_CODE", "FC_SCORE"], ascending=[True, False])
  53. recomend_list = []
  54. for product_code, group in df_result.groupby("PRODUCT_CODE"):
  55. group_values = group[["BB_RETAIL_CUSTOMER_CODE", "FC_SCORE"]].apply(
  56. lambda row: {row["BB_RETAIL_CUSTOMER_CODE"]: row["FC_SCORE"]}, axis=1
  57. ).tolist()
  58. recomend_list.append({"keys": product_code, "value": group_values})
  59. return recomend_list
  60. if __name__ == "__main__":
  61. # 创建一个 ItemCF 类的实例
  62. item_cf_algorithm = ItemCFModel()
  63. # 读取数据
  64. order_data = pd.read_csv('order.csv')
  65. # 调用算法
  66. recomand_list = item_cf_algorithm.recommend(order_data)
  67. # 序列化
  68. joblib.dump(item_cf_algorithm, "item_cf.model")