itemCF.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @filename : itemCF.py
  5. @description : 基于物品的协同过滤算法
  6. @time : 2025/01/21/00
  7. @author : Sherlock1011 & Min1027
  8. @Version : 1.0
  9. '''
  10. import joblib
  11. import pandas as pd
  12. import numpy as np
  13. from sqlalchemy import create_engine, text
  14. from dao.mysql_client import Mysql
  15. from decimal import Decimal
  16. # 算法封装成一个类
  17. class ItemCFModel:
  18. """TODO 1. 将结果保存到redis数据库中"""
  19. def __init__(self):
  20. self.weights = {
  21. "MONTH6_SALE_QTY": Decimal(0.1),
  22. "MONTH6_SALE_AMT": Decimal(0.1),
  23. "MONTH6_GROSS_PROFIT_RATE": Decimal(0.03),
  24. "MONTH6_SALE_QTY_YOY": Decimal(0.1),
  25. "MONTH6_SALE_QTY_MOM": Decimal(0.1),
  26. "MONTH6_SALE_AMT_YOY": Decimal(0.1),
  27. "MONTH6_SALE_AMT_MOM": Decimal(0.1),
  28. "ORDER_FULLORDR_RATE": Decimal(0.1),
  29. "NEW_PRODUCT_ORDER_QTY_OCC": Decimal(0.03),
  30. "LISTING_RATE": Decimal(0.1),
  31. "OUT_STOCK_DAYS": Decimal(0.02),
  32. "RETAIL_PRICE_INDEX": Decimal(0.02)
  33. }
  34. # 均值方差归一化函数
  35. def standardize_column(self, column):
  36. if(column.max() == column.min() and column.max() == 0):
  37. return 0
  38. elif (column.max() == column.min() and column.max() != 0):
  39. return 1
  40. else:
  41. return (column - column.min()) / (column.max() - column.min())
  42. # 按照品规分组归一化和计算热度值
  43. def calculate_heart_per_product(self, group):
  44. for column in self.weights.keys():
  45. if column == "OUT_STOCK_DAYS":
  46. group[column] = 1 - self.standardize_column(group[column])
  47. else:
  48. group[column] = self.standardize_column(group[column])
  49. group["FC_SCORE"] = group.apply(
  50. lambda row: sum(Decimal(row[col]) * weight for col, weight in self.weights.items()) * 100, axis=1
  51. )
  52. return group
  53. # 主算法函数:计算推荐结果
  54. def recommend(self, order_data):
  55. # 去除重复值和填补缺失值
  56. order_data.drop_duplicates(inplace=True)
  57. order_data.fillna(0, inplace=True)
  58. # 应用分组计算
  59. df_result = order_data.groupby("PRODUCT_CODE").apply(self.calculate_heart_per_product).reset_index(drop=True)
  60. df_result = df_result.sort_values(by=["PRODUCT_CODE", "FC_SCORE"], ascending=[True, False])
  61. recomend_list = []
  62. for product_code, group in df_result.groupby("PRODUCT_CODE"):
  63. group_values = group[["BB_RETAIL_CUSTOMER_CODE", "FC_SCORE"]].apply(
  64. lambda row: {row["BB_RETAIL_CUSTOMER_CODE"]: row["FC_SCORE"]}, axis=1
  65. ).tolist()
  66. recomend_list.append({"keys": product_code, "value": group_values})
  67. return recomend_list
  68. def load_data_from_dataset():
  69. client = Mysql()
  70. # 创建会话
  71. session = client.create_session()
  72. # 使用 session 执行查询等操作
  73. try:
  74. results = session.execute(
  75. text("select * from tads_brandcul_cust_order")
  76. ).all()
  77. # 将结果转换为DataFrame
  78. df = pd.DataFrame(results).drop(columns=['stat_month']) # 提取列名
  79. finally:
  80. session.close()
  81. return df
  82. if __name__ == "__main__":
  83. # 创建一个 ItemCF 类的实例
  84. item_cf_algorithm = ItemCFModel()
  85. # 读取数据
  86. # order_data = pd.read_csv('order.csv')
  87. order_data = load_data_from_dataset()
  88. # 调用算法
  89. recomand_list = item_cf_algorithm.recommend(order_data)
  90. print(recomand_list)
  91. # # 序列化
  92. # joblib.dump(item_cf_algorithm, "item_cf.model")