hot_recall.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @filename : hot_recall.py
  5. @description : 热度召回算法
  6. @time : 2025/01/21/00
  7. @author : Sherlock1011 & Min1027
  8. @Version : 1.0
  9. '''
  10. import pandas as pd
  11. from dao.redis_db import Redis
  12. from dao.mysql_client import Mysql
  13. import random
  14. from tqdm import tqdm
  15. import joblib
  16. random.seed(12345)
  17. class HotRecallModel:
  18. """TODO 1. 将加载数据修改为数据库加载
  19. 2. 将结果保存到redis数据库中"""
  20. def __init__(self):
  21. self._redis_db = Redis()
  22. self._hotkeys = self.get_hotkeys()
  23. self._order_data = self._load_data_from_dataset()
  24. def get_hotkeys(self):
  25. info = self._redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
  26. hotkeys = []
  27. for item, _ in info:
  28. hotkeys.append(item)
  29. return hotkeys
  30. def _load_data_from_dataset(self):
  31. """从数据库中读取数据"""
  32. client = Mysql()
  33. tablename = "mock_order"
  34. query_text = "*"
  35. df = client.load_data(tablename, query_text)
  36. # 去除重复值和填补缺失值
  37. df.drop_duplicates(inplace=True)
  38. df.fillna(0, inplace=True)
  39. return df
  40. def _calculate_hot_score(self, hot_name):
  41. """
  42. 根据热度指标计算热度得分
  43. :param hot_name: 热度指标A
  44. :type param: string
  45. :return: 所有热度指标的得分
  46. :rtype: list
  47. """
  48. results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
  49. sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
  50. item_hot_score = []
  51. # mock热度召回最大分数
  52. max_score = 1.0
  53. total_score = sorted_results.loc[0, hot_name] / max_score
  54. for row in sorted_results.itertuples(index=True, name="Row"):
  55. item = {row[1]:(row[2]/total_score)*100}
  56. item_hot_score.append(item)
  57. return {"key":f"{hot_name}", "value":item_hot_score}
  58. def calculate_all_hot_score(self):
  59. """
  60. 计算所有的热度指标得分
  61. """
  62. # hot_datas = []
  63. for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
  64. self.to_redis(self._calculate_hot_score(hotkey_name))
  65. def to_redis(self, rec_content_score):
  66. hotkey_name = rec_content_score["key"]
  67. rec_item_id = "hot:" + str(hotkey_name) # 修正 rec_item_id 拼接方式
  68. res = {}
  69. # rec_content_score["value"] 是一个包含字典的列表
  70. for item in rec_content_score["value"]:
  71. for content, score in item.items(): # item 形如 {A001: 75.0}
  72. res[content] = float(score) # 确保 score 是 float 类型
  73. if res: # 只有当 res 不为空时才执行 zadd
  74. self._redis_db.redis.zadd(rec_item_id, res)
  75. if __name__ == "__main__":
  76. # 序列化
  77. model = HotRecallModel()
  78. model.calculate_all_hot_score()
  79. # joblib.dump(model, "hot_recall.model")