hot_recall.py 2.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @filename : hot_recall.py
  5. @description : 热度召回算法
  6. @time : 2025/01/21/00
  7. @author : Sherlock1011 & Min1027
  8. @Version : 1.0
  9. '''
  10. import pandas as pd
  11. from database import RedisDatabaseHelper
  12. from tqdm import tqdm
  13. class HotRecallModel:
  14. def __init__(self, order_data):
  15. self._redis_db = RedisDatabaseHelper()
  16. self._hotkeys = self.get_hotkeys()
  17. self._order_data = order_data
  18. def get_hotkeys(self):
  19. info = self._redis_db.redis.zrange("configs:hotkeys", 0, -1, withscores=True)
  20. hotkeys = []
  21. for item, _ in info:
  22. hotkeys.append(item)
  23. return hotkeys
  24. def _calculate_hot_score(self, hot_name):
  25. """
  26. 根据热度指标计算热度得分
  27. :param hot_name: 热度指标A
  28. :type param: string
  29. :return: 所有热度指标的得分
  30. :rtype: list
  31. """
  32. results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
  33. sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
  34. item_hot_score = []
  35. # mock热度召回最大分数
  36. max_score = 1.0
  37. total_score = sorted_results.loc[0, hot_name] / max_score
  38. for row in sorted_results.itertuples(index=True, name="Row"):
  39. item = {row[1]:(row[2]/total_score)*100}
  40. item_hot_score.append(item)
  41. return {"key":f"{hot_name}", "value":item_hot_score}
  42. def calculate_all_hot_score(self, city_uuid):
  43. """
  44. 计算所有的热度指标得分
  45. """
  46. # hot_datas = []
  47. for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
  48. self.to_redis(self._calculate_hot_score(hotkey_name), city_uuid)
  49. def to_redis(self, rec_content_score, city_uuid):
  50. hotkey_name = rec_content_score["key"]
  51. rec_item_id = f"hot:{city_uuid}:{str(hotkey_name)}" # 修正 rec_item_id 拼接方式
  52. print("自动清除历史id前数量", self._redis_db.redis.zcard(rec_item_id))
  53. # 清空 sorted set 数据,确保不会影响后续的存储
  54. self._redis_db.redis.delete(rec_item_id)
  55. print("自动清除历史id后数量", self._redis_db.redis.zcard(rec_item_id))
  56. res = {}
  57. for item in rec_content_score["value"]:
  58. for content, score in item.items(): # item 形如 {A001: 75.0}
  59. res[content] = float(score) # 确保 score 是 float 类型
  60. if res: # 只有当 res 不为空时才执行 zadd
  61. self._redis_db.redis.zadd(rec_item_id, res)
  62. if __name__ == "__main__":
  63. # 序列化
  64. model = HotRecallModel()
  65. model.calculate_all_hot_score()
  66. # joblib.dump(model, "hot_recall.model")