hot_recall.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. '''
  4. @filename : hot_recall.py
  5. @description : 热度召回算法
  6. @time : 2025/01/21/00
  7. @author : Sherlock1011 & Min1027
  8. @Version : 1.0
  9. '''
  10. import pandas as pd
  11. from dao.redis_db import Redis
  12. import random
  13. import joblib
  14. random.seed(12345)
  15. class HotRecallModel:
  16. """TODO 1. 将加载数据修改为数据库加载
  17. 2. 将结果保存到redis数据库中"""
  18. def __init__(self):
  19. self.redis_db = Redis()
  20. hotkeys = self.get_hotkeys()
  21. print(hotkeys)
  22. def get_hotkeys(self):
  23. info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
  24. hotkey = tuple()
  25. results = []
  26. for item, score in info:
  27. hotkey += (item, score)
  28. results.append(hotkey)
  29. print(f"元素: {item}, 分数: {score}")
  30. return results
  31. def load_dataset(self, data_path):
  32. self._order_data = pd.read_excel(data_path)
  33. def _calculate_hot_score(self, hot_name):
  34. """
  35. 根据热度指标计算热度得分
  36. :param hot_name: 热度指标
  37. :type param: string
  38. :return: 所有热度指标的得分
  39. :rtype: list
  40. """
  41. results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
  42. sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
  43. item_hot_score = []
  44. # mock热度召回最大分数
  45. max_score = random.randint(85,100) * 0.01
  46. total_score = sorted_results.loc[0, hot_name] / max_score
  47. for row in sorted_results.itertuples(index=True, name="Row"):
  48. item = {row[1]:(row[2]/total_score)*100}
  49. item_hot_score.append(item)
  50. return {"key":f"hot:{hot_name}", "value":item_hot_score}
  51. def calculate_all_hot_score(self):
  52. """
  53. 计算所有的热度指标得分
  54. """
  55. hot_datas = []
  56. for col in list(self._order_data[2:]):
  57. hot_datas.appends(self._calculate_hot_score(col))
  58. return hot_datas
  59. def to_redis(self, city, hotkey_name, rec_content_score):
  60. # rec_content_score的格式为:(零售户id,分数)
  61. rec_item_id = "hot:" + city + ":" + str(hotkey_name)
  62. res = dict()
  63. for content, score in rec_content_score.items():
  64. res[content] = score
  65. if len(res) > 0:
  66. data = dict({rec_item_id: res})
  67. for item, value in data.items():
  68. self.redis_db.redis.zadd(item, value)
  69. if __name__ == "__main__":
  70. # 序列化
  71. model = HotRecallModel()
  72. # joblib.dump(model, "hot_recall.model")