| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
- '''
- @filename : hot_recall.py
- @description : 热度召回算法
- @time : 2025/01/21/00
- @author : Sherlock1011 & Min1027
- @Version : 1.0
- '''
- import pandas as pd
- from dao.redis_db import Redis
- import random
- import joblib
- random.seed(12345)
class HotRecallModel:
    """Popularity ("hot") recall model backed by Redis.

    Order data is loaded from an Excel file, averaged per retail customer for
    each hot metric and rescaled so that the top customer receives a mock
    maximum score between 85 and 100.  Scores can be written to Redis sorted
    sets with :meth:`to_redis`.

    TODO:
        1. Load the data from the database instead of a file.
        2. Save the results to the Redis database.
    """

    def __init__(self):
        """Create the Redis wrapper and print the hot keys it currently holds."""
        self.redis_db = Redis()
        hotkeys = self.get_hotkeys()
        print(hotkeys)

    def get_hotkeys(self):
        """
        Read every member of the ``hotkeys`` sorted set together with its score.

        :return: one ``(member, score)`` tuple per entry, in the order returned by Redis
        :rtype: list
        """
        info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
        results = []
        for item, score in info:
            print(f"元素: {item}, 分数: {score}")
            # Build a fresh pair per entry.  Extending one shared tuple with
            # ``+=`` made every appended tuple contain all previous entries.
            results.append((item, score))
        return results

    def load_dataset(self, data_path):
        """
        Load the order data used for scoring.

        :param data_path: path of the Excel file passed to ``pandas.read_excel``
        """
        self._order_data = pd.read_excel(data_path)

    def _calculate_hot_score(self, hot_name):
        """
        Compute the hot score of every retail customer for one hot metric.

        The metric is averaged per ``BB_RETAIL_CUSTOMER_CODE``, sorted in
        descending order and rescaled so that the best customer gets the mock
        maximum score.

        :param hot_name: name of the hot metric column
        :type hot_name: string
        :return: ``{"key": "hot:<hot_name>", "value": [{customer_code: score}, ...]}``
        :rtype: dict
        """
        results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
        sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
        # Mock the maximum score of the hot recall: a value in [0.85, 1.00].
        max_score = random.randint(85, 100) * 0.01
        if sorted_results.empty:
            # Nothing to score; ``.loc[0, ...]`` below would raise KeyError.
            return {"key": f"hot:{hot_name}", "value": []}
        # Chosen so that the top customer ends up with ``max_score * 100``.
        total_score = sorted_results.loc[0, hot_name] / max_score
        item_hot_score = [
            {row[0]: (row[1] / total_score) * 100}
            for row in sorted_results.itertuples(index=False, name="Row")
        ]
        return {"key": f"hot:{hot_name}", "value": item_hot_score}

    def calculate_all_hot_score(self):
        """
        Compute the hot scores for every hot metric column of the loaded data.

        :return: one result of :meth:`_calculate_hot_score` per metric column
        :rtype: list
        """
        # Iterate over column labels, skipping the first two columns.
        # NOTE(review): this assumes the first two columns are identifiers
        # rather than metrics -- confirm against the dataset.  The previous
        # ``list(df[2:])`` sliced rows and therefore yielded every column.
        return [
            self._calculate_hot_score(col)
            for col in self._order_data.columns[2:]
        ]

    def to_redis(self, city, hotkey_name, rec_content_score):
        """
        Store the scores of one hot metric in a Redis sorted set.

        The sorted set is named ``hot:<city>:<hotkey_name>``.  Nothing is
        written when ``rec_content_score`` is empty.

        :param city: city part of the Redis key
        :param hotkey_name: hot metric part of the Redis key
        :param rec_content_score: mapping of retail customer id to score
        :type rec_content_score: dict
        """
        rec_item_id = f"hot:{city}:{hotkey_name}"
        res = dict(rec_content_score.items())
        if res:
            self.redis_db.redis.zadd(rec_item_id, res)
if __name__ == "__main__":
    # Script entry point: constructing the model builds its Redis wrapper and
    # prints the hot keys read from it.
    model = HotRecallModel()
    # Serialisation of the model is currently disabled:
    # joblib.dump(model, "hot_recall.model")
|