#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@filename    : hot_recall.py
@description : Hot (popularity-based) recall algorithm
@time        : 2025/01/21/00
@author      : Sherlock1011 & Min1027
@Version     : 1.0
'''
import random

import joblib
import pandas as pd

from dao.redis_db import Redis

# Fixed seed so the mocked maximum hot score is reproducible between runs.
random.seed(12345)


class HotRecallModel:
    """Compute per-retailer hot scores from order data and store them in Redis.

    TODO:
        1. Load the data from the database instead of a file.
        2. Save the results to the Redis database.
    """

    def __init__(self):
        """Open the Redis wrapper and print the currently stored hot keys."""
        self.redis_db = Redis()
        hotkeys = self.get_hotkeys()
        print(hotkeys)

    def get_hotkeys(self) -> list:
        """
        Read every member of the ``hotkeys`` sorted set together with its score.

        :return: one ``(item, score)`` tuple per member, in the order returned
                 by ``zrange``
        :rtype: list
        """
        info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
        results = []
        for item, score in info:
            # Each member gets its own pair; the previous implementation kept
            # extending a single tuple, so later entries contained all the
            # earlier members as well.
            results.append((item, score))
            print(f"元素: {item}, 分数: {score}")
        return results

    def load_dataset(self, data_path):
        """
        Load the order data from an Excel file into ``self._order_data``.

        :param data_path: path of the Excel file, passed to ``pandas.read_excel``
        """
        self._order_data = pd.read_excel(data_path)

    def _calculate_hot_score(self, hot_name) -> dict:
        """
        Compute the hot score of every retailer for one hot indicator.

        The indicator is averaged per ``BB_RETAIL_CUSTOMER_CODE`` and the
        averages are sorted in descending order. Scores are scaled so that the
        top retailer receives a mocked maximum between 85 and 100 and every
        other retailer is proportional to it.

        :param hot_name: name of the hot indicator column
        :type hot_name: string
        :return: ``{"key": "hot:<hot_name>", "value": [{retailer_code: score}, ...]}``
                 with the value list sorted by descending score; the list is
                 empty when the dataset has no rows
        :rtype: dict
        """
        key = f"hot:{hot_name}"
        averages = (
            self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name]
            .mean()
            .reset_index()
        )
        ranked = averages.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
        if ranked.empty:
            # Nothing to rank; ``ranked.loc[0, ...]`` below would raise KeyError.
            return {"key": key, "value": []}

        # Mocked maximum score for hot recall (0.85 .. 1.00).
        max_score = random.randint(85, 100) * 0.01
        # Dividing by this maps the top average onto ``max_score * 100``.
        # NOTE(review): a top average of 0 makes this 0 and the scores below
        # NaN/inf - confirm whether the indicators can be all-zero.
        total_score = ranked.loc[0, hot_name] / max_score

        item_hot_score = [
            {code: (average / total_score) * 100}
            for code, average in ranked.itertuples(index=False, name=None)
        ]
        return {"key": key, "value": item_hot_score}

    def calculate_all_hot_score(self) -> list:
        """
        Compute the hot scores of all hot indicators.

        Every column from the third one onwards is treated as a hot indicator;
        the first two columns are skipped (presumably identifier columns such
        as the retailer code - verify against the dataset layout).

        :return: one ``_calculate_hot_score`` result per indicator column
        :rtype: list
        """
        hot_datas = []
        # ``.columns[2:]`` selects column labels; slicing the frame itself
        # (``df[2:]``) would slice rows and still list every column.
        for col in self._order_data.columns[2:]:
            hot_datas.append(self._calculate_hot_score(col))
        return hot_datas

    def to_redis(self, city, hotkey_name, rec_content_score):
        """
        Write the scores of one hot indicator to the sorted set
        ``hot:<city>:<hotkey_name>``.

        :param city: city identifier used in the Redis key
        :param hotkey_name: hot indicator name used in the Redis key
        :param rec_content_score: mapping of retailer id to score; nothing is
                                  written when it is empty
        """
        rec_item_id = f"hot:{city}:{hotkey_name}"
        mapping = dict(rec_content_score.items())
        if mapping:
            self.redis_db.redis.zadd(rec_item_id, mapping)


if __name__ == "__main__":
    # Serialize the model.
    model = HotRecallModel()
    # joblib.dump(model, "hot_recall.model")