#!/usr/bin/env python # -*- encoding: utf-8 -*- ''' @filename : hot_recall.py @description : 热度召回算法 @time : 2025/01/21/00 @author : Sherlock1011 & Min1027 @Version : 1.0 ''' import pandas as pd import redis import random import joblib random.seed(12345) class HotRecallModel: """TODO 1. 将加载数据修改为数据库加载 2. 将结果保存到redis数据库中""" def __init__(self): pass def load_dataset(self, data_path): self._order_data = pd.read_excel(data_path) def _calculate_hot_score(self, hot_name): """ 根据热度指标计算热度得分 :param hot_name: 热度指标 :type param: string :return: 所有热度指标的得分 :rtype: list """ results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index() sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True) item_hot_score = [] # mock热度召回最大分数 max_score = random.randint(85,100) * 0.01 total_score = sorted_results.loc[0, hot_name] / max_score for row in sorted_results.itertuples(index=True, name="Row"): item = {row[1]:(row[2]/total_score)*100} item_hot_score.append(item) return {"key":f"hot:{hot_name}", "value":item_hot_score} def calculate_all_hot_score(self): """ 计算所有的热度指标得分 """ hot_datas = [] for col in list(self._order_data[2:]): hot_datas.appends(self._calculate_hot_score(col)) return hot_datas if __name__ == "__main__": # 序列化 model = HotRecallModel() joblib.dump(model, "hot_recall.model")