import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from config import load_model_config
from database import RedisDatabaseHelper, MySqlDao
from models.rank.data.config import OrderConfig

# Model configuration is loaded once at import time; the "hot_recall" section
# of it is read in HotRecallModel.__init__.
cfgs = load_model_config()


class HotRecallModel:
    """Compute hot scores from order records and publish them to Redis.

    For every hot metric listed under ``cfgs["hot_recall"]["hot_keys"]`` the
    model averages the metric per ``cust_code``, squashes the standardised
    averages into the open interval (0, 100) and writes the result under the
    key ``hot:<city_uuid>:<metric>`` via ``zadd``.
    """

    def __init__(self, city_uuid):
        """Create the DB handles and load the order data for ``city_uuid``.

        :param city_uuid: identifier passed to ``MySqlDao.load_order_data``.
        """
        self._redis_db = RedisDatabaseHelper().redis
        self._dao = MySqlDao()
        self._load_data(city_uuid)
        self._hotkeys = cfgs["hot_recall"]["hot_keys"]

    def _load_data(self, city_uuid):
        """Load the order table and aggregate it into ``self._order_data``.

        The result holds one row per (``cust_code``, ``product_code``) pair
        with the summed ``sale_qty``; pairs whose total is zero are dropped.
        Assumes ``load_order_data`` returns a pandas DataFrame -- TODO confirm.

        :param city_uuid: identifier passed to ``MySqlDao.load_order_data``.
        """
        print("hot_recall: 正在加载order_info...")
        order_data = self._dao.load_order_data(city_uuid)

        # Take an explicit copy of the column subset so the assignment below
        # never writes into (or warns about) a view of the loaded frame.
        order_data = order_data[OrderConfig.FEATURE_COLUMNS].copy()

        # Data cleaning: a missing quantity counts as zero.
        order_data["sale_qty"] = order_data["sale_qty"].fillna(0)
        order_data = order_data.groupby(
            ["cust_code", "product_code"], as_index=False
        )["sale_qty"].sum()
        self._order_data = order_data[order_data["sale_qty"] != 0]

    def _calculate_hot_score(self, hot_name):
        """Compute the hot score of every ``cust_code`` for one metric.

        The per-``cust_code`` mean of ``hot_name`` is standardised with
        ``StandardScaler`` and passed through a logistic function scaled by
        100. Entries are ordered by descending mean.

        NOTE(review): the scores are keyed by ``cust_code`` although the
        result list is named ``item_hot_score`` -- confirm whether
        ``product_code`` was intended.

        :param hot_name: name of the metric column in ``self._order_data``.
        :return: ``{"key": <hot_name as str>, "value": [{cust_code: score}, ...]}``;
            ``"value"`` is empty when there is no order data.
        :rtype: dict
        """
        averaged = self._order_data.groupby("cust_code")[hot_name].mean().reset_index()
        ranked = averaged.sort_values(by=hot_name, ascending=False).reset_index(drop=True)

        # StandardScaler refuses to fit on zero samples; an empty result is
        # valid here because _to_redis skips zadd when nothing is scored.
        if ranked.empty:
            return {"key": str(hot_name), "value": []}

        # Normalise the requested metric column (previously "sale_qty" was
        # hard-coded, which broke for any other configured hot key).
        standardized = StandardScaler().fit_transform(
            ranked[hot_name].values.reshape(-1, 1)
        )
        scores = ((1 / (1 + np.exp(-standardized))) * 100).flatten()

        # Pair the columns directly instead of using iterrows(), which builds
        # a mixed-dtype Series per row and can upcast the code values.
        item_hot_score = [
            {code: score}
            for code, score in zip(ranked["cust_code"].tolist(), scores.tolist())
        ]
        return {"key": str(hot_name), "value": item_hot_score}

    def _to_redis(self, rec_content_score, city_uuid):
        """Replace the Redis entry for one metric with freshly computed scores.

        :param rec_content_score: dict as returned by ``_calculate_hot_score``.
        :param city_uuid: city identifier used in the Redis key.
        """
        hotkey_name = rec_content_score["key"]
        rec_item_id = f"hot:{city_uuid}:{hotkey_name!s}"

        # Drop the previous contents so stale members do not survive the refresh.
        self._redis_db.delete(rec_item_id)

        res = {}
        for item in rec_content_score["value"]:
            # Each item looks like {"A001": 75.0}; coerce the score to float.
            res.update((content, float(score)) for content, score in item.items())

        # Only call zadd when there is at least one member to write.
        if res:
            self._redis_db.zadd(rec_item_id, res)

    def calculate_all_hot_score(self, city_uuid):
        """Compute and store the hot scores for every configured metric.

        :param city_uuid: city identifier used in the Redis keys.
        """
        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
            self._to_redis(self._calculate_hot_score(hotkey_name), city_uuid)


if __name__ == "__main__":
    city_uuid = "00000000000000000000000011445301"
    hot_recall = HotRecallModel(city_uuid)
    hot_recall.calculate_all_hot_score(city_uuid)