| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- from sklearn.preprocessing import StandardScaler
- from config import load_model_config
- from database import RedisDatabaseHelper, MySqlDao
- from tqdm import tqdm
- from models.rank.data.config import OrderConfig
- import numpy as np
- cfgs = load_model_config()  # loaded once at import; HotRecallModel.__init__ reads cfgs["hot_recall"]["hot_keys"]
class HotRecallModel:
    """Compute per-customer hotness scores for one city and store them in Redis.

    Order records are fetched through ``MySqlDao`` at construction time and
    aggregated to one row per (cust_code, product_code). Each configured hot
    key is then scored and written to the Redis sorted set
    ``hot:<city_uuid>:<hot_key>``.
    """

    def __init__(self, city_uuid):
        """Open the Redis/MySQL helpers, load order data and read the hot keys.

        :param city_uuid: identifier passed to the DAO and embedded in the
            Redis key names
        """
        self._city_uuid = city_uuid
        self._redis_db = RedisDatabaseHelper().redis
        self._dao = MySqlDao()
        self._load_data()
        self._hotkeys = cfgs["hot_recall"]["hot_keys"]

    def _load_data(self):
        """Load the order table and aggregate it per customer/product pair."""
        print("hot_recall: 正在加载order_info...")
        orders = self._dao.load_order_data(self._city_uuid)
        # Work on an explicit copy of the column subset so the assignment
        # below never targets a view of the frame returned by the DAO.
        orders = orders[OrderConfig.FEATURE_COLUMNS].copy()

        # Data cleaning: treat missing quantities as 0, total them per
        # (customer, product) pair, then drop pairs whose total is zero.
        orders["sale_qty"] = orders["sale_qty"].fillna(0)
        orders = orders.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
        self._order_data = orders[orders["sale_qty"] != 0]

    def _calculate_hot_score(self, hot_name):
        """Score every customer on one hotness metric.

        The metric column is summed per ``cust_code``, standardised with
        ``StandardScaler`` and squashed through a sigmoid, giving scores in
        the open interval (0, 100) ordered from highest to lowest.

        :param hot_name: name of the metric column in the order data
        :return: ``{"key": <hot_name>, "value": [{cust_code: score}, ...]}``;
            ``value`` is an empty list when there is no order data
        """
        totals = self._order_data.groupby("cust_code")[hot_name].sum().reset_index()
        if totals.empty:
            # StandardScaler cannot be fitted on zero rows; report no scores
            # so the caller just clears the corresponding Redis key.
            return {"key": f"{hot_name}", "value": []}

        ranked = totals.sort_values(by=hot_name, ascending=False).reset_index(drop=True)

        # Normalise the metric column that was actually aggregated (it used
        # to be hard-coded to "sale_qty", which broke any other hot key).
        scaler = StandardScaler()
        normalized = scaler.fit_transform(ranked[hot_name].values.reshape(-1, 1))
        ranked[hot_name] = ((1 / (1 + np.exp(-normalized))) * 100).flatten()

        # Pair the columns directly instead of building a Series per row.
        item_hot_score = [
            {cust_code: score}
            for cust_code, score in zip(ranked["cust_code"], ranked[hot_name])
        ]

        return {"key": f"{hot_name}", "value": item_hot_score}

    def _to_redis(self, rec_content_score):
        """Replace the Redis sorted set for one hot key with fresh scores.

        :param rec_content_score: dict as returned by
            ``_calculate_hot_score`` (``"key"`` and ``"value"`` entries)
        """
        hotkey_name = rec_content_score["key"]
        rec_item_id = f"hot:{self._city_uuid}:{str(hotkey_name)}"
        # Clear the existing sorted set so stale members do not survive.
        self._redis_db.delete(rec_item_id)

        res = {}
        for item in rec_content_score["value"]:
            # Each item is a single-entry mapping such as {"A001": 75.0}.
            for content, score in item.items():
                res[content] = float(score)  # make sure the score is a plain float
        if res:  # zadd is only issued when there is something to store
            self._redis_db.zadd(rec_item_id, res)

    def calculate_all_hot_score(self):
        """Compute the scores for every configured hot key and store them."""
        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
            self._to_redis(self._calculate_hot_score(hotkey_name))
-
-
-
if __name__ == "__main__":
    # Manual entry point: recompute and store the hot-recall scores for one
    # hard-coded city.
    HotRecallModel("00000000000000000000000011445301").calculate_all_hot_score()
|