|
|
@@ -9,37 +9,46 @@
|
|
|
'''
|
|
|
import pandas as pd
|
|
|
from dao.redis_db import Redis
|
|
|
+from dao.mysql_client import Mysql
|
|
|
import random
|
|
|
+from tqdm import tqdm
|
|
|
import joblib
|
|
|
+
|
|
|
random.seed(12345)
|
|
|
class HotRecallModel:
|
|
|
"""TODO 1. 将加载数据修改为数据库加载
|
|
|
2. 将结果保存到redis数据库中"""
|
|
|
def __init__(self):
|
|
|
- self.redis_db = Redis()
|
|
|
- hotkeys = self.get_hotkeys()
|
|
|
- print(hotkeys)
|
|
|
+ self._redis_db = Redis()
|
|
|
+ self._hotkeys = self.get_hotkeys()
|
|
|
+ self._order_data = self._load_data_from_dataset()
|
|
|
|
|
|
|
|
|
def get_hotkeys(self):
|
|
|
- info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
|
|
|
- hotkey = tuple()
|
|
|
- results = []
|
|
|
- for item, score in info:
|
|
|
- hotkey += (item, score)
|
|
|
- results.append(hotkey)
|
|
|
- print(f"元素: {item}, 分数: {score}")
|
|
|
-
|
|
|
- return results
|
|
|
+ info = self._redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
|
|
|
+ hotkeys = []
|
|
|
+ for item, _ in info:
|
|
|
+ hotkeys.append(item)
|
|
|
+ return hotkeys
|
|
|
|
|
|
|
|
|
- def load_dataset(self, data_path):
|
|
|
- self._order_data = pd.read_excel(data_path)
|
|
|
+ def _load_data_from_dataset(self):
|
|
|
+ """从数据库中读取数据"""
|
|
|
+ client = Mysql()
|
|
|
+ tablename = "mock_order"
|
|
|
+ query_text = "*"
|
|
|
+
|
|
|
+ df = client.load_data(tablename, query_text)
|
|
|
+
|
|
|
+ # 去除重复值和填补缺失值
|
|
|
+ df.drop_duplicates(inplace=True)
|
|
|
+ df.fillna(0, inplace=True)
|
|
|
+ return df
|
|
|
|
|
|
def _calculate_hot_score(self, hot_name):
|
|
|
"""
|
|
|
根据热度指标计算热度得分
|
|
|
- :param hot_name: 热度指标
|
|
|
+ :param hot_name: 热度指标
|
|
|
:type hot_name: string
|
|
|
:return: 所有热度指标的得分
|
|
|
:rtype: list
|
|
|
@@ -48,37 +57,37 @@ class HotRecallModel:
|
|
|
sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
|
|
|
item_hot_score = []
|
|
|
# mock热度召回最大分数
|
|
|
- max_score = random.randint(85,100) * 0.01
|
|
|
+ max_score = 1.0
|
|
|
total_score = sorted_results.loc[0, hot_name] / max_score
|
|
|
for row in sorted_results.itertuples(index=True, name="Row"):
|
|
|
item = {row[1]:(row[2]/total_score)*100}
|
|
|
item_hot_score.append(item)
|
|
|
- return {"key":f"hot:{hot_name}", "value":item_hot_score}
|
|
|
+ return {"key":f"{hot_name}", "value":item_hot_score}
|
|
|
|
|
|
def calculate_all_hot_score(self):
|
|
|
"""
|
|
|
计算所有的热度指标得分
|
|
|
"""
|
|
|
- hot_datas = []
|
|
|
- for col in list(self._order_data[2:]):
|
|
|
- hot_datas.appends(self._calculate_hot_score(col))
|
|
|
-
|
|
|
- return hot_datas
|
|
|
+ # hot_datas = []
|
|
|
+ for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
|
|
|
+ self.to_redis(self._calculate_hot_score(hotkey_name))
|
|
|
+
|
|
|
+ def to_redis(self, rec_content_score):
|
|
|
+ hotkey_name = rec_content_score["key"]
|
|
|
+ rec_item_id = "hot:" + str(hotkey_name) # 修正 rec_item_id 拼接方式
|
|
|
+ res = {}
|
|
|
|
|
|
- def to_redis(self, city, hotkey_name, rec_content_score):
|
|
|
- # rec_content_score的格式为:(零售户id,分数)
|
|
|
- rec_item_id = "hot:" + city + ":" + str(hotkey_name)
|
|
|
- res = dict()
|
|
|
- for content, score in rec_content_score.items():
|
|
|
- res[content] = score
|
|
|
+ # rec_content_score["value"] 是一个包含字典的列表
|
|
|
+ for item in rec_content_score["value"]:
|
|
|
+ for content, score in item.items(): # item 形如 {A001: 75.0}
|
|
|
+ res[content] = float(score) # 确保 score 是 float 类型
|
|
|
|
|
|
- if len(res) > 0:
|
|
|
- data = dict({rec_item_id: res})
|
|
|
- for item, value in data.items():
|
|
|
- self.redis_db.redis.zadd(item, value)
|
|
|
+ if res: # 只有当 res 不为空时才执行 zadd
|
|
|
+ self._redis_db.redis.zadd(rec_item_id, res)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
# 序列化
|
|
|
model = HotRecallModel()
|
|
|
+ model.calculate_all_hot_score()
|
|
|
# joblib.dump(model, "hot_recall.model")
|