1 rok temu · 4c2486ab80
--- a/dao/redis_db.py
+++ b/dao/redis_db.py
@@ -24,15 +24,15 @@ if __name__ == '__main__':
 
				     data_list = ['ORDER_FULLORDR_RATE', 'MONTH6_SALE_QTY_YOY', 'MONTH6_SALE_QTY_MOM', 'MONTH6_SALE_QTY']
			
 
				 
			
 
				     # 清空已有的有序集合（可选，若需要全新的集合可执行此操作）
			
 
				-    # r.delete(zset_key)
			
 
				-    #
			
 
				-    # for item in data_list:
			
 
				-    #     # 生成 80 到 100 之间的随机数，小数点后保留 4 位
			
 
				-    #     score = round(random.uniform(80, 100), 4)
			
 
				-    #     # 将元素和对应的分数添加到有序集合中
			
 
				-    #     r.zadd(zset_key, {item: score})
			
 
				-
			
 
				-    # 从 Redis 中读取有序集合并打印
			
 
				-    result = r.zrange(zset_key, 0, -1, withscores=True)
			
 
				-    for item, score in result:
			
 
				-        print(f"元素: {item}, 分数: {score}")
			
 
				+    r.delete(zset_key)
			
 
				+    
			
 
				+    for item in data_list:
			
 
				+        # 生成 80 到 100 之间的随机数，小数点后保留 4 位
			
 
				+        score = round(random.uniform(80, 100), 4)
			
 
				+        # 将元素和对应的分数添加到有序集合中
			
 
				+        r.zadd(zset_key, {item: score})
			
 
				+
			
 
				+    # # 从 Redis 中读取有序集合并打印
			
 
				+    # result = r.zrange(zset_key, 0, -1, withscores=True)
			
 
				+    # for item, score in result:
			
 
				+    #     print(f"元素: {item}, 分数: {score}")
			
--- a/models/recall/hot_recall.py
+++ b/models/recall/hot_recall.py
@@ -9,37 +9,46 @@
 
				 '''
			
 
				 import pandas as pd
			
 
				 from dao.redis_db import Redis
			
 
				+from dao.mysql_client import Mysql
			
 
				 import random
			
 
				+from tqdm import tqdm
			
 
				 import joblib
			
 
				+
			
 
				 random.seed(12345)
			
 
				 class HotRecallModel:
			
 
				     """TODO 1. 将加载数据修改为数据库加载
			
 
				             2. 将结果保存到redis数据库中"""
			
 
				     def __init__(self):
			
 
				-        self.redis_db = Redis()
			
 
				-        hotkeys = self.get_hotkeys()
			
 
				-        print(hotkeys)
			
 
				+        self._redis_db = Redis()
			
 
				+        self._hotkeys = self.get_hotkeys()
			
 
				+        self._order_data = self._load_data_from_dataset()
			
 
				 
			
 
				 
			
 
				     def get_hotkeys(self):
			
 
				-        info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
			
 
				-        hotkey = tuple()
			
 
				-        results = []
			
 
				-        for item, score in info:
			
 
				-            hotkey += (item, score)
			
 
				-            results.append(hotkey)
			
 
				-            print(f"元素: {item}, 分数: {score}")
			
 
				-
			
 
				-        return results
			
 
				+        info = self._redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
			
 
				+        hotkeys = []
			
 
				+        for item, _ in info:
			
 
				+            hotkeys.append(item)
			
 
				+        return hotkeys
			
 
				 
			
 
				 
			
 
				-    def load_dataset(self, data_path):
			
 
				-        self._order_data = pd.read_excel(data_path)
			
 
				+    def _load_data_from_dataset(self):
			
 
				+        """从数据库中读取数据"""
			
 
				+        client = Mysql()
			
 
				+        tablename = "mock_order"
			
 
				+        query_text = "*"
			
 
				+    
			
 
				+        df = client.load_data(tablename, query_text)
			
 
				+    
			
 
				+        # 去除重复值和填补缺失值
			
 
				+        df.drop_duplicates(inplace=True)
			
 
				+        df.fillna(0, inplace=True)
			
 
				+        return df
			
 
				         
			
 
				     def _calculate_hot_score(self, hot_name):
			
 
				         """
			
 
				         根据热度指标计算热度得分
			
 
				-        :param hot_name: 热度指标
			
 
				+        :param hot_name: 热度指标A
			
 
				         :type param: string
			
 
				         :return: 所有热度指标的得分
			
 
				         :rtype: list
			
@@ -48,37 +57,37 @@ class HotRecallModel:
 
				         sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
			
 
				         item_hot_score = []
			
 
				         # mock热度召回最大分数
			
 
				-        max_score = random.randint(85,100) * 0.01
			
 
				+        max_score = 1.0
			
 
				         total_score = sorted_results.loc[0, hot_name] / max_score
			
 
				         for row in sorted_results.itertuples(index=True, name="Row"):
			
 
				             item = {row[1]:(row[2]/total_score)*100}
			
 
				             item_hot_score.append(item)
			
 
				-        return {"key":f"hot:{hot_name}", "value":item_hot_score}
			
 
				+        return {"key":f"{hot_name}", "value":item_hot_score}
			
 
				 
			
 
				     def calculate_all_hot_score(self):
			
 
				         """
			
 
				         计算所有的热度指标得分
			
 
				         """
			
 
				-        hot_datas = []
			
 
				-        for col in list(self._order_data[2:]):
			
 
				-            hot_datas.appends(self._calculate_hot_score(col))
			
 
				-            
			
 
				-        return hot_datas
			
 
				+        # hot_datas = []
			
 
				+        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
			
 
				+            self.to_redis(self._calculate_hot_score(hotkey_name))
			
 
				+
			
 
				+    def to_redis(self, rec_content_score):
			
 
				+        hotkey_name = rec_content_score["key"]
			
 
				+        rec_item_id = "hot:" + str(hotkey_name)  # 修正 rec_item_id 拼接方式
			
 
				+        res = {}
			
 
				 
			
 
				-    def to_redis(self, city, hotkey_name, rec_content_score):
			
 
				-        # rec_content_score的格式为：（零售户id，分数）
			
 
				-        rec_item_id = "hot:" + city + ":" + str(hotkey_name)
			
 
				-        res = dict()
			
 
				-        for content, score in rec_content_score.items():
			
 
				-            res[content] = score
			
 
				+        # rec_content_score["value"] 是一个包含字典的列表
			
 
				+        for item in rec_content_score["value"]:  
			
 
				+            for content, score in item.items():  # item 形如 {A001: 75.0}
			
 
				+                res[content] = float(score)  # 确保 score 是 float 类型
			
 
				 
			
 
				-        if len(res) > 0:
			
 
				-            data = dict({rec_item_id: res})
			
 
				-            for item, value in data.items():
			
 
				-                self.redis_db.redis.zadd(item, value)
			
 
				+        if res:  # 只有当 res 不为空时才执行 zadd
			
 
				+            self._redis_db.redis.zadd(rec_item_id, res)
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     # 序列化
			
 
				     model = HotRecallModel()
			
 
				+    model.calculate_all_hot_score()
			
 
				     # joblib.dump(model, "hot_recall.model")