Sherlock1011 1 gadu atpakaļ
vecāks
revīzija
4c2486ab80
2 mainīti faili ar 53 papildinājumiem un 44 dzēšanām
  1. +12 -12
      dao/redis_db.py
  2. +41 -32
      models/recall/hot_recall.py

+ 12 - 12
dao/redis_db.py

@@ -24,15 +24,15 @@ if __name__ == '__main__':
     data_list = ['ORDER_FULLORDR_RATE', 'MONTH6_SALE_QTY_YOY', 'MONTH6_SALE_QTY_MOM', 'MONTH6_SALE_QTY']
 
     # 清空已有的有序集合(可选,若需要全新的集合可执行此操作)
-    # r.delete(zset_key)
-    #
-    # for item in data_list:
-    #     # 生成 80 到 100 之间的随机数,小数点后保留 4 位
-    #     score = round(random.uniform(80, 100), 4)
-    #     # 将元素和对应的分数添加到有序集合中
-    #     r.zadd(zset_key, {item: score})
-
-    # 从 Redis 中读取有序集合并打印
-    result = r.zrange(zset_key, 0, -1, withscores=True)
-    for item, score in result:
-        print(f"元素: {item}, 分数: {score}")
+    r.delete(zset_key)
+    
+    for item in data_list:
+        # 生成 80 到 100 之间的随机数,小数点后保留 4 位
+        score = round(random.uniform(80, 100), 4)
+        # 将元素和对应的分数添加到有序集合中
+        r.zadd(zset_key, {item: score})
+
+    # # 从 Redis 中读取有序集合并打印
+    # result = r.zrange(zset_key, 0, -1, withscores=True)
+    # for item, score in result:
+    #     print(f"元素: {item}, 分数: {score}")

+ 41 - 32
models/recall/hot_recall.py

@@ -9,37 +9,46 @@
 '''
 import pandas as pd
 from dao.redis_db import Redis
+from dao.mysql_client import Mysql
 import random
+from tqdm import tqdm
 import joblib
+
 random.seed(12345)
 class HotRecallModel:
     """TODO 1. 将加载数据修改为数据库加载
             2. 将结果保存到redis数据库中"""
     def __init__(self):
-        self.redis_db = Redis()
-        hotkeys = self.get_hotkeys()
-        print(hotkeys)
+        self._redis_db = Redis()
+        self._hotkeys = self.get_hotkeys()
+        self._order_data = self._load_data_from_dataset()
 
 
     def get_hotkeys(self):
-        info = self.redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
-        hotkey = tuple()
-        results = []
-        for item, score in info:
-            hotkey += (item, score)
-            results.append(hotkey)
-            print(f"元素: {item}, 分数: {score}")
-
-        return results
+        info = self._redis_db.redis.zrange("hotkeys", 0, -1, withscores=True)
+        hotkeys = []
+        for item, _ in info:
+            hotkeys.append(item)
+        return hotkeys
 
 
-    def load_dataset(self, data_path):
-        self._order_data = pd.read_excel(data_path)
+    def _load_data_from_dataset(self):
+        """从数据库中读取数据"""
+        client = Mysql()
+        tablename = "mock_order"
+        query_text = "*"
+    
+        df = client.load_data(tablename, query_text)
+    
+        # 去除重复值和填补缺失值
+        df.drop_duplicates(inplace=True)
+        df.fillna(0, inplace=True)
+        return df
         
     def _calculate_hot_score(self, hot_name):
         """
         根据热度指标计算热度得分
-        :param hot_name: 热度指标
+        :param hot_name: 热度指标A
         :type param: string
         :return: 所有热度指标的得分
         :rtype: list
@@ -48,37 +57,37 @@ class HotRecallModel:
         sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
         item_hot_score = []
         # mock热度召回最大分数
-        max_score = random.randint(85,100) * 0.01
+        max_score = 1.0
         total_score = sorted_results.loc[0, hot_name] / max_score
         for row in sorted_results.itertuples(index=True, name="Row"):
             item = {row[1]:(row[2]/total_score)*100}
             item_hot_score.append(item)
-        return {"key":f"hot:{hot_name}", "value":item_hot_score}
+        return {"key":f"{hot_name}", "value":item_hot_score}
 
     def calculate_all_hot_score(self):
         """
         计算所有的热度指标得分
         """
-        hot_datas = []
-        for col in list(self._order_data[2:]):
-            hot_datas.appends(self._calculate_hot_score(col))
-            
-        return hot_datas
+        # hot_datas = []
+        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
+            self.to_redis(self._calculate_hot_score(hotkey_name))
+
+    def to_redis(self, rec_content_score):
+        hotkey_name = rec_content_score["key"]
+        rec_item_id = "hot:" + str(hotkey_name)  # 修正 rec_item_id 拼接方式
+        res = {}
 
-    def to_redis(self, city, hotkey_name, rec_content_score):
-        # rec_content_score的格式为:(零售户id,分数)
-        rec_item_id = "hot:" + city + ":" + str(hotkey_name)
-        res = dict()
-        for content, score in rec_content_score.items():
-            res[content] = score
+        # rec_content_score["value"] 是一个包含字典的列表
+        for item in rec_content_score["value"]:  
+            for content, score in item.items():  # item 形如 {A001: 75.0}
+                res[content] = float(score)  # 确保 score 是 float 类型
 
-        if len(res) > 0:
-            data = dict({rec_item_id: res})
-            for item, value in data.items():
-                self.redis_db.redis.zadd(item, value)
+        if res:  # 只有当 res 不为空时才执行 zadd
+            self._redis_db.redis.zadd(rec_item_id, res)
 
 
 if __name__ == "__main__":
     # 序列化
     model = HotRecallModel()
+    model.calculate_all_hot_score()
     # joblib.dump(model, "hot_recall.model")