Explorar el Código

按照城市id读取数据,redis增加城市id的key

Sherlock hace 1 año
padre
commit
c03329a000

+ 22 - 12
app.py

@@ -4,10 +4,10 @@ from dao.redis_db import Redis
 from models import HotRecallModel, UserItemScore, ItemCFModel, calculate_similarity_and_save_results
 import os
 
-def run_hot_recall(order_data):
+def run_hot_recall(order_data, city_uuid):
     """运行热度召回算法"""
     hot_model = HotRecallModel(order_data)
-    hot_model.calculate_all_hot_score()
+    hot_model.calculate_all_hot_score(city_uuid)
     print("热度召回已完成!")
 
 def run_itemcf(order_data, args):
@@ -28,7 +28,7 @@ def run_itemcf(order_data, args):
     
     # 运行协同过滤召回
     itemcf_model = ItemCFModel()
-    itemcf_model.train(args.interst_score_path, args.similarity_matrix_path, args.n, args.k, args.top_n, args.n_jobs)
+    itemcf_model.train(args.interst_score_path, args.similarity_matrix_path, args.city_uuid, args.n, args.k, args.top_n, args.n_jobs)
     print("协同过滤已完成!")
 
 def run_itemcf_inference(product_code):
@@ -58,9 +58,10 @@ def run():
     # parser.add_argument("--interst_score_path", type=str, default="./models/recall/itemCF/matrix/score.csv")
     # parser.add_argument("--similarity_matrix_path", type=str, default="./models/recall/itemCF/matrix/similarity.csv")
     parser.add_argument("--n", type=int, default=100)
-    parser.add_argument("--k", type=int, default=10)
-    parser.add_argument("--top_n", type=int, default=200, help='default n * k')
+    parser.add_argument("--k", type=int, default=20)
+    parser.add_argument("--top_n", type=int, default=2000, help='default n * k')
     parser.add_argument("--n_jobs", type=int, default=4)
+    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011441801', help="City UUID for filtering data")
     
     # 协同过滤推理配置
     parser.add_argument("--product_code", type=int, default=110111)
@@ -75,17 +76,26 @@ def run():
     
     
     if args.run_all:
-        order_data = load_order_data_from_mysql()
-        run_hot_recall(order_data)
-        run_itemcf(order_data, args)
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_hot_recall(order_data, args.city_uuid)
+            run_itemcf(order_data, args)
+        else:
+            print("数据库中暂无数据")
         
     elif args.run_hot:
-        order_data = load_order_data_from_mysql()
-        run_hot_recall(order_data)
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_hot_recall(order_data, args.city_uuid)
+        else:
+            print("数据库中暂无数据")
         
     elif args.run_itemcf:
-        order_data = load_order_data_from_mysql()
-        run_itemcf(order_data, args)
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_itemcf(order_data, args)
+        else:
+            print("数据库中暂无数据")  
         
     elif args.run_itemcf_inference:
         recomments = run_itemcf_inference(args.product_code)

+ 5 - 2
dao/dao.py

@@ -1,14 +1,17 @@
 from dao import Mysql
 
-def load_order_data_from_mysql():
+def load_order_data_from_mysql(city_uuid):
     """从数据库中读取数据"""
     client = Mysql()
     tablename = "tads_brandcul_cust_order"
     query_text = "*"
     
-    df = client.load_data(tablename, query_text)
+    df = client.load_data(tablename, query_text, city_uuid)
+    if len(df) == 0:
+        return None
     
     df.drop('stat_month', axis=1, inplace=True)
+    df.drop('city_uuid', axis=1, inplace=True)
     print(df.columns)
     
      # 去除重复值和填补缺失值

+ 9 - 7
dao/mysql_client.py

@@ -39,22 +39,24 @@ class Mysql(object):
         """创建返回一个新的数据库session"""
         return self._DBSession()
     
-    def fetch_data_with_pagination(self, tablename, query_text, page=1, page_size=1000):
-        """分页查询数据"""
-        offset = (page - 1) * page_size # 计算偏移量
-        query = text(f"select {query_text} from {tablename} LIMIT :limit OFFSET :offset")
+    def fetch_data_with_pagination(self, tablename, query_text, city_uuid, page=1, page_size=1000):
+        """分页查询数据,并根据 city_uuid 进行过滤"""
+        offset = (page - 1) * page_size  # 计算偏移量
+        query = text(f"SELECT {query_text} FROM {tablename} WHERE city_uuid = :city_uuid LIMIT :limit OFFSET :offset")
+    
         with self.create_session() as session:
-            results = session.execute(query, {"limit": page_size, "offset": offset}).fetchall()
+            results = session.execute(query, {"city_uuid": city_uuid, "limit": page_size, "offset": offset}).fetchall()
             df = pd.DataFrame(results)
+    
         return df
     
-    def load_data(self, tablename, query_text, page=1, page_size=1000):
+    def load_data(self, tablename, query_text, city_uuid, page=1, page_size=1000):
         # 创建一个空的DataFrame用于存储所有数据
         total_df = pd.DataFrame()
     
         try:
             while True:
-                df = self.fetch_data_with_pagination(tablename, query_text, page, page_size)
+                df = self.fetch_data_with_pagination(tablename, query_text, city_uuid, page, page_size)
                 if df.empty:
                     break
             

+ 15 - 14
models/recall/hot_recall.py

@@ -27,18 +27,18 @@ class HotRecallModel:
         return hotkeys
 
 
-    def _load_data_from_dataset(self):
-        """从数据库中读取数据"""
-        client = Mysql()
-        tablename = "mock_order"
-        query_text = "*"
+    # def _load_data_from_dataset(self):
+    #     """从数据库中读取数据"""
+    #     client = Mysql()
+    #     tablename = "mock_order"
+    #     query_text = "*"
     
-        df = client.load_data(tablename, query_text)
+    #     df = client.load_data(tablename, query_text)
     
-        # 去除重复值和填补缺失值
-        df.drop_duplicates(inplace=True)
-        df.fillna(0, inplace=True)
-        return df
+    #     # 去除重复值和填补缺失值
+    #     df.drop_duplicates(inplace=True)
+    #     df.fillna(0, inplace=True)
+    #     return df
         
     def _calculate_hot_score(self, hot_name):
         """
@@ -59,17 +59,18 @@ class HotRecallModel:
             item_hot_score.append(item)
         return {"key":f"{hot_name}", "value":item_hot_score}
 
-    def calculate_all_hot_score(self):
+    def calculate_all_hot_score(self, city_uuid):
         """
         计算所有的热度指标得分
         """
         # hot_datas = []
         for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
-            self.to_redis(self._calculate_hot_score(hotkey_name))
+            self.to_redis(self._calculate_hot_score(hotkey_name), city_uuid)
 
-    def to_redis(self, rec_content_score):
+    def to_redis(self, rec_content_score, city_uuid):
         hotkey_name = rec_content_score["key"]
-        rec_item_id = "hot:" + str(hotkey_name)  # 修正 rec_item_id 拼接方式
+        rec_item_id = f"hot:{city_uuid}:{str(hotkey_name)}" # 修正 rec_item_id 拼接方式
+        # rec_item_id = "hot:" + city_uuid + ":" + str(hotkey_name)  
         res = {}
 
         # rec_content_score["value"] 是一个包含字典的列表

+ 4 - 4
models/recall/itemCF/ItemCF.py

@@ -10,7 +10,7 @@ class ItemCFModel:
     def __init__(self):
         self._recommendations = {}
         
-    def train(self, score_path, similatity_path, n=100, k=10, top_n=100, n_jobs=4):
+    def train(self, score_path, similatity_path, city_uuid, n=100, k=10, top_n=100, n_jobs=4):
         self._score_df = pd.read_csv(score_path)
         self._similarity_df = pd.read_csv(similatity_path, index_col=0)
         self._similarity_matrix = csr_matrix(self._similarity_df.values)
@@ -56,16 +56,16 @@ class ItemCFModel:
         print(len(results))
         # 存储结果
         self._recommendations = {product_code: sorted_candidates for product_code, sorted_candidates in results}
-        self.to_redis_zset()
+        self.to_redis_zset(city_uuid)
     
-    def to_redis_zset(self):
+    def to_redis_zset(self, city_uuid):
         """
         将 self._recommendations 中的数据保存到 Redis 的 Sorted Set (ZSET) 中
         存储格式为 fc:product_code,其中商户 ID 作为成员,得分作为分数
         """
         redis_db = Redis()
         for product_code, recommendations in tqdm(self._recommendations.items(), desc="train:正在存储推荐结果"):
-            redis_key = f"fc:{product_code}"
+            redis_key = f"fc:{city_uuid}:{product_code}"
             zset_data = {}
             for rec in recommendations:
                 for shop_id, score in rec.items():

+ 1 - 0
models/recall/itemCF/user_item_score.py

@@ -25,6 +25,7 @@ class UserItemScore:
             "MONTH6_SALE_AMT_YOY": Decimal(0.1),
             "MONTH6_SALE_AMT_MOM": Decimal(0.1),
             "ORDER_FULLORDR_RATE": Decimal(0.1),
+            "CUSTOMER_REPURCHASE_RATE": Decimal(0.1),
             "NEW_PRODUCT_ORDER_QTY_OCC": Decimal(0.03),
             "LISTING_RATE": Decimal(0.1),
             "OUT_STOCK_DAYS": Decimal(0.02),