浏览代码

fix(models): fix get_recommend_list bug, add logging to all model modules

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Sherlock 3 周之前
父节点
当前提交
c84bbbeb69
共有 5 个文件被更改,包括 41 次插入和 20 次删除
  1. +5 -0
      models/item2vec/inference.py
  2. +14 -14
      models/rank/gbdt_lr_inference.py
  3. +4 -1
      models/recall/hot_recall.py
  4. +7 -4
      models/recall/itemCF/ItemCF.py
  5. +11 -1
      models/recommend.py

+ 5 - 0
models/item2vec/inference.py

@@ -5,6 +5,9 @@ from models.rank.data.utils import sample_data_clear
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
+from core import get_logger
+
+logger = get_logger("models.item2vec")
 
 class Item2VecModel:
     def __init__(self, city_uuid):
@@ -14,6 +17,7 @@ class Item2VecModel:
         
     def generate_product_similarity_map(self, product_code):
         """根据product_code生成卷烟相似度矩阵"""
+        logger.info(f"Generating similarity map for product {product_code}")
         product = self._dao.get_product_by_id(self._city_uuid, product_code)[ProductConfig.FEATURE_COLUMNS]
         product = sample_data_clear(product, ProductConfig)
         
@@ -33,6 +37,7 @@ class Item2VecModel:
     
     def get_recommend_cust_list(self, product_code, top=100):
         """获取推荐的商户列表"""
+        logger.info(f"Getting recommend list for product {product_code}, top={top}")
         product_list = self.get_similarity_list(product_code)
         order_data = self._dao.get_order_by_product_ids(self._city_uuid, product_list)[OrderConfig.FEATURE_COLUMNS]
         order_data["sale_qty"] = order_data["sale_qty"].fillna(0)

+ 14 - 14
models/rank/gbdt_lr_inference.py

@@ -9,6 +9,9 @@ from models.rank.data.utils import one_hot_embedding, sample_data_clear
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
+from core import get_logger
+
+logger = get_logger("models.rank.gbdtlr")
 
 def clean_column_name(col):
     """清理列名中的特殊字符,与 one_hot_embedding 保持一致"""
@@ -58,22 +61,19 @@ class GbdtLrModel:
         self.custs_data = self._mysql_dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
     
     def get_recommend_list(self, recommend_sample, recall_list):
-        
+
         gbdt_preds = self.gbdt_model.predict(recommend_sample, pred_leaf=True)
         gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
         scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1] * 100
-        
-        recommend_list = []
-        for cust_id, score in zip(recall_list, scores):
-            recommend_list.append({cust_id: float(score)})
-            recommend_list.append({"cust_code": cust_id, "recommend_score": score})
-            
-        recommend_list = sorted(
-            [item for item in recommend_list if "recommend_score" in item],
-            key=lambda x: x["recommend_score"],
-            reverse=True
-        )
-        
+
+        recommend_list = [
+            {"cust_code": cust_id, "recommend_score": float(score)}
+            for cust_id, score in zip(recall_list, scores)
+        ]
+        recommend_list.sort(key=lambda x: x["recommend_score"], reverse=True)
+
+        logger.info(f"Scored {len(recommend_list)} items in recommend list")
+
         return recommend_list
         
     
@@ -236,7 +236,7 @@ class GbdtLrModel:
                 os.remove(temp_file)
                 os.rmdir(temp_dir)
             except Exception as e:
-                print(f"清理临时文件时出错: {e}")
+                logger.error(f"清理临时文件时出错: {e}")
     
 if __name__ == "__main__":
     model_path = "./models/rank/weights/00000000000000000000000011445301/gbdtlr_model.pkl"

+ 4 - 1
models/recall/hot_recall.py

@@ -5,6 +5,9 @@ from tqdm import tqdm
 
 from models.rank.data.config import OrderConfig
 import numpy as np
+from core import get_logger
+
+logger = get_logger("models.recall.hot")
 
 cfgs = load_model_config()
 
@@ -18,7 +21,7 @@ class HotRecallModel:
     
     def _load_data(self):
         """加载订单记录表"""
-        print("hot_recall: 正在加载order_info...")
+        logger.info("Loading order data")
         self._order_data = self._dao.load_order_data(self._city_uuid)
         self._order_data =self._order_data[OrderConfig.FEATURE_COLUMNS] 
         

+ 7 - 4
models/recall/itemCF/ItemCF.py

@@ -5,6 +5,9 @@ import numpy as np
 from tqdm import tqdm
 from scipy.sparse import csr_matrix
 from joblib import Parallel, delayed
+from core import get_logger
+
+logger = get_logger("models.recall.itemcf")
 
 class ItemCFModel:
     def __init__(self):
@@ -14,11 +17,11 @@ class ItemCFModel:
     def train(self, city_uuid, n=300, k=100, top_n=300, n_jobs=4):
         # self._score_df = pd.read_csv(score_path)
         # self._similarity_df = pd.read_csv(similatity_path, index_col=0)
-        print("itemcf: 正在加载order_info...")
+        logger.info("Loading order data")
         self._order_data = self._dao.load_order_data(city_uuid)
-        print("正在计算品规培育分数...")
+        logger.info("Calculating product scores")
         self._score_df = UserItemScore(self._order_data).generate_product_scores()
-        print("正在计算商户相似度矩阵...")
+        logger.info("Calculating similarity matrix")
         self._similarity_df = SimilarityMatrix(self._order_data).generate_similarity_matrix()
         
         similarity_matrix = csr_matrix(self._similarity_df.values)
@@ -87,7 +90,7 @@ class ItemCFModel:
                     try:
                         zset_data[shop_id] = float(score)
                     except ValueError as e:
-                        print(f"Error converting score to float for shop_id {shop_id}: {score}")
+                        logger.error(f"Error converting score to float for shop_id {shop_id}: {score}")
                         raise e
             
             redis_db.redis.zadd(redis_key, zset_data)

+ 11 - 1
models/recommend.py

@@ -6,6 +6,9 @@ from models.rank.data.config import CustConfig, ProductConfig
 from models.rank.data.utils import sample_data_clear
 from models.rank import GbdtLrModel, generate_feats_map
 import pandas as pd
+from core import get_logger
+
+logger = get_logger("models.recommend")
 
 
 class Recommend:
@@ -21,6 +24,7 @@ class Recommend:
         gbdtlr_model_path = os.path.join("./models/rank/weights", city_uuid, "gbdtlr_model.pkl")
         self._gbdtlr_model = GbdtLrModel(gbdtlr_model_path)
         self._item2vec_model = Item2VecModel(city_uuid)
+        logger.info(f"Models loaded for city_uuid={city_uuid}")
         
     def _get_itemcf_recall(self, product_id):
         """协同召回"""
@@ -46,11 +50,13 @@ class Recommend:
             additional_items = [item for item in hot_recall_list if item in hot_recall_set]
             needed = recall_count - len(result)
             result.extend(additional_items[:needed])
-            
+
+        logger.info(f"Recall completed: {len(result)} customers for product {product_id}")
         return result[:recall_count]
     
     def get_recommend_list_by_gbdtlr(self, product_id, recall_count=500):
         """根据gbdt_lr获取商户推荐列表"""
+        logger.info(f"GBDT-LR recommend started for product {product_id}")
         # 获取召回的商户列表
         recall_cust_list = self.get_recal_cust(product_id, recall_count)
         # 获取卷烟数据
@@ -77,15 +83,18 @@ class Recommend:
         feats_map = generate_feats_map(product_data, cust_data)
         recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, ordered_recall_list)
         # recommend_list = self.filter_recommend_list(recommend_list)
+        logger.info(f"GBDT-LR recommend completed: {len(recommend_list)} results")
         return recommend_list
     
     def get_recommend_list_by_item2vec(self, product_id, recall_count=500):
         """根据item2vec获取商户推荐列表"""
+        logger.info(f"Item2Vec recommend started for product {product_id}")
         recommend_list = self._item2vec_model.get_recommend_cust_list(product_id, top=recall_count)
         recommend_list = recommend_list.drop(columns=["sale_qty"])
         recommend_list = recommend_list.to_dict(orient='records')
         recommend_list = recommend_list[:recall_count]
         # recommend_list = self.filter_recommend_list(recommend_list)
+        logger.info(f"Item2Vec recommend completed: {len(recommend_list)} results")
         return recommend_list
         
     def filter_recommend_list(self, recommend_list):
@@ -120,6 +129,7 @@ class Recommend:
         recommend_data = recommend_data.sort_values(["recommend_score", "cust_code"], ascending=[False, True])
         
         recommend_data = recommend_data.to_dict(orient='records')
+        logger.info(f"Delivery allocation completed for {len(recommend_data)} customers, total={delivery_count}")
         return recommend_data