Просмотр исходного кода

fix: address code review issues — cust_code_list default, empty-pool guard, missing-cust warning, stale comment

Sherlock 1 неделя назад
Родитель
Коммит
7598c46825
4 изменённых файла: 13 добавлено и 3 удалено
  1. 1 1
      api/request_body.py
  2. 5 0
      models/item2vec/inference.py
  3. 0 2
      models/recommend.py
  4. 7 0
      utils/reports_process.py

+ 1 - 1
api/request_body.py

@@ -5,7 +5,7 @@ from typing import List
 class RecommendRequest(BaseModel):
     city_uuid: str              # 城市id
     product_code: str           # 卷烟编码
-    cust_code_list: List[str]   # 核心零售户ID列表
+    cust_code_list: List[str] = []  # 核心零售户ID列表
     cultivacation_id: str       # 投放策略id
     limit_cycle_name: str       # 投放周期名称
     

+ 5 - 0
models/item2vec/inference.py

@@ -66,6 +66,11 @@ class Item2VecModel:
             extra_df = pd.DataFrame(extra_rows)
             recommend_cust = pd.concat([recommend_cust, extra_df], ignore_index=True)
 
+        if recommend_cust.empty:
+            logger.warning(f"No candidates found for product {product_code} and cust_code_list is empty — returning empty list")
+            recommend_cust["recommend_score"] = pd.Series(dtype=float)
+            return recommend_cust
+
         # 对销量进行归一化:先 log1p 压缩幂律分布的长尾,再 StandardScaler + sigmoid
         # 不做 log 变换时,头部商户 z-score 过大会导致 sigmoid 饱和,分数全为 100
         log_qty = np.log1p(recommend_cust["sale_qty"].values).reshape(-1, 1)

+ 0 - 2
models/recommend.py

@@ -104,5 +104,3 @@ if __name__ == "__main__":
     recommend_list = recommend.get_recommend_list_by_gbdtlr(product_id)
     # for i in recommend_list:
     #     print(i)
-    # recommend_data = recommend.get_recommend_and_delivery(recommend_list)
-    

+ 7 - 0
utils/reports_process.py

@@ -35,7 +35,14 @@ def build_recommend_report(recommend_data, recommend_cust_infos):
     recommend_data = pd.DataFrame(recommend_data)
 
     cust_ids = recommend_cust_infos.set_index("cust_code")
+    before_join = set(recommend_data["cust_code"])
     recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
+    missing = before_join - set(recommend_data["cust_code"])
+    if missing:
+        import logging
+        logging.getLogger("utils.report").warning(
+            f"build_recommend_report: {len(missing)} cust_codes not found in customer master and dropped from report: {missing}"
+        )
     recommend_data = recommend_data[["cust_code", "cust_name", "recommend_score"]]
     recommend_data = recommend_data.reset_index(drop=True)
     recommend_data.index = recommend_data.index + 1