Procházet zdrojové kódy

fix(models): query real sale_qty for core customers in Item2Vec instead of defaulting to 0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Sherlock před 1 týdnem
rodič
revize
e07839dc0a
Změněny 2 soubory: 28 přidání a 5 odebrání
  1. 18 0
      database/dao/mysql_dao.py
  2. 10 5
      models/item2vec/inference.py

+ 18 - 0
database/dao/mysql_dao.py

@@ -207,6 +207,24 @@ class MySqlDao:
 
         return data
     
+    def get_order_by_cust_ids_and_product_ids(self, city_uuid, cust_id_list, product_ids):
+        """Fetch sales rows (cust_code, product_code, sale_qty) for the given customers, limited to the given products, within one city; returns an empty DataFrame when either list is empty."""
+        logger.info(f"Getting orders by cust ids and product ids for city_uuid={city_uuid}, custs={len(cust_id_list)}, products={len(product_ids)}")
+        if not cust_id_list or not product_ids:  # nothing to match on, so skip the query entirely
+            return pd.DataFrame()
+
+        query = text(f"""
+            SELECT cust_code, product_code, sale_qty
+            FROM {self._order_tablename}
+            WHERE city_uuid = :city_uuid
+            AND cust_code IN :cust_ids
+            AND product_code IN :product_ids
+        """).bindparams(bindparam("cust_ids", expanding=True), bindparam("product_ids", expanding=True))  # expanding=True lets each IN clause be bound to a Python list
+        params = {"city_uuid": city_uuid, "cust_ids": list(cust_id_list), "product_ids": list(product_ids)}  # copied to lists before binding
+        data = pd.DataFrame(self.db_helper.fetch_all(query, params))  # NOTE(review): resulting columns depend on what fetch_all returns — confirm
+
+        return data
+
     def get_order_by_product(self, city_uuid, product_id):
         logger.info(f"Getting orders by product for city_uuid={city_uuid}, product_id={product_id}")
         query = f"""

+ 10 - 5
models/item2vec/inference.py

@@ -59,12 +59,17 @@ class Item2VecModel:
             .sort_values(["sale_qty", "cust_code"], ascending=[False, True])
         )
 
-        # 将 cust_code_list 中不在候选集的商户补入,sale_qty=0 参与归一化
+        # 将 cust_code_list 中不在候选集的商户补入,查询其在相似品规上的真实销量参与归一化
         existing_custs = set(recommend_cust["cust_code"].tolist())
-        extra_rows = [{"cust_code": c, "sale_qty": 0} for c in cust_code_list if c not in existing_custs]
-        if extra_rows:
-            extra_df = pd.DataFrame(extra_rows)
-            recommend_cust = pd.concat([recommend_cust, extra_df], ignore_index=True)
+        missing_custs = [c for c in cust_code_list if c not in existing_custs]
+        if missing_custs:
+            extra_order = self._dao.get_order_by_cust_ids_and_product_ids(self._city_uuid, missing_custs, product_list)
+            if not extra_order.empty:
+                extra_order["sale_qty"] = extra_order["sale_qty"].fillna(0)
+                extra_order = extra_order.groupby("cust_code", as_index=False)["sale_qty"].sum()
+            else:
+                extra_order = pd.DataFrame({"cust_code": missing_custs, "sale_qty": 0})
+            recommend_cust = pd.concat([recommend_cust, extra_order], ignore_index=True)
 
         if recommend_cust.empty:
             logger.warning(f"No candidates found for product {product_code} and cust_code_list is empty — returning empty list")