1 år sedan · 079b03bf0a
--- a/app.py
+++ b/app.py
@@ -59,9 +59,9 @@ def run():
 
															     # parser.add_argument("--similarity_matrix_path", type=str, default="./models/recall/itemCF/matrix/similarity.csv")
														
 
															     parser.add_argument("--n", type=int, default=100)
														
 
															     parser.add_argument("--k", type=int, default=20)
														
 
															-    parser.add_argument("--top_n", type=int, default=2000, help='default n * k')
														
 
															+    parser.add_argument("--top_n", type=int, default=200, help='default n * k')
														
 
															     parser.add_argument("--n_jobs", type=int, default=4)
														
 
															-    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011441801', help="City UUID for filtering data")
														
 
															+    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301', help="City UUID for filtering data")
														
 
															     # 协同过滤推理配置
														
 
															     parser.add_argument("--product_code", type=int, default=110111)
														
--- a/dao/__init__.py
+++ b/dao/__init__.py
@@ -1,11 +1,15 @@
 
															 #!/usr/bin/env python3
														
 
															 # -*- coding:utf-8 -*-
														
 
															 from dao.mysql_client import Mysql
														
 
															-from dao.dao import load_order_data_from_mysql, load_cust_data_from_mysql, load_product_data_from_mysql
														
 
															+from dao.dao import load_order_data_from_mysql, load_cust_data_from_mysql, load_product_data_from_mysql, get_product_by_id, get_custs_by_ids
														
 
															+from dao.redis_db import Redis
														
 
															 __all__ = [
														
 
															     "Mysql",
														
 
															     "load_order_data_from_mysql",
														
 
															     "load_cust_data_from_mysql",
														
 
															-    "load_product_data_from_mysql"
														
 
															+    "load_product_data_from_mysql",
														
 
															+    "Redis",
														
 
															+    "get_product_by_id",
														
 
															+    "get_custs_by_ids"
														
 
															 ]
														
--- a/dao/dao.py
+++ b/dao/dao.py
@@ -43,6 +43,22 @@ def load_product_data_from_mysql(city_uuid):
 
															     return df
														
 
															+def get_product_by_id(city_uuid, product_id):
														
 
															+    client = Mysql()
														
 
															+    
														
 
															+    res = client.get_product_by_id(city_uuid, product_id)
														
 
															+    if len(res) == 0:
														
 
															+        return None
														
 
															+    return res
														
 
															+
														
 
															+def get_custs_by_ids(city_uuid, cust_ids):
														
 
															+    client = Mysql()
														
 
															+    
														
 
															+    res = client.get_cust_by_ids(city_uuid, cust_ids)
														
 
															+    if len(res) == 0:
														
 
															+        return None
														
 
															+    return res
														
 
															+
														
 
															 if __name__ == '__main__':
														
 
															     data = load_order_data_from_mysql("00000000000000000000000011445301")
														
 
															     print(data)
														
--- a/dao/mysql_client.py
+++ b/dao/mysql_client.py
@@ -72,6 +72,40 @@ class Mysql(object):
 
															             self.closed()
														
 
															             return total_df
														
 
															+    def get_product_by_id(self, city_uuid, product_id):
														
 
															+        """根据 city_uuid 和 product_id 从表中获取品规信息"""
														
 
															+        query = text("""
														
 
															+            SELECT * 
														
 
															+            FROM tads_brandcul_product_info 
														
 
															+            WHERE city_uuid = :city_uuid 
														
 
															+            AND product_code = :product_id
														
 
															+        """)
														
 
															+        
														
 
															+        with self.create_session() as session:
														
 
															+            result = session.execute(query, {"city_uuid": city_uuid, "product_id": product_id}).fetchall()
														
 
															+            result = pd.DataFrame(result)
														
 
															+        return result
														
 
															+        
														
 
															+    def get_cust_by_ids(self, city_uuid, cust_id_list):
														
 
															+        """根据 city_uuid 和 cust_id 列表从表中获取零售户信息"""
														
 
															+        if not cust_id_list:
														
 
															+            return []
														
 
															+        
														
 
															+        cust_id_str = ",".join([f"'{cust_id}'" for cust_id in cust_id_list])
														
 
															+        
														
 
															+        query = text(f"""
														
 
															+            SELECT * 
														
 
															+            FROM tads_brandcul_cust_info 
														
 
															+            WHERE BA_CITY_ORG_CODE = :city_uuid 
														
 
															+            AND BB_RETAIL_CUSTOMER_CODE IN ({cust_id_str})
														
 
															+        """)
														
 
															+        
														
 
															+        with self.create_session() as session:
														
 
															+            results = session.execute(query, {"city_uuid": city_uuid}).fetchall()
														
 
															+            results = pd.DataFrame(results)
														
 
															+        
														
 
															+        return results
														
 
															+        
														
 
															     def load_mock_data(self, tablename, query_text, page=1, page_size=1000):
														
 
															         # 创建一个空的DataFrame用于存储所有数据
														
 
															         total_df = pd.DataFrame()
														
--- a/models/rank/gbdt_lr.py
+++ b/models/rank/gbdt_lr.py
@@ -7,6 +7,7 @@ from sklearn.linear_model import LogisticRegression
 
															 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
														
 
															 from sklearn.model_selection import GridSearchCV
														
 
															 from sklearn.preprocessing import OneHotEncoder
														
 
															+import joblib
														
 
															 class Trainer:
														
 
															     def __init__(self, path):
														
@@ -24,7 +25,8 @@ class Trainer:
 
															             "max_iter": 1000,
														
 
															             'C': 1.0, 
														
 
															             'penalty': 'l2', 
														
 
															-            'solver': 'liblinear',
														
 
															+            # 'l1_ratio': 0.5,  # 添加 l1_ratio 参数，可以根据需要调整
														
 
															+            'solver': 'sag',
														
 
															             'random_state': 42,
														
 
															             'class_weight': 'balanced'
														
 
															         }
														
@@ -82,7 +84,7 @@ class Trainer:
 
															         precision = precision_score(self._test_dataset["label"], y_pred)
														
 
															         recall = recall_score(self._test_dataset["label"], y_pred)
														
 
															         f1 = f1_score(self._test_dataset["label"], y_pred)
														
 
															-        roc_auc = roc_auc_score(self._test_dataset["label"], y_pred_proba)
														
 
															+        roc_auc = roc_auc_score(self._test_dataset["label"], y_pred_proba)    
														
 
															         return {
														
 
															             'accuracy': accuracy,
														
@@ -91,6 +93,11 @@ class Trainer:
 
															             'f1_score': f1,
														
 
															             'roc_auc': roc_auc
														
 
															         }
														
 
															+        
														
 
															+    def save_model(self, model_path):
														
 
															+        """将模型保存到本地"""
														
 
															+        models = {"gbdt_model": self._gbdt_model, "lr_model": self._lr_model, "onehot_encoder": self._onehot_encoder}
														
 
															+        joblib.dump(models, model_path)
														
 
															 if __name__ == "__main__":
														
@@ -103,4 +110,8 @@ if __name__ == "__main__":
 
															     print("GBDT-LR Evaluation Metrics:")
														
 
															     for metric, value in eval_metrics.items():
														
 
															         print(f"{metric}: {value:.4f}")
														
 
															+        
														
 
															+    # 保存模型
														
 
															+    model_path = "./models/rank/weights/model.pkl"
														
 
															+    trainer.save_model(model_path)
														
--- a/models/rank/gbdt_lr_sort.py
+++ b/models/rank/gbdt_lr_sort.py
@@ -0,0 +1,42 @@
 
															+import joblib
														
 
															+from dao import Redis, get_product_by_id, get_custs_by_ids
														
 
															+from models.rank.data import ProductConfig, CustConfig
														
 
															+class GbdtLrSort:
														
 
															+    def __init__(self, model_path):
														
 
															+        self.load_model(model_path)
														
 
															+        self.redis = Redis().redis
														
 
															+    
														
 
															+    def load_model(self, model_path):
														
 
															+        models = joblib.load(model_path)
														
 
															+        self.gbdt_model = models["gbdt_model"], models["lr_model"], models["onehot_encoder"]
														
 
															+        
														
 
															+    
														
 
															+    def get_recall_list(self, city_uuid, product_id):
														
 
															+        """根据卷烟id获取召回的商铺列表"""
														
 
															+        key = f"fc:{city_uuid}:{product_id}"
														
 
															+        self.recall_cust_list = self.redis.zrange(key, 0, -1, withscores=False)
														
 
															+    
														
 
															+    def load_recall_data(self, city_uuid, product_id):
														
 
															+        self.product_data = get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
														
 
															+        self.custs_data = get_custs_by_ids(city_uuid, self.recall_cust_list)[CustConfig.FEATURE_COLUMNS]
														
 
															+        print(self.product_data)
														
 
															+    
														
 
															+    def generate_feats_map(self, city_uuid, product_id):
														
 
															+        """组合卷烟、商户特征矩阵"""
														
 
															+        self.get_recall_list(city_uuid, product_id)
														
 
															+        self.load_recall_data(city_uuid, product_id)
														
 
															+        # 做数据清洗
														
 
															+        
														
 
															+    
														
 
															+    def sort(self, city_uuid, product_id):
														
 
															+        pass
														
 
															+    
														
 
															+    def generate_feats_importance(self):
														
 
															+        pass
														
 
															+    
														
 
															+if __name__ == "__main__":
														
 
															+    model_path = "./models/rank/weights/model.pkl"
														
 
															+    city_uuid = "00000000000000000000000011445301"
														
 
															+    product_id = "110102"
														
 
															+    gbdt_sort = GbdtLrSort(model_path)
														
 
															+    gbdt_sort.generate_feats_map(city_uuid, product_id)