Эх сурвалжийг харах

根据新数据修改gbdt-lr流程

yangzeyu 11 сар өмнө
parent
commit
920579945f

+ 3 - 3
.gitignore

@@ -1,4 +1,4 @@
-.idea/
-.vscode/
-__pycache__/
+.idea/
+.vscode/
+__pycache__/
 *.pyc

+ 23 - 23
Dockerfile

@@ -1,23 +1,23 @@
-FROM registry.cn-hangzhou.aliyuncs.com/hexiaoshi/python:3.10
-
-RUN apt-get update && apt-get -y install  tzdata cron vim && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
-
-ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-
-WORKDIR /app
-
-COPY . /app/
-
-
-RUN mv /app/crontab /etc/cron.d/crontab && chmod 0644 /etc/cron.d/crontab \ 
-        && /usr/bin/crontab /etc/cron.d/crontab \ 
-        && pip install --upgrade pip setuptools -i https://mirrors.aliyun.com/pypi/simple  \ 
-        && pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
-
-RUN find . | grep -E "(__pycache__|Dockerfile|\.md|\.pyc|\.pyo$)" | xargs rm -rf && python3 -m compileall -b . \ 
-        &&  find . -name "*.py" |xargs rm -rf && touch /var/log/cron.log
-
-VOLUME ["/etc/cron.d"]
-
-CMD /bin/bash -c "/usr/bin/crontab /etc/cron.d/crontab && cron && tail -f /var/log/cron.log"
-
+FROM registry.cn-hangzhou.aliyuncs.com/hexiaoshi/python:3.10
+
+RUN apt-get update && apt-get -y install  tzdata cron vim && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
+
+ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+
+WORKDIR /app
+
+COPY . /app/
+
+
+RUN mv /app/crontab /etc/cron.d/crontab && chmod 0644 /etc/cron.d/crontab \ 
+        && /usr/bin/crontab /etc/cron.d/crontab \ 
+        && pip install --upgrade pip setuptools -i https://mirrors.aliyun.com/pypi/simple  \ 
+        && pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
+
+RUN find . | grep -E "(__pycache__|Dockerfile|\.md|\.pyc|\.pyo$)" | xargs rm -rf && python3 -m compileall -b . \ 
+        &&  find . -name "*.py" |xargs rm -rf && touch /var/log/cron.log
+
+VOLUME ["/etc/cron.d"]
+
+CMD /bin/bash -c "/usr/bin/crontab /etc/cron.d/crontab && cron && tail -f /var/log/cron.log"
+

+ 104 - 104
app.py

@@ -1,105 +1,105 @@
-import argparse
-from dao import load_order_data_from_mysql
-from dao.redis_db import Redis
-from models import HotRecallModel, UserItemScore, ItemCFModel, calculate_similarity_and_save_results
-import os
-
-def run_hot_recall(order_data, city_uuid):
-    """运行热度召回算法"""
-    hot_model = HotRecallModel(order_data)
-    hot_model.calculate_all_hot_score(city_uuid)
-    print("热度召回已完成!")
-
-def run_itemcf(order_data, args):
-    # """运行协同过滤算法"""
-    if os.path.exists(args.interst_score_path) and os.path.exists(args.similarity_matrix_path):
-        os.remove(args.interst_score_path)
-        os.remove(args.similarity_matrix_path)
-    
-    # 计算user-score-item数据
-    cal_interest_scores_model = UserItemScore()
-    scores = cal_interest_scores_model.score(order_data)
-    scores.to_csv(args.interst_score_path, index=False, encoding="utf-8")
-    print("Interest Scores cal done!")
-    
-    # 计算商户共现矩阵及相似度矩阵
-    calculate_similarity_and_save_results(order_data, args.similarity_matrix_path)
-    print("Shops similarity matrix cal done!")
-    
-    # 运行协同过滤召回
-    itemcf_model = ItemCFModel()
-    itemcf_model.train(args.interst_score_path, args.similarity_matrix_path, args.city_uuid, args.n, args.k, args.top_n, args.n_jobs)
-    print("协同过滤已完成!")
-
-def run_itemcf_inference(product_code):
-        """
-        从 Redis 中读取推荐结果,并返回 {shop_id: score} 的列表
-        """
-        redis_db = Redis()
-        redis_key = f"fc:{product_code}"
-        recommendations = redis_db.redis.zrange(redis_key, 0, -1, withscores=True, desc=True)
-        
-        # 将推荐结果转换为 {shop_id: score} 的字典列表
-        result = [{shop_id: float(score)} for shop_id, score in recommendations]
-        
-        return result
-
-def run():
-    parser = argparse.ArgumentParser()
-    
-    # 运行方式
-    parser.add_argument("--run_all", action='store_true')
-    parser.add_argument("--run_hot", action='store_true')
-    parser.add_argument("--run_itemcf", action='store_true')
-    parser.add_argument("--run_itemcf_inference", action='store_true')
-    
-    # 协同过滤相关配置
-    parser.add_argument("--matrix_path", type=str, default="./models/recall/itemCF/matrix")
-    # parser.add_argument("--interst_score_path", type=str, default="./models/recall/itemCF/matrix/score.csv")
-    # parser.add_argument("--similarity_matrix_path", type=str, default="./models/recall/itemCF/matrix/similarity.csv")
-    parser.add_argument("--n", type=int, default=100)
-    parser.add_argument("--k", type=int, default=20)
-    parser.add_argument("--top_n", type=int, default=2000, help='default n * k')
-    parser.add_argument("--n_jobs", type=int, default=4)
-    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301', help="City UUID for filtering data")
-    
-    # 协同过滤推理配置
-    parser.add_argument("--product_code", type=int, default=110111)
-    
-    args = parser.parse_args()
-    
-    # 初始化文件保存相关配置
-    if not os.path.exists(args.matrix_path):
-        os.makedirs(args.matrix_path)
-    args.interst_score_path = os.path.join(args.matrix_path, "score.csv")
-    args.similarity_matrix_path = os.path.join(args.matrix_path, "similarity.csv")
-    
-    
-    if args.run_all:
-        order_data = load_order_data_from_mysql(args.city_uuid)
-        if order_data is not None:
-            run_hot_recall(order_data, args.city_uuid)
-            run_itemcf(order_data, args)
-        else:
-            print("数据库中暂无数据")
-        
-    elif args.run_hot:
-        order_data = load_order_data_from_mysql(args.city_uuid)
-        if order_data is not None:
-            run_hot_recall(order_data, args.city_uuid)
-        else:
-            print("数据库中暂无数据")
-        
-    elif args.run_itemcf:
-        order_data = load_order_data_from_mysql(args.city_uuid)
-        if order_data is not None:
-            run_itemcf(order_data, args)
-        else:
-            print("数据库中暂无数据")  
-        
-    elif args.run_itemcf_inference:
-        recomments = run_itemcf_inference(args.product_code)
-        print(recomments)
-    
-if __name__ == "__main__":
+import argparse
+from dao import load_order_data_from_mysql
+from dao.redis_db import Redis
+from models import HotRecallModel, UserItemScore, ItemCFModel, calculate_similarity_and_save_results
+import os
+
+def run_hot_recall(order_data, city_uuid):
+    """运行热度召回算法"""
+    hot_model = HotRecallModel(order_data)
+    hot_model.calculate_all_hot_score(city_uuid)
+    print("热度召回已完成!")
+
+def run_itemcf(order_data, args):
+    # """运行协同过滤算法"""
+    if os.path.exists(args.interst_score_path) and os.path.exists(args.similarity_matrix_path):
+        os.remove(args.interst_score_path)
+        os.remove(args.similarity_matrix_path)
+    
+    # 计算user-score-item数据
+    cal_interest_scores_model = UserItemScore()
+    scores = cal_interest_scores_model.score(order_data)
+    scores.to_csv(args.interst_score_path, index=False, encoding="utf-8")
+    print("Interest Scores cal done!")
+    
+    # 计算商户共现矩阵及相似度矩阵
+    calculate_similarity_and_save_results(order_data, args.similarity_matrix_path)
+    print("Shops similarity matrix cal done!")
+    
+    # 运行协同过滤召回
+    itemcf_model = ItemCFModel()
+    itemcf_model.train(args.interst_score_path, args.similarity_matrix_path, args.city_uuid, args.n, args.k, args.top_n, args.n_jobs)
+    print("协同过滤已完成!")
+
+def run_itemcf_inference(product_code):
+        """
+        从 Redis 中读取推荐结果,并返回 {shop_id: score} 的列表
+        """
+        redis_db = Redis()
+        redis_key = f"fc:{product_code}"
+        recommendations = redis_db.redis.zrange(redis_key, 0, -1, withscores=True, desc=True)
+        
+        # 将推荐结果转换为 {shop_id: score} 的字典列表
+        result = [{shop_id: float(score)} for shop_id, score in recommendations]
+        
+        return result
+
+def run():
+    parser = argparse.ArgumentParser()
+    
+    # 运行方式
+    parser.add_argument("--run_all", action='store_true')
+    parser.add_argument("--run_hot", action='store_true')
+    parser.add_argument("--run_itemcf", action='store_true')
+    parser.add_argument("--run_itemcf_inference", action='store_true')
+    
+    # 协同过滤相关配置
+    parser.add_argument("--matrix_path", type=str, default="./models/recall/itemCF/matrix")
+    # parser.add_argument("--interst_score_path", type=str, default="./models/recall/itemCF/matrix/score.csv")
+    # parser.add_argument("--similarity_matrix_path", type=str, default="./models/recall/itemCF/matrix/similarity.csv")
+    parser.add_argument("--n", type=int, default=100)
+    parser.add_argument("--k", type=int, default=20)
+    parser.add_argument("--top_n", type=int, default=2000, help='default n * k')
+    parser.add_argument("--n_jobs", type=int, default=4)
+    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301', help="City UUID for filtering data")
+    
+    # 协同过滤推理配置
+    parser.add_argument("--product_code", type=int, default=110111)
+    
+    args = parser.parse_args()
+    
+    # 初始化文件保存相关配置
+    if not os.path.exists(args.matrix_path):
+        os.makedirs(args.matrix_path)
+    args.interst_score_path = os.path.join(args.matrix_path, "score.csv")
+    args.similarity_matrix_path = os.path.join(args.matrix_path, "similarity.csv")
+    
+    
+    if args.run_all:
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_hot_recall(order_data, args.city_uuid)
+            run_itemcf(order_data, args)
+        else:
+            print("数据库中暂无数据")
+        
+    elif args.run_hot:
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_hot_recall(order_data, args.city_uuid)
+        else:
+            print("数据库中暂无数据")
+        
+    elif args.run_itemcf:
+        order_data = load_order_data_from_mysql(args.city_uuid)
+        if order_data is not None:
+            run_itemcf(order_data, args)
+        else:
+            print("数据库中暂无数据")  
+        
+    elif args.run_itemcf_inference:
+        recomments = run_itemcf_inference(args.product_code)
+        print(recomments)
+    
+if __name__ == "__main__":
     run()

+ 6 - 6
config/__init__.py

@@ -1,7 +1,7 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-from config.config import load_config 
-
-__all__ = [
-    "load_config"
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+from config.config import load_config 
+
+__all__ = [
+    "load_config"
 ]

+ 5 - 5
config/config.py

@@ -1,6 +1,6 @@
-import yaml
-
-def load_config():
-    with open('./config/database_config.yaml') as file:
-        config = yaml.safe_load(file)
+import yaml
+
+def load_config():
+    with open('./config/database_config.yaml') as file:
+        config = yaml.safe_load(file)
     return config

+ 12 - 12
config/database_config.yaml

@@ -1,12 +1,12 @@
-mysql:
-  host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
-  port: 3036
-  db: 'brand_cultivation'
-  user: 'BrandCultivation'
-  passwd: '8BfWBc18NBXl#CMd'
-
-redis:
-  host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
-  port: 5000
-  db: 10
-  passwd: 'gHmNkVBd88sZybj'
+mysql:
+  host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
+  port: 3036
+  db: 'brand_cultivation'
+  user: 'BrandCultivation'
+  passwd: '8BfWBc18NBXl#CMd'
+
+redis:
+  host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
+  port: 5000
+  db: 10
+  passwd: 'gHmNkVBd88sZybj'

+ 4 - 4
crontab

@@ -1,4 +1,4 @@
-# START CRON JOB
-11 13 * * * cd /app && /usr/local/bin/python app.pyc --run_all >> /var/log/app.log 2>&1
-#* * * * * echo "asdf" >> /var/log/test.log
-# END CRON JOB
+# START CRON JOB
+11 13 * * * cd /app && /usr/local/bin/python app.pyc --run_all >> /var/log/app.log 2>&1
+#* * * * * echo "asdf" >> /var/log/test.log
+# END CRON JOB

+ 8 - 8
database/__init__.py

@@ -1,9 +1,9 @@
-from database.db.mysql import MySqlDatabaseHelper
-from database.db.redis_db import RedisDatabaseHelper
-from database.dao.mysql_dao import MySqlDao
-
-__all__ = [
-    "MySqlDatabaseHelper",
-    "RedisDatabaseHelper",
-    "MySqlDao"
+from database.db.mysql import MySqlDatabaseHelper
+from database.db.redis_db import RedisDatabaseHelper
+from database.dao.mysql_dao import MySqlDao
+
+__all__ = [
+    "MySqlDatabaseHelper",
+    "RedisDatabaseHelper",
+    "MySqlDao"
 ]

+ 131 - 131
database/dao/mysql_dao.py

@@ -1,132 +1,132 @@
-from database import MySqlDatabaseHelper
-from sqlalchemy import text
-import pandas as pd
-
-class MySqlDao:
-    _instance = None
-    
-    def __new__(cls):
-        if not cls._instance:
-            cls._instance = super(MySqlDao, cls).__new__(cls)
-            cls._instance._initialized = False
-        return cls._instance
-    
-    
-    def __init__(self):
-        if self._initialized:
-            return
-        
-        self.db_helper = MySqlDatabaseHelper()
-        self._product_tablename = "tads_brandcul_product_info"
-        self._cust_tablename = "tads_brandcul_cust_info"
-        self._order_tablename = "tads_brandcul_cust_order"
-        self._mock_order_tablename = "yunfu_mock_data"
-        self._shopping_tablename = "tads_brandcul_cust_info_lbs"
-        # self._shopping_tablename = "yunfu_shopping_mock_data"
-        
-        self._initialized = True
-        
-    def load_product_data(self, city_uuid):
-        """从数据库中读取商品信息"""
-        query = f"SELECT * FROM {self._product_tablename} WHERE city_uuid = :city_uuid"
-        params = {"city_uuid": city_uuid}
-        
-        data = self.db_helper.load_data_with_page(query, params)
-        return data
-        
-    def load_cust_data(self, city_uuid):
-        """从数据库中读取商户信息"""
-        query = f"SELECT * FROM {self._cust_tablename} WHERE BA_CITY_ORG_CODE = :city_uuid"
-        params = {"city_uuid": city_uuid}
-        
-        data = self.db_helper.load_data_with_page(query, params)
-        return data
-    
-    def load_order_data(self, city_uuid):
-        """从数据库中读取订单信息"""
-        query = f"SELECT * FROM {self._order_tablename} WHERE city_uuid = :city_uuid"
-        params = {"city_uuid": city_uuid}
-        
-        data = self.db_helper.load_data_with_page(query, params)
-        data.drop('stat_month', axis=1, inplace=True)
-        data.drop('city_uuid', axis=1, inplace=True)
-        
-        return data
-    
-    def load_mock_order_data(self):
-        """从数据库中读取mock的订单信息"""
-        query = f"SELECT * FROM {self._mock_order_tablename}"
-        
-        data = self.db_helper.load_data_with_page(query, {})
-        
-        return data
-    
-    def load_shopping_data(self, city_uuid):
-        """从数据库中读取商圈数据"""
-        query = f"SELECT * FROM {self._shopping_tablename} WHERE city_uuid = :city_uuid"
-        params = {"city_uuid": city_uuid}
-        
-        data = self.db_helper.load_data_with_page(query, params)
-        
-        return data
-    
-    def get_cust_list(self, city_uuid):
-        """获取商户列表"""
-        data = self.load_cust_data(city_uuid)
-        cust_list = data["BB_RETAIL_CUSTOMER_CODE"].to_list()
-        if len(cust_list) == 0:
-            return []
-        
-        return cust_list
-    
-    def get_product_by_id(self, city_uuid, product_id):
-        """根据city_uuid 和 product_id 从表中获取拼柜信息"""
-        query = text(f"""
-            SELECT *
-            FROM {self._product_tablename}
-            WHERE city_uuid = :city_uuid
-            AND product_code = :product_id
-        """)
-        params = {"city_uuid": city_uuid, "product_id": product_id}
-        data = self.db_helper.fetch_one(query, params)
-        
-        return data
-    
-    def get_cust_by_ids(self, city_uuid, cust_id_list):
-        """根据零售户列表查询其信息"""
-        if not cust_id_list:
-            return None
-        
-        cust_id_str = ",".join([f"'{cust_id}'" for cust_id in cust_id_list])
-        query = text(f"""
-            SELECT *
-            FROM {self._cust_tablename}
-            WHERE BA_CITY_ORG_CODE = :city_uuid
-            AND BB_RETAIL_CUSTOMER_CODE IN ({cust_id_str})
-        """)
-        params = {"city_uuid": city_uuid}
-        data = self.db_helper.fetch_all(query, params)
-        
-        return data
-    
-    def data_preprocess(self, data: pd.DataFrame):
-        
-        data.drop(["cust_uuid", "longitude", "latitude", "range_radius"], axis=1, inplace=True)
-        remaining_cols = data.columns.drop(["city_uuid", "cust_code"])
-        col_with_missing = remaining_cols[data[remaining_cols].isnull().any()].tolist() # 判断有缺失的字段
-        col_all_missing = remaining_cols[data[remaining_cols].isnull().all()].to_list() # 全部缺失的字段
-        col_partial_missing = list(set(col_with_missing) - set(col_all_missing)) # 部分缺失的字段
-        
-        for col in col_partial_missing:
-            data[col] = data[col].fillna(data[col].mean())
-        
-        for col in col_all_missing:
-            data[col] = data[col].fillna(0).infer_objects(copy=False)
-        
-        
-if __name__ == "__main__":
-    dao = MySqlDao()
-    city_uuid = "00000000000000000000000011445301"
-    # city_uuid = "00000000000000000000000011441801"
-    cust_id_list = ["441800100006", "441800100051", "441800100811"]
+from database import MySqlDatabaseHelper
+from sqlalchemy import text
+import pandas as pd
+
+class MySqlDao:
+    _instance = None
+    
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super(MySqlDao, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+    
+    
+    def __init__(self):
+        if self._initialized:
+            return
+        
+        self.db_helper = MySqlDatabaseHelper()
+        self._product_tablename = "tads_brandcul_product_info_f"
+        self._cust_tablename = "tads_brandcul_cust_info_f"
+        self._order_tablename = "tads_brandcul_consumer_order"
+        self._mock_order_tablename = "yunfu_mock_data"
+        self._shopping_tablename = "tads_brandcul_cust_info_lbs_f"
+        # self._shopping_tablename = "yunfu_shopping_mock_data"
+        
+        self._initialized = True
+        
+    def load_product_data(self, city_uuid):
+        """从数据库中读取商品信息"""
+        query = f"SELECT * FROM {self._product_tablename} WHERE city_uuid = :city_uuid"
+        params = {"city_uuid": city_uuid}
+        
+        data = self.db_helper.load_data_with_page(query, params)
+        return data
+        
+    def load_cust_data(self, city_uuid):
+        """从数据库中读取商户信息"""
+        query = f"SELECT * FROM {self._cust_tablename} WHERE BA_CITY_ORG_CODE = :city_uuid"
+        params = {"city_uuid": city_uuid}
+        
+        data = self.db_helper.load_data_with_page(query, params)
+        return data
+    
+    def load_order_data(self, city_uuid):
+        """从数据库中读取订单信息"""
+        query = f"SELECT * FROM {self._order_tablename} WHERE city_uuid = :city_uuid"
+        params = {"city_uuid": city_uuid}
+        
+        data = self.db_helper.load_data_with_page(query, params)
+        data.drop('stat_month', axis=1, inplace=True)
+        data.drop('city_uuid', axis=1, inplace=True)
+        
+        return data
+    
+    def load_mock_order_data(self):
+        """从数据库中读取mock的订单信息"""
+        query = f"SELECT * FROM {self._mock_order_tablename}"
+        
+        data = self.db_helper.load_data_with_page(query, {})
+        
+        return data
+    
+    def load_shopping_data(self, city_uuid):
+        """从数据库中读取商圈数据"""
+        query = f"SELECT * FROM {self._shopping_tablename} WHERE city_uuid = :city_uuid"
+        params = {"city_uuid": city_uuid}
+        
+        data = self.db_helper.load_data_with_page(query, params)
+        
+        return data
+    
+    def get_cust_list(self, city_uuid):
+        """获取商户列表"""
+        data = self.load_cust_data(city_uuid)
+        cust_list = data["BB_RETAIL_CUSTOMER_CODE"].to_list()
+        if len(cust_list) == 0:
+            return []
+        
+        return cust_list
+    
+    def get_product_by_id(self, city_uuid, product_id):
+        """根据city_uuid 和 product_id 从表中获取拼柜信息"""
+        query = text(f"""
+            SELECT *
+            FROM {self._product_tablename}
+            WHERE city_uuid = :city_uuid
+            AND product_code = :product_id
+        """)
+        params = {"city_uuid": city_uuid, "product_id": product_id}
+        data = self.db_helper.fetch_one(query, params)
+        
+        return data
+    
+    def get_cust_by_ids(self, city_uuid, cust_id_list):
+        """根据零售户列表查询其信息"""
+        if not cust_id_list:
+            return None
+        
+        cust_id_str = ",".join([f"'{cust_id}'" for cust_id in cust_id_list])
+        query = text(f"""
+            SELECT *
+            FROM {self._cust_tablename}
+            WHERE BA_CITY_ORG_CODE = :city_uuid
+            AND BB_RETAIL_CUSTOMER_CODE IN ({cust_id_str})
+        """)
+        params = {"city_uuid": city_uuid}
+        data = self.db_helper.fetch_all(query, params)
+        
+        return data
+    
+    def data_preprocess(self, data: pd.DataFrame):
+        
+        data.drop(["cust_uuid", "longitude", "latitude", "range_radius"], axis=1, inplace=True)
+        remaining_cols = data.columns.drop(["city_uuid", "cust_code"])
+        col_with_missing = remaining_cols[data[remaining_cols].isnull().any()].tolist() # 判断有缺失的字段
+        col_all_missing = remaining_cols[data[remaining_cols].isnull().all()].to_list() # 全部缺失的字段
+        col_partial_missing = list(set(col_with_missing) - set(col_all_missing)) # 部分缺失的字段
+        
+        for col in col_partial_missing:
+            data[col] = data[col].fillna(data[col].mean())
+        
+        for col in col_all_missing:
+            data[col] = data[col].fillna(0).infer_objects(copy=False)
+        
+        
+if __name__ == "__main__":
+    dao = MySqlDao()
+    city_uuid = "00000000000000000000000011445301"
+    # city_uuid = "00000000000000000000000011441801"
+    cust_id_list = ["441800100006", "441800100051", "441800100811"]
     cust_list = dao.load_mock_order_data()

+ 113 - 113
database/db/mysql.py

@@ -1,113 +1,113 @@
-from config import load_config
-import pandas as pd
-from sqlalchemy import create_engine, text
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy.exc import SQLAlchemyError
-from tqdm import tqdm
-
-cfgs = load_config()
-
-
-class MySqlDatabaseHelper:
-    _instance = None
-    
-    def __new__(cls):
-        if not cls._instance:
-            cls._instance = super(MySqlDatabaseHelper, cls).__new__(cls)
-            cls._instance._initialized = False
-        return cls._instance
-        
-    def __init__(self):
-        if self._initialized:
-            return
-        
-        self._host = cfgs['mysql']['host']
-        self._port = cfgs['mysql']['port']
-        self._user = cfgs['mysql']['user']
-        self._passwd = cfgs['mysql']['passwd']
-        self._dbname = cfgs['mysql']['db']
-        
-        self.connect_database()
-        self._initialized = True
-        
-    def connect_database(self):
-        # 创建数据库连接
-        try:
-            conn = "mysql+pymysql://" + self._user + ":" + self._passwd + "@" + self._host + ":" + str(self._port) + "/" + self._dbname
-        except Exception as e:
-            raise ConnectionAbortedError(f"failed to create connection string: {e}")
-        
-        # 通过连接池创建engine
-        self.engine = create_engine(
-            conn,
-            pool_size=10, # 设置连接池大小
-            max_overflow=20, # 超过连接池大小时的额外连接数
-            pool_recycle=3600 # 回收连接时间
-        )
-        
-        self._DBSession = sessionmaker(bind=self.engine)
-        
-    def load_data_with_page(self, query, params, page_size=1000):
-        """分页查询数据"""
-        data = pd.DataFrame()
-        count_query = text(query.replace("SELECT *", "SELECT COUNT(*)"))
-        query += " LIMIT :limit OFFSET :offset"
-        query = text(query)
-    
-        # 获取总行数
-        total_rows = self.fetch_one(count_query, params)[0]
-
-        page = 1
-        with tqdm(total=total_rows, desc="Loading data", unit="rows") as pbar:  # 初始化进度条
-            while True:
-                offset = (page - 1) * page_size  # 计算偏移量
-                params["limit"] = page_size
-                params["offset"] = offset
-
-                df = pd.DataFrame(self.fetch_all(query, params))
-                if df.empty:
-                    break
-                data = pd.concat([data, df], ignore_index=True)
-            
-                # 更新进度条
-                pbar.update(len(df))  # 更新进度条的行数
-            
-                page += 1
-        return data
-        
-        
-    def fetch_all(self, query, params=None):
-        """执行SQL查询并返回所有结果"""
-        session = self._DBSession()
-        try:
-            results = session.execute(query, params or {}).fetchall()
-            return results
-        except SQLAlchemyError as e:
-            session.rollback()
-            print(f"error: {e}")
-        finally:
-            session.close()
-            
-    def fetch_one(self, query, params=None):
-        """执行SQL查询并返回单条结果"""
-        session = self._DBSession()
-        try:
-            result = session.execute(query, params or {}).fetchone()
-            return result
-        except SQLAlchemyError as e:
-            session.rollback()
-            print(f"error: {e}")
-        finally:
-            session.close()
-            
-    def execute_query(self, query, params=None):
-        """执行SQL语句 (无返回值, 如INSERT, UPDATE, DELETE)"""
-        session = self._DBSession()
-        try:
-            session.execute(query, params or {})
-            session.commit()
-        except SQLAlchemyError as e:
-            session.rollback()
-            print(f"Error: {e}")
-        finally:
-            session.close()
+from config import load_config
+import pandas as pd
+from sqlalchemy import create_engine, text
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.exc import SQLAlchemyError
+from tqdm import tqdm
+
+cfgs = load_config()
+
+
+class MySqlDatabaseHelper:
+    _instance = None
+    
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super(MySqlDatabaseHelper, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+        
+    def __init__(self):
+        if self._initialized:
+            return
+        
+        self._host = cfgs['mysql']['host']
+        self._port = cfgs['mysql']['port']
+        self._user = cfgs['mysql']['user']
+        self._passwd = cfgs['mysql']['passwd']
+        self._dbname = cfgs['mysql']['db']
+        
+        self.connect_database()
+        self._initialized = True
+        
+    def connect_database(self):
+        # 创建数据库连接
+        try:
+            conn = "mysql+pymysql://" + self._user + ":" + self._passwd + "@" + self._host + ":" + str(self._port) + "/" + self._dbname
+        except Exception as e:
+            raise ConnectionAbortedError(f"failed to create connection string: {e}")
+        
+        # 通过连接池创建engine
+        self.engine = create_engine(
+            conn,
+            pool_size=10, # 设置连接池大小
+            max_overflow=20, # 超过连接池大小时的额外连接数
+            pool_recycle=3600 # 回收连接时间
+        )
+        
+        self._DBSession = sessionmaker(bind=self.engine)
+        
+    def load_data_with_page(self, query, params, page_size=1000):
+        """分页查询数据"""
+        data = pd.DataFrame()
+        count_query = text(query.replace("SELECT *", "SELECT COUNT(*)"))
+        query += " LIMIT :limit OFFSET :offset"
+        query = text(query)
+    
+        # 获取总行数
+        total_rows = self.fetch_one(count_query, params)[0]
+
+        page = 1
+        with tqdm(total=total_rows, desc="Loading data", unit="rows") as pbar:  # 初始化进度条
+            while True:
+                offset = (page - 1) * page_size  # 计算偏移量
+                params["limit"] = page_size
+                params["offset"] = offset
+
+                df = pd.DataFrame(self.fetch_all(query, params))
+                if df.empty:
+                    break
+                data = pd.concat([data, df], ignore_index=True)
+            
+                # 更新进度条
+                pbar.update(len(df))  # 更新进度条的行数
+            
+                page += 1
+        return data
+        
+        
+    def fetch_all(self, query, params=None):
+        """执行SQL查询并返回所有结果"""
+        session = self._DBSession()
+        try:
+            results = session.execute(query, params or {}).fetchall()
+            return results
+        except SQLAlchemyError as e:
+            session.rollback()
+            print(f"error: {e}")
+        finally:
+            session.close()
+            
+    def fetch_one(self, query, params=None):
+        """执行SQL查询并返回单条结果"""
+        session = self._DBSession()
+        try:
+            result = session.execute(query, params or {}).fetchone()
+            return result
+        except SQLAlchemyError as e:
+            session.rollback()
+            print(f"error: {e}")
+        finally:
+            session.close()
+            
+    def execute_query(self, query, params=None):
+        """执行SQL语句 (无返回值, 如INSERT, UPDATE, DELETE)"""
+        session = self._DBSession()
+        try:
+            session.execute(query, params or {})
+            session.commit()
+        except SQLAlchemyError as e:
+            session.rollback()
+            print(f"Error: {e}")
+        finally:
+            session.close()

+ 51 - 51
database/db/redis_db.py

@@ -1,52 +1,52 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-import redis
-from config import load_config
-
-cfgs = load_config()
-
-
-class RedisDatabaseHelper:
-    _instance = None
-    
-    def __new__(cls):
-        if not cls._instance:
-            cls._instance = super(RedisDatabaseHelper, cls).__new__(cls)
-            cls._instance._initialized = False
-        return cls._instance
-        
-    def __init__(self):
-        if self._initialized:
-            return
-        self.redis = redis.StrictRedis(host=cfgs['redis']['host'],
-                                       port=cfgs['redis']['port'],
-                                       password=cfgs['redis']['passwd'],
-                                       db=cfgs['redis']['db'],
-                                       decode_responses=True)
-        
-        self._initialized = True
-
-
-if __name__ == '__main__':
-    import random
-    # 连接到 Redis 服务器
-    r = RedisDatabaseHelper().redis
-
-    # 有序集合的键名
-    zset_key = 'configs:hotkeys'
-
-    data_list = ['ORDER_FULLORDR_RATE', 'MONTH6_SALE_QTY_YOY', 'MONTH6_SALE_QTY_MOM', 'MONTH6_SALE_QTY']
-
-    # 清空已有的有序集合(可选,若需要全新的集合可执行此操作)
-    r.delete(zset_key)
-    
-    for item in data_list:
-        # 生成 80 到 100 之间的随机数,小数点后保留 4 位
-        score = round(random.uniform(80, 100), 4)
-        # 将元素和对应的分数添加到有序集合中
-        r.zadd(zset_key, {item: score})
-
-    # # 从 Redis 中读取有序集合并打印
-    # result = r.zrange(zset_key, 0, -1, withscores=True)
-    # for item, score in result:
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import redis
+from config import load_config
+
+cfgs = load_config()
+
+
+class RedisDatabaseHelper:
+    _instance = None
+    
+    def __new__(cls):
+        if not cls._instance:
+            cls._instance = super(RedisDatabaseHelper, cls).__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+        
+    def __init__(self):
+        if self._initialized:
+            return
+        self.redis = redis.StrictRedis(host=cfgs['redis']['host'],
+                                       port=cfgs['redis']['port'],
+                                       password=cfgs['redis']['passwd'],
+                                       db=cfgs['redis']['db'],
+                                       decode_responses=True)
+        
+        self._initialized = True
+
+
+if __name__ == '__main__':
+    import random
+    # 连接到 Redis 服务器
+    r = RedisDatabaseHelper().redis
+
+    # 有序集合的键名
+    zset_key = 'configs:hotkeys'
+
+    data_list = ['ORDER_FULLORDR_RATE', 'MONTH6_SALE_QTY_YOY', 'MONTH6_SALE_QTY_MOM', 'MONTH6_SALE_QTY']
+
+    # 清空已有的有序集合(可选,若需要全新的集合可执行此操作)
+    r.delete(zset_key)
+    
+    for item in data_list:
+        # 生成 80 到 100 之间的随机数,小数点后保留 4 位
+        score = round(random.uniform(80, 100), 4)
+        # 将元素和对应的分数添加到有序集合中
+        r.zadd(zset_key, {item: score})
+
+    # # 从 Redis 中读取有序集合并打印
+    # result = r.zrange(zset_key, 0, -1, withscores=True)
+    # for item, score in result:
     #     print(f"元素: {item}, 分数: {score}")

+ 163 - 163
gbdt_lr.py

@@ -1,164 +1,164 @@
-import argparse
-import os
-from models.rank import DataProcess, Trainer, GbdtLrModel
-import time
-import pandas as pd
-
-# train_data_path = "./moldes/rank/data/gbdt_data.csv"
-# model_path = "./models/rank/weights"
-
-def train(args):
-    model_dir = os.path.join(args.model_path, args.city_uuid)
-    train_data_dir = args.train_data_dir
-    if not os.path.exists(model_dir):
-        os.makedirs(model_dir)
-    
-    if not os.path.exists(train_data_dir):
-        os.makedirs(train_data_dir)
-    
-    # 准备数据集  
-    print("正在整合训练数据...")
-    processor = DataProcess(args.city_uuid, args.train_data_dir)
-    processor.data_process()
-    print("训练数据整合完成!")
-    
-    # 进行训练
-    print("开始训练原始模型")
-    trainer(args, os.path.join(args.train_data_dir, "original_train_data.csv"), model_dir, "ori_model.pkl")
-    
-    print("开始训练pos模型")
-    trainer(args, os.path.join(args.train_data_dir, "pos_train_data.csv"), model_dir, "pos_model.pkl")
-    
-    print("开始训练shopping模型")
-    trainer(args, os.path.join(args.train_data_dir, "shopping_train_data.csv"), model_dir, "shopping_model.pkl")
-
-def trainer(args, train_data_path, model_dir, model_name):
-    trainer = Trainer(train_data_path)
-    
-    start_time = time.time()
-    trainer.train()
-    end_time = time.time()
-    
-    training_time_hours = (end_time - start_time) / 3600
-    print(f"训练时间: {training_time_hours:.4f} 小时")
-    
-    eval_metrics = trainer.evaluate()
-    
-    # 输出评估结果
-    print("GBDT-LR Evaluation Metrics:")
-    for metric, value in eval_metrics.items():
-        print(f"{metric}: {value:.4f}")
-        
-    # 保存模型
-    trainer.save_model(os.path.join(model_dir, model_name))
-
-def recommend_by_product(args):
-    model_dir = os.path.join(args.model_path, args.city_uuid)
-    if not os.path.exists(model_dir):
-        print("暂无该城市的模型,请先进行模型训练")
-        return
-    
-    # 加载模型
-    model = GbdtLrModel(os.path.join(model_dir, args.model_name))
-    recommend_list = model.sort(args.city_uuid, args.product_id)
-    for item in recommend_list[:min(args.last_n, len(recommend_list))]:
-        print(item)
-
-def get_features_importance(args):
-    model_dir = os.path.join(args.model_path, args.city_uuid)
-    if not os.path.exists(model_dir):
-        print("暂无该城市的模型,请先进行模型训练")
-        return
-    
-    # # 加载模型
-    # model = GbdtLrModel(os.path.join(model_dir, args.model_name))
-    # cust_features_importance, product_features_importance = model.generate_feats_importance()
-    
-    # # 将字典列表转换为 DataFrame
-    # cust_df = pd.DataFrame([
-    #     {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
-    #     for item in cust_features_importance
-    # ])
-    
-    # product_df = pd.DataFrame([
-    #     {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
-    #     for item in product_features_importance
-    # ])
-    
-    # cust_file_path = os.path.join(model_dir, "cust_features_importance.csv")
-    # product_file_path = os.path.join(model_dir, "product_features_importance.csv")
-    # cust_df.to_csv(cust_file_path, index=False, encoding='utf-8')
-    # product_df.to_csv(product_file_path, index=False, encoding='utf-8')
-    
-    get_features_importance_by_model(model_dir, "ori_model")
-    get_features_importance_by_model(model_dir, "pos_model")
-    get_features_importance_by_model(model_dir, "shopping_model")
-    
-def get_features_importance_by_model(model_dir, modelname):
-    model = GbdtLrModel(os.path.join(model_dir, f"{modelname}.pkl"))
-    cust_features_importance, product_features_importance, order_features_importance = model.generate_feats_importance()
-    
-    # 将字典列表转换为 DataFrame
-    cust_df = pd.DataFrame([
-        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
-        for item in cust_features_importance
-    ])
-    
-    product_df = pd.DataFrame([
-        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
-        for item in product_features_importance
-    ])
-    
-    order_df = pd.DataFrame([
-        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
-        for item in order_features_importance
-    ])
-    
-    importance_dir = os.path.join(model_dir, "importance")
-    if modelname == 'ori_model':
-        importance_dir = os.path.join(importance_dir, "ori")
-    elif modelname == 'pos_model':
-        importance_dir = os.path.join(importance_dir, "pos")
-    elif modelname == 'shopping_model':
-        importance_dir = os.path.join(importance_dir, "shopping")
-    
-    if not os.path.exists(importance_dir):
-        os.makedirs(importance_dir)
-        
-    cust_file_path = os.path.join(importance_dir, "cust_features_importance.csv")
-    product_file_path = os.path.join(importance_dir, "product_features_importance.csv")
-    order_file_path = os.path.join(importance_dir, "order_features_importance.csv")
-    
-    cust_df.to_csv(cust_file_path, index=False, encoding='utf-8')
-    product_df.to_csv(product_file_path, index=False, encoding='utf-8')
-    order_df.to_csv(order_file_path, index=False, encoding='utf-8')
-        
-def run():
-    parser = argparse.ArgumentParser()
-    
-    parser.add_argument("--run_train", action='store_true')
-    parser.add_argument("--recommend", action='store_true')
-    parser.add_argument("--importance", action='store_true')
-    
-    parser.add_argument("--train_data_dir", type=str, default="./data")
-    parser.add_argument("--model_path", type=str, default="./models/rank/weights")
-    parser.add_argument("--model_name", type=str, default='model.pkl')
-    parser.add_argument("--last_n", type=int, default=200)
-    
-    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301')
-    parser.add_argument("--product_id", type=str, default='110102')
-    
-    
-    args = parser.parse_args()
-    
-    if args.run_train:
-        train(args)
-        
-    if args.recommend:
-        recommend_by_product(args)
-        
-    if args.importance:
-        get_features_importance(args)
-        
-if __name__ == "__main__":
+import argparse
+import os
+from models.rank import DataProcess, Trainer, GbdtLrModel
+import time
+import pandas as pd
+
+# train_data_path = "./moldes/rank/data/gbdt_data.csv"
+# model_path = "./models/rank/weights"
+
+def train(args):
+    model_dir = os.path.join(args.model_path, args.city_uuid)
+    train_data_dir = args.train_data_dir
+    if not os.path.exists(model_dir):
+        os.makedirs(model_dir)
+    
+    if not os.path.exists(train_data_dir):
+        os.makedirs(train_data_dir)
+    
+    # 准备数据集  
+    print("正在整合训练数据...")
+    processor = DataProcess(args.city_uuid, args.train_data_dir)
+    processor.data_process()
+    print("训练数据整合完成!")
+    
+    # 进行训练
+    print("开始训练原始模型")
+    trainer(args, os.path.join(args.train_data_dir, "original_train_data.csv"), model_dir, "ori_model.pkl")
+    
+    print("开始训练pos模型")
+    trainer(args, os.path.join(args.train_data_dir, "pos_train_data.csv"), model_dir, "pos_model.pkl")
+    
+    print("开始训练shopping模型")
+    trainer(args, os.path.join(args.train_data_dir, "shopping_train_data.csv"), model_dir, "shopping_model.pkl")
+
+def trainer(args, train_data_path, model_dir, model_name):
+    trainer = Trainer(train_data_path)
+    
+    start_time = time.time()
+    trainer.train()
+    end_time = time.time()
+    
+    training_time_hours = (end_time - start_time) / 3600
+    print(f"训练时间: {training_time_hours:.4f} 小时")
+    
+    eval_metrics = trainer.evaluate()
+    
+    # 输出评估结果
+    print("GBDT-LR Evaluation Metrics:")
+    for metric, value in eval_metrics.items():
+        print(f"{metric}: {value:.4f}")
+        
+    # 保存模型
+    trainer.save_model(os.path.join(model_dir, model_name))
+
+def recommend_by_product(args):
+    model_dir = os.path.join(args.model_path, args.city_uuid)
+    if not os.path.exists(model_dir):
+        print("暂无该城市的模型,请先进行模型训练")
+        return
+    
+    # 加载模型
+    model = GbdtLrModel(os.path.join(model_dir, args.model_name))
+    recommend_list = model.sort(args.city_uuid, args.product_id)
+    for item in recommend_list[:min(args.last_n, len(recommend_list))]:
+        print(item)
+
+def get_features_importance(args):
+    model_dir = os.path.join(args.model_path, args.city_uuid)
+    if not os.path.exists(model_dir):
+        print("暂无该城市的模型,请先进行模型训练")
+        return
+    
+    # # 加载模型
+    # model = GbdtLrModel(os.path.join(model_dir, args.model_name))
+    # cust_features_importance, product_features_importance = model.generate_feats_importance()
+    
+    # # 将字典列表转换为 DataFrame
+    # cust_df = pd.DataFrame([
+    #     {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+    #     for item in cust_features_importance
+    # ])
+    
+    # product_df = pd.DataFrame([
+    #     {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+    #     for item in product_features_importance
+    # ])
+    
+    # cust_file_path = os.path.join(model_dir, "cust_features_importance.csv")
+    # product_file_path = os.path.join(model_dir, "product_features_importance.csv")
+    # cust_df.to_csv(cust_file_path, index=False, encoding='utf-8')
+    # product_df.to_csv(product_file_path, index=False, encoding='utf-8')
+    
+    get_features_importance_by_model(model_dir, "ori_model")
+    get_features_importance_by_model(model_dir, "pos_model")
+    get_features_importance_by_model(model_dir, "shopping_model")
+    
+def get_features_importance_by_model(model_dir, modelname):
+    model = GbdtLrModel(os.path.join(model_dir, f"{modelname}.pkl"))
+    cust_features_importance, product_features_importance, order_features_importance = model.generate_feats_importance()
+    
+    # 将字典列表转换为 DataFrame
+    cust_df = pd.DataFrame([
+        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+        for item in cust_features_importance
+    ])
+    
+    product_df = pd.DataFrame([
+        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+        for item in product_features_importance
+    ])
+    
+    order_df = pd.DataFrame([
+        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+        for item in order_features_importance
+    ])
+    
+    importance_dir = os.path.join(model_dir, "importance")
+    if modelname == 'ori_model':
+        importance_dir = os.path.join(importance_dir, "ori")
+    elif modelname == 'pos_model':
+        importance_dir = os.path.join(importance_dir, "pos")
+    elif modelname == 'shopping_model':
+        importance_dir = os.path.join(importance_dir, "shopping")
+    
+    if not os.path.exists(importance_dir):
+        os.makedirs(importance_dir)
+        
+    cust_file_path = os.path.join(importance_dir, "cust_features_importance.csv")
+    product_file_path = os.path.join(importance_dir, "product_features_importance.csv")
+    order_file_path = os.path.join(importance_dir, "order_features_importance.csv")
+    
+    cust_df.to_csv(cust_file_path, index=False, encoding='utf-8')
+    product_df.to_csv(product_file_path, index=False, encoding='utf-8')
+    order_df.to_csv(order_file_path, index=False, encoding='utf-8')
+        
+def run():
+    parser = argparse.ArgumentParser()
+    
+    parser.add_argument("--run_train", action='store_true')
+    parser.add_argument("--recommend", action='store_true')
+    parser.add_argument("--importance", action='store_true')
+    
+    parser.add_argument("--train_data_dir", type=str, default="./data")
+    parser.add_argument("--model_path", type=str, default="./models/rank/weights")
+    parser.add_argument("--model_name", type=str, default='model.pkl')
+    parser.add_argument("--last_n", type=int, default=200)
+    
+    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301')
+    parser.add_argument("--product_id", type=str, default='110102')
+    
+    
+    args = parser.parse_args()
+    
+    if args.run_train:
+        train(args)
+        
+    if args.recommend:
+        recommend_by_product(args)
+        
+    if args.importance:
+        get_features_importance(args)
+        
+if __name__ == "__main__":
     run()

+ 96 - 96
gbdt_lr_api.py

@@ -1,97 +1,97 @@
-import argparse
-import os
-from models.rank import DataProcess, Trainer, GbdtLrModel
-import time
-import pandas as pd
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-
-app = FastAPI()
-
-model_path = "./models/rank/weights"
-model_name = "model.pkl"
-
-# 定义请求体
-class TrainRequest(BaseModel):
-    city_uuid: str
-    train_data_path: str = "./models/rank/train_data/gbdt_data.csv"
-    model_path: str = model_path
-    model_name: str = model_name
-    
-class RecommendRequest(BaseModel):
-    city_uuid: str
-    product_id: str
-    last_n: int = 200
-    model_path: str = model_path
-    model_name: str = model_name
-    
-class ImportanceRequest(BaseModel):
-    city_uuid: str
-    model_path: str = model_path
-    model_name: str = model_name
-    
-@app.post("/train")
-def train(request: TrainRequest):
-    model_dir = os.path.join(request.model_path, request.city_uuid)
-    train_data_dir = os.path.dirname(request.train_data_path)
-    if not os.path.exists(model_dir):
-        os.makedirs(model_dir)
-    
-    if not os.path.exists(train_data_dir):
-        os.makedirs(train_data_dir)
-        
-    # 准备数据集  
-    print("正在整合训练数据...")
-    processor = DataProcess(request.city_uuid, request.train_data_path)
-    processor.data_process()
-    print("训练数据整合完成!")
-    
-    # 进行训练
-    trainer = Trainer(request.train_data_path)
-    
-    start_time = time.time()
-    trainer.train()
-    end_time = time.time()
-    
-    training_time_hours = (end_time - start_time) / 3600
-    print(f"训练时间: {training_time_hours:.4f} 小时")
-    
-    eval_metrics = trainer.evaluate()
-    
-    # 保存模型
-    trainer.save_model(os.path.join(model_dir, request.model_name))
-    
-    # 输出评估结果
-    print("GBDT-LR Evaluation Metrics:")
-    for metric, value in eval_metrics.items():
-        print(f"{metric}: {value:.4f}")
-    
-    return {"message": "训练完成!"}
-
-@app.post("/recommend")
-def recommend(request: RecommendRequest):
-    model_dir = os.path.join(request.model_path, request.city_uuid)
-    if not os.path.exists(model_dir):
-        raise HTTPException(status_code=404, detail="暂无该城市的模型,请先进行模型训练")
-    
-    # 加载模型
-    model = GbdtLrModel(os.path.join(model_dir, request.model_name))
-    recommend_list = model.sort(request.city_uuid, request.product_id)
-    
-    return {"recommendations": recommend_list[:min(request.last_n, len(recommend_list))]}
-
-@app.post("/importance")
-def importance(request: ImportanceRequest):
-    model_dir = os.path.join(request.model_path, request.city_uuid)
-    if not os.path.exists(model_dir):
-        raise HTTPException(status_code=404, detail="暂无该城市的模型,请先进行模型训练")
-    
-    # 加载模型
-    model = GbdtLrModel(os.path.join(model_dir, request.model_name))
-    cust_features_importance, product_features_importance = model.generate_feats_importance()
-    
-    return {"cust_features_importance": cust_features_importance, "product_features_importance": product_features_importance}
-
-if __name__ == "__main__":
-    import uvicorn
+import argparse
+import os
+from models.rank import DataProcess, Trainer, GbdtLrModel
+import time
+import pandas as pd
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+app = FastAPI()
+
+model_path = "./models/rank/weights"
+model_name = "model.pkl"
+
+# 定义请求体
+class TrainRequest(BaseModel):
+    city_uuid: str
+    train_data_path: str = "./models/rank/train_data/gbdt_data.csv"
+    model_path: str = model_path
+    model_name: str = model_name
+    
+class RecommendRequest(BaseModel):
+    city_uuid: str
+    product_id: str
+    last_n: int = 200
+    model_path: str = model_path
+    model_name: str = model_name
+    
+class ImportanceRequest(BaseModel):
+    city_uuid: str
+    model_path: str = model_path
+    model_name: str = model_name
+    
+@app.post("/train")
+def train(request: TrainRequest):
+    model_dir = os.path.join(request.model_path, request.city_uuid)
+    train_data_dir = os.path.dirname(request.train_data_path)
+    if not os.path.exists(model_dir):
+        os.makedirs(model_dir)
+    
+    if not os.path.exists(train_data_dir):
+        os.makedirs(train_data_dir)
+        
+    # 准备数据集  
+    print("正在整合训练数据...")
+    processor = DataProcess(request.city_uuid, request.train_data_path)
+    processor.data_process()
+    print("训练数据整合完成!")
+    
+    # 进行训练
+    trainer = Trainer(request.train_data_path)
+    
+    start_time = time.time()
+    trainer.train()
+    end_time = time.time()
+    
+    training_time_hours = (end_time - start_time) / 3600
+    print(f"训练时间: {training_time_hours:.4f} 小时")
+    
+    eval_metrics = trainer.evaluate()
+    
+    # 保存模型
+    trainer.save_model(os.path.join(model_dir, request.model_name))
+    
+    # 输出评估结果
+    print("GBDT-LR Evaluation Metrics:")
+    for metric, value in eval_metrics.items():
+        print(f"{metric}: {value:.4f}")
+    
+    return {"message": "训练完成!"}
+
+@app.post("/recommend")
+def recommend(request: RecommendRequest):
+    model_dir = os.path.join(request.model_path, request.city_uuid)
+    if not os.path.exists(model_dir):
+        raise HTTPException(status_code=404, detail="暂无该城市的模型,请先进行模型训练")
+    
+    # 加载模型
+    model = GbdtLrModel(os.path.join(model_dir, request.model_name))
+    recommend_list = model.sort(request.city_uuid, request.product_id)
+    
+    return {"recommendations": recommend_list[:min(request.last_n, len(recommend_list))]}
+
+@app.post("/importance")
+def importance(request: ImportanceRequest):
+    model_dir = os.path.join(request.model_path, request.city_uuid)
+    if not os.path.exists(model_dir):
+        raise HTTPException(status_code=404, detail="暂无该城市的模型,请先进行模型训练")
+    
+    # 加载模型
+    model = GbdtLrModel(os.path.join(model_dir, request.model_name))
+    cust_features_importance, product_features_importance = model.generate_feats_importance()
+    
+    return {"cust_features_importance": cust_features_importance, "product_features_importance": product_features_importance}
+
+if __name__ == "__main__":
+    import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

+ 11 - 11
models/__init__.py

@@ -1,12 +1,12 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-from models.recall.hot_recall import HotRecallModel
-from models.recall.itemCF.calculate_similarity_matrix import calculate_similarity_and_save_results
-from models.recall.itemCF.user_item_score import UserItemScore
-from models.recall.itemCF.ItemCF import ItemCFModel
-__all__ = [
-    "HotRecallModel",
-    "UserItemScore",
-    "calculate_similarity_and_save_results",
-    "ItemCFModel"
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+from models.recall.hot_recall import HotRecallModel
+from models.recall.itemCF.calculate_similarity_matrix import calculate_similarity_and_save_results
+from models.recall.itemCF.user_item_score import UserItemScore
+from models.recall.itemCF.ItemCF import ItemCFModel
+__all__ = [
+    "HotRecallModel",
+    "UserItemScore",
+    "calculate_similarity_and_save_results",
+    "ItemCFModel"
 ]

+ 10 - 10
models/rank/__init__.py

@@ -1,11 +1,11 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-from models.rank.data.preprocess import DataProcess
-from models.rank.gbdt_lr import Trainer
-from models.rank.gbdt_lr_sort import GbdtLrModel
-
-__all__ = [
-    "DataProcess",
-    "Trainer",
-    "GbdtLrModel"
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+from models.rank.data.preprocess import DataProcess
+from models.rank.gbdt_lr import Trainer
+from models.rank.gbdt_lr_sort import GbdtLrModel
+
+__all__ = [
+    "DataProcess",
+    "Trainer",
+    "GbdtLrModel"
 ]

+ 12 - 11
models/rank/data/__init__.py

@@ -1,12 +1,13 @@
-from models.rank.data.config import CustConfig, ProductConfig, OrderConfig, ImportanceFeaturesMap
-from models.rank.data.dataloader import DataLoader
-from models.rank.data.utils import one_hot_embedding, sample_data_clear
-__all__ = [
-    "CustConfig",
-    "ProductConfig",
-    "OrderConfig",
-    "DataLoader",
-    "one_hot_embedding",
-    "sample_data_clear",
-    "ImportanceFeaturesMap"
+from models.rank.data.config import CustConfig, ProductConfig, OrderConfig, ShopConfig, ImportanceFeaturesMap
+from models.rank.data.dataloader import DataLoader
+from models.rank.data.utils import one_hot_embedding, sample_data_clear
+__all__ = [
+    "CustConfig",
+    "ProductConfig",
+    "OrderConfig",
+    "ShopConfig",
+    "DataLoader",
+    "one_hot_embedding",
+    "sample_data_clear",
+    "ImportanceFeaturesMap"
 ]

+ 867 - 447
models/rank/data/config.py

@@ -1,447 +1,867 @@
-class CustConfig:
-    FEATURE_COLUMNS = [
-        "BB_RETAIL_CUSTOMER_CODE",              # 零售户代码
-        "BB_RTL_CUST_MARKET_TYPE_NAME",         # 零售户市场类型名称
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME",       # 零售客户业态名称
-        "BB_RTL_CUST_CHAIN_FLAG",               # 零售户连锁标识
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",    # 零售户信用等级名称
-        "MD04_DIR_SAL_STORE_FLAG",              # 直营店标识
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME",       # 零售户经营范围名称
-        "BB_RTL_CUST_TERMINAL_LEVEL_NAME",      # 零售户终端层级名称
-        "OPERATOR_EDU",                         # 零售客户经营者文化程度
-        "STORE_AREA",                           # 店铺经营面积
-        "OPERATOR_AGE",                         # 经营者年龄
-        "PRODUCT_INSALE_QTY",                   # 在销品规数
-    ]
-    
-    ONEHOT_CAT = {
-        "BB_RTL_CUST_MARKET_TYPE_NAME":           ["城网", "农网"],
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":         ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
-        "BB_RTL_CUST_CHAIN_FLAG":                 ["是", "否"],
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":      ["AAA", "AA", "A", "B", "C", "D"],
-        "MD04_DIR_SAL_STORE_FLAG":                ["是", "否"],
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":         ["是", "否"],
-        "BB_RTL_CUST_TERMINAL_LEVEL_NAME":        ["普通终端", "一般现代终端", "合作终端", "加盟终端", "直营终端"],
-        "OPERATOR_EDU":                           [1, 2, 3, 4, 5, 6, 7, "无数据"],
-        "STORE_AREA":                             ["0-20", "21-50", "51-100", "101-150", "151-200", "201-300", "301-400", "401-600", "601-1000", "1001-2000", "2000以上"],
-        "OPERATOR_AGE":                           ["19-30", "31-40", "41-50", "51-65", "66-80", "80以上"],
-        "PRODUCT_INSALE_QTY":                     ["0-10", "11-20", "21-30", "31-40", "41-50", "51-60", 
-                                                   "61-70", "71-80", "81-90", "91-100", "101-110", "111-120", 
-                                                   "121-130", "131-140", "141-150", "151-160", "161-170", "171-180", 
-                                                   "181-190", "191-200", "201-210", "211-220", "221-230", "231-240", 
-                                                   "241-250", "251-260", "261-270", "271-280", "281-290", "291-350"],
-    }
-
-class ProductConfig:
-    FEATURE_COLUMNS = [
-        "city_uuid",                     # 地市id
-        "product_code",                  # 商品编码
-        "factory_name",                  # 产地(工业公司名称)
-        "brand_name",                    # 品牌名称
-        "is_low_tar",                    # 低焦油卷烟
-        "is_medium",                     # 中支烟
-        "is_tiny",                       # 细支烟
-        "is_coarse",                     # 粗支烟(同时非中非细)
-        "is_exploding_beads",            # 爆珠烟
-        "no_is_exploding_beads",         # 非爆珠烟
-        "is_abnormity",                  # 异形包装
-        "no_is_abnormity",               # 非异形包装
-        "is_cig",                        # 雪茄烟
-        "no_is_cig",                     # 非雪茄烟
-        "is_chuangxin",                  # 创新品类
-        "no_is_chuangxin",               # 非创新品类
-        "direct_retail_price",           # 卷烟建议零售价
-        "tbc_total_length",              # 烟支总长度
-        "product_style",                 # 包装类型
-    ]
-    
-    ONEHOT_CAT = {
-        "factory_name":                    ["安徽中烟", "澳门云福卷烟厂", "北欧烟草集团", "博格集团", "重庆中烟", "川渝中烟", "菲利普莫里斯亚洲", 
-                                            "福建中烟", "甘肃工业", "广东中烟", "广西中烟", "贵州中烟", "海南红塔", "河北中烟", "河南中烟", 
-                                            "黑龙江工业", "红塔辽宁烟草", "湖北中烟", "湖南中烟", "吉林工业", "家源开发股份有限公司", 
-                                            "嘉莱赫国际有限公司", "江苏中烟", "江西中烟", "凯德控股有限公司", "力量雪茄烟草有限公司", 
-                                            "南洋兄弟烟草股份", "内蒙古昆明卷烟", "日本烟草(香港)有限公司", "三宝麟国际集团", "厦门调拨站", 
-                                            "山东中烟", "山西昆明烟草", "陕西中烟", "上海烟草(集团)公司", "上海烟草公司", "深圳工业", "四川中烟", 
-                                            "特富意烟草(国际)", "雪茄客烟草国际贸易有限公司", "耀莱雪茄控股有限公司", "引领国际有限公司", 
-                                            "英飞烽香港有限公司", "英美烟草中国有限公司", "云南中烟", "浙江中烟", "中茄国际贸易有限公司", 
-                                            "中烟英美烟草国际有限公司", "株式会社 KT&G"],
-        "brand_name":                      ["万宝路", "555", "骆驼(国外)", "大华", "娇子", "大青山", "龙凤呈祥", "黄鹤楼", "真龙", "七匹狼", 
-                                            "芙蓉王", "双喜(广)", "贵烟", "钓鱼台", "红双喜(南洋)", "云烟", "蒙特", "富恩特", "拉·加莱拉", "苏烟", 
-                                            "丹纳曼", "黄山", "南京", "利群", "金桥", "泰山", "好日子", "石林", "美登", "红河", "嘉辉", "七星", 
-                                            "都彭", "天下秀", "长城", "高希霸", "钻石", "金圣", "王冠雪茄", "黄金叶", "中南海", "长白山", "红旗渠", 
-                                            "建牌", "大卫杜夫", "罗密欧", "茂大", "红金龙", "天子", "熊猫", "双喜(深)", "大前门", "兰州", 
-                                            "红双喜(沪)", "雄狮", "广州", "红玫王", "黄果树", "红塔山", "福", "小熊猫", "爱喜", "蒙特利", "玉溪", 
-                                            "都宝", "麦克纽杜", "卡里罗", "中华", "牡丹(沪)", "阿里山", "顺百利", "白沙", "羊城", "白云", 
-                                            "特美思", "国宾", "帕特加", "比德奥", "冬虫夏草", "威龙(湛江)", "香格里拉", "红梅", "延安", 
-                                            "特富意", "石狮", "金香港", "好猫", "登喜路", "乐迪", "林海灵芝", "椰树", "北京", "大红鹰", "大丰收", 
-                                            "红双喜(武汉)", "五叶神", "狮", "优民", "将军", "遵义", "恒大", "飞马", "红三环", "芙蓉", "工字", 
-                                            "古田", "狮牌", "君力", "哈尔滨", "梦都", "香梅(阜阳)", "哈德门", "梅州", "红山茶", "猴王", "沙龙", 
-                                            "潘趣", "狮子牌", "上海", "红玫", "醒宝", "广州湾", "百乐门", "关塔那摩", "威斯", "五一", "寿百年", 
-                                            "人民大会堂", "土楼", "三沙", "西湖", "光明", "阿诗玛", "宝亨", "恭贺新禧", "长寿", "茶花", "迎客松", 
-                                            "龙烟", "金澳门", "宝岛", "多米尼加之花", "国喜", "金驼", "君特欧", "上游", "幸福", "春城", "吉庆", 
-                                            "黄山松", "黄金龙", "紫气东来", "彼亚赛", "银辉", "潮牌", "庐山", "三峡", "壹支笔", "双叶"],
-        "is_low_tar":                      ["是", "否"],
-        "is_medium":                       ["是", "否"],
-        "is_tiny":                         ["是", "否"],
-        "is_coarse":                       ["是", "否"],
-        "is_exploding_beads":              ["是", "否"],
-        "no_is_exploding_beads":           ["是", "否"],
-        "is_abnormity":                    ["是", "否"],
-        "no_is_abnormity":                 ["是", "否"],
-        "is_cig":                          ["是", "否"],
-        "no_is_cig":                       ["是", "否"],
-        "is_chuangxin":                    ["是", "否"],
-        "no_is_chuangxin":                 ["是", "否"],
-        "direct_retail_price":             ["0-10", "10-30", "31-50", "51-100", "10-19.9", "250-499.9", "200-249.9", 
-                                            "5-9.9", "0-5", "100-109.9", "150-199.9", "101-150", "120-129.9", "大于500", 
-                                            "20-29.9", "30-39.9", "140-149.9", "50-59.9", "40-49.9", "80-89.9", "60-69.9", 
-                                            "70-79.9", "大于150", "130-139.9", "90-99.9", "110-119.9"],
-        "tbc_total_length":                ["小于79", "80-89", "90-100", "大于120"],
-        "product_style":                   ["包装类型(条盒硬盒)", "包装类型(条包硬盒)", "包装类型(条盒软盒)", "包装类型(条包软盒)", "包装类型(铁盒)", "包装类型(其它)"],
-    }
-
-class OrderConfig:
-    FEATURE_COLUMNS = [
-        "cust_uuid",                      # 零售户uuid
-        "cust_code",                      # 零售户编码
-        "product_code",                   # 品牌规格编码
-        "sale_qty",                       # 销量包
-        "sale_qty_l",                     # 销量上期
-        "sale_qty_hb",                    # 销量环比
-        "sale_amt",                       # 销售额包
-    ]
-    
-
-class ShopConfig:
-    FEATURE_COLUMNS = [
-        "cust_code",                      # 客户编码
-        "r_home_num",                     # 常驻人口_居住人数
-        "r_work_num",                     # 常驻人口_工作人数
-        "r_resident_num",                 # 常驻人口_工作或居住人数
-        "r_urban_cons_middle",            # 常驻人口_城市消费水平_中
-        "r_urban_cons_low",               # 常驻人口_城市消费水平_低
-        "r_urban_cons_lower",             # 常驻人口_城市消费水平_次低
-        "r_urban_cons_secondhigh",        # 常驻人口_城市消费水平_次高
-        "r_urban_cons_high",              # 常驻人口_城市消费水平_高
-        "r_edu_junior_middle",            # 常驻人口_学历_初中
-        "r_edu_doctor",                   # 常驻人口_学历_博士
-        "r_edu_specialty",                # 常驻人口_学历_大专
-        "r_edu_primary",                  # 常驻人口_学历_小学
-        "r_edu_college",                  # 常驻人口_学历_本科
-        "r_edu_postgraduate",             # 常驻人口_学历_硕士
-        "r_edu_senior_middle",            # 常驻人口_学历_高中
-        "r_house_price79999",             # 常驻人口_居住社区房价_60000_79999
-        "r_house_price59999",             # 常驻人口_居住社区房价_40000_59999
-        "r_house_price39999",             # 常驻人口_居住社区房价_20000_39999
-        "r_house_price19999",             # 常驻人口_居住社区房价_10000_19999
-        "r_house_price9999",              # 常驻人口_居住社区房价_8000_9999
-        "r_house_price7999",              # 常驻人口_居住社区房价_5000_7999
-        "r_house_price4999",              # 常驻人口_居住社区房价_2000_4999
-        "r_age_17",                       # 常驻人口_年龄_0_17
-        "r_age_24",                       # 常驻人口_年龄_18_24
-        "r_age_30",                       # 常驻人口_年龄_25_30
-        "r_age_35",                       # 常驻人口_年龄_31_35
-        "r_age_40",                       # 常驻人口_年龄_36_40
-        "r_age_45",                       # 常驻人口_年龄_41_45
-        "r_age_60",                       # 常驻人口_年龄_46_60
-        "r_age_over_60",                  # 常驻人口_年龄_61以上
-        "r_sex_woman",                    # 常驻人口_性别_女
-        "r_sex_man",                      # 常驻人口_性别_男
-        "r_catering_50",                  # 常驻人口_餐饮消费水平_50
-        "r_catering_100",                 # 常驻人口_餐饮消费水平_100
-        "r_catering_150",                 # 常驻人口_餐饮消费水平_150
-        "r_catering_200",                 # 常驻人口_餐饮消费水平_200
-        "r_catering_500",                 # 常驻人口_餐饮消费水平_500
-        "r_catering_over_500",            # 常驻人口_餐饮消费水平_500以上
-        "r_catering_times_2",             # 常驻人口_餐饮消费频次_1_2
-        "r_catering_times_4",             # 常驻人口_餐饮消费频次_2_4
-        "r_catering_times_6",             # 常驻人口_餐饮消费频次_4_6
-        "r_catering_times_8",             # 常驻人口_餐饮消费频次_6_8
-        "r_catering_times_10",            # 常驻人口_餐饮消费频次_8_10
-        "r_catering_times_11",            # 常驻人口_餐饮消费频次_11以上
-        "r_native_beijing",               # 常驻人口_家乡地_北京市
-        "r_native_tianjing",              # 常驻人口_家乡地_天津市
-        "r_native_hebei",                 # 常驻人口_家乡地_河北省
-        "r_native_shanxi",                # 常驻人口_家乡地_山西省
-        "r_native_neimeng",               # 常驻人口_家乡地_内蒙古
-        "r_native_liaoning",              # 常驻人口_家乡地_辽宁省
-        "r_native_jilin",                 # 常驻人口_家乡地_吉林省
-        "r_native_heilongjiang",          # 常驻人口_家乡地_黑龙江省
-        "r_native_shanghai",              # 常驻人口_家乡地_上海市
-        "r_native_jiangsu",               # 常驻人口_家乡地_江苏省
-        "r_native_zhejiang",              # 常驻人口_家乡地_浙江省
-        "r_native_anhui",                 # 常驻人口_家乡地_安徽省
-        "r_native_fujian",                # 常驻人口_家乡地_福建省
-        "r_native_jiangix",               # 常驻人口_家乡地_江西省
-        "r_native_shandong",              # 常驻人口_家乡地_山东省
-        "r_native_henan",                 # 常驻人口_家乡地_河南省
-        "r_native_hubei",                 # 常驻人口_家乡地_湖北省
-        "r_native_hunan",                 # 常驻人口_家乡地_湖南省
-        "r_native_guangdong",             # 常驻人口_家乡地_广东省
-        "r_native_hainan",                # 常驻人口_家乡地_海南省
-        "r_native_sichuan",               # 常驻人口_家乡地_四川省
-        "r_native_guizhou",               # 常驻人口_家乡地_贵州省
-        "r_native_yunnan",                # 常驻人口_家乡地_云南省
-        "r_native_shan",                  # 常驻人口_家乡地_陕西省
-        "r_native_gansu",                 # 常驻人口_家乡地_甘肃省
-        "r_native_qinghai",               # 常驻人口_家乡地_青海省
-        "r_native_guangxi",               # 常驻人口_家乡地_广西壮族自治区
-        "r_native_ningxia",               # 常驻人口_家乡地_宁夏回族自治区
-        "r_native_xinjiang",              # 常驻人口_家乡地_新疆维吾尔自治区
-        "r_native_xizang",                # 常驻人口_家乡地_西藏自治区
-        "r_native_chongqing",             # 常驻人口_家乡地_重庆市
-        "r_native_hongkong",              # 常驻人口_家乡地_香港
-        "r_native_macao",                 # 常驻人口_家乡地_澳门
-        "r_native_taiwan",                # 常驻人口_家乡地_台湾
-        "r_native_other",                 # 常驻人口_家乡地_其它
-        "f_flow_num",                     # 流动人口_工作日_日均流动人口数量
-        "f_holiday_flow_num",             # 流动人口_节假日_日均流动人口数量
-        "f_workday_flow_num",             # 流动人口_日均流动人口数量
-        "f_flowurban_cons_middle",        # 日均流动_城市消费水平_中
-        "f_flowurban_cons_low",           # 日均流动_城市消费水平_低
-        "f_flowurban_cons_lower",         # 日均流动_城市消费水平_次低
-        "f_flowurban_cons_second_high",   # 日均流动_城市消费水平_次高
-        "f_flowurban_cons_high",          # 日均流动_城市消费水平_高
-        "f_flowedu_junior_middle",        # 日均流动_学历_初中
-        "f_flowedu_doctor",               # 日均流动_学历_博士
-        "f_flowedu_specialty",            # 日均流动_学历_大专
-        "f_flowedu_primary",              # 日均流动_学历_小学
-        "f_flowedu_college",              # 日均流动_学历_本科
-        "f_flowedu_postgraduate",         # 日均流动_学历_硕士
-        "f_flowedu_senior_middle",        # 日均流动_学历_高中
-        "f_flowhouse_middle",             # 日均流动_居住社区房价_中
-        "f_flowhouse_low",                # 日均流动_居住社区房价_低
-        "f_flowhouse_lower",              # 日均流动_居住社区房价_次低
-        "f_flowhouse_second_high",        # 日均流动_居住社区房价_次高
-        "f_flowhouse_high",               # 日均流动_居住社区房价_高
-        "f_flowage_17",                   # 日均流动_年龄_0_17
-        "f_flowage_24",                   # 日均流动_年龄_18_24
-        "f_flowage_30",                   # 日均流动_年龄_25_30
-        "f_flowage_35",                   # 日均流动_年龄_31_35
-        "f_flowage_40",                   # 日均流动_年龄_36_40
-        "f_flowage_45",                   # 日均流动_年龄_41_45
-        "f_flowage_60",                   # 日均流动_年龄_46_60
-        "f_flowage_over_60",              # 日均流动_年龄_61以上
-        "f_flowsex_woman",                # 日均流动_性别_女
-        "f_flowsex_man",                  # 日均流动_性别_男
-        "f_holidayurban_cons_middle",     # 节假日流动_城市消费水平_中
-        "f_holidayurban_cons_low",        # 节假日流动_城市消费水平_低
-        "f_holidayurban_cons_lower",      # 节假日流动_城市消费水平_次低
-        "f_holidayurban_cons_secondhigh", # 节假日流动_城市消费水平_次高
-        "f_holidayurban_cons_high",       # 节假日流动_城市消费水平_高
-        "f_holidayedu_junior_middle",     # 节假日流动_学历_初中
-        "f_holidayedu_doctor",            # 节假日流动_学历_博士
-        "f_holidayedu_specialty",         # 节假日流动_学历_大专
-        "f_holidayedu_primary",           # 节假日流动_学历_小学
-        "f_holidayedu_college",           # 节假日流动_学历_本科
-        "f_holidayedu_postgraduate",      # 节假日流动_学历_硕士
-        "f_holidayedu_senior_middle",     # 节假日流动_学历_高中
-        "f_holidayhouse_middle",          # 节假日流动_居住社区房价_中
-        "f_holidayhouse_low",             # 节假日流动_居住社区房价_低
-        "f_holidayhouse_lower",           # 节假日流动_居住社区房价_次低
-        "f_holidayhouse_second_high",     # 节假日流动_居住社区房价_次高
-        "f_holidayhouse_high",            # 节假日流动_居住社区房价_高
-        "f_holidayage_17",                # 节假日流动_年龄_0_17
-        "f_holidayage_24",                # 节假日流动_年龄_18_24
-        "f_holidayage_30",                # 节假日流动_年龄_25_30
-        "f_holidayage_35",                # 节假日流动_年龄_31_35
-        "f_holidayage_40",                # 节假日流动_年龄_36_40
-        "f_holidayage_45",                # 节假日流动_年龄_41_45
-        "f_holidayage_60",                # 节假日流动_年龄_46_60
-        "f_holidayage_over_60",           # 节假日流动_年龄_61以上
-        "f_holidaysex_woman",             # 节假日流动_性别_女
-        "f_holidaysex_man",               # 节假日流动_性别_男
-        "f_workday_urban_cons_middle",    # 工作日流动_城市消费水平_中
-        "f_workday_urban_cons_low",       # 工作日流动_城市消费水平_低
-        "f_workday_urban_cons_lower",     # 工作日流动_城市消费水平_次低
-        "f_workday_urban_cons_secondhigh",# 工作日流动_城市消费水平_次高
-        "f_workday_urban_cons_high",      # 工作日流动_城市消费水平_高
-        "f_workday_edu_junior_middle",    # 工作日流动_学历_初中
-        "f_workday_edu_doctor",           # 工作日流动_学历_博士
-        "f_workday_edu_specialty",        # 工作日流动_学历_大专
-        "f_workday_edu_primary",          # 工作日流动_学历_小学
-        "f_workday_edu_college",          # 工作日流动_学历_本科
-        "f_workday_edu_postgraduate",     # 工作日流动_学历_硕士
-        "f_workday_edu_senior_middle",    # 工作日流动_学历_高中
-        "f_workday_house_middle",         # 工作日流动_居住社区房价_中
-        "f_workday_house_low",            # 工作日流动_居住社区房价_低
-        "f_workday_house_lower",          # 工作日流动_居住社区房价_次低
-        "f_workday_house_second_high",    # 工作日流动_居住社区房价_次高
-        "f_workday_house_high",           # 工作日流动_居住社区房价_高
-        "f_workday_age_17",               # 工作日流动_年龄_0_17
-        "f_workday_age_24",               # 工作日流动_年龄_18_24
-        "f_workday_age_30",               # 工作日流动_年龄_25_30
-        "f_workday_age_35",               # 工作日流动_年龄_31_35
-        "f_workday_age_40",               # 工作日流动_年龄_36_40
-        "f_workday_age_45",               # 工作日流动_年龄_41_45
-        "f_workday_age_60",               # 工作日流动_年龄_46_60
-        "f_workday_age_over_60",          # 工作日流动_年龄_61以上
-        "f_workday_sex_woman",            # 工作日流动_性别_女
-        "f_workday_sex_man",              # 工作日流动_性别_男
-    ]
-    
-    ONEHOT_CAT = {
-        "r_home_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10000以上"],
-        "r_work_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10000以上"],
-        "r_resident_num":                    ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-20000", "20000以上"],
-        "r_urban_cons_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_urban_cons_low":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_urban_cons_lower":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_urban_cons_secondhigh":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_urban_cons_high":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_junior_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_doctor":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_specialty":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_primary":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_college":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_postgraduate":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_edu_senior_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price79999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price59999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price39999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price19999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price9999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price7999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_house_price4999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_17":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_24":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_30":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_35":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_40":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_45":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_60":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_age_over_60":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_sex_woman":                       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_sex_man":                         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_50":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_100":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_150":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_200":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_500":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_over_500":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_2":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_4":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_6":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_8":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_10":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_catering_times_11":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_beijing":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_tianjing":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_hebei":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_shanxi":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_neimeng":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_liaoning":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_jilin":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_heilongjiang":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_shanghai":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_jiangsu":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_zhejiang":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_anhui":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_fujian":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_jiangix":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_shandong":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_henan":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_hubei":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_hunan":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_guangdong":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_hainan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_sichuan":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_guizhou":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_yunnan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_shan":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_gansu":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_qinghai":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_guangxi":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_ningxia":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_xinjiang":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_xizang":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_chongqing":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_hongkong":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_macao":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_taiwan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "r_native_other":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flow_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
-        "f_holiday_flow_num":                ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
-        "f_workday_flow_num":                ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
-        "f_flowurban_cons_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowurban_cons_low":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowurban_cons_lower":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowurban_cons_second_high":      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowurban_cons_high":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_junior_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_doctor":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_specialty":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_primary":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_college":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_postgraduate":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowedu_senior_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowhouse_middle":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowhouse_low":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowhouse_lower":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowhouse_second_high":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowhouse_high":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_17":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_24":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_30":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_35":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_40":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_45":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_60":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowage_over_60":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowsex_woman":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_flowsex_man":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayurban_cons_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayurban_cons_low":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayurban_cons_lower":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayurban_cons_secondhigh":    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayurban_cons_high":          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_junior_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_doctor":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_specialty":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_primary":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_college":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_postgraduate":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayedu_senior_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayhouse_middle":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayhouse_low":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayhouse_lower":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayhouse_second_high":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayhouse_high":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_17":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_24":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_30":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_35":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_40":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_45":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_60":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidayage_over_60":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidaysex_woman":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_holidaysex_man":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_urban_cons_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_urban_cons_low":          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_urban_cons_lower":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_urban_cons_secondhigh":   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_urban_cons_high":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_junior_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_doctor":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_specialty":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_primary":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_college":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_postgraduate":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_edu_senior_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_house_middle":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_house_low":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_house_lower":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_house_second_high":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_house_high":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_17":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_24":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_30":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_35":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_40":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_45":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_60":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_age_over_60":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_sex_woman":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-        "f_workday_sex_man":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
-    }
+class CustConfig:  # retailer (customer)-side feature configuration for the GBDT-LR pipeline
+    FEATURE_COLUMNS = [  # raw input columns consumed by the pipeline
+        "BB_RETAIL_CUSTOMER_CODE",              # retailer code (identifier; absent from ONEHOT_CAT/CLEANING_RULES, so presumably not encoded — confirm)
+        "BB_RTL_CUST_MARKET_TYPE_NAME",         # retailer market type name (urban/rural network)
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME",       # retail customer business-format name
+        "BB_RTL_CUST_CHAIN_FLAG",               # retailer chain-store flag (yes/no)
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",    # retailer credit-rating name (AAA..D)
+        "MD04_DIR_SAL_STORE_FLAG",              # directly-operated store flag (yes/no)
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME",       # retailer business-scope name (vocabulary below is yes/no)
+        "BB_RTL_CUST_TERMINAL_LEVEL_NAME",      # retailer terminal tier name
+        "OPERATOR_EDU",                         # retail customer operator's education level
+        "STORE_AREA",                           # store business floor area (binned range strings)
+        "OPERATOR_AGE",                         # operator age (binned range strings)
+        "PRODUCT_INSALE_QTY",                   # number of SKUs currently on sale (binned range strings)
+    ]
+    # Per-column categorical vocabularies for one-hot encoding; numeric columns are pre-binned into range strings.
+    ONEHOT_CAT = {
+        "BB_RTL_CUST_MARKET_TYPE_NAME":           ["城网", "农网"],
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":         ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
+        "BB_RTL_CUST_CHAIN_FLAG":                 ["是", "否"],
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":      ["AAA", "AA", "A", "B", "C", "D"],
+        "MD04_DIR_SAL_STORE_FLAG":                ["是", "否"],
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":         ["是", "否"],
+        "BB_RTL_CUST_TERMINAL_LEVEL_NAME":        ["普通终端", "一般现代终端", "合作终端", "加盟终端", "直营终端"],
+        "OPERATOR_EDU":                           [1, 2, 3, 4, 5, 6, 7, "无数据"],  # NOTE(review): int levels mixed with a string sentinel, yet CLEANING_RULES fills this column as type "str" — confirm the encoder does not str-cast and break matching of 1..7
+        "STORE_AREA":                             ["0-20", "21-50", "51-100", "101-150", "151-200", "201-300", "301-400", "401-600", "601-1000", "1001-2000", "2000以上"],
+        "OPERATOR_AGE":                           ["19-30", "31-40", "41-50", "51-65", "66-80", "80以上"],
+        "PRODUCT_INSALE_QTY":                     ["0-10", "11-20", "21-30", "31-40", "41-50", "51-60", 
+                                                   "61-70", "71-80", "81-90", "91-100", "101-110", "111-120", 
+                                                   "121-130", "131-140", "141-150", "151-160", "161-170", "171-180", 
+                                                   "181-190", "191-200", "201-210", "211-220", "221-230", "231-240", 
+                                                   "241-250", "251-260", "261-270", "271-280", "281-290", "291-350"],
+    }
+    # Missing-value handling: each rule fills NaN ("fillna") with a fixed per-column default before encoding.
+    CLEANING_RULES = {
+        "BB_RTL_CUST_MARKET_TYPE_NAME":         {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":       {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        "BB_RTL_CUST_CHAIN_FLAG":               {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":    {"method": "fillna", "opt": "fill", "value": "B", "type": "str"},
+        "MD04_DIR_SAL_STORE_FLAG":              {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":       {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "BB_RTL_CUST_TERMINAL_LEVEL_NAME":      {"method": "fillna", "opt": "fill", "value": "普通终端", "type": "str"},
+        "OPERATOR_EDU":                         {"method": "fillna", "opt": "fill", "value": "无数据", "type": "str"},
+        "STORE_AREA":                           {"method": "fillna", "opt": "fill", "value": "0-20", "type": "str"},
+        "OPERATOR_AGE":                         {"method": "fillna", "opt": "fill", "value": "31-40", "type": "str"},
+        "PRODUCT_INSALE_QTY":                   {"method": "fillna", "opt": "fill", "value": "0-10", "type": "str"},
+    }
+
+class ProductConfig:
+    """Feature configuration for the product (cigarette SKU) table.
+
+    FEATURE_COLUMNS lists the raw columns consumed from the product data;
+    ONEHOT_CAT fixes the closed category vocabulary used for one-hot
+    encoding; CLEANING_RULES imputes missing values (and casts to str)
+    before encoding so every value falls inside the vocabulary.
+    """
+
+    FEATURE_COLUMNS = [
+        "product_code",                  # product code
+        "factory_name",                  # manufacturer (industrial company name)
+        "brand_name",                    # brand name
+        "is_low_tar",                    # low-tar cigarette flag
+        "is_medium",                     # medium-diameter ("zhongzhi") cigarette flag
+        "is_tiny",                       # slim ("xizhi") cigarette flag
+        "is_coarse",                     # regular diameter (neither medium nor slim)
+        "is_exploding_beads",            # capsule ("exploding bead") cigarette flag
+        "is_abnormity",                  # irregular packaging flag
+        "is_cig",                        # cigar flag
+        "is_chuangxin",                  # innovative-category flag
+        "direct_retail_price",           # suggested retail price (binned, yuan)
+        "tbc_total_length",              # total stick length (binned)
+        "product_style",                 # packaging type
+    ]
+    
+    # Closed category vocabularies for one-hot encoding; unseen/missing values
+    # must be mapped into these (via CLEANING_RULES below) before encoding.
+    ONEHOT_CAT = {
+        "factory_name":                    ["安徽中烟", "澳门云福卷烟厂", "北欧烟草集团", "博格集团", "重庆中烟", "川渝中烟", "菲利普莫里斯亚洲", 
+                                            "福建中烟", "甘肃工业", "广东中烟", "广西中烟", "贵州中烟", "海南红塔", "河北中烟", "河南中烟", 
+                                            "黑龙江工业", "红塔辽宁烟草", "湖北中烟", "湖南中烟", "吉林工业", "家源开发股份有限公司", 
+                                            "嘉莱赫国际有限公司", "江苏中烟", "江西中烟", "凯德控股有限公司", "力量雪茄烟草有限公司", 
+                                            "南洋兄弟烟草股份", "内蒙古昆明卷烟", "日本烟草(香港)有限公司", "三宝麟国际集团", "厦门调拨站", 
+                                            "山东中烟", "山西昆明烟草", "陕西中烟", "上海烟草(集团)公司", "上海烟草公司", "深圳工业", "四川中烟", 
+                                            "特富意烟草(国际)", "雪茄客烟草国际贸易有限公司", "耀莱雪茄控股有限公司", "引领国际有限公司", 
+                                            "英飞烽香港有限公司", "英美烟草中国有限公司", "云南中烟", "浙江中烟", "中茄国际贸易有限公司", 
+                                            "中烟英美烟草国际有限公司", "株式会社 KT&G", "无"],
+        "brand_name":                      ["万宝路", "555", "骆驼(国外)", "大华", "娇子", "大青山", "龙凤呈祥", "黄鹤楼", "真龙", "七匹狼", 
+                                            "芙蓉王", "双喜(广)", "贵烟", "钓鱼台", "红双喜(南洋)", "云烟", "蒙特", "富恩特", "拉·加莱拉", "苏烟", 
+                                            "丹纳曼", "黄山", "南京", "利群", "金桥", "泰山", "好日子", "石林", "美登", "红河", "嘉辉", "七星", 
+                                            "都彭", "天下秀", "长城", "高希霸", "钻石", "金圣", "王冠雪茄", "黄金叶", "中南海", "长白山", "红旗渠", 
+                                            "建牌", "大卫杜夫", "罗密欧", "茂大", "红金龙", "天子", "熊猫", "双喜(深)", "大前门", "兰州", 
+                                            "红双喜(沪)", "雄狮", "广州", "红玫王", "黄果树", "红塔山", "福", "小熊猫", "爱喜", "蒙特利", "玉溪", 
+                                            "都宝", "麦克纽杜", "卡里罗", "中华", "牡丹(沪)", "阿里山", "顺百利", "白沙", "羊城", "白云", 
+                                            "特美思", "国宾", "帕特加", "比德奥", "冬虫夏草", "威龙(湛江)", "香格里拉", "红梅", "延安", 
+                                            "特富意", "石狮", "金香港", "好猫", "登喜路", "乐迪", "林海灵芝", "椰树", "北京", "大红鹰", "大丰收", 
+                                            "红双喜(武汉)", "五叶神", "狮", "优民", "将军", "遵义", "恒大", "飞马", "红三环", "芙蓉", "工字", 
+                                            "古田", "狮牌", "君力", "哈尔滨", "梦都", "香梅(阜阳)", "哈德门", "梅州", "红山茶", "猴王", "沙龙", 
+                                            "潘趣", "狮子牌", "上海", "红玫", "醒宝", "广州湾", "百乐门", "关塔那摩", "威斯", "五一", "寿百年", 
+                                            "人民大会堂", "土楼", "三沙", "西湖", "光明", "阿诗玛", "宝亨", "恭贺新禧", "长寿", "茶花", "迎客松", 
+                                            "龙烟", "金澳门", "宝岛", "多米尼加之花", "国喜", "金驼", "君特欧", "上游", "幸福", "春城", "吉庆", 
+                                            "黄山松", "黄金龙", "紫气东来", "彼亚赛", "银辉", "潮牌", "庐山", "三峡", "壹支笔", "双叶", "无"],
+        "is_low_tar":                      ["是", "否"],
+        "is_medium":                       ["是", "否"],
+        "is_tiny":                         ["是", "否"],
+        "is_coarse":                       ["是", "否"],
+        "is_exploding_beads":              ["是", "否"],
+        "is_abnormity":                    ["是", "否"],
+        "is_cig":                          ["是", "否"],
+        "is_chuangxin":                    ["是", "否"],
+        # NOTE(review): these price bins overlap (e.g. "0-10" vs "0-5",
+        # "10-30" vs "10-19.9") — presumably a coarse and a fine binning
+        # scheme were merged from the new data; confirm which scheme the
+        # upstream data actually emits before encoding.
+        "direct_retail_price":             ["0-10", "10-30", "31-50", "51-100", "10-19.9", "250-499.9", "200-249.9", 
+                                            "5-9.9", "0-5", "100-109.9", "150-199.9", "101-150", "120-129.9", "大于500", 
+                                            "20-29.9", "30-39.9", "140-149.9", "50-59.9", "40-49.9", "80-89.9", "60-69.9", 
+                                            "70-79.9", "大于150", "130-139.9", "90-99.9", "110-119.9"],
+        # NOTE(review): the 101-120 range has no bin — verify no product falls there.
+        "tbc_total_length":                ["小于79", "80-89", "90-100", "大于120"],
+        "product_style":                   ["条盒硬盒", "条包硬盒", "条盒软盒", "条包软盒", "铁盒", "其他"],
+    }
+    
+    # Missing-value imputation: fill NaN with a default category, cast to str.
+    CLEANING_RULES = {
+        "factory_name":          {"method": "fillna", "opt": "fill", "value": "无", "type": "str"},
+        "brand_name":            {"method": "fillna", "opt": "fill", "value": "无", "type": "str"},
+        "is_low_tar":            {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_medium":             {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_tiny":               {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_coarse":             {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_exploding_beads":    {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_abnormity":          {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_cig":                {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "is_chuangxin":          {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "direct_retail_price":   {"method": "fillna", "opt": "fill", "value": "0-5", "type": "str"},
+        "tbc_total_length":      {"method": "fillna", "opt": "fill", "value": "小于79", "type": "str"},
+        "product_style":         {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+    }
+
+class OrderConfig:
+    """Feature configuration for the order (sales) table.
+
+    Only the join keys (customer, product) and the sales quantity are used;
+    the derived period-over-period features below are currently disabled.
+    """
+
+    FEATURE_COLUMNS = [
+        "cust_code",                      # retailer (customer) code
+        "product_code",                   # brand/spec (product) code
+        "sale_qty",                       # sales quantity, packs
+        # "sale_qty_l",                     # sales quantity, previous period (disabled)
+        # "sale_qty_hb",                    # period-over-period sales ratio (disabled)
+        # "sale_amt",                       # sales amount, packs (disabled)
+    ]
+    
+
+class ShopConfig:
+    """Feature configuration for the shop-surroundings demographics table.
+
+    FEATURE_COLUMNS lists, per customer ("cust_code"), population statistics
+    around the store in four groups (marked by the r_/f_ prefixes below):
+    resident population (r_*), average daily flow (f_flow*), holiday flow
+    (f_holiday*), and workday flow (f_workday*). Apart from the raw counts,
+    columns are share-style metrics bucketed by the ONEHOT_CAT ranges below.
+    """
+
+    FEATURE_COLUMNS = [
+        "cust_code",                      # 客户编码
+        "r_home_num",                     # 常驻人口_居住人数
+        "r_work_num",                     # 常驻人口_工作人数
+        "r_resident_num",                 # 常驻人口_工作或居住人数
+        "r_urban_cons_middle",            # 常驻人口_城市消费水平_中
+        "r_urban_cons_low",               # 常驻人口_城市消费水平_低
+        "r_urban_cons_lower",             # 常驻人口_城市消费水平_次低
+        "r_urban_cons_secondhigh",        # 常驻人口_城市消费水平_次高
+        "r_urban_cons_high",              # 常驻人口_城市消费水平_高
+        "r_edu_junior_middle",            # 常驻人口_学历_初中
+        "r_edu_doctor",                   # 常驻人口_学历_博士
+        "r_edu_specialty",                # 常驻人口_学历_大专
+        "r_edu_primary",                  # 常驻人口_学历_小学
+        "r_edu_college",                  # 常驻人口_学历_本科
+        "r_edu_postgraduate",             # 常驻人口_学历_硕士
+        "r_edu_senior_middle",            # 常驻人口_学历_高中
+        "r_house_price79999",             # 常驻人口_居住社区房价_60000_79999
+        "r_house_price59999",             # 常驻人口_居住社区房价_40000_59999
+        "r_house_price39999",             # 常驻人口_居住社区房价_20000_39999
+        "r_house_price19999",             # 常驻人口_居住社区房价_10000_19999
+        "r_house_price9999",              # 常驻人口_居住社区房价_8000_9999
+        "r_house_price7999",              # 常驻人口_居住社区房价_5000_7999
+        "r_house_price4999",              # 常驻人口_居住社区房价_2000_4999
+        "r_age_17",                       # 常驻人口_年龄_0_17
+        "r_age_24",                       # 常驻人口_年龄_18_24
+        "r_age_30",                       # 常驻人口_年龄_25_30
+        "r_age_35",                       # 常驻人口_年龄_31_35
+        "r_age_40",                       # 常驻人口_年龄_36_40
+        "r_age_45",                       # 常驻人口_年龄_41_45
+        "r_age_60",                       # 常驻人口_年龄_46_60
+        "r_age_over_60",                  # 常驻人口_年龄_61以上
+        "r_sex_woman",                    # 常驻人口_性别_女
+        "r_sex_man",                      # 常驻人口_性别_男
+        "r_catering_50",                  # 常驻人口_餐饮消费水平_50
+        "r_catering_100",                 # 常驻人口_餐饮消费水平_100
+        "r_catering_150",                 # 常驻人口_餐饮消费水平_150
+        "r_catering_200",                 # 常驻人口_餐饮消费水平_200
+        "r_catering_500",                 # 常驻人口_餐饮消费水平_500
+        "r_catering_over_500",            # 常驻人口_餐饮消费水平_500以上
+        "r_catering_times_2",             # 常驻人口_餐饮消费频次_1_2
+        "r_catering_times_4",             # 常驻人口_餐饮消费频次_2_4
+        "r_catering_times_6",             # 常驻人口_餐饮消费频次_4_6
+        "r_catering_times_8",             # 常驻人口_餐饮消费频次_6_8
+        "r_catering_times_10",            # 常驻人口_餐饮消费频次_8_10
+        "r_catering_times_11",            # 常驻人口_餐饮消费频次_11以上
+        # NOTE(review): "r_native_tianjing" / "r_native_jiangix" look like
+        # misspellings of Tianjin / Jiangxi, but they are used consistently
+        # here and in ONEHOT_CAT — they must match the upstream column names,
+        # so do not "fix" them without checking the source table.
+        "r_native_beijing",               # 常驻人口_家乡地_北京市
+        "r_native_tianjing",              # 常驻人口_家乡地_天津市
+        "r_native_hebei",                 # 常驻人口_家乡地_河北省
+        "r_native_shanxi",                # 常驻人口_家乡地_山西省
+        "r_native_neimeng",               # 常驻人口_家乡地_内蒙古
+        "r_native_liaoning",              # 常驻人口_家乡地_辽宁省
+        "r_native_jilin",                 # 常驻人口_家乡地_吉林省
+        "r_native_heilongjiang",          # 常驻人口_家乡地_黑龙江省
+        "r_native_shanghai",              # 常驻人口_家乡地_上海市
+        "r_native_jiangsu",               # 常驻人口_家乡地_江苏省
+        "r_native_zhejiang",              # 常驻人口_家乡地_浙江省
+        "r_native_anhui",                 # 常驻人口_家乡地_安徽省
+        "r_native_fujian",                # 常驻人口_家乡地_福建省
+        "r_native_jiangix",               # 常驻人口_家乡地_江西省
+        "r_native_shandong",              # 常驻人口_家乡地_山东省
+        "r_native_henan",                 # 常驻人口_家乡地_河南省
+        "r_native_hubei",                 # 常驻人口_家乡地_湖北省
+        "r_native_hunan",                 # 常驻人口_家乡地_湖南省
+        "r_native_guangdong",             # 常驻人口_家乡地_广东省
+        "r_native_hainan",                # 常驻人口_家乡地_海南省
+        "r_native_sichuan",               # 常驻人口_家乡地_四川省
+        "r_native_guizhou",               # 常驻人口_家乡地_贵州省
+        "r_native_yunnan",                # 常驻人口_家乡地_云南省
+        "r_native_shan",                  # 常驻人口_家乡地_陕西省
+        "r_native_gansu",                 # 常驻人口_家乡地_甘肃省
+        "r_native_qinghai",               # 常驻人口_家乡地_青海省
+        "r_native_guangxi",               # 常驻人口_家乡地_广西壮族自治区
+        "r_native_ningxia",               # 常驻人口_家乡地_宁夏回族自治区
+        "r_native_xinjiang",              # 常驻人口_家乡地_新疆维吾尔自治区
+        "r_native_xizang",                # 常驻人口_家乡地_西藏自治区
+        "r_native_chongqing",             # 常驻人口_家乡地_重庆市
+        "r_native_hongkong",              # 常驻人口_家乡地_香港
+        "r_native_macao",                 # 常驻人口_家乡地_澳门
+        "r_native_taiwan",                # 常驻人口_家乡地_台湾
+        "r_native_other",                 # 常驻人口_家乡地_其它
+        "f_flow_num",                     # 流动人口_工作日_日均流动人口数量
+        "f_holiday_flow_num",             # 流动人口_节假日_日均流动人口数量
+        "f_workday_flow_num",             # 流动人口_日均流动人口数量
+        "f_flowurban_cons_middle",        # 日均流动_城市消费水平_中
+        "f_flowurban_cons_low",           # 日均流动_城市消费水平_低
+        "f_flowurban_cons_lower",         # 日均流动_城市消费水平_次低
+        "f_flowurban_cons_second_high",   # 日均流动_城市消费水平_次高
+        "f_flowurban_cons_high",          # 日均流动_城市消费水平_高
+        "f_flowedu_junior_middle",        # 日均流动_学历_初中
+        "f_flowedu_doctor",               # 日均流动_学历_博士
+        "f_flowedu_specialty",            # 日均流动_学历_大专
+        "f_flowedu_primary",              # 日均流动_学历_小学
+        "f_flowedu_college",              # 日均流动_学历_本科
+        "f_flowedu_postgraduate",         # 日均流动_学历_硕士
+        "f_flowedu_senior_middle",        # 日均流动_学历_高中
+        "f_flowhouse_middle",             # 日均流动_居住社区房价_中
+        "f_flowhouse_low",                # 日均流动_居住社区房价_低
+        "f_flowhouse_lower",              # 日均流动_居住社区房价_次低
+        "f_flowhouse_second_high",        # 日均流动_居住社区房价_次高
+        "f_flowhouse_high",               # 日均流动_居住社区房价_高
+        "f_flowage_17",                   # 日均流动_年龄_0_17
+        "f_flowage_24",                   # 日均流动_年龄_18_24
+        "f_flowage_30",                   # 日均流动_年龄_25_30
+        "f_flowage_35",                   # 日均流动_年龄_31_35
+        "f_flowage_40",                   # 日均流动_年龄_36_40
+        "f_flowage_45",                   # 日均流动_年龄_41_45
+        "f_flowage_60",                   # 日均流动_年龄_46_60
+        "f_flowage_over_60",              # 日均流动_年龄_61以上
+        "f_flowsex_woman",                # 日均流动_性别_女
+        "f_flowsex_man",                  # 日均流动_性别_男
+        "f_holidayurban_cons_middle",     # 节假日流动_城市消费水平_中
+        "f_holidayurban_cons_low",        # 节假日流动_城市消费水平_低
+        "f_holidayurban_cons_lower",      # 节假日流动_城市消费水平_次低
+        "f_holidayurban_cons_secondhigh", # 节假日流动_城市消费水平_次高
+        "f_holidayurban_cons_high",       # 节假日流动_城市消费水平_高
+        "f_holidayedu_junior_middle",     # 节假日流动_学历_初中
+        "f_holidayedu_doctor",            # 节假日流动_学历_博士
+        "f_holidayedu_specialty",         # 节假日流动_学历_大专
+        "f_holidayedu_primary",           # 节假日流动_学历_小学
+        "f_holidayedu_college",           # 节假日流动_学历_本科
+        "f_holidayedu_postgraduate",      # 节假日流动_学历_硕士
+        "f_holidayedu_senior_middle",     # 节假日流动_学历_高中
+        "f_holidayhouse_middle",          # 节假日流动_居住社区房价_中
+        "f_holidayhouse_low",             # 节假日流动_居住社区房价_低
+        "f_holidayhouse_lower",           # 节假日流动_居住社区房价_次低
+        "f_holidayhouse_second_high",     # 节假日流动_居住社区房价_次高
+        "f_holidayhouse_high",            # 节假日流动_居住社区房价_高
+        "f_holidayage_17",                # 节假日流动_年龄_0_17
+        "f_holidayage_24",                # 节假日流动_年龄_18_24
+        "f_holidayage_30",                # 节假日流动_年龄_25_30
+        "f_holidayage_35",                # 节假日流动_年龄_31_35
+        "f_holidayage_40",                # 节假日流动_年龄_36_40
+        "f_holidayage_45",                # 节假日流动_年龄_41_45
+        "f_holidayage_60",                # 节假日流动_年龄_46_60
+        "f_holidayage_over_60",           # 节假日流动_年龄_61以上
+        "f_holidaysex_woman",             # 节假日流动_性别_女
+        "f_holidaysex_man",               # 节假日流动_性别_男
+        "f_workday_urban_cons_middle",    # 工作日流动_城市消费水平_中
+        "f_workday_urban_cons_low",       # 工作日流动_城市消费水平_低
+        "f_workday_urban_cons_lower",     # 工作日流动_城市消费水平_次低
+        "f_workday_urban_cons_secondhigh",# 工作日流动_城市消费水平_次高
+        "f_workday_urban_cons_high",      # 工作日流动_城市消费水平_高
+        "f_workday_edu_junior_middle",    # 工作日流动_学历_初中
+        "f_workday_edu_doctor",           # 工作日流动_学历_博士
+        "f_workday_edu_specialty",        # 工作日流动_学历_大专
+        "f_workday_edu_primary",          # 工作日流动_学历_小学
+        "f_workday_edu_college",          # 工作日流动_学历_本科
+        "f_workday_edu_postgraduate",     # 工作日流动_学历_硕士
+        "f_workday_edu_senior_middle",    # 工作日流动_学历_高中
+        "f_workday_house_middle",         # 工作日流动_居住社区房价_中
+        "f_workday_house_low",            # 工作日流动_居住社区房价_低
+        "f_workday_house_lower",          # 工作日流动_居住社区房价_次低
+        "f_workday_house_second_high",    # 工作日流动_居住社区房价_次高
+        "f_workday_house_high",           # 工作日流动_居住社区房价_高
+        "f_workday_age_17",               # 工作日流动_年龄_0_17
+        "f_workday_age_24",               # 工作日流动_年龄_18_24
+        "f_workday_age_30",               # 工作日流动_年龄_25_30
+        "f_workday_age_35",               # 工作日流动_年龄_31_35
+        "f_workday_age_40",               # 工作日流动_年龄_36_40
+        "f_workday_age_45",               # 工作日流动_年龄_41_45
+        "f_workday_age_60",               # 工作日流动_年龄_46_60
+        "f_workday_age_over_60",          # 工作日流动_年龄_61以上
+        "f_workday_sex_woman",            # 工作日流动_性别_女
+        "f_workday_sex_man",              # 工作日流动_性别_男
+    ]
+    
+    ONEHOT_CAT = {
+        "r_home_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10000以上"],
+        "r_work_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10000以上"],
+        "r_resident_num":                    ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-20000", "20000以上"],
+        "r_urban_cons_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_urban_cons_low":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_urban_cons_lower":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_urban_cons_secondhigh":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_urban_cons_high":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_junior_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_doctor":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_specialty":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_primary":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_college":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_postgraduate":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_edu_senior_middle":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price79999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price59999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price39999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price19999":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price9999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price7999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_house_price4999":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_17":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_24":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_30":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_35":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_40":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_45":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_60":                          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_age_over_60":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_sex_woman":                       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_sex_man":                         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_50":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_100":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_150":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_200":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_500":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_over_500":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_2":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_4":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_6":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_8":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_10":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_catering_times_11":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_beijing":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_tianjing":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_hebei":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_shanxi":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_neimeng":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_liaoning":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_jilin":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_heilongjiang":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_shanghai":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_jiangsu":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_zhejiang":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_anhui":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_fujian":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_jiangix":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_shandong":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_henan":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_hubei":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_hunan":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_guangdong":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_hainan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_sichuan":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_guizhou":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_yunnan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_shan":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_gansu":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_qinghai":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_guangxi":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_ningxia":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_xinjiang":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_xizang":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_chongqing":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_hongkong":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_macao":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_taiwan":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "r_native_other":                    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flow_num":                        ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
+        "f_holiday_flow_num":                ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
+        "f_workday_flow_num":                ["0-100", "101-500", "501-2000", "2001-5000", "5001-10000", "10001-50000", "50001-100000", "100000以上"],
+        "f_flowurban_cons_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowurban_cons_low":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowurban_cons_lower":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowurban_cons_second_high":      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowurban_cons_high":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_junior_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_doctor":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_specialty":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_primary":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_college":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_postgraduate":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowedu_senior_middle":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowhouse_middle":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowhouse_low":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowhouse_lower":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowhouse_second_high":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowhouse_high":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_17":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_24":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_30":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_35":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_40":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_45":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_60":                      ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowage_over_60":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowsex_woman":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_flowsex_man":                     ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayurban_cons_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayurban_cons_low":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayurban_cons_lower":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayurban_cons_secondhigh":    ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayurban_cons_high":          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_junior_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_doctor":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_specialty":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_primary":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_college":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_postgraduate":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayedu_senior_middle":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayhouse_middle":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayhouse_low":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayhouse_lower":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayhouse_second_high":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayhouse_high":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_17":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_24":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_30":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_35":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_40":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_45":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_60":                   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidayage_over_60":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidaysex_woman":                ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_holidaysex_man":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_urban_cons_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_urban_cons_low":          ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_urban_cons_lower":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_urban_cons_secondhigh":   ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_urban_cons_high":         ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_junior_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_doctor":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_specialty":           ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_primary":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_college":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_postgraduate":        ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_edu_senior_middle":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_house_middle":            ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_house_low":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_house_lower":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_house_second_high":       ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_house_high":              ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_17":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_24":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_30":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_35":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_40":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_45":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_60":                  ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_age_over_60":             ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_sex_woman":               ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+        "f_workday_sex_man":                 ["0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100"],
+    }
+    
+    CLEANING_RULES = {
+        "r_home_num":                    {"method": "fillna", "opt": "fill", "value": "501-2000", "type": "str"},
+        "r_work_num":                    {"method": "fillna", "opt": "fill", "value": "501-2000", "type": "str"},
+        "r_resident_num":                {"method": "fillna", "opt": "fill", "value": "501-2000", "type": "str"},
+        "r_urban_cons_middle":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_urban_cons_low":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_urban_cons_lower":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_urban_cons_secondhigh":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_urban_cons_high":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_junior_middle":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_doctor":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_specialty":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_primary":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_college":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_postgraduate":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_edu_senior_middle":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price79999":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price59999":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price39999":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price19999":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price9999":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price7999":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_house_price4999":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_17":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_24":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_30":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_35":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_40":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_45":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_60":                      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_age_over_60":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_sex_woman":                   {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_sex_man":                     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_50":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_100":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_150":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_200":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_500":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_over_500":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_2":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_4":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_6":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_8":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_10":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_catering_times_11":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_beijing":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_tianjing":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_hebei":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_shanxi":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_neimeng":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_liaoning":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_jilin":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_heilongjiang":         {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_shanghai":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_jiangsu":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_zhejiang":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_anhui":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_fujian":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_jiangix":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_shandong":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_henan":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_hubei":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_hunan":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_guangdong":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_hainan":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_sichuan":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_guizhou":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_yunnan":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_shan":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_gansu":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_qinghai":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_guangxi":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_ningxia":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_xinjiang":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_xizang":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_chongqing":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_hongkong":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_macao":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_taiwan":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "r_native_other":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flow_num":                    {"method": "fillna", "opt": "fill", "value": "2001-5000", "type": "str"},
+        "f_holiday_flow_num":            {"method": "fillna", "opt": "fill", "value": "2001-5000", "type": "str"},
+        "f_workday_flow_num":            {"method": "fillna", "opt": "fill", "value": "2001-5000", "type": "str"},
+        "f_flowurban_cons_middle":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowurban_cons_low":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowurban_cons_lower":        {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowurban_cons_second_high":  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowurban_cons_high":         {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_junior_middle":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_doctor":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_specialty":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_primary":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_college":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_postgraduate":        {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowedu_senior_middle":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowhouse_middle":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowhouse_low":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowhouse_lower":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowhouse_second_high":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowhouse_high":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_17":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_24":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_30":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_35":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_40":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_45":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_60":                  {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowage_over_60":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowsex_woman":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_flowsex_man":                 {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayurban_cons_middle":    {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayurban_cons_low":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayurban_cons_lower":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayurban_cons_secondhigh": {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayurban_cons_high":      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_junior_middle":    {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_doctor":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_specialty":        {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_primary":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_college":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_postgraduate":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayedu_senior_middle":    {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayhouse_middle":         {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayhouse_low":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayhouse_lower":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayhouse_second_high":    {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayhouse_high":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_17":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_24":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_30":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_35":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_40":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_45":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_60":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidayage_over_60":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidaysex_woman":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_holidaysex_man":              {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_urban_cons_middle":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_urban_cons_low":        {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_urban_cons_lower":      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_urban_cons_secondhigh": {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_urban_cons_high":       {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_junior_middle":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_doctor":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_specialty":         {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_primary":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_college":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_postgraduate":      {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_edu_senior_middle":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_house_middle":          {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_house_low":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_house_lower":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_house_second_high":     {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_house_high":            {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_17":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_24":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_30":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_35":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_40":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_45":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_60":                {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_age_over_60":           {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_sex_woman":             {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+        "f_workday_sex_man":               {"method": "fillna", "opt": "fill", "value": "40-50", "type": "str"},
+    }
+
+class ImportanceFeaturesMap:
+    CUSTOM_FEATRUES_MAP = {
+        "BB_RTL_CUST_GRADE_NAME":                           "零售户分档名称",
+        "BB_RTL_CUST_MARKET_TYPE_NAME":                     "零售户市场类型名称",
+        "STORE_AREA":                                       "店铺经营面积",
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":                   "零售户业态名称",
+        "OPERATOR_EDU_LEVEL":                               "零售客户经营者文化程度",
+        "OPERATOR_AGE":                                     "经营者年龄",
+        "BB_RTL_CUST_CHAIN_FLAG":                           "零售户连锁标识",
+        "PRESENT_STAR_TERMINAL":                            "终端星级",
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":                "零售户信用等级名称",
+        "MD04_DIR_SAL_STORE_FLAG":                          "直营店标识",
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":                   "零售户经营范围名称",
+        "PRODUCT_INSALE_QTY":                               "在销品规数",
+        # "CUST_INVESTMENT":                                  "店铺资源投入建设",
+    }
+    
+    PRODUCT_FEATRUES_MAP = {
+        # ProductConfig 字段映射
+        "direct_retail_price":                              "建议零售价",
+        "is_low_tar":                                       "是否低焦油烟",
+        "tar_qty":                                          "焦油含量",
+        "is_exploding_beads":                               "是否爆珠",
+        "is_shortbranch":                                   "是否短支烟",
+        "is_medium":                                        "是否中支烟",
+        "is_tiny":                                          "是否细支",
+        "product_style_code_name":                          "包装类型名称",
+        "org_is_abnormity":                                 "是否异形包装",
+        "is_chuangxin":                                     "是否创新品类",
+        "is_key_brand":                                     "是否重点品牌",
+        "foster_level_hy":                                  "是否行业共育品规",
+        "foster_level_sj":                                  "是否省级共育品规",
+        "is_cigar":                                         "是否雪茄型卷烟",
+        "co_qty":                                           "一氧化碳含量",
+        "tbc_total_length":                                 "烟支总长度",
+        "tbc_length":                                       "烟支长度",
+        "filter_length":                                    "滤嘴长度",
+    }
+    
+    ORDER_FEATURE_MAP = {
+        "MONTH6_SALE_QTY": "近半年销量(箱)",
+        "MONTH6_SALE_AMT": "近半年销售额(万元)",
+        "MONTH6_GROSS_PROFIT_RATE": "近半年毛利率",
+        "MONTH6_SALE_QTY_YOY": "销量同比",
+        "MONTH6_SALE_QTY_MOM": "销量环比",
+        "MONTH6_SALE_AMT_YOY": "销售额(购进额)同比",
+        "MONTH6_SALE_AMT_MOM": "销售额(购进额)环比",
+        "STOCK_QTY": "库存",
+        "ORDER_FULLORDR_RATE": "订足率",
+        "ORDER_FULLORDR_RATE_MOM": "订足率环比",
+        "FULL_FILLMENT_RATE": "订单满足率",
+        "CUSTOMER_REPURCHASE_RATE": "会员重购率(部分有会员)",
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC": "新品订货量占同价类比重",
+        "DEMAND_RATE": "需求量满足率",
+        "LISTING_RATE": "品规上架率",
+        "PUT_MARKET_FINISH_RATE": "投放完成率",
+        "OUT_STOCK_DAYS": "断货天数(部分零售商有)",
+        "UNPACKING_RATE": "拆包率",
+        "city_uuid": "城市UUID"
+    }
+    
+    POS_FEATURE_MAP = {
+        "YLT_TURNOVER_RATE": "易灵通动销率",
+        "YLT_BAR_PACKAGE_SALE_OCC": "易灵通条包销售占比",
+        "POS_PACKAGE_PRICE": "POS机单包价格",
+    }
+    
+    SHOPING_FEATURES_MAP = {
+        # 商圈 字段映射
+        "r_home_num": "常驻人口_居住人数",
+        "r_work_num": "常驻人口_工作人数",
+        "r_resident_num": "常驻人口_工作或居住人数",
+        "r_urban_cons_middle": "常驻人口_城市消费水平_中",
+        "r_urban_cons_low": "常驻人口_城市消费水平_低",
+        "r_urban_cons_lower": "常驻人口_城市消费水平_次低",
+        "r_urban_cons_secondhigh": "常驻人口_城市消费水平_次高",
+        "r_urban_cons_high": "常驻人口_城市消费水平_高",
+        "r_edu_junior_middle": "常驻人口_学历_初中",
+        "r_edu_doctor": "常驻人口_学历_博士",
+        "r_edu_specialty": "常驻人口_学历_大专",
+        "r_edu_primary": "常驻人口_学历_小学",
+        "r_edu_college": "常驻人口_学历_本科",
+        "r_edu_postgraduate": "常驻人口_学历_硕士",
+        "r_edu_senior_middle": "常驻人口_学历_高中",
+        "r_house_price79999": "常驻人口_居住社区房价_60000_79999",
+        "r_house_price59999": "常驻人口_居住社区房价_40000_59999",
+        "r_house_price39999": "常驻人口_居住社区房价_20000_39999",
+        "r_house_price19999": "常驻人口_居住社区房价_10000_19999",
+        "r_house_price9999": "常驻人口_居住社区房价_8000_9999",
+        "r_house_price7999": "常驻人口_居住社区房价_5000_7999",
+        "r_house_price4999": "常驻人口_居住社区房价_2000_4999",
+        "r_age_17": "常驻人口_年龄_0_17",
+        "r_age_24": "常驻人口_年龄_18_24",
+        "r_age_30": "常驻人口_年龄_25_30",
+        "r_age_35": "常驻人口_年龄_31_35",
+        "r_age_40": "常驻人口_年龄_36_40",
+        "r_age_45": "常驻人口_年龄_41_45",
+        "r_age_60": "常驻人口_年龄_46_60",
+        "r_age_over_60": "常驻人口_年龄_61以上",
+        "r_sex_woman": "常驻人口_性别_女",
+        "r_sex_man": "常驻人口_性别_男",
+        "r_catering_50": "常驻人口_餐饮消费水平_50",
+        "r_catering_100": "常驻人口_餐饮消费水平_100",
+        "r_catering_150": "常驻人口_餐饮消费水平_150",
+        "r_catering_200": "常驻人口_餐饮消费水平_200",
+        "r_catering_500": "常驻人口_餐饮消费水平_500",
+        "r_catering_over_500": "常驻人口_餐饮消费水平_500以上",
+        "r_catering_times_2": "常驻人口_餐饮消费频次_1_2",
+        "r_catering_times_4": "常驻人口_餐饮消费频次_2_4",
+        "r_catering_times_6": "常驻人口_餐饮消费频次_4_6",
+        "r_catering_times_8": "常驻人口_餐饮消费频次_6_8",
+        "r_catering_times_10": "常驻人口_餐饮消费频次_8_10",
+        "r_catering_times_11": "常驻人口_餐饮消费频次_11以上",
+        "r_native_beijing": "常驻人口_家乡地_北京市",
+        "r_native_tianjing": "常驻人口_家乡地_天津市",
+        "r_native_hebei": "常驻人口_家乡地_河北省",
+        "r_native_shanxi": "常驻人口_家乡地_山西省",
+        "r_native_neimeng": "常驻人口_家乡地_内蒙古",
+        "r_native_liaoning": "常驻人口_家乡地_辽宁省",
+        "r_native_jilin": "常驻人口_家乡地_吉林省",
+        "r_native_heilongjiang": "常驻人口_家乡地_黑龙江省",
+        "r_native_shanghai": "常驻人口_家乡地_上海市",
+        "r_native_jiangsu": "常驻人口_家乡地_江苏省",
+        "r_native_zhejiang": "常驻人口_家乡地_浙江省",
+        "r_native_anhui": "常驻人口_家乡地_安徽省",
+        "r_native_fujian": "常驻人口_家乡地_福建省",
+        "r_native_jiangix": "常驻人口_家乡地_江西省",
+        "r_native_shandong": "常驻人口_家乡地_山东省",
+        "r_native_henan": "常驻人口_家乡地_河南省",
+        "r_native_hubei": "常驻人口_家乡地_湖北省",
+        "r_native_hunan": "常驻人口_家乡地_湖南省",
+        "r_native_guangdong": "常驻人口_家乡地_广东省",
+        "r_native_hainan": "常驻人口_家乡地_海南省",
+        "r_native_sichuan": "常驻人口_家乡地_四川省",
+        "r_native_guizhou": "常驻人口_家乡地_贵州省",
+        "r_native_yunnan": "常驻人口_家乡地_云南省",
+        "r_native_shan": "常驻人口_家乡地_陕西省",
+        "r_native_gansu": "常驻人口_家乡地_甘肃省",
+        "r_native_qinghai": "常驻人口_家乡地_青海省",
+        "r_native_guangxi": "常驻人口_家乡地_广西壮族自治区",
+        "r_native_ningxia": "常驻人口_家乡地_宁夏回族自治区",
+        "r_native_xinjiang": "常驻人口_家乡地_新疆维吾尔自治区",
+        "r_native_xizang": "常驻人口_家乡地_西藏自治区",
+        "r_native_chongqing": "常驻人口_家乡地_重庆市",
+        "r_native_hongkong": "常驻人口_家乡地_香港",
+        "r_native_macao": "常驻人口_家乡地_澳门",
+        "r_native_taiwan": "常驻人口_家乡地_台湾",
+        "r_native_other": "常驻人口_家乡地_其它",
+        "f_flow_num": "流动人口_日均流动人口数量",
+        "f_holiday_flow_num": "流动人口_节假日日均流动人口数量",
+        "f_workday_flow_num": "流动人口_工作日日均流动人口数量",
+        "f_flowurban_cons_middle": "日均流动_城市消费水平_中",
+        "f_flowurban_cons_low": "日均流动_城市消费水平_低",
+        "f_flowurban_cons_lower": "日均流动_城市消费水平_次低",
+        "f_flowurban_cons_second_high": "日均流动_城市消费水平_次高",
+        "f_flowurban_cons_high": "日均流动_城市消费水平_高",
+        "f_flowedu_junior_middle": "日均流动_学历_初中",
+        "f_flowedu_doctor": "日均流动_学历_博士",
+        "f_flowedu_specialty": "日均流动_学历_大专",
+        "f_flowedu_primary": "日均流动_学历_小学",
+        "f_flowedu_college": "日均流动_学历_本科",
+        "f_flowedu_postgraduate": "日均流动_学历_硕士",
+        "f_flowedu_senior_middle": "日均流动_学历_高中",
+        "f_flowhouse_middle": "日均流动_居住社区房价_中",
+        "f_flowhouse_low": "日均流动_居住社区房价_低",
+        "f_flowhouse_lower": "日均流动_居住社区房价_次低",
+        "f_flowhouse_second_high": "日均流动_居住社区房价_次高",
+        "f_flowhouse_high": "日均流动_居住社区房价_高",
+        "f_flowage_17": "日均流动_年龄_0_17",
+        "f_flowage_24": "日均流动_年龄_18_24",
+        "f_flowage_30": "日均流动_年龄_25_30",
+        "f_flowage_35": "日均流动_年龄_31_35",
+        "f_flowage_40": "日均流动_年龄_36_40",
+        "f_flowage_45": "日均流动_年龄_41_45",
+        "f_flowage_60": "日均流动_年龄_46_60",
+        "f_flowage_over_60": "日均流动_年龄_61以上",
+        "f_flowsex_woman": "日均流动_性别_女",
+        "f_flowsex_man": "日均流动_性别_男",
+        "f_holidayurban_cons_middle": "节假日流动_城市消费水平_中",
+        "f_holidayurban_cons_low": "节假日流动_城市消费水平_低",
+        "f_holidayurban_cons_lower": "节假日流动_城市消费水平_次低",
+        "f_holidayurban_cons_secondhigh": "节假日流动_城市消费水平_次高",
+        "f_holidayurban_cons_high": "节假日流动_城市消费水平_高",
+        "f_holidayedu_junior_middle": "节假日流动_学历_初中",
+        "f_holidayedu_doctor": "节假日流动_学历_博士",
+        "f_holidayedu_specialty": "节假日流动_学历_大专",
+        "f_holidayedu_primary": "节假日流动_学历_小学",
+        "f_holidayedu_college": "节假日流动_学历_本科",
+        "f_holidayedu_postgraduate": "节假日流动_学历_硕士",
+        "f_holidayedu_senior_middle": "节假日流动_学历_高中",
+        "f_holidayhouse_middle": "节假日流动_居住社区房价_中",
+        "f_holidayhouse_low": "节假日流动_居住社区房价_低",
+        "f_holidayhouse_lower": "节假日流动_居住社区房价_次低",
+        "f_holidayhouse_second_high": "节假日流动_居住社区房价_次高",
+        "f_holidayhouse_high": "节假日流动_居住社区房价_高",
+        "f_holidayage_17": "节假日流动_年龄_0_17",
+        "f_holidayage_24": "节假日流动_年龄_18_24",
+        "f_holidayage_30": "节假日流动_年龄_25_30",
+        "f_holidayage_35": "节假日流动_年龄_31_35",
+        "f_holidayage_40": "节假日流动_年龄_36_40",
+        "f_holidayage_45": "节假日流动_年龄_41_45",
+        "f_holidayage_60": "节假日流动_年龄_46_60",
+        "f_holidayage_over_60": "节假日流动_年龄_61以上",
+        "f_holidaysex_woman": "节假日流动_性别_女",
+        "f_holidaysex_man": "节假日流动_性别_男",
+        "f_workday_urban_cons_middle": "工作日流动_城市消费水平_中",
+        "f_workday_urban_cons_low": "工作日流动_城市消费水平_低",
+        "f_workday_urban_cons_lower": "工作日流动_城市消费水平_次低",
+        "f_workday_urban_cons_secondhigh": "工作日流动_城市消费水平_次高",
+        "f_workday_urban_cons_high": "工作日流动_城市消费水平_高",
+        "f_workday_edu_junior_middle": "工作日流动_学历_初中",
+        "f_workday_edu_doctor": "工作日流动_学历_博士",
+        "f_workday_edu_specialty": "工作日流动_学历_大专",
+        "f_workday_edu_primary": "工作日流动_学历_小学",
+        "f_workday_edu_college": "工作日流动_学历_本科",
+        "f_workday_edu_postgraduate": "工作日流动_学历_硕士",
+        "f_workday_edu_senior_middle": "工作日流动_学历_高中",
+        "f_workday_house_middle": "工作日流动_居住社区房价_中",
+        "f_workday_house_low": "工作日流动_居住社区房价_低",
+        "f_workday_house_lower": "工作日流动_居住社区房价_次低",
+        "f_workday_house_second_high": "工作日流动_居住社区房价_次高",
+        "f_workday_house_high": "工作日流动_居住社区房价_高",
+        "f_workday_age_17": "工作日流动_年龄_0_17",
+        "f_workday_age_24": "工作日流动_年龄_18_24",
+        "f_workday_age_30": "工作日流动_年龄_25_30",
+        "f_workday_age_35": "工作日流动_年龄_31_35",
+        "f_workday_age_40": "工作日流动_年龄_36_40",
+        "f_workday_age_45": "工作日流动_年龄_41_45",
+        "f_workday_age_60": "工作日流动_年龄_46_60",
+        "f_workday_age_over_60": "工作日流动_年龄_61以上",
+        "f_workday_sex_woman": "工作日流动_性别_女",
+        "f_workday_sex_man": "工作日流动_性别_男"
+    }

+ 510 - 510
models/rank/data/config_ori.py

@@ -1,511 +1,511 @@
-class CustConfig:
-    FEATURE_COLUMNS = [
-        "BB_RETAIL_CUSTOMER_CODE",                     # 零售户代码
-        "BB_RTL_CUST_GRADE_NAME",                      # 零售户分档名称
-        "BB_RTL_CUST_MARKET_TYPE_NAME",                # 零售户市场类型名称
-        "STORE_AREA",                                  # 店铺经营面积
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME",              # 零售户业态名称
-        "OPERATOR_EDU_LEVEL",                          # 零售客户经营者文化程
-        "OPERATOR_AGE",                                # 经营者年龄
-        "BB_RTL_CUST_CHAIN_FLAG",                      # 零售户连锁标识
-        "PRESENT_STAR_TERMINAL",                       # 终端星级
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",           # 零售户信用等级名称
-        "MD04_DIR_SAL_STORE_FLAG",                     # 直营店标识
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME",              # 零售户经营范围名称
-        "PRODUCT_INSALE_QTY",                          # 在销品规数
-        # "CUST_INVESTMENT",                             # 店铺资源投入建设
-        
-        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",       # 新品订货量占同价类比重
-        # "PRODUCT_LISTING_RATE",                        # 品规上架率
-        # "STOCKOUT_DAYS",                              # 断货天数
-        # "YLT_TURNOVER_RATE",                           # 易灵通动销率
-        # "YLT_BAR_PACKAGE_SALE_OCC",                    # 易灵通条包销售占比
-        # "UNPACKING_RATE",                              # 拆包率
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME",              # 零售户商圈类型名称
-        
-        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",             # 零售户业态细分名称
-        
-        # "BB_RTL_CUST_TERMINAL_LEVEL_NAME",             # 零售户终端层级名称
-        # "BB_RTL_CUST_TERMINALEVEL_NAME",               # 零售户终端层级细分名称
-        # "MD04_MG_SAMPLE_CUST_FLAG",                    # 样本户标识
-        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG",            # 零售户大户标识
-        # "BB_RTL_CUST_OPERATE_METHOD_NAME",             # 零售户经营方式名称
-        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME",          # 零售户卷烟经营规模名称
-        
-        # "AVERAGE_CONSUMER_FLOW",                       # 月均消费人流
-        # "NEW_PRODUCT_MEMBERS_QTY",                     # 新品消费会员数量
-    ]
-    # 数据清洗规则
-    CLEANING_RULES = {
-        "BB_RTL_CUST_GRADE_NAME":                   {"method": "fillna", "opt": "fill", "value": "十五档", "type": "str"},
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
-        "STORE_AREA":                               {"method": "fillna", "opt": "mean", "type": "num"},
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        "OPERATOR_EDU_LEVEL":                       {"method": "fillna", "opt": "fill", "value": "无数据", "type": "str"},
-        "OPERATOR_AGE":                             {"method": "fillna", "opt": "mean", "type": "num"},
-        "BB_RTL_CUST_CHAIN_FLAG":                   {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "PRESENT_STAR_TERMINAL":                    {"method": "fillna", "opt": "fill", "value": "非星级", "type": "str"},
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        {"method": "fillna", "opt": "fill", "value": "B", "type": "str"},
-        "MD04_DIR_SAL_STORE_FLAG":                  {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
-        # "CUST_INVESTMENT":                          {"method": "fillna", "opt": "fill", "type": 0}
-        
-        
-        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC":    {"method": "fillna", "opt": "mean", "type": "num"},
-        # "PRODUCT_LISTING_RATE":                     {"method": "fillna", "opt": "mean", "type": "num"},
-        # "STOCKOUT_DAYS":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        # "YLT_TURNOVER_RATE":                        {"method": "fillna", "opt": "mean", "type": "num"},
-        # "NEW_PRODUCT_MEMBERS_QTY":                  {"method": "fillna", "opt": "mean", "type": "num"},
-        # "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
-        # "UNPACKING_RATE":                           {"method": "fillna", "opt": "mean", "type": "num"},
-        
-        
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        # "BB_RTL_CUST_TERMINALEVEL_NAME":          {"method": "fillna", "opt": "replace", "value": "BB_RTL_CUST_TERMINAL_LEVEL_NAME", "type": "str"},
-        # "MD04_MG_SAMPLE_CUST_FLAG":                 {"method": "fillna", "value": "N", "opt": "fill"},
-        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG":         {"method": "fillna", "value": "N", "opt": "fill"},
-        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME":       {"method": "fillna", "value": "中", "opt": "fill"},
-    }
-    
-    ONEHOT_CAT = {
-        "BB_RTL_CUST_GRADE_NAME":                   ['一档', '二档', '三档', '四档', '五档', '六档', '七档', '八档', '九档', '十档', '十一档', '十二档', 
-                                                    '十三档', '十四档', '十五档', '十六档', '十七档', '十八档', '十九档', '二十档', '二十一档', '二十二档', 
-                                                    '二十三档', '二十四档', '二十五档', '二十六档', '二十七档', '二十八档', '二十九档', '三十档'],
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             ["城网", "农网"],
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
-        "OPERATOR_EDU_LEVEL":                       [1, 2, 3, 4, 5, 6, 7, "无数据"],
-        "BB_RTL_CUST_CHAIN_FLAG":                   ["是", "否"],
-        "PRESENT_STAR_TERMINAL":                    ["一星", "二星", "三星", "四星", "五星", "非星级"],
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        ["AAA", "AA", "A", "B", "C", "D"],
-        "MD04_DIR_SAL_STORE_FLAG":                  ["是", "否"],
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           ["是", "否"],
-        
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME":           ["居民区", "商业娱乐区", "交通枢纽区", "旅游景区", "工业区", "集贸区", "院校学区", "办公区", "其他"]
-    }
-    
-class ProductConfig:
-    FEATURE_COLUMNS = [
-        "product_code",                                # 商品编码
-        "direct_retail_price",                         # 建议零售价
-        "is_low_tar",                                  # 是否低焦油烟
-        "tar_qty",                                     # 焦油含量
-        "is_exploding_beads",                          # 是否爆珠
-        "is_shortbranch",                              # 是否短支烟
-        "is_medium",                                   # 是否中支烟
-        "is_tiny",                                     # 是否细支
-        "product_style_code_name",                     # 包装类型名称
-        "org_is_abnormity",                            # 是否异形包装
-        "is_chuangxin",                                # 是否创新品类
-        "is_key_brand",                                # 是否重点品牌
-        "foster_level_hy",                             # 是否行业共育品规
-        "foster_level_sj",                             # 是否省级共育品规
-        "is_cigar",                                    # 是否雪茄型卷烟
-        "co_qty",                                      # 一氧化碳含量
-        "tbc_total_length",                            # 烟支总长度
-        "tbc_length",                                  # 烟支长度
-        "filter_length",                               # 滤嘴长度
-        
-
-        
-        # "adjust_price",                                # 含税调拨价
-        # "notwithtax_adjust_price",                     # 不含税调拨价
-        # "whole_sale_price",                            # 统一批发价
-        # "allot_price",                                 # 调拨价
-        # "direct_whole_price",                          # 批发指导价
-        # "retail_price",                                # 零售价
-        # "price_type_name",                             # 卷烟价类名称
-        # "gear_type_name",                              # 卷烟档位名称
-        # "category_type_name",                          # 卷烟品类名称
-        # "is_high_level",                               # 是否高端烟
-        # "is_upscale_level",                            # 是否高端烟不含高价
-        # "is_high_price",                               # 是否高价烟
-        # "is_low_price",                                # 是否低价烟
-        # "is_encourage",                                # 是否全国鼓励品牌
-        # "is_abnormity",                                # 是否异形包装
-        # "is_intake",                                   # 是否进口烟
-        # "is_short",                                    # 是否紧俏品牌
-        # "is_ordinary_price_type",                      # 是否普一类烟
-        # "source_type",                                 # 来源类型
-        # "chinese_mix",                                 # 中式混合
-        # "sub_price_type_name",                         # 细分卷烟价类名称
-    ]
-    
-    CLEANING_RULES = {
-        "direct_retail_price":                         {"method": "fillna", "opt": "mean", "type": "num"},
-        "is_low_tar":                                  {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "tar_qty":                                     {"method": "fillna", "opt": "mean", "type": "num"},
-        "is_exploding_beads":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_shortbranch":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_medium":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_tiny":                                     {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "product_style_code_name":                     {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        "org_is_abnormity":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_chuangxin":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_key_brand":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "foster_level_hy":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "foster_level_sj":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_cigar":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "co_qty":                                      {"method": "fillna", "opt": "mean", "type": "num"},
-        "tbc_total_length":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        "tbc_length":                                  {"method": "fillna", "opt": "mean", "type": "num"},
-        "filter_length":                               {"method": "fillna", "opt": "mean", "type": "num"},
-        
-        
-        # "adjust_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        # "notwithtax_adjust_price":                     {"method": "fillna", "opt": "mean", "type": "num"},
-        # "whole_sale_price":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        # "allot_price":                                 {"method": "fillna", "opt": "fill", "type": "num", "value": 0.0},
-        # "direct_whole_price":                          {"method": "fillna", "opt": "mean", "type": "num"},
-        # "retail_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        # "price_type_name":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "一类烟"},
-        # "gear_type_name":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "category_type_name":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "is_high_level":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_upscale_level":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_high_price":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_low_price":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_encourage":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_abnormity":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_intake":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_short":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_ordinary_price_type":                      {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "source_type":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "chinese_mix":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "sub_price_type_name":                         {"method": "fillna", "opt": "fill", "type": "str", "value": "普一类烟"},
-    }
-    
-
-    ONEHOT_CAT = {
-        "is_low_tar":                                  ["是", "否"],
-        "is_exploding_beads":                          ["是", "否"],
-        "is_shortbranch":                              ["是", "否"],
-        "is_medium":                                   ["是", "否"],
-        "is_tiny":                                     ["是", "否"],
-        "product_style_code_name":                     ["条盒硬盒", "条包硬盒", "条盒软盒", "条包软盒", "铁盒", "其他"],
-        "org_is_abnormity":                            ["是", "否"],
-        "is_chuangxin":                                ["是", "否"],
-        "is_key_brand":                                ["是", "否"],
-        "foster_level_hy":                             ["是", "否"],
-        "foster_level_sj":                             ["是", "否"],
-        "is_cigar":                                    ["是", "否"],
-        
-        
-        
-        # "price_type_name":                             ["一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
-        # "gear_type_name":                              ["第一档位", "第二档位", "第三档位", "第四档位", "第五档位", "第六档位", "第七档位", "第八档位", "其他"],
-        # "category_type_name":                          ["第1品类", "第2品类", "第3品类", "第4品类", "第5品类", "第6品类", "第7品类", 
-        #                                                 "第8品类", "第9品类", "第10品类", "第11品类", "第12品类", "第13品类", "其他"],
-        # "is_high_level":                               ["是", "否"],
-        # "is_upscale_level":                            ["是", "否"],
-        # "is_high_price":                               ["是", "否"],
-        # "is_low_price":                                ["是", "否"],
-        # "is_encourage":                                ["是", "否"],
-        # "is_abnormity":                                ["是", "否"],
-        # "is_intake":                                   ["是", "否"],
-        # "is_short":                                    ["是", "否"],
-        # "is_ordinary_price_type":                      ["是", "否"],
-        # "source_type":                                 ["是", "否"],
-        # "chinese_mix":                                 ["是", "否"],
-        # "sub_price_type_name":                         ["高端烟", "高价位烟", "普一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
-    }
-    
-class OrderConfig:
-    FEATURE_COLUMNS = [
-        "BB_RETAIL_CUSTOMER_CODE",                          # 零售户编码
-        "PRODUCT_CODE",                                     # 卷烟编码
-        "MONTH6_SALE_QTY",                                  # 近半年销量(箱)
-        "MONTH6_SALE_AMT",                                  # 近半年销售额(万元)
-        "MONTH6_GROSS_PROFIT_RATE",                         # 近半年毛利率
-        "MONTH6_SALE_QTY_YOY",                              # 销售量同比
-        "MONTH6_SALE_QTY_MOM",                              # 销售量环比
-        "MONTH6_SALE_AMT_YOY",                              # 销售额(购进额)同比
-        "MONTH6_SALE_AMT_MOM",                              # 销售额(购进额)环比
-        "STOCK_QTY",                                        # 库存
-        "ORDER_FULLORDR_RATE",                              # 订足率
-        "FULL_FILLMENT_RATE",                               # 订单满足率
-        "ORDER_FULLORDR_RATE_MOM",                          # 订足率环比
-        "CUSTOMER_REPURCHASE_RATE",                         # 会员重购率  
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",            # 新品订货量占同价类比重/decimal(18,6)
-        "DEMAND_RATE",                                      # 需求量满足率
-        "LISTING_RATE",                                     # 品规商上架率
-        "PUT_MARKET_FINISH_RATE",                           # 投放完成率
-        "OUT_STOCK_DAYS",                                   # 断货天数
-        "YLT_TURNOVER_RATE",                                # 易灵通动销率
-        "YLT_BAR_PACKAGE_SALE_OCC",                         # 易灵通调包销售占比
-        "UNPACKING_RATE",                                   # 拆包率
-        "POS_PACKAGE_PRICE",                                # pos机单包价格
-    ]
-    
-    CLEANING_FEATURES = [
-        "MONTH6_SALE_QTY",
-        "MONTH6_SALE_AMT",
-        "MONTH6_GROSS_PROFIT_RATE",
-        "MONTH6_SALE_QTY_YOY",
-        "MONTH6_SALE_QTY_MOM",
-        "MONTH6_SALE_AMT_YOY",
-        "MONTH6_SALE_AMT_MOM",
-        "STOCK_QTY",
-        "ORDER_FULLORDR_RATE",
-        "FULL_FILLMENT_RATE",
-        "ORDER_FULLORDR_RATE_MOM",
-        "CUSTOMER_REPURCHASE_RATE",
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",
-        "DEMAND_RATE",
-        "LISTING_RATE",
-        "PUT_MARKET_FINISH_RATE",
-        "OUT_STOCK_DAYS",
-        "UNPACKING_RATE",
-    ]
-    
-    WEIGHTS = {
-        "MONTH6_SALE_QTY":                                  0.15,
-        "MONTH6_SALE_QTY_MOM":                              0.2,
-        "ORDER_FULLORDR_RATE":                              0.3,
-        "ORDER_FULLORDR_RATE_MOM":                          0.35,
-    }
-    
-    POSFEATURES = [
-        "YLT_TURNOVER_RATE","YLT_BAR_PACKAGE_SALE_OCC","POS_PACKAGE_PRICE"
-    ]
-    
-class ImportanceFeaturesMap:
-    CUSTOM_FEATRUES_MAP = {
-        "BB_RTL_CUST_GRADE_NAME":                           "零售户分档名称",
-        "BB_RTL_CUST_MARKET_TYPE_NAME":                     "零售户市场类型名称",
-        "STORE_AREA":                                       "店铺经营面积",
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":                   "零售户业态名称",
-        "OPERATOR_EDU_LEVEL":                               "零售客户经营者文化程",
-        "OPERATOR_AGE":                                     "经营者年龄",
-        "BB_RTL_CUST_CHAIN_FLAG":                           "零售户连锁标识",
-        "PRESENT_STAR_TERMINAL":                            "终端星级",
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":                "零售户信用等级名称",
-        "MD04_DIR_SAL_STORE_FLAG":                          "直营店标识",
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":                   "零售户经营范围名称",
-        "PRODUCT_INSALE_QTY":                               "在销品规数",
-        # "CUST_INVESTMENT":                                  "店铺资源投入建设",
-    }
-    
-    PRODUCT_FEATRUES_MAP = {
-        # ProductConfig 字段映射
-        "direct_retail_price":                              "建议零售价",
-        "is_low_tar":                                       "是否低焦油烟",
-        "tar_qty":                                          "焦油含量",
-        "is_exploding_beads":                               "是否爆珠",
-        "is_shortbranch":                                   "是否短支烟",
-        "is_medium":                                        "是否中支烟",
-        "is_tiny":                                          "是否细支",
-        "product_style_code_name":                          "包装类型名称",
-        "org_is_abnormity":                                 "是否异形包装",
-        "is_chuangxin":                                     "是否创新品类",
-        "is_key_brand":                                     "是否重点品牌",
-        "foster_level_hy":                                  "是否行业共育品规",
-        "foster_level_sj":                                  "是否省级共育品规",
-        "is_cigar":                                         "是否雪茄型卷烟",
-        "co_qty":                                           "一氧化碳含量",
-        "tbc_total_length":                                 "烟支总长度",
-        "tbc_length":                                       "烟支长度",
-        "filter_length":                                    "滤嘴长度",
-    }
-    
-    ORDER_FEATURE_MAP = {
-        "MONTH6_SALE_QTY": "近半年销量(箱)",
-        "MONTH6_SALE_AMT": "近半年销售额(万元)",
-        "MONTH6_GROSS_PROFIT_RATE": "近半年毛利率",
-        "MONTH6_SALE_QTY_YOY": "销量同比",
-        "MONTH6_SALE_QTY_MOM": "销量环比",
-        "MONTH6_SALE_AMT_YOY": "销售额(购进额)同比",
-        "MONTH6_SALE_AMT_MOM": "销售额(购进额)环比",
-        "STOCK_QTY": "库存",
-        "ORDER_FULLORDR_RATE": "订足率",
-        "ORDER_FULLORDR_RATE_MOM": "订足率环比",
-        "FULL_FILLMENT_RATE": "订单满足率",
-        "CUSTOMER_REPURCHASE_RATE": "会员重购率(部分有会员)",
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC": "新品订货量占同价类比重",
-        "DEMAND_RATE": "需求量满足率",
-        "LISTING_RATE": "品规上架率",
-        "PUT_MARKET_FINISH_RATE": "投放完成率",
-        "OUT_STOCK_DAYS": "断货天数(部分零售商有)",
-        "UNPACKING_RATE": "拆包率",
-        "city_uuid": "城市UUID"
-    }
-    
-    POS_FEATURE_MAP = {
-        "YLT_TURNOVER_RATE": "易灵通动销率",
-        "YLT_BAR_PACKAGE_SALE_OCC": "易灵通条包销售占比",
-        "POS_PACKAGE_PRICE": "POS机单包价格",
-    }
-    
-    SHOPING_FEATURES_MAP = {
-        # 商圈 字段映射
-        "r_home_num": "常驻人口_居住人数",
-        "r_work_num": "常驻人口_工作人数",
-        "r_resident_num": "常驻人口_工作或居住人数",
-        "r_urban_cons_middle": "常驻人口_城市消费水平_中",
-        "r_urban_cons_low": "常驻人口_城市消费水平_低",
-        "r_urban_cons_lower": "常驻人口_城市消费水平_次低",
-        "r_urban_cons_secondhigh": "常驻人口_城市消费水平_次高",
-        "r_urban_cons_high": "常驻人口_城市消费水平_高",
-        "r_edu_junior_middle": "常驻人口_学历_初中",
-        "r_edu_doctor": "常驻人口_学历_博士",
-        "r_edu_specialty": "常驻人口_学历_大专",
-        "r_edu_primary": "常驻人口_学历_小学",
-        "r_edu_college": "常驻人口_学历_本科",
-        "r_edu_postgraduate": "常驻人口_学历_硕士",
-        "r_edu_senior_middle": "常驻人口_学历_高中",
-        "r_house_price79999": "常驻人口_居住社区房价_60000_79999",
-        "r_house_price59999": "常驻人口_居住社区房价_40000_59999",
-        "r_house_price39999": "常驻人口_居住社区房价_20000_39999",
-        "r_house_price19999": "常驻人口_居住社区房价_10000_19999",
-        "r_house_price9999": "常驻人口_居住社区房价_8000_9999",
-        "r_house_price7999": "常驻人口_居住社区房价_5000_7999",
-        "r_house_price4999": "常驻人口_居住社区房价_2000_4999",
-        "r_age_17": "常驻人口_年龄_0_17",
-        "r_age_24": "常驻人口_年龄_18_24",
-        "r_age_30": "常驻人口_年龄_25_30",
-        "r_age_35": "常驻人口_年龄_31_35",
-        "r_age_40": "常驻人口_年龄_36_40",
-        "r_age_45": "常驻人口_年龄_41_45",
-        "r_age_60": "常驻人口_年龄_46_60",
-        "r_age_over_60": "常驻人口_年龄_61以上",
-        "r_sex_woman": "常驻人口_性别_女",
-        "r_sex_man": "常驻人口_性别_男",
-        "r_catering_50": "常驻人口_餐饮消费水平_50",
-        "r_catering_100": "常驻人口_餐饮消费水平_100",
-        "r_catering_150": "常驻人口_餐饮消费水平_150",
-        "r_catering_200": "常驻人口_餐饮消费水平_200",
-        "r_catering_500": "常驻人口_餐饮消费水平_500",
-        "r_catering_over_500": "常驻人口_餐饮消费水平_500以上",
-        "r_catering_times_2": "常驻人口_餐饮消费频次_1_2",
-        "r_catering_times_4": "常驻人口_餐饮消费频次_2_4",
-        "r_catering_times_6": "常驻人口_餐饮消费频次_4_6",
-        "r_catering_times_8": "常驻人口_餐饮消费频次_6_8",
-        "r_catering_times_10": "常驻人口_餐饮消费频次_8_10",
-        "r_catering_times_11": "常驻人口_餐饮消费频次_11以上",
-        "r_native_beijing": "常驻人口_家乡地_北京市",
-        "r_native_tianjing": "常驻人口_家乡地_天津市",
-        "r_native_hebei": "常驻人口_家乡地_河北省",
-        "r_native_shanxi": "常驻人口_家乡地_山西省",
-        "r_native_neimeng": "常驻人口_家乡地_内蒙古",
-        "r_native_liaoning": "常驻人口_家乡地_辽宁省",
-        "r_native_jilin": "常驻人口_家乡地_吉林省",
-        "r_native_heilongjiang": "常驻人口_家乡地_黑龙江省",
-        "r_native_shanghai": "常驻人口_家乡地_上海市",
-        "r_native_jiangsu": "常驻人口_家乡地_江苏省",
-        "r_native_zhejiang": "常驻人口_家乡地_浙江省",
-        "r_native_anhui": "常驻人口_家乡地_安徽省",
-        "r_native_fujian": "常驻人口_家乡地_福建省",
-        "r_native_jiangix": "常驻人口_家乡地_江西省",
-        "r_native_shandong": "常驻人口_家乡地_山东省",
-        "r_native_henan": "常驻人口_家乡地_河南省",
-        "r_native_hubei": "常驻人口_家乡地_湖北省",
-        "r_native_hunan": "常驻人口_家乡地_湖南省",
-        "r_native_guangdong": "常驻人口_家乡地_广东省",
-        "r_native_hainan": "常驻人口_家乡地_海南省",
-        "r_native_sichuan": "常驻人口_家乡地_四川省",
-        "r_native_guizhou": "常驻人口_家乡地_贵州省",
-        "r_native_yunnan": "常驻人口_家乡地_云南省",
-        "r_native_shan": "常驻人口_家乡地_陕西省",
-        "r_native_gansu": "常驻人口_家乡地_甘肃省",
-        "r_native_qinghai": "常驻人口_家乡地_青海省",
-        "r_native_guangxi": "常驻人口_家乡地_广西壮族自治区",
-        "r_native_ningxia": "常驻人口_家乡地_宁夏回族自治区",
-        "r_native_xinjiang": "常驻人口_家乡地_新疆维吾尔自治区",
-        "r_native_xizang": "常驻人口_家乡地_西藏自治区",
-        "r_native_chongqing": "常驻人口_家乡地_重庆市",
-        "r_native_hongkong": "常驻人口_家乡地_香港",
-        "r_native_macao": "常驻人口_家乡地_澳门",
-        "r_native_taiwan": "常驻人口_家乡地_台湾",
-        "r_native_other": "常驻人口_家乡地_其它",
-        "f_flow_num": "流动人口_日均流动人口数量",
-        "f_holiday_flow_num": "流动人口_节假日日均流动人口数量",
-        "f_workday_flow_num": "流动人口_工作日日均流动人口数量",
-        "f_flowurban_cons_middle": "日均流动_城市消费水平_中",
-        "f_flowurban_cons_low": "日均流动_城市消费水平_低",
-        "f_flowurban_cons_lower": "日均流动_城市消费水平_次低",
-        "f_flowurban_cons_second_high": "日均流动_城市消费水平_次高",
-        "f_flowurban_cons_high": "日均流动_城市消费水平_高",
-        "f_flowedu_junior_middle": "日均流动_学历_初中",
-        "f_flowedu_doctor": "日均流动_学历_博士",
-        "f_flowedu_specialty": "日均流动_学历_大专",
-        "f_flowedu_primary": "日均流动_学历_小学",
-        "f_flowedu_college": "日均流动_学历_本科",
-        "f_flowedu_postgraduate": "日均流动_学历_硕士",
-        "f_flowedu_senior_middle": "日均流动_学历_高中",
-        "f_flowhouse_middle": "日均流动_居住社区房价_中",
-        "f_flowhouse_low": "日均流动_居住社区房价_低",
-        "f_flowhouse_lower": "日均流动_居住社区房价_次低",
-        "f_flowhouse_second_high": "日均流动_居住社区房价_次高",
-        "f_flowhouse_high": "日均流动_居住社区房价_高",
-        "f_flowage_17": "日均流动_年龄_0_17",
-        "f_flowage_24": "日均流动_年龄_18_24",
-        "f_flowage_30": "日均流动_年龄_25_30",
-        "f_flowage_35": "日均流动_年龄_31_35",
-        "f_flowage_40": "日均流动_年龄_36_40",
-        "f_flowage_45": "日均流动_年龄_41_45",
-        "f_flowage_60": "日均流动_年龄_46_60",
-        "f_flowage_over_60": "日均流动_年龄_61以上",
-        "f_flowsex_woman": "日均流动_性别_女",
-        "f_flowsex_man": "日均流动_性别_男",
-        "f_holidayurban_cons_middle": "节假日流动_城市消费水平_中",
-        "f_holidayurban_cons_low": "节假日流动_城市消费水平_低",
-        "f_holidayurban_cons_lower": "节假日流动_城市消费水平_次低",
-        "f_holidayurban_cons_secondhigh": "节假日流动_城市消费水平_次高",
-        "f_holidayurban_cons_high": "节假日流动_城市消费水平_高",
-        "f_holidayedu_junior_middle": "节假日流动_学历_初中",
-        "f_holidayedu_doctor": "节假日流动_学历_博士",
-        "f_holidayedu_specialty": "节假日流动_学历_大专",
-        "f_holidayedu_primary": "节假日流动_学历_小学",
-        "f_holidayedu_college": "节假日流动_学历_本科",
-        "f_holidayedu_postgraduate": "节假日流动_学历_硕士",
-        "f_holidayedu_senior_middle": "节假日流动_学历_高中",
-        "f_holidayhouse_middle": "节假日流动_居住社区房价_中",
-        "f_holidayhouse_low": "节假日流动_居住社区房价_低",
-        "f_holidayhouse_lower": "节假日流动_居住社区房价_次低",
-        "f_holidayhouse_second_high": "节假日流动_居住社区房价_次高",
-        "f_holidayhouse_high": "节假日流动_居住社区房价_高",
-        "f_holidayage_17": "节假日流动_年龄_0_17",
-        "f_holidayage_24": "节假日流动_年龄_18_24",
-        "f_holidayage_30": "节假日流动_年龄_25_30",
-        "f_holidayage_35": "节假日流动_年龄_31_35",
-        "f_holidayage_40": "节假日流动_年龄_36_40",
-        "f_holidayage_45": "节假日流动_年龄_41_45",
-        "f_holidayage_60": "节假日流动_年龄_46_60",
-        "f_holidayage_over_60": "节假日流动_年龄_61以上",
-        "f_holidaysex_woman": "节假日流动_性别_女",
-        "f_holidaysex_man": "节假日流动_性别_男",
-        "f_workday_urban_cons_middle": "工作日流动_城市消费水平_中",
-        "f_workday_urban_cons_low": "工作日流动_城市消费水平_低",
-        "f_workday_urban_cons_lower": "工作日流动_城市消费水平_次低",
-        "f_workday_urban_cons_secondhigh": "工作日流动_城市消费水平_次高",
-        "f_workday_urban_cons_high": "工作日流动_城市消费水平_高",
-        "f_workday_edu_junior_middle": "工作日流动_学历_初中",
-        "f_workday_edu_doctor": "工作日流动_学历_博士",
-        "f_workday_edu_specialty": "工作日流动_学历_大专",
-        "f_workday_edu_primary": "工作日流动_学历_小学",
-        "f_workday_edu_college": "工作日流动_学历_本科",
-        "f_workday_edu_postgraduate": "工作日流动_学历_硕士",
-        "f_workday_edu_senior_middle": "工作日流动_学历_高中",
-        "f_workday_house_middle": "工作日流动_居住社区房价_中",
-        "f_workday_house_low": "工作日流动_居住社区房价_低",
-        "f_workday_house_lower": "工作日流动_居住社区房价_次低",
-        "f_workday_house_second_high": "工作日流动_居住社区房价_次高",
-        "f_workday_house_high": "工作日流动_居住社区房价_高",
-        "f_workday_age_17": "工作日流动_年龄_0_17",
-        "f_workday_age_24": "工作日流动_年龄_18_24",
-        "f_workday_age_30": "工作日流动_年龄_25_30",
-        "f_workday_age_35": "工作日流动_年龄_31_35",
-        "f_workday_age_40": "工作日流动_年龄_36_40",
-        "f_workday_age_45": "工作日流动_年龄_41_45",
-        "f_workday_age_60": "工作日流动_年龄_46_60",
-        "f_workday_age_over_60": "工作日流动_年龄_61以上",
-        "f_workday_sex_woman": "工作日流动_性别_女",
-        "f_workday_sex_man": "工作日流动_性别_男"
+class CustConfig:
+    FEATURE_COLUMNS = [
+        "BB_RETAIL_CUSTOMER_CODE",                     # 零售户代码
+        "BB_RTL_CUST_GRADE_NAME",                      # 零售户分档名称
+        "BB_RTL_CUST_MARKET_TYPE_NAME",                # 零售户市场类型名称
+        "STORE_AREA",                                  # 店铺经营面积
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME",              # 零售户业态名称
+        "OPERATOR_EDU_LEVEL",                          # 零售客户经营者文化程
+        "OPERATOR_AGE",                                # 经营者年龄
+        "BB_RTL_CUST_CHAIN_FLAG",                      # 零售户连锁标识
+        "PRESENT_STAR_TERMINAL",                       # 终端星级
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",           # 零售户信用等级名称
+        "MD04_DIR_SAL_STORE_FLAG",                     # 直营店标识
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME",              # 零售户经营范围名称
+        "PRODUCT_INSALE_QTY",                          # 在销品规数
+        # "CUST_INVESTMENT",                             # 店铺资源投入建设
+        
+        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",       # 新品订货量占同价类比重
+        # "PRODUCT_LISTING_RATE",                        # 品规上架率
+        # "STOCKOUT_DAYS",                              # 断货天数
+        # "YLT_TURNOVER_RATE",                           # 易灵通动销率
+        # "YLT_BAR_PACKAGE_SALE_OCC",                    # 易灵通条包销售占比
+        # "UNPACKING_RATE",                              # 拆包率
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME",              # 零售户商圈类型名称
+        
+        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",             # 零售户业态细分名称
+        
+        # "BB_RTL_CUST_TERMINAL_LEVEL_NAME",             # 零售户终端层级名称
+        # "BB_RTL_CUST_TERMINALEVEL_NAME",               # 零售户终端层级细分名称
+        # "MD04_MG_SAMPLE_CUST_FLAG",                    # 样本户标识
+        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG",            # 零售户大户标识
+        # "BB_RTL_CUST_OPERATE_METHOD_NAME",             # 零售户经营方式名称
+        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME",          # 零售户卷烟经营规模名称
+        
+        # "AVERAGE_CONSUMER_FLOW",                       # 月均消费人流
+        # "NEW_PRODUCT_MEMBERS_QTY",                     # 新品消费会员数量
+    ]
+    # 数据清洗规则
+    CLEANING_RULES = {
+        "BB_RTL_CUST_GRADE_NAME":                   {"method": "fillna", "opt": "fill", "value": "十五档", "type": "str"},
+        "BB_RTL_CUST_MARKET_TYPE_NAME":             {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
+        "STORE_AREA":                               {"method": "fillna", "opt": "mean", "type": "num"},
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        "OPERATOR_EDU_LEVEL":                       {"method": "fillna", "opt": "fill", "value": "无数据", "type": "str"},
+        "OPERATOR_AGE":                             {"method": "fillna", "opt": "mean", "type": "num"},
+        "BB_RTL_CUST_CHAIN_FLAG":                   {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "PRESENT_STAR_TERMINAL":                    {"method": "fillna", "opt": "fill", "value": "非星级", "type": "str"},
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        {"method": "fillna", "opt": "fill", "value": "B", "type": "str"},
+        "MD04_DIR_SAL_STORE_FLAG":                  {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
+        # "CUST_INVESTMENT":                          {"method": "fillna", "opt": "fill", "type": 0}
+        
+        
+        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC":    {"method": "fillna", "opt": "mean", "type": "num"},
+        # "PRODUCT_LISTING_RATE":                     {"method": "fillna", "opt": "mean", "type": "num"},
+        # "STOCKOUT_DAYS":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        # "YLT_TURNOVER_RATE":                        {"method": "fillna", "opt": "mean", "type": "num"},
+        # "NEW_PRODUCT_MEMBERS_QTY":                  {"method": "fillna", "opt": "mean", "type": "num"},
+        # "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
+        # "UNPACKING_RATE":                           {"method": "fillna", "opt": "mean", "type": "num"},
+        
+        
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        # "BB_RTL_CUST_TERMINALEVEL_NAME":          {"method": "fillna", "opt": "replace", "value": "BB_RTL_CUST_TERMINAL_LEVEL_NAME", "type": "str"},
+        # "MD04_MG_SAMPLE_CUST_FLAG":                 {"method": "fillna", "value": "N", "opt": "fill"},
+        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG":         {"method": "fillna", "value": "N", "opt": "fill"},
+        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME":       {"method": "fillna", "value": "中", "opt": "fill"},
+    }
+    
+    ONEHOT_CAT = {
+        "BB_RTL_CUST_GRADE_NAME":                   ['一档', '二档', '三档', '四档', '五档', '六档', '七档', '八档', '九档', '十档', '十一档', '十二档', 
+                                                    '十三档', '十四档', '十五档', '十六档', '十七档', '十八档', '十九档', '二十档', '二十一档', '二十二档', 
+                                                    '二十三档', '二十四档', '二十五档', '二十六档', '二十七档', '二十八档', '二十九档', '三十档'],
+        "BB_RTL_CUST_MARKET_TYPE_NAME":             ["城网", "农网"],
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
+        "OPERATOR_EDU_LEVEL":                       [1, 2, 3, 4, 5, 6, 7, "无数据"],
+        "BB_RTL_CUST_CHAIN_FLAG":                   ["是", "否"],
+        "PRESENT_STAR_TERMINAL":                    ["一星", "二星", "三星", "四星", "五星", "非星级"],
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        ["AAA", "AA", "A", "B", "C", "D"],
+        "MD04_DIR_SAL_STORE_FLAG":                  ["是", "否"],
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           ["是", "否"],
+        
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME":           ["居民区", "商业娱乐区", "交通枢纽区", "旅游景区", "工业区", "集贸区", "院校学区", "办公区", "其他"]
+    }
+    
+class ProductConfig:
+    FEATURE_COLUMNS = [
+        "product_code",                                # 商品编码
+        "direct_retail_price",                         # 建议零售价
+        "is_low_tar",                                  # 是否低焦油烟
+        "tar_qty",                                     # 焦油含量
+        "is_exploding_beads",                          # 是否爆珠
+        "is_shortbranch",                              # 是否短支烟
+        "is_medium",                                   # 是否中支烟
+        "is_tiny",                                     # 是否细支
+        "product_style_code_name",                     # 包装类型名称
+        "org_is_abnormity",                            # 是否异形包装
+        "is_chuangxin",                                # 是否创新品类
+        "is_key_brand",                                # 是否重点品牌
+        "foster_level_hy",                             # 是否行业共育品规
+        "foster_level_sj",                             # 是否省级共育品规
+        "is_cigar",                                    # 是否雪茄型卷烟
+        "co_qty",                                      # 一氧化碳含量
+        "tbc_total_length",                            # 烟支总长度
+        "tbc_length",                                  # 烟支长度
+        "filter_length",                               # 滤嘴长度
+        
+
+        
+        # "adjust_price",                                # 含税调拨价
+        # "notwithtax_adjust_price",                     # 不含税调拨价
+        # "whole_sale_price",                            # 统一批发价
+        # "allot_price",                                 # 调拨价
+        # "direct_whole_price",                          # 批发指导价
+        # "retail_price",                                # 零售价
+        # "price_type_name",                             # 卷烟价类名称
+        # "gear_type_name",                              # 卷烟档位名称
+        # "category_type_name",                          # 卷烟品类名称
+        # "is_high_level",                               # 是否高端烟
+        # "is_upscale_level",                            # 是否高端烟不含高价
+        # "is_high_price",                               # 是否高价烟
+        # "is_low_price",                                # 是否低价烟
+        # "is_encourage",                                # 是否全国鼓励品牌
+        # "is_abnormity",                                # 是否异形包装
+        # "is_intake",                                   # 是否进口烟
+        # "is_short",                                    # 是否紧俏品牌
+        # "is_ordinary_price_type",                      # 是否普一类烟
+        # "source_type",                                 # 来源类型
+        # "chinese_mix",                                 # 中式混合
+        # "sub_price_type_name",                         # 细分卷烟价类名称
+    ]
+    
+    CLEANING_RULES = {
+        "direct_retail_price":                         {"method": "fillna", "opt": "mean", "type": "num"},
+        "is_low_tar":                                  {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "tar_qty":                                     {"method": "fillna", "opt": "mean", "type": "num"},
+        "is_exploding_beads":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_shortbranch":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_medium":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_tiny":                                     {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "product_style_code_name":                     {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        "org_is_abnormity":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_chuangxin":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_key_brand":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "foster_level_hy":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "foster_level_sj":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_cigar":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "co_qty":                                      {"method": "fillna", "opt": "mean", "type": "num"},
+        "tbc_total_length":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        "tbc_length":                                  {"method": "fillna", "opt": "mean", "type": "num"},
+        "filter_length":                               {"method": "fillna", "opt": "mean", "type": "num"},
+        
+        
+        # "adjust_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
+        # "notwithtax_adjust_price":                     {"method": "fillna", "opt": "mean", "type": "num"},
+        # "whole_sale_price":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        # "allot_price":                                 {"method": "fillna", "opt": "fill", "type": "num", "value": 0.0},
+        # "direct_whole_price":                          {"method": "fillna", "opt": "mean", "type": "num"},
+        # "retail_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
+        # "price_type_name":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "一类烟"},
+        # "gear_type_name":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "category_type_name":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "is_high_level":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_upscale_level":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_high_price":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_low_price":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_encourage":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_abnormity":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_intake":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_short":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_ordinary_price_type":                      {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "source_type":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "chinese_mix":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "sub_price_type_name":                         {"method": "fillna", "opt": "fill", "type": "str", "value": "普一类烟"},
+    }
+    
+
+    ONEHOT_CAT = {
+        "is_low_tar":                                  ["是", "否"],
+        "is_exploding_beads":                          ["是", "否"],
+        "is_shortbranch":                              ["是", "否"],
+        "is_medium":                                   ["是", "否"],
+        "is_tiny":                                     ["是", "否"],
+        "product_style_code_name":                     ["条盒硬盒", "条包硬盒", "条盒软盒", "条包软盒", "铁盒", "其他"],
+        "org_is_abnormity":                            ["是", "否"],
+        "is_chuangxin":                                ["是", "否"],
+        "is_key_brand":                                ["是", "否"],
+        "foster_level_hy":                             ["是", "否"],
+        "foster_level_sj":                             ["是", "否"],
+        "is_cigar":                                    ["是", "否"],
+        
+        
+        
+        # "price_type_name":                             ["一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
+        # "gear_type_name":                              ["第一档位", "第二档位", "第三档位", "第四档位", "第五档位", "第六档位", "第七档位", "第八档位", "其他"],
+        # "category_type_name":                          ["第1品类", "第2品类", "第3品类", "第4品类", "第5品类", "第6品类", "第7品类", 
+        #                                                 "第8品类", "第9品类", "第10品类", "第11品类", "第12品类", "第13品类", "其他"],
+        # "is_high_level":                               ["是", "否"],
+        # "is_upscale_level":                            ["是", "否"],
+        # "is_high_price":                               ["是", "否"],
+        # "is_low_price":                                ["是", "否"],
+        # "is_encourage":                                ["是", "否"],
+        # "is_abnormity":                                ["是", "否"],
+        # "is_intake":                                   ["是", "否"],
+        # "is_short":                                    ["是", "否"],
+        # "is_ordinary_price_type":                      ["是", "否"],
+        # "source_type":                                 ["是", "否"],
+        # "chinese_mix":                                 ["是", "否"],
+        # "sub_price_type_name":                         ["高端烟", "高价位烟", "普一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
+    }
+    
+class OrderConfig:
+    FEATURE_COLUMNS = [
+        "BB_RETAIL_CUSTOMER_CODE",                          # 零售户编码
+        "PRODUCT_CODE",                                     # 卷烟编码
+        "MONTH6_SALE_QTY",                                  # 近半年销量(箱)
+        "MONTH6_SALE_AMT",                                  # 近半年销售额(万元)
+        "MONTH6_GROSS_PROFIT_RATE",                         # 近半年毛利率
+        "MONTH6_SALE_QTY_YOY",                              # 销售量同比
+        "MONTH6_SALE_QTY_MOM",                              # 销售量环比
+        "MONTH6_SALE_AMT_YOY",                              # 销售额(购进额)同比
+        "MONTH6_SALE_AMT_MOM",                              # 销售额(购进额)环比
+        "STOCK_QTY",                                        # 库存
+        "ORDER_FULLORDR_RATE",                              # 订足率
+        "FULL_FILLMENT_RATE",                               # 订单满足率
+        "ORDER_FULLORDR_RATE_MOM",                          # 订足率环比
+        "CUSTOMER_REPURCHASE_RATE",                         # 会员重购率  
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",            # 新品订货量占同价类比重/decimal(18,6)
+        "DEMAND_RATE",                                      # 需求量满足率
+        "LISTING_RATE",                                     # 品规商上架率
+        "PUT_MARKET_FINISH_RATE",                           # 投放完成率
+        "OUT_STOCK_DAYS",                                   # 断货天数
+        "YLT_TURNOVER_RATE",                                # 易灵通动销率
+        "YLT_BAR_PACKAGE_SALE_OCC",                         # 易灵通调包销售占比
+        "UNPACKING_RATE",                                   # 拆包率
+        "POS_PACKAGE_PRICE",                                # pos机单包价格
+    ]
+    
+    CLEANING_FEATURES = [
+        "MONTH6_SALE_QTY",
+        "MONTH6_SALE_AMT",
+        "MONTH6_GROSS_PROFIT_RATE",
+        "MONTH6_SALE_QTY_YOY",
+        "MONTH6_SALE_QTY_MOM",
+        "MONTH6_SALE_AMT_YOY",
+        "MONTH6_SALE_AMT_MOM",
+        "STOCK_QTY",
+        "ORDER_FULLORDR_RATE",
+        "FULL_FILLMENT_RATE",
+        "ORDER_FULLORDR_RATE_MOM",
+        "CUSTOMER_REPURCHASE_RATE",
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",
+        "DEMAND_RATE",
+        "LISTING_RATE",
+        "PUT_MARKET_FINISH_RATE",
+        "OUT_STOCK_DAYS",
+        "UNPACKING_RATE",
+    ]
+    
+    WEIGHTS = {
+        "MONTH6_SALE_QTY":                                  0.15,
+        "MONTH6_SALE_QTY_MOM":                              0.2,
+        "ORDER_FULLORDR_RATE":                              0.3,
+        "ORDER_FULLORDR_RATE_MOM":                          0.35,
+    }
+    
+    POSFEATURES = [
+        "YLT_TURNOVER_RATE","YLT_BAR_PACKAGE_SALE_OCC","POS_PACKAGE_PRICE"
+    ]
+    
+class ImportanceFeaturesMap:
+    CUSTOM_FEATRUES_MAP = {
+        "BB_RTL_CUST_GRADE_NAME":                           "零售户分档名称",
+        "BB_RTL_CUST_MARKET_TYPE_NAME":                     "零售户市场类型名称",
+        "STORE_AREA":                                       "店铺经营面积",
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":                   "零售户业态名称",
+        "OPERATOR_EDU_LEVEL":                               "零售客户经营者文化程",
+        "OPERATOR_AGE":                                     "经营者年龄",
+        "BB_RTL_CUST_CHAIN_FLAG":                           "零售户连锁标识",
+        "PRESENT_STAR_TERMINAL":                            "终端星级",
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":                "零售户信用等级名称",
+        "MD04_DIR_SAL_STORE_FLAG":                          "直营店标识",
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":                   "零售户经营范围名称",
+        "PRODUCT_INSALE_QTY":                               "在销品规数",
+        # "CUST_INVESTMENT":                                  "店铺资源投入建设",
+    }
+    
+    PRODUCT_FEATRUES_MAP = {
+        # ProductConfig 字段映射
+        "direct_retail_price":                              "建议零售价",
+        "is_low_tar":                                       "是否低焦油烟",
+        "tar_qty":                                          "焦油含量",
+        "is_exploding_beads":                               "是否爆珠",
+        "is_shortbranch":                                   "是否短支烟",
+        "is_medium":                                        "是否中支烟",
+        "is_tiny":                                          "是否细支",
+        "product_style_code_name":                          "包装类型名称",
+        "org_is_abnormity":                                 "是否异形包装",
+        "is_chuangxin":                                     "是否创新品类",
+        "is_key_brand":                                     "是否重点品牌",
+        "foster_level_hy":                                  "是否行业共育品规",
+        "foster_level_sj":                                  "是否省级共育品规",
+        "is_cigar":                                         "是否雪茄型卷烟",
+        "co_qty":                                           "一氧化碳含量",
+        "tbc_total_length":                                 "烟支总长度",
+        "tbc_length":                                       "烟支长度",
+        "filter_length":                                    "滤嘴长度",
+    }
+    
+    ORDER_FEATURE_MAP = {
+        "MONTH6_SALE_QTY": "近半年销量(箱)",
+        "MONTH6_SALE_AMT": "近半年销售额(万元)",
+        "MONTH6_GROSS_PROFIT_RATE": "近半年毛利率",
+        "MONTH6_SALE_QTY_YOY": "销量同比",
+        "MONTH6_SALE_QTY_MOM": "销量环比",
+        "MONTH6_SALE_AMT_YOY": "销售额(购进额)同比",
+        "MONTH6_SALE_AMT_MOM": "销售额(购进额)环比",
+        "STOCK_QTY": "库存",
+        "ORDER_FULLORDR_RATE": "订足率",
+        "ORDER_FULLORDR_RATE_MOM": "订足率环比",
+        "FULL_FILLMENT_RATE": "订单满足率",
+        "CUSTOMER_REPURCHASE_RATE": "会员重购率(部分有会员)",
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC": "新品订货量占同价类比重",
+        "DEMAND_RATE": "需求量满足率",
+        "LISTING_RATE": "品规上架率",
+        "PUT_MARKET_FINISH_RATE": "投放完成率",
+        "OUT_STOCK_DAYS": "断货天数(部分零售商有)",
+        "UNPACKING_RATE": "拆包率",
+        "city_uuid": "城市UUID"
+    }
+    
+    POS_FEATURE_MAP = {
+        "YLT_TURNOVER_RATE": "易灵通动销率",
+        "YLT_BAR_PACKAGE_SALE_OCC": "易灵通条包销售占比",
+        "POS_PACKAGE_PRICE": "POS机单包价格",
+    }
+    
+    SHOPING_FEATURES_MAP = {
+        # 商圈 字段映射
+        "r_home_num": "常驻人口_居住人数",
+        "r_work_num": "常驻人口_工作人数",
+        "r_resident_num": "常驻人口_工作或居住人数",
+        "r_urban_cons_middle": "常驻人口_城市消费水平_中",
+        "r_urban_cons_low": "常驻人口_城市消费水平_低",
+        "r_urban_cons_lower": "常驻人口_城市消费水平_次低",
+        "r_urban_cons_secondhigh": "常驻人口_城市消费水平_次高",
+        "r_urban_cons_high": "常驻人口_城市消费水平_高",
+        "r_edu_junior_middle": "常驻人口_学历_初中",
+        "r_edu_doctor": "常驻人口_学历_博士",
+        "r_edu_specialty": "常驻人口_学历_大专",
+        "r_edu_primary": "常驻人口_学历_小学",
+        "r_edu_college": "常驻人口_学历_本科",
+        "r_edu_postgraduate": "常驻人口_学历_硕士",
+        "r_edu_senior_middle": "常驻人口_学历_高中",
+        "r_house_price79999": "常驻人口_居住社区房价_60000_79999",
+        "r_house_price59999": "常驻人口_居住社区房价_40000_59999",
+        "r_house_price39999": "常驻人口_居住社区房价_20000_39999",
+        "r_house_price19999": "常驻人口_居住社区房价_10000_19999",
+        "r_house_price9999": "常驻人口_居住社区房价_8000_9999",
+        "r_house_price7999": "常驻人口_居住社区房价_5000_7999",
+        "r_house_price4999": "常驻人口_居住社区房价_2000_4999",
+        "r_age_17": "常驻人口_年龄_0_17",
+        "r_age_24": "常驻人口_年龄_18_24",
+        "r_age_30": "常驻人口_年龄_25_30",
+        "r_age_35": "常驻人口_年龄_31_35",
+        "r_age_40": "常驻人口_年龄_36_40",
+        "r_age_45": "常驻人口_年龄_41_45",
+        "r_age_60": "常驻人口_年龄_46_60",
+        "r_age_over_60": "常驻人口_年龄_61以上",
+        "r_sex_woman": "常驻人口_性别_女",
+        "r_sex_man": "常驻人口_性别_男",
+        "r_catering_50": "常驻人口_餐饮消费水平_50",
+        "r_catering_100": "常驻人口_餐饮消费水平_100",
+        "r_catering_150": "常驻人口_餐饮消费水平_150",
+        "r_catering_200": "常驻人口_餐饮消费水平_200",
+        "r_catering_500": "常驻人口_餐饮消费水平_500",
+        "r_catering_over_500": "常驻人口_餐饮消费水平_500以上",
+        "r_catering_times_2": "常驻人口_餐饮消费频次_1_2",
+        "r_catering_times_4": "常驻人口_餐饮消费频次_2_4",
+        "r_catering_times_6": "常驻人口_餐饮消费频次_4_6",
+        "r_catering_times_8": "常驻人口_餐饮消费频次_6_8",
+        "r_catering_times_10": "常驻人口_餐饮消费频次_8_10",
+        "r_catering_times_11": "常驻人口_餐饮消费频次_11以上",
+        "r_native_beijing": "常驻人口_家乡地_北京市",
+        "r_native_tianjing": "常驻人口_家乡地_天津市",
+        "r_native_hebei": "常驻人口_家乡地_河北省",
+        "r_native_shanxi": "常驻人口_家乡地_山西省",
+        "r_native_neimeng": "常驻人口_家乡地_内蒙古",
+        "r_native_liaoning": "常驻人口_家乡地_辽宁省",
+        "r_native_jilin": "常驻人口_家乡地_吉林省",
+        "r_native_heilongjiang": "常驻人口_家乡地_黑龙江省",
+        "r_native_shanghai": "常驻人口_家乡地_上海市",
+        "r_native_jiangsu": "常驻人口_家乡地_江苏省",
+        "r_native_zhejiang": "常驻人口_家乡地_浙江省",
+        "r_native_anhui": "常驻人口_家乡地_安徽省",
+        "r_native_fujian": "常驻人口_家乡地_福建省",
+        "r_native_jiangix": "常驻人口_家乡地_江西省",
+        "r_native_shandong": "常驻人口_家乡地_山东省",
+        "r_native_henan": "常驻人口_家乡地_河南省",
+        "r_native_hubei": "常驻人口_家乡地_湖北省",
+        "r_native_hunan": "常驻人口_家乡地_湖南省",
+        "r_native_guangdong": "常驻人口_家乡地_广东省",
+        "r_native_hainan": "常驻人口_家乡地_海南省",
+        "r_native_sichuan": "常驻人口_家乡地_四川省",
+        "r_native_guizhou": "常驻人口_家乡地_贵州省",
+        "r_native_yunnan": "常驻人口_家乡地_云南省",
+        "r_native_shan": "常驻人口_家乡地_陕西省",
+        "r_native_gansu": "常驻人口_家乡地_甘肃省",
+        "r_native_qinghai": "常驻人口_家乡地_青海省",
+        "r_native_guangxi": "常驻人口_家乡地_广西壮族自治区",
+        "r_native_ningxia": "常驻人口_家乡地_宁夏回族自治区",
+        "r_native_xinjiang": "常驻人口_家乡地_新疆维吾尔自治区",
+        "r_native_xizang": "常驻人口_家乡地_西藏自治区",
+        "r_native_chongqing": "常驻人口_家乡地_重庆市",
+        "r_native_hongkong": "常驻人口_家乡地_香港",
+        "r_native_macao": "常驻人口_家乡地_澳门",
+        "r_native_taiwan": "常驻人口_家乡地_台湾",
+        "r_native_other": "常驻人口_家乡地_其它",
+        "f_flow_num": "流动人口_日均流动人口数量",
+        "f_holiday_flow_num": "流动人口_节假日日均流动人口数量",
+        "f_workday_flow_num": "流动人口_工作日日均流动人口数量",
+        "f_flowurban_cons_middle": "日均流动_城市消费水平_中",
+        "f_flowurban_cons_low": "日均流动_城市消费水平_低",
+        "f_flowurban_cons_lower": "日均流动_城市消费水平_次低",
+        "f_flowurban_cons_second_high": "日均流动_城市消费水平_次高",
+        "f_flowurban_cons_high": "日均流动_城市消费水平_高",
+        "f_flowedu_junior_middle": "日均流动_学历_初中",
+        "f_flowedu_doctor": "日均流动_学历_博士",
+        "f_flowedu_specialty": "日均流动_学历_大专",
+        "f_flowedu_primary": "日均流动_学历_小学",
+        "f_flowedu_college": "日均流动_学历_本科",
+        "f_flowedu_postgraduate": "日均流动_学历_硕士",
+        "f_flowedu_senior_middle": "日均流动_学历_高中",
+        "f_flowhouse_middle": "日均流动_居住社区房价_中",
+        "f_flowhouse_low": "日均流动_居住社区房价_低",
+        "f_flowhouse_lower": "日均流动_居住社区房价_次低",
+        "f_flowhouse_second_high": "日均流动_居住社区房价_次高",
+        "f_flowhouse_high": "日均流动_居住社区房价_高",
+        "f_flowage_17": "日均流动_年龄_0_17",
+        "f_flowage_24": "日均流动_年龄_18_24",
+        "f_flowage_30": "日均流动_年龄_25_30",
+        "f_flowage_35": "日均流动_年龄_31_35",
+        "f_flowage_40": "日均流动_年龄_36_40",
+        "f_flowage_45": "日均流动_年龄_41_45",
+        "f_flowage_60": "日均流动_年龄_46_60",
+        "f_flowage_over_60": "日均流动_年龄_61以上",
+        "f_flowsex_woman": "日均流动_性别_女",
+        "f_flowsex_man": "日均流动_性别_男",
+        "f_holidayurban_cons_middle": "节假日流动_城市消费水平_中",
+        "f_holidayurban_cons_low": "节假日流动_城市消费水平_低",
+        "f_holidayurban_cons_lower": "节假日流动_城市消费水平_次低",
+        "f_holidayurban_cons_secondhigh": "节假日流动_城市消费水平_次高",
+        "f_holidayurban_cons_high": "节假日流动_城市消费水平_高",
+        "f_holidayedu_junior_middle": "节假日流动_学历_初中",
+        "f_holidayedu_doctor": "节假日流动_学历_博士",
+        "f_holidayedu_specialty": "节假日流动_学历_大专",
+        "f_holidayedu_primary": "节假日流动_学历_小学",
+        "f_holidayedu_college": "节假日流动_学历_本科",
+        "f_holidayedu_postgraduate": "节假日流动_学历_硕士",
+        "f_holidayedu_senior_middle": "节假日流动_学历_高中",
+        "f_holidayhouse_middle": "节假日流动_居住社区房价_中",
+        "f_holidayhouse_low": "节假日流动_居住社区房价_低",
+        "f_holidayhouse_lower": "节假日流动_居住社区房价_次低",
+        "f_holidayhouse_second_high": "节假日流动_居住社区房价_次高",
+        "f_holidayhouse_high": "节假日流动_居住社区房价_高",
+        "f_holidayage_17": "节假日流动_年龄_0_17",
+        "f_holidayage_24": "节假日流动_年龄_18_24",
+        "f_holidayage_30": "节假日流动_年龄_25_30",
+        "f_holidayage_35": "节假日流动_年龄_31_35",
+        "f_holidayage_40": "节假日流动_年龄_36_40",
+        "f_holidayage_45": "节假日流动_年龄_41_45",
+        "f_holidayage_60": "节假日流动_年龄_46_60",
+        "f_holidayage_over_60": "节假日流动_年龄_61以上",
+        "f_holidaysex_woman": "节假日流动_性别_女",
+        "f_holidaysex_man": "节假日流动_性别_男",
+        "f_workday_urban_cons_middle": "工作日流动_城市消费水平_中",
+        "f_workday_urban_cons_low": "工作日流动_城市消费水平_低",
+        "f_workday_urban_cons_lower": "工作日流动_城市消费水平_次低",
+        "f_workday_urban_cons_secondhigh": "工作日流动_城市消费水平_次高",
+        "f_workday_urban_cons_high": "工作日流动_城市消费水平_高",
+        "f_workday_edu_junior_middle": "工作日流动_学历_初中",
+        "f_workday_edu_doctor": "工作日流动_学历_博士",
+        "f_workday_edu_specialty": "工作日流动_学历_大专",
+        "f_workday_edu_primary": "工作日流动_学历_小学",
+        "f_workday_edu_college": "工作日流动_学历_本科",
+        "f_workday_edu_postgraduate": "工作日流动_学历_硕士",
+        "f_workday_edu_senior_middle": "工作日流动_学历_高中",
+        "f_workday_house_middle": "工作日流动_居住社区房价_中",
+        "f_workday_house_low": "工作日流动_居住社区房价_低",
+        "f_workday_house_lower": "工作日流动_居住社区房价_次低",
+        "f_workday_house_second_high": "工作日流动_居住社区房价_次高",
+        "f_workday_house_high": "工作日流动_居住社区房价_高",
+        "f_workday_age_17": "工作日流动_年龄_0_17",
+        "f_workday_age_24": "工作日流动_年龄_18_24",
+        "f_workday_age_30": "工作日流动_年龄_25_30",
+        "f_workday_age_35": "工作日流动_年龄_31_35",
+        "f_workday_age_40": "工作日流动_年龄_36_40",
+        "f_workday_age_45": "工作日流动_年龄_41_45",
+        "f_workday_age_60": "工作日流动_年龄_46_60",
+        "f_workday_age_over_60": "工作日流动_年龄_61以上",
+        "f_workday_sex_woman": "工作日流动_性别_女",
+        "f_workday_sex_man": "工作日流动_性别_男"
     }

+ 62 - 61
models/rank/data/dataloader.py

@@ -1,62 +1,63 @@
-import pandas as pd
-from models.rank.data.config import CustConfig, ProductConfig
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from models.rank.data.utils import one_hot_embedding
-
-class DataLoader:
-    def __init__(self,path):
-        self._gbdt_data_path = path
-        self._load_data()
-    
-    def _load_data(self):
-       
-        self._gbdt_data = pd.read_csv(self._gbdt_data_path, encoding="utf-8")
-        self._gbdt_data.drop('BB_RETAIL_CUSTOMER_CODE', axis=1, inplace=True)
-        self._gbdt_data.drop('product_code', axis=1, inplace=True)
-        
-        self._onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
-        
-        self._onehot_columns = list(self._onehot_feats.keys())
-        self._numeric_columns = self._gbdt_data.drop(self._onehot_columns + ["label"], axis=1).columns
-        
-        # 将类别数据进行one-hot编码
-        self._gbdt_data = one_hot_embedding(self._gbdt_data, self._onehot_feats)
-        
-    
-    def split_dataset(self):
-        """数据集划分,将数据集划分为训练集、验证集、测试集"""
-        # 1. 分离特征和标签
-        features = self._gbdt_data.drop("label", axis=1)
-        labels = self._gbdt_data["label"]
-        
-        # 2. 划分数据集,80%训练集、20%的测试集
-        X_train, X_test, y_train, y_test = train_test_split(
-            features, labels, 
-            test_size=0.2, 
-            random_state=42, 
-            shuffle=True,
-            stratify=labels,
-        )
-        
-        # 3. 数据标准化(仅对特征进行标准化)
-        scaler = StandardScaler()
-        X_train[self._numeric_columns] = scaler.fit_transform(X_train[self._numeric_columns])
-        X_test[self._numeric_columns] = scaler.fit_transform(X_test[self._numeric_columns])
-        
-        train_dataset = {"data": X_train, "label": y_train}
-        test_dataset = {"data": X_test, "label": y_test}
-        
-        return train_dataset, test_dataset
-    
-if __name__ == '__main__':
-    path = './models/rank/data/gbdt_data.csv'
-    dataloader = DataLoader(path)
-    train_dataset, test_dataset = dataloader.split_dataset()
-    
-    # 打印训练集和测试集的正负样本分布
-    print("训练集正负样本分布:")
-    print(train_dataset["label"].value_counts(normalize=True))
-    
-    print("测试集正负样本分布:")
+import pandas as pd
+from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from models.rank.data.utils import one_hot_embedding
+
+class DataLoader:
+    def __init__(self,path):
+        self._gbdt_data_path = path
+        self._load_data()
+    
+    def _load_data(self):
+       
+        self._gbdt_data = pd.read_csv(self._gbdt_data_path, encoding="utf-8")
+        self._gbdt_data.drop('cust_code', axis=1, inplace=True)
+        self._gbdt_data.drop('product_code', axis=1, inplace=True)
+        
+        self._onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT, **ShopConfig.ONEHOT_CAT}
+        
+        self._onehot_columns = list(self._onehot_feats.keys())
+        self._numeric_columns = self._gbdt_data.drop(self._onehot_columns + ["label"], axis=1).columns
+        
+        # 将类别数据进行one-hot编码
+        self._gbdt_data = one_hot_embedding(self._gbdt_data, self._onehot_feats)
+        
+    
+    def split_dataset(self):
+        """数据集划分,将数据集划分为训练集、测试集"""
+        # 1. 分离特征和标签
+        features = self._gbdt_data.drop("label", axis=1)
+        labels = self._gbdt_data["label"]
+        
+        # 2. 划分数据集,80%训练集、20%的测试集
+        X_train, X_test, y_train, y_test = train_test_split(
+            features, labels, 
+            test_size=0.2, 
+            random_state=42, 
+            shuffle=True,
+            stratify=labels,
+        )
+        
+        # 3. 数据标准化(仅对特征进行标准化)
+        if len(self._numeric_columns) != 0:
+            scaler = StandardScaler()
+            X_train[self._numeric_columns] = scaler.fit_transform(X_train[self._numeric_columns])
+            X_test[self._numeric_columns] = scaler.transform(X_test[self._numeric_columns])
+        
+        train_dataset = {"data": X_train, "label": y_train}
+        test_dataset = {"data": X_test, "label": y_test}
+        
+        return train_dataset, test_dataset
+    
+if __name__ == '__main__':
+    path = './data/train_data.csv'
+    dataloader = DataLoader(path)
+    train_dataset, test_dataset = dataloader.split_dataset()
+    
+    # 打印训练集和测试集的正负样本分布
+    print("训练集正负样本分布:")
+    print(train_dataset["label"].value_counts(normalize=True))
+    
+    print("测试集正负样本分布:")
     print(test_dataset["label"].value_counts(normalize=True))

+ 140 - 223
models/rank/data/preprocess.py

@@ -1,224 +1,141 @@
-from database import MySqlDao
-from models.rank.data.config import CustConfig, ProductConfig, OrderConfig
-import os
-import pandas as pd
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.utils import shuffle
-import numpy as np
-
-class DataProcess():
-    def __init__(self, city_uuid, save_dir):
-        self._mysql_dao = MySqlDao()
-        self.save_dir = save_dir
-        print("正在加载cust_info...")
-        self._cust_data = self._mysql_dao.load_cust_data(city_uuid)
-        print("正在加载product_info...")
-        self._product_data = self._mysql_dao.load_product_data(city_uuid)
-        print("正在加载order_info...")
-        self._order_data = self._mysql_dao.load_order_data(city_uuid)
-        # self._order_data = self._mysql_dao.load_mock_order_data()
-        print("正在加载shopping_info...")
-        self._shopping_data = self._mysql_dao.load_shopping_data(city_uuid)
-        
-    def data_process(self):
-        """数据预处理"""
-        ori_train_data_save_path = os.path.join(self.save_dir, "original_train_data.csv")
-        pos_train_data_save_path = os.path.join(self.save_dir, "pos_train_data.csv")
-        shopping_train_data_save_path = os.path.join(self.save_dir, "shopping_train_data.csv")
-        if os.path.exists(ori_train_data_save_path):
-            os.remove(ori_train_data_save_path)
-        if os.path.exists(pos_train_data_save_path):
-            os.remove(pos_train_data_save_path)
-        if os.path.exists(shopping_train_data_save_path):
-            os.remove(shopping_train_data_save_path)
-        
-        # 1. 获取指定的特征组合
-        self._cust_data = self._cust_data[CustConfig.FEATURE_COLUMNS]
-        self._product_data = self._product_data[ProductConfig.FEATURE_COLUMNS]
-        self._order_data = self._order_data[OrderConfig.FEATURE_COLUMNS]
-        
-        # 2. 数据清洗
-        self._clean_cust_data()
-        self._clean_product_data()
-        self._clean_order_data()
-        self._clean_shopping_data()
-        
-        # 3. 生成训练数据集
-        ori_train_data = self._generate_original_train_data(is_pos=False)
-        shopping_train_data = self._generate_shopping_train_data()
-        pos_train_data = self._generate_pos_train_data()
-        
-        ori_train_data.to_csv(ori_train_data_save_path, index=False)
-        shopping_train_data.to_csv(shopping_train_data_save_path, index=False)
-        pos_train_data.to_csv(pos_train_data_save_path, index=False)
-        
-
-    def _clean_cust_data(self):
-        """用户信息表数据清洗"""
-        self._cust_data["BB_RETAIL_CUSTOMER_CODE"] = self._cust_data["BB_RETAIL_CUSTOMER_CODE"].astype(str)
-        # 根据配置规则清洗数据
-        for feature, rules, in CustConfig.CLEANING_RULES.items():
-            if rules["type"] == "num":
-                # 先将数值型字符串转换为数值
-                self._cust_data[feature] = pd.to_numeric(self._cust_data[feature], errors="coerce")
-                
-            if rules["method"] == "fillna":
-                if rules["opt"] == "fill":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"]).infer_objects(copy=False)
-                elif rules["opt"] == "replace":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]]).infer_objects(copy=False)
-                elif rules["opt"] == "mean":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean()).infer_objects(copy=False)
-                self._cust_data[feature] = self._cust_data[feature].infer_objects(copy=False)
-    
-    def _clean_product_data(self):
-        """卷烟信息表数据清洗"""
-        self._product_data["product_code"] = self._product_data["product_code"].astype(str)
-        for feature, rules, in ProductConfig.CLEANING_RULES.items():
-            if rules["type"] == "num":
-                self._product_data[feature] = pd.to_numeric(self._product_data[feature], errors="coerce")
-            
-            if rules["method"] == "fillna":
-                if rules["opt"] == "fill":
-                    self._product_data[feature] = self._product_data[feature].fillna(rules["value"]).infer_objects(copy=False)
-                elif rules["opt"] == "mean":
-                    self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean()).infer_objects(copy=False)
-                self._product_data[feature] = self._product_data[feature].infer_objects(copy=False)
-                    
-    def _clean_order_data(self):
-        remaining_cols = self._order_data.columns.drop(OrderConfig.POSFEATURES) # 数据清洗时先不对pos数据做处理
-        col_all_missing = remaining_cols[self._order_data[remaining_cols].isnull().all()].to_list()
-        self._order_data.drop(columns=col_all_missing, inplace=True)
-        
-        # 去除重复值和填补缺失值
-        self._order_data.drop_duplicates(inplace=True)
-        self._order_data[remaining_cols.drop(col_all_missing)] = self._order_data[remaining_cols.drop(col_all_missing)].fillna(0)
-        self._order_data = self._order_data.infer_objects(copy=False) 
-        
-        
-    def _clean_shopping_data(self):
-        """处理商圈数据缺省值"""
-        self._shopping_data.drop(columns=["cust_uuid", "longitude", "latitude", "range_radius"], axis=1, inplace=True)
-        remaining_cols = self._shopping_data.columns.drop(["city_uuid", "cust_code"])
-        col_with_missing = remaining_cols[self._shopping_data[remaining_cols].isnull().any()].tolist() # 判断有缺失的字段
-        col_all_missing = remaining_cols[self._shopping_data[remaining_cols].isnull().all()].to_list() # 全部缺失的字段
-        col_partial_missing = list(set(col_with_missing) - set(col_all_missing)) # 部分缺失的字段
-        
-        for col in col_partial_missing:
-            self._shopping_data[col] = self._shopping_data[col].fillna(self._shopping_data[col].mean())
-        
-        for col in col_all_missing:
-            self._shopping_data[col] = self._shopping_data[col].fillna(0).infer_objects(copy=False)
-    
-    def _generate_original_train_data(self, is_pos):
-        union_data = self._union_order_cust_product(is_pos)
-        scored_data = self._calculate_score(union_data)
-        labeled_data = self._labeled_data(scored_data)
-        
-        # labeled_data.to_csv(save_path, index=False)
-        return labeled_data
-        
-        
-    
-    def _generate_pos_train_data(self):
-        pos_data = self._generate_original_train_data(is_pos=True)
-        pos_data.dropna(subset=['YLT_TURNOVER_RATE'], inplace=True)
-        pos_data[OrderConfig.POSFEATURES] = pos_data[OrderConfig.POSFEATURES].fillna(0)
-        pos_data = pos_data.infer_objects(copy=False)
-        return pos_data
-        
-    
-    def _generate_shopping_train_data(self):
-        orignal_data = self._generate_original_train_data(is_pos=False)
-        cust_feats = self._shopping_data.set_index("cust_code")
-        
-        shopping_train_data = orignal_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
-        return shopping_train_data
-    
-    def _union_order_cust_product(self, is_pos):
-        """联合order表、商户表、卷烟表"""
-        union_data = self._order_data.copy()
-        if not is_pos:
-            union_data.drop(OrderConfig.POSFEATURES, axis=1, inplace=True)
-        union_data.rename(columns={"PRODUCT_CODE": "product_code"}, inplace=True)
-        # union_data = union_data.drop(OrderConfig.POSFEATURES) # 去除pos数据特征字段
-        cust_feats = self._cust_data.set_index("BB_RETAIL_CUSTOMER_CODE")
-        product_feats = self._product_data.set_index("product_code")
-        
-        union_data = union_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
-        union_data = union_data.join(product_feats, on="product_code", how="inner")
-        
-        return union_data
-        # self._train_data = shuffle(self._train_data, random_state=42)
-        
-    def _calculate_score(self, union_data):
-        """计算联合数据记录的分数"""
-        # 对参与算分的特征值进行归一化
-        scaler = MinMaxScaler()
-        union_data[list(OrderConfig.WEIGHTS.keys())] = scaler.fit_transform(union_data[list(OrderConfig.WEIGHTS.keys())])
-        # 计算加权分数
-        union_data["score"] = sum(union_data[feat] * weight 
-                          for feat, weight in OrderConfig.WEIGHTS.items())
-        
-        return union_data
-    
-    def _labeled_data(self, scored_data):
-        """通过计算分数打标签"""
-        # 按品规分组计算中位数
-        product_medians = scored_data.groupby("product_code")["score"].median().reset_index()
-        product_medians.columns = ["product_code", "median_score"]
-        
-        # 合并中位数到原始订单数据
-        temp_data = pd.merge(scored_data, product_medians, on="product_code", how="left")
-        
-        # 生成标签 (1: 大于等于中位数, 0: 小于中位数)
-        temp_data["label"] = np.where(
-            temp_data["score"] >= temp_data["median_score"], 1, 0
-        )
-        temp_data = temp_data.sort_values("score", ascending=False)
-        temp_data.drop(columns=["median_score", "score"], inplace=True)
-        scored_data = shuffle(temp_data, random_state=42)
-        return scored_data
-    
-    # def _descartes(self):
-    #     """将零售户信息与卷烟信息进行笛卡尔积连接"""
-    #     self._cust_data["descartes"] = 1
-    #     self._product_data["descartes"] = 1
-        
-    #     self._descartes_data = pd.merge(self._cust_data, self._product_data, on="descartes").drop("descartes", axis=1)
-        
-    # def _labeled_data_from_descartes(self):
-    #     """根据order表信息给descartes_data数据打标签"""
-    #     # 获取order表中的正样本组合
-    #     order_combinations = self._order_data[["BB_RETAIL_CUSTOMER_CODE", "PRODUCT_CODE"]].drop_duplicates()
-    #     order_set = set(zip(order_combinations["BB_RETAIL_CUSTOMER_CODE"], order_combinations["PRODUCT_CODE"]))
-        
-    #     # 在descartes_data中打标签:正样本为1,负样本为0
-    #     self._descartes_data['label'] = self._descartes_data.apply(
-    #         lambda row: 1 if (row['BB_RETAIL_CUSTOMER_CODE'], row['product_code']) in order_set else 0, axis=1)
-
-    # def _generate_train_data_from_descartes(self):
-    #     """从descartes_data中生成训练数据"""
-    #     positive_samples = self._descartes_data[self._descartes_data["label"] == 1]
-    #     negative_samples = self._descartes_data[self._descartes_data["label"] == 0]
-        
-    #     positive_count = len(positive_samples)
-    #     negative_count = min(1 * positive_count, len(negative_samples))
-    #     print(positive_count)
-    #     print(negative_count)
-        
-    #     # 随机抽取2倍正样本数量的负样本
-    #     negative_samples_sampled = negative_samples.sample(n=negative_count, random_state=42)
-    #     # 合并正负样本
-    #     self._train_data = pd.concat([positive_samples, negative_samples_sampled], axis=0)
-    #     self._train_data = self._train_data.sample(frac=1, random_state=42).reset_index(drop=True)
-        
-    #     # 保存训练数据
-    #     self._train_data.to_csv(self._save_res_path, index=False)
-    
-if __name__ == '__main__':
-    city_uuid = "00000000000000000000000011445301"
-    # city_uuid = "00000000000000000000000011441801"
-    save_dir = "./data"
-    processor = DataProcess(city_uuid, save_dir)
+from database import MySqlDao
+from models.rank.data.config import CustConfig, ProductConfig, OrderConfig, ShopConfig
+import os
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.utils import shuffle
+import numpy as np
+
+class DataProcess():
+    def __init__(self, city_uuid, save_dir):
+        self._mysql_dao = MySqlDao()
+        self.save_dir = save_dir
+        print("正在加载cust_info...")
+        self._cust_data = self._mysql_dao.load_cust_data(city_uuid)
+        print("正在加载product_info...")
+        self._product_data = self._mysql_dao.load_product_data(city_uuid)
+        print("正在加载order_info...")
+        self._order_data = self._mysql_dao.load_order_data(city_uuid)
+        # self._order_data = self._mysql_dao.load_mock_order_data()
+        print("正在加载shopping_info...")
+        self._shopping_data = self._mysql_dao.load_shopping_data(city_uuid)
+        
+    def data_process(self):
+        """数据预处理"""
+        train_data_save_path = os.path.join(self.save_dir, "train_data.csv")
+        if os.path.exists(train_data_save_path):
+            os.remove(train_data_save_path)
+        
+        # 1. 获取指定的特征组合
+        self._cust_data = self._cust_data[CustConfig.FEATURE_COLUMNS]
+        self._product_data = self._product_data[ProductConfig.FEATURE_COLUMNS]
+        self._order_data = self._order_data[OrderConfig.FEATURE_COLUMNS]
+        self._shopping_data = self._shopping_data[ShopConfig.FEATURE_COLUMNS]
+        
+        # 2. 数据清洗
+        self._clean_cust_data()
+        self._clean_product_data()
+        self._clean_order_data()
+        self._clean_shopping_data()
+        
+        # 3. 生成训练数据集
+        train_data = self._generate_train_data()
+        train_data.to_csv(train_data_save_path, index=False, encoding="utf-8")
+        
+
+    def _clean_cust_data(self):
+        """用户信息表数据清洗"""
+        self._cust_data["BB_RETAIL_CUSTOMER_CODE"] = self._cust_data["BB_RETAIL_CUSTOMER_CODE"].astype(str)
+        # 根据配置规则清洗数据
+        for feature, rules, in CustConfig.CLEANING_RULES.items():
+            if rules["type"] == "num":
+                # 先将数值型字符串转换为数值
+                self._cust_data[feature] = pd.to_numeric(self._cust_data[feature], errors="coerce")
+                
+            if rules["method"] == "fillna":
+                if rules["opt"] == "fill":
+                    self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"]).infer_objects(copy=False)
+                elif rules["opt"] == "replace":
+                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]]).infer_objects(copy=False)
+                elif rules["opt"] == "mean":
+                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean()).infer_objects(copy=False)
+                self._cust_data[feature] = self._cust_data[feature].infer_objects(copy=False)
+    
+    def _clean_product_data(self):
+        """卷烟信息表数据清洗"""
+        self._product_data["product_code"] = self._product_data["product_code"].astype(str)
+        for feature, rules, in ProductConfig.CLEANING_RULES.items():
+            if rules["type"] == "num":
+                self._product_data[feature] = pd.to_numeric(self._product_data[feature], errors="coerce")
+            
+            if rules["method"] == "fillna":
+                if rules["opt"] == "fill":
+                    self._product_data[feature] = self._product_data[feature].fillna(rules["value"]).infer_objects(copy=False)
+                elif rules["opt"] == "mean":
+                    self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean()).infer_objects(copy=False)
+                self._product_data[feature] = self._product_data[feature].infer_objects(copy=False)
+                    
+    def _clean_order_data(self):
+        self._order_data["cust_code"] = self._order_data["cust_code"].astype(str)
+        self._order_data["product_code"] = self._order_data["product_code"].astype(str)
+        
+        # self._order_data[order_cols.drop(col_all_missing)] = self._order_data[order_cols.drop(col_all_missing)].fillna(0)
+        self._order_data["sale_qty"] = self._order_data["sale_qty"].fillna(0)
+        self._order_data = self._order_data.infer_objects(copy=False)
+        
+        # 将销售量进行分组求和
+        self._order_data = self._order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
+        
+        
+    def _clean_shopping_data(self):
+        """处理商圈数据缺省值"""
+        self._shopping_data["cust_code"] = self._shopping_data["cust_code"].astype(str)
+        # 根据配置规则清洗数据
+        for feature, rules, in ShopConfig.CLEANING_RULES.items():
+            if rules["type"] == "num":
+                # 先将数值型字符串转换为数值
+                self._shopping_data[feature] = pd.to_numeric(self._shopping_data[feature], errors="coerce")
+                
+            if rules["method"] == "fillna":
+                if rules["opt"] == "fill":
+                    self._shopping_data[feature] = self._shopping_data[feature].fillna(rules["value"]).infer_objects(copy=False)
+                elif rules["opt"] == "replace":
+                    self._shopping_data[feature] = self._shopping_data[feature].fillna(self._shopping_data[rules["value"]]).infer_objects(copy=False)
+                elif rules["opt"] == "mean":
+                    self._shopping_data[feature] = self._shopping_data[feature].fillna(self._shopping_data[feature].mean()).infer_objects(copy=False)
+                self._shopping_data[feature] = self._shopping_data[feature].infer_objects(copy=False)
+    
+    def _generate_train_data(self):
+        """生成训练数据"""
+        # 将商户表与商圈表进行连接
+        cust_feats = self._shopping_data.set_index("cust_code")
+        self._cust_data = self._cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
+        
+        union_data = self._union_order_cust_product()
+        
+        train_data = self._labeled_data(union_data)
+        
+        return train_data
+    
+    def _union_order_cust_product(self):
+        """联合order表、商户表、卷烟表"""
+        # 使用merge进行连接
+        union_data = self._order_data.merge(self._product_data, on="product_code", how="inner")
+        union_data = union_data.merge(self._cust_data, left_on='cust_code', right_on='BB_RETAIL_CUSTOMER_CODE', how="inner")
+        union_data = union_data.drop(columns=['BB_RETAIL_CUSTOMER_CODE'])
+        
+        return union_data
+        
+    def _labeled_data(self, union_data):
+        union_data['label'] = union_data['sale_qty'].apply(lambda x: 0 if x == 0 else 1)
+        train_data = union_data.drop(columns=['sale_qty'])
+        train_data = shuffle(train_data, random_state=42)
+        
+        return train_data
+    
+if __name__ == '__main__':
+    city_uuid = "00000000000000000000000011445301"
+    # city_uuid = "00000000000000000000000011441801"
+    save_dir = "./data"
+    processor = DataProcess(city_uuid, save_dir)
     processor.data_process()

+ 23 - 23
models/rank/data/utils.py

@@ -1,24 +1,24 @@
-import pandas as pd
-def one_hot_embedding(dataframe, onehout_feat):
-    """对数据的指定特征做embedding编码"""
-    # 先将指定的特征进行Categorical处理
-    for feat, categories in onehout_feat.items():
-        dataframe[feat] = pd.Categorical(dataframe[feat], categories=categories, ordered=False)
-    dataframe = pd.get_dummies(
-        dataframe,
-        columns=list(onehout_feat.keys()),
-        prefix_sep="_",
-        dtype=int,
-    )
-    return dataframe
-
-def sample_data_clear(data, config):
-    for feature, rules, in config.CLEANING_RULES.items():
-        if rules["type"] == "num":
-            data[feature] = pd.to_numeric(data[feature], errors="coerce")
-        if rules["method"] == "fill":
-            if rules["type"] == "str":
-                data[feature] = data[feature].fillna(rules["value"])
-            elif rules["type"] == "num":
-                data[feature] = data[feature].fillna(0.0)
+import pandas as pd
+def one_hot_embedding(dataframe, onehout_feat):
+    """对数据的指定特征做embedding编码"""
+    # 先将指定的特征进行Categorical处理
+    for feat, categories in onehout_feat.items():
+        dataframe[feat] = pd.Categorical(dataframe[feat], categories=categories, ordered=False)
+    dataframe = pd.get_dummies(
+        dataframe,
+        columns=list(onehout_feat.keys()),
+        prefix_sep="_",
+        dtype=int,
+    )
+    return dataframe
+
+def sample_data_clear(data, config):
+    for feature, rules, in config.CLEANING_RULES.items():
+        if rules["type"] == "num":
+            data[feature] = pd.to_numeric(data[feature], errors="coerce")
+        if rules["method"] == "fill":
+            if rules["type"] == "str":
+                data[feature] = data[feature].fillna(rules["value"])
+            elif rules["type"] == "num":
+                data[feature] = data[feature].fillna(0.0)
     return data

+ 124 - 124
models/rank/gbdt_lr.py

@@ -1,125 +1,125 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-import numpy as np
-from models.rank.data import DataLoader
-from sklearn.ensemble import GradientBoostingClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
-from sklearn.model_selection import GridSearchCV
-from sklearn.preprocessing import OneHotEncoder
-import joblib
-import time
-
-class Trainer:
-    def __init__(self, path):
-        self._load_data(path)
-        
-        # 初始化GBDT和LR模型参数
-        self._gbdt_params = {
-            'n_estimators': 100,
-            'learning_rate': 0.01,
-            'max_depth': 6,
-            'subsample': 0.8,
-            'random_state': 42,
-        }
-        self._lr_params = {
-            "max_iter": 1000,
-            'C': 1.0, 
-            'penalty': 'elasticnet', 
-            'l1_ratio': 0.8,  # 添加 l1_ratio 参数,可以根据需要调整
-            'solver': 'saga',
-            'random_state': 42,
-            'class_weight': 'balanced'
-        }
-        
-        # 初始化模型
-        self._gbdt_model = GradientBoostingClassifier(**self._gbdt_params)
-        self._lr_model = LogisticRegression(**self._lr_params)
-        
-        self._onehot_encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
-        
-    def _load_data(self, path):
-        dataloader = DataLoader(path)
-        self._train_dataset, self._test_dataset = dataloader.split_dataset()
-        
-    def train(self):
-        """模型训练"""
-        print("开始训练GBDT模型...")
-        # 训练GBDT模型
-        self._gbdt_model.fit(self._train_dataset["data"], self._train_dataset["label"])
-        
-        # 获取GBDT的每棵树的分数(决策值)
-        gbdt_train_preds = self._gbdt_model.apply(self._train_dataset["data"])[:, :, 0]  # 仅取每棵树的叶节点输出
-        
-        gbdt_feats_encoded = self._onehot_encoder.fit_transform(gbdt_train_preds)
-        
-        print("开始训练LR模型...")
-        # 使用决策树输出作为LR的输入特征
-        self._lr_model.fit(gbdt_feats_encoded, self._train_dataset["label"])
-        
-    def predict(self, X):
-        # 获取GBDT模型的预测分数
-        gbdt_preds = self._gbdt_model.apply(X)[:, :, 0]
-        
-        gbdt_feats_encoded = self._onehot_encoder.transform(gbdt_preds)
-        
-        # 使用训练好的LR模型输出概率
-        return self._lr_model.predict(gbdt_feats_encoded)
-    
-    def predict_proba(self, X):
-        # 获取GBDT模型的预测分数
-        gbdt_preds = self._gbdt_model.apply(X)[:, :, 0]
-        
-        gbdt_feats_encoded = self._onehot_encoder.transform(gbdt_preds)
-        
-        # 使用训练好的LR模型输出概率
-        return self._lr_model.predict_proba(gbdt_feats_encoded)
-        
-    def evaluate(self):
-        # 对测试集进行预测
-        y_pred = self.predict(self._test_dataset["data"])
-        y_pred_proba = self.predict_proba(self._test_dataset["data"])[:, 1]  # 获取正类的概率
-        
-        # 计算各类评估指标
-        accuracy = accuracy_score(self._test_dataset["label"], y_pred)
-        precision = precision_score(self._test_dataset["label"], y_pred)
-        recall = recall_score(self._test_dataset["label"], y_pred)
-        f1 = f1_score(self._test_dataset["label"], y_pred)
-        roc_auc = roc_auc_score(self._test_dataset["label"], y_pred_proba)    
-        
-        return {
-            'accuracy': accuracy,
-            'precision': precision,
-            'recall': recall,
-            'f1_score': f1,
-            'roc_auc': roc_auc
-        }
-        
-    def save_model(self, model_path):
-        """将模型保存到本地"""
-        models = {"gbdt_model": self._gbdt_model, "lr_model": self._lr_model, "onehot_encoder": self._onehot_encoder}
-        joblib.dump(models, model_path)
-    
-     
-if __name__ == "__main__":
-    gbdt_data_path = "./models/rank/data/gbdt_data.csv"
-    trainer = Trainer(gbdt_data_path)
-    
-    start_time = time.time()
-    trainer.train()
-    end_time = time.time()
-    
-    training_time_hours = (end_time - start_time) / 3600
-    print(f"训练时间: {training_time_hours:.4f} 小时")
-    
-    eval_metrics = trainer.evaluate()
-    
-    # 输出评估结果
-    print("GBDT-LR Evaluation Metrics:")
-    for metric, value in eval_metrics.items():
-        print(f"{metric}: {value:.4f}")
-        
-    # 保存模型
-    model_path = "./models/rank/weights/model.pkl"
-    trainer.save_model(model_path)
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import numpy as np
+from models.rank.data import DataLoader
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import OneHotEncoder
+import joblib
+import time
+
+class Trainer:
+    def __init__(self, path):
+        self._load_data(path)
+        
+        # 初始化GBDT和LR模型参数
+        self._gbdt_params = {
+            'n_estimators': 100,
+            'learning_rate': 0.01,
+            'max_depth': 6,
+            'subsample': 0.8,
+            'random_state': 42,
+        }
+        self._lr_params = {
+            "max_iter": 1000,
+            'C': 1.0, 
+            'penalty': 'elasticnet', 
+            'l1_ratio': 0.8,  # 添加 l1_ratio 参数,可以根据需要调整
+            'solver': 'saga',
+            'random_state': 42,
+            'class_weight': 'balanced'
+        }
+        
+        # 初始化模型
+        self._gbdt_model = GradientBoostingClassifier(**self._gbdt_params)
+        self._lr_model = LogisticRegression(**self._lr_params)
+        
+        self._onehot_encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
+        
+    def _load_data(self, path):
+        dataloader = DataLoader(path)
+        self._train_dataset, self._test_dataset = dataloader.split_dataset()
+        
+    def train(self):
+        """模型训练"""
+        print("开始训练GBDT模型...")
+        # 训练GBDT模型
+        self._gbdt_model.fit(self._train_dataset["data"], self._train_dataset["label"])
+        
+        # 获取GBDT的每棵树的分数(决策值)
+        gbdt_train_preds = self._gbdt_model.apply(self._train_dataset["data"])[:, :, 0]  # 仅取每棵树的叶节点输出
+        
+        gbdt_feats_encoded = self._onehot_encoder.fit_transform(gbdt_train_preds)
+        
+        print("开始训练LR模型...")
+        # 使用决策树输出作为LR的输入特征
+        self._lr_model.fit(gbdt_feats_encoded, self._train_dataset["label"])
+        
+    def predict(self, X):
+        # 获取GBDT模型的预测分数
+        gbdt_preds = self._gbdt_model.apply(X)[:, :, 0]
+        
+        gbdt_feats_encoded = self._onehot_encoder.transform(gbdt_preds)
+        
+        # 使用训练好的LR模型输出概率
+        return self._lr_model.predict(gbdt_feats_encoded)
+    
+    def predict_proba(self, X):
+        # 获取GBDT模型的预测分数
+        gbdt_preds = self._gbdt_model.apply(X)[:, :, 0]
+        
+        gbdt_feats_encoded = self._onehot_encoder.transform(gbdt_preds)
+        
+        # 使用训练好的LR模型输出概率
+        return self._lr_model.predict_proba(gbdt_feats_encoded)
+        
+    def evaluate(self):
+        # 对测试集进行预测
+        y_pred = self.predict(self._test_dataset["data"])
+        y_pred_proba = self.predict_proba(self._test_dataset["data"])[:, 1]  # 获取正类的概率
+        
+        # 计算各类评估指标
+        accuracy = accuracy_score(self._test_dataset["label"], y_pred)
+        precision = precision_score(self._test_dataset["label"], y_pred)
+        recall = recall_score(self._test_dataset["label"], y_pred)
+        f1 = f1_score(self._test_dataset["label"], y_pred)
+        roc_auc = roc_auc_score(self._test_dataset["label"], y_pred_proba)    
+        
+        return {
+            'accuracy': accuracy,
+            'precision': precision,
+            'recall': recall,
+            'f1_score': f1,
+            'roc_auc': roc_auc
+        }
+        
+    def save_model(self, model_path):
+        """将模型保存到本地"""
+        models = {"gbdt_model": self._gbdt_model, "lr_model": self._lr_model, "onehot_encoder": self._onehot_encoder}
+        joblib.dump(models, model_path)
+    
+     
+if __name__ == "__main__":
+    gbdt_data_path = "./data/train_data.csv"
+    trainer = Trainer(gbdt_data_path)
+    
+    start_time = time.time()
+    trainer.train()
+    end_time = time.time()
+    
+    training_time_hours = (end_time - start_time) / 3600
+    print(f"训练时间: {training_time_hours:.4f} 小时")
+    
+    eval_metrics = trainer.evaluate()
+    
+    # 输出评估结果
+    print("GBDT-LR Evaluation Metrics:")
+    for metric, value in eval_metrics.items():
+        print(f"{metric}: {value:.4f}")
+        
+    # 保存模型
+    model_path = "./models/rank/weights/model.pkl"
+    trainer.save_model(model_path)
     

+ 136 - 136
models/rank/gbdt_lr_sort.py

@@ -1,137 +1,137 @@
-import joblib
-# from dao import Redis, get_product_by_id, get_custs_by_ids, load_cust_data_from_mysql
-from database import RedisDatabaseHelper, MySqlDao
-from models.rank.data import ProductConfig, CustConfig, ImportanceFeaturesMap
-from models.rank.data.utils import one_hot_embedding, sample_data_clear
-import pandas as pd
-from sklearn.preprocessing import StandardScaler
-import os
-
-
-class GbdtLrModel:
-    def __init__(self, model_path):
-        self.load_model(model_path)
-        self.redis = RedisDatabaseHelper().redis
-        self._mysql_dao = MySqlDao()
-    
-    def load_model(self, model_path):
-        self._modelname = os.path.basename(model_path).split(".")[0]
-        models = joblib.load(model_path)
-        self.gbdt_model, self.lr_model, self.onehot_encoder = models["gbdt_model"], models["lr_model"], models["onehot_encoder"]
-        
-    
-    # def get_recall_list(self, city_uuid, product_id):
-    #     """根据卷烟id获取召回的商铺列表"""
-    #     key = f"fc:{city_uuid}:{product_id}"
-    #     self.recall_cust_list = self.redis.zrange(key, 0, -1, withscores=False)
-    
-    # def load_recall_data(self, city_uuid, product_id):
-    #     self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
-    #     self.custs_data = self._mysql_dao.get_cust_by_ids(city_uuid, self.recall_cust_list)[CustConfig.FEATURE_COLUMNS]
-        
-    def get_cust_and_product_data(self, city_uuid, product_id):
-        """从商户数据库中获取指定城市所有商户的id"""
-        self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
-        self.custs_data = self._mysql_dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
-    
-    def generate_feats_map(self, city_uuid, product_id):
-        """组合卷烟、商户特征矩阵"""
-        # self.get_recall_list(city_uuid, product_id)
-        # self.load_recall_data(city_uuid, product_id)
-        
-        self.get_cust_and_product_data(city_uuid, product_id)
-        # 做数据清洗
-        self.product_data = sample_data_clear(self.product_data, ProductConfig)
-        self.custs_data = sample_data_clear(self.custs_data, CustConfig)
-        
-        # 笛卡尔积联合
-        self.custs_data["descartes"] = 1
-        self.product_data["descartes"] = 1
-        self.feats_map = pd.merge(self.custs_data, self.product_data, on="descartes").drop("descartes", axis=1)
-        self.recall_cust_list = self.feats_map["BB_RETAIL_CUSTOMER_CODE"].to_list()
-        self.feats_map.drop('BB_RETAIL_CUSTOMER_CODE', axis=1, inplace=True)
-        self.feats_map.drop('product_code', axis=1, inplace=True)
-        
-        # onehot编码
-        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
-        onehot_columns = list(onehot_feats.keys())
-        numeric_columns = self.feats_map.drop(onehot_columns, axis=1).columns
-        self.feats_map = one_hot_embedding(self.feats_map, onehot_feats)
-        
-        # 数字特征归一化
-        scaler = StandardScaler()
-        self.feats_map[numeric_columns] = scaler.fit_transform(self.feats_map[numeric_columns])
-    
-    def sort(self, city_uuid, product_id):
-        self.generate_feats_map(city_uuid, product_id)
-        
-        gbdt_preds = self.gbdt_model.apply(self.feats_map)[:, :, 0]
-        gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
-        scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1]
-        
-        self.recommend_list = []
-        for cust_id, score in zip(self.recall_cust_list, scores):
-            self.recommend_list.append({cust_id: float(score)})
-            
-        self.recommend_list = sorted(self.recommend_list, key=lambda x: list(x.values())[0], reverse=True)
-        # for res in self.recommend_list[:200]:
-        #     print(res)
-        return self.recommend_list
-    
-    def generate_feats_importance(self):
-        """生成特征重要性"""
-        # 获取GBDT模型的特征重要性
-        feats_importance = self.gbdt_model.feature_importances_
-        
-        # 获取特征名称
-        feats_names = self.gbdt_model.feature_names_in_
-        
-        importance_dict = dict(zip(feats_names, feats_importance))
-        
-        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
-        for feat, categories in onehot_feats.items():
-            related_columns = [col for col in feats_names if col.startswith(feat)]
-            if related_columns:
-                # 合并类别重要性
-                combined_importance = sum(importance_dict[col] for col in related_columns)
-                # 删除onehot类别列
-                for col in related_columns:
-                    del importance_dict[col]
-                # 添加合并后的重要性
-                importance_dict[feat] = combined_importance
-        
-        # 排序
-        sorted_importance = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)
-        
-        # 输出特征重要性
-        cust_features_importance = []
-        product_features_importance = []
-        order_features_importance = []
-        
-        for feat, importance in sorted_importance:
-            if feat in list(ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP.keys()):
-                cust_features_importance.append({ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP[feat]: float(importance)})
-            if feat in list(ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP.keys()):
-                product_features_importance.append({ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat]: float(importance)})
-            if feat in list(ImportanceFeaturesMap.ORDER_FEATURE_MAP.keys()):
-                order_features_importance.append({ImportanceFeaturesMap.ORDER_FEATURE_MAP[feat]: float(importance)})
-                
-            # 零消特征重要性
-            if self._modelname == 'pos_model' and feat in list(ImportanceFeaturesMap.POS_FEATURE_MAP.keys()):
-                order_features_importance.append({ImportanceFeaturesMap.POS_FEATURE_MAP[feat]: float(importance)})
-                
-            # 商圈特征重要性
-            if self._modelname == 'shopping_model' and feat in list(ImportanceFeaturesMap.SHOPING_FEATURES_MAP.keys()):
-                cust_features_importance.append({ImportanceFeaturesMap.SHOPING_FEATURES_MAP[feat]: float(importance)})
-        return cust_features_importance, product_features_importance, order_features_importance
-    
-if __name__ == "__main__":
-    model_path = "./models/rank/weights/00000000000000000000000011445301/shopping_model.pkl"
-    city_uuid = "00000000000000000000000011445301"
-    product_id = "110102"
-    gbdt_sort = GbdtLrModel(model_path)
-    # gbdt_sort.sort(city_uuid, product_id)
-    
-    importances = gbdt_sort.generate_feats_importance()
-    for importance in importances:
+import joblib
+# from dao import Redis, get_product_by_id, get_custs_by_ids, load_cust_data_from_mysql
+from database import RedisDatabaseHelper, MySqlDao
+from models.rank.data import ProductConfig, CustConfig, ImportanceFeaturesMap
+from models.rank.data.utils import one_hot_embedding, sample_data_clear
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+import os
+
+
class GbdtLrModel:
    """Serving-side GBDT+LR ranker.

    Loads a persisted GBDT/LR/one-hot-encoder bundle, scores every
    (customer, product) pair of a city, and reports feature importances.
    """

    def __init__(self, model_path):
        # Load the persisted estimators, then open Redis/MySQL handles.
        self.load_model(model_path)
        self.redis = RedisDatabaseHelper().redis
        self._mysql_dao = MySqlDao()

    def load_model(self, model_path):
        """Load the GBDT, LR and one-hot encoder dumped at training time."""
        # File stem (e.g. "shopping_model") selects model-specific importance maps.
        self._modelname = os.path.basename(model_path).split(".")[0]
        models = joblib.load(model_path)
        self.gbdt_model, self.lr_model, self.onehot_encoder = models["gbdt_model"], models["lr_model"], models["onehot_encoder"]


    # def get_recall_list(self, city_uuid, product_id):
    #     """Fetch the recalled shop list for a product id."""
    #     key = f"fc:{city_uuid}:{product_id}"
    #     self.recall_cust_list = self.redis.zrange(key, 0, -1, withscores=False)

    # def load_recall_data(self, city_uuid, product_id):
    #     self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
    #     self.custs_data = self._mysql_dao.get_cust_by_ids(city_uuid, self.recall_cust_list)[CustConfig.FEATURE_COLUMNS]

    def get_cust_and_product_data(self, city_uuid, product_id):
        """Load the product's feature row and all customer feature rows for the city."""
        self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
        self.custs_data = self._mysql_dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]

    def generate_feats_map(self, city_uuid, product_id):
        """Build the combined product x customer feature matrix for scoring."""
        # self.get_recall_list(city_uuid, product_id)
        # self.load_recall_data(city_uuid, product_id)

        self.get_cust_and_product_data(city_uuid, product_id)
        # Clean raw samples according to each config.
        self.product_data = sample_data_clear(self.product_data, ProductConfig)
        self.custs_data = sample_data_clear(self.custs_data, CustConfig)

        # Cartesian join via a constant key: one row per (customer, product) pair.
        self.custs_data["descartes"] = 1
        self.product_data["descartes"] = 1
        self.feats_map = pd.merge(self.custs_data, self.product_data, on="descartes").drop("descartes", axis=1)
        # Keep the customer codes aligned with feature rows, then drop id columns.
        self.recall_cust_list = self.feats_map["BB_RETAIL_CUSTOMER_CODE"].to_list()
        self.feats_map.drop('BB_RETAIL_CUSTOMER_CODE', axis=1, inplace=True)
        self.feats_map.drop('product_code', axis=1, inplace=True)

        # One-hot encode the categorical features declared by the configs.
        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
        onehot_columns = list(onehot_feats.keys())
        numeric_columns = self.feats_map.drop(onehot_columns, axis=1).columns
        self.feats_map = one_hot_embedding(self.feats_map, onehot_feats)

        # Standardise numeric features.
        # NOTE(review): the scaler is re-fitted on serving data instead of
        # reusing training statistics — confirm the trainer used the same convention.
        scaler = StandardScaler()
        self.feats_map[numeric_columns] = scaler.fit_transform(self.feats_map[numeric_columns])

    def sort(self, city_uuid, product_id):
        """Score all customers for one product and return them sorted by score desc."""
        self.generate_feats_map(city_uuid, product_id)

        # GBDT leaves -> one-hot -> LR positive-class probability.
        gbdt_preds = self.gbdt_model.apply(self.feats_map)[:, :, 0]
        gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
        scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1]

        self.recommend_list = []
        for cust_id, score in zip(self.recall_cust_list, scores):
            self.recommend_list.append({cust_id: float(score)})

        self.recommend_list = sorted(self.recommend_list, key=lambda x: list(x.values())[0], reverse=True)
        # for res in self.recommend_list[:200]:
        #     print(res)
        return self.recommend_list

    def generate_feats_importance(self):
        """Aggregate GBDT feature importances back onto the original feature names.

        Returns three lists (customer, product, order importances), each of
        single-entry ``{display_name: importance}`` dicts, sorted descending.
        """
        # Importance per (possibly one-hot expanded) input column.
        feats_importance = self.gbdt_model.feature_importances_

        # Column names as seen by the fitted GBDT model.
        feats_names = self.gbdt_model.feature_names_in_

        importance_dict = dict(zip(feats_names, feats_importance))

        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
        for feat, categories in onehot_feats.items():
            # One-hot columns are assumed to be named "<feat>..." — prefix match.
            related_columns = [col for col in feats_names if col.startswith(feat)]
            if related_columns:
                # Sum the per-category importances into one value.
                combined_importance = sum(importance_dict[col] for col in related_columns)
                # Remove the expanded one-hot columns...
                for col in related_columns:
                    del importance_dict[col]
                # ...and record the merged importance under the base feature name.
                importance_dict[feat] = combined_importance

        # Sort by importance, descending.
        sorted_importance = sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)

        # Bucket the importances by feature family via the display-name maps.
        cust_features_importance = []
        product_features_importance = []
        order_features_importance = []

        for feat, importance in sorted_importance:
            if feat in list(ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP.keys()):
                cust_features_importance.append({ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP[feat]: float(importance)})
            if feat in list(ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP.keys()):
                product_features_importance.append({ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat]: float(importance)})
            if feat in list(ImportanceFeaturesMap.ORDER_FEATURE_MAP.keys()):
                order_features_importance.append({ImportanceFeaturesMap.ORDER_FEATURE_MAP[feat]: float(importance)})

            # POS (retail checkout) features only apply to the POS model.
            if self._modelname == 'pos_model' and feat in list(ImportanceFeaturesMap.POS_FEATURE_MAP.keys()):
                order_features_importance.append({ImportanceFeaturesMap.POS_FEATURE_MAP[feat]: float(importance)})

            # Shopping-district features only apply to the shopping model.
            if self._modelname == 'shopping_model' and feat in list(ImportanceFeaturesMap.SHOPING_FEATURES_MAP.keys()):
                cust_features_importance.append({ImportanceFeaturesMap.SHOPING_FEATURES_MAP[feat]: float(importance)})
        return cust_features_importance, product_features_importance, order_features_importance
+    
if __name__ == "__main__":
    model_path = "./models/rank/weights/00000000000000000000000011445301/shopping_model.pkl"
    city_uuid = "00000000000000000000000011445301"
    product_id = "110102"

    ranker = GbdtLrModel(model_path)
    # ranker.sort(city_uuid, product_id)

    # generate_feats_importance returns (customer, product, order) lists.
    for importance_group in ranker.generate_feats_importance():
        print(importance_group)

+ 77 - 77
models/recall/hot_recall.py

@@ -1,77 +1,77 @@
-#!/usr/bin/env python
-# -*- encoding: utf-8 -*-
-'''
-@filename     : hot_recall.py
-@description     : 热度召回算法   
-@time     : 2025/01/21/00
-@author     : Sherlock1011 & Min1027
-@Version     : 1.0
-'''
-import pandas as pd
-from database import RedisDatabaseHelper
-from tqdm import tqdm
-
-class HotRecallModel:
-    def __init__(self, order_data):
-        self._redis_db = RedisDatabaseHelper()
-        self._hotkeys = self.get_hotkeys()
-        self._order_data = order_data
-
-
-    def get_hotkeys(self):
-        info = self._redis_db.redis.zrange("configs:hotkeys", 0, -1, withscores=True)
-        hotkeys = []
-        for item, _ in info:
-            hotkeys.append(item)
-        return hotkeys
-        
-    def _calculate_hot_score(self, hot_name):
-        """
-        根据热度指标计算热度得分
-        :param hot_name: 热度指标A
-        :type param: string
-        :return: 所有热度指标的得分
-        :rtype: list
-        """
-        results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
-        sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
-        item_hot_score = []
-        # mock热度召回最大分数
-        max_score = 1.0
-        total_score = sorted_results.loc[0, hot_name] / max_score
-        for row in sorted_results.itertuples(index=True, name="Row"):
-            item = {row[1]:(row[2]/total_score)*100}
-            item_hot_score.append(item)
-        return {"key":f"{hot_name}", "value":item_hot_score}
-
-    def calculate_all_hot_score(self, city_uuid):
-        """
-        计算所有的热度指标得分
-        """
-        # hot_datas = []
-        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
-            self.to_redis(self._calculate_hot_score(hotkey_name), city_uuid)
-
-    def to_redis(self, rec_content_score, city_uuid):
-        hotkey_name = rec_content_score["key"]
-        rec_item_id = f"hot:{city_uuid}:{str(hotkey_name)}" # 修正 rec_item_id 拼接方式
-        print("自动清除历史id前数量", self._redis_db.redis.zcard(rec_item_id))
-        # 清空 sorted set 数据,确保不会影响后续的存储
-        self._redis_db.redis.delete(rec_item_id)
-        print("自动清除历史id后数量", self._redis_db.redis.zcard(rec_item_id))
-         
-        res = {}
-
-        for item in rec_content_score["value"]:  
-            for content, score in item.items():  # item 形如 {A001: 75.0}
-                res[content] = float(score)  # 确保 score 是 float 类型
-
-        if res:  # 只有当 res 不为空时才执行 zadd
-            self._redis_db.redis.zadd(rec_item_id, res)
-
-
-if __name__ == "__main__":
-    # 序列化
-    model = HotRecallModel()
-    model.calculate_all_hot_score()
-    # joblib.dump(model, "hot_recall.model")
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : hot_recall.py
+@description     : 热度召回算法   
+@time     : 2025/01/21/00
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+import pandas as pd
+from database import RedisDatabaseHelper
+from tqdm import tqdm
+
class HotRecallModel:
    """Popularity ("hot") recall: scores shops per hot metric and stores them in Redis.

    Fixes over the previous version: an empty order frame no longer raises
    (``.loc[0]`` on an empty DataFrame), and a zero top score no longer
    divides by zero / produces inf scores.
    """

    def __init__(self, order_data):
        # order_data: per-order rows keyed by BB_RETAIL_CUSTOMER_CODE with one
        # numeric column per configured hot metric.
        self._redis_db = RedisDatabaseHelper()
        self._hotkeys = self.get_hotkeys()
        self._order_data = order_data

    def get_hotkeys(self):
        """Read the configured hot-metric names from the Redis sorted set."""
        info = self._redis_db.redis.zrange("configs:hotkeys", 0, -1, withscores=True)
        return [item for item, _ in info]

    def _calculate_hot_score(self, hot_name):
        """
        Compute a 0-100 hot score per shop for one hot metric.

        :param hot_name: column name of the hot metric in the order data
        :type hot_name: str
        :return: {"key": hot_name, "value": [{shop_code: score}, ...]} sorted desc
        :rtype: dict
        """
        results = self._order_data.groupby("BB_RETAIL_CUSTOMER_CODE")[hot_name].mean().reset_index()
        sorted_results = results.sort_values(by=hot_name, ascending=False).reset_index(drop=True)
        item_hot_score = []
        if sorted_results.empty:
            # No orders: nothing to score (previously crashed on .loc[0]).
            return {"key": f"{hot_name}", "value": item_hot_score}
        # mock maximum hot-recall score
        max_score = 1.0
        # Normalise against the best shop so the top entry scores 100.
        total_score = sorted_results.loc[0, hot_name] / max_score
        for row in sorted_results.itertuples(index=True, name="Row"):
            # row[1] is the shop code, row[2] the mean metric value.
            if total_score:
                score = (row[2] / total_score) * 100
            else:
                # Top mean is 0 — avoid a division by zero; every score is 0.
                score = 0.0
            item_hot_score.append({row[1]: score})
        return {"key": f"{hot_name}", "value": item_hot_score}

    def calculate_all_hot_score(self, city_uuid):
        """Compute and persist the scores for every configured hot metric."""
        for hotkey_name in tqdm(self._hotkeys, desc="hot_recall:正在计算热度分数"):
            self.to_redis(self._calculate_hot_score(hotkey_name), city_uuid)

    def to_redis(self, rec_content_score, city_uuid):
        """Replace the sorted set ``hot:{city}:{metric}`` with the new scores."""
        hotkey_name = rec_content_score["key"]
        rec_item_id = f"hot:{city_uuid}:{str(hotkey_name)}"
        print("自动清除历史id前数量", self._redis_db.redis.zcard(rec_item_id))
        # Drop the previous snapshot so stale shops do not linger in the set.
        self._redis_db.redis.delete(rec_item_id)
        print("自动清除历史id后数量", self._redis_db.redis.zcard(rec_item_id))

        res = {}
        for item in rec_content_score["value"]:
            for content, score in item.items():  # item looks like {"A001": 75.0}
                res[content] = float(score)  # make sure the score is a float

        if res:  # zadd rejects an empty mapping
            self._redis_db.redis.zadd(rec_item_id, res)
+
+
if __name__ == "__main__":
    # Fix: the previous demo called HotRecallModel() and
    # calculate_all_hot_score() without their required arguments
    # (order_data, city_uuid), which raised TypeError before doing anything.
    import sys

    order_data_path = sys.argv[1] if len(sys.argv) > 1 else "./data/order_data.csv"
    city_uuid = sys.argv[2] if len(sys.argv) > 2 else ""
    model = HotRecallModel(pd.read_csv(order_data_path))
    model.calculate_all_hot_score(city_uuid)
    # joblib.dump(model, "hot_recall.model")

+ 119 - 119
models/recall/item2vec.py

@@ -1,120 +1,120 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-import gensim
-from dao.mysql_client import Mysql
-
-class Item2Vec(object):
-    def __init__(self):
-        mysql_client = Mysql()
-        # 创建会话
-        self.session = mysql_client.create_session()
-def load_item_sequences_from_mysql():
-    try:
-        conn = mysql.connector.connect(
-            host='localhost',
-            user='your_username',
-            password='your_password',
-            database='your_database'
-        )
-        cursor = conn.cursor()
-        query = "SELECT user_id, sequence FROM item_sequences"
-        cursor.execute(query)
-        for row in cursor:
-            user_id, sequence_str = row
-            sequence = sequence_str.split(',')
-            yield user_id, sequence
-        cursor.close()
-        conn.close()
-    except mysql.connector.Error as err:
-        print(f"数据库连接或查询出错: {err}")
-
-
-def load_item_attributes_from_mysql():
-    try:
-        conn = mysql.connector.connect(
-            host='localhost',
-            user='your_username',
-            password='your_password',
-            database='your_database'
-        )
-        cursor = conn.cursor()
-        query = "SELECT item, attributes FROM item_attributes"
-        cursor.execute(query)
-        item_attributes = {}
-        for item, attributes_str in cursor:
-            attributes = attributes_str.split(',')
-            item_attributes[item] = attributes
-        cursor.close()
-        conn.close()
-        return item_attributes
-    except mysql.connector.Error as err:
-        print(f"数据库连接或查询出错: {err}")
-
-
-def load_user_attributes_from_mysql():
-    try:
-        conn = mysql.connector.connect(
-            host='localhost',
-            user='your_username',
-            password='your_password',
-            database='your_database'
-        )
-        cursor = conn.cursor()
-        query = "SELECT user_id, taste, cigarette_length, cigarette_type, packaging_color FROM user_attributes"
-        cursor.execute(query)
-        for row in cursor:
-            user_id, taste, cigarette_length, cigarette_type, packaging_color = row
-            user_attrs = [attr for attr in [taste, cigarette_length, cigarette_type, packaging_color] if attr]
-            yield user_id, user_attrs
-        cursor.close()
-        conn.close()
-    except mysql.connector.Error as err:
-        print(f"数据库连接或查询出错: {err}")
-
-
-def combine_user_item_attributes(item_sequences, item_attributes):
-    user_attributes = {user_id: attrs for user_id, attrs in load_user_attributes_from_mysql()}
-    for user_id, sequence in item_sequences:
-        user_attrs = user_attributes.get(user_id, [])
-        combined_sequence = user_attrs.copy()
-        for item in sequence:
-            combined_sequence.append(item)
-            combined_sequence.extend(item_attributes.get(item, []))
-        yield combined_sequence
-
-
-def train_item2vec(item_sequences, vector_size=100, window=5, min_count=10, workers=4):
-    model = gensim.models.Word2Vec(sentences=item_sequences, vector_size=vector_size, window=window,
-                                   min_count=min_count, workers=workers)
-    return model
-
-
-def get_item_vector(item, model):
-    try:
-        return model.wv[item]
-    except KeyError:
-        print(f"物品 {item} 未在模型中找到。")
-        return None
-
-
-def find_similar_items(item, model, topn=5):
-    try:
-        similar_items = model.wv.most_similar(item, topn=topn)
-        filtered_similar_items = [(item, score) for item, score in similar_items if not item.startswith(('attr', 'user_'))]
-        return filtered_similar_items
-    except KeyError:
-        print(f"物品 {item} 未在模型中找到。")
-        return None
-
-
-if __name__ == "__main__":
-    item_sequences = load_item_sequences_from_mysql()
-    item_attributes = load_item_attributes_from_mysql()
-    combined_sequences = combine_user_item_attributes(item_sequences, item_attributes)
-    item2vec_model = train_item2vec(combined_sequences)
-    item_vector = get_item_vector('item1', item2vec_model)
-    if item_vector is not None:
-        print(f"物品 'item1' 的向量表示: {item_vector}")
-    similar_items = find_similar_items('item1', item2vec_model, topn=3)
-    if similar_items is not None:
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+import gensim
+from dao.mysql_client import Mysql
+
class Item2Vec(object):
    """Holds a MySQL session for item2vec data access."""

    def __init__(self):
        # Open a session against the shared MySQL client.
        self.session = Mysql().create_session()
def load_item_sequences_from_mysql():
    """Yield ``(user_id, item_sequence)`` rows from the ``item_sequences`` table.

    Sequences are stored as comma-separated item codes. On a database error
    the generator logs and stops (best-effort, as before).
    """
    # Fix: this module never imported mysql.connector, so every call raised
    # NameError; import lazily to keep module import cheap.
    import mysql.connector

    conn = None
    try:
        conn = mysql.connector.connect(
            host='localhost',
            user='your_username',
            password='your_password',
            database='your_database'
        )
        cursor = conn.cursor()
        cursor.execute("SELECT user_id, sequence FROM item_sequences")
        for user_id, sequence_str in cursor:
            yield user_id, sequence_str.split(',')
        cursor.close()
    except mysql.connector.Error as err:
        print(f"数据库连接或查询出错: {err}")
    finally:
        # Fix: the connection leaked when the query raised mid-iteration.
        if conn is not None:
            conn.close()
+
+
def load_item_attributes_from_mysql():
    """Return ``{item: [attributes]}`` read from the ``item_attributes`` table.

    Attributes are stored comma-separated. Returns an empty dict on a
    database error — previously the error path returned None, which crashed
    callers that do ``.get()`` on the result.
    """
    # Fix: this module never imported mysql.connector (NameError on call).
    import mysql.connector

    item_attributes = {}
    conn = None
    try:
        conn = mysql.connector.connect(
            host='localhost',
            user='your_username',
            password='your_password',
            database='your_database'
        )
        cursor = conn.cursor()
        cursor.execute("SELECT item, attributes FROM item_attributes")
        for item, attributes_str in cursor:
            item_attributes[item] = attributes_str.split(',')
        cursor.close()
    except mysql.connector.Error as err:
        print(f"数据库连接或查询出错: {err}")
    finally:
        # Fix: close the connection even when the query raises.
        if conn is not None:
            conn.close()
    return item_attributes
+
+
def load_user_attributes_from_mysql():
    """Yield ``(user_id, [non-empty attrs])`` from the ``user_attributes`` table.

    Empty/NULL attribute columns are dropped. On a database error the
    generator logs and stops (best-effort, as before).
    """
    # Fix: this module never imported mysql.connector (NameError on call).
    import mysql.connector

    conn = None
    try:
        conn = mysql.connector.connect(
            host='localhost',
            user='your_username',
            password='your_password',
            database='your_database'
        )
        cursor = conn.cursor()
        cursor.execute(
            "SELECT user_id, taste, cigarette_length, cigarette_type, packaging_color FROM user_attributes"
        )
        for user_id, taste, cigarette_length, cigarette_type, packaging_color in cursor:
            user_attrs = [attr for attr in (taste, cigarette_length, cigarette_type, packaging_color) if attr]
            yield user_id, user_attrs
        cursor.close()
    except mysql.connector.Error as err:
        print(f"数据库连接或查询出错: {err}")
    finally:
        # Fix: the connection leaked when the query raised mid-iteration.
        if conn is not None:
            conn.close()
+
+
def combine_user_item_attributes(item_sequences, item_attributes):
    """Yield training sentences: user attrs first, then each item followed by its attrs."""
    user_attributes = dict(load_user_attributes_from_mysql())
    for user_id, sequence in item_sequences:
        # Start from a copy so the cached attribute list is never mutated.
        combined = list(user_attributes.get(user_id, []))
        for item in sequence:
            combined.append(item)
            combined.extend(item_attributes.get(item, []))
        yield combined
+
+
def train_item2vec(item_sequences, vector_size=100, window=5, min_count=10, workers=4):
    """Train and return a Word2Vec model over the combined attribute/item sequences."""
    return gensim.models.Word2Vec(
        sentences=item_sequences,
        vector_size=vector_size,
        window=window,
        min_count=min_count,
        workers=workers,
    )
+
+
def get_item_vector(item, model):
    """Return the embedding vector for *item*, or None when it is out of vocabulary."""
    try:
        vector = model.wv[item]
    except KeyError:
        print(f"物品 {item} 未在模型中找到。")
        return None
    return vector
+
+
def find_similar_items(item, model, topn=5):
    """Return up to *topn* ``(item, score)`` neighbours of *item*, dropping
    attribute and user pseudo-tokens; None when *item* is unknown to the model."""
    try:
        neighbours = model.wv.most_similar(item, topn=topn)
    except KeyError:
        print(f"物品 {item} 未在模型中找到。")
        return None
    # Keep only real item tokens (attributes/users were mixed into training).
    return [
        (candidate, score)
        for candidate, score in neighbours
        if not candidate.startswith(('attr', 'user_'))
    ]
+
+
if __name__ == "__main__":
    # Build the mixed user/item/attribute training corpus and fit item2vec.
    combined_sequences = combine_user_item_attributes(
        load_item_sequences_from_mysql(),
        load_item_attributes_from_mysql(),
    )
    item2vec_model = train_item2vec(combined_sequences)

    item_vector = get_item_vector('item1', item2vec_model)
    if item_vector is not None:
        print(f"物品 'item1' 的向量表示: {item_vector}")

    similar_items = find_similar_items('item1', item2vec_model, topn=3)
    if similar_items is not None:
        print(f"与物品 'item1' 最相似的 3 个物品: {similar_items}")

+ 102 - 102
models/recall/itemCF/ItemCF.py

@@ -1,103 +1,103 @@
-from database import RedisDatabaseHelper
-import pandas as pd
-import numpy as np
-from tqdm import tqdm
-from scipy.sparse import csr_matrix
-from joblib import Parallel, delayed
-import joblib
-
-class ItemCFModel:
-    def __init__(self):
-        self._recommendations = {}
-        
-    def train(self, score_path, similatity_path, city_uuid, n=100, k=10, top_n=100, n_jobs=4):
-        self._score_df = pd.read_csv(score_path)
-        self._similarity_df = pd.read_csv(similatity_path, index_col=0)
-        self._similarity_matrix = csr_matrix(self._similarity_df.values)
-        self._shop_index = {shop: idx for idx, shop in enumerate(self._similarity_df.index)}
-        self._index_shop = {idx: shop for idx, shop in enumerate(self._similarity_df.index)}
-        
-        def process_product(product_code, scores):
-            # 获取热度最高的n个商户
-            top_n_shops = scores.nlargest(n, "SCORE")["BB_RETAIL_CUSTOMER_CODE"].values
-            top_n_indices = [self._shop_index[shop] for shop in top_n_shops]
-            
-            # 找到每个商户最相似的k个商户
-            similar_shops = {}
-            for shop_idx in top_n_indices:
-                similarities = self._similarity_matrix[shop_idx].toarray().flatten()
-                similar_indices = np.argpartition(similarities, -k-1)[-k-1:]
-                similar_indices = similar_indices[similar_indices != shop_idx][:k]
-                similar_shops[self._index_shop[shop_idx]] = [self._index_shop[idx] for idx in similar_indices]
-            
-            # 生成候选商户列表
-            candidate_shops = list(set([m for sublist in similar_shops.values() for m in sublist]))
-            candidate_indices = [self._shop_index[shop] for shop in candidate_shops]
-            
-            # 计算每个候选商户的兴趣得分
-            interest_scores = {}
-            for candidate_idx in candidate_indices:
-                interest_score = 0
-                for shop_idx in top_n_indices:
-                    if self._index_shop[candidate_idx] in similar_shops[self._index_shop[shop_idx]]:
-                        shop_score = scores[scores["BB_RETAIL_CUSTOMER_CODE"]==self._index_shop[shop_idx]]["SCORE"].values[0]
-                        interest_score += shop_score * self._similarity_matrix[shop_idx, candidate_idx]
-                interest_scores[self._index_shop[candidate_idx]] = interest_score
-            
-            # 将候选商户的兴趣得分转换为字典列表,并按照从大到小排列
-            sorted_candidates = sorted([{shop_id: s} for shop_id, s in interest_scores.items()],
-                                       key=lambda x: list(x.values())[0], reverse=True)[:top_n]
-            
-            return product_code, sorted_candidates
-        
-        # 并行处理每个品规
-        results = Parallel(n_jobs=n_jobs)(delayed(process_product)(product_code, scores) 
-                                          for product_code, scores in tqdm(self._score_df.groupby("PRODUCT_CODE"), desc="train:正在计算候选得分"))
-        print(len(results))
-        # 存储结果
-        self._recommendations = {product_code: sorted_candidates for product_code, sorted_candidates in results}
-        self.to_redis_zset(city_uuid)
-    
-    def to_redis_zset(self, city_uuid):
-        """
-        将 self._recommendations 中的数据保存到 Redis 的 Sorted Set (ZSET) 中
-        存储格式为 fc:product_code,其中商户 ID 作为成员,得分作为分数
-        """
-        redis_db = RedisDatabaseHelper()
-        
-        # 存redis之前,先进行删除操作
-        pattern = f"fc:{city_uuid}:*"
-        keys_to_delete = redis_db.redis.keys(pattern)
-        if keys_to_delete:
-            redis_db.redis.delete(*keys_to_delete)
-            
-        for product_code, recommendations in tqdm(self._recommendations.items(), desc="train:正在存储推荐结果"):
-            redis_key = f"fc:{city_uuid}:{product_code}"
-            zset_data = {}
-            for rec in recommendations:
-                for shop_id, score in rec.items():
-                    try:
-                        zset_data[shop_id] = float(score)
-                    except ValueError as e:
-                        print(f"Error converting score to float for shop_id {shop_id}: {score}")
-                        raise e
-            
-            redis_db.redis.zadd(redis_key, zset_data)
-    
-if __name__ == "__main__":
-    score_path = "./models/recall/itemCF/matrix/score.csv"
-    similarity_path = "./models/recall/itemCF/matrix/similarity.csv"
-    # itemcf_model = ItemCFModel()
-    # itemcf_model.train(score_path, similarity_path, n_jobs=4)
-    # recommend_list = itemcf_model.inference(110111)
-    # itemcf_model.to_redis_zset()
-    # print(len(recommend_list))
-    # print(recommend_list)
-    # joblib.dump(itemcf_model, "itemCF.model")
-    
-    # model = joblib.load("./itemCF.model")
-    # recommend_list = model.inference(110102)
-    # print(len(recommend_list))
-    # print(recommend_list)
-    data = pd.read_csv(similarity_path, index_col=0)
+from database import RedisDatabaseHelper
+import pandas as pd
+import numpy as np
+from tqdm import tqdm
+from scipy.sparse import csr_matrix
+from joblib import Parallel, delayed
+import joblib
+
class ItemCFModel:
    """Item-based collaborative filtering over a shop-shop similarity matrix.

    For each product (the "user" in CF terms), candidate shops are scored by
    propagating the product's top-n shop scores through each shop's k nearest
    neighbours, then the top_n candidates per product are written to Redis.
    """

    def __init__(self):
        # product_code -> [{shop_id: interest_score}, ...] sorted descending.
        self._recommendations = {}
        
    def train(self, score_path, similatity_path, city_uuid, n=100, k=10, top_n=100, n_jobs=4):
        """Compute recommendations for every product and persist them to Redis.

        Args:
            score_path: CSV with PRODUCT_CODE, BB_RETAIL_CUSTOMER_CODE, SCORE.
            similatity_path: CSV of the square shop-shop similarity matrix
                (index and columns are shop codes).
                NOTE(review): parameter name typo ("similatity") kept for
                caller compatibility.
            city_uuid: prefix for the Redis keys written by to_redis_zset.
            n: number of top-scoring shops considered per product.
            k: neighbours taken per top shop.
            top_n: number of candidates persisted per product.
            n_jobs: joblib parallelism for the per-product computation.
        """
        self._score_df = pd.read_csv(score_path)
        self._similarity_df = pd.read_csv(similatity_path, index_col=0)
        # Sparse view of the similarity matrix for cheap row slicing.
        self._similarity_matrix = csr_matrix(self._similarity_df.values)
        self._shop_index = {shop: idx for idx, shop in enumerate(self._similarity_df.index)}
        self._index_shop = {idx: shop for idx, shop in enumerate(self._similarity_df.index)}
        
        def process_product(product_code, scores):
            # Take the n shops with the highest score for this product.
            top_n_shops = scores.nlargest(n, "SCORE")["BB_RETAIL_CUSTOMER_CODE"].values
            top_n_indices = [self._shop_index[shop] for shop in top_n_shops]
            
            # For each of those shops, find its k most similar shops.
            similar_shops = {}
            for shop_idx in top_n_indices:
                similarities = self._similarity_matrix[shop_idx].toarray().flatten()
                # argpartition yields the k+1 largest entries (unsorted); the
                # extra slot allows dropping the shop itself below.
                similar_indices = np.argpartition(similarities, -k-1)[-k-1:]
                # NOTE(review): if shop_idx is NOT among the k+1 selected, the
                # filter removes nothing and [:k] drops one of k+1 unsorted
                # candidates arbitrarily — confirm this is intended.
                similar_indices = similar_indices[similar_indices != shop_idx][:k]
                similar_shops[self._index_shop[shop_idx]] = [self._index_shop[idx] for idx in similar_indices]
            
            # The union of all neighbour lists forms the candidate shops.
            candidate_shops = list(set([m for sublist in similar_shops.values() for m in sublist]))
            candidate_indices = [self._shop_index[shop] for shop in candidate_shops]
            
            # Interest score of a candidate: sum of (source shop score *
            # similarity) over every top shop whose neighbour list contains it.
            interest_scores = {}
            for candidate_idx in candidate_indices:
                interest_score = 0
                for shop_idx in top_n_indices:
                    if self._index_shop[candidate_idx] in similar_shops[self._index_shop[shop_idx]]:
                        shop_score = scores[scores["BB_RETAIL_CUSTOMER_CODE"]==self._index_shop[shop_idx]]["SCORE"].values[0]
                        interest_score += shop_score * self._similarity_matrix[shop_idx, candidate_idx]
                interest_scores[self._index_shop[candidate_idx]] = interest_score
            
            # Sort candidates by interest score, descending, and keep top_n.
            sorted_candidates = sorted([{shop_id: s} for shop_id, s in interest_scores.items()],
                                       key=lambda x: list(x.values())[0], reverse=True)[:top_n]
            
            return product_code, sorted_candidates
        
        # Process every product in parallel.
        results = Parallel(n_jobs=n_jobs)(delayed(process_product)(product_code, scores) 
                                          for product_code, scores in tqdm(self._score_df.groupby("PRODUCT_CODE"), desc="train:正在计算候选得分"))
        print(len(results))
        # Store the per-product results, then persist them to Redis.
        self._recommendations = {product_code: sorted_candidates for product_code, sorted_candidates in results}
        self.to_redis_zset(city_uuid)
    
    def to_redis_zset(self, city_uuid):
        """Persist self._recommendations to Redis sorted sets (ZSET).

        Keys are "fc:{city_uuid}:{product_code}"; members are shop IDs with
        the interest score as the ZSET score. Existing keys for the city are
        deleted first so stale products do not linger.
        """
        redis_db = RedisDatabaseHelper()
        
        # Delete any stale keys for this city before writing new results.
        pattern = f"fc:{city_uuid}:*"
        keys_to_delete = redis_db.redis.keys(pattern)
        if keys_to_delete:
            redis_db.redis.delete(*keys_to_delete)
            
        for product_code, recommendations in tqdm(self._recommendations.items(), desc="train:正在存储推荐结果"):
            redis_key = f"fc:{city_uuid}:{product_code}"
            zset_data = {}
            for rec in recommendations:
                for shop_id, score in rec.items():
                    try:
                        zset_data[shop_id] = float(score)
                    except ValueError as e:
                        print(f"Error converting score to float for shop_id {shop_id}: {score}")
                        raise e
            
            redis_db.redis.zadd(redis_key, zset_data)
+    
if __name__ == "__main__":
    # Ad-hoc sanity check: load the precomputed similarity matrix and print
    # it. (Training/inference runs are driven from app.py, not from here.)
    score_path = "./models/recall/itemCF/matrix/score.csv"
    similarity_path = "./models/recall/itemCF/matrix/similarity.csv"
    data = pd.read_csv(similarity_path, index_col=0)
    print(data)

+ 78 - 78
models/recall/itemCF/calculate_similarity_matrix.py

@@ -1,79 +1,79 @@
-from database import MySqlDao
-import pandas as pd
-import numpy as np
-
-from itertools import combinations
-from tqdm import tqdm
-
-dao = MySqlDao()
-def build_co_occurence_matrix(order_data):
-    """
-    构建商户共现矩阵
-    """
-    # 获取所有商户的唯一列表
-    shops = order_data["BB_RETAIL_CUSTOMER_CODE"].unique()
-    num_shops = len(shops)
-    
-    # 创建商户到索引的映射
-    shops_to_index = {shop: idx for idx, shop in enumerate(shops)}
-    # 初始化共现矩阵(上三角部分)
-    co_occurrence_matrix = np.zeros((num_shops, num_shops), dtype=int)
-    
-    # 按照品规分组
-    grouped = order_data.groupby("PRODUCT_CODE")["BB_RETAIL_CUSTOMER_CODE"].apply(list)
-    
-    # 遍历每个品规的商户列表
-    for shop_in_product in grouped:
-        # 生成商户对
-        shop_pairs = combinations(shop_in_product, 2)
-        for shop1, shop2 in shop_pairs:
-            # 获取商户索引
-            idx1 = shops_to_index[shop1]
-            idx2 = shops_to_index[shop2]
-            # 更新共现矩阵
-            co_occurrence_matrix[idx1, idx2] += 1
-            co_occurrence_matrix[idx2, idx1] += 1
-    return co_occurrence_matrix, shops, shops_to_index
-
-def calculate_similarity_matrix(co_occurrence_matrix, order_data, shops_to_index):
-    """
-    使用向量计算商铺之间的相似度矩阵
-    """
-    # 计算每个商铺售卖品规的总次数
-    shop_counts = order_data.groupby("BB_RETAIL_CUSTOMER_CODE").size()
-    
-    # 将商户售卖次数转换为数组
-    counts = np.array([shop_counts[shop] for shop in shops_to_index.keys()])
-    
-    # 计算分母部分 (sqrt(count_i * count_j))
-    denominator = np.sqrt(np.outer(counts, counts))
-    
-    # 计算相似度矩阵
-    similarity_matrix = co_occurrence_matrix / denominator
-    
-    # 将对角线设置为1
-    np.fill_diagonal(similarity_matrix, 1.0)
-    
-    return similarity_matrix
-
-def save_matrix(matrix, shops, save_path):
-    """
-    保存共现矩阵
-    """
-    matrix_df = pd.DataFrame(matrix, index=shops, columns=shops)
-    matrix_df.to_csv(save_path, index=True, encoding="utf-8")
-    
-def calculate_similarity_and_save_results(order_data, similarity_matrix_save_path):
-    co_occurrence_matrix, shops, shops_to_index = build_co_occurence_matrix(order_data)
-    similarity_matrix = calculate_similarity_matrix(co_occurrence_matrix, order_data, shops_to_index)
-    save_matrix(similarity_matrix, shops, similarity_matrix_save_path)
-    
-if __name__ == "__main__":
-    co_occurrence_save_path = "./models/recall/itemCF/matrix/occurrence.csv"
-    similarity_matrix_save_path = "./models/recall/itemCF/matrix/similarity.csv"
-    # 从数据库中读取订单数据
-    order_data = dao.load_order_data()
-    
-    calculate_similarity_and_save_results(order_data, similarity_matrix_save_path)
-    
+from database import MySqlDao
+import pandas as pd
+import numpy as np
+
+from itertools import combinations
+from tqdm import tqdm
+
# Module-level DAO: one MySQL connection shared by the script entry point.
dao = MySqlDao()
def build_co_occurence_matrix(order_data):
    """Build the symmetric shop co-occurrence matrix.

    Two shops co-occur once for every product under which they both appear
    in order_data (duplicate (product, shop) rows therefore inflate counts).

    Returns:
        (matrix, shops, shops_to_index) where *shops* preserves first-seen
        order and matrix[i, j] counts co-occurrences of shops[i], shops[j].
    """
    shops = order_data["BB_RETAIL_CUSTOMER_CODE"].unique()
    shops_to_index = {shop: pos for pos, shop in enumerate(shops)}
    matrix = np.zeros((len(shops), len(shops)), dtype=int)

    # Shop lists grouped by the product they were sold under.
    per_product = order_data.groupby("PRODUCT_CODE")["BB_RETAIL_CUSTOMER_CODE"].apply(list)

    for shop_list in per_product:
        # Count every unordered pair of shops sharing this product.
        for left, right in combinations(shop_list, 2):
            i, j = shops_to_index[left], shops_to_index[right]
            matrix[i, j] += 1
            matrix[j, i] += 1  # keep the matrix symmetric
    return matrix, shops, shops_to_index
+
def calculate_similarity_matrix(co_occurrence_matrix, order_data, shops_to_index):
    """Normalize the co-occurrence counts into cosine-style similarities.

    similarity[i, j] = cooc[i, j] / sqrt(count_i * count_j), where count_x is
    the number of order rows for shop x. The diagonal is forced to 1.
    """
    # Rows per shop, aligned with the matrix row order.
    row_counts = order_data.groupby("BB_RETAIL_CUSTOMER_CODE").size()
    counts = np.array([row_counts[shop] for shop in shops_to_index.keys()])

    # Outer product gives sqrt(count_i * count_j) for every (i, j) pair.
    scale = np.sqrt(np.outer(counts, counts))
    similarity_matrix = co_occurrence_matrix / scale

    np.fill_diagonal(similarity_matrix, 1.0)
    return similarity_matrix
+
def save_matrix(matrix, shops, save_path):
    """Write *matrix* to CSV with the shop codes as both index and columns."""
    frame = pd.DataFrame(matrix, index=shops, columns=shops)
    frame.to_csv(save_path, index=True, encoding="utf-8")
+    
def calculate_similarity_and_save_results(order_data, similarity_matrix_save_path):
    """End-to-end helper: co-occurrence -> similarity -> CSV on disk."""
    cooc, shops, index_map = build_co_occurence_matrix(order_data)
    sim = calculate_similarity_matrix(cooc, order_data, index_map)
    save_matrix(sim, shops, similarity_matrix_save_path)
+    
if __name__ == "__main__":
    co_occurrence_save_path = "./models/recall/itemCF/matrix/occurrence.csv"
    similarity_matrix_save_path = "./models/recall/itemCF/matrix/similarity.csv"
    # Pull raw order rows from MySQL, then build and persist the shop-shop
    # similarity matrix for the ItemCF step.
    order_data = dao.load_order_data()
    calculate_similarity_and_save_results(order_data, similarity_matrix_save_path)
+    
     

+ 81 - 81
models/recall/itemCF/user_item_score.py

@@ -1,82 +1,82 @@
-#!/usr/bin/env python
-# -*- encoding: utf-8 -*-
-'''
-@filename     : ShopScore.py
-@description     : 品规-商户-评分矩阵:品规(用户)对商铺(物品)的评分矩阵,将结果保存在score.csv文件中
-@time     : 2025/01/31/02
-@author     : Sherlock1011 & Min1027
-@Version     : 1.0
-'''
-
-
-from database import MySqlDao
-from decimal import Decimal
-
-# 算法封装成一个类
-class UserItemScore:
-    """TODO 1. 将结果保存到redis数据库中"""
-    def __init__(self):
-        self.weights = {
-            "MONTH6_SALE_QTY": Decimal(0.1),
-            "MONTH6_SALE_AMT": Decimal(0.1),
-            "MONTH6_GROSS_PROFIT_RATE": Decimal(0.03),
-            "MONTH6_SALE_QTY_YOY": Decimal(0.1),
-            "MONTH6_SALE_QTY_MOM": Decimal(0.1),
-            "MONTH6_SALE_AMT_YOY": Decimal(0.1),
-            "MONTH6_SALE_AMT_MOM": Decimal(0.1),
-            "ORDER_FULLORDR_RATE": Decimal(0.1),
-            "CUSTOMER_REPURCHASE_RATE": Decimal(0.1),
-            "NEW_PRODUCT_ORDER_QTY_OCC": Decimal(0.03),
-            "LISTING_RATE": Decimal(0.1),
-            "OUT_STOCK_DAYS": Decimal(0.02),
-            "RETAIL_PRICE_INDEX": Decimal(0.02)
-        }
-        self.dao = MySqlDao()
-
-    # 均值方差归一化函数
-    def standardize_column(self, column):
-        if(column.max() == column.min() and column.max() == 0):
-            return 0
-        elif (column.max() == column.min() and column.max() != 0):
-            return 1
-        else:
-            return (column - column.min()) / (column.max() - column.min())
-
-    # 按照品规分组归一化并计算评分
-    def calculate_heart_per_product(self, group):
-        for column in self.weights.keys():
-            if column == "OUT_STOCK_DAYS":
-                group[column] = 1 - self.standardize_column(group[column])
-            else:
-                group[column] = self.standardize_column(group[column])
-        group["SCORE"] = group.apply(
-            lambda row: sum(Decimal(row[col]) * weight for col, weight in self.weights.items()) * 100, axis=1
-        )
-        return group
-
-    # 主算法函数:计算品规-商铺评分矩阵
-    def score(self, order_data):
-       
-
-        # 应用分组计算
-        df_result = order_data.groupby("PRODUCT_CODE").apply(self.calculate_heart_per_product).reset_index(drop=True)
-        df_result = df_result.sort_values(by=["PRODUCT_CODE", "SCORE"], ascending=[True, False])
-
-        # 选择要保存的列
-        return df_result[['PRODUCT_CODE', 'BB_RETAIL_CUSTOMER_CODE', 'SCORE']]
- 
-if __name__ == "__main__":
-    # 创建一个 ItemCF 类的实例
-    item_cf_algorithm = UserItemScore()
-    dao = MySqlDao()
-    # 读取数据
-    order_data = dao.load_order_data()
-
-    # 调用算法
-    scores = item_cf_algorithm.score(order_data)
-    
-    scores_path = "./models/recall/itemCF/matrix/score.csv"
-    
-    # 保存评分结果到csv文件
-    scores.to_csv(scores_path, index=False, encoding="utf-8")
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : ShopScore.py
+@description     : 品规-商户-评分矩阵:品规(用户)对商铺(物品)的评分矩阵,将结果保存在score.csv文件中
+@time     : 2025/01/31/02
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+
+
+from database import MySqlDao
+from decimal import Decimal
+
+# 算法封装成一个类
class UserItemScore:
    """Build the product -> shop rating matrix used by ItemCF recall.

    TODO: persist the result to the Redis database (carried over from the
    original implementation).
    """

    def __init__(self):
        # Per-metric weights; Decimal keeps the weighted sum exact.
        self.weights = {
            "MONTH6_SALE_QTY": Decimal(0.1),
            "MONTH6_SALE_AMT": Decimal(0.1),
            "MONTH6_GROSS_PROFIT_RATE": Decimal(0.03),
            "MONTH6_SALE_QTY_YOY": Decimal(0.1),
            "MONTH6_SALE_QTY_MOM": Decimal(0.1),
            "MONTH6_SALE_AMT_YOY": Decimal(0.1),
            "MONTH6_SALE_AMT_MOM": Decimal(0.1),
            "ORDER_FULLORDR_RATE": Decimal(0.1),
            "CUSTOMER_REPURCHASE_RATE": Decimal(0.1),
            "NEW_PRODUCT_ORDER_QTY_OCC": Decimal(0.03),
            "LISTING_RATE": Decimal(0.1),
            "OUT_STOCK_DAYS": Decimal(0.02),
            "RETAIL_PRICE_INDEX": Decimal(0.02)
        }
        self.dao = MySqlDao()

    def standardize_column(self, column):
        """Min-max scale *column* to [0, 1].

        Constant columns have no spread: an all-equal column maps to 0 when
        the shared value is 0 and to 1 otherwise, avoiding a 0/0 division.
        """
        lo, hi = column.min(), column.max()
        if hi != lo:
            return (column - lo) / (hi - lo)
        return 0 if hi == 0 else 1

    def calculate_heart_per_product(self, group):
        """Normalize one product's metric columns and attach a SCORE column.

        OUT_STOCK_DAYS is inverted (fewer out-of-stock days is better).
        SCORE is the weighted sum of the normalized metrics, scaled to 0-100.
        """
        for metric in self.weights:
            scaled = self.standardize_column(group[metric])
            group[metric] = 1 - scaled if metric == "OUT_STOCK_DAYS" else scaled
        group["SCORE"] = group.apply(
            lambda row: sum(Decimal(row[col]) * weight
                            for col, weight in self.weights.items()) * 100,
            axis=1,
        )
        return group

    def score(self, order_data):
        """Return the product-shop rating matrix.

        Normalizes metrics per product, then returns a DataFrame with columns
        PRODUCT_CODE, BB_RETAIL_CUSTOMER_CODE, SCORE sorted by product
        ascending and score descending.
        """
        rated = (order_data
                 .groupby("PRODUCT_CODE")
                 .apply(self.calculate_heart_per_product)
                 .reset_index(drop=True))
        rated = rated.sort_values(by=["PRODUCT_CODE", "SCORE"], ascending=[True, False])
        return rated[['PRODUCT_CODE', 'BB_RETAIL_CUSTOMER_CODE', 'SCORE']]
+ 
if __name__ == "__main__":
    # Compute the product -> shop rating matrix from MySQL order data and
    # persist it as CSV for the downstream ItemCF step.
    scorer = UserItemScore()
    dao = MySqlDao()
    order_data = dao.load_order_data()

    scores = scorer.score(order_data)

    scores_path = "./models/recall/itemCF/matrix/score.csv"
    scores.to_csv(scores_path, index=False, encoding="utf-8")
     

+ 46 - 46
requirements.txt

@@ -1,46 +1,46 @@
-asttokens==3.0.0
-async-timeout==5.0.1
-comm==0.2.2
-debugpy==1.8.12
-decorator==5.1.1
-et_xmlfile==2.0.0
-exceptiongroup==1.2.2
-executing==2.2.0
-filelock==3.17.0
-greenlet==3.1.1
-ipykernel==6.29.5
-ipython==8.31.0
-jedi==0.19.2
-joblib==1.4.2
-matplotlib-inline==0.1.7
-nest-asyncio==1.6.0
-numpy==2.2.2
-openpyxl==3.1.5
-packaging==24.2
-pandas==2.2.3
-parso==0.8.4
-pexpect==4.9.0
-platformdirs==4.3.6
-prompt_toolkit==3.0.50
-psutil==6.1.1
-ptyprocess==0.7.0
-pure_eval==0.2.3
-Pygments==2.19.1
-PyMySQL==1.1.1
-python-dateutil==2.9.0.post0
-pytz==2024.2
-PyYAML==6.0.2
-pyzmq==26.2.1
-redis==5.2.1
-scikit-learn==1.6.1
-scipy==1.15.1
-six==1.17.0
-SQLAlchemy==2.0.37
-stack-data==0.6.3
-threadpoolctl==3.5.0
-tornado==6.4.2
-tqdm==4.67.1
-traitlets==5.14.3
-typing_extensions==4.12.2
-tzdata==2025.1
-wcwidth==0.2.13
+asttokens==3.0.0
+async-timeout==5.0.1
+comm==0.2.2
+debugpy==1.8.12
+decorator==5.1.1
+et_xmlfile==2.0.0
+exceptiongroup==1.2.2
+executing==2.2.0
+filelock==3.17.0
+greenlet==3.1.1
+ipykernel==6.29.5
+ipython==8.31.0
+jedi==0.19.2
+joblib==1.4.2
+matplotlib-inline==0.1.7
+nest-asyncio==1.6.0
+numpy==2.2.2
+openpyxl==3.1.5
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+platformdirs==4.3.6
+prompt_toolkit==3.0.50
+psutil==6.1.1
+ptyprocess==0.7.0
+pure_eval==0.2.3
+Pygments==2.19.1
+PyMySQL==1.1.1
+python-dateutil==2.9.0.post0
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.1
+redis==5.2.1
+scikit-learn==1.6.1
+scipy==1.15.1
+six==1.17.0
+SQLAlchemy==2.0.37
+stack-data==0.6.3
+threadpoolctl==3.5.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+typing_extensions==4.12.2
+tzdata==2025.1
+wcwidth==0.2.13

+ 2 - 2
utils/__init__.py

@@ -1,2 +1,2 @@
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-

+ 82 - 82
utils/mock_data_to_database.py

@@ -1,83 +1,83 @@
-#!/usr/bin/env python
-# -*- encoding: utf-8 -*-
-'''
-@filename     : mock_data_to_database.py
-@description     : 将mock数据写入到数据库中
-@time     : 2025/01/31/00
-@author     : Sherlock1011 & Min1027
-@Version     : 1.0
-'''
-from dao.mysql_client import Mysql
-
-import pandas as pd
-from sqlalchemy import Column, Integer, VARCHAR, Float, DECIMAL
-from sqlalchemy.ext.declarative import declarative_base
-
-# 定义数据库表结构
-Base = declarative_base()
-
-class MockOrder(Base):
-    __tablename__ = "mock_order"
-    id = Column(Integer, primary_key=True, autoincrement=True)  # 添加主键列
-    BB_RETAIL_CUSTOMER_CODE = Column(VARCHAR(50))
-    PRODUCT_CODE = Column(VARCHAR(50))
-    MONTH6_SALE_QTY = Column(DECIMAL(18, 6))
-    MONTH6_SALE_AMT = Column(DECIMAL(18, 6))
-    MONTH6_GROSS_PROFIT_RATE = Column(DECIMAL(18, 6))
-    MONTH6_SALE_QTY_YOY = Column(DECIMAL(18, 6))
-    MONTH6_SALE_QTY_MOM = Column(DECIMAL(18, 6))
-    MONTH6_SALE_AMT_YOY = Column(DECIMAL(18, 6))
-    MONTH6_SALE_AMT_MOM = Column(DECIMAL(18, 6))
-    ORDER_FULLORDR_RATE = Column(DECIMAL(18, 6))
-    CUSTOMER_REPURCHASE_RATE = Column(DECIMAL(18, 6))
-    NEW_PRODUCT_ORDER_QTY_OCC = Column(DECIMAL(18, 6))
-    LISTING_RATE = Column(DECIMAL(18, 6))
-    OUT_STOCK_DAYS = Column(DECIMAL(18, 6))
-    RETAIL_PRICE_INDEX = Column(DECIMAL(18, 6))
-    
-def insert_data(db, data_path):
-    df = pd.read_excel(data_path)
-    session = db.create_session()
-    try:
-        df.columns = ['BB_RETAIL_CUSTOMER_CODE', 
-                      'PRODUCT_CODE', 
-                      'MONTH6_SALE_QTY', 
-                      'MONTH6_SALE_AMT', 
-                      'MONTH6_GROSS_PROFIT_RATE',
-                      'MONTH6_SALE_QTY_YOY', 
-                      'MONTH6_SALE_QTY_MOM', 
-                      'MONTH6_SALE_AMT_YOY', 
-                      'MONTH6_SALE_AMT_MOM', 
-                      'ORDER_FULLORDR_RATE',
-                      'CUSTOMER_REPURCHASE_RATE', 
-                      'NEW_PRODUCT_ORDER_QTY_OCC', 
-                      'LISTING_RATE', 
-                      'OUT_STOCK_DAYS', 
-                      'RETAIL_PRICE_INDEX',
-                      ]  # 确保列名匹配
-        session.bulk_insert_mappings(MockOrder, df.to_dict(orient='records'))
-        session.commit()
-        print("数据成功插入数据库")
-        
-    except Exception as e:
-        session.rollback()
-        print(f"插入数据时出错: {e}")
-        
-    finally:
-        session.close()
-    
-        
-if __name__ == "__main__":
-    data_path = "./data/order.xlsx"
-    # 创建数据库链接
-    db = Mysql()
-    
-
-    # 创建表(如果不存在)
-    Base.metadata.create_all(db.engine)
-    insert_data(db, data_path)
-    
-    db.closed()
-    
-    
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : mock_data_to_database.py
+@description     : 将mock数据写入到数据库中
+@time     : 2025/01/31/00
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+from dao.mysql_client import Mysql
+
+import pandas as pd
+from sqlalchemy import Column, Integer, VARCHAR, Float, DECIMAL
+from sqlalchemy.ext.declarative import declarative_base
+
# ORM table definitions.
Base = declarative_base()

class MockOrder(Base):
    """Mock order-metrics row: one (shop, product) pair with six-month KPI columns."""
    __tablename__ = "mock_order"
    id = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
    BB_RETAIL_CUSTOMER_CODE = Column(VARCHAR(50))  # shop (retail customer) code
    PRODUCT_CODE = Column(VARCHAR(50))  # product spec code
    MONTH6_SALE_QTY = Column(DECIMAL(18, 6))
    MONTH6_SALE_AMT = Column(DECIMAL(18, 6))
    MONTH6_GROSS_PROFIT_RATE = Column(DECIMAL(18, 6))
    MONTH6_SALE_QTY_YOY = Column(DECIMAL(18, 6))
    MONTH6_SALE_QTY_MOM = Column(DECIMAL(18, 6))
    MONTH6_SALE_AMT_YOY = Column(DECIMAL(18, 6))
    MONTH6_SALE_AMT_MOM = Column(DECIMAL(18, 6))
    ORDER_FULLORDR_RATE = Column(DECIMAL(18, 6))
    CUSTOMER_REPURCHASE_RATE = Column(DECIMAL(18, 6))
    NEW_PRODUCT_ORDER_QTY_OCC = Column(DECIMAL(18, 6))
    LISTING_RATE = Column(DECIMAL(18, 6))
    OUT_STOCK_DAYS = Column(DECIMAL(18, 6))
    RETAIL_PRICE_INDEX = Column(DECIMAL(18, 6))
+    
def insert_data(db, data_path):
    """Load the mock-order spreadsheet at *data_path* and bulk-insert it
    into the mock_order table through *db*'s session."""
    frame = pd.read_excel(data_path)
    session = db.create_session()
    try:
        # Rename spreadsheet columns to match the MockOrder attributes.
        frame.columns = [
            'BB_RETAIL_CUSTOMER_CODE',
            'PRODUCT_CODE',
            'MONTH6_SALE_QTY',
            'MONTH6_SALE_AMT',
            'MONTH6_GROSS_PROFIT_RATE',
            'MONTH6_SALE_QTY_YOY',
            'MONTH6_SALE_QTY_MOM',
            'MONTH6_SALE_AMT_YOY',
            'MONTH6_SALE_AMT_MOM',
            'ORDER_FULLORDR_RATE',
            'CUSTOMER_REPURCHASE_RATE',
            'NEW_PRODUCT_ORDER_QTY_OCC',
            'LISTING_RATE',
            'OUT_STOCK_DAYS',
            'RETAIL_PRICE_INDEX',
        ]
        session.bulk_insert_mappings(MockOrder, frame.to_dict(orient='records'))
        session.commit()
        print("数据成功插入数据库")
    except Exception as e:
        # NOTE(review): the failure is printed and swallowed after rollback,
        # so the caller cannot detect it — consider re-raising.
        session.rollback()
        print(f"插入数据时出错: {e}")
    finally:
        session.close()
+    
+        
if __name__ == "__main__":
    data_path = "./data/order.xlsx"
    # Open the MySQL connection, make sure the table exists, load the
    # spreadsheet, then release the connection.
    db = Mysql()
    Base.metadata.create_all(db.engine)
    insert_data(db, data_path)
    db.closed()
+    
+    
     

+ 166 - 166
烟草模型部署文档.md

@@ -1,167 +1,167 @@
-# 烟草推荐模型部署文档
-
-## 1、配置文件说明:
-
-- ### database_config.yaml  这个是数据配置文件
-    
-
-```
-mysql:
-  host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
-  port: 3036
-  db: 'brand_cultivation'
-  user: 'xxxxx'
-  passwd: 'xxxxx'
-
-redis:
-  host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
-  port: 5000
-  db: 10
-  passwd: 'xxxxx'
-```
-
-- ### crontab 定时任务配置文件
-    
-
-```
-# START CRON JOB
-1 2 * * * python /app/app.pyc --run_all
-# END CRON JOB
-```
-
- 
-
-## 2、模型启动配置说明:
-
-### app.py
-
-```
-    parser.add_argument("--run_all", action='store_true')
-    parser.add_argument("--run_hot", action='store_true')
-    parser.add_argument("--run_itemcf", action='store_true')
-    parser.add_argument("--run_itemcf_inference", action='store_true')
-    parser.add_argument("--city_uuid", type=str, help="City UUID for filtering data")
-```
-
-### 总共有4种启动模式分别是:
-
-1\. 启动热度召回和协同过滤  
-        2. 启动热度召回  
-        3. 启动协同过滤  
-        4. 启动系统过滤推理
-
-## 3、GBDT LR模型训练推理启动
-
-### gbdt_lr.py
-
-```
-    parser.add_argument("--run_train", action='store_true')
-    parser.add_argument("--recommend", action='store_true')
-    parser.add_argument("--importance", action='store_true')
-
-    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301')
-    parser.add_argument("--product_id", type=str, default='110102')
-```
-
-### gbdt_lr总共3个功能:
-
-1\. 启动gbdt_lr训练  python -m gbdt_lr --run_train --city_uuid "00000000000000000000000011445301"  
-        2. 根据城市id和product_id进行推荐,需要指定city_uuid、product_id。      python -m gbdt_lr --recommend --city_uuid "00000000000000000000000011445301" --product_id '110102'  
-        3. 获取指定城市的特征重要性指标。  python -m gbdt_lr --importance --city_uuid "00000000000000000000000011445301"    
-注意:在数据准备阶段,会将训练数据保存到./models/rank/data/gbdt_data.csv中  
-模型文件会存放在 ./models/rank/weights/city_uuid/model.pkl  
-重要性指标会存放在 ./models/rank/weights/下,分别是商户指标重要性和卷烟指标重要性  
-
-## 4、模型docker运行配置说明:
-
-### docker镜像是:registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
-
-```yaml
-docker run --name BrandCultivation -d -v /export/brandcultivation/crontab:/etc/cron.d/crontab -v /export/brandcultivation/database_config.yaml:/app/config/database_config.yaml  registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
-```
-
-## 5、模型kubernetes运行配置说明
-
-yaml文件如下:
-
-```yaml
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: brandcultivation
-  namespace: default
-  labels:
-    app: brandcultivation
-spec:
-  selector:
-    matchLabels:
-      app: brandcultivation
-  replicas: 1
-  strategy:
-    rollingUpdate:
-      maxSurge: 25%
-      maxUnavailable: 25%
-    type: RollingUpdate
-  template:
-    metadata:
-      labels:
-        app: brandcultivation
-    spec:
-      containers:
-      - name: brandcultivation
-        image: registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
-        imagePullPolicy: IfNotPresent
-        resources:
-          requests:
-            cpu: 4000m
-            memory: 4096Mi
-            ephemeral-storage: 20Gi             
-          limits:
-            cpu: 4000m
-            memory: 4096Mi
-            ephemeral-storage: 20Gi            
-        ports:
-        - containerPort:  80
-          name: brandcultivation
-        volumeMounts:
-        - name: localtime
-          mountPath: /etc/localtime
-        - name: config
-          mountPath: /app/config/database_config.yaml
-          subPath: database_config.yaml
-        - name: config
-          mountPath: /etc/cron.d/crontab
-          subPath: crontab          
-        - name: localtime
-          hostPath:
-            path: /usr/share/zoneinfo/Asia/Shanghai
-        - name: config
-          configMap:
-            name: brandcultivation
-      restartPolicy: Always
----
-kind: ConfigMap
-apiVersion: v1
-metadata:
-  name: brandcultivation
-  namespace: default
-data:
-  database_config.yaml: |
-    mysql:
-      host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
-      port: 3036
-      db: 'brand_cultivation'
-      user: 'BrandCultivation'
-      passwd: '8BfWBc18NBXl#CMd'
-
-    redis:
-      host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
-      port: 5000
-      db: 10
-      passwd: 'gHmNkVBd88sZybj'
-  crontab: |
-    # START CRON JOB
-    1 2 * * * python /app/app.pyc
-    # END CRON JOB
-
+# 烟草推荐模型部署文档
+
+## 1、配置文件说明:
+
+- ### database_config.yaml  这个是数据配置文件
+    
+
+```
+mysql:
+  host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
+  port: 3036
+  db: 'brand_cultivation'
+  user: 'xxxxx'
+  passwd: 'xxxxx'
+
+redis:
+  host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
+  port: 5000
+  db: 10
+  passwd: 'xxxxx'
+```
+
+- ### crontab 定时任务配置文件
+    
+
+```
+# START CRON JOB
+1 2 * * * python /app/app.pyc --run_all
+# END CRON JOB
+```
+
+ 
+
+## 2、模型启动配置说明:
+
+### app.py
+
+```
+    parser.add_argument("--run_all", action='store_true')
+    parser.add_argument("--run_hot", action='store_true')
+    parser.add_argument("--run_itemcf", action='store_true')
+    parser.add_argument("--run_itemcf_inference", action='store_true')
+    parser.add_argument("--city_uuid", type=str, help="City UUID for filtering data")
+```
+
+### 总共有4种启动模式分别是:
+
+1. 启动热度召回和协同过滤（--run_all）  
+2. 启动热度召回（--run_hot）  
+3. 启动协同过滤（--run_itemcf）  
+4. 启动协同过滤推理（--run_itemcf_inference）
+
+## 3、GBDT LR模型训练推理启动
+
+### gbdt_lr.py
+
+```
+    parser.add_argument("--run_train", action='store_true')
+    parser.add_argument("--recommend", action='store_true')
+    parser.add_argument("--importance", action='store_true')
+
+    parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301')
+    parser.add_argument("--product_id", type=str, default='110102')
+```
+
+### gbdt_lr总共3个功能:
+
+1. 启动gbdt_lr训练：python -m gbdt_lr --run_train --city_uuid "00000000000000000000000011445301"  
+2. 根据城市id和product_id进行推荐，需要指定city_uuid、product_id：python -m gbdt_lr --recommend --city_uuid "00000000000000000000000011445301" --product_id '110102'  
+3. 获取指定城市的特征重要性指标：python -m gbdt_lr --importance --city_uuid "00000000000000000000000011445301"  
+注意:在数据准备阶段,会将训练数据保存到./models/rank/data/gbdt_data.csv中  
+模型文件会存放在 ./models/rank/weights/city_uuid/model.pkl  
+重要性指标会存放在 ./models/rank/weights/下,分别是商户指标重要性和卷烟指标重要性  
+
+## 4、模型docker运行配置说明:
+
+### docker镜像是:registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
+
+```yaml
+docker run --name BrandCultivation -d -v /export/brandcultivation/crontab:/etc/cron.d/crontab -v /export/brandcultivation/database_config.yaml:/app/config/database_config.yaml  registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
+```
+
+## 5、模型kubernetes运行配置说明
+
+yaml文件如下:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: brandcultivation
+  namespace: default
+  labels:
+    app: brandcultivation
+spec:
+  selector:
+    matchLabels:
+      app: brandcultivation
+  replicas: 1
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: brandcultivation
+    spec:
+      containers:
+      - name: brandcultivation
+        image: registry.cn-hangzhou.aliyuncs.com/hexiaoshi/brandcultivation:0.0.1
+        imagePullPolicy: IfNotPresent
+        resources:
+          requests:
+            cpu: 4000m
+            memory: 4096Mi
+            ephemeral-storage: 20Gi             
+          limits:
+            cpu: 4000m
+            memory: 4096Mi
+            ephemeral-storage: 20Gi            
+        ports:
+        - containerPort:  80
+          name: brandcultivation
+        volumeMounts:
+        - name: localtime
+          mountPath: /etc/localtime
+        - name: config
+          mountPath: /app/config/database_config.yaml
+          subPath: database_config.yaml
+        - name: config
+          mountPath: /etc/cron.d/crontab
+          subPath: crontab
+      volumes:
+      - name: localtime
+        hostPath:
+          path: /usr/share/zoneinfo/Asia/Shanghai
+      - name: config
+        configMap:
+          name: brandcultivation
+      restartPolicy: Always
+---
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: brandcultivation
+  namespace: default
+data:
+  database_config.yaml: |
+    mysql:
+      host: 'rm-t4n6rz18y4t5x47y70o.mysql.singapore.rds.aliyuncs.com'
+      port: 3036
+      db: 'brand_cultivation'
+      user: 'BrandCultivation'
+      passwd: '8BfWBc18NBXl#CMd'
+
+    redis:
+      host: 'r-t4nb4n9i8je7u6ogk1pd.redis.singapore.rds.aliyuncs.com'
+      port: 5000
+      db: 10
+      passwd: 'gHmNkVBd88sZybj'
+  crontab: |
+    # START CRON JOB
+    1 2 * * * python /app/app.pyc
+    # END CRON JOB
+
 ```