Sherlock пре 1 година
родитељ
комит
4d611026c0
5 измењених фајлова са 159 додато и 127 уклоњено
  1. 1 1
      app.py
  2. 1 1
      dao/mysql_client.py
  3. 156 123
      models/rank/data/config.py
  4. 0 1
      models/rank/data/dataloader.py
  5. 1 1
      models/rank/gbdt_lr.py

+ 1 - 1
app.py

@@ -59,7 +59,7 @@ def run():
     # parser.add_argument("--similarity_matrix_path", type=str, default="./models/recall/itemCF/matrix/similarity.csv")
     parser.add_argument("--n", type=int, default=100)
     parser.add_argument("--k", type=int, default=20)
-    parser.add_argument("--top_n", type=int, default=200, help='default n * k')
+    parser.add_argument("--top_n", type=int, default=2000, help='default n * k')
     parser.add_argument("--n_jobs", type=int, default=4)
     parser.add_argument("--city_uuid", type=str, default='00000000000000000000000011445301', help="City UUID for filtering data")
     

+ 1 - 1
dao/mysql_client.py

@@ -95,7 +95,7 @@ class Mysql(object):
         
         query = text(f"""
             SELECT * 
-            FROM tads_brandcul_cust_info 
+            FROM tads_brandcul_cust_info_bak
             WHERE BA_CITY_ORG_CODE = :city_uuid 
             AND BB_RETAIL_CUSTOMER_CODE IN ({cust_id_str})
         """)

+ 156 - 123
models/rank/data/config.py

@@ -1,65 +1,86 @@
 class CustConfig:
     FEATURE_COLUMNS = [
         "BB_RETAIL_CUSTOMER_CODE",                     # 零售户代码
-        "BB_RTL_CUST_POSITION_TYPE_NAME",              # 零售户商圈类型名称
-        "BB_RTL_CUST_MARKET_TYPE_NAME",                # 零售户市场类型名称
-        # "BB_RTL_CUST_BUSINESS_TYPE_NAME",              # 零售户业态名称
-        "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",             # 零售户业态细分名称
         "BB_RTL_CUST_GRADE_NAME",                      # 零售户分档名称
+        "BB_RTL_CUST_MARKET_TYPE_NAME",                # 零售户市场类型名称
+        "STORE_AREA",                                  # 店铺经营面积
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME",              # 零售户业态名称
+        "OPERATOR_EDU_LEVEL",                          # 零售客户经营者文化程度
+        "OPERATOR_AGE",                                # 经营者年龄
+        "BB_RTL_CUST_CHAIN_FLAG",                      # 零售户连锁标识
+        "PRESENT_STAR_TERMINAL",                       # 终端星级
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",           # 零售户信用等级名称
+        "MD04_DIR_SAL_STORE_FLAG",                     # 直营店标识
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",       # 新品订货量占同价类比重
+        "PRODUCT_LISTING_RATE",                        # 品规上架率
+        # "STOCKOUT_DAYS",                              # 断货天数
+        # "YLT_TURNOVER_RATE",                           # 易灵通动销率
+        # "YLT_BAR_PACKAGE_SALE_OCC",                    # 易灵通条包销售占比
+        # "PRODUCT_INSALE_QTY",                          # 在销品规数
+        # "UNPACKING_RATE",                              # 拆包率
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME",              # 零售户商圈类型名称
+        
+        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",             # 零售户业态细分名称
+        
         # "BB_RTL_CUST_TERMINAL_LEVEL_NAME",             # 零售户终端层级名称
         # "BB_RTL_CUST_TERMINALEVEL_NAME",               # 零售户终端层级细分名称
-        # "MD04_MG_RTL_CUST_CREDITCLASS_NAME",           # 零售户信用等级名称
         # "MD04_MG_SAMPLE_CUST_FLAG",                    # 样本户标识
         # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG",            # 零售户大户标识
         # "BB_CUSTOMER_MANAGER_SCOPE_NAME",              # 零售户经营范围名称
         # "BB_RTL_CUST_OPERATE_METHOD_NAME",             # 零售户经营方式名称
         # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME",          # 零售户卷烟经营规模名称
-        "BB_RTL_CUST_CHAIN_FLAG",                      # 零售户连锁标识
-        "MD04_DIR_SAL_STORE_FLAG",                     # 直营店标识
-        "STORE_AREA",                                  # 店铺经营面积
-        "OPERATOR_AGE",                                # 经营者年龄
-        "OPERATOR_EDU_LEVEL",                          # 零售客户经营者文化程
+        
         # "AVERAGE_CONSUMER_FLOW",                       # 月均消费人流
         # "NEW_PRODUCT_MEMBERS_QTY",                     # 新品消费会员数量
     ]
     # 数据清洗规则
     CLEANING_RULES = {
-        "BB_RTL_CUST_POSITION_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
-        "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
         "BB_RTL_CUST_GRADE_NAME":                   {"method": "fillna", "opt": "fill", "value": "十五档", "type": "str"},
+        "BB_RTL_CUST_MARKET_TYPE_NAME":             {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
+        "STORE_AREA":                               {"method": "fillna", "opt": "mean", "type": "num"},
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        "OPERATOR_EDU_LEVEL":                       {"method": "fillna", "opt": "fill", "value": "无数据", "type": "str"},
+        "OPERATOR_AGE":                             {"method": "fillna", "opt": "mean", "type": "num"},
+        "BB_RTL_CUST_CHAIN_FLAG":                   {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "PRESENT_STAR_TERMINAL":                    {"method": "fillna", "opt": "fill", "value": "非星级", "type": "str"},
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        {"method": "fillna", "opt": "fill", "value": "D", "type": "str"},
+        "MD04_DIR_SAL_STORE_FLAG":                  {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC":    {"method": "fillna", "opt": "mean", "type": "num"},
+        "PRODUCT_LISTING_RATE":                     {"method": "fillna", "opt": "mean", "type": "num"},
+        # "STOCKOUT_DAYS":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        # "YLT_TURNOVER_RATE":                        {"method": "fillna", "opt": "mean", "type": "num"},
+        # "NEW_PRODUCT_MEMBERS_QTY":                  {"method": "fillna", "opt": "mean", "type": "num"},
+        # "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
+        # "UNPACKING_RATE":                           {"method": "fillna", "opt": "mean", "type": "num"},
+        
+        
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
+        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
         # "BB_RTL_CUST_TERMINALEVEL_NAME":          {"method": "fillna", "opt": "replace", "value": "BB_RTL_CUST_TERMINAL_LEVEL_NAME", "type": "str"},
-        # "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        {"method": "fillna", "opt": "fill", "value": "未评价", "type": "str"},
         # "MD04_MG_SAMPLE_CUST_FLAG":                 {"method": "fillna", "value": "N", "opt": "fill"},
         # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG":         {"method": "fillna", "value": "N", "opt": "fill"},
         # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME":       {"method": "fillna", "value": "中", "opt": "fill"},
-        "BB_RTL_CUST_CHAIN_FLAG":                   {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "MD04_DIR_SAL_STORE_FLAG":                  {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "STORE_AREA":                               {"method": "fillna", "opt": "mean", "type": "num"},
-        "OPERATOR_AGE":                             {"method": "fillna", "opt": "mean", "type": "num"},
-        "OPERATOR_EDU_LEVEL":                       {"method": "fillna", "opt": "fill", "value": "01", "type": "str"},
     }
-    # one-hot编码
-    ONEHOT = [
-        "BB_RTL_CUST_POSITION_TYPE_NAME",
-        "BB_RTL_CUST_MARKET_TYPE_NAME",
-        "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",
-        "BB_RTL_CUST_GRADE_NAME",
-        "BB_RTL_CUST_CHAIN_FLAG",
-        "MD04_DIR_SAL_STORE_FLAG",
-        "OPERATOR_EDU_LEVEL",
-    ]
     
     ONEHOT_CAT = {
-        "BB_RTL_CUST_POSITION_TYPE_NAME":           ["居民区", "商业娱乐区", "交通枢纽区", "旅游景区", "工业区", "集贸区", "院校学区", "办公区", "其他"],
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             ["城网", "农网"],
-        "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
         "BB_RTL_CUST_GRADE_NAME":                   ['一档', '二档', '三档', '四档', '五档', '六档', '七档', '八档', '九档', '十档', '十一档', '十二档', 
                                                     '十三档', '十四档', '十五档', '十六档', '十七档', '十八档', '十九档', '二十档', '二十一档', '二十二档', 
                                                     '二十三档', '二十四档', '二十五档', '二十六档', '二十七档', '二十八档', '二十九档', '三十档'],
+        "BB_RTL_CUST_MARKET_TYPE_NAME":             ["城网", "农网"],
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
+        "OPERATOR_EDU_LEVEL":                       [1, 2, 3, 4, 5, 6, 7, "无数据"],
         "BB_RTL_CUST_CHAIN_FLAG":                   ["是", "否"],
+        "PRESENT_STAR_TERMINAL":                    ["一星", "二星", "三星", "四星", "五星", "非星级"],
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        ["AAA", "AA", "A", "B", "C", "D"],
         "MD04_DIR_SAL_STORE_FLAG":                  ["是", "否"],
-        "OPERATOR_EDU_LEVEL":                       [1, 2, 3, 4, 5, 6, 7]
+        
+        
+        
+        # "BB_RTL_CUST_POSITION_TYPE_NAME":           ["居民区", "商业娱乐区", "交通枢纽区", "旅游景区", "工业区", "集贸区", "院校学区", "办公区", "其他"]
     }
     
     
@@ -67,111 +88,123 @@ class CustConfig:
 class ProductConfig:
     FEATURE_COLUMNS = [
         "product_code",                                # 商品编码
-        # "factory_name",                                # 工业公司名称
-        # "brand_code",                                  # 品牌编码
-        "adjust_price",                                # 含税调拨价
-        "notwithtax_adjust_price",                     # 不含税调拨价
-        "whole_sale_price",                            # 统一批发价
         "direct_retail_price",                         # 建议零售价
-        "allot_price",                                 # 调拨价
-        "direct_whole_price",                          # 批发指导价
-        "retail_price",                                # 零售价
-        "price_type_name",                             # 卷烟价类名称
-        "gear_type_name",                              # 卷烟档位名称
-        "category_type_name",                          # 卷烟品类名称
-        "is_key_brand",                                # 是否重点品牌
-        "is_high_level",                               # 是否高端烟
-        "is_upscale_level",                            # 是否高端烟不含高价
-        "is_high_price",                               # 是否高价烟
-        "is_low_price",                                # 是否低价烟
         "is_low_tar",                                  # 是否低焦油烟
-        "is_encourage",                                # 是否全国鼓励品牌
-        "is_abnormity",                                # 是否异形包装
-        "is_intake",                                   # 是否进口烟
-        "is_short",                                    # 是否紧俏品牌
-        "is_medium",                                   # 是否中支烟
-        "is_shortbranch",                              # 是否短支烟
-        "is_ordinary_price_type",                      # 是否普一类烟
-        "source_type",                                 # 来源类型
         "tar_qty",                                     # 焦油含量
+        "is_exploding_beads",                          # 是否爆珠
+        "is_shortbranch",                              # 是否短支烟
+        "is_medium",                                   # 是否中支烟
+        "is_tiny",                                     # 是否细支
         "product_style_code_name",                     # 包装类型名称
-        "chinese_mix",                                 # 中式混合
-        "sub_price_type_name",                         # 细分卷烟价类名称
+        "org_is_abnormity",                            # 是否异形包装
+        "is_chuangxin",                                # 是否创新品类
+        "is_key_brand",                                # 是否重点品牌
+        "foster_level_hy",                             # 是否行业共育品规
+        "foster_level_sj",                             # 是否省级共育品规
+        "is_cigar",                                    # 是否雪茄型卷烟
+        "co_qty",                                      # 一氧化碳含量
+        "tbc_total_length",                            # 烟支总长度
+        "tbc_length",                                  # 烟支长度
+        "filter_length",                               # 滤嘴长度
+        
+
+        
+        # "adjust_price",                                # 含税调拨价
+        # "notwithtax_adjust_price",                     # 不含税调拨价
+        # "whole_sale_price",                            # 统一批发价
+        # "allot_price",                                 # 调拨价
+        # "direct_whole_price",                          # 批发指导价
+        # "retail_price",                                # 零售价
+        # "price_type_name",                             # 卷烟价类名称
+        # "gear_type_name",                              # 卷烟档位名称
+        # "category_type_name",                          # 卷烟品类名称
+        # "is_high_level",                               # 是否高端烟
+        # "is_upscale_level",                            # 是否高端烟不含高价
+        # "is_high_price",                               # 是否高价烟
+        # "is_low_price",                                # 是否低价烟
+        # "is_encourage",                                # 是否全国鼓励品牌
+        # "is_abnormity",                                # 是否异形包装
+        # "is_intake",                                   # 是否进口烟
+        # "is_short",                                    # 是否紧俏品牌
+        # "is_ordinary_price_type",                      # 是否普一类烟
+        # "source_type",                                 # 来源类型
+        # "chinese_mix",                                 # 中式混合
+        # "sub_price_type_name",                         # 细分卷烟价类名称
     ]
     
     CLEANING_RULES = {
-        "adjust_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        "notwithtax_adjust_price":                     {"method": "fillna", "opt": "mean", "type": "num"},
-        "whole_sale_price":                            {"method": "fillna", "opt": "mean", "type": "num"},
         "direct_retail_price":                         {"method": "fillna", "opt": "mean", "type": "num"},
-        "allot_price":                                 {"method": "fillna", "opt": "fill", "type": "num", "value": 0.0},
-        "direct_whole_price":                          {"method": "fillna", "opt": "mean", "type": "num"},
-        "retail_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        "price_type_name":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "一类烟"},
-        "gear_type_name":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        "category_type_name":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        "is_key_brand":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_high_level":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_upscale_level":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_high_price":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_low_price":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
         "is_low_tar":                                  {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_encourage":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_abnormity":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_intake":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_short":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_medium":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_shortbranch":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_ordinary_price_type":                      {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "source_type":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
         "tar_qty":                                     {"method": "fillna", "opt": "mean", "type": "num"},
+        "is_exploding_beads":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_shortbranch":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_medium":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
         "product_style_code_name":                     {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        "chinese_mix":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "sub_price_type_name":                         {"method": "fillna", "opt": "fill", "type": "str", "value": "普一类烟"},
+        "org_is_abnormity":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_chuangxin":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_key_brand":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "foster_level_hy":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "foster_level_sj":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "is_cigar":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        "co_qty":                                      {"method": "fillna", "opt": "mean", "type": "num"},
+        "tbc_total_length":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        "tbc_length":                                  {"method": "fillna", "opt": "mean", "type": "num"},
+        "filter_length":                               {"method": "fillna", "opt": "mean", "type": "num"},
+        
+        
+        # "adjust_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
+        # "notwithtax_adjust_price":                     {"method": "fillna", "opt": "mean", "type": "num"},
+        # "whole_sale_price":                            {"method": "fillna", "opt": "mean", "type": "num"},
+        # "allot_price":                                 {"method": "fillna", "opt": "fill", "type": "num", "value": 0.0},
+        # "direct_whole_price":                          {"method": "fillna", "opt": "mean", "type": "num"},
+        # "retail_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
+        # "price_type_name":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "一类烟"},
+        # "gear_type_name":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "category_type_name":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "is_high_level":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_upscale_level":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_high_price":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_low_price":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_encourage":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_abnormity":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_intake":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_short":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "is_ordinary_price_type":                      {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "source_type":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
+        # "chinese_mix":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
+        # "sub_price_type_name":                         {"method": "fillna", "opt": "fill", "type": "str", "value": "普一类烟"},
     }
     
-    ONEHOT = [
-        "price_type_name",                             # 卷烟价类名称
-        "gear_type_name",                              # 卷烟档位名称
-        "category_type_name",                          # 卷烟品类名称
-        "is_key_brand",                                # 是否重点品牌
-        "is_high_level",                               # 是否高端烟
-        "is_upscale_level",                            # 是否高端烟不含高价
-        "is_high_price",                               # 是否高价烟
-        "is_low_price",                                # 是否低价烟
-        "is_low_tar",                                  # 是否低焦油烟
-        "is_encourage",                                # 是否全国鼓励品牌
-        "is_abnormity",                                # 是否异形包装
-        "is_intake",                                   # 是否进口烟
-        "is_short",                                    # 是否紧俏品牌
-        "is_medium",                                   # 是否中支烟
-        "is_shortbranch",                              # 是否短支烟
-        "is_ordinary_price_type",                      # 是否普一类烟
-        "source_type",                                 # 来源类型
-        "product_style_code_name",                     # 包装类型名称
-        "chinese_mix",                                 # 中式混合
-        "sub_price_type_name",                         # 细分卷烟价类名称
-    ]
+
     ONEHOT_CAT = {
-        "price_type_name":                             ["一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
-        "gear_type_name":                              ["第一档位", "第二档位", "第三档位", "第四档位", "第五档位", "第六档位", "第七档位", "第八档位", "其他"],
-        "category_type_name":                          ["第1品类", "第2品类", "第3品类", "第4品类", "第5品类", "第6品类", "第7品类", 
-                                                        "第8品类", "第9品类", "第10品类", "第11品类", "第12品类", "第13品类", "其他"],
-        "is_key_brand":                                ["是", "否"],
-        "is_high_level":                               ["是", "否"],
-        "is_upscale_level":                            ["是", "否"],
-        "is_high_price":                               ["是", "否"],
-        "is_low_price":                                ["是", "否"],
         "is_low_tar":                                  ["是", "否"],
-        "is_encourage":                                ["是", "否"],
-        "is_abnormity":                                ["是", "否"],
-        "is_intake":                                   ["是", "否"],
-        "is_short":                                    ["是", "否"],
-        "is_medium":                                   ["是", "否"],
+        "is_exploding_beads":                          ["是", "否"],
         "is_shortbranch":                              ["是", "否"],
-        "is_ordinary_price_type":                      ["是", "否"],
-        "source_type":                                 ["是", "否"],
+        "is_medium":                                   ["是", "否"],
         "product_style_code_name":                     ["条盒硬盒", "条包硬盒", "条盒软盒", "条包软盒", "铁盒", "其他"],
-        "chinese_mix":                                 ["是", "否"],
-        "sub_price_type_name":                         ["高端烟", "高价位烟", "普一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
+        "org_is_abnormity":                            ["是", "否"],
+        "is_chuangxin":                                ["是", "否"],
+        "is_key_brand":                                ["是", "否"],
+        "foster_level_hy":                             ["是", "否"],
+        "foster_level_sj":                             ["是", "否"],
+        "is_cigar":                                    ["是", "否"],
+        
+        
+        
+        # "price_type_name":                             ["一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
+        # "gear_type_name":                              ["第一档位", "第二档位", "第三档位", "第四档位", "第五档位", "第六档位", "第七档位", "第八档位", "其他"],
+        # "category_type_name":                          ["第1品类", "第2品类", "第3品类", "第4品类", "第5品类", "第6品类", "第7品类", 
+        #                                                 "第8品类", "第9品类", "第10品类", "第11品类", "第12品类", "第13品类", "其他"],
+        # "is_high_level":                               ["是", "否"],
+        # "is_upscale_level":                            ["是", "否"],
+        # "is_high_price":                               ["是", "否"],
+        # "is_low_price":                                ["是", "否"],
+        # "is_encourage":                                ["是", "否"],
+        # "is_abnormity":                                ["是", "否"],
+        # "is_intake":                                   ["是", "否"],
+        # "is_short":                                    ["是", "否"],
+        # "is_ordinary_price_type":                      ["是", "否"],
+        # "source_type":                                 ["是", "否"],
+        # "chinese_mix":                                 ["是", "否"],
+        # "sub_price_type_name":                         ["高端烟", "高价位烟", "普一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
     }

+ 0 - 1
models/rank/data/dataloader.py

@@ -38,7 +38,6 @@ class DataLoader:
         scaler = StandardScaler()
         X_train[self._numeric_columns] = scaler.fit_transform(X_train[self._numeric_columns])
         X_test[self._numeric_columns] = scaler.fit_transform(X_test[self._numeric_columns])
-        print(X_test["notwithtax_adjust_price"])
         
         train_dataset = {"data": X_train, "label": y_train}
         test_dataset = {"data": X_test, "label": y_test}

+ 1 - 1
models/rank/gbdt_lr.py

@@ -24,7 +24,7 @@ class Trainer:
         self._lr_params = {
             "max_iter": 1000,
             'C': 1.0, 
-            'penalty': 'l2', 
+            'penalty': 'l1', 
             # 'l1_ratio': 0.5,  # 添加 l1_ratio 参数,可以根据需要调整
             'solver': 'sag',
             'random_state': 42,