Преглед на файлове

更新特征重要性map

yangzeyu преди 11 месеца
родител
ревизия
f4db2212a9
променени са 2 файла, в които са добавени 211 реда и са изтрити 246 реда
  1. 188 226
      models/rank/data/config.py
  2. 23 20
      models/rank/gbdt_lr_sort.py

+ 188 - 226
models/rank/data/config.py

@@ -131,7 +131,6 @@ class OrderConfig:
         # "sale_amt",                       # 销售额包
     ]
     
-
 class ShopConfig:
     FEATURE_COLUMNS = [
         "cust_code",                      # 客户编码
@@ -633,235 +632,198 @@ class ShopConfig:
     }
 
 class ImportanceFeaturesMap:
-    CUSTOM_FEATRUES_MAP = {
-        "BB_RTL_CUST_GRADE_NAME":                           "零售户分档名称",
-        "BB_RTL_CUST_MARKET_TYPE_NAME":                     "零售户市场类型名称",
-        "STORE_AREA":                                       "店铺经营面积",
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":                   "零售户业态名称",
-        "OPERATOR_EDU_LEVEL":                               "零售客户经营者文化程",
-        "OPERATOR_AGE":                                     "经营者年龄",
-        "BB_RTL_CUST_CHAIN_FLAG":                           "零售户连锁标识",
-        "PRESENT_STAR_TERMINAL":                            "终端星级",
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":                "零售户信用等级名称",
-        "MD04_DIR_SAL_STORE_FLAG":                          "直营店标识",
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":                   "零售户经营范围名称",
-        "PRODUCT_INSALE_QTY":                               "在销品规数",
-        # "CUST_INVESTMENT":                                  "店铺资源投入建设",
+    CUSTOM_FEATURES_MAP = {
+        "BB_RTL_CUST_MARKET_TYPE_NAME":         "零售户市场类型名称",
+        "BB_RTL_CUST_BUSINESS_TYPE_NAME":       "零售客户业态名称",
+        "BB_RTL_CUST_CHAIN_FLAG":               "零售户连锁标识",
+        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":    "零售户信用等级名称",
+        "MD04_DIR_SAL_STORE_FLAG":              "直营店标识",
+        "BB_CUSTOMER_MANAGER_SCOPE_NAME":       "零售户经营范围名称",
+        "BB_RTL_CUST_TERMINAL_LEVEL_NAME":      "零售户终端层级名称",
+        "OPERATOR_EDU":                         "零售客户经营者文化程度",
+        "STORE_AREA":                           "店铺经营面积",
+        "OPERATOR_AGE":                         "经营者年龄",
+        "PRODUCT_INSALE_QTY":                   "在销品规数",
     }
     
     PRODUCT_FEATRUES_MAP = {
-        # ProductConfig 字段映射
-        "direct_retail_price":                              "建议零售价",
-        "is_low_tar":                                       "是否低焦油烟",
-        "tar_qty":                                          "焦油含量",
-        "is_exploding_beads":                               "是否爆珠",
-        "is_shortbranch":                                   "是否短支烟",
-        "is_medium":                                        "是否中支烟",
-        "is_tiny":                                          "是否细支",
-        "product_style_code_name":                          "包装类型名称",
-        "org_is_abnormity":                                 "是否异形包装",
-        "is_chuangxin":                                     "是否创新品类",
-        "is_key_brand":                                     "是否重点品牌",
-        "foster_level_hy":                                  "是否行业共育品规",
-        "foster_level_sj":                                  "是否省级共育品规",
-        "is_cigar":                                         "是否雪茄型卷烟",
-        "co_qty":                                           "一氧化碳含量",
-        "tbc_total_length":                                 "烟支总长度",
-        "tbc_length":                                       "烟支长度",
-        "filter_length":                                    "滤嘴长度",
-    }
-    
-    ORDER_FEATURE_MAP = {
-        "MONTH6_SALE_QTY": "近半年销量(箱)",
-        "MONTH6_SALE_AMT": "近半年销售额(万元)",
-        "MONTH6_GROSS_PROFIT_RATE": "近半年毛利率",
-        "MONTH6_SALE_QTY_YOY": "销量同比",
-        "MONTH6_SALE_QTY_MOM": "销量环比",
-        "MONTH6_SALE_AMT_YOY": "销售额(购进额)同比",
-        "MONTH6_SALE_AMT_MOM": "销售额(购进额)环比",
-        "STOCK_QTY": "库存",
-        "ORDER_FULLORDR_RATE": "订足率",
-        "ORDER_FULLORDR_RATE_MOM": "订足率环比",
-        "FULL_FILLMENT_RATE": "订单满足率",
-        "CUSTOMER_REPURCHASE_RATE": "会员重购率(部分有会员)",
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC": "新品订货量占同价类比重",
-        "DEMAND_RATE": "需求量满足率",
-        "LISTING_RATE": "品规上架率",
-        "PUT_MARKET_FINISH_RATE": "投放完成率",
-        "OUT_STOCK_DAYS": "断货天数(部分零售商有)",
-        "UNPACKING_RATE": "拆包率",
-        "city_uuid": "城市UUID"
-    }
-    
-    POS_FEATURE_MAP = {
-        "YLT_TURNOVER_RATE": "易灵通动销率",
-        "YLT_BAR_PACKAGE_SALE_OCC": "易灵通条包销售占比",
-        "POS_PACKAGE_PRICE": "POS机单包价格",
+        "factory_name":              "产地",
+        "brand_name":                "品牌名称",
+        "is_low_tar":                "低焦油卷烟",
+        "is_medium":                 "中支烟",
+        "is_tiny":                   "细支烟",
+        "is_coarse":                 "粗支烟",
+        "is_exploding_beads":        "爆珠烟",
+        "is_abnormity":              "异形包装",
+        "is_cig":                    "雪茄烟",
+        "is_chuangxin":              "创新品类",
+        "direct_retail_price":       "卷烟建议零售价",
+        "tbc_total_length":          "烟支总长度",
+        "product_style":             "包装类型",
     }
     
     SHOPING_FEATURES_MAP = {
-        # 商圈 字段映射
-        "r_home_num": "常驻人口_居住人数",
-        "r_work_num": "常驻人口_工作人数",
-        "r_resident_num": "常驻人口_工作或居住人数",
-        "r_urban_cons_middle": "常驻人口_城市消费水平_中",
-        "r_urban_cons_low": "常驻人口_城市消费水平_低",
-        "r_urban_cons_lower": "常驻人口_城市消费水平_次低",
-        "r_urban_cons_secondhigh": "常驻人口_城市消费水平_次高",
-        "r_urban_cons_high": "常驻人口_城市消费水平_高",
-        "r_edu_junior_middle": "常驻人口_学历_初中",
-        "r_edu_doctor": "常驻人口_学历_博士",
-        "r_edu_specialty": "常驻人口_学历_大专",
-        "r_edu_primary": "常驻人口_学历_小学",
-        "r_edu_college": "常驻人口_学历_本科",
-        "r_edu_postgraduate": "常驻人口_学历_硕士",
-        "r_edu_senior_middle": "常驻人口_学历_高中",
-        "r_house_price79999": "常驻人口_居住社区房价_60000_79999",
-        "r_house_price59999": "常驻人口_居住社区房价_40000_59999",
-        "r_house_price39999": "常驻人口_居住社区房价_20000_39999",
-        "r_house_price19999": "常驻人口_居住社区房价_10000_19999",
-        "r_house_price9999": "常驻人口_居住社区房价_8000_9999",
-        "r_house_price7999": "常驻人口_居住社区房价_5000_7999",
-        "r_house_price4999": "常驻人口_居住社区房价_2000_4999",
-        "r_age_17": "常驻人口_年龄_0_17",
-        "r_age_24": "常驻人口_年龄_18_24",
-        "r_age_30": "常驻人口_年龄_25_30",
-        "r_age_35": "常驻人口_年龄_31_35",
-        "r_age_40": "常驻人口_年龄_36_40",
-        "r_age_45": "常驻人口_年龄_41_45",
-        "r_age_60": "常驻人口_年龄_46_60",
-        "r_age_over_60": "常驻人口_年龄_61以上",
-        "r_sex_woman": "常驻人口_性别_女",
-        "r_sex_man": "常驻人口_性别_男",
-        "r_catering_50": "常驻人口_餐饮消费水平_50",
-        "r_catering_100": "常驻人口_餐饮消费水平_100",
-        "r_catering_150": "常驻人口_餐饮消费水平_150",
-        "r_catering_200": "常驻人口_餐饮消费水平_200",
-        "r_catering_500": "常驻人口_餐饮消费水平_500",
-        "r_catering_over_500": "常驻人口_餐饮消费水平_500以上",
-        "r_catering_times_2": "常驻人口_餐饮消费频次_1_2",
-        "r_catering_times_4": "常驻人口_餐饮消费频次_2_4",
-        "r_catering_times_6": "常驻人口_餐饮消费频次_4_6",
-        "r_catering_times_8": "常驻人口_餐饮消费频次_6_8",
-        "r_catering_times_10": "常驻人口_餐饮消费频次_8_10",
-        "r_catering_times_11": "常驻人口_餐饮消费频次_11以上",
-        "r_native_beijing": "常驻人口_家乡地_北京市",
-        "r_native_tianjing": "常驻人口_家乡地_天津市",
-        "r_native_hebei": "常驻人口_家乡地_河北省",
-        "r_native_shanxi": "常驻人口_家乡地_山西省",
-        "r_native_neimeng": "常驻人口_家乡地_内蒙古",
-        "r_native_liaoning": "常驻人口_家乡地_辽宁省",
-        "r_native_jilin": "常驻人口_家乡地_吉林省",
-        "r_native_heilongjiang": "常驻人口_家乡地_黑龙江省",
-        "r_native_shanghai": "常驻人口_家乡地_上海市",
-        "r_native_jiangsu": "常驻人口_家乡地_江苏省",
-        "r_native_zhejiang": "常驻人口_家乡地_浙江省",
-        "r_native_anhui": "常驻人口_家乡地_安徽省",
-        "r_native_fujian": "常驻人口_家乡地_福建省",
-        "r_native_jiangix": "常驻人口_家乡地_江西省",
-        "r_native_shandong": "常驻人口_家乡地_山东省",
-        "r_native_henan": "常驻人口_家乡地_河南省",
-        "r_native_hubei": "常驻人口_家乡地_湖北省",
-        "r_native_hunan": "常驻人口_家乡地_湖南省",
-        "r_native_guangdong": "常驻人口_家乡地_广东省",
-        "r_native_hainan": "常驻人口_家乡地_海南省",
-        "r_native_sichuan": "常驻人口_家乡地_四川省",
-        "r_native_guizhou": "常驻人口_家乡地_贵州省",
-        "r_native_yunnan": "常驻人口_家乡地_云南省",
-        "r_native_shan": "常驻人口_家乡地_陕西省",
-        "r_native_gansu": "常驻人口_家乡地_甘肃省",
-        "r_native_qinghai": "常驻人口_家乡地_青海省",
-        "r_native_guangxi": "常驻人口_家乡地_广西壮族自治区",
-        "r_native_ningxia": "常驻人口_家乡地_宁夏回族自治区",
-        "r_native_xinjiang": "常驻人口_家乡地_新疆维吾尔自治区",
-        "r_native_xizang": "常驻人口_家乡地_西藏自治区",
-        "r_native_chongqing": "常驻人口_家乡地_重庆市",
-        "r_native_hongkong": "常驻人口_家乡地_香港",
-        "r_native_macao": "常驻人口_家乡地_澳门",
-        "r_native_taiwan": "常驻人口_家乡地_台湾",
-        "r_native_other": "常驻人口_家乡地_其它",
-        "f_flow_num": "流动人口_日均流动人口数量",
-        "f_holiday_flow_num": "流动人口_节假日日均流动人口数量",
-        "f_workday_flow_num": "流动人口_工作日日均流动人口数量",
-        "f_flowurban_cons_middle": "日均流动_城市消费水平_中",
-        "f_flowurban_cons_low": "日均流动_城市消费水平_低",
-        "f_flowurban_cons_lower": "日均流动_城市消费水平_次低",
-        "f_flowurban_cons_second_high": "日均流动_城市消费水平_次高",
-        "f_flowurban_cons_high": "日均流动_城市消费水平_高",
-        "f_flowedu_junior_middle": "日均流动_学历_初中",
-        "f_flowedu_doctor": "日均流动_学历_博士",
-        "f_flowedu_specialty": "日均流动_学历_大专",
-        "f_flowedu_primary": "日均流动_学历_小学",
-        "f_flowedu_college": "日均流动_学历_本科",
-        "f_flowedu_postgraduate": "日均流动_学历_硕士",
-        "f_flowedu_senior_middle": "日均流动_学历_高中",
-        "f_flowhouse_middle": "日均流动_居住社区房价_中",
-        "f_flowhouse_low": "日均流动_居住社区房价_低",
-        "f_flowhouse_lower": "日均流动_居住社区房价_次低",
-        "f_flowhouse_second_high": "日均流动_居住社区房价_次高",
-        "f_flowhouse_high": "日均流动_居住社区房价_高",
-        "f_flowage_17": "日均流动_年龄_0_17",
-        "f_flowage_24": "日均流动_年龄_18_24",
-        "f_flowage_30": "日均流动_年龄_25_30",
-        "f_flowage_35": "日均流动_年龄_31_35",
-        "f_flowage_40": "日均流动_年龄_36_40",
-        "f_flowage_45": "日均流动_年龄_41_45",
-        "f_flowage_60": "日均流动_年龄_46_60",
-        "f_flowage_over_60": "日均流动_年龄_61以上",
-        "f_flowsex_woman": "日均流动_性别_女",
-        "f_flowsex_man": "日均流动_性别_男",
-        "f_holidayurban_cons_middle": "节假日流动_城市消费水平_中",
-        "f_holidayurban_cons_low": "节假日流动_城市消费水平_低",
-        "f_holidayurban_cons_lower": "节假日流动_城市消费水平_次低",
-        "f_holidayurban_cons_secondhigh": "节假日流动_城市消费水平_次高",
-        "f_holidayurban_cons_high": "节假日流动_城市消费水平_高",
-        "f_holidayedu_junior_middle": "节假日流动_学历_初中",
-        "f_holidayedu_doctor": "节假日流动_学历_博士",
-        "f_holidayedu_specialty": "节假日流动_学历_大专",
-        "f_holidayedu_primary": "节假日流动_学历_小学",
-        "f_holidayedu_college": "节假日流动_学历_本科",
-        "f_holidayedu_postgraduate": "节假日流动_学历_硕士",
-        "f_holidayedu_senior_middle": "节假日流动_学历_高中",
-        "f_holidayhouse_middle": "节假日流动_居住社区房价_中",
-        "f_holidayhouse_low": "节假日流动_居住社区房价_低",
-        "f_holidayhouse_lower": "节假日流动_居住社区房价_次低",
-        "f_holidayhouse_second_high": "节假日流动_居住社区房价_次高",
-        "f_holidayhouse_high": "节假日流动_居住社区房价_高",
-        "f_holidayage_17": "节假日流动_年龄_0_17",
-        "f_holidayage_24": "节假日流动_年龄_18_24",
-        "f_holidayage_30": "节假日流动_年龄_25_30",
-        "f_holidayage_35": "节假日流动_年龄_31_35",
-        "f_holidayage_40": "节假日流动_年龄_36_40",
-        "f_holidayage_45": "节假日流动_年龄_41_45",
-        "f_holidayage_60": "节假日流动_年龄_46_60",
-        "f_holidayage_over_60": "节假日流动_年龄_61以上",
-        "f_holidaysex_woman": "节假日流动_性别_女",
-        "f_holidaysex_man": "节假日流动_性别_男",
-        "f_workday_urban_cons_middle": "工作日流动_城市消费水平_中",
-        "f_workday_urban_cons_low": "工作日流动_城市消费水平_低",
-        "f_workday_urban_cons_lower": "工作日流动_城市消费水平_次低",
-        "f_workday_urban_cons_secondhigh": "工作日流动_城市消费水平_次高",
-        "f_workday_urban_cons_high": "工作日流动_城市消费水平_高",
-        "f_workday_edu_junior_middle": "工作日流动_学历_初中",
-        "f_workday_edu_doctor": "工作日流动_学历_博士",
-        "f_workday_edu_specialty": "工作日流动_学历_大专",
-        "f_workday_edu_primary": "工作日流动_学历_小学",
-        "f_workday_edu_college": "工作日流动_学历_本科",
-        "f_workday_edu_postgraduate": "工作日流动_学历_硕士",
-        "f_workday_edu_senior_middle": "工作日流动_学历_高中",
-        "f_workday_house_middle": "工作日流动_居住社区房价_中",
-        "f_workday_house_low": "工作日流动_居住社区房价_低",
-        "f_workday_house_lower": "工作日流动_居住社区房价_次低",
-        "f_workday_house_second_high": "工作日流动_居住社区房价_次高",
-        "f_workday_house_high": "工作日流动_居住社区房价_高",
-        "f_workday_age_17": "工作日流动_年龄_0_17",
-        "f_workday_age_24": "工作日流动_年龄_18_24",
-        "f_workday_age_30": "工作日流动_年龄_25_30",
-        "f_workday_age_35": "工作日流动_年龄_31_35",
-        "f_workday_age_40": "工作日流动_年龄_36_40",
-        "f_workday_age_45": "工作日流动_年龄_41_45",
-        "f_workday_age_60": "工作日流动_年龄_46_60",
-        "f_workday_age_over_60": "工作日流动_年龄_61以上",
-        "f_workday_sex_woman": "工作日流动_性别_女",
-        "f_workday_sex_man": "工作日流动_性别_男"
+        "r_home_num":                          "常驻人口_居住人数",
+        "r_work_num":                          "常驻人口_工作人数",
+        "r_resident_num":                      "常驻人口_工作或居住人数",
+        "r_urban_cons_middle":                 "常驻人口_城市消费水平_中",
+        "r_urban_cons_low":                    "常驻人口_城市消费水平_低",
+        "r_urban_cons_lower":                  "常驻人口_城市消费水平_次低",
+        "r_urban_cons_secondhigh":             "常驻人口_城市消费水平_次高",
+        "r_urban_cons_high":                   "常驻人口_城市消费水平_高",
+        "r_edu_junior_middle":                 "常驻人口_学历_初中",
+        "r_edu_doctor":                        "常驻人口_学历_博士",
+        "r_edu_specialty":                     "常驻人口_学历_大专",
+        "r_edu_primary":                       "常驻人口_学历_小学",
+        "r_edu_college":                       "常驻人口_学历_本科",
+        "r_edu_postgraduate":                  "常驻人口_学历_硕士",
+        "r_edu_senior_middle":                 "常驻人口_学历_高中",
+        "r_house_price79999":                  "常驻人口_居住社区房价_60000_79999",
+        "r_house_price59999":                  "常驻人口_居住社区房价_40000_59999",
+        "r_house_price39999":                  "常驻人口_居住社区房价_20000_39999",
+        "r_house_price19999":                  "常驻人口_居住社区房价_10000_19999",
+        "r_house_price9999":                   "常驻人口_居住社区房价_8000_9999",
+        "r_house_price7999":                   "常驻人口_居住社区房价_5000_7999",
+        "r_house_price4999":                   "常驻人口_居住社区房价_2000_4999",
+        "r_age_17":                            "常驻人口_年龄_0_17",
+        "r_age_24":                            "常驻人口_年龄_18_24",
+        "r_age_30":                            "常驻人口_年龄_25_30",
+        "r_age_35":                            "常驻人口_年龄_31_35",
+        "r_age_40":                            "常驻人口_年龄_36_40",
+        "r_age_45":                            "常驻人口_年龄_41_45",
+        "r_age_60":                            "常驻人口_年龄_46_60",
+        "r_age_over_60":                       "常驻人口_年龄_61以上",
+        "r_sex_woman":                         "常驻人口_性别_女",
+        "r_sex_man":                           "常驻人口_性别_男",
+        "r_catering_50":                       "常驻人口_餐饮消费水平_50",
+        "r_catering_100":                      "常驻人口_餐饮消费水平_100",
+        "r_catering_150":                      "常驻人口_餐饮消费水平_150",
+        "r_catering_200":                      "常驻人口_餐饮消费水平_200",
+        "r_catering_500":                      "常驻人口_餐饮消费水平_500",
+        "r_catering_over_500":                 "常驻人口_餐饮消费水平_500以上",
+        "r_catering_times_2":                  "常驻人口_餐饮消费频次_1_2",
+        "r_catering_times_4":                  "常驻人口_餐饮消费频次_2_4",
+        "r_catering_times_6":                  "常驻人口_餐饮消费频次_4_6",
+        "r_catering_times_8":                  "常驻人口_餐饮消费频次_6_8",
+        "r_catering_times_10":                 "常驻人口_餐饮消费频次_8_10",
+        "r_catering_times_11":                 "常驻人口_餐饮消费频次_11以上",
+        "r_native_beijing":                    "常驻人口_家乡地_北京市",
+        "r_native_tianjing":                   "常驻人口_家乡地_天津市",
+        "r_native_hebei":                      "常驻人口_家乡地_河北省",
+        "r_native_shanxi":                     "常驻人口_家乡地_山西省",
+        "r_native_neimeng":                    "常驻人口_家乡地_内蒙古",
+        "r_native_liaoning":                   "常驻人口_家乡地_辽宁省",
+        "r_native_jilin":                      "常驻人口_家乡地_吉林省",
+        "r_native_heilongjiang":               "常驻人口_家乡地_黑龙江省",
+        "r_native_shanghai":                   "常驻人口_家乡地_上海市",
+        "r_native_jiangsu":                    "常驻人口_家乡地_江苏省",
+        "r_native_zhejiang":                   "常驻人口_家乡地_浙江省",
+        "r_native_anhui":                      "常驻人口_家乡地_安徽省",
+        "r_native_fujian":                     "常驻人口_家乡地_福建省",
+        "r_native_jiangix":                    "常驻人口_家乡地_江西省",
+        "r_native_shandong":                   "常驻人口_家乡地_山东省",
+        "r_native_henan":                      "常驻人口_家乡地_河南省",
+        "r_native_hubei":                      "常驻人口_家乡地_湖北省",
+        "r_native_hunan":                      "常驻人口_家乡地_湖南省",
+        "r_native_guangdong":                  "常驻人口_家乡地_广东省",
+        "r_native_hainan":                     "常驻人口_家乡地_海南省",
+        "r_native_sichuan":                    "常驻人口_家乡地_四川省",
+        "r_native_guizhou":                    "常驻人口_家乡地_贵州省",
+        "r_native_yunnan":                     "常驻人口_家乡地_云南省",
+        "r_native_shan":                       "常驻人口_家乡地_陕西省",
+        "r_native_gansu":                      "常驻人口_家乡地_甘肃省",
+        "r_native_qinghai":                    "常驻人口_家乡地_青海省",
+        "r_native_guangxi":                    "常驻人口_家乡地_广西壮族自治区",
+        "r_native_ningxia":                    "常驻人口_家乡地_宁夏回族自治区",
+        "r_native_xinjiang":                   "常驻人口_家乡地_新疆维吾尔自治区",
+        "r_native_xizang":                     "常驻人口_家乡地_西藏自治区",
+        "r_native_chongqing":                  "常驻人口_家乡地_重庆市",
+        "r_native_hongkong":                   "常驻人口_家乡地_香港",
+        "r_native_macao":                      "常驻人口_家乡地_澳门",
+        "r_native_taiwan":                     "常驻人口_家乡地_台湾",
+        "r_native_other":                      "常驻人口_家乡地_其它",
+        "f_flow_num":                          "流动人口_日均流动人口数量",
+        "f_holiday_flow_num":                  "流动人口_节假日_日均流动人口数量",
+        "f_workday_flow_num":                  "流动人口_工作日_日均流动人口数量",
+        "f_flowurban_cons_middle":             "日均流动_城市消费水平_中",
+        "f_flowurban_cons_low":                "日均流动_城市消费水平_低",
+        "f_flowurban_cons_lower":              "日均流动_城市消费水平_次低",
+        "f_flowurban_cons_second_high":        "日均流动_城市消费水平_次高",
+        "f_flowurban_cons_high":               "日均流动_城市消费水平_高",
+        "f_flowedu_junior_middle":             "日均流动_学历_初中",
+        "f_flowedu_doctor":                    "日均流动_学历_博士",
+        "f_flowedu_specialty":                 "日均流动_学历_大专",
+        "f_flowedu_primary":                   "日均流动_学历_小学",
+        "f_flowedu_college":                   "日均流动_学历_本科",
+        "f_flowedu_postgraduate":              "日均流动_学历_硕士",
+        "f_flowedu_senior_middle":             "日均流动_学历_高中",
+        "f_flowhouse_middle":                  "日均流动_居住社区房价_中",
+        "f_flowhouse_low":                     "日均流动_居住社区房价_低",
+        "f_flowhouse_lower":                   "日均流动_居住社区房价_次低",
+        "f_flowhouse_second_high":             "日均流动_居住社区房价_次高",
+        "f_flowhouse_high":                    "日均流动_居住社区房价_高",
+        "f_flowage_17":                        "日均流动_年龄_0_17",
+        "f_flowage_24":                        "日均流动_年龄_18_24",
+        "f_flowage_30":                        "日均流动_年龄_25_30",
+        "f_flowage_35":                        "日均流动_年龄_31_35",
+        "f_flowage_40":                        "日均流动_年龄_36_40",
+        "f_flowage_45":                        "日均流动_年龄_41_45",
+        "f_flowage_60":                        "日均流动_年龄_46_60",
+        "f_flowage_over_60":                   "日均流动_年龄_61以上",
+        "f_flowsex_woman":                     "日均流动_性别_女",
+        "f_flowsex_man":                       "日均流动_性别_男",
+        "f_holidayurban_cons_middle":          "节假日流动_城市消费水平_中",
+        "f_holidayurban_cons_low":             "节假日流动_城市消费水平_低",
+        "f_holidayurban_cons_lower":           "节假日流动_城市消费水平_次低",
+        "f_holidayurban_cons_secondhigh":      "节假日流动_城市消费水平_次高",
+        "f_holidayurban_cons_high":            "节假日流动_城市消费水平_高",
+        "f_holidayedu_junior_middle":          "节假日流动_学历_初中",
+        "f_holidayedu_doctor":                 "节假日流动_学历_博士",
+        "f_holidayedu_specialty":              "节假日流动_学历_大专",
+        "f_holidayedu_primary":                "节假日流动_学历_小学",
+        "f_holidayedu_college":                "节假日流动_学历_本科",
+        "f_holidayedu_postgraduate":           "节假日流动_学历_硕士",
+        "f_holidayedu_senior_middle":          "节假日流动_学历_高中",
+        "f_holidayhouse_middle":               "节假日流动_居住社区房价_中",
+        "f_holidayhouse_low":                  "节假日流动_居住社区房价_低",
+        "f_holidayhouse_lower":                "节假日流动_居住社区房价_次低",
+        "f_holidayhouse_second_high":          "节假日流动_居住社区房价_次高",
+        "f_holidayhouse_high":                 "节假日流动_居住社区房价_高",
+        "f_holidayage_17":                     "节假日流动_年龄_0_17",
+        "f_holidayage_24":                     "节假日流动_年龄_18_24",
+        "f_holidayage_30":                     "节假日流动_年龄_25_30",
+        "f_holidayage_35":                     "节假日流动_年龄_31_35",
+        "f_holidayage_40":                     "节假日流动_年龄_36_40",
+        "f_holidayage_45":                     "节假日流动_年龄_41_45",
+        "f_holidayage_60":                     "节假日流动_年龄_46_60",
+        "f_holidayage_over_60":                "节假日流动_年龄_61以上",
+        "f_holidaysex_woman":                  "节假日流动_性别_女",
+        "f_holidaysex_man":                    "节假日流动_性别_男",
+        "f_workday_urban_cons_middle":         "工作日流动_城市消费水平_中",
+        "f_workday_urban_cons_low":            "工作日流动_城市消费水平_低",
+        "f_workday_urban_cons_lower":          "工作日流动_城市消费水平_次低",
+        "f_workday_urban_cons_secondhigh":     "工作日流动_城市消费水平_次高",
+        "f_workday_urban_cons_high":           "工作日流动_城市消费水平_高",
+        "f_workday_edu_junior_middle":         "工作日流动_学历_初中",
+        "f_workday_edu_doctor":                "工作日流动_学历_博士",
+        "f_workday_edu_specialty":             "工作日流动_学历_大专",
+        "f_workday_edu_primary":               "工作日流动_学历_小学",
+        "f_workday_edu_college":               "工作日流动_学历_本科",
+        "f_workday_edu_postgraduate":          "工作日流动_学历_硕士",
+        "f_workday_edu_senior_middle":         "工作日流动_学历_高中",
+        "f_workday_house_middle":              "工作日流动_居住社区房价_中",
+        "f_workday_house_low":                 "工作日流动_居住社区房价_低",
+        "f_workday_house_lower":               "工作日流动_居住社区房价_次低",
+        "f_workday_house_second_high":         "工作日流动_居住社区房价_次高",
+        "f_workday_house_high":                "工作日流动_居住社区房价_高",
+        "f_workday_age_17":                    "工作日流动_年龄_0_17",
+        "f_workday_age_24":                    "工作日流动_年龄_18_24",
+        "f_workday_age_30":                    "工作日流动_年龄_25_30",
+        "f_workday_age_35":                    "工作日流动_年龄_31_35",
+        "f_workday_age_40":                    "工作日流动_年龄_36_40",
+        "f_workday_age_45":                    "工作日流动_年龄_41_45",
+        "f_workday_age_60":                    "工作日流动_年龄_46_60",
+        "f_workday_age_over_60":               "工作日流动_年龄_61以上",
+        "f_workday_sex_woman":                 "工作日流动_性别_女",
+        "f_workday_sex_man":                   "工作日流动_性别_男",
     }

+ 23 - 20
models/rank/gbdt_lr_sort.py

@@ -1,7 +1,7 @@
 import joblib
 # from dao import Redis, get_product_by_id, get_custs_by_ids, load_cust_data_from_mysql
 from database import RedisDatabaseHelper, MySqlDao
-from models.rank.data import ProductConfig, CustConfig, ImportanceFeaturesMap
+from models.rank.data import ProductConfig, CustConfig, ShopConfig, ImportanceFeaturesMap
 from models.rank.data.utils import one_hot_embedding, sample_data_clear
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
@@ -15,7 +15,6 @@ class GbdtLrModel:
         self._mysql_dao = MySqlDao()
     
     def load_model(self, model_path):
-        self._modelname = os.path.basename(model_path).split(".")[0]
         models = joblib.load(model_path)
         self.gbdt_model, self.lr_model, self.onehot_encoder = models["gbdt_model"], models["lr_model"], models["onehot_encoder"]
         
@@ -88,8 +87,9 @@ class GbdtLrModel:
         
         importance_dict = dict(zip(feats_names, feats_importance))
         
-        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
+        onehot_feats = {**CustConfig.ONEHOT_CAT, **ShopConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT}
         for feat, categories in onehot_feats.items():
+            
             related_columns = [col for col in feats_names if col.startswith(feat)]
             if related_columns:
                 # 合并类别重要性
@@ -106,32 +106,35 @@ class GbdtLrModel:
         # 输出特征重要性
         cust_features_importance = []
         product_features_importance = []
-        order_features_importance = []
         
         for feat, importance in sorted_importance:
-            if feat in list(ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP.keys()):
-                cust_features_importance.append({ImportanceFeaturesMap.CUSTOM_FEATRUES_MAP[feat]: float(importance)})
+            if feat in list(ImportanceFeaturesMap.CUSTOM_FEATURES_MAP.keys()):
+                cust_features_importance.append({ImportanceFeaturesMap.CUSTOM_FEATURES_MAP[feat]: float(importance)})
+            if feat in list(ImportanceFeaturesMap.SHOPING_FEATURES_MAP.keys()):
+                cust_features_importance.append({ImportanceFeaturesMap.SHOPING_FEATURES_MAP[feat]: float(importance)})
             if feat in list(ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP.keys()):
                 product_features_importance.append({ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[feat]: float(importance)})
-            if feat in list(ImportanceFeaturesMap.ORDER_FEATURE_MAP.keys()):
-                order_features_importance.append({ImportanceFeaturesMap.ORDER_FEATURE_MAP[feat]: float(importance)})
-                
-            # 零消特征重要性
-            if self._modelname == 'pos_model' and feat in list(ImportanceFeaturesMap.POS_FEATURE_MAP.keys()):
-                order_features_importance.append({ImportanceFeaturesMap.POS_FEATURE_MAP[feat]: float(importance)})
                 
-            # 商圈特征重要性
-            if self._modelname == 'shopping_model' and feat in list(ImportanceFeaturesMap.SHOPING_FEATURES_MAP.keys()):
-                cust_features_importance.append({ImportanceFeaturesMap.SHOPING_FEATURES_MAP[feat]: float(importance)})
-        return cust_features_importance, product_features_importance, order_features_importance
+        return cust_features_importance, product_features_importance
     
 if __name__ == "__main__":
-    model_path = "./models/rank/weights/00000000000000000000000011445301/shopping_model.pkl"
+    model_path = "./models/rank/weights/00000000000000000000000011445301/gbdtlr_model.pkl"
     city_uuid = "00000000000000000000000011445301"
     product_id = "110102"
     gbdt_sort = GbdtLrModel(model_path)
     # gbdt_sort.sort(city_uuid, product_id)
     
-    importances = gbdt_sort.generate_feats_importance()
-    for importance in importances:
-        print(importance)
+    cust_features_importance,  product_features_importance = gbdt_sort.generate_feats_importance()
+
+    cust_df = pd.DataFrame([
+        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+        for item in cust_features_importance
+    ])
+    cust_df.to_csv("./data/cust_feats.csv", index=False)
+    
+    product_df = pd.DataFrame([
+        {"Features": list(item.keys())[0], "Importance": list(item.values())[0]}
+        for item in product_features_importance
+    ])
+    product_df.to_csv("./data/product_feats.csv", index=False)
+