瀏覽代碼

过滤推荐列表中的歇业商户

Sherlock 10 月之前
父節點
當前提交
7e4227c588
共有 5 個文件被更改,包括 31 次插入和 538 次删除
  1. 1 1
      config/service_config.yaml
  2. 13 22
      database/dao/mysql_dao.py
  3. 0 511
      models/rank/data/config_ori.py
  4. 1 1
      models/rank/gbdt_lr_inference.py
  5. 16 3
      models/recommend.py

+ 1 - 1
config/service_config.yaml

@@ -1,2 +1,2 @@
 aliyun:
-  upload_url: "https://10.79.117.86/screen/mapi/file/fileUpload"
+  upload_url: "http://10.79.117.86/screen/mapi/file/fileUpload"

+ 13 - 22
database/dao/mysql_dao.py

@@ -82,15 +82,6 @@ class MySqlDao:
         
         return data
     
-    def get_cust_list(self, city_uuid):
-        """获取商户列表"""
-        data = self.load_cust_data(city_uuid)
-        cust_list = data["BB_RETAIL_CUSTOMER_CODE"].to_list()
-        if len(cust_list) == 0:
-            return []
-        
-        return cust_list
-    
     def get_product_by_id(self, city_uuid, product_id):
         """根据city_uuid 和 product_id 从表中获取拼柜信息"""
         query = f"""
@@ -229,6 +220,16 @@ class MySqlDao:
         
         return data
     
+    def get_cust_list(self, city_uuid):
+        """Return the distinct retail-customer codes stored for one city.
+
+        Args:
+            city_uuid: Value bound to the BA_CITY_ORG_CODE filter of the query.
+
+        Returns:
+            list: The BB_RETAIL_CUSTOMER_CODE values returned by the query;
+            an empty list when the query yields no rows.
+        """
+        # Only the table name is interpolated; the city id is passed as a bound parameter.
+        query = f"SELECT DISTINCT BB_RETAIL_CUSTOMER_CODE FROM {self._cust_tablename} WHERE BA_CITY_ORG_CODE = :city_uuid"
+        params = {"city_uuid": city_uuid}
+
+        data = pd.DataFrame(self.db_helper.fetch_all(text(query), params))
+
+        # A result with no rows builds a frame without columns, so the column
+        # lookup below would raise KeyError; report "no customers" instead.
+        if data.empty:
+            return []
+
+        return data["BB_RETAIL_CUSTOMER_CODE"].tolist()
+    
     def data_preprocess(self, data: pd.DataFrame):
         
         data.drop(["cust_uuid", "longitude", "latitude", "range_radius"], axis=1, inplace=True)
@@ -248,16 +249,6 @@ if __name__ == "__main__":
     dao = MySqlDao()
     city_uuid = "00000000000000000000000011445301"
     
-    order_data = dao.load_order_data(city_uuid)
-    order_data["sale_qty"] = order_data["sale_qty"].fillna(0)
-    order_data = order_data.infer_objects(copy=False)
-        
-        # 将销售量进行分组求和
-    order_data = order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
-    
-    cust_data = dao.load_cust_data(city_uuid)
-    cust_data = cust_data["BB_RETAIL_CUSTOMER_NAME"]
-    
-    sale_data = order_data.merge(cust_data, left_on='cust_code', right_on='BB_RETAIL_CUSTOMER_CODE', how="inner")
-    
-    sale_data.to_csv("./data/sale.csv", index=False)
+    # get_cust_list() already returns a plain list of customer codes, so it
+    # must not be indexed by column name (that would raise TypeError).
+    cust_list = dao.get_cust_list(city_uuid)
+    for cust_code in cust_list:
+        print(cust_code)

+ 0 - 511
models/rank/data/config_ori.py

@@ -1,511 +0,0 @@
-class CustConfig:
-    FEATURE_COLUMNS = [
-        "BB_RETAIL_CUSTOMER_CODE",                     # 零售户代码
-        "BB_RTL_CUST_GRADE_NAME",                      # 零售户分档名称
-        "BB_RTL_CUST_MARKET_TYPE_NAME",                # 零售户市场类型名称
-        "STORE_AREA",                                  # 店铺经营面积
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME",              # 零售户业态名称
-        "OPERATOR_EDU_LEVEL",                          # 零售客户经营者文化程
-        "OPERATOR_AGE",                                # 经营者年龄
-        "BB_RTL_CUST_CHAIN_FLAG",                      # 零售户连锁标识
-        "PRESENT_STAR_TERMINAL",                       # 终端星级
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME",           # 零售户信用等级名称
-        "MD04_DIR_SAL_STORE_FLAG",                     # 直营店标识
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME",              # 零售户经营范围名称
-        "PRODUCT_INSALE_QTY",                          # 在销品规数
-        # "CUST_INVESTMENT",                             # 店铺资源投入建设
-        
-        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",       # 新品订货量占同价类比重
-        # "PRODUCT_LISTING_RATE",                        # 品规上架率
-        # "STOCKOUT_DAYS",                              # 断货天数
-        # "YLT_TURNOVER_RATE",                           # 易灵通动销率
-        # "YLT_BAR_PACKAGE_SALE_OCC",                    # 易灵通条包销售占比
-        # "UNPACKING_RATE",                              # 拆包率
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME",              # 零售户商圈类型名称
-        
-        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME",             # 零售户业态细分名称
-        
-        # "BB_RTL_CUST_TERMINAL_LEVEL_NAME",             # 零售户终端层级名称
-        # "BB_RTL_CUST_TERMINALEVEL_NAME",               # 零售户终端层级细分名称
-        # "MD04_MG_SAMPLE_CUST_FLAG",                    # 样本户标识
-        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG",            # 零售户大户标识
-        # "BB_RTL_CUST_OPERATE_METHOD_NAME",             # 零售户经营方式名称
-        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME",          # 零售户卷烟经营规模名称
-        
-        # "AVERAGE_CONSUMER_FLOW",                       # 月均消费人流
-        # "NEW_PRODUCT_MEMBERS_QTY",                     # 新品消费会员数量
-    ]
-    # 数据清洗规则
-    CLEANING_RULES = {
-        "BB_RTL_CUST_GRADE_NAME":                   {"method": "fillna", "opt": "fill", "value": "十五档", "type": "str"},
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             {"method": "fillna", "opt": "fill", "value": "城网", "type": "str"},
-        "STORE_AREA":                               {"method": "fillna", "opt": "mean", "type": "num"},
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        "OPERATOR_EDU_LEVEL":                       {"method": "fillna", "opt": "fill", "value": "无数据", "type": "str"},
-        "OPERATOR_AGE":                             {"method": "fillna", "opt": "mean", "type": "num"},
-        "BB_RTL_CUST_CHAIN_FLAG":                   {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "PRESENT_STAR_TERMINAL":                    {"method": "fillna", "opt": "fill", "value": "非星级", "type": "str"},
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        {"method": "fillna", "opt": "fill", "value": "B", "type": "str"},
-        "MD04_DIR_SAL_STORE_FLAG":                  {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           {"method": "fillna", "opt": "fill", "value": "否", "type": "str"},
-        "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
-        # "CUST_INVESTMENT":                          {"method": "fillna", "opt": "fill", "type": 0}
-        
-        
-        # "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC":    {"method": "fillna", "opt": "mean", "type": "num"},
-        # "PRODUCT_LISTING_RATE":                     {"method": "fillna", "opt": "mean", "type": "num"},
-        # "STOCKOUT_DAYS":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        # "YLT_TURNOVER_RATE":                        {"method": "fillna", "opt": "mean", "type": "num"},
-        # "NEW_PRODUCT_MEMBERS_QTY":                  {"method": "fillna", "opt": "mean", "type": "num"},
-        # "PRODUCT_INSALE_QTY":                       {"method": "fillna", "opt": "mean", "type": "num"},
-        # "UNPACKING_RATE":                           {"method": "fillna", "opt": "mean", "type": "num"},
-        
-        
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME":           {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        # "BB_RTL_CUST_SUB_BUSI_PLACE_NAME":          {"method": "fillna", "opt": "fill", "value": "其他", "type": "str"},
-        # "BB_RTL_CUST_TERMINALEVEL_NAME":          {"method": "fillna", "opt": "replace", "value": "BB_RTL_CUST_TERMINAL_LEVEL_NAME", "type": "str"},
-        # "MD04_MG_SAMPLE_CUST_FLAG":                 {"method": "fillna", "value": "N", "opt": "fill"},
-        # "MD07_RTL_CUST_IS_SALE_LARGE_FLAG":         {"method": "fillna", "value": "N", "opt": "fill"},
-        # "BB_RTL_CUST_CGT_OPERATE_SCOPE_NAME":       {"method": "fillna", "value": "中", "opt": "fill"},
-    }
-    
-    ONEHOT_CAT = {
-        "BB_RTL_CUST_GRADE_NAME":                   ['一档', '二档', '三档', '四档', '五档', '六档', '七档', '八档', '九档', '十档', '十一档', '十二档', 
-                                                    '十三档', '十四档', '十五档', '十六档', '十七档', '十八档', '十九档', '二十档', '二十一档', '二十二档', 
-                                                    '二十三档', '二十四档', '二十五档', '二十六档', '二十七档', '二十八档', '二十九档', '三十档'],
-        "BB_RTL_CUST_MARKET_TYPE_NAME":             ["城网", "农网"],
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":           ["便利店", "超市", "烟草专业店", "娱乐服务类", "其他"],
-        "OPERATOR_EDU_LEVEL":                       [1, 2, 3, 4, 5, 6, 7, "无数据"],
-        "BB_RTL_CUST_CHAIN_FLAG":                   ["是", "否"],
-        "PRESENT_STAR_TERMINAL":                    ["一星", "二星", "三星", "四星", "五星", "非星级"],
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":        ["AAA", "AA", "A", "B", "C", "D"],
-        "MD04_DIR_SAL_STORE_FLAG":                  ["是", "否"],
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":           ["是", "否"],
-        
-        
-        
-        # "BB_RTL_CUST_POSITION_TYPE_NAME":           ["居民区", "商业娱乐区", "交通枢纽区", "旅游景区", "工业区", "集贸区", "院校学区", "办公区", "其他"]
-    }
-    
-class ProductConfig:
-    FEATURE_COLUMNS = [
-        "product_code",                                # 商品编码
-        "direct_retail_price",                         # 建议零售价
-        "is_low_tar",                                  # 是否低焦油烟
-        "tar_qty",                                     # 焦油含量
-        "is_exploding_beads",                          # 是否爆珠
-        "is_shortbranch",                              # 是否短支烟
-        "is_medium",                                   # 是否中支烟
-        "is_tiny",                                     # 是否细支
-        "product_style_code_name",                     # 包装类型名称
-        "org_is_abnormity",                            # 是否异形包装
-        "is_chuangxin",                                # 是否创新品类
-        "is_key_brand",                                # 是否重点品牌
-        "foster_level_hy",                             # 是否行业共育品规
-        "foster_level_sj",                             # 是否省级共育品规
-        "is_cigar",                                    # 是否雪茄型卷烟
-        "co_qty",                                      # 一氧化碳含量
-        "tbc_total_length",                            # 烟支总长度
-        "tbc_length",                                  # 烟支长度
-        "filter_length",                               # 滤嘴长度
-        
-
-        
-        # "adjust_price",                                # 含税调拨价
-        # "notwithtax_adjust_price",                     # 不含税调拨价
-        # "whole_sale_price",                            # 统一批发价
-        # "allot_price",                                 # 调拨价
-        # "direct_whole_price",                          # 批发指导价
-        # "retail_price",                                # 零售价
-        # "price_type_name",                             # 卷烟价类名称
-        # "gear_type_name",                              # 卷烟档位名称
-        # "category_type_name",                          # 卷烟品类名称
-        # "is_high_level",                               # 是否高端烟
-        # "is_upscale_level",                            # 是否高端烟不含高价
-        # "is_high_price",                               # 是否高价烟
-        # "is_low_price",                                # 是否低价烟
-        # "is_encourage",                                # 是否全国鼓励品牌
-        # "is_abnormity",                                # 是否异形包装
-        # "is_intake",                                   # 是否进口烟
-        # "is_short",                                    # 是否紧俏品牌
-        # "is_ordinary_price_type",                      # 是否普一类烟
-        # "source_type",                                 # 来源类型
-        # "chinese_mix",                                 # 中式混合
-        # "sub_price_type_name",                         # 细分卷烟价类名称
-    ]
-    
-    CLEANING_RULES = {
-        "direct_retail_price":                         {"method": "fillna", "opt": "mean", "type": "num"},
-        "is_low_tar":                                  {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "tar_qty":                                     {"method": "fillna", "opt": "mean", "type": "num"},
-        "is_exploding_beads":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_shortbranch":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_medium":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_tiny":                                     {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "product_style_code_name":                     {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        "org_is_abnormity":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_chuangxin":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_key_brand":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "foster_level_hy":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "foster_level_sj":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "is_cigar":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        "co_qty":                                      {"method": "fillna", "opt": "mean", "type": "num"},
-        "tbc_total_length":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        "tbc_length":                                  {"method": "fillna", "opt": "mean", "type": "num"},
-        "filter_length":                               {"method": "fillna", "opt": "mean", "type": "num"},
-        
-        
-        # "adjust_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        # "notwithtax_adjust_price":                     {"method": "fillna", "opt": "mean", "type": "num"},
-        # "whole_sale_price":                            {"method": "fillna", "opt": "mean", "type": "num"},
-        # "allot_price":                                 {"method": "fillna", "opt": "fill", "type": "num", "value": 0.0},
-        # "direct_whole_price":                          {"method": "fillna", "opt": "mean", "type": "num"},
-        # "retail_price":                                {"method": "fillna", "opt": "mean", "type": "num"},
-        # "price_type_name":                             {"method": "fillna", "opt": "fill", "type": "str", "value": "一类烟"},
-        # "gear_type_name":                              {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "category_type_name":                          {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "is_high_level":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_upscale_level":                            {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_high_price":                               {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_low_price":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_encourage":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_abnormity":                                {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_intake":                                   {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_short":                                    {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "is_ordinary_price_type":                      {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "source_type":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "其他"},
-        # "chinese_mix":                                 {"method": "fillna", "opt": "fill", "type": "str", "value": "否"},
-        # "sub_price_type_name":                         {"method": "fillna", "opt": "fill", "type": "str", "value": "普一类烟"},
-    }
-    
-
-    ONEHOT_CAT = {
-        "is_low_tar":                                  ["是", "否"],
-        "is_exploding_beads":                          ["是", "否"],
-        "is_shortbranch":                              ["是", "否"],
-        "is_medium":                                   ["是", "否"],
-        "is_tiny":                                     ["是", "否"],
-        "product_style_code_name":                     ["条盒硬盒", "条包硬盒", "条盒软盒", "条包软盒", "铁盒", "其他"],
-        "org_is_abnormity":                            ["是", "否"],
-        "is_chuangxin":                                ["是", "否"],
-        "is_key_brand":                                ["是", "否"],
-        "foster_level_hy":                             ["是", "否"],
-        "foster_level_sj":                             ["是", "否"],
-        "is_cigar":                                    ["是", "否"],
-        
-        
-        
-        # "price_type_name":                             ["一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
-        # "gear_type_name":                              ["第一档位", "第二档位", "第三档位", "第四档位", "第五档位", "第六档位", "第七档位", "第八档位", "其他"],
-        # "category_type_name":                          ["第1品类", "第2品类", "第3品类", "第4品类", "第5品类", "第6品类", "第7品类", 
-        #                                                 "第8品类", "第9品类", "第10品类", "第11品类", "第12品类", "第13品类", "其他"],
-        # "is_high_level":                               ["是", "否"],
-        # "is_upscale_level":                            ["是", "否"],
-        # "is_high_price":                               ["是", "否"],
-        # "is_low_price":                                ["是", "否"],
-        # "is_encourage":                                ["是", "否"],
-        # "is_abnormity":                                ["是", "否"],
-        # "is_intake":                                   ["是", "否"],
-        # "is_short":                                    ["是", "否"],
-        # "is_ordinary_price_type":                      ["是", "否"],
-        # "source_type":                                 ["是", "否"],
-        # "chinese_mix":                                 ["是", "否"],
-        # "sub_price_type_name":                         ["高端烟", "高价位烟", "普一类烟", "二类烟", "三类烟", "四类烟", "五类烟", "无价类"],
-    }
-    
-class OrderConfig:
-    FEATURE_COLUMNS = [
-        "BB_RETAIL_CUSTOMER_CODE",                          # 零售户编码
-        "PRODUCT_CODE",                                     # 卷烟编码
-        "MONTH6_SALE_QTY",                                  # 近半年销量(箱)
-        "MONTH6_SALE_AMT",                                  # 近半年销售额(万元)
-        "MONTH6_GROSS_PROFIT_RATE",                         # 近半年毛利率
-        "MONTH6_SALE_QTY_YOY",                              # 销售量同比
-        "MONTH6_SALE_QTY_MOM",                              # 销售量环比
-        "MONTH6_SALE_AMT_YOY",                              # 销售额(购进额)同比
-        "MONTH6_SALE_AMT_MOM",                              # 销售额(购进额)环比
-        "STOCK_QTY",                                        # 库存
-        "ORDER_FULLORDR_RATE",                              # 订足率
-        "FULL_FILLMENT_RATE",                               # 订单满足率
-        "ORDER_FULLORDR_RATE_MOM",                          # 订足率环比
-        "CUSTOMER_REPURCHASE_RATE",                         # 会员重购率  
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",            # 新品订货量占同价类比重/decimal(18,6)
-        "DEMAND_RATE",                                      # 需求量满足率
-        "LISTING_RATE",                                     # 品规商上架率
-        "PUT_MARKET_FINISH_RATE",                           # 投放完成率
-        "OUT_STOCK_DAYS",                                   # 断货天数
-        "YLT_TURNOVER_RATE",                                # 易灵通动销率
-        "YLT_BAR_PACKAGE_SALE_OCC",                         # 易灵通调包销售占比
-        "UNPACKING_RATE",                                   # 拆包率
-        "POS_PACKAGE_PRICE",                                # pos机单包价格
-    ]
-    
-    CLEANING_FEATURES = [
-        "MONTH6_SALE_QTY",
-        "MONTH6_SALE_AMT",
-        "MONTH6_GROSS_PROFIT_RATE",
-        "MONTH6_SALE_QTY_YOY",
-        "MONTH6_SALE_QTY_MOM",
-        "MONTH6_SALE_AMT_YOY",
-        "MONTH6_SALE_AMT_MOM",
-        "STOCK_QTY",
-        "ORDER_FULLORDR_RATE",
-        "FULL_FILLMENT_RATE",
-        "ORDER_FULLORDR_RATE_MOM",
-        "CUSTOMER_REPURCHASE_RATE",
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",
-        "DEMAND_RATE",
-        "LISTING_RATE",
-        "PUT_MARKET_FINISH_RATE",
-        "OUT_STOCK_DAYS",
-        "UNPACKING_RATE",
-    ]
-    
-    WEIGHTS = {
-        "MONTH6_SALE_QTY":                                  0.15,
-        "MONTH6_SALE_QTY_MOM":                              0.2,
-        "ORDER_FULLORDR_RATE":                              0.3,
-        "ORDER_FULLORDR_RATE_MOM":                          0.35,
-    }
-    
-    POSFEATURES = [
-        "YLT_TURNOVER_RATE","YLT_BAR_PACKAGE_SALE_OCC","POS_PACKAGE_PRICE"
-    ]
-    
-class ImportanceFeaturesMap:
-    CUSTOM_FEATRUES_MAP = {
-        "BB_RTL_CUST_GRADE_NAME":                           "零售户分档名称",
-        "BB_RTL_CUST_MARKET_TYPE_NAME":                     "零售户市场类型名称",
-        "STORE_AREA":                                       "店铺经营面积",
-        "BB_RTL_CUST_BUSINESS_TYPE_NAME":                   "零售户业态名称",
-        "OPERATOR_EDU_LEVEL":                               "零售客户经营者文化程",
-        "OPERATOR_AGE":                                     "经营者年龄",
-        "BB_RTL_CUST_CHAIN_FLAG":                           "零售户连锁标识",
-        "PRESENT_STAR_TERMINAL":                            "终端星级",
-        "MD04_MG_RTL_CUST_CREDITCLASS_NAME":                "零售户信用等级名称",
-        "MD04_DIR_SAL_STORE_FLAG":                          "直营店标识",
-        "BB_CUSTOMER_MANAGER_SCOPE_NAME":                   "零售户经营范围名称",
-        "PRODUCT_INSALE_QTY":                               "在销品规数",
-        # "CUST_INVESTMENT":                                  "店铺资源投入建设",
-    }
-    
-    PRODUCT_FEATRUES_MAP = {
-        # ProductConfig 字段映射
-        "direct_retail_price":                              "建议零售价",
-        "is_low_tar":                                       "是否低焦油烟",
-        "tar_qty":                                          "焦油含量",
-        "is_exploding_beads":                               "是否爆珠",
-        "is_shortbranch":                                   "是否短支烟",
-        "is_medium":                                        "是否中支烟",
-        "is_tiny":                                          "是否细支",
-        "product_style_code_name":                          "包装类型名称",
-        "org_is_abnormity":                                 "是否异形包装",
-        "is_chuangxin":                                     "是否创新品类",
-        "is_key_brand":                                     "是否重点品牌",
-        "foster_level_hy":                                  "是否行业共育品规",
-        "foster_level_sj":                                  "是否省级共育品规",
-        "is_cigar":                                         "是否雪茄型卷烟",
-        "co_qty":                                           "一氧化碳含量",
-        "tbc_total_length":                                 "烟支总长度",
-        "tbc_length":                                       "烟支长度",
-        "filter_length":                                    "滤嘴长度",
-    }
-    
-    ORDER_FEATURE_MAP = {
-        "MONTH6_SALE_QTY": "近半年销量(箱)",
-        "MONTH6_SALE_AMT": "近半年销售额(万元)",
-        "MONTH6_GROSS_PROFIT_RATE": "近半年毛利率",
-        "MONTH6_SALE_QTY_YOY": "销量同比",
-        "MONTH6_SALE_QTY_MOM": "销量环比",
-        "MONTH6_SALE_AMT_YOY": "销售额(购进额)同比",
-        "MONTH6_SALE_AMT_MOM": "销售额(购进额)环比",
-        "STOCK_QTY": "库存",
-        "ORDER_FULLORDR_RATE": "订足率",
-        "ORDER_FULLORDR_RATE_MOM": "订足率环比",
-        "FULL_FILLMENT_RATE": "订单满足率",
-        "CUSTOMER_REPURCHASE_RATE": "会员重购率(部分有会员)",
-        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC": "新品订货量占同价类比重",
-        "DEMAND_RATE": "需求量满足率",
-        "LISTING_RATE": "品规上架率",
-        "PUT_MARKET_FINISH_RATE": "投放完成率",
-        "OUT_STOCK_DAYS": "断货天数(部分零售商有)",
-        "UNPACKING_RATE": "拆包率",
-        "city_uuid": "城市UUID"
-    }
-    
-    POS_FEATURE_MAP = {
-        "YLT_TURNOVER_RATE": "易灵通动销率",
-        "YLT_BAR_PACKAGE_SALE_OCC": "易灵通条包销售占比",
-        "POS_PACKAGE_PRICE": "POS机单包价格",
-    }
-    
-    SHOPING_FEATURES_MAP = {
-        # 商圈 字段映射
-        "r_home_num": "常驻人口_居住人数",
-        "r_work_num": "常驻人口_工作人数",
-        "r_resident_num": "常驻人口_工作或居住人数",
-        "r_urban_cons_middle": "常驻人口_城市消费水平_中",
-        "r_urban_cons_low": "常驻人口_城市消费水平_低",
-        "r_urban_cons_lower": "常驻人口_城市消费水平_次低",
-        "r_urban_cons_secondhigh": "常驻人口_城市消费水平_次高",
-        "r_urban_cons_high": "常驻人口_城市消费水平_高",
-        "r_edu_junior_middle": "常驻人口_学历_初中",
-        "r_edu_doctor": "常驻人口_学历_博士",
-        "r_edu_specialty": "常驻人口_学历_大专",
-        "r_edu_primary": "常驻人口_学历_小学",
-        "r_edu_college": "常驻人口_学历_本科",
-        "r_edu_postgraduate": "常驻人口_学历_硕士",
-        "r_edu_senior_middle": "常驻人口_学历_高中",
-        "r_house_price79999": "常驻人口_居住社区房价_60000_79999",
-        "r_house_price59999": "常驻人口_居住社区房价_40000_59999",
-        "r_house_price39999": "常驻人口_居住社区房价_20000_39999",
-        "r_house_price19999": "常驻人口_居住社区房价_10000_19999",
-        "r_house_price9999": "常驻人口_居住社区房价_8000_9999",
-        "r_house_price7999": "常驻人口_居住社区房价_5000_7999",
-        "r_house_price4999": "常驻人口_居住社区房价_2000_4999",
-        "r_age_17": "常驻人口_年龄_0_17",
-        "r_age_24": "常驻人口_年龄_18_24",
-        "r_age_30": "常驻人口_年龄_25_30",
-        "r_age_35": "常驻人口_年龄_31_35",
-        "r_age_40": "常驻人口_年龄_36_40",
-        "r_age_45": "常驻人口_年龄_41_45",
-        "r_age_60": "常驻人口_年龄_46_60",
-        "r_age_over_60": "常驻人口_年龄_61以上",
-        "r_sex_woman": "常驻人口_性别_女",
-        "r_sex_man": "常驻人口_性别_男",
-        "r_catering_50": "常驻人口_餐饮消费水平_50",
-        "r_catering_100": "常驻人口_餐饮消费水平_100",
-        "r_catering_150": "常驻人口_餐饮消费水平_150",
-        "r_catering_200": "常驻人口_餐饮消费水平_200",
-        "r_catering_500": "常驻人口_餐饮消费水平_500",
-        "r_catering_over_500": "常驻人口_餐饮消费水平_500以上",
-        "r_catering_times_2": "常驻人口_餐饮消费频次_1_2",
-        "r_catering_times_4": "常驻人口_餐饮消费频次_2_4",
-        "r_catering_times_6": "常驻人口_餐饮消费频次_4_6",
-        "r_catering_times_8": "常驻人口_餐饮消费频次_6_8",
-        "r_catering_times_10": "常驻人口_餐饮消费频次_8_10",
-        "r_catering_times_11": "常驻人口_餐饮消费频次_11以上",
-        "r_native_beijing": "常驻人口_家乡地_北京市",
-        "r_native_tianjing": "常驻人口_家乡地_天津市",
-        "r_native_hebei": "常驻人口_家乡地_河北省",
-        "r_native_shanxi": "常驻人口_家乡地_山西省",
-        "r_native_neimeng": "常驻人口_家乡地_内蒙古",
-        "r_native_liaoning": "常驻人口_家乡地_辽宁省",
-        "r_native_jilin": "常驻人口_家乡地_吉林省",
-        "r_native_heilongjiang": "常驻人口_家乡地_黑龙江省",
-        "r_native_shanghai": "常驻人口_家乡地_上海市",
-        "r_native_jiangsu": "常驻人口_家乡地_江苏省",
-        "r_native_zhejiang": "常驻人口_家乡地_浙江省",
-        "r_native_anhui": "常驻人口_家乡地_安徽省",
-        "r_native_fujian": "常驻人口_家乡地_福建省",
-        "r_native_jiangix": "常驻人口_家乡地_江西省",
-        "r_native_shandong": "常驻人口_家乡地_山东省",
-        "r_native_henan": "常驻人口_家乡地_河南省",
-        "r_native_hubei": "常驻人口_家乡地_湖北省",
-        "r_native_hunan": "常驻人口_家乡地_湖南省",
-        "r_native_guangdong": "常驻人口_家乡地_广东省",
-        "r_native_hainan": "常驻人口_家乡地_海南省",
-        "r_native_sichuan": "常驻人口_家乡地_四川省",
-        "r_native_guizhou": "常驻人口_家乡地_贵州省",
-        "r_native_yunnan": "常驻人口_家乡地_云南省",
-        "r_native_shan": "常驻人口_家乡地_陕西省",
-        "r_native_gansu": "常驻人口_家乡地_甘肃省",
-        "r_native_qinghai": "常驻人口_家乡地_青海省",
-        "r_native_guangxi": "常驻人口_家乡地_广西壮族自治区",
-        "r_native_ningxia": "常驻人口_家乡地_宁夏回族自治区",
-        "r_native_xinjiang": "常驻人口_家乡地_新疆维吾尔自治区",
-        "r_native_xizang": "常驻人口_家乡地_西藏自治区",
-        "r_native_chongqing": "常驻人口_家乡地_重庆市",
-        "r_native_hongkong": "常驻人口_家乡地_香港",
-        "r_native_macao": "常驻人口_家乡地_澳门",
-        "r_native_taiwan": "常驻人口_家乡地_台湾",
-        "r_native_other": "常驻人口_家乡地_其它",
-        "f_flow_num": "流动人口_日均流动人口数量",
-        "f_holiday_flow_num": "流动人口_节假日日均流动人口数量",
-        "f_workday_flow_num": "流动人口_工作日日均流动人口数量",
-        "f_flowurban_cons_middle": "日均流动_城市消费水平_中",
-        "f_flowurban_cons_low": "日均流动_城市消费水平_低",
-        "f_flowurban_cons_lower": "日均流动_城市消费水平_次低",
-        "f_flowurban_cons_second_high": "日均流动_城市消费水平_次高",
-        "f_flowurban_cons_high": "日均流动_城市消费水平_高",
-        "f_flowedu_junior_middle": "日均流动_学历_初中",
-        "f_flowedu_doctor": "日均流动_学历_博士",
-        "f_flowedu_specialty": "日均流动_学历_大专",
-        "f_flowedu_primary": "日均流动_学历_小学",
-        "f_flowedu_college": "日均流动_学历_本科",
-        "f_flowedu_postgraduate": "日均流动_学历_硕士",
-        "f_flowedu_senior_middle": "日均流动_学历_高中",
-        "f_flowhouse_middle": "日均流动_居住社区房价_中",
-        "f_flowhouse_low": "日均流动_居住社区房价_低",
-        "f_flowhouse_lower": "日均流动_居住社区房价_次低",
-        "f_flowhouse_second_high": "日均流动_居住社区房价_次高",
-        "f_flowhouse_high": "日均流动_居住社区房价_高",
-        "f_flowage_17": "日均流动_年龄_0_17",
-        "f_flowage_24": "日均流动_年龄_18_24",
-        "f_flowage_30": "日均流动_年龄_25_30",
-        "f_flowage_35": "日均流动_年龄_31_35",
-        "f_flowage_40": "日均流动_年龄_36_40",
-        "f_flowage_45": "日均流动_年龄_41_45",
-        "f_flowage_60": "日均流动_年龄_46_60",
-        "f_flowage_over_60": "日均流动_年龄_61以上",
-        "f_flowsex_woman": "日均流动_性别_女",
-        "f_flowsex_man": "日均流动_性别_男",
-        "f_holidayurban_cons_middle": "节假日流动_城市消费水平_中",
-        "f_holidayurban_cons_low": "节假日流动_城市消费水平_低",
-        "f_holidayurban_cons_lower": "节假日流动_城市消费水平_次低",
-        "f_holidayurban_cons_secondhigh": "节假日流动_城市消费水平_次高",
-        "f_holidayurban_cons_high": "节假日流动_城市消费水平_高",
-        "f_holidayedu_junior_middle": "节假日流动_学历_初中",
-        "f_holidayedu_doctor": "节假日流动_学历_博士",
-        "f_holidayedu_specialty": "节假日流动_学历_大专",
-        "f_holidayedu_primary": "节假日流动_学历_小学",
-        "f_holidayedu_college": "节假日流动_学历_本科",
-        "f_holidayedu_postgraduate": "节假日流动_学历_硕士",
-        "f_holidayedu_senior_middle": "节假日流动_学历_高中",
-        "f_holidayhouse_middle": "节假日流动_居住社区房价_中",
-        "f_holidayhouse_low": "节假日流动_居住社区房价_低",
-        "f_holidayhouse_lower": "节假日流动_居住社区房价_次低",
-        "f_holidayhouse_second_high": "节假日流动_居住社区房价_次高",
-        "f_holidayhouse_high": "节假日流动_居住社区房价_高",
-        "f_holidayage_17": "节假日流动_年龄_0_17",
-        "f_holidayage_24": "节假日流动_年龄_18_24",
-        "f_holidayage_30": "节假日流动_年龄_25_30",
-        "f_holidayage_35": "节假日流动_年龄_31_35",
-        "f_holidayage_40": "节假日流动_年龄_36_40",
-        "f_holidayage_45": "节假日流动_年龄_41_45",
-        "f_holidayage_60": "节假日流动_年龄_46_60",
-        "f_holidayage_over_60": "节假日流动_年龄_61以上",
-        "f_holidaysex_woman": "节假日流动_性别_女",
-        "f_holidaysex_man": "节假日流动_性别_男",
-        "f_workday_urban_cons_middle": "工作日流动_城市消费水平_中",
-        "f_workday_urban_cons_low": "工作日流动_城市消费水平_低",
-        "f_workday_urban_cons_lower": "工作日流动_城市消费水平_次低",
-        "f_workday_urban_cons_secondhigh": "工作日流动_城市消费水平_次高",
-        "f_workday_urban_cons_high": "工作日流动_城市消费水平_高",
-        "f_workday_edu_junior_middle": "工作日流动_学历_初中",
-        "f_workday_edu_doctor": "工作日流动_学历_博士",
-        "f_workday_edu_specialty": "工作日流动_学历_大专",
-        "f_workday_edu_primary": "工作日流动_学历_小学",
-        "f_workday_edu_college": "工作日流动_学历_本科",
-        "f_workday_edu_postgraduate": "工作日流动_学历_硕士",
-        "f_workday_edu_senior_middle": "工作日流动_学历_高中",
-        "f_workday_house_middle": "工作日流动_居住社区房价_中",
-        "f_workday_house_low": "工作日流动_居住社区房价_低",
-        "f_workday_house_lower": "工作日流动_居住社区房价_次低",
-        "f_workday_house_second_high": "工作日流动_居住社区房价_次高",
-        "f_workday_house_high": "工作日流动_居住社区房价_高",
-        "f_workday_age_17": "工作日流动_年龄_0_17",
-        "f_workday_age_24": "工作日流动_年龄_18_24",
-        "f_workday_age_30": "工作日流动_年龄_25_30",
-        "f_workday_age_35": "工作日流动_年龄_31_35",
-        "f_workday_age_40": "工作日流动_年龄_36_40",
-        "f_workday_age_45": "工作日流动_年龄_41_45",
-        "f_workday_age_60": "工作日流动_年龄_46_60",
-        "f_workday_age_over_60": "工作日流动_年龄_61以上",
-        "f_workday_sex_woman": "工作日流动_性别_女",
-        "f_workday_sex_man": "工作日流动_性别_男"
-    }

+ 1 - 1
models/rank/gbdt_lr_inference.py

@@ -55,7 +55,7 @@ class GbdtLrModel:
         
         gbdt_preds = self.gbdt_model.predict(recommend_sample, pred_leaf=True)
         gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
-        scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1]
+        scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1] * 100
         
         recommend_list = []
         for cust_id, score in zip(recall_list, scores):

+ 16 - 3
models/recommend.py

@@ -69,6 +69,7 @@ class Recommend:
         # 获取推理用的feats_map
         feats_map = generate_feats_map(product_data, cust_data)
         recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
+        recommend_list = self.filter_recommend_list(recommend_list)
         return recommend_list
     
     def get_recommend_list_by_item2vec(self, product_id, recall_count=500):
@@ -76,8 +77,18 @@ class Recommend:
         recommend_list = self._item2vec_model.get_recommend_cust_list(product_id, top=recall_count)
         recommend_list = recommend_list.drop(columns=["sale_qty"])
         recommend_list = recommend_list.to_dict(orient='records')
-        return recommend_list[:recall_count]
+        recommend_list = self.filter_recommend_list(recommend_list[:recall_count])
+        return recommend_list
         # return recommend_list
+        
+    def filter_recommend_list(self, recommend_list):
+        """Filter out merchants that have closed down.
+
+        Keeps only the entries whose "cust_code" appears in the customer
+        list the DAO returns for this instance's city; input order is kept.
+
+        Args:
+            recommend_list: Iterable of mappings, each with a "cust_code" key.
+
+        Returns:
+            list: The entries of ``recommend_list`` that passed the filter.
+        """
+        # Build the lookup set once so each membership test is O(1).
+        known_codes = set(self._dao.get_cust_list(self._city_uuid))
+        kept = []
+        for entry in recommend_list:
+            if entry["cust_code"] in known_codes:
+                kept.append(entry)
+        return kept
     
     def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
         """根据推荐列表生成投放分配"""
@@ -109,6 +120,8 @@ if __name__ == "__main__":
     city_uuid = "00000000000000000000000011445301"
     product_id = '350139'
     recommend = Recommend(city_uuid)
-    recommend_list = recommend.get_recommend_list_by_item2vec(product_id)
-    recommend_data = recommend.get_recommend_and_delivery(recommend_list)
+    recommend_list = recommend.get_recommend_list_by_gbdtlr(product_id)
+    # for i in recommend_list:
+    #     print(i)
+    # recommend_data = recommend.get_recommend_and_delivery(recommend_list)