Przeglądaj źródła

增加order表数据清洗

yangzeyu 1 rok temu
rodzic
commit
68957ad97c
2 zmienione pliki z 22 dodaniami i 1 usunięciem
  1. 21 0
      models/rank/data/config.py
  2. 1 1
      models/rank/data/preprocess.py

+ 21 - 0
models/rank/data/config.py

@@ -246,6 +246,27 @@ class OrderConfig:
         "POS_PACKAGE_PRICE",                                # pos机单包价格
     ]
     
+    CLEANING_FEATURES = [
+        "MONTH6_SALE_QTY",
+        "MONTH6_SALE_AMT",
+        "MONTH6_GROSS_PROFIT_RATE",
+        "MONTH6_SALE_QTY_YOY",
+        "MONTH6_SALE_QTY_MOM",
+        "MONTH6_SALE_AMT_YOY",
+        "MONTH6_SALE_AMT_MOM",
+        "STOCK_QTY",
+        "ORDER_FULLORDR_RATE",
+        "FULL_FILLMENT_RATE",
+        "ORDER_FULLORDR_RATE_MOM",
+        "CUSTOMER_REPURCHASE_RATE",
+        "NEW_PRODUCT_MEMBERS_QTY_SAMEPRICE_OCC",
+        "DEMAND_RATE",
+        "LISTING_RATE",
+        "PUT_MARKET_FINISH_RATE",
+        "OUT_STOCK_DAYS",
+        "UNPACKING_RATE",
+    ]
+    
     WEIGHTS = {
         "MONTH6_SALE_QTY":                                  0.15,
         "MONTH6_SALE_QTY_MOM":                              0.2,

+ 1 - 1
models/rank/data/preprocess.py

@@ -92,7 +92,7 @@ class DataProcess():
         
         # 去除重复值和填补缺失值
         self._order_data.drop_duplicates(inplace=True)
-        self._order_data[remaining_cols.drop(remaining_cols)].fillna(0, inplace=True)
+        self._order_data[remaining_cols.drop(col_all_missing)].fillna(0, inplace=True)
         self._order_data = self._order_data.infer_objects(copy=False)
         
     def _clean_shopping_data(self):