Quellcode durchsuchen

修复数据不匹配的bug

yangzeyu vor 1 Jahr
Ursprung
Commit
fa4647fdf6
1 geänderte Datei mit 7 neuen und 7 gelöschten Zeilen
  1. 7 7
      models/rank/data/preprocess.py

+ 7 - 7
models/rank/data/preprocess.py

@@ -64,11 +64,11 @@ class DataProcess():
                 
             if rules["method"] == "fillna":
                 if rules["opt"] == "fill":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"])
+                    self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"]).infer_objects(copy=False)
                 elif rules["opt"] == "replace":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]])
+                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]]).infer_objects(copy=False)
                 elif rules["opt"] == "mean":
-                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean())
+                    self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean()).infer_objects(copy=False)
                 self._cust_data[feature] = self._cust_data[feature].infer_objects(copy=False)
     
     def _clean_product_data(self):
@@ -80,15 +80,15 @@ class DataProcess():
             
             if rules["method"] == "fillna":
                 if rules["opt"] == "fill":
-                    self._product_data[feature] = self._product_data[feature].fillna(rules["value"])
+                    self._product_data[feature] = self._product_data[feature].fillna(rules["value"]).infer_objects(copy=False)
                 elif rules["opt"] == "mean":
-                    self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean())
+                    self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean()).infer_objects(copy=False)
                 self._product_data[feature] = self._product_data[feature].infer_objects(copy=False)
                     
     def _clean_order_data(self):
         remaining_cols = self._order_data.columns.drop(OrderConfig.POSFEATURES) # 数据清洗时先不对pos数据做处理
         col_all_missing = remaining_cols[self._order_data[remaining_cols].isnull().all()].to_list()
-        self._order_data = self._order_data.drop(columns=col_all_missing)
+        self._order_data.drop(columns=col_all_missing, inplace=True)
         
         # 去除重复值和填补缺失值
         self._order_data.drop_duplicates(inplace=True)
@@ -166,7 +166,7 @@ class DataProcess():
         product_medians.columns = ["product_code", "median_score"]
         
         # 合并中位数到原始订单数据
-        temp_data = pd.merge(scored_data, product_medians, on="product_code")
+        temp_data = pd.merge(scored_data, product_medians, on="product_code", how="left")
         
         # 生成标签 (1: 大于等于中位数, 0: 小于中位数)
         scored_data["label"] = np.where(