|
|
@@ -64,11 +64,11 @@ class DataProcess():
|
|
|
|
|
|
if rules["method"] == "fillna":
|
|
|
if rules["opt"] == "fill":
|
|
|
- self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"])
|
|
|
+ self._cust_data[feature] = self._cust_data[feature].fillna(rules["value"]).infer_objects(copy=False)
|
|
|
elif rules["opt"] == "replace":
|
|
|
- self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]])
|
|
|
+ self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[rules["value"]]).infer_objects(copy=False)
|
|
|
elif rules["opt"] == "mean":
|
|
|
- self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean())
|
|
|
+ self._cust_data[feature] = self._cust_data[feature].fillna(self._cust_data[feature].mean()).infer_objects(copy=False)
|
|
|
self._cust_data[feature] = self._cust_data[feature].infer_objects(copy=False)
|
|
|
|
|
|
def _clean_product_data(self):
|
|
|
@@ -80,15 +80,15 @@ class DataProcess():
|
|
|
|
|
|
if rules["method"] == "fillna":
|
|
|
if rules["opt"] == "fill":
|
|
|
- self._product_data[feature] = self._product_data[feature].fillna(rules["value"])
|
|
|
+ self._product_data[feature] = self._product_data[feature].fillna(rules["value"]).infer_objects(copy=False)
|
|
|
elif rules["opt"] == "mean":
|
|
|
- self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean())
|
|
|
+ self._product_data[feature] = self._product_data[feature].fillna(self._product_data[feature].mean()).infer_objects(copy=False)
|
|
|
self._product_data[feature] = self._product_data[feature].infer_objects(copy=False)
|
|
|
|
|
|
def _clean_order_data(self):
|
|
|
remaining_cols = self._order_data.columns.drop(OrderConfig.POSFEATURES) # 数据清洗时先不对pos数据做处理
|
|
|
col_all_missing = remaining_cols[self._order_data[remaining_cols].isnull().all()].to_list()
|
|
|
- self._order_data = self._order_data.drop(columns=col_all_missing)
|
|
|
+ self._order_data.drop(columns=col_all_missing, inplace=True)
|
|
|
|
|
|
# 去除重复值和填补缺失值
|
|
|
self._order_data.drop_duplicates(inplace=True)
|
|
|
@@ -166,7 +166,7 @@ class DataProcess():
|
|
|
product_medians.columns = ["product_code", "median_score"]
|
|
|
|
|
|
# 合并中位数到原始订单数据
|
|
|
- temp_data = pd.merge(scored_data, product_medians, on="product_code")
|
|
|
+ temp_data = pd.merge(scored_data, product_medians, on="product_code", how="left")
|
|
|
|
|
|
# 生成标签 (1: 大于等于中位数, 0: 小于中位数)
|
|
|
scored_data["label"] = np.where(
|