| 123456789101112131415161718192021222324 |
- import pandas as pd
def one_hot_embedding(dataframe: pd.DataFrame, onehout_feat: dict) -> pd.DataFrame:
    """One-hot encode the selected columns of a DataFrame.

    Each selected column is first cast to an unordered ``pd.Categorical``
    with the explicitly supplied category list, so the resulting indicator
    columns (and their order) are fixed by ``onehout_feat`` and not by
    whichever values happen to be present in the data. Values that are not
    in the supplied category list become missing and therefore produce a
    row of zeros across that feature's indicator columns.

    Args:
        dataframe: Input data. It is not modified; the encoding is done on
            a copy.
        onehout_feat: Mapping of column name -> list of categories to
            encode for that column.

    Returns:
        A new DataFrame in which every column named in ``onehout_feat`` is
        replaced by integer indicator columns named ``<column>_<category>``.
    """
    # Work on a copy so the caller's frame keeps its original dtypes/values;
    # assigning the categoricals in place would silently turn unlisted
    # values into NaN in the caller's data.
    encoded = dataframe.copy()

    # Fix the category set per feature before encoding.
    for feat, categories in onehout_feat.items():
        encoded[feat] = pd.Categorical(
            encoded[feat], categories=categories, ordered=False
        )

    return pd.get_dummies(
        encoded,
        columns=list(onehout_feat),
        prefix_sep="_",
        dtype=int,
    )
def sample_data_clear(data, config):
    """Clean the columns of ``data`` according to ``config.CLEANING_RULES``.

    ``config.CLEANING_RULES`` maps a column name to a rule dict with the keys
    ``"type"`` and ``"method"`` (plus ``"value"`` for string fills).

    * Columns whose rule type is ``"num"`` are converted with
      ``pd.to_numeric``; entries that cannot be parsed become NaN.
    * When the rule method is ``"fill"``, missing values are filled: string
      columns with ``rule["value"]``, numeric columns with ``0.0``
      (the rule's ``"value"`` is not consulted for numeric columns).

    ``data`` is modified in place and the same object is returned.
    """
    for column, rule in config.CLEANING_RULES.items():
        kind = rule["type"]
        is_numeric = kind == "num"

        # Coerce numeric columns first so unparseable entries turn into NaN
        # and are picked up by the fill step below.
        if is_numeric:
            data[column] = pd.to_numeric(data[column], errors="coerce")

        # Only the "fill" method does anything further.
        if rule["method"] != "fill":
            continue

        if kind == "str":
            data[column] = data[column].fillna(rule["value"])
        elif is_numeric:
            data[column] = data[column].fillna(0.0)

    return data
|