import pandas as pd


def one_hot_embedding(dataframe, onehout_feat):
    """One-hot encode the selected columns of a DataFrame.

    Each selected column is first converted to an unordered categorical
    with an explicit category list, so the output always contains one
    indicator column per listed category (``<column>_<category>``),
    whether or not that category occurs in the data.

    Args:
        dataframe: Input DataFrame. It is not modified; the conversion
            is performed on a copy.
        onehout_feat: Mapping of column name -> list of categories for
            that column.

    Returns:
        A new DataFrame in which every column named in ``onehout_feat``
        is replaced by integer 0/1 indicator columns.
    """
    # Work on a copy: assigning the categorical columns directly would
    # overwrite the caller's columns (and turn values outside the
    # category list into NaN in the caller's frame) as a side effect.
    encoded = dataframe.copy()

    # Fix the category set up front so get_dummies emits a stable set of
    # indicator columns.
    for feat, categories in onehout_feat.items():
        encoded[feat] = pd.Categorical(
            encoded[feat], categories=categories, ordered=False
        )

    return pd.get_dummies(
        encoded,
        columns=list(onehout_feat),
        prefix_sep="_",
        dtype=int,
    )


def sample_data_clear(data, config):
    """Clean columns of ``data`` according to ``config.CLEANING_RULES``.

    ``config.CLEANING_RULES`` maps a column name to a rule dict with the
    keys ``"type"`` and ``"method"`` (and ``"value"`` for string fills):

    * ``type == "num"``: the column is coerced with ``pd.to_numeric``;
      unparseable entries become NaN.
    * ``method == "fill"``: missing values are filled with
      ``rules["value"]`` for ``"str"`` columns, and with ``0.0`` for
      ``"num"`` columns.

    Args:
        data: DataFrame to clean. It is modified in place.
        config: Object exposing a ``CLEANING_RULES`` mapping.

    Returns:
        The same ``data`` object, after cleaning.
    """
    for feature, rules in config.CLEANING_RULES.items():
        is_numeric = rules["type"] == "num"
        if is_numeric:
            data[feature] = pd.to_numeric(data[feature], errors="coerce")

        if rules["method"] != "fill":
            continue

        if rules["type"] == "str":
            data[feature] = data[feature].fillna(rules["value"])
        elif is_numeric:
            # NOTE(review): numeric fills always use 0.0 and ignore any
            # rules["value"] -- confirm this is intended.
            data[feature] = data[feature].fillna(0.0)
    return data