utils.py 947 B

123456789101112131415161718192021222324
  1. import pandas as pd
  2. def one_hot_embedding(dataframe, onehout_feat):
  3. """对数据的指定特征做embedding编码"""
  4. # 先将指定的特征进行Categorical处理
  5. for feat, categories in onehout_feat.items():
  6. dataframe[feat] = pd.Categorical(dataframe[feat], categories=categories, ordered=False)
  7. dataframe = pd.get_dummies(
  8. dataframe,
  9. columns=list(onehout_feat.keys()),
  10. prefix_sep="_",
  11. dtype=int,
  12. )
  13. return dataframe
  14. def sample_data_clear(data, config):
  15. for feature, rules, in config.CLEANING_RULES.items():
  16. if rules["type"] == "num":
  17. data[feature] = pd.to_numeric(data[feature], errors="coerce")
  18. if rules["method"] == "fill":
  19. if rules["type"] == "str":
  20. data[feature] = data[feature].fillna(rules["value"])
  21. elif rules["type"] == "num":
  22. data[feature] = data[feature].fillna(0.0)
  23. return data