utils.py 1.1 KB

12345678910111213141516171819202122232425
  1. import pandas as pd
  2. def one_hot_embedding(dataframe, onehout_feat):
  3. """对数据的指定特征做embedding编码"""
  4. # 先将指定的特征进行Categorical处理
  5. for feat, categories in onehout_feat.items():
  6. dataframe[feat] = pd.Categorical(dataframe[feat], categories=categories, ordered=False)
  7. dataframe = pd.get_dummies(
  8. dataframe,
  9. columns=list(onehout_feat.keys()),
  10. prefix_sep="_",
  11. dtype=int,
  12. )
  13. return dataframe
  14. def sample_data_clear(data, config):
  15. for feature, rules, in config.CLEANING_RULES.items():
  16. if rules["type"] == "num":
  17. data[feature] = pd.to_numeric(data[feature], errors="coerce")
  18. if rules["method"] == "fillna":
  19. if rules["opt"] == "fill":
  20. data[feature] = data[feature].fillna(rules["value"]).infer_objects(copy=False)
  21. elif rules["opt"] == "mean":
  22. data[feature] = data[feature].fillna(data[feature].mean()).infer_objects(copy=False)
  23. data[feature] = data[feature].infer_objects(copy=False)
  24. return data