item2vec.py 912 B

12345678910111213141516171819202122232425262728293031323334
  1. import joblib
  2. from models.item2vec import Item2VecDataProcess
  3. from gensim.models import Word2Vec
  4. class Item2Vec:
  5. def __init__(self, city_uuid):
  6. self._load_data(city_uuid)
  7. def _load_data(self, city_uuid):
  8. """加载特征sentence"""
  9. data_processor = Item2VecDataProcess(city_uuid)
  10. self._sentences = data_processor.generate_sentence()
  11. def train(self):
  12. self._model = Word2Vec(
  13. self._sentences,
  14. vector_size=64,
  15. window=4,
  16. min_count=1,
  17. sg=1, # skip-gram
  18. workers=4,
  19. epochs=20
  20. )
  21. def save_model(self, model_path):
  22. joblib.dump(self._model, model_path)
  23. if __name__ == "__main__":
  24. city_uuid = "00000000000000000000000011445301"
  25. model = Item2Vec(city_uuid)
  26. print("开始训练Item2Vec...")
  27. model.train()