score.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. from database import MySqlDao
  2. from models.rank.data.config import OrderConfig
  3. import numpy as np
  4. from sklearn.preprocessing import StandardScaler
  class UserItemScore:
      """Turn a city's order records into customer/product scores.

      Order rows are loaded through ``MySqlDao``, summed per
      (cust_code, product_code) pair, and the summed ``sale_qty`` is squashed
      into the 0-100 range. ``generate_product_scores`` writes the result to
      a CSV file.
      """

      def __init__(self, city_uuid):
          """Create the DAO and immediately load and score the order data.

          Args:
              city_uuid: Identifier forwarded to ``MySqlDao.load_order_data``.
          """
          self._dao = MySqlDao()
          self._load_data(city_uuid)

      def _load_data(self, city_uuid):
          """Load the order records table and convert quantities to scores.

          After this call ``self._order_data`` holds one row per
          (cust_code, product_code) pair whose summed ``sale_qty`` is
          non-zero; ``sale_qty`` then contains the score, not the quantity.

          Args:
              city_uuid: Identifier forwarded to ``MySqlDao.load_order_data``.
          """
          print("item-cf: 正在加载order_info...")
          orders = self._dao.load_order_data(city_uuid)
          # Take an explicit copy of the column selection so the assignment
          # below writes to our own frame instead of a view of the DAO result
          # (avoids pandas chained-assignment / SettingWithCopyWarning).
          # Presumably FEATURE_COLUMNS includes cust_code, product_code and
          # sale_qty, since all three are used below -- verify in OrderConfig.
          orders = orders[OrderConfig.FEATURE_COLUMNS].copy()

          # Data cleaning: missing quantities count as zero, quantities are
          # summed per customer/product pair, and pairs that net to zero are
          # dropped.
          orders["sale_qty"] = orders["sale_qty"].fillna(0)
          orders = orders.groupby(
              ["cust_code", "product_code"], as_index=False
          )["sale_qty"].sum()
          orders = orders[orders["sale_qty"] != 0].copy()

          # Normalization: standardize, then apply a logistic function scaled
          # by 100. Skipped when nothing is left, because StandardScaler
          # refuses to fit on zero samples.
          if not orders.empty:
              scaler = StandardScaler()
              standardized = scaler.fit_transform(
                  orders["sale_qty"].values.reshape(-1, 1)
              )
              orders["sale_qty"] = (
                  (1 / (1 + np.exp(-standardized))) * 100
              ).flatten()

          self._order_data = orders

      def generate_product_scores(self, save_path):
          """Write the scores to ``save_path`` as a UTF-8 CSV without index.

          Renames ``sale_qty`` to ``score`` on ``self._order_data``, sorts by
          ``product_code`` ascending and ``score`` descending, stores the
          ``product_code``/``cust_code``/``score`` columns in
          ``self._score_data`` and writes them out.

          Args:
              save_path: Destination passed to ``DataFrame.to_csv``. When it
                  is a filesystem path, missing parent directories are
                  created first.
          """
          # Function-scope import keeps this block self-contained without
          # touching the file's import section.
          import os

          self._order_data = self._order_data.rename(columns={'sale_qty': 'score'})
          self._order_data = self._order_data.sort_values(
              ['product_code', 'score'], ascending=[True, False]
          )
          self._score_data = self._order_data[['product_code', 'cust_code', 'score']]

          # to_csv does not create directories; do it here so a fresh checkout
          # without the output folder does not fail. Non-path targets (e.g.
          # open file objects) are passed through untouched.
          if isinstance(save_path, (str, os.PathLike)):
              parent_dir = os.path.dirname(os.fspath(save_path))
              if parent_dir:
                  os.makedirs(parent_dir, exist_ok=True)

          self._score_data.to_csv(save_path, index=False, encoding="utf-8")
  if __name__ == "__main__":
      # Script entry point: score one hard-coded city and write the CSV to a
      # fixed relative path.
      output_csv = "./data/itemcf/scores.csv"
      scorer = UserItemScore("00000000000000000000000011445301")
      scorer.generate_product_scores(output_csv)