from database.dao.mysql_dao import MySqlDao
from models.item2vec import Item2Vec
from models.rank.data.config import OrderConfig, ProductConfig
from models.rank.data.utils import sample_data_clear
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler


class Item2VecModel:
    """Item2Vec-backed lookup of similar products and recommended customers.

    Every query is scoped to the city identified by ``city_uuid``.
    """

    def __init__(self, city_uuid):
        """Create the DAO and the Item2Vec model for ``city_uuid``."""
        self._dao = MySqlDao()
        self._city_uuid = city_uuid
        self._item2vec_model = Item2Vec(city_uuid)

    def generate_product_similarity_map(self, product_code) -> pd.DataFrame:
        """Build the product similarity table for ``product_code``.

        The product's feature columns are loaded and cleaned, handed to the
        Item2Vec model, and the resulting similarity data is inner-joined
        with the city's product catalogue on ``product_code``.

        Args:
            product_code: Code of the product to find similar products for.

        Returns:
            A DataFrame holding the similarity data joined with
            ``ProductConfig.FEATURE_COLUMNS`` and ``product_name``.
        """
        product = self._dao.get_product_by_id(self._city_uuid, product_code)
        product = product[ProductConfig.FEATURE_COLUMNS]
        product = sample_data_clear(product, ProductConfig)

        similarity_map = pd.DataFrame(
            self._item2vec_model.get_similarity_map(product)
        )

        catalogue_columns = ProductConfig.FEATURE_COLUMNS + ["product_name"]
        product_list = self._dao.load_product_data(self._city_uuid)[catalogue_columns]

        # NOTE: no filter on ``product_code`` is applied here, so if the model
        # returns the queried product itself it stays in the result.
        return similarity_map.merge(product_list, on="product_code", how="inner")

    def get_similarity_list(self, product_code, top=40) -> list:
        """Return the first ``top`` product codes of the similarity table.

        Args:
            product_code: Code of the product to find similar products for.
            top: Maximum number of product codes to return.

        Returns:
            A list of product codes in the row order of the similarity table.
            Presumably that order is most-similar-first; this depends on
            ``Item2Vec.get_similarity_map`` -- TODO confirm.
        """
        similarity_map = self.generate_product_similarity_map(product_code)
        return similarity_map["product_code"].to_list()[:top]

    def get_recommend_cust_list(self, product_code, top=100) -> pd.DataFrame:
        """Return the customers recommended for ``product_code``.

        Orders of the similar products (``get_similarity_list`` with its
        default size) are averaged per customer and product. For each product
        the ``top`` customers by mean sales are kept, and the kept quantities
        are summed per customer. Customers with a positive total are scored.

        Args:
            product_code: Code of the product to recommend customers for.
            top: Number of best-selling customers kept per similar product.

        Returns:
            A DataFrame with ``cust_code``, ``sale_qty`` and
            ``recommend_score`` (0-100), sorted by ``sale_qty`` descending.
            It is empty when no customer has positive sales.
        """
        product_list = self.get_similarity_list(product_code)
        order_data = self._dao.get_order_by_product_ids(self._city_uuid, product_list)
        order_data = order_data[OrderConfig.FEATURE_COLUMNS]

        # ``assign`` builds a new frame instead of writing into the column
        # selection above, which avoids pandas' SettingWithCopyWarning.
        order_data = order_data.assign(sale_qty=order_data["sale_qty"].fillna(0))
        order_data = order_data.groupby(
            ["cust_code", "product_code"], as_index=False
        )["sale_qty"].mean()

        # Per product, keep the `top` customers with the highest mean sales.
        order_data = (
            order_data
            .sort_values(["product_code", "sale_qty"], ascending=[True, False])
            .groupby("product_code")
            .head(top)
        )

        recommend_cust = (
            order_data.groupby(["cust_code"], as_index=False)["sale_qty"].sum()
            .query("sale_qty > 0")
            .sort_values(["sale_qty"], ascending=[False])
        )

        # StandardScaler rejects an array with zero samples, so return an
        # empty result with the same columns instead of raising.
        if recommend_cust.empty:
            return recommend_cust.assign(
                recommend_score=pd.Series(dtype="float64")
            )

        # Standardise the sales totals, then squash them through a sigmoid
        # and scale to the 0-100 range.
        scaler = StandardScaler()
        normalized = scaler.fit_transform(
            recommend_cust["sale_qty"].values.reshape(-1, 1)
        )
        recommend_cust["recommend_score"] = (
            (1 / (1 + np.exp(-normalized))) * 100
        ).flatten()
        return recommend_cust


if __name__ == "__main__":
    # Manual smoke run against a fixed city and product.
    city_uuid = "00000000000000000000000011445301"
    product_id = "350139"
    model = Item2VecModel(city_uuid)
    model.get_similarity_list(product_id)