Ver código fonte

规范输出结果

Sherlock 11 meses atrás
pai
commit
946e2f6651
4 arquivos alterados com 73 adições e 21 exclusões
  1. 28 2
      inference.py
  2. 4 5
      models/item2vec/inference.py
  3. 2 2
      models/rank/data/preprocess.py
  4. 39 12
      utils/result_process.py

+ 28 - 2
inference.py

@@ -92,7 +92,31 @@ def generate_features_shap(city_uuid, product_id, delivery_count):
 def eval(city_uuid, product_code):
     """推荐效果验证"""
     eval_report = get_cust_list_from_history_order(city_uuid, product_code)
-    eval_report.to_csv("./data/eval.csv", index=False)
+    eval_report.to_csv("./data/效果验证表.csv", index=False)
+    
+def generate_similarity_product(product_code):
+    product_similarity_map = item2vec.generate_product_similarity_map(product_code)
+    product_similarity_map = product_similarity_map[["product_name", "similarity", "brand_name", "factory_name", "is_low_tar", "is_medium", "is_tiny", "is_coarse", "is_exploding_beads", "is_abnormity", "is_cig", "is_chuangxin", "direct_retail_price", "tbc_total_length", "product_style"]]
+    product_similarity_map = product_similarity_map.rename(
+        columns={
+            "product_name": "卷烟名称",
+            "similarity": "相似度",
+            "factory_name": "生产厂商",
+            "brand_name": "品牌名称",
+            "is_low_tar":                "低焦油卷烟",
+            "is_medium":                 "中支烟",
+            "is_tiny":                   "细支烟",
+            "is_coarse":                 "粗支烟",
+            "is_exploding_beads":        "爆珠烟",
+            "is_abnormity":              "异形包装",
+            "is_cig":                    "雪茄烟",
+            "is_chuangxin":              "创新品类",
+            "direct_retail_price":       "卷烟建议零售价",
+            "tbc_total_length":          "烟支总长度",
+            "product_style":             "包装类型",
+        }
+    )
+    product_similarity_map.to_excel("./data/相似卷烟表.xlsx", index=False)
 
 def generate_delivery_strategy():
     
@@ -102,8 +126,10 @@ def run():
     pass
 
 if __name__ == '__main__':
-    # generate_features_shap("00000000000000000000000011445301", "350139", delivery_count=5000)
+    generate_features_shap("00000000000000000000000011445301", "350139", delivery_count=5000)
+    generate_similarity_product("350139")
     eval("00000000000000000000000011445301", "350355")
+    
     # recommend_list = get_recommend_list_by_gbdt_lr("00000000000000000000000011445301", "350139")
     # recommend_list = pd.DataFrame(recommend_list)
     # recommend_list.to_csv("./data/recommend_list.csv", index=False, encoding="utf-8-sig")

+ 4 - 5
models/item2vec/inference.py

@@ -19,7 +19,7 @@ class Item2VecModel:
         
         similarity_map = self._item2vec_model.get_similarity_map(product)
         similarity_map = pd.DataFrame(similarity_map)
-        product_list = self._dao.load_product_data(self._city_uuid)[ProductConfig.FEATURE_COLUMNS]
+        product_list = self._dao.load_product_data(self._city_uuid)[ProductConfig.FEATURE_COLUMNS + ["product_name"]]
         similarity_map = similarity_map.merge(product_list, on="product_code", how="inner")
         # self._similarity_map = self._similarity_map.query(f"product_code != {product_code}")
         return similarity_map
@@ -27,7 +27,6 @@ class Item2VecModel:
     def get_similarity_list(self, product_code, top=40):
         """获取与指定卷烟最相似的top k个卷烟"""
         similarity_map = self.generate_product_similarity_map(product_code)
-        similarity_map.to_excel("./data/product_similarity.xlsx", index=False)
         similarity_list = similarity_map["product_code"].to_list()
         similarity_list = similarity_list[:top]
         return similarity_list
@@ -37,7 +36,7 @@ class Item2VecModel:
         product_list = self.get_similarity_list(product_code)
         order_data = self._dao.get_order_by_product_ids(self._city_uuid, product_list)[OrderConfig.FEATURE_COLUMNS]
         order_data["sale_qty"] = order_data["sale_qty"].fillna(0)
-        order_data = order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
+        order_data = order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].mean()
         
         
         # 按照卷烟分组,取每款卷烟售卖最好的前50个商户
@@ -57,8 +56,8 @@ class Item2VecModel:
         # 对销量进行归一化
         scaler = StandardScaler()
         normalized = scaler.fit_transform(recommend_cust["sale_qty"].values.reshape(-1, 1))
-        recommend_cust["sale_qty"] = ((1 / (1 + np.exp(-normalized))) * 100).flatten()
-        recommend_cust = recommend_cust.rename(columns={"sale_qty": "recommend_score"})
+        recommend_cust["recommend_score"] = ((1 / (1 + np.exp(-normalized))) * 100).flatten()
+        # recommend_cust = recommend_cust.rename(columns={"sale_qty": "recommend_score"})
         # recommend_cust.to_csv("./data/item2vec_recommend.csv", index=False)
         
         return recommend_cust

+ 2 - 2
models/rank/data/preprocess.py

@@ -83,8 +83,8 @@ class DataProcess():
         self._order_data["sale_qty"] = self._order_data["sale_qty"].fillna(0)
         self._order_data = self._order_data.infer_objects(copy=False)
         
-        # 将销售量进行分组求
-        self._order_data = self._order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].sum()
+        # 将销售量按商户、卷烟分组求月平均销量
+        self._order_data = self._order_data.groupby(["cust_code", "product_code"], as_index=False)["sale_qty"].mean()
         
         
     def _clean_shopping_data(self):

+ 39 - 12
utils/result_process.py

@@ -37,29 +37,36 @@ def generate_report(city_uuid, data, filter_dict, recommend_data, delivery_count
     valid_cust_feats = group_sums[group_sums > 0].index.tolist()
     cust_relation = data[data["cust_feat"].isin(valid_cust_feats)]
     cust_relation = cust_relation.reset_index(drop=True)
-    cust_relation.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
+    cust_relation = cust_relation.rename(
+        columns = {
+            "product_feat": "卷烟特征",
+            "cust_feat": "商户特征",
+            "relation": "相关性"
+        }
+    )
+    cust_relation.to_csv(os.path.join(save_dir, "品规商户特征关系表.csv"), index=False, encoding='utf-8-sig')
     
     
     # 2. 品规信息
     cust_relation[:20].to_csv(os.path.join(save_dir, "cust_relation.csv"), index=False, encoding='utf-8-sig')
-    with open(os.path.join(save_dir, "product_info.csv"), "w", encoding='utf-8-sig') as f:
+    with open(os.path.join(save_dir, "卷烟信息表.csv"), "w", encoding='utf-8-sig') as f:
         for key, value in filter_dict.items():
             if key != 'product_code':
                 f.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")
                 
     # 3. 生成推荐报告
     recommend_report = generate_recommend_report(city_uuid, recommend_data, delivery_count)
-    recommend_report.to_csv(os.path.join(save_dir, "recommend_report.csv"), index=False, encoding="utf-8-sig")
+    recommend_report.to_csv(os.path.join(save_dir, "商户售卖推荐表.csv"), index=False, encoding="utf-8-sig")
     
 def generate_recommend_report(city_uuid, recommend_data, delivery_count):
     recommend_data = pd.DataFrame(recommend_data)
     
-    recpmmend_list = recommend_data["cust_code"].to_list()
-    recommend_cust_info = dao.get_cust_by_ids(city_uuid, recpmmend_list)
+    recommend_list = recommend_data["cust_code"].to_list()
+    recommend_cust_info = dao.get_cust_by_ids(city_uuid, recommend_list)
     
     cust_ids = recommend_cust_info.set_index("BB_RETAIL_CUSTOMER_CODE")
     recommend_data = recommend_data.join(cust_ids, on="cust_code", how="inner")
-    recommend_data = recommend_data[["cust_code", "BB_RETAIL_CUSTOMER_NAME", "recommend_score"]]
+    recommend_data = recommend_data[["cust_code", "BB_RETAIL_CUSTOMER_NAME", "sale_qty", "recommend_score"]]
    # 1. 计算每个商户的理论应得数量(带小数)
     recommend_data["delivery_float"] = (
         recommend_data["recommend_score"] / recommend_data["recommend_score"].sum() * delivery_count
@@ -80,7 +87,17 @@ def generate_recommend_report(city_uuid, recommend_data, delivery_count):
     recommend_data = recommend_data.reset_index()
     # 5. 按recommend_score从大到小重新排序
     recommend_data = recommend_data.sort_values("index")
-    recommend_data = recommend_data.rename(columns={"index": "推荐序号", "BB_RETAIL_CUSTOMER_NAME": "商户名称", "recommend_score": "匹配评分", "delivery_count": "建议投放量(条)"})
+    recommend_data["sale_qty"] = recommend_data["sale_qty"].round(0).astype(int) # 将月均销量四舍五入取整
+    recommend_data = recommend_data.rename(
+        columns={
+            "index": "推荐序号", 
+            "cust_code": "商户编号", 
+            "BB_RETAIL_CUSTOMER_NAME": "商户名称", 
+            "sale_qty": "历史月均销量", 
+            "recommend_score": "推荐系数", 
+            "delivery_count": "建议投放量(条)"
+            }
+        )
     recommend_data["推荐序号"] = recommend_data["推荐序号"] + 1
     
     return recommend_data
@@ -93,19 +110,29 @@ def get_cust_list_from_history_order(city_uuid, product_code):
     # 确保cust_code是字符串类型
     order_data["cust_code"] = order_data["cust_code"].astype(str)
     
-    order_data = order_data.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]].sum().reset_index()
+    order_data = order_data.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]].mean().reset_index()
+    order_data["sale_qty"] = order_data["sale_qty"].round(0).astype(int)
     order_data = order_data.sort_values("sale_qty", ascending=False)
     
     # 读取推荐数据
-    recommend_data = pd.read_csv('./data/recommend_report.csv')
+    recommend_data = pd.read_csv('./data/商户售卖推荐表.csv')
     # recommend_data = recommend_data.drop(columns=["sale_qty"])
     # 确保recommend_data中的cust_code也是字符串类型
-    recommend_data["cust_code"] = recommend_data["cust_code"].astype(str)
-    cust_ids = recommend_data.set_index("cust_code")
+    recommend_data["商户编号"] = recommend_data["商户编号"].astype(str)
+    cust_ids = recommend_data.set_index("商户编号")
     
     # 执行合并操作
     merge_data = order_data.join(cust_ids, on="cust_code", how="left")
-    merge_data = merge_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号"]]
+    merge_data = merge_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号", "推荐系数", "历史月均销量"]]
+    merge_data = merge_data.rename(
+        columns={
+            "cust_code": "商户编号", 
+            "cust_name": "商户名称", 
+            "product_code": "卷烟编码", 
+            "product_name": "卷烟名称", 
+            "sale_qty": "月均销量"
+        }
+    )
     return merge_data
         
 if __name__ == "__main__":