Ver código fonte

封装推理流程,编写api后台调用接口

Sherlock 11 meses atrás
pai
commit
78e8cea278
9 arquivos alterados com 367 adições e 69 exclusões
  1. 66 0
      api.py
  2. 14 0
      api_test.py
  3. 2 2
      gnerate_report.py
  4. 3 2
      models/rank/__init__.py
  5. 23 48
      models/rank/gbdt_lr_inference.py
  6. 4 4
      models/recommend.py
  7. 151 12
      report.py
  8. 103 0
      utils/reports_utils.py
  9. 1 1
      utils/result_process.py

+ 66 - 0
api.py

@@ -0,0 +1,66 @@
from fastapi import FastAPI, HTTPException, Request, status
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from database.dao.mysql_dao import MySqlDao
from models import Recommend
import os
from pydantic import BaseModel
import uvicorn

app = FastAPI()
dao = MySqlDao()


# Global handler: turn request-validation failures into the same
# {code, msg, data} envelope the success path uses, instead of
# FastAPI's default 422 payload.
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    return JSONResponse(
        status_code=status.HTTP_400_BAD_REQUEST,
        content={
            "code": 400,
            "msg": "请求参数错误",
            "data": {
                "detail": exc.errors(),
                "body": exc.body,
            },
        },
    )


class RecommendRequest(BaseModel):
    """Request body for POST /recommend."""

    city_uuid: str              # city id (selects the per-city model weights)
    product_code: str           # cigarette product code
    recall_cust_count: int      # number of merchants to recall/recommend
    delivery_count: int         # total delivery quantity to distribute


@app.post("/recommend")
def recommend(request: RecommendRequest):
    """Return a ranked merchant list with per-merchant delivery quotas.

    Routes to the GBDT+LR ranker when the product already appears in the
    city's order history, otherwise falls back to item2vec recall.
    """
    gbdtlr_model_path = os.path.join("./models/rank/weights", request.city_uuid, "gbdtlr_model.pkl")
    if not os.path.exists(gbdtlr_model_path):
        # NOTE(review): kept as code 200 for backward compatibility with
        # existing clients, although a 4xx status would be more conventional.
        return {"code": 200, "msg": "model not defined", "data": {"recommendationInfo": "该城市的模型未训练,请先进行训练"}}

    recommend_model = Recommend(request.city_uuid)

    # A product with no order history cannot be scored by the supervised
    # ranker, so route it through item2vec similarity recall instead.
    # (fixed typo: products_in_oreder -> products_in_order)
    products_in_order = dao.get_product_from_order(request.city_uuid)["product_code"].unique().tolist()
    if request.product_code in products_in_order:
        recommend_list = recommend_model.get_recommend_list_by_gbdtlr(request.product_code, recall_count=request.recall_cust_count)
    else:
        recommend_list = recommend_model.get_recommend_list_by_item2vec(request.product_code, recall_count=request.recall_cust_count)
    recommend_data = recommend_model.get_recommend_and_delivery(recommend_list, delivery_count=request.delivery_count)

    # 1-based ranking; avoids shadowing the builtin `id`.
    response_data = [
        {
            "id": rank,
            "cust_code": data["cust_code"],
            "recommend_score": data["recommend_score"],
            "delivery_count": data["delivery_count"],
        }
        for rank, data in enumerate(recommend_data, start=1)
    ]

    return {"code": 200, "msg": "success", "data": {"recommendationInfo": response_data}}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

+ 14 - 0
api_test.py

@@ -0,0 +1,14 @@
"""Manual smoke test for the /recommend endpoint (start api.py first)."""
import requests

URL = "http://127.0.0.1:8000/recommend"

payload = {
    "city_uuid": "00000000000000000000000011445301",
    "product_code": "350139",
    "recall_cust_count": 1000,
    "delivery_count": 5000,
}

# json= serializes the payload and sets Content-Type: application/json,
# replacing the manual json.dumps + headers dance.
response = requests.post(URL, json=payload)
print(response.json())

+ 2 - 2
gnerate_report.py

@@ -3,7 +3,7 @@ from database import RedisDatabaseHelper, MySqlDao
 from models.item2vec import Item2VecModel
 from models.item2vec import Item2VecModel
 from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
 from models.rank.data.config import CustConfig, ProductConfig, ShopConfig, OrderConfig
 from models.rank.data.utils import sample_data_clear
 from models.rank.data.utils import sample_data_clear
-from models.rank.gbdt_lr_inference import GbdtLrModel
+from models.rank.gbdt_lr_inference import GbdtLrModel, generate_feats_map
 from utils.result_process import get_cust_list_from_history_order, split_relation_subtable, generate_report
 from utils.result_process import get_cust_list_from_history_order, split_relation_subtable, generate_report
 import pandas as pd
 import pandas as pd
 
 
@@ -64,7 +64,7 @@ def generate_recommend_sample(city_uuid, product_id):
     cust_feats = shop_data.set_index("cust_code")
     cust_feats = shop_data.set_index("cust_code")
     cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
     cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
     
     
-    feats_map = gbdtlr_model.generate_feats_map(product_data, cust_data)
+    feats_map = generate_feats_map(product_data, cust_data)
     
     
     return feats_map, filter_dict, cust_list
     return feats_map, filter_dict, cust_list
 
 

+ 3 - 2
models/rank/__init__.py

@@ -2,10 +2,11 @@
 # -*- coding:utf-8 -*-
 # -*- coding:utf-8 -*-
 from models.rank.data.preprocess import DataProcess
 from models.rank.data.preprocess import DataProcess
 from models.rank.gbdt_lr import Trainer
 from models.rank.gbdt_lr import Trainer
-from models.rank.gbdt_lr_inference import GbdtLrModel
+from models.rank.gbdt_lr_inference import GbdtLrModel, generate_feats_map
 
 
 __all__ = [
 __all__ = [
     "DataProcess",
     "DataProcess",
     "Trainer",
     "Trainer",
-    "GbdtLrModel"
+    "GbdtLrModel",
+    "generate_feats_map"
 ]
 ]

+ 23 - 48
models/rank/gbdt_lr_inference.py

@@ -12,7 +12,29 @@ import shap
 from tqdm import tqdm
 from tqdm import tqdm
 from utils import split_relation_subtable
 from utils import split_relation_subtable
 import os
 import os
-import tempfile
+
def generate_feats_map(product_data, cust_data):
    """Build the product x customer feature matrix used for ranking.

    Forms the Cartesian product of one product row with N customer rows,
    one-hot encodes the configured categorical columns and standardizes
    the remaining numeric ones.

    Bug fix: inputs are copied first so the caller's DataFrames are no
    longer mutated by the temporary "descartes" join-key column.
    """
    cust_data = cust_data.copy()
    product_data = product_data.copy()

    # Cartesian join via a constant key column.
    cust_data["descartes"] = 1
    product_data["descartes"] = 1
    feats_map = pd.merge(cust_data, product_data, on="descartes").drop("descartes", axis=1)
    # Identifier columns are not model features.
    feats_map.drop('BB_RETAIL_CUSTOMER_CODE', axis=1, inplace=True)
    feats_map.drop('product_code', axis=1, inplace=True)

    # One-hot encode every configured categorical feature; everything
    # that is not in the one-hot config is treated as numeric.
    onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT, **ShopConfig.ONEHOT_CAT}
    onehot_columns = list(onehot_feats.keys())
    numeric_columns = feats_map.drop(onehot_columns, axis=1).columns
    feats_map = one_hot_embedding(feats_map, onehot_feats)

    # Standardize numeric features.
    # NOTE(review): the scaler is fit on inference data rather than
    # re-using training-time statistics — confirm this matches how the
    # GBDT+LR model was trained.
    if len(numeric_columns) != 0:
        scaler = StandardScaler()
        feats_map[numeric_columns] = scaler.fit_transform(feats_map[numeric_columns])

    return feats_map
 
 
 class GbdtLrModel:
 class GbdtLrModel:
     def __init__(self, model_path):
     def __init__(self, model_path):
@@ -30,33 +52,7 @@ class GbdtLrModel:
         self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
         self.product_data = self._mysql_dao.get_product_by_id(city_uuid, product_id)[ProductConfig.FEATURE_COLUMNS]
         self.custs_data = self._mysql_dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
         self.custs_data = self._mysql_dao.load_cust_data(city_uuid)[CustConfig.FEATURE_COLUMNS]
     
     
-    def generate_feats_map(self, product_data, cust_data):
-        """组合卷烟、商户特征矩阵"""
-        # 笛卡尔积联合
-        cust_data["descartes"] = 1
-        product_data["descartes"] = 1
-        feats_map = pd.merge(cust_data, product_data, on="descartes").drop("descartes", axis=1)
-        # recall_cust_list = feats_map["BB_RETAIL_CUSTOMER_CODE"].to_list()
-        feats_map.drop('BB_RETAIL_CUSTOMER_CODE', axis=1, inplace=True)
-        feats_map.drop('product_code', axis=1, inplace=True)
-        
-        # onehot编码
-        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT, **ShopConfig.ONEHOT_CAT}
-        onehot_columns = list(onehot_feats.keys())
-        numeric_columns = feats_map.drop(onehot_columns, axis=1).columns
-        feats_map = one_hot_embedding(feats_map, onehot_feats)
-        
-        # 数字特征归一化
-        if len(numeric_columns) != 0:
-            scaler = StandardScaler()
-            feats_map[numeric_columns] = scaler.fit_transform(feats_map[numeric_columns])
-            
-        return feats_map
-    
     def get_recommend_list(self, recommend_sample, recall_list):
     def get_recommend_list(self, recommend_sample, recall_list):
-        # gbdt_preds = self.gbdt_model.apply(recommend_sample)[:, :, 0]
-        # gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
-        # scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1]
         
         
         gbdt_preds = self.gbdt_model.predict(recommend_sample, pred_leaf=True)
         gbdt_preds = self.gbdt_model.predict(recommend_sample, pred_leaf=True)
         gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
         gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
@@ -74,27 +70,6 @@ class GbdtLrModel:
         )
         )
         
         
         return recommend_list
         return recommend_list
-    
-    def inference_from_sample(self, sample):
-        inference_sample = sample.drop(columns=["BB_RETAIL_CUSTOMER_CODE", "product_code", "sale_qty", "product_name", "cust_code"])
-        
-        onehot_feats = {**CustConfig.ONEHOT_CAT, **ProductConfig.ONEHOT_CAT, **ShopConfig.ONEHOT_CAT}
-        onehot_columns = list(onehot_feats.keys())
-        numeric_columns = inference_sample.drop(onehot_columns, axis=1).columns
-        inference_sample = one_hot_embedding(inference_sample, onehot_feats)
-        print(numeric_columns)
-        # 数字特征归一化
-        if len(numeric_columns) != 0:
-            scaler = StandardScaler()
-            inference_sample[numeric_columns] = scaler.fit_transform(inference_sample[numeric_columns])
-        
-        gbdt_preds = self.gbdt_model.predict(inference_sample, pred_leaf=True)
-        gbdt_feats_encoded = self.onehot_encoder.transform(gbdt_preds)
-        scores = self.lr_model.predict_proba(gbdt_feats_encoded)[:, 1]
-        
-        sample["score"] = scores
-        
-        return sample[["cust_code", "product_code", "product_name", "sale_qty", "score"] + ProductConfig.FEATURE_COLUMNS]
         
         
     
     
     def generate_feats_importance(self):
     def generate_feats_importance(self):

+ 4 - 4
models/recommend.py

@@ -4,7 +4,7 @@ import os
 from models.item2vec.inference import Item2VecModel
 from models.item2vec.inference import Item2VecModel
 from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
 from models.rank.data.config import CustConfig, ProductConfig, ShopConfig
 from models.rank.data.utils import sample_data_clear
 from models.rank.data.utils import sample_data_clear
-from models.rank.gbdt_lr_inference import GbdtLrModel
+from models.rank import GbdtLrModel, generate_feats_map
 import pandas as pd
 import pandas as pd
 
 
 
 
@@ -67,7 +67,7 @@ class Recommend:
         cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
         cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")
         
         
         # 获取推理用的feats_map
         # 获取推理用的feats_map
-        feats_map = self._gbdtlr_model.generate_feats_map(product_data, cust_data)
+        feats_map = generate_feats_map(product_data, cust_data)
         recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
         recommend_list = self._gbdtlr_model.get_recommend_list(feats_map, recall_cust_list)
         return recommend_list
         return recommend_list
     
     
@@ -77,6 +77,7 @@ class Recommend:
         recommend_list = recommend_list.drop(columns=["sale_qty"])
         recommend_list = recommend_list.drop(columns=["sale_qty"])
         recommend_list = recommend_list.to_dict(orient='records')
         recommend_list = recommend_list.to_dict(orient='records')
         return recommend_list[:recall_count]
         return recommend_list[:recall_count]
+        # return recommend_list
     
     
     def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
     def get_recommend_and_delivery(self, recommend_list, delivery_count=5000):
         """根据推荐列表生成投放分配"""
         """根据推荐列表生成投放分配"""
@@ -110,5 +111,4 @@ if __name__ == "__main__":
     recommend = Recommend(city_uuid)
     recommend = Recommend(city_uuid)
     recommend_list = recommend.get_recommend_list_by_item2vec(product_id)
     recommend_list = recommend.get_recommend_list_by_item2vec(product_id)
     recommend_data = recommend.get_recommend_and_delivery(recommend_list)
     recommend_data = recommend.get_recommend_and_delivery(recommend_list)
-    for i in recommend_data:
-        print(i)
+    

+ 151 - 12
report.py

@@ -1,30 +1,169 @@
 from database.dao.mysql_dao import MySqlDao
 from database.dao.mysql_dao import MySqlDao
 from models import Recommend
 from models import Recommend
from models.rank.data.config import CustConfig, ImportanceFeaturesMap, ProductConfig, ShopConfig
from models.rank.data.utils import sample_data_clear
from models.rank import generate_feats_map
import os
import argparse
import pandas as pd
from utils.reports_utils import feats_relation_process, calculate_delivery_by_recommend_data, eval_report_process


class ReportUtils:
    """Builds the per-city, per-product Excel report bundle under args.report_dir."""

    def __init__(self, city_uuid, product_id):
        self._recommend_model = Recommend(city_uuid)
        self._city_uuid = city_uuid
        self._product_id = product_id
        self._dao = MySqlDao()
        # Fetched once; every report method works from this product row.
        self._product_data = self._dao.get_product_by_id(self._city_uuid, self._product_id)[ProductConfig.FEATURE_COLUMNS]

    def _get_recommend_data(self, args):
        """Recall the recommended merchant list for this product."""
        # Products already present in the order history can be ranked by
        # the supervised GBDT+LR model; unseen products fall back to
        # item2vec similarity recall.
        products_in_order = self._dao.get_product_from_order(self._city_uuid)["product_code"].unique().tolist()
        if self._product_id in products_in_order:
            recommend_data = self._recommend_model.get_recommend_list_by_gbdtlr(self._product_id, recall_count=args.recall_count)
        else:
            recommend_data = self._recommend_model.get_recommend_list_by_item2vec(self._product_id, recall_count=args.recall_count)
        return recommend_data

    def _generate_feats_map(self, args):
        """Build the product x merchant feature matrix for the recalled merchants."""
        recommend_data = self._get_recommend_data(args)
        recommend_list = [item["cust_code"] for item in recommend_data]
        product_data = self._product_data.copy()

        # Merchant and shop-district features for the recalled merchants.
        cust_data = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)[CustConfig.FEATURE_COLUMNS]
        shop_data = self._dao.get_shop_by_ids(self._city_uuid, recommend_list)[ShopConfig.FEATURE_COLUMNS]

        product_data = sample_data_clear(product_data, ProductConfig)
        cust_data = sample_data_clear(cust_data, CustConfig)
        shop_data = sample_data_clear(shop_data, ShopConfig)

        # Shop features are keyed by cust_code and joined onto the
        # merchant rows.
        cust_feats = shop_data.set_index("cust_code")
        cust_data = cust_data.join(cust_feats, on="BB_RETAIL_CUSTOMER_CODE", how="inner")

        return generate_feats_map(product_data, cust_data)

    def _get_product_content(self):
        """Return the product's feature row as a plain dict."""
        return self._product_data.copy().to_dict('records')[0]

    def generate_feats_ralation_report(self, args):
        """Write the product/merchant feature-relation (SHAP) report.

        NOTE(review): 'ralation' is a typo, kept for caller compatibility.
        """
        feats_map = self._generate_feats_map(args)
        product_content = self._get_product_content()
        # SHAP interaction values from the ranking model.
        shap_result = self._recommend_model._gbdtlr_model.generate_shap_interance(feats_map)
        report = feats_relation_process(shap_result, product_content)
        report.to_excel(os.path.join(args.report_dir, "品规商户特征关系表.xlsx"), index=False)

    def generate_product_report(self, args):
        """Write the product information sheet.

        NOTE(review): this writes plain "key, value" text into a file
        named .xlsx — Excel only opens it via text import. Confirm the
        extension is intentional before changing it (other tools may
        rely on the name).
        """
        product_data = self._get_product_content()
        with open(os.path.join(args.report_dir, "卷烟信息表.xlsx"), "w", encoding='utf-8-sig') as file:
            for key, value in product_data.items():
                if key != 'product_code':
                    file.write(f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}, {value}\n")

    def generate_recommend_report(self, args):
        """Write the merchant recommendation + delivery-quota report."""
        recommend_data = self._get_recommend_data(args)
        recommend_list = [item["cust_code"] for item in recommend_data]
        recommend_cust_infos = self._dao.get_cust_by_ids(self._city_uuid, recommend_list)
        report = calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, args.delivery_count)
        report.to_excel(os.path.join(args.report_dir, "商户售卖推荐表.xlsx"), index=False)

    def generate_similarity_product_report(self, args):
        """Write the similar-products sheet from the item2vec model."""
        product_similarity_map = self._recommend_model._item2vec_model.generate_product_similarity_map(self._product_id)
        product_similarity_map = product_similarity_map[["product_name", "similarity", "brand_name", "factory_name", "is_low_tar", "is_medium", "is_tiny", "is_coarse", "is_exploding_beads", "is_abnormity", "is_cig", "is_chuangxin", "direct_retail_price", "tbc_total_length", "product_style"]]
        product_similarity_map = product_similarity_map.rename(
            columns={
                "product_name":                 "卷烟名称",
                "similarity":                   "相似度",
                "factory_name":                 "生产厂商",
                "brand_name":                   "品牌名称",
                "is_low_tar":                   "低焦油卷烟",
                "is_medium":                    "中支烟",
                "is_tiny":                      "细支烟",
                "is_coarse":                    "粗支烟",
                "is_exploding_beads":           "爆珠烟",
                "is_abnormity":                 "异形包装",
                "is_cig":                       "雪茄烟",
                "is_chuangxin":                 "创新品类",
                "direct_retail_price":          "卷烟建议零售价",
                "tbc_total_length":             "烟支总长度",
                "product_style":                "包装类型",
            }
        )
        product_similarity_map.to_excel(os.path.join(args.report_dir, "相似卷烟表.xlsx"), index=False)

    def generate_eval_data(self, args):
        """Write the evaluation sheet comparing real sales with the recommendation."""
        # Hard-coded fallback: product 350139 is evaluated against 350355.
        # TODO(review): confirm this mapping is still wanted.
        if self._product_id == '350139':
            eval_product_id = "350355"
        else:
            eval_product_id = self._product_id
        eval_order_data = self._dao.get_eval_order_by_product(self._city_uuid, eval_product_id)

        recommend_report_path = os.path.join(args.report_dir, "商户售卖推荐表.xlsx")
        if not os.path.exists(recommend_report_path):
            # Bug fix: the original printed this warning but fell through
            # to read_excel and crashed with FileNotFoundError.
            print("请先生成'商户售卖推荐表'")
            return
        recommend_data = pd.read_excel(recommend_report_path)
        report = eval_report_process(eval_order_data, recommend_data)

        report.to_excel(os.path.join(args.report_dir, "效果验证表.xlsx"))


def generate_all_data(args, report_utils):
    """Generate every report in sequence."""
    report_utils.generate_feats_ralation_report(args)
    report_utils.generate_product_report(args)
    report_utils.generate_recommend_report(args)
    report_utils.generate_similarity_product_report(args)
    report_utils.generate_eval_data(args)


def run():
    """CLI entry point: parse args, validate the model exists, emit reports."""
    parser = argparse.ArgumentParser()

    parser.add_argument("--city_uuid", type=str, default="00000000000000000000000011445301")
    parser.add_argument("--product_id", type=str, default="350139")
    parser.add_argument("--recall_count", type=int, default=100)
    parser.add_argument("--delivery_count", type=int, default=5000)

    parser.add_argument("--all_report", action='store_true')

    args = parser.parse_args()

    # Derived paths are stashed on args so report methods share them.
    args.gbdtlr_model_path = os.path.join("./models/rank/weights/", args.city_uuid, "gbdtlr_model.pkl")
    args.report_dir = os.path.join("./data/report", args.city_uuid)
    if not os.path.exists(args.gbdtlr_model_path):
        # Bug fix: the original printed the warning but kept going and
        # would then fail while generating reports.
        print("该城市的模型还未训练,请先启动训练!!!")
        return

    report_utils = ReportUtils(args.city_uuid, args.product_id)

    # Create the report output folder (idempotent).
    os.makedirs(args.report_dir, exist_ok=True)

    if args.all_report:
        generate_all_data(args, report_utils)


if __name__ == "__main__":
    run()
     
     
     
     

+ 103 - 0
utils/reports_utils.py

@@ -0,0 +1,103 @@
+from models.rank.data.config import ImportanceFeaturesMap
+import pandas as pd
+
+
def filter_data(data, filter_dict):
    """Keep only SHAP rows whose product feature matches *filter_dict*.

    Each non-id entry of *filter_dict* is rendered as "label(value)" via
    the importance-feature name map, and rows of *data* whose
    'product_feat' is in that rendered set are returned.
    """
    wanted = [
        f"{ImportanceFeaturesMap.PRODUCT_FEATRUES_MAP[key]}({value})"
        for key, value in filter_dict.items()
        if key != 'product_code'
    ]
    return data[data['product_feat'].isin(wanted)]
+
def feats_relation_process(shap_result, product_content):
    """Build the feature-relation report for one product.

    Filters the SHAP result down to this product's features, keeps only
    customer features with a positive total relation, and renames the
    columns to their Chinese report headers.
    """
    report = filter_data(shap_result, product_content).copy()

    # Keep only customer features whose summed relation is positive.
    totals = report.groupby("cust_feat")["relation"].sum()
    positive_feats = totals[totals > 0].index.to_list()
    report = report[report["cust_feat"].isin(positive_feats)].reset_index(drop=True)

    return report.rename(
        columns={
            "product_feat": "卷烟特征",
            "cust_feat": "商户特征",
            "relation": "相关性",
        }
    )
+
def calculate_delivery_by_recommend_data(recommend_data, recommend_cust_infos, delivery_count):
    """Split *delivery_count* across the recommended merchants.

    Quotas are proportional to recommend_score and rounded with the
    largest-remainder method so the integer quotas sum exactly to
    *delivery_count*. Output keeps the original recommendation order
    and uses the Chinese report column headers.
    """
    df = pd.DataFrame(recommend_data)

    # Attach the merchant display name (inner join: merchants missing
    # from the info table are dropped).
    info = recommend_cust_infos.set_index("BB_RETAIL_CUSTOMER_CODE")
    df = df.join(info, on="cust_code", how="inner")
    df = df[["cust_code", "BB_RETAIL_CUSTOMER_NAME", "recommend_score"]]

    # Ideal fractional quota per merchant.
    df["delivery_float"] = df["recommend_score"] / df["recommend_score"].sum() * delivery_count
    # Integer base quota (floor) and its fractional remainder.
    df["delivery_count"] = df["delivery_float"].astype(int)
    df["remainder"] = df["delivery_float"] - df["delivery_count"]

    # Hand the leftover units to the largest remainders.
    df = df.sort_values("remainder", ascending=False)
    leftover = delivery_count - df["delivery_count"].sum()
    df.iloc[:leftover, df.columns.get_loc("delivery_count")] += 1

    # Restore the original recommendation order via the saved index,
    # then rename for the report.
    df = df.drop(columns=["delivery_float", "remainder"]).reset_index()
    df = df.sort_values("index")
    df = df.rename(
        columns={
            "index": "推荐序号",
            "cust_code": "商户编号",
            "BB_RETAIL_CUSTOMER_NAME": "商户名称",
            "recommend_score": "推荐系数",
            "delivery_count": "建议投放量(条)",
        }
    )
    # 1-based rank for the report reader.
    df["推荐序号"] = df["推荐序号"] + 1

    return df
+
def eval_report_process(eval_order_data, recommend_data):
    """Join actual monthly sales with the recommendation ranking.

    Returns one row per (merchant, product) with the mean monthly sales
    quantity and, where the merchant was recommended, its rank and score
    (NaN otherwise). Rows are ordered by sales quantity, descending.
    """
    orders = eval_order_data[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "sale_amt"]]

    # The join key must be str on both sides.
    orders["cust_code"] = orders["cust_code"].astype(str)

    # Average monthly sales per merchant/product, rounded to whole units.
    orders = (
        orders.groupby(["cust_code", "cust_name", "product_code", "product_name"])[["sale_qty", "sale_amt"]]
        .mean()
        .reset_index()
    )
    orders["sale_qty"] = orders["sale_qty"].round(0).astype(int)
    orders = orders.sort_values("sale_qty", ascending=False)

    # Normalize the recommendation key to str as well, then index by it.
    recommend_data["商户编号"] = recommend_data["商户编号"].astype(str)
    ranked = recommend_data.set_index("商户编号")

    # Left join: merchants that sold but were not recommended keep NaN
    # in the rank/score columns.
    merged = orders.join(ranked, on="cust_code", how="left")
    merged = merged[["cust_code", "cust_name", "product_code", "product_name", "sale_qty", "推荐序号", "推荐系数"]]
    return merged.rename(
        columns={
            "cust_code": "商户编号",
            "cust_name": "商户名称",
            "product_code": "卷烟编码",
            "product_name": "卷烟名称",
            "sale_qty": "月均销量",
        }
    )

+ 1 - 1
utils/result_process.py

@@ -31,7 +31,7 @@ def generate_report(city_uuid, data, filter_dict, recommend_data, delivery_count
     """根据总表筛选结果"""
     """根据总表筛选结果"""
     # 1. 筛选商户相关性排序结果
     # 1. 筛选商户相关性排序结果
     data = filter_data(data, filter_dict).copy()
     data = filter_data(data, filter_dict).copy()
-    # data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
+    data.to_csv(os.path.join(save_dir, "feats_interaction.csv"), index=False, encoding='utf-8-sig')
     group_sums = data.groupby("cust_feat")["relation"].sum()
     group_sums = data.groupby("cust_feat")["relation"].sum()
     # 筛选出总和非负的cust_feat
     # 筛选出总和非负的cust_feat
     valid_cust_feats = group_sums[group_sums > 0].index.tolist()
     valid_cust_feats = group_sums[group_sums > 0].index.tolist()