Переглянути джерело

Merge branch 'dev' of http://1.12.234.191:12000/huanghongbo/BrandCultivation into dev

huanghongbo 1 рік тому
батько
коміт
721b876bda

+ 4 - 0
.gitignore

@@ -0,0 +1,4 @@
+.idea/
+.vscode/
+__pycache__/
+*.pyc

BIN
config/__pycache__/__init__.cpython-38.pyc


BIN
config/__pycache__/database_config.cpython-38.pyc


BIN
dao/__pycache__/__init__.cpython-38.pyc


BIN
dao/__pycache__/mysql_client.cpython-38.pyc


+ 40 - 15
dao/mysql_client.py

@@ -36,26 +36,51 @@ class Mysql(object):
     def create_session(self):
         """Create and return a new database session."""
         return self._DBSession()
-
+    
+    def fetch_data_with_pagination(self, tablename, query_text, page=1, page_size=1000, order_by=None):
+        """Fetch one page of rows from a table and return it as a DataFrame.
+
+        Args:
+            tablename: Name of the table to read. It is interpolated into the
+                SQL text as-is, so it must come from trusted code and never
+                from user input.
+            query_text: Column list for the SELECT clause (e.g. ``"*"``).
+                Interpolated as-is; same trust requirement as ``tablename``.
+            page: 1-based page number.
+            page_size: Maximum number of rows in the page.
+            order_by: Optional ORDER BY expression, interpolated as-is. When
+                omitted the query has no ORDER BY, so the database is free to
+                return rows in any order and consecutive pages may overlap or
+                skip rows. Pass a unique key to get stable pages.
+
+        Returns:
+            A pandas DataFrame built from the fetched rows; it is empty when
+            the query returns no rows.
+
+        Raises:
+            ValueError: If ``page`` is less than 1 or ``page_size`` is negative.
+        """
+        # Reject values that would produce a negative LIMIT/OFFSET.
+        if page < 1 or page_size < 0:
+            raise ValueError(
+                f"page must be >= 1 and page_size >= 0, got page={page}, page_size={page_size}"
+            )
+        offset = (page - 1) * page_size
+        sql = f"select {query_text} from {tablename}"
+        if order_by:
+            sql += f" ORDER BY {order_by}"
+        # Only LIMIT/OFFSET are bound parameters; identifiers cannot be bound.
+        query = text(f"{sql} LIMIT :limit OFFSET :offset")
+        with self.create_session() as session:
+            rows = session.execute(query, {"limit": page_size, "offset": offset}).fetchall()
+            df = pd.DataFrame(rows)
+        return df
+    
+    def load_data(self, tablename, query_text, page=1, page_size=1000):
+        """Load a table page by page and return all rows as one DataFrame.
+
+        Pages are requested through ``fetch_data_with_pagination`` starting at
+        ``page`` until an empty page comes back. The engine is disposed via
+        ``closed()`` when loading finishes, whether it succeeded or failed.
+
+        Args:
+            tablename: Name of the table to read.
+            query_text: Column list for the SELECT clause (e.g. ``"*"``).
+            page: 1-based page number to start from.
+            page_size: Number of rows requested per page.
+
+        Returns:
+            A DataFrame with the rows of every fetched page (empty when the
+            first page has no rows), or ``None`` if any page failed to load.
+        """
+        # Collect the pages and concatenate once; concatenating inside the
+        # loop would re-copy all previously loaded rows on every page.
+        frames = []
+        total_rows = 0
+        try:
+            while True:
+                df = self.fetch_data_with_pagination(tablename, query_text, page, page_size)
+                if df.empty:
+                    break
+
+                frames.append(df)
+                total_rows += len(df)
+                print(f"Page {page}: Retrieved {len(df)} rows, Total rows so far: {total_rows}")
+                page += 1  # move on to the next page
+
+        except Exception as e:
+            print(f"Error: {e}")
+            return None
+
+        finally:
+            # Cleanup only. A ``return`` here would override the ``None``
+            # returned on error and hand back partial data instead.
+            self.closed()
+
+        if not frames:
+            return pd.DataFrame()
+        return pd.concat(frames, ignore_index=True)
+    
     def closed(self):
         """Dispose of the engine to close connections and release resources."""
         self.engine.dispose()
 
 
 if __name__ == '__main__':
+    
     client = Mysql()
-    # 创建会话
-    session = client.create_session()
+    tablename = "mock_order"
     
-    # 使用 session 执行查询等操作
-    try:
-        results = session.execute(
-            text("select * from tads_brandcul_cust_order")
-            ).all()
-        # 将结果转换为DataFrame
-        df = pd.DataFrame(results).drop(columns=['stat_month']) # 提取列名
-        
-        # 获取c
-        print(len(df.columns))
-    finally:
-        session.close()
+    # Paging parameters
+    page = 1
+    page_size = 1000
+    
+    query_text = '*'
+    # Pass the variable defined above instead of repeating the table name literal
+    client.load_data(tablename, query_text, page, page_size)

BIN
models/__pycache__/__init__.cpython-38.pyc


BIN
models/recall/__pycache__/__init__.cpython-38.pyc


BIN
models/recall/__pycache__/itemCF.cpython-38.pyc


+ 26 - 39
models/recall/itemCF.py → models/recall/itemCF/ShopScore.py

@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 '''
-@filename     : itemCF.py
-@description     : 基于物品的协同过滤算法
-@time     : 2025/01/21/00
+@filename     : ShopScore.py
+@description     : 品规-商户-评分矩阵:品规(用户)对商铺(物品)的评分矩阵,将结果保存在score.csv文件中
+@time     : 2025/01/31/02
 @author     : Sherlock1011 & Min1027
 @Version     : 1.0
 '''
@@ -42,66 +42,53 @@ class ItemCFModel:
         else:
             return (column - column.min()) / (column.max() - column.min())
 
-    # 按照品规分组归一化和计算热度值
+    # 按照品规分组归一化并计算评分
     def calculate_heart_per_product(self, group):
+        """Normalize the weighted columns of one group and compute its score.
+
+        Every column named in ``self.weights`` is replaced by the result of
+        ``self.standardize_column``; ``OUT_STOCK_DAYS`` is inverted
+        (``1 - normalized``). The ``SCORE`` column is then the weighted sum
+        of those columns multiplied by 100.
+
+        Args:
+            group: Rows of one group; its columns are overwritten in place.
+
+        Returns:
+            The same ``group`` object with the added ``SCORE`` column.
+        """
         for column in self.weights.keys():
             if column == "OUT_STOCK_DAYS":
                 group[column] = 1 - self.standardize_column(group[column])
             else:
                 group[column] = self.standardize_column(group[column])
-        group["FC_SCORE"] = group.apply(
+        group["SCORE"] = group.apply(
             lambda row: sum(Decimal(row[col]) * weight for col, weight in self.weights.items()) * 100, axis=1
         )
         return group
 
-    # 主算法函数:计算推荐结果
-    def recommend(self, order_data):
-        # 去除重复值和填补缺失值
-        order_data.drop_duplicates(inplace=True)
-        order_data.fillna(0, inplace=True)
+    # Main entry point: compute the product-shop score matrix
+    def score(self, order_data):
+        """Score every row of ``order_data`` and return the score columns.
+
+        Rows are grouped by ``PRODUCT_CODE``, each group is passed to
+        ``calculate_heart_per_product``, and the result is sorted by
+        ``PRODUCT_CODE`` ascending and ``SCORE`` descending.
+
+        Args:
+            order_data: DataFrame containing ``PRODUCT_CODE``,
+                ``BB_RETAIL_CUSTOMER_CODE`` and the columns named in
+                ``self.weights``.
+
+        Returns:
+            DataFrame with the columns ``PRODUCT_CODE``,
+            ``BB_RETAIL_CUSTOMER_CODE`` and ``SCORE``.
+        """
+       
 
         # Apply the per-group normalization and scoring
         df_result = order_data.groupby("PRODUCT_CODE").apply(self.calculate_heart_per_product).reset_index(drop=True)
-        df_result = df_result.sort_values(by=["PRODUCT_CODE", "FC_SCORE"], ascending=[True, False])
+        df_result = df_result.sort_values(by=["PRODUCT_CODE", "SCORE"], ascending=[True, False])
 
-        recomend_list = []
-        for product_code, group in df_result.groupby("PRODUCT_CODE"):
-            group_values = group[["BB_RETAIL_CUSTOMER_CODE", "FC_SCORE"]].apply(
-                lambda row: {row["BB_RETAIL_CUSTOMER_CODE"]: row["FC_SCORE"]}, axis=1
-            ).tolist()
-            recomend_list.append({"keys": product_code, "value": group_values})
-        
-        return recomend_list
+        # Keep only the columns that make up the score matrix
+        # df_result[['PRODUCT_CODE', 'BB_RETAIL_CUSTOMER_CODE', 'SCORE']].to_csv("./models/recall/itemCF/matrix/score.csv", index=False, encoding="utf-8")
+        return df_result[['PRODUCT_CODE', 'BB_RETAIL_CUSTOMER_CODE', 'SCORE']]
 
 def load_data_from_dataset():
+    """Read the ``mock_order`` table from the database and clean it.
+
+    Returns:
+        DataFrame with duplicate rows removed and missing values filled
+        with 0.
+
+    Raises:
+        RuntimeError: If ``Mysql.load_data`` reports a failure by returning
+            ``None``.
+    """
     client = Mysql()
-    # 创建会话
-    session = client.create_session()
+    tablename = "mock_order"
+    query_text = "*"
     
-    # 使用 session 执行查询等操作
-    try:
-        results = session.execute(
-            text("select * from tads_brandcul_cust_order")
-            ).all()
-        # 将结果转换为DataFrame
-        df = pd.DataFrame(results).drop(columns=['stat_month']) # 提取列名
-        
-    finally:
-        session.close()
-        
+    df = client.load_data(tablename, query_text)
+    # load_data's error branch is written to return None; fail with a clear
+    # message instead of an AttributeError on the cleaning step below.
+    if df is None:
+        raise RuntimeError(f"failed to load data from table {tablename!r}")
+
+    # Drop duplicate rows and fill missing values with 0
+    df = df.drop_duplicates().fillna(0)
     return df
  
 if __name__ == "__main__":
     # Create the scoring model instance
     item_cf_algorithm = ItemCFModel()
-
+    
     # Load the data
-    # order_data = pd.read_csv('order.csv')
     order_data = load_data_from_dataset()
 
     # Run the scoring algorithm
-    recomand_list = item_cf_algorithm.recommend(order_data)
-    print(recomand_list)
-
-    # # 序列化
-    # joblib.dump(item_cf_algorithm, "item_cf.model")
+    scores = item_cf_algorithm.score(order_data)
+    
+    # Save the scores to a CSV file. to_csv does not create missing parent
+    # directories, so make sure the output directory exists first.
+    import os
+    output_path = "./models/recall/itemCF/matrix/score.csv"
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    scores.to_csv(output_path, index=False, encoding="utf-8")
+    

BIN
models/recall/itemCF/__pycache__/ShopScore.cpython-310.pyc


BIN
models/recall/itemCF/__pycache__/itemCF.cpython-310.pyc


+ 83 - 0
utils/mock_data_to_database.py

@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@filename     : mock_data_to_database.py
+@description     : 将mock数据写入到数据库中
+@time     : 2025/01/31/00
+@author     : Sherlock1011 & Min1027
+@Version     : 1.0
+'''
+from dao.mysql_client import Mysql
+
+import pandas as pd
+from sqlalchemy import Column, Integer, VARCHAR, Float, DECIMAL
+from sqlalchemy.ext.declarative import declarative_base
+
+# 定义数据库表结构
+Base = declarative_base()
+
+class MockOrder(Base):
+    """ORM mapping for the ``mock_order`` table.
+
+    Besides the auto-increment ``id`` primary key, the table has two
+    VARCHAR(50) code columns and thirteen DECIMAL(18, 6) metric columns.
+    """
+    __tablename__ = "mock_order"
+    id = Column(Integer, primary_key=True, autoincrement=True)  # surrogate primary key
+    BB_RETAIL_CUSTOMER_CODE = Column(VARCHAR(50))
+    PRODUCT_CODE = Column(VARCHAR(50))
+    MONTH6_SALE_QTY = Column(DECIMAL(18, 6))
+    MONTH6_SALE_AMT = Column(DECIMAL(18, 6))
+    MONTH6_GROSS_PROFIT_RATE = Column(DECIMAL(18, 6))
+    MONTH6_SALE_QTY_YOY = Column(DECIMAL(18, 6))
+    MONTH6_SALE_QTY_MOM = Column(DECIMAL(18, 6))
+    MONTH6_SALE_AMT_YOY = Column(DECIMAL(18, 6))
+    MONTH6_SALE_AMT_MOM = Column(DECIMAL(18, 6))
+    ORDER_FULLORDR_RATE = Column(DECIMAL(18, 6))
+    CUSTOMER_REPURCHASE_RATE = Column(DECIMAL(18, 6))
+    NEW_PRODUCT_ORDER_QTY_OCC = Column(DECIMAL(18, 6))
+    LISTING_RATE = Column(DECIMAL(18, 6))
+    OUT_STOCK_DAYS = Column(DECIMAL(18, 6))
+    RETAIL_PRICE_INDEX = Column(DECIMAL(18, 6))
+    
+def insert_data(db, data_path):
+    """Read an Excel sheet and bulk-insert its rows into ``mock_order``.
+
+    The sheet's columns are renamed positionally to the ``MockOrder``
+    attribute names, so the sheet must contain exactly these 15 columns in
+    this order. Any error raised while renaming or inserting is caught, the
+    transaction is rolled back, and the error is printed rather than raised.
+
+    Args:
+        db: Database client exposing ``create_session()``.
+        data_path: Path of the Excel file to load.
+    """
+    df = pd.read_excel(data_path)
+    session = db.create_session()
+    try:
+        # Rename positionally so the record keys match the ORM attribute names
+        df.columns = [
+            'BB_RETAIL_CUSTOMER_CODE',
+            'PRODUCT_CODE',
+            'MONTH6_SALE_QTY',
+            'MONTH6_SALE_AMT',
+            'MONTH6_GROSS_PROFIT_RATE',
+            'MONTH6_SALE_QTY_YOY',
+            'MONTH6_SALE_QTY_MOM',
+            'MONTH6_SALE_AMT_YOY',
+            'MONTH6_SALE_AMT_MOM',
+            'ORDER_FULLORDR_RATE',
+            'CUSTOMER_REPURCHASE_RATE',
+            'NEW_PRODUCT_ORDER_QTY_OCC',
+            'LISTING_RATE',
+            'OUT_STOCK_DAYS',
+            'RETAIL_PRICE_INDEX',
+        ]
+        # Empty Excel cells arrive as NaN, which is not a valid value for the
+        # DECIMAL/VARCHAR columns; map them to None so they are inserted as NULL.
+        records = df.astype(object).where(df.notna(), None).to_dict(orient='records')
+        session.bulk_insert_mappings(MockOrder, records)
+        session.commit()
+        print("数据成功插入数据库")
+
+    except Exception as e:
+        session.rollback()
+        print(f"插入数据时出错: {e}")
+
+    finally:
+        session.close()
+    
+        
+if __name__ == "__main__":
+    data_path = "./data/order.xlsx"
+    # Open the database connection
+    db = Mysql()
+
+    try:
+        # Create the table if it does not exist yet
+        Base.metadata.create_all(db.engine)
+        insert_data(db, data_path)
+    finally:
+        # Dispose of the engine even when table creation or the load fails
+        db.closed()
+    
+    
+