file_stream.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import time
  2. from core import get_logger, settings
  3. from io import BytesIO
  4. import os
  5. import pandas as pd
  6. import requests
  7. logger = get_logger("utils.file_stream")
  8. class FileStreamUtils:
  9. upload_url = settings.file_upload_url
  10. download_url = settings.file_download_url
  11. headers = {
  12. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
  13. "Accept": "*/*",
  14. }
  15. if settings.file_service_cookie:
  16. headers["Cookie"] = settings.file_service_cookie
  17. @staticmethod
  18. def upload_files(reports_dir, files):
  19. files_id = {}
  20. for filename in files:
  21. file_path = os.path.join(reports_dir, f"{filename}.xlsx")
  22. start_time = time.time()
  23. try:
  24. with open(file_path, "rb") as f:
  25. upload_files = {"file": (os.path.basename(file_path), f)}
  26. response = requests.post(
  27. FileStreamUtils.upload_url,
  28. headers=FileStreamUtils.headers,
  29. files=upload_files,
  30. verify=True,
  31. )
  32. duration_ms = (time.time() - start_time) * 1000
  33. if response.json().get("success"):
  34. file_id = response.json()["data"]["file_info"]["fileid"]
  35. files_id[filename] = file_id
  36. logger.info(f"File uploaded: {filename} -> {file_id} ({duration_ms:.0f}ms)")
  37. else:
  38. logger.error(f"Upload failed for {filename}: {response.text}")
  39. return None
  40. except requests.exceptions.RequestException as e:
  41. logger.error(f"Upload request error for {filename}: {e}", exc_info=True)
  42. return None
  43. except Exception as e:
  44. logger.error(f"Upload error for {filename}: {e}", exc_info=True)
  45. return None
  46. return files_id
  47. @staticmethod
  48. def download_file(file_id, file_type="xlsx"):
  49. """通过file_id从阿里云文件数据库下载文件"""
  50. start_time = time.time()
  51. try:
  52. response = requests.get(
  53. f"{FileStreamUtils.download_url}/{file_id}",
  54. headers=FileStreamUtils.headers,
  55. verify=True,
  56. )
  57. duration_ms = (time.time() - start_time) * 1000
  58. if response.status_code == 200:
  59. file_content = BytesIO(response.content)
  60. if file_type == "xlsx":
  61. data = pd.read_excel(file_content, engine="openpyxl")
  62. elif file_type == "csv":
  63. data = pd.read_csv(file_content)
  64. else:
  65. raise ValueError(f"不支持的文件类型:{file_type}")
  66. logger.info(f"File downloaded: {file_id} ({duration_ms:.0f}ms, {len(response.content)} bytes)")
  67. return data
  68. else:
  69. logger.error(f"Download failed: file_id={file_id}, status={response.status_code}")
  70. return None
  71. except requests.exceptions.RequestException as e:
  72. logger.error(f"Download request error: file_id={file_id}, error={e}", exc_info=True)
  73. return None
  74. except Exception as e:
  75. logger.error(f"Download error: file_id={file_id}, error={e}", exc_info=True)
  76. return None