# pyproject.toml
  # PEP 517 build configuration: the package is built with the setuptools backend.
  [build-system]
  # setuptools 61.0 is the first release that reads metadata from the [project]
  # table, so older versions are ruled out explicitly. "wheel" is not listed:
  # the setuptools backend asks for it itself when a build needs it.
  requires = ["setuptools>=61.0"]
  build-backend = "setuptools.build_meta"
  [project]
  # Core package metadata.
  # See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
  name = "olmocr"
  # The version is declared dynamic, so it is supplied by the build backend
  # rather than written in this table.
  dynamic = ["version"]
  readme = "README.md"
  license = { file = "LICENSE" }
  requires-python = ">=3.11"
  authors = [
      { name = "Allen Institute for Artificial Intelligence", email = "jakep@allenai.org" },
  ]
  classifiers = [
      "Development Status :: 3 - Alpha",
      "Intended Audience :: Science/Research",
      "License :: OSI Approved :: Apache Software License",
      "Programming Language :: Python :: 3",
      "Topic :: Scientific/Engineering :: Artificial Intelligence",
  ]
  # Requirements installed with the base package (alphabetical order).
  dependencies = [
      "beaker-py",
      "bleach",
      "boto3",
      "cached-path",
      "cryptography",
      "filelock",
      "ftfy",
      "httpx",
      "img2pdf",
      "lingua-language-detector",
      "markdown2",
      "orjson",
      "Pillow",
      "pypdf>=5.2.0",
      "pypdfium2",
      "requests",
      "smart_open",
      "torch>=2.5.1",
      "transformers==4.46.2",
      "zstandard",
  ]
  [project.urls]
  # Project links published with the package metadata.
  Homepage = "https://github.com/allenai/olmocr"
  Repository = "https://github.com/allenai/olmocr"
  Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
  # Disabled entry, kept for reference:
  # Documentation = "https://olmocr.readthedocs.io/"
  [project.optional-dependencies]
  # Optional extras, selected at install time as `olmocr[<extra>]`.
  # Entries within each extra are kept in alphabetical order.

  # Exactly pinned sglang packages.
  gpu = [
      "sgl-kernel==0.0.3.post1",
      "sglang[all]==0.4.2",
  ]

  # Tooling for linting, type checking, testing, packaging and the Sphinx docs.
  dev = [
      "black",
      "build",
      "datasets",
      "furo==2023.7.26",
      "isort",
      "mypy",
      "myst-parser>=1.0,<2.1",
      "necessary",
      "omegaconf",
      "packaging",
      "peft",
      "pytest",
      "pytest-cov",
      "pytest-sphinx",
      "ruff",
      "setuptools",
      "spacy",
      "Sphinx>=4.3.0,<7.1.0",
      "sphinx-autobuild==2021.3.14",
      "sphinx-autodoc-typehints==1.23.3",
      "sphinx-copybutton==0.5.2",
      "twine>=1.11.0",
      "wheel",
  ]

  bench = [
      "flask",
      "fuzzysearch",
      "google-genai",
      "lxml",
      "mistralai",
      "openai",
      "playwright",
      "rapidfuzz",
      "sequence_align",
      "syntok",
      "tinyhost",
  ]

  train = [
      "accelerate",
      "datasets",
      "einops",
      "necessary",
      "omegaconf",
      "peft",
      "s3fs",
      "torch",
      "torchvision",
      "transformers>=4.45.1",
      "wandb",
  ]

  elo = [
      "matplotlib",
      "numpy",
      "pandas",
      "scipy",
  ]
  # setuptools-specific settings; the parent table comes first, then its sub-tables.
  [tool.setuptools]
  include-package-data = true

  # Source of the dynamic `version` field: the VERSION attribute of olmocr.version.
  [tool.setuptools.dynamic]
  version = { attr = "olmocr.version.VERSION" }

  # Automatic package discovery, skipping test, docs and scripts packages.
  [tool.setuptools.packages.find]
  exclude = [
      "*.tests",
      "*.tests.*",
      "tests.*",
      "tests",
      "docs*",
      "scripts*",
  ]

  # Non-Python files shipped inside the olmocr package.
  [tool.setuptools.package-data]
  olmocr = [
      "py.typed",
      "viewer/*.html",
      "eval/*.html",
  ]
  [tool.black]
  # Same line length as the [tool.ruff] section of this file.
  line-length = 160
  # Only .py and .pyi files are formatted.
  include = '\.pyi?$'
  # Multi-line regular expression listing the paths black skips.
  exclude = '''
  (
  __pycache__
  | \.git
  | \.mypy_cache
  | \.pytest_cache
  | \.vscode
  | \.venv
  | \bdist\b
  | \bdoc\b
  )
  '''
  [tool.isort]
  # Import sorting uses isort's "black" profile.
  profile = "black"
  multi_line_output = 3
  # These pyright settings can be overridden locally by adding a personal
  # pyrightconfig.json file.
  [tool.pyright]
  reportPrivateImportUsage = false
  [tool.ruff]
  line-length = 160
  target-version = "py311"
  exclude = ["olmocr/train/molmo", "tests/*"]

  # Linter rule settings belong under `lint`; the top-level `ignore` and
  # `per-file-ignores` keys are deprecated in current Ruff releases.
  [tool.ruff.lint]
  ignore = ["E722"]  # ignore bare except

  [tool.ruff.lint.per-file-ignores]
  "__init__.py" = ["F401"]
  [tool.mypy]
  ignore_missing_imports = true
  no_site_packages = true
  check_untyped_defs = true
  # Each entry is a regular expression searched against the file path, not a
  # glob. The earlier "tests/*" therefore matched every path that merely
  # contained "tests"; the pattern below only matches a directory named "tests".
  # NOTE(review): modules with "tests" in their file name are now type-checked.
  exclude = [
      'olmocr/train/molmo/',
      '(^|/)tests/',
  ]

  # Applies to test modules that mypy still sees, e.g. when passed explicitly.
  [[tool.mypy.overrides]]
  module = "tests.*"
  strict_optional = false
  [tool.pytest.ini_options]
  # Directory searched for tests when pytest is run without path arguments.
  testpaths = ["tests/"]
  # Class name patterns collected as test classes.
  python_classes = [
      "Test*",
      "*Test",
  ]
  log_level = "DEBUG"
  log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
  # Custom markers, registered here so pytest recognizes them.
  markers = [
      "nonci: mark test as not intended for CI runs",
  ]