| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- # Build configuration: the package is built with the setuptools backend,
- # and setuptools plus wheel must be present in the build environment.
- [build-system]
- requires = ["setuptools", "wheel"]
- build-backend = "setuptools.build_meta"
- # Core package metadata and runtime requirements for olmocr.
- [project]
- # See https://setuptools.pypa.io/en/latest/userguide/quickstart.html for more project configuration options.
- name = "olmocr"
- # The version is not hard-coded here; it is resolved through [tool.setuptools.dynamic].
- dynamic = ["version"]
- readme = "README.md"
- classifiers = [
- "Intended Audience :: Science/Research",
- "Development Status :: 3 - Alpha",
- "License :: OSI Approved :: Apache Software License",
- "Programming Language :: Python :: 3",
- "Topic :: Scientific/Engineering :: Artificial Intelligence",
- ]
- authors = [
- {name = "Allen Institute for Artificial Intelligence", email = "jakep@allenai.org"}
- ]
- requires-python = ">=3.11"
- # Requirements installed with the base package (no extras selected).
- dependencies = [
- "cached-path",
- "smart_open",
- "pypdf>=5.2.0",
- "pypdfium2",
- "cryptography",
- "lingua-language-detector",
- "Pillow",
- "ftfy",
- "bleach",
- "markdown2",
- "filelock",
- "orjson",
- "requests",
- "zstandard",
- "boto3",
- "httpx",
- "torch>=2.5.1",
- # Exact pin; the `train` extra's `transformers>=4.45.1` range is satisfied by it.
- "transformers==4.46.2",
- "img2pdf",
- "beaker-py",
- ]
- # License text is read from the LICENSE file at the repository root.
- license = {file = "LICENSE"}
- # Project links; all three point at the GitHub repository.
- [project.urls]
- Homepage = "https://github.com/allenai/olmocr"
- Repository = "https://github.com/allenai/olmocr"
- Changelog = "https://github.com/allenai/olmocr/blob/main/CHANGELOG.md"
- # Documentation link is currently disabled.
- # NOTE(review): presumably waiting for the docs site to be published - confirm before enabling.
- # Documentation = "https://olmocr.readthedocs.io/"
- # Optional extras, installed with e.g. `pip install olmocr[gpu]`.
- [project.optional-dependencies]
- # GPU extra: sglang and its kernel package, both pinned to exact versions.
- gpu = [
- "sgl-kernel==0.0.3.post1",
- "sglang[all]==0.4.2",
- ]
- # Development extra: linting/formatting (ruff, mypy, black, isort), testing
- # (pytest and plugins), packaging (twine, build, setuptools, wheel) and the
- # Sphinx documentation toolchain with pinned theme/plugin versions.
- dev = [
- "ruff",
- "mypy",
- "black",
- "isort",
- "pytest",
- "pytest-sphinx",
- "pytest-cov",
- "twine>=1.11.0",
- "build",
- "setuptools",
- "wheel",
- "Sphinx>=4.3.0,<7.1.0",
- "furo==2023.7.26",
- "myst-parser>=1.0,<2.1",
- "sphinx-copybutton==0.5.2",
- "sphinx-autobuild==2021.3.14",
- "sphinx-autodoc-typehints==1.23.3",
- "packaging",
- # NOTE(review): necessary, peft, datasets and omegaconf are also listed in
- # the `train` extra - confirm they are required for development installs.
- "necessary",
- "peft",
- "datasets",
- "omegaconf",
- "spacy",
- ]
- # Benchmark extra.
- # NOTE(review): presumably the libraries used by the benchmark tooling - confirm against the bench code.
- bench = [
- "tinyhost",
- "fuzzysearch",
- "rapidfuzz",
- "sequence_align",
- "syntok",
- "openai",
- "google-genai",
- "playwright",
- "mistralai",
- "lxml",
- "flask",
- ]
- # Training extra. torch and transformers also appear in the core dependencies
- # with tighter constraints (torch>=2.5.1, transformers==4.46.2), so those
- # constraints apply as well whenever this extra is installed.
- train = [
- "torch",
- "torchvision",
- "accelerate",
- "datasets",
- "peft",
- "wandb",
- "omegaconf",
- "s3fs",
- "necessary",
- "einops",
- "transformers>=4.45.1"
- ]
- # Elo extra: numerical and plotting libraries.
- # NOTE(review): presumably used for Elo-rating analysis - confirm.
- elo = [
- "numpy",
- "scipy",
- "pandas",
- "matplotlib"
- ]
- # setuptools configuration, declared parent table first.
- [tool.setuptools]
- include-package-data = true
- # The package version is read from the VERSION attribute of the olmocr.version module.
- [tool.setuptools.dynamic]
- version = { attr = "olmocr.version.VERSION" }
- # Package discovery: skip test packages, docs and scripts.
- [tool.setuptools.packages.find]
- exclude = ["*.tests", "*.tests.*", "tests.*", "tests", "docs*", "scripts*"]
- # Non-Python files bundled inside the olmocr package.
- [tool.setuptools.package-data]
- olmocr = ["py.typed", "viewer/*.html", "eval/*.html"]
- # Black formatter settings.
- [tool.black]
- # Same line length as [tool.ruff].
- line-length = 160
- # Only files ending in .py or .pyi are formatted.
- include = '\.pyi?$'
- # Regex of paths to skip: Python/tool caches, .git, .vscode, .venv, and
- # anything containing the standalone words `dist` or `doc`.
- exclude = '''
- (
- __pycache__
- | \.git
- | \.mypy_cache
- | \.pytest_cache
- | \.vscode
- | \.venv
- | \bdist\b
- | \bdoc\b
- )
- '''
- # isort settings: use the black-compatible profile so both tools agree on import layout.
- [tool.isort]
- profile = "black"
- # NOTE(review): mode 3 should be isort's vertical hanging indent, which the
- # black profile is expected to imply already - confirm before removing.
- multi_line_output = 3
- # Pyright settings. You can override them by adding a personal pyrightconfig.json file.
- [tool.pyright]
- # Turn off pyright's reportPrivateImportUsage diagnostic.
- reportPrivateImportUsage = false
- # Ruff settings. Line length matches [tool.black]; the target version matches
- # the project's requires-python (>=3.11).
- [tool.ruff]
- line-length = 160
- target-version = "py311"
- exclude = ["olmocr/train/molmo", "tests/*"]
- # Linter rule selection lives under [tool.ruff.lint]; the top-level `ignore`
- # and `per-file-ignores` keys are deprecated in current Ruff releases.
- [tool.ruff.lint]
- # E722: ignore bare `except:` clauses.
- ignore = ["E722"]
- [tool.ruff.lint.per-file-ignores]
- # F401 (unused import) is ignored in package __init__ files.
- "__init__.py" = ["F401"]
- # mypy settings: missing third-party stubs are ignored, site-packages are not
- # searched, and bodies of untyped functions are still type-checked.
- [tool.mypy]
- ignore_missing_imports = true
- no_site_packages = true
- check_untyped_defs = true
- # NOTE(review): mypy treats `exclude` entries as regular expressions, not
- # globs, so "tests/*" may match any path containing "tests" (not only the
- # top-level tests/ directory) - confirm this is intended.
- exclude = ["olmocr/train/molmo/", "tests/*"]
- # Relax Optional checking for modules under the `tests` package.
- [[tool.mypy.overrides]]
- module = "tests.*"
- strict_optional = false
- # pytest settings: test discovery first, then logging, then custom markers.
- [tool.pytest.ini_options]
- testpaths = "tests/"
- python_classes = ["Test*", "*Test"]
- # Logging is captured at DEBUG level with timestamp, level and logger name.
- log_level = "DEBUG"
- log_format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
- # Custom markers registered for this project.
- markers = ["nonci: mark test as not intended for CI runs"]
|