test_filter.py 553 B

1234567891011121314151617
  1. import os
  2. import unittest
  3. from pypdf import PdfReader
  4. from olmocr.filter import PdfFilter
  5. class PdfFilterTest(unittest.TestCase):
  6. def testFormLaterPages(self):
  7. self.filter = PdfFilter(apply_form_check=True)
  8. self.assertTrue(self.filter.filter_out_pdf(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "form_on_later_pages.pdf")))
  9. self.filter = PdfFilter(apply_form_check=False)
  10. self.assertFalse(self.filter.filter_out_pdf(os.path.join(os.path.dirname(__file__), "gnarly_pdfs", "form_on_later_pages.pdf")))