-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest.py
More file actions
24 lines (16 loc) · 834 Bytes
/
test.py
File metadata and controls
24 lines (16 loc) · 834 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from ExDocGen.ExtractedDocumentGenerator import ExtractedDocumentGenerator
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def main():
    """Run a small interactive demo of the document extractor.

    Extracts 'data/sample_short.pdf' through ExtractedDocumentGenerator,
    writes the result to 'output.json', prints the page count, and then
    prints each page's text, waiting for ENTER after every page.
    """
    source_pdf = 'data/sample_short.pdf'

    # NOTE(review): an empty include_pages list presumably selects every
    # page -- confirm against ExtractedDocumentGenerator.extract_from_path.
    document = ExtractedDocumentGenerator().extract_from_path(
        pdf_file_path=source_pdf,
        include_pages=[],
        output_name='test',
    )
    document.save_as_json('output.json')

    page_total = document.num_pages
    print(f'The file {source_pdf} has {page_total} pages in it')

    # Iterating the extracted document yields its pages one at a time.
    for current_page in document:
        page_text = current_page.get_text()
        print(page_text)
        # Block until the user presses ENTER before moving on.
        input('Press ENTER to see next page')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()