-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf2md.py
More file actions
73 lines (58 loc) · 2.1 KB
/
pdf2md.py
File metadata and controls
73 lines (58 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3
import os
import re
import sys
import time
import shutil
import getopt
import pdf2image
from PIL import Image
def main(argv):
    """Convert a PDF into a folder of page images plus a Markdown index.

    Parses ``-p/--pdf <file>`` and ``-d/--dpi <int>`` from *argv*, renders the
    pages through ``pdftopil`` and creates, inside the current working
    directory, a folder named after the sanitised PDF name containing
    ``pdf/<n>.png`` for every page and ``<folder>.md`` with one section per
    page.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        None. When no PDF was given, the usage line is printed and the
        function simply returns.

    Raises:
        SystemExit: Status 2 for malformed options, a dpi that is not a
            positive integer, or a PDF name that yields no usable folder
            name; status 1 when the PDF file does not exist; a plain exit
            after ``-h`` or when the user declines to delete an existing
            output folder.
    """
    usage = "pdf2md -p <pdf file> -d <pdf dpi>"
    dpi = 60
    pdffile = ""

    try:
        # "h" has no trailing colon: -h is a flag and takes no argument.
        opts, _positional = getopt.getopt(argv, "hp:d:", ["help", "pdf=", "dpi="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(usage)
            sys.exit()
        elif opt in ("-p", "--pdf"):
            pdffile = arg
        elif opt in ("-d", "--dpi"):
            # Validate here so a bad value fails before anything is created.
            try:
                dpi = int(arg)
            except ValueError:
                dpi = 0
            if dpi <= 0:
                print(usage)
                sys.exit(2)

    # Drop the extension, whatever its length, instead of slicing 4 characters.
    outfile = os.path.splitext(pdffile)[0]
    if not outfile:
        print(usage)
        return

    outfileformatted = re.sub(r'[^\w]', '', outfile.replace('-', '_'))
    if not outfileformatted:
        # An empty folder name would make the output path the working
        # directory itself, which the overwrite prompt below could delete.
        print("Cannot derive an output folder name from %r" % pdffile)
        sys.exit(2)

    # Check the input before touching the file system so a typo in the PDF
    # name cannot cost the user an existing output folder.
    if not os.path.isfile(pdffile):
        print("PDF file not found: %s" % pdffile)
        sys.exit(1)

    path = os.path.join(os.path.abspath(os.getcwd()), outfileformatted)
    pdfpath = os.path.join(path, 'pdf')

    if os.path.isdir(path):
        answer = input('Folder already exists, would you like to delete it?\n0 = no, 1 = yes: ')
        try:
            delete_existing = int(answer) == 1
        except ValueError:
            # Anything that is not a number counts as "no".
            delete_existing = False
        if not delete_existing:
            print('Exiting')
            sys.exit()
        shutil.rmtree(path)

    # Creates both the output folder and its "pdf" sub-folder.
    os.makedirs(pdfpath)

    images = pdftopil(pdffile, dpi)
    markdown_path = os.path.join(path, '%s.md' % outfileformatted)
    # The folder was just created, so the index is written once from scratch.
    with open(markdown_path, 'w') as f:
        for page_number, image in enumerate(images, start=1):
            image.save(os.path.join(pdfpath, '%s.png' % page_number), 'PNG')
            # NOTE(review): the layout of the image link is an assumption --
            # confirm it matches the Markdown the consumers of this file expect.
            f.write('### %s\n\n- %s\n\n![%s](%s)\n\n--- \n\n' % (
                page_number,
                page_number,
                page_number,
                'pdf/%s.png' % page_number,
            ))
    print('Done!')
def pdftopil(PDF_PATH, dpi):
    """Render a PDF through pdf2image and print how long the conversion took.

    Args:
        PDF_PATH: Path of the PDF handed to ``pdf2image.convert_from_path``.
        dpi: Rendering resolution forwarded unchanged to pdf2image.

    Returns:
        The value returned by ``pdf2image.convert_from_path`` for the document.
    """
    conversion_options = dict(
        dpi=dpi,
        output_folder=None,
        first_page=None,
        last_page=None,
        fmt='png',
        thread_count=100,
        userpw=None,
        use_cropbox=False,
        strict=False,
    )
    began = time.time()
    pages = pdf2image.convert_from_path(PDF_PATH, **conversion_options)
    elapsed = time.time() - began
    print("Time taken : " + str(elapsed))
    return pages
# Script entry point: hand the command-line arguments, minus the program
# name, to main() when this file is executed directly.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)