-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathPDF2Txt.py
More file actions
30 lines (24 loc) · 1.04 KB
/
PDF2Txt.py
File metadata and controls
30 lines (24 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
'''Takes an SNCPatient ArcCheck PDF report (one per arc) as a PDF file and extracts gamma pass rates and other
related information using regular expressions.'''
import PyPDF2
import re
class PDFFieldError(ValueError, AttributeError):
    '''Raised when an expected label is not found in the report text.

    It is a ValueError because the input text is malformed. It also derives
    from AttributeError because a missing label used to surface as
    ``None.group(...)``; handlers written against that behaviour keep working.
    '''


class PDF2Txt():
    '''Pull patient details and gamma-analysis numbers out of a report PDF.

    Results are accumulated in ``self.Info`` and also returned by the parse
    methods. Keys written: 'PatientName', 'PatientID', 'PlanDate' (strings,
    exactly as captured, not stripped) and 'DD', 'DTA', 'DT', 'GammaPass'
    (floats).
    '''

    # Result key -> regex whose first group is the value. The patterns have
    # no DOTALL flag, so a value and the label that follows it must sit on
    # the same line of the extracted text.
    _TEXT_FIELDS = (
        ('PatientName', r'Patient Name : (.*?)Patient ID'),
        ('PatientID', r'Patient ID : (.*?)Plan Date'),
        ('PlanDate', r'Plan Date : (.*?)SSD'),
    )

    # Captures the ':'-separated run of numbers between the two labels.
    _THRESHOLDS_PATTERN = r' Mode : (.*?)Dose Values'

    # Result key -> position inside the ':'-separated run.
    # NOTE(review): presumably DD/DTA/DT are dose difference, distance to
    # agreement and dose threshold -- confirm against a real report.
    _THRESHOLD_FIELDS = (('DD', 0), ('DTA', 1), ('DT', 2), ('GammaPass', 6))

    def __init__(self):
        self.Info = {}

    def ParsePDF(self, filename):
        '''Extract the text of the first page of ``filename`` and parse it.

        Args:
            filename: passed straight to the PyPDF2 reader.

        Returns:
            A new dict with the parsed fields (see ``ParseText``).

        Raises:
            PDFFieldError: an expected label is missing from the page text.
            IndexError: fewer than seven ':'-separated threshold values.
            ValueError: a threshold value is not a valid float.
        '''
        # PyPDF2 renamed its API to snake_case and removed the camelCase
        # names in 3.0; prefer the new spelling and fall back for old installs.
        reader_cls = getattr(PyPDF2, 'PdfReader', None)
        if reader_cls is not None:
            txt = reader_cls(filename).pages[0].extract_text()
        else:
            txt = PyPDF2.PdfFileReader(filename).getPage(0).extractText()
        return self.ParseText(txt)

    def ParseText(self, txt):
        '''Parse already-extracted report text.

        ``self.Info`` is only updated once every field has been parsed, so a
        failed parse never leaves it half-updated. The returned dict is a
        copy, so results collected from several reports do not alias each
        other.

        Args:
            txt: text of the report page.

        Returns:
            A new dict holding the contents of ``self.Info`` after the update.

        Raises:
            PDFFieldError: an expected label is missing from ``txt``.
            IndexError: fewer than seven ':'-separated threshold values.
            ValueError: a threshold value is not a valid float.
        '''
        parsed = {}
        for key, pattern in self._TEXT_FIELDS:
            parsed[key] = self._capture(pattern, txt, key)

        # Split once instead of once per field.
        values = self._capture(
            self._THRESHOLDS_PATTERN, txt, 'Thresholds').split(':')
        for key, position in self._THRESHOLD_FIELDS:
            parsed[key] = float(values[position])

        self.Info.update(parsed)
        return dict(self.Info)

    @staticmethod
    def _capture(pattern, txt, label):
        '''Return group 1 of ``pattern`` in ``txt`` or raise PDFFieldError.'''
        match = re.search(pattern, txt)
        if match is None:
            raise PDFFieldError(
                'could not find {} in report text (pattern {!r})'.format(
                    label, pattern))
        return match.group(1)
# filename="Docs\\ArcCheck_PDFs\\5.pdf"
# parser=PDF2Txt()
# info=parser.ParsePDF(filename)
# print(info)