-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecognizeCellText.py
More file actions
137 lines (114 loc) · 4.21 KB
/
recognizeCellText.py
File metadata and controls
137 lines (114 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""
Using the TSR and TCR annotations from the json files, we construct a masked image for each cell in a table and
retrieve its text using EasyOCR. We save the results as one CSV per table.
"""
import json
import cv2
import numpy as np
import os
import easyocr
import pandas as pd
# Root of the dataset; the annotation and image directories below are located underneath it.
datapath = "../data/20647788/TabRecSet/"
# Directory that is listed for the annotation json files.
json_dir_path = f"{datapath}TSR_TCR_annotation/"
# Directory the table images referenced by the annotations are read from.
im_dir_path = f"{datapath}image/english_all-line/"
# The result CSVs are written to <basepath>/<text_csv_dir>/.
basepath = "."
text_csv_dir = "text_csvs"
def detect_text_in_all_tables():
    """
    Generates masked images for each table cell based on the polygons provided in the json files. Detects the text in
    these masked images and generates a csv of the result for each table.

    Annotation files whose image is not present in im_dir_path are skipped. Progress is printed every 500 files.
    """
    # os.listdir returns every directory entry in arbitrary order. Keep only the json files so that a stray file
    # (e.g. .DS_Store) or a sub-directory does not crash json loading, and sort them so runs are reproducible.
    json_file_names = sorted(
        name for name in os.listdir(json_dir_path) if name.lower().endswith(".json")
    )
    total = len(json_file_names)
    for count, json_file_name in enumerate(json_file_names):
        data = load_json(json_dir_path + json_file_name)
        # Only tables whose image file is available can be processed
        if os.path.exists(im_dir_path + data['imagePath']):
            create_text_csv_for_table(data)

        # Update progress
        if count % 500 == 0:
            print("Created cell masks for " + str(count) + "\\" + str(total))
def load_json(jsonpath):
    """
    Loads in data from a json file.
    Inputs:
    - jsonpath: the filepath for the json file
    Outputs:
    - the decoded content of the json file
    """
    # Decode explicitly as UTF-8: without it open() falls back to the platform's locale encoding, which fails on or
    # garbles non-ASCII cell text on systems whose default is not UTF-8.
    with open(jsonpath, 'r', encoding='utf-8') as file:
        return json.load(file)
def create_text_csv_for_table(data):
    """
    Creates a polygon mask for each cell in the table and recognizes the text using EasyOCR. Saves the results to a csv
    named after the image, with the columns position, text and detected_text.
    Inputs:
    - data: the detected cell information for the table image (uses its 'imagePath' and 'shapes' entries)
    """
    impath = data['imagePath']
    # splitext leaves a name without an extension untouched, whereas slicing at rfind('.') == -1 would silently drop
    # its last character
    img_name = os.path.splitext(impath)[0]
    img = cv2.imread(im_dir_path + impath)
    if img is None:
        # cv2.imread reports an unreadable file by returning None rather than raising
        print("Could not read image " + im_dir_path + impath + ", skipping this table")
        return

    # Create a masked image, detect the text, and collect one row for each table cell. The rows are gathered in a
    # list and turned into a DataFrame once, because growing a DataFrame row by row copies it on every insertion.
    rows = []
    for shape in data['shapes']:
        position, text = get_position_and_text(shape['label'])
        masked_image = create_masked_image(shape, img)
        detected_text = read_text(masked_image)
        rows.append([position, text, detected_text])
    df = pd.DataFrame(rows, columns=["position", "text", "detected_text"])

    # Save the csv (creating the output directory first so the function also works when called on its own)
    out_dir = os.path.join(basepath, text_csv_dir)
    os.makedirs(out_dir, exist_ok=True)
    df.to_csv(os.path.join(out_dir, img_name + ".csv"), encoding='utf-8', index=False)
def get_position_and_text(label):
    """
    Separates the position and text. The text always comes after the 4th occurrence of '-'.
    Inputs:
    - label: the label provided by the dataset of the form "col-row-colspan-rowspan-text"
    Outputs:
    - position: string of the form "col-row-colspan-rowspan"
    - text: the text from the label (may itself contain '-')

    A label with fewer than four '-' has no text part; it is returned unchanged as the position with an empty text.
    """
    # Split on the first four separators only, so any '-' inside the text is preserved
    parts = label.split('-', 4)
    if len(parts) < 5:
        # Repeated str.find would wrap around to the start of the label here and return a garbled position/text
        return label, ""
    return '-'.join(parts[:4]), parts[4]
def create_masked_image(shape, img):
    """
    Blanks out everything outside the cell's polygon and converts the result to RGB.
    Inputs:
    - shape: an annotation entry whose 'points' value lists the vertices of the polygon surrounding the table cell
    - img: the image to mask (treated as BGR for the colour conversion)
    Outputs:
    - the masked image in RGB format
    """
    polygon = np.array(shape['points'])
    # Multiplying by the polygon mask leaves only the pixels selected by the mask
    cell_only_bgr = create_polygon_mask(img, polygon) * img
    # The product is floating point; bring it back to 8-bit pixels before the colour conversion
    cell_only_bgr = cell_only_bgr.astype('int').astype('uint8')
    return cv2.cvtColor(cell_only_bgr, cv2.COLOR_BGR2RGB)
def create_polygon_mask(img, points):
    """
    Rasterises the polygon described by the given points into a mask shaped like the image.
    Inputs:
    - img: a BGR image.
    - points: a list of points assumed to be within the images bounds.
    Outputs:
    - mask: an array with the image's shape holding 1 inside the polygon and 0 elsewhere
    """
    height, width, channels = img.shape
    canvas = np.zeros((height, width, channels), dtype=np.int32)
    # The vertices are converted to int32 and wrapped in an outer list, i.e. passed as a list of one polygon
    vertices = np.int32([points])
    filled = cv2.fillPoly(canvas, pts=vertices, color=(255, 255, 255))
    # Scale the 0/255 fill down to 0/1 so the mask can be multiplied with the image
    return filled / 255
# Cache of EasyOCR readers keyed by language, filled on first use. Constructing a Reader is expensive, and read_text
# is called once per table cell, so the reader must be shared between calls.
_OCR_READERS = {}


def read_text(image):
    """
    Given an OpenCV image, returns the English text detected.
    Inputs: an image
    Outputs: the text of the first detection EasyOCR reports for the image, or "" when nothing is detected
    """
    reader = _OCR_READERS.get('en')
    if reader is None:
        reader = easyocr.Reader(['en'])
        _OCR_READERS['en'] = reader
    result = reader.readtext(image)
    if not result:
        return ""
    # Each detection's text is at index 1 of its entry.
    # NOTE(review): only the first detection is returned; any further detections in the same cell are dropped.
    # Confirm this is intended.
    return result[0][1]
if __name__ == "__main__":
    # Make sure the output directory exists before any csv is written. makedirs with exist_ok=True avoids the
    # check-then-create race of isdir + mkdir and also works when text_csv_dir is a nested path.
    os.makedirs(os.path.join(basepath, text_csv_dir), exist_ok=True)
    detect_text_in_all_tables()