-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnodebuilders.py
More file actions
114 lines (81 loc) · 3.58 KB
/
nodebuilders.py
File metadata and controls
114 lines (81 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import re
import util
class BasicBuilder(object):
    """Minimal node builder: a node is simply the word it was built from.

    This builder keeps no per-node metadata; ``getMetadata`` always
    answers ``None``.
    """

    def __init__(self, metadata=None):
        """Create the builder.

        The ``metadata`` argument is accepted but not stored:
        ``self.metadata`` is always set to ``None``.
        """
        self.metadata = None

    def buildNode(self, word, last_node):
        """Return the node for ``word``, which is ``word`` itself.

        ``last_node`` is not used by this builder.
        """
        return word

    def getWord(self, node):
        """Return the word represented by ``node`` (the node itself)."""
        return node

    def getMetadata(self, node, key):
        """Return ``None`` for every ``node``/``key`` pair."""
        return None
class SentenceBuilder(object):
    """Node builder that records sentence boundaries for each word.

    Nodes are the words themselves. Per-node metadata is kept in
    ``self.metadata`` keyed by node, so building the same word again
    replaces the entry stored for it earlier.
    """

    def __init__(self, metadata=None):
        """Create the builder.

        ``metadata`` is an optional existing mapping of node -> dict of
        metadata values to use as the store. When it is ``None`` a new
        ``util.DefaultDict(dict)`` is created instead.
        """
        # A word is considered to end a sentence if it ends in one of . ? !
        # optionally followed by some number of single or double quotes
        self.endPattern = re.compile('.*[.!?][\'"]*$')
        # Test against None rather than truthiness: an empty mapping passed
        # in by the caller must still be the one this builder writes to.
        if metadata is None:
            metadata = util.DefaultDict(dict)
        self.metadata = metadata

    def buildNode(self, word, last_node):
        """Record sentence metadata for ``word`` and return its node.

        ``last_node`` is the node built just before this one, or ``None``
        when ``word`` is the first word.
        """
        node = word
        # This word is considered to start a sentence if it is the first word
        # or the last word ended a sentence
        if last_node is None:
            starts_sentence = True
        else:
            starts_sentence = self.getMetadata(last_node, 'endsSentence')
        self.metadata[node] = {
            'startsSentence': starts_sentence,
            'endsSentence': self.endPattern.match(word) is not None,
        }
        return node

    def getWord(self, node):
        """Return the word represented by ``node`` (the node itself)."""
        return node

    def getMetadata(self, node, key):
        """Return the metadata value stored under ``key`` for ``node``."""
        return self.metadata[node][key]
class ParagraphBuilder(object):
    """Node builder that records sentence and paragraph layout per word.

    Nodes are the words themselves. Per-node metadata is kept in
    ``self.metadata`` keyed by node, so building the same word again
    replaces the entry stored for it earlier.
    """

    def __init__(self, metadata=None):
        """Create the builder.

        ``metadata`` is an optional existing mapping of node -> dict of
        metadata values to use as the store. When it is ``None`` a new
        ``util.DefaultDict(dict)`` is created instead.
        """
        # A word is considered to end a sentence if it ends in one of . ? !
        # optionally followed by some number of single or double quotes
        self.endPattern = re.compile('.*[.!?][\'"]*$')
        # Test against None rather than truthiness: an empty mapping passed
        # in by the caller must still be the one this builder writes to.
        if metadata is None:
            metadata = util.DefaultDict(dict)
        self.metadata = metadata

    def buildNode(self, word, last_node, lines_before, is_title):
        """Record metadata for ``word`` and return its node.

        ``last_node`` is the node built just before this one, or ``None``
        when ``word`` is the first word. ``lines_before`` and ``is_title``
        are stored unchanged under 'linesBefore' and 'isTitle'.
        """
        node = word
        # This word is considered to start a sentence if it is the first word
        # or the last word ended a sentence
        if last_node is None:
            starts_sentence = True
        else:
            # Must be getMetadata (lower-case d); the previous spelling
            # getMetaData named a method that does not exist on this class.
            starts_sentence = self.getMetadata(last_node, 'endsSentence')
        self.metadata[node] = {
            'startsSentence': starts_sentence,
            'endsSentence': self.endPattern.match(word) is not None,
            'linesBefore': lines_before,
            'isTitle': is_title,
        }
        return node

    def getWord(self, node):
        """Return the word represented by ``node`` (the node itself)."""
        return node

    def getMetadata(self, node, key):
        """Return the metadata value stored under ``key`` for ``node``."""
        return self.metadata[node][key]
class QuoteBuilder(object):
    """Node builder that records sentence, layout and quote marks per word.

    Nodes are the words themselves. Per-node metadata is kept in
    ``self.metadata`` keyed by node, so building the same word again
    replaces the entry stored for it earlier.
    """

    def __init__(self, metadata=None):
        """Create the builder.

        ``metadata`` is an optional existing mapping of node -> dict of
        metadata values to use as the store. When it is ``None`` a new
        ``util.DefaultDict(dict)`` is created instead.
        """
        # A word is considered to end a sentence if it ends in one of . ? !
        # optionally followed by some number of single or double quotes
        self.endPattern = re.compile('.*[.!?][\'"]*$')
        # Run of quote characters; used with match(), so it only finds
        # quotes at the very start of the word.
        self.startQuotePattern = re.compile('[\'"]+')
        # Run of quote characters anchored at the end of the word.
        self.endQuotePattern = re.compile('[\'"]+$')
        # Test against None rather than truthiness: an empty mapping passed
        # in by the caller must still be the one this builder writes to.
        if metadata is None:
            metadata = util.DefaultDict(dict)
        self.metadata = metadata

    def buildNode(self, word, last_node, lines_before, is_title):
        """Record metadata for ``word`` and return its node.

        ``last_node`` is the node built just before this one, or ``None``
        when ``word`` is the first word. ``lines_before`` and ``is_title``
        are stored unchanged under 'linesBefore' and 'isTitle'.
        'quotesStarted' / 'quotesEnded' hold the leading / trailing run of
        quote characters as a string, or ``None`` when there is none.
        """
        node = word
        # This word is considered to start a sentence if it is the first word
        # or the last word ended a sentence
        if last_node is None:
            starts_sentence = True
        else:
            starts_sentence = self.getMetadata(last_node, 'endsSentence')

        leading = self.startQuotePattern.match(word)
        trailing = self.endQuotePattern.search(word)

        self.metadata[node] = {
            'startsSentence': starts_sentence,
            'endsSentence': self.endPattern.match(word) is not None,
            'linesBefore': lines_before,
            'isTitle': is_title,
            'quotesStarted': leading.group() if leading is not None else None,
            'quotesEnded': trailing.group() if trailing is not None else None,
        }
        return node

    def getWord(self, node):
        """Return the word represented by ``node`` (the node itself)."""
        return node

    def getMetadata(self, node, key):
        """Return the metadata value stored under ``key`` for ``node``."""
        return self.metadata[node][key]