-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconcraft.py
More file actions
85 lines (67 loc) · 2.66 KB
/
concraft.py
File metadata and controls
85 lines (67 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import re
import signal
import socket
import subprocess
from retry import retry
from collections import OrderedDict
from utils import extract_gnc
# Directory that holds both the ``concraft-pl`` executable and its ``model.gz``.
# The leading ``~`` is not expanded in Python: the commands built from this
# value are run with ``shell=True``, so the shell performs the expansion.
PATH_TO_CONCRAFT = '~/.cabal/bin'
class Server:
    """Context manager that runs a local ``concraft-pl`` server process.

    Entering the context picks a free TCP port and launches the server on it
    in a fresh session; leaving the context terminates the whole process
    group and reaps the child so no zombie is left behind.
    """

    # Seconds to wait after SIGTERM before escalating to SIGKILL.
    _SHUTDOWN_TIMEOUT = 10

    def __enter__(self):
        """Choose a port, start the server and return this object."""
        self.port = self.find_free_port()
        self.server = self.start_server()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the server's process group and wait for the child to exit.

        SIGTERM is sent first; if the child has not exited after
        ``_SHUTDOWN_TIMEOUT`` seconds the group is sent SIGKILL. Returns
        ``None``, so an exception raised inside the ``with`` body propagates.
        """
        self._signal_group(signal.SIGTERM)
        try:
            self.server.wait(timeout=self._SHUTDOWN_TIMEOUT)
        except subprocess.TimeoutExpired:
            self._signal_group(signal.SIGKILL)
            self.server.wait()

    def _signal_group(self, signum):
        """Send ``signum`` to the server's process group, if it still exists."""
        try:
            os.killpg(os.getpgid(self.server.pid), signum)
        except ProcessLookupError:
            # The child is already gone (and reaped); nothing left to signal.
            pass

    def find_free_port(self):
        """Return a TCP port number that was free at the time of the call.

        Binding to port 0 lets the operating system choose the port. The
        probing socket is closed before returning so the port is available
        to the server; another process could still claim it in between.
        """
        with socket.socket() as sock:
            sock.bind(('', 0))
            return sock.getsockname()[1]

    def start_server(self):
        """Launch ``concraft-pl server`` on ``self.port`` and return the Popen.

        ``preexec_fn=os.setsid`` places the shell and everything it spawns
        in a new session, which is what allows ``__exit__`` to signal the
        whole process group at once.
        """
        command = "{0}/concraft-pl server {0}/model.gz --port {1}".format(PATH_TO_CONCRAFT, self.port)
        return subprocess.Popen(command, shell=True, preexec_fn=os.setsid)

    def get_port(self):
        """Return the port chosen in ``__enter__``."""
        return self.port
class Client:
    """Wrapper around the ``concraft-pl client`` command-line tool.

    Each ``to_*`` method sends the sentence to the server listening on
    ``port`` and returns one space-separated string built from the parsed
    output, with one item per token.
    """

    def __init__(self, port):
        """Remember the port of the server that requests are sent to."""
        self.port = port

    @retry(subprocess.CalledProcessError, delay=10)
    def call_concraft(self, sentence):
        """Run the client on ``sentence`` and return its output as text.

        The sentence is first written to the file ``input`` in the current
        working directory, which the shell command then redirects to the
        client's standard input. The ``retry`` decorator re-runs the call
        when the command exits with a non-zero status, waiting 10 seconds
        between attempts.
        """
        self.write_to_file(sentence)
        command = "{0}/concraft-pl client --port {1} < input".format(PATH_TO_CONCRAFT, self.port)
        return subprocess.check_output(command, shell=True).decode('utf-8')

    def _analyse(self, sentence):
        """Return the parsed client output for ``sentence``."""
        return self.parse(self.call_concraft(sentence))

    def to_lemmas(self, sentence):
        """Return the lemmas of ``sentence`` joined by single spaces."""
        return ' '.join(entry[1] for entry in self._analyse(sentence))

    def to_gnc(self, sentence):
        """Return ``extract_gnc`` applied to each token's colon-split tags."""
        return ' '.join(
            extract_gnc(entry[2].split(':')) for entry in self._analyse(sentence)
        )

    def to_pos(self, sentence):
        """Return the first colon-separated tag component of each token."""
        return ' '.join(entry[2].split(':')[0] for entry in self._analyse(sentence))

    def to_pos_tags(self, sentence):
        """Return the full tag string of each token, joined by spaces."""
        return ' '.join(entry[2] for entry in self._analyse(sentence))

    def parse(self, concraft_output):
        """Turn raw client output into ``[word, lemma, tags]`` lists.

        A token line starts a new entry holding only the word. The first
        following line marked ``disamb`` completes it with the lemma and
        tags; later ``disamb`` lines for the same token are ignored, as is
        a ``disamb`` line that appears before any token line. A token with
        no ``disamb`` line stays a one-element list.
        """
        parsed = []
        for line in concraft_output.split('\n'):
            if self.is_word(line):
                parsed.append([line.split()[0]])
            elif parsed and len(parsed[-1]) == 1 and self.is_disamb(line):
                parsed[-1].extend((self.extract_lemma(line), self.extract_tags(line)))
        return parsed

    def extract_lemma(self, disamb):
        """Return the first whitespace-separated field, lower-cased."""
        return disamb.split()[0].lower()

    def extract_tags(self, disamb):
        """Return the second whitespace-separated field, lower-cased."""
        return disamb.split()[1].lower()

    def is_word(self, line):
        """Return True for a non-blank line that does not start with a tab."""
        # Whitespace-only lines are rejected so parse() can safely take the
        # first field of every line accepted here.
        return bool(line.strip()) and not line.startswith('\t')

    def is_disamb(self, line):
        """Return True if the line's last whitespace-separated field is 'disamb'."""
        fields = line.split()
        # A blank or whitespace-only line has no fields and is never a match.
        return bool(fields) and fields[-1] == 'disamb'

    def write_to_file(self, sentence):
        """Write ``sentence`` to the file ``input`` in the working directory."""
        # UTF-8 is used explicitly so the file does not depend on the locale
        # and matches the decoding applied to the client's output.
        with open('input', 'w', encoding='utf-8') as handle:
            handle.write(sentence)