-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
163 lines (138 loc) · 5.72 KB
/
app.py
File metadata and controls
163 lines (138 loc) · 5.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from flask import Flask, render_template, request
import os
import json
import ast
from mmif import Mmif
from collections import Counter
from modeling.summarize import summarize_file, summarize_from_text
from modeling.ner import get_entities
from modeling.cluster import cluster_nodes
from modeling.topic_model import get_topics, train_topic_model
from modeling.date import extract_date
from db import insert_data, get_all_data, delete_data
import requests
from flask_cors import CORS
# Flask application object; static files are exposed under the /static URL path.
app = Flask(__name__, static_url_path='/static')
# Enable CORS for all routes
CORS(app)
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/all-nodes')
def get_all_nodes():
    """Return every stored node as a list of dicts.

    Each row from ``get_all_data("nodes")`` is unpacked as an 11-column tuple
    (id, filename, label, apps, summary, long_summary, transcript, entities,
    date, temp, hidden) and converted to a dict keyed by those names.

    ``apps`` and ``entities`` are stored as comma-joined strings and are split
    back into lists here. An empty or missing value becomes ``[]`` rather than
    ``[""]``, so nodes without apps/entities do not carry a bogus empty entry.

    Besides serving the ``/all-nodes`` route, this function is also called
    directly by other views in this module, so it returns a plain list.

    Raises:
        ValueError: if a row does not have exactly 11 columns.
    """
    def split_csv(value):
        # "".split(",") would give [""]; treat empty/None as "no items".
        return value.split(",") if value else []

    nodes = []
    for (node_id, filename, label, apps, summary, long_summary,
         transcript, entities, date, temp, hidden) in get_all_data("nodes"):
        nodes.append({
            "id": node_id,
            "filename": filename,
            "label": label,
            "apps": split_csv(apps),
            "summary": summary,
            "long_summary": long_summary,
            "transcript": transcript,
            "entities": split_csv(entities),
            "date": date,
            "temp": temp,
            "hidden": hidden,
        })
    return nodes
@app.route('/delete', methods=['POST'])
def delete():
    """Delete a node and its saved MMIF file.

    Expects a JSON body with an ``id`` key. The matching node is looked up via
    ``get_all_nodes()``, its row is removed with ``delete_data``, and the file
    ``tmp/<filename>`` is removed from disk.

    Returns:
        A JSON string: ``{"success": true}`` on success, or
        ``{"error": "<message>"}`` if anything fails (including an unknown id).
    """
    try:
        node_id = request.json['id']
        node = next((n for n in get_all_nodes() if n['id'] == node_id), None)
        if node is None:
            return json.dumps({"error": f"No node with id {node_id!r}"})
        filename = node['filename']
        delete_data("nodes", node_id)
        # Delete the MMIF file from tmp
        tmp_filename = os.path.join('tmp', filename)
        os.remove(tmp_filename)
        return json.dumps({"success": True})
    except Exception as e:
        # Exception objects are not JSON serializable; send the message text.
        return json.dumps({"error": str(e)})
@app.route('/upload', methods=['GET', 'POST'])
def upload():
    """Render the upload form (GET) or ingest an uploaded MMIF file (POST).

    On POST the uploaded ``file`` is parsed with ``Mmif``, saved under ``tmp/``,
    summarized, and scanned for entities and a date. A node record is then
    inserted into the ``nodes`` table with ``apps`` and ``entities`` stored as
    comma-joined strings.

    Returns:
        GET: the rendered ``upload.html`` template.
        POST: a JSON string of the new node (with ``apps`` and ``entities`` as
        lists), or ``{"error": "<message>"}`` if any step fails.
    """
    if request.method == 'GET':
        return render_template('upload.html')
    try:
        file = request.files['file']
        # The filename is client-controlled: keep only its final path component
        # so the write below cannot land outside the tmp directory.
        upload_name = os.path.basename(file.filename or "")
        if upload_name in ("", ".", ".."):
            raise ValueError("Uploaded file has no usable filename")
        filename = upload_name.replace(".mmif", "").replace(".json", "")
        file_content = file.read()
        mmif = Mmif(file_content)
        # Save the MMIF file to tmp
        tmp_filename = os.path.join('tmp', upload_name)
        with open(tmp_filename, 'wb') as tmp_file:
            tmp_file.write(file_content)
        summary, long_summary, transcript = summarize_file(mmif)
        entities = get_entities(transcript)
        # Store entities as list in descending order of frequency
        c = Counter(entities)
        entities = [entity.text for entity, _ in c.most_common(500)]
        date = extract_date(upload_name, mmif)
        apps = [str(view.metadata.app) for view in mmif.views]
        new_node = {
            'id': filename,
            'filename': upload_name,
            'label': filename,
            'apps': ",".join(apps),
            'summary': summary,
            "long_summary": long_summary,
            "transcript": transcript,
            'entities': ",".join(entities),
            'date': date,
            'temp': False,
            'hidden': False,
        }
        insert_data("nodes", new_node)
        # Un-stringify list entries to pass back to the app. An empty string
        # means "no items"; "".split(",") would wrongly produce [""].
        for key in ('apps', 'entities'):
            joined = new_node[key]
            new_node[key] = joined.split(",") if joined else []
        return json.dumps(new_node)
    except Exception as e:
        # Exception objects are not JSON serializable; send the message text.
        return json.dumps({"error": str(e)})
@app.route('/cluster', methods=['POST'])
def cluster():
    """Cluster the nodes sent in the request body.

    Reads ``nodes`` from the JSON payload, passes it to ``cluster_nodes``, and
    returns a JSON string with the result under ``nodes`` and the cluster count
    under ``n_clusters``.
    """
    payload = request.json
    clustered, count = cluster_nodes(payload['nodes'])
    return json.dumps({'nodes': clustered, 'n_clusters': count})
@app.route('/topic_model', methods=['POST'])
def topic_model():
    """Run topic modeling over the nodes sent in the request body.

    Reads ``nodes`` (required) and ``zeroshot_topics`` (optional) from the JSON
    payload. Each node's ``long_summary`` is used as a document, and the
    lower-cased entities of all nodes are passed to ``get_topics`` alongside.

    Returns:
        A JSON string with ``names`` (the topic names from ``get_topics``) and
        ``probs`` (node id -> that node's entry in the topic distribution).
    """
    payload = request.json
    nodes = payload['nodes']
    # Falls back to [] when the key is absent; an explicit null stays None.
    zeroshot_topics = payload.get('zeroshot_topics', [])
    docs = [node['long_summary'] for node in nodes]
    # Pre-load all entities for more efficient topic model pre-processing
    entities = {entity.lower() for node in nodes for entity in node['entities']}
    print("Zero-shot topics: ", zeroshot_topics)
    topic_names, topic_distr = get_topics(
        docs=docs,
        entities=entities,
        zeroshot_topics=zeroshot_topics,
    )
    probs = {node["id"]: topic_distr[i] for i, node in enumerate(nodes)}
    return json.dumps({"names": topic_names, "probs": probs})
@app.route('/summarize_all', methods=['GET'])
def summarize_all():
    """Summarize all stored nodes together.

    Joins every node's ``summary`` with the ``<NEXT ARTICLE>`` separator and
    returns whatever ``summarize_from_text`` produces for the combined text.
    """
    combined = "<NEXT ARTICLE>".join(node["summary"] for node in get_all_nodes())
    return summarize_from_text(combined)
@app.route('/summarize_clusters', methods=['POST'])
def summarize_clusters():
    """Produce one summary per cluster for the nodes in the request body.

    Reads ``nodes`` from the JSON payload; each node must carry ``cluster`` and
    ``summary``. Clusters are numbered ``0 .. max(cluster)``. For each one, the
    member summaries are joined with spaces and passed to
    ``summarize_from_text``, whose first element is kept. A cluster id with no
    members is summarized from an empty string. Negative ids are ignored.

    Returns:
        A list of cluster summaries indexed by cluster id, or ``[]`` when no
        nodes are supplied.
    """
    print("Summarizing clusters...")
    nodes = request.json['nodes']
    # default=-1 yields zero clusters for an empty node list rather than
    # letting max() raise ValueError.
    n_clusters = max((node['cluster'] for node in nodes), default=-1) + 1
    # Group summaries in a single pass instead of rescanning nodes per cluster.
    summaries_by_cluster = {}
    for node in nodes:
        summaries_by_cluster.setdefault(node['cluster'], []).append(node['summary'])
    cluster_texts = [" ".join(summaries_by_cluster.get(i, [])) for i in range(n_clusters)]
    # NOTE: per-cluster summaries are returned as-is; they are not merged into
    # a single overall summary.
    return [summarize_from_text(cluster_text)[0] for cluster_text in cluster_texts]
@app.route('/visualize', methods=['POST'])
def visualize():
    """Forward a node's saved MMIF file to the visualizer service.

    Expects a JSON body with an ``id`` key. The node's file is read from
    ``tmp/<filename>`` and POSTed as a multipart upload to
    ``http://localhost:5000/upload``.

    Returns:
        ``{"res": <response text from the visualizer>}``.

    Raises:
        IndexError: if no stored node has the requested id.
    """
    node_id = request.json['id']
    nodes = get_all_nodes()
    node = [node for node in nodes if node['id'] == node_id][0]
    filename = node['filename']
    file_path = os.path.join('tmp', filename)
    # The visualizer returns the document id if curl is in the headers
    headers = {'User-Agent': 'basically curl'}
    # Open inside a context manager so the handle is closed once the upload
    # finishes, even if the request raises.
    with open(file_path, 'rb') as mmif_file:
        res = requests.post(
            'http://localhost:5000/upload',
            files={'file': mmif_file},
            headers=headers,
        )
    return {"res": res.text}
if __name__ == "__main__":
    # Start the Flask development server on port 5555 with debug mode off.
    app.run(port=5555, debug=False)