-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathclassify.py
More file actions
executable file
·34 lines (31 loc) · 1.16 KB
/
classify.py
File metadata and controls
executable file
·34 lines (31 loc) · 1.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/usr/bin/env python3
import nltk
from Scraper.TechEUNewsScraper import TechEUNewsScraper
from Scraper.BBCNewsScraper import BBCNewsScraper
from Classifiers.ThemeClassifier import ThemeClassifier
# Fetch the NLTK data packages this script relies on, in a fixed order.
# NOTE(review): presumably consumed by the scraper/classifier modules for
# tokenisation and stop-word filtering -- confirm against those modules.
for _nltk_resource in ('punkt', 'stopwords'):
    nltk.download(_nltk_resource)
def main():
    """Build a theme classifier, then classify article links interactively.

    Steps, as performed below:

    1. Build a corpus with ``TechEUNewsScraper.build_corpus()``.
    2. Build a ``ThemeClassifier`` from that corpus and print its themes.
    3. Repeatedly prompt for an article link, fetch its content with
       ``BBCNewsScraper.get_content()`` and print the predicted theme.

    The loop ends when the user submits an empty (or whitespace-only) line,
    or when standard input is closed / interrupted. A link that cannot be
    fetched or classified is reported and the prompt is shown again, so a
    single bad link does not throw away the already-built classifier.
    """
    ns = TechEUNewsScraper()
    bbcs = BBCNewsScraper()

    print('Building corpus of tech articles...')
    ns.build_corpus()

    print('Building classifier...')
    # NOTE(review): 3 is presumably the number of themes to identify --
    # confirm against ThemeClassifier's constructor.
    tc = ThemeClassifier(3)
    tc.build(ns.corpus)

    print('Article Themes identified : ')
    print('----------------------------------')
    print(tc.themes)
    print('----------------------------------')

    while True:
        print('Input a tech article link that you will like to classify')
        print('Link should be from https://www.bbc.co.uk/news/ site')
        try:
            # Strip so that a line containing only spaces counts as "exit"
            # instead of being handed to the scraper as a URL.
            link = input('Enter a tech article link (empty to exit)>').strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin (piped input, Ctrl-D) or Ctrl-C at the prompt:
            # treat it like an empty answer and leave cleanly.
            print()
            link = ''

        if not link:
            print('Thanks for using this Classifier')
            print('Good Day')
            break

        try:
            content = bbcs.get_content(link)
            theme = tc.themes[tc.predict(content)]
        except Exception as err:
            # Top-level interactive boundary: the exception types raised by
            # the scraper/classifier are not visible from this file, so
            # report whatever went wrong and keep the session alive.
            print(f'Could not classify {link!r}: {err}')
            continue

        print(f'Article belongs to theme : {theme}')


if __name__ == '__main__':
    main()