-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest.py
More file actions
72 lines (61 loc) · 2.15 KB
/
test.py
File metadata and controls
72 lines (61 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 14 17:05:16 2021
@author: mrw
"""
import nltk
from document import Document
def test_lemmatize():
    """Print the default (noun) WordNet lemma for a few sample words."""
    wnl = nltk.stem.WordNetLemmatizer()
    samples = ["last", "las", "us", "Q1-1", "decreased"]
    for word in samples:
        print(word, wnl.lemmatize(word))
def test_strip():
    """Show how str.strip() handles leading/trailing whitespace and newlines."""
    samples = (" a", "b ", " c ", "d\n", "e")
    for raw in samples:
        # Angle brackets make the surrounding whitespace visible in the output.
        print("<{}>".format(raw), "<{}>".format(raw.strip()))
def test_pos_tag():
    """POS-tag a sample sentence and print both the raw Penn Treebank tags
    and their mapped WordNet POS constants (via wordnet_pos)."""
    # The original assigned s twice; the first value was dead code and has
    # been removed — only this sentence was ever tagged.
    s = "But that may reduce us the power of this company."
    tags = nltk.pos_tag(nltk.word_tokenize(s))
    print(tags)
    print([(w, wordnet_pos(t)) for w, t in tags])
def test_lemmatize_pos():
    """Lemmatize a sample sentence using POS-aware lemmatization
    (tag each token, map the tag to a WordNet POS, then lemmatize)."""
    # The original assigned s twice; the first value was dead code and has
    # been removed — only this sentence was ever lemmatized.
    s = "But that may reduce us the power of this company."
    lemmatizer = nltk.stem.WordNetLemmatizer()
    tags = nltk.pos_tag(nltk.word_tokenize(s))
    print([lemmatizer.lemmatize(w, wordnet_pos(t)) for w, t in tags])
def test_tokens():
    """Build a Document from a sample text file and print its tokens."""
    document = Document(file_name="./txt_files/d2.txt")
    print(document.create_tokens())
def test_sentences():
    """Build a Document from a sample text file and print its sentences."""
    document = Document(file_name="./txt_files/d2.txt")
    print(document.create_sentences())
def test_tokens_from():
    """Exercise Document.create_tokens_from under three option combinations:
    no lemmatization/no stopword stripping, lemmatization off only, and
    all defaults."""
    document = Document("the brown foxes jumped over the fence")
    option_sets = (
        {"lemmatized": False, "strip_stopwords": False},
        {"lemmatized": False},
        {},
    )
    for options in option_sets:
        print(document.create_tokens_from(document.content, **options))
# Lookup WordNet POS
# https://www.machinelearningplus.com/nlp/lemmatization-examples-python/
def wordnet_pos(tag):
    """Map a Penn Treebank POS tag (e.g. 'VBD') to a WordNet POS constant.

    Only the first letter of the tag is significant; unrecognized tags
    fall back to NOUN, matching WordNetLemmatizer's own default.
    """
    wn = nltk.corpus.wordnet
    mapping = {"J": wn.ADJ, "N": wn.NOUN, "V": wn.VERB, "R": wn.ADV}
    return mapping.get(tag[0], wn.NOUN)
#-----------Main----------------
if __name__ == "__main__":
    # Run every smoke test in sequence when executed as a script.
    for run_test in (test_lemmatize, test_strip, test_pos_tag,
                     test_lemmatize_pos, test_tokens, test_sentences,
                     test_tokens_from):
        run_test()