forked from OIM3640/Text-Analysis-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
43 lines (38 loc) · 1.58 KB
/
main.py
File metadata and controls
43 lines (38 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from text_processing import Load_Txt, clean_and_filter,count_words,summary_stats
from sentiment import Pos_or_Neg, sentiment_score
from visualization import plot_sentiment #, top_n_counts_per_file
def main():
    """Run the text-analysis pipeline: per-file word stats, sentiment, plot.

    Calls ``Load_Txt`` on the ``text`` folder and, for every ``(name, text)``
    pair it returns, prints the summary statistics and the five most frequent
    words. Each text is then scored with ``sentiment_score`` against the
    lexicons returned by ``Pos_or_Neg("Text_Words")``; the scores are printed
    and handed to ``plot_sentiment`` as a ``{name: score}`` dict.

    Returns:
        None. Returns early, printing nothing, when ``Load_Txt`` yields an
        empty/falsy result.
    """
    # Folder and Text Loader
    text_folder = "text"
    files = Load_Txt(text_folder, skip_header=True)
    if not files:
        return

    # Every text is cleaned exactly once here; the cleaned form is kept so the
    # sentiment pass below can reuse it instead of cleaning the raw text again.
    # NOTE(review): assumes clean_and_filter is deterministic and that
    # count_words / summary_stats do not mutate its result -- confirm.
    cleaned_texts = []
    for name, raw in files:
        cleaned = clean_and_filter(raw)       # clean the text
        cleaned_texts.append((name, cleaned))

        counts = count_words(cleaned)         # dict: word -> freq
        stats = summary_stats(cleaned)        # words/unique/TTR/avg len stats

        # Highest frequency first; sorted() is stable, so words with equal
        # counts keep the order in which `counts` yields them.
        ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
        top5 = ranked[:5]                     # top 5 words for a quick glance

        # NOTE(review): these lines are printed *before* the file-name header
        # below and repeat what the " top5:" line shows -- possibly leftover
        # debug output. Kept as-is so the program's output does not change.
        for word, count in top5:
            print(f"{word}: {count}")

        print(f"\n{name}")
        print(f" words: {stats['words']}")
        print(f" unique_words: {stats['unique_words']}")
        print(f" type_token_ratio: {stats['type_token_ratio']:.3f}")
        print(f" avg_word_len: {stats['avg_word_len']:.2f}")
        print(" top5:", ", ".join(f"{w}({c})" for w, c in top5))

    # (simple) Sentiment Analysis
    lexicon_folder = "Text_Words"
    POS, NEG = Pos_or_Neg(lexicon_folder)
    scores = {}
    for name, cleaned in cleaned_texts:
        score = sentiment_score(cleaned, POS, NEG)
        scores[name] = score
        print(f"{name}: {score:+.3f}")

    # Visualization
    plot_sentiment(scores)
    # top_n_counts_per_file(files, n=15, cleaner=None)
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()