forked from OIM3640/Text-Analysis-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsentiment_score.py
More file actions
97 lines (78 loc) · 3.38 KB
/
sentiment_score.py
File metadata and controls
97 lines (78 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pickle

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer  # VADER analyzer used for the compound score
from nltk.tokenize import sent_tokenize  # splits raw text into individual sentences

# Fetch the resources the functions below depend on: the "punkt" models that
# back sent_tokenize, and the VADER lexicon that backs SentimentIntensityAnalyzer.
# nltk.download skips resources that are already present in the local cache.
nltk.download("punkt")
nltk.download("vader_lexicon")
def load_book_text_from_pickle(pickle_file_path):
    """Return the book text stored in *pickle_file_path*, lower-cased.

    The file is expected to contain a single pickled string.
    """
    with open(pickle_file_path, "rb") as pkl:
        return pickle.load(pkl).lower()
# Key characters of the book (compiled from an online character summary).
# Serves as the set of names whose mentions are scored in
# analyze_character_sentiment below.
character_list = [
    "Scrooge",
    "Marley",
    "Bob",
    "Tim",
    "Fred",
    "Past",
    "Present",
    "Future",
]
def analyze_character_sentiment(book_text, characters=None):
    """Classify the sentiment of every sentence mentioning each character.

    Args:
        book_text: Full (lower-cased) text of the book.
        characters: Iterable of character names to look for. Defaults to the
            module-level ``character_list``. (A ``None`` sentinel is used
            instead of a mutable list default.)

    Returns:
        dict mapping each character name to counts of sentences scored as
        ``good_person`` (VADER compound >= 0.6), ``neutral`` (>= 0.3), or
        ``bad_person`` (everything below 0.3).
    """
    if characters is None:
        characters = character_list
    analyzer = SentimentIntensityAnalyzer()
    # VADER works per-sentence, so split the text first.
    sentences = sent_tokenize(book_text)
    # Build the result skeleton from the requested characters (the original
    # hard-coded these keys and ignored the `characters` argument entirely).
    sentiment_counts = {
        name: {"good_person": 0, "neutral": 0, "bad_person": 0}
        for name in characters
    }
    for sentence in sentences:
        for name in characters:
            # NOTE(review): plain substring match — "tim" also matches
            # "time"; TODO confirm whether word-boundary matching is wanted.
            if name.lower() in sentence:
                score = analyzer.polarity_scores(sentence)["compound"]
                if score >= 0.6:
                    sentiment_counts[name]["good_person"] += 1
                elif score >= 0.3:
                    sentiment_counts[name]["neutral"] += 1
                else:
                    sentiment_counts[name]["bad_person"] += 1
    return sentiment_counts
def calculate_sentiment_percentage(sentiment_counts):
    """Convert per-character sentiment counts into percentages.

    Args:
        sentiment_counts: dict mapping character name -> dict of sentiment
            label -> count (as produced by analyze_character_sentiment).

    Returns:
        dict with the same structure, where each count is replaced by its
        percentage (0.0-100.0) of that character's total mentions. A
        character with zero mentions gets 0.0 for every label instead of
        raising ZeroDivisionError (the original crashed in that case).
    """
    sentiment_percentages = {}
    for character, counts in sentiment_counts.items():
        total_mentions = sum(counts.values())
        if total_mentions == 0:
            # Never mentioned: report all-zero percentages rather than dividing by 0.
            sentiment_percentages[character] = {label: 0.0 for label in counts}
            continue
        # Percentages are easier to compare across characters than raw counts.
        sentiment_percentages[character] = {
            label: (count / total_mentions) * 100
            for label, count in counts.items()
        }
    return sentiment_percentages
# Pickle produced by the text-gathering step of the project.
pickle_file_path = "book_text.pkl"

if __name__ == "__main__":
    # Load the book, score every character, and print per-character percentages.
    text = load_book_text_from_pickle(pickle_file_path)
    counts = analyze_character_sentiment(text, character_list)
    for name, percentages in calculate_sentiment_percentage(counts).items():
        print(f"{name}: {percentages}")