forked from OIM3640/Text-Analysis-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsentiment.py
More file actions
45 lines (38 loc) · 1.95 KB
/
sentiment.py
File metadata and controls
45 lines (38 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
def Pos_or_Neg(folder: str):
    """Load positive and negative word lists from a folder.

    Every regular file in ``folder`` whose name ends in ``.txt``
    (case-insensitive) is considered. A file whose lowercased name contains
    ``"pos"`` contributes to the positive set; otherwise, one whose name
    contains ``"neg"`` contributes to the negative set. Other files are
    skipped without being read.

    Files are split on whitespace; tokens are stored exactly as they appear
    (no case folding or punctuation stripping).

    Args:
        folder: Path of the directory holding the word-list files.

    Returns:
        A ``(pos_words, neg_words)`` tuple of sets. Both sets are empty when
        ``folder`` is not an existing directory.
    """
    pos_words = set()
    neg_words = set()

    if not os.path.isdir(folder):
        print(f"[!] Folder '{folder}' not found.")
        return pos_words, neg_words

    for filename in os.listdir(folder):
        lowered = filename.lower()  # compare names case-insensitively
        if not lowered.endswith(".txt"):
            continue

        # Decide the destination first so unrelated .txt files are never read.
        # "pos" takes precedence when a name contains both markers.
        if "pos" in lowered:
            target = pos_words
        elif "neg" in lowered:
            target = neg_words
        else:
            continue

        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            # A sub-directory that happens to be named "*.txt" cannot be opened.
            continue

        try:
            with open(path, "r", encoding="utf-8") as f:
                words = set(f.read().split())
        except UnicodeDecodeError:
            # Fall back to latin-1, which can decode any byte sequence.
            with open(path, "r", encoding="latin-1") as f:
                words = set(f.read().split())

        target.update(words)

    print(f"Loaded {len(pos_words)} positive and {len(neg_words)} negative words.")
    return pos_words, neg_words
def sentiment_score(text: str, pos_set, neg_set) -> float:
    """Return a simple sentiment score between -1 and +1.

    ``text`` is split on whitespace and each token is looked up as-is in
    ``pos_set`` and ``neg_set`` (exact match: case and attached punctuation
    are significant). A token present in both collections counts once as
    positive and once as negative.

    Args:
        text: The text to score.
        pos_set: Collection of positive words (supports ``in``).
        neg_set: Collection of negative words (supports ``in``).

    Returns:
        ``(pos - neg) / (pos + neg)`` where ``pos`` and ``neg`` are the hit
        counts, or ``0.0`` when no token matches either collection.
    """
    pos = 0
    neg = 0
    # One pass is enough: both membership tests apply to the same token.
    for word in text.split():
        if word in pos_set:
            pos += 1
        if word in neg_set:
            neg += 1

    total = pos + neg
    if total == 0:
        # No sentiment-bearing words; also avoids dividing by zero.
        return 0.0
    return (pos - neg) / total