-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsentiment_analysis.py
More file actions
79 lines (64 loc) · 2.58 KB
/
sentiment_analysis.py
File metadata and controls
79 lines (64 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json
import warnings
# Suppress warnings related to deserialization
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')

# Load tokenizer
# Rebuild the Keras tokenizer from its JSON export. The file is expected in
# the current working directory; it must be the same tokenizer used when the
# model was trained, or the integer sequences will not match the embedding.
with open(r'tokenizer20L.json', encoding='utf-8') as f:
    data = json.load(f)
tokenizer = tokenizer_from_json(data)

# Load model
# HDF5-format Keras model; presumably a binary classifier with a single
# sigmoid output (see the >= 0.5 threshold below) — TODO confirm.
model = tf.keras.models.load_model(r'Sentiment_Analysis20L.h5')
# Preprocessing functions (you can enable these if needed)
def clean_text(text):
    """Normalize a raw review string for model input.

    Steps: strip non-letter characters, remove the corpus-dominant words
    'book' and 'one' (whole words only, case-insensitive), lowercase,
    strip punctuation, and collapse runs of whitespace.

    Args:
        text: Raw review text.

    Returns:
        The cleaned, lowercased, single-spaced string.
    """
    # Keep only letters (ASCII plus the Latin-1 À-ú range) and spaces.
    text = re.sub(r'[^A-Za-zÀ-ú ]+', '', text)
    # Remove the high-frequency domain words as WHOLE words only. The
    # original bare 'book|one' pattern mangled unrelated words
    # ('money' -> 'my', 'alone' -> 'al'); \b boundaries fix that, and
    # IGNORECASE catches 'Book'/'ONE' which previously slipped through
    # because this ran before lowercasing.
    text = re.sub(r'\b(?:book|one)\b', '', text, flags=re.IGNORECASE)
    text = text.lower()
    # Punctuation is already gone after the first regex; kept as a
    # harmless belt-and-braces pass for parity with the original.
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Collapse the double spaces left behind by the word removals.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def remove_stopwords(text):
    """Lowercase and tokenize *text*, then drop English stop words.

    Args:
        text: Input string.

    Returns:
        The surviving tokens joined back together with single spaces.
    """
    english_stops = set(stopwords.words('english'))
    kept = (tok for tok in nltk.word_tokenize(text.lower())
            if tok not in english_stops)
    return " ".join(kept)
def normalize_text(text):
    """Reduce every whitespace-separated word to its Snowball stem.

    Args:
        text: Input string (expected already cleaned/lowercased).

    Returns:
        The stemmed words joined with single spaces.
    """
    stemmer = SnowballStemmer("english")
    return ' '.join(stemmer.stem(word) for word in text.split())
# Streamlit app
st.title('Amazon Reviews Sentiment Analysis')
review = st.text_area('Enter your review:', '')
if st.button('Predict'):
    # Clean -> de-stopword -> stem, presumably mirroring the training
    # pipeline. NOTE(review): the original assigned the raw review back
    # over this result, making all three steps dead code; the overwrite
    # is removed so the preprocessing actually feeds the model — confirm
    # against how the model was trained.
    preprocessed_review = clean_text(review)
    preprocessed_review = remove_stopwords(preprocessed_review)
    preprocessed_review = normalize_text(preprocessed_review)

    # Tokenize and pad to the fixed input length the model expects.
    max_len = 200
    sequence = tokenizer.texts_to_sequences([preprocessed_review])
    padded_sequence = pad_sequences(sequence, maxlen=max_len)

    # Single sigmoid output: score >= 0.5 is read as positive.
    prediction = model.predict(padded_sequence)[0][0]
    if prediction >= 0.5:
        sentiment = 'Positive'
        confidence = prediction * 100
        color = 'green'
    else:
        sentiment = 'Negative'
        # Confidence in the negative call is the complement probability.
        confidence = (1 - prediction) * 100
        color = 'red'
    sentiment_formatted = f'<span style="color: {color}">{sentiment}</span>'

    # Display the sentiment
    st.write(f'Sentiment: {sentiment_formatted} (Confidence: {confidence:.2f}%)', unsafe_allow_html=True)