-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsentiment_analysis.py
More file actions
79 lines (64 loc) · 2.58 KB
/
sentiment_analysis.py
File metadata and controls
79 lines (64 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json
import warnings
# Suppress warnings related to deserialization
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')

# Load tokenizer
# Rebuild the Keras tokenizer from its JSON export. The file is expected in
# the current working directory; it must be the same tokenizer used when the
# model was trained, or the integer sequences will not match the embedding.
with open(r'tokenizer20L.json', encoding='utf-8') as f:
    data = json.load(f)
tokenizer = tokenizer_from_json(data)

# Load model
# HDF5-format Keras model; presumably a binary classifier with a single
# sigmoid output (see the >= 0.5 threshold below) — TODO confirm.
model = tf.keras.models.load_model(r'Sentiment_Analysis20L.h5')
# Preprocessing functions (you can enable these if needed)
def clean_text(text):
    """Normalize a raw review string for model input.

    Steps: strip non-letter characters, remove the corpus-dominant words
    'book' and 'one' (whole words only, case-insensitive), lowercase,
    strip punctuation, and collapse runs of whitespace.

    Args:
        text: Raw review text.

    Returns:
        The cleaned, lowercased, single-spaced string.
    """
    # Keep only letters (ASCII plus the Latin-1 À-ú range) and spaces.
    text = re.sub(r'[^A-Za-zÀ-ú ]+', '', text)
    # Remove the high-frequency domain words as WHOLE words only. The
    # original bare 'book|one' pattern mangled unrelated words
    # ('money' -> 'my', 'alone' -> 'al'); \b boundaries fix that, and
    # IGNORECASE catches 'Book'/'ONE' which previously slipped through
    # because this ran before lowercasing.
    text = re.sub(r'\b(?:book|one)\b', '', text, flags=re.IGNORECASE)
    text = text.lower()
    # Punctuation is already gone after the first regex; kept as a
    # harmless belt-and-braces pass for parity with the original.
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Collapse the double spaces left behind by the word removals.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def remove_stopwords(text):
    """Lowercase and tokenize *text*, then drop English stop words.

    Args:
        text: Input string.

    Returns:
        The surviving tokens joined back together with single spaces.
    """
    english_stops = set(stopwords.words('english'))
    kept = (tok for tok in nltk.word_tokenize(text.lower())
            if tok not in english_stops)
    return " ".join(kept)
def normalize_text(text):
    """Reduce every whitespace-separated word to its Snowball stem.

    Args:
        text: Input string (expected already cleaned/lowercased).

    Returns:
        The stemmed words joined with single spaces.
    """
    stemmer = SnowballStemmer("english")
    return ' '.join(stemmer.stem(word) for word in text.split())
# Streamlit app
st.title('Amazon Reviews Sentiment Analysis')
review = st.text_area('Enter your review:', '')
if st.button('Predict'):
    # Clean -> de-stopword -> stem, presumably mirroring the training
    # pipeline. NOTE(review): the original assigned the raw review back
    # over this result, making all three steps dead code; the overwrite
    # is removed so the preprocessing actually feeds the model — confirm
    # against how the model was trained.
    preprocessed_review = clean_text(review)
    preprocessed_review = remove_stopwords(preprocessed_review)
    preprocessed_review = normalize_text(preprocessed_review)

    # Tokenize and pad to the fixed input length the model expects.
    max_len = 200
    sequence = tokenizer.texts_to_sequences([preprocessed_review])
    padded_sequence = pad_sequences(sequence, maxlen=max_len)

    # Single sigmoid output: score >= 0.5 is read as positive.
    prediction = model.predict(padded_sequence)[0][0]
    if prediction >= 0.5:
        sentiment = 'Positive'
        confidence = prediction * 100
        color = 'green'
    else:
        sentiment = 'Negative'
        # Confidence in the negative call is the complement probability.
        confidence = (1 - prediction) * 100
        color = 'red'
    sentiment_formatted = f'<span style="color: {color}">{sentiment}</span>'

    # Display the sentiment
    st.write(f'Sentiment: {sentiment_formatted} (Confidence: {confidence:.2f}%)', unsafe_allow_html=True)