Sentilytics / app.py
prasu16's picture
Update app.py
65f63c3 verified
# ===== IMPORTS =====
import gradio as gr
import pandas as pd
import pickle
import re
import nltk
import matplotlib.pyplot as plt
import os
from nltk.corpus import stopwords
from transformers import pipeline
# ===== NLTK SETUP =====
# Register an extra corpus search location, fetch the stop-word corpus, and
# keep the English stop words in a set (clean_text does membership tests on it).
nltk.data.path.append("/root/nltk_data")
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# ===== LOAD MODEL & VECTORIZER =====
# NOTE(review): pickle.load executes code embedded in the file, so these two
# artifacts must come from a trusted training run and never from user uploads.
# `model` is used via .predict() and `tfidf` via .transform() further down.
with open("nb_model.pkl", "rb") as f:
    model = pickle.load(f)
with open("tfidf.pkl", "rb") as f:
    tfidf = pickle.load(f)

# ===== LOAD BERT =====
# Hugging Face sentiment pipeline; built once at import time and reused for
# every request.
bert = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# ===== CLEAN TEXT =====
def clean_text(text):
    """Normalize raw text into the token string fed to the TF-IDF vectorizer.

    The input is coerced to ``str`` and lower-cased, then URLs, @mentions,
    #hashtags and every character that is not ``a-z`` or whitespace are
    removed. Finally the module-level ``stop_words`` are dropped and the
    remaining tokens are joined with single spaces.

    Args:
        text: Any value; it is converted with ``str()`` before processing.

    Returns:
        The cleaned, space-separated string (possibly empty).
    """
    normalized = str(text).lower()
    # Order matters: URLs and mentions/hashtags must be stripped before the
    # final pass removes the punctuation that identifies them.
    for pattern in (r"http\S+|www\S+", r"@\w+|#\w+", r"[^a-z\s]"):
        normalized = re.sub(pattern, "", normalized)
    kept_tokens = (token for token in normalized.split() if token not in stop_words)
    return " ".join(kept_tokens)
# ===== TEXT ANALYSIS =====
def analyze_text(text_input):
    """Classify a single piece of text with both the pickled model and BERT.

    Args:
        text_input: Raw text from the UI. ``None`` and whitespace-only
            strings are treated as "no input".

    Returns:
        A Markdown string with the pickled model's prediction, the BERT
        label and the BERT score rounded to three decimals, or an error
        message when no text was supplied.
    """
    # Guard against None as well as blank strings so the check cannot raise.
    if not (text_input or "").strip():
        return "❌ Please enter some text"

    features = tfidf.transform([clean_text(text_input)])
    ml_pred = model.predict(features)[0]

    # BERT receives the raw (uncleaned) text. truncation=True caps the input
    # at the model's maximum sequence length; without it, longer texts make
    # the pipeline raise instead of returning a prediction.
    bert_out = bert(text_input, truncation=True)[0]

    return f"""
✨ **Sentiment Analysis Result**
🔹 **ML Sentiment:** {ml_pred}
🔹 **BERT Sentiment:** {bert_out['label']}
🔹 **Confidence Score:** {round(bert_out['score'], 3)}
"""
# ===== CSV ANALYSIS =====
def analyze_csv(csv_file, text_column):
    """Run the pickled sentiment model over one column of an uploaded CSV.

    Args:
        csv_file: The upload from the UI: either an object exposing the file
            path as ``.name`` or the path itself as a string. ``None`` means
            nothing was uploaded.
        text_column: Name of the CSV column holding the text to classify.

    Returns:
        A ``(summary, figure)`` tuple. ``summary`` is a Markdown string with
        the record count and the positive/neutral/negative tallies; ``figure``
        is a matplotlib pie chart of the predicted labels. On any input
        problem the tuple is ``(error_message, None)``.
    """
    if csv_file is None:
        return "❌ Please upload a CSV file", None

    # Accept both an upload wrapper with a .name attribute and a plain path.
    csv_path = getattr(csv_file, "name", csv_file)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report unreadable files the same way as the other input errors.
        return f"❌ Could not read CSV: {exc}", None

    if text_column not in df.columns:
        return f"❌ Column '{text_column}' not found in CSV", None
    if df.empty:
        # Nothing to classify; avoid handing the model zero samples.
        return "❌ CSV file contains no rows", None

    df["clean_text"] = df[text_column].astype(str).apply(clean_text)
    features = tfidf.transform(df["clean_text"])
    df["sentiment"] = model.predict(features)
    counts = df["sentiment"].value_counts()

    fig, ax = plt.subplots()
    ax.pie(
        counts.values,
        labels=counts.index,
        autopct="%1.1f%%",
        startangle=90,
    )
    ax.set_title("💬 Sentiment Distribution")
    # Drop the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)

    # NOTE(review): the tallies assume the model emits the lowercase labels
    # "positive" / "neutral" / "negative" — confirm against the training code.
    summary = f"""
📊 **CSV Sentiment Summary**
🧾 Total Records: {len(df)}
😊 Positive: {counts.get("positive", 0)}
😐 Neutral: {counts.get("neutral", 0)}
😡 Negative: {counts.get("negative", 0)}
"""
    return summary, fig
# ===== UI =====
# Two-tab Gradio UI: one tab classifies a single text, the other classifies a
# column of an uploaded CSV and shows a summary plus a pie chart.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# **Sentilytics**
### Social Media Sentiment Analysis using ML & BERT
Analyze sentiment from **Text or CSV data**
""")
    with gr.Tabs():
        with gr.Tab("📝 Analyze Text"):
            text_input = gr.Textbox(
                lines=4,
                placeholder="Type your text here…",
                label="Input Text",
            )
            text_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
            text_output = gr.Markdown()
            # analyze_text returns one Markdown string for text_output.
            text_btn.click(
                analyze_text,
                inputs=text_input,
                outputs=text_output,
            )
        with gr.Tab("📂 Analyze CSV"):
            csv_file = gr.File(label="Upload CSV file")
            text_column = gr.Textbox(
                value="text",
                label="Text Column Name",
            )
            csv_btn = gr.Button("📈 Analyze CSV", variant="primary")
            csv_text = gr.Markdown()
            csv_plot = gr.Plot()
            # analyze_csv returns (markdown summary, figure) in this order.
            csv_btn.click(
                analyze_csv,
                inputs=[csv_file, text_column],
                outputs=[csv_text, csv_plot],
            )
    gr.Markdown("""
---
💡 **Powered by:** TF-IDF + Naive Bayes & BERT
🎓 **Made by:** Prasun Singh & Praman Jain
""")
# ===== APP LAUNCH =====
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is not set.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )