"""Sentilytics: a Gradio app for sentiment analysis of free text and CSV files.

Free text is scored by both a pickled classifier (fed by a pickled TF-IDF
vectorizer) and a Hugging Face sentiment pipeline. CSV uploads are scored
row-by-row with the pickled classifier only and summarised as a pie chart.
"""

# ===== IMPORTS =====
import os
import pickle
import re

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
from matplotlib.figure import Figure
from nltk.corpus import stopwords
from transformers import pipeline

# ===== NLTK SETUP =====
nltk.data.path.append("/root/nltk_data")
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# ===== LOAD MODEL & VECTORIZER =====
# NOTE: pickle executes arbitrary code while loading. These two files must be
# trusted artifacts shipped with the app, never user-supplied uploads.
with open("nb_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("tfidf.pkl", "rb") as f:
    tfidf = pickle.load(f)

# ===== LOAD BERT =====
bert = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# ===== CLEAN TEXT =====
# Compiled once at import time; clean_text runs once per CSV row.
_URL_RE = re.compile(r"http\S+|www\S+")
_MENTION_HASHTAG_RE = re.compile(r"@\w+|#\w+")
_NON_LETTER_RE = re.compile(r"[^a-z\s]")


def clean_text(text):
    """Normalise raw text before it is passed to the TF-IDF vectorizer.

    The value is converted to ``str`` and lower-cased, then URLs, @mentions,
    #hashtags and every character other than ``a-z`` or whitespace are
    removed. English stop words are dropped last.

    Args:
        text: Any value; it is passed through ``str()`` first.

    Returns:
        The remaining words joined by single spaces (possibly empty).
    """
    text = str(text).lower()
    text = _URL_RE.sub("", text)
    text = _MENTION_HASHTAG_RE.sub("", text)
    text = _NON_LETTER_RE.sub("", text)
    return " ".join(w for w in text.split() if w not in stop_words)


# ===== TEXT ANALYSIS =====
def analyze_text(text_input):
    """Classify one piece of text with both the pickled model and BERT.

    Args:
        text_input: Text entered by the user; ``None`` and blank strings are
            rejected with an error message.

    Returns:
        A Markdown string with both predictions and the BERT score, or an
        error message when no text was supplied.
    """
    if not text_input or not text_input.strip():
        return "❌ Please enter some text"

    vec = tfidf.transform([clean_text(text_input)])
    ml_pred = model.predict(vec)[0]

    # truncation=True keeps inputs longer than the model's maximum sequence
    # length from raising inside the pipeline.
    bert_out = bert(text_input, truncation=True)[0]

    # Blank lines between entries so Markdown renders each on its own line.
    return "\n\n".join(
        [
            "✨ **Sentiment Analysis Result**",
            f"🔹 **ML Sentiment:** {ml_pred}",
            f"🔹 **BERT Sentiment:** {bert_out['label']}",
            f"🔹 **Confidence Score:** {round(bert_out['score'], 3)}",
        ]
    )


# ===== CSV ANALYSIS =====
def _sentiment_pie(counts):
    """Return a pie-chart figure for a ``value_counts`` Series of labels."""
    # A standalone Figure is not registered in pyplot's global figure list,
    # so figures do not accumulate across requests in a long-running server.
    fig = Figure()
    ax = fig.subplots()
    ax.pie(
        counts.values,
        labels=counts.index,
        autopct="%1.1f%%",
        startangle=90,
    )
    ax.set_title("💬 Sentiment Distribution")
    return fig


def analyze_csv(csv_file, text_column):
    """Classify every row of an uploaded CSV and summarise the result.

    Args:
        csv_file: The upload from ``gr.File``. Either a path string or an
            object exposing the path as ``.name`` is accepted; ``None``
            means nothing was uploaded.
        text_column: Name of the CSV column that holds the text.

    Returns:
        A ``(markdown_summary, figure)`` tuple. On any input problem the
        first element is an error message and the second is ``None``.
    """
    if csv_file is None:
        return "❌ Please upload a CSV file", None

    # Depending on the Gradio version/configuration the upload arrives as a
    # plain path or as a file wrapper whose .name is the path.
    csv_path = getattr(csv_file, "name", csv_file)
    try:
        df = pd.read_csv(csv_path)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
        OSError,
    ) as exc:
        return f"❌ Could not read CSV file: {exc}", None

    if text_column not in df.columns:
        return f"❌ Column '{text_column}' not found in CSV", None

    if df.empty:
        # Nothing to vectorise, predict or plot.
        return "❌ The CSV file contains no rows", None

    df["clean_text"] = df[text_column].astype(str).apply(clean_text)
    df["sentiment"] = model.predict(tfidf.transform(df["clean_text"]))
    counts = df["sentiment"].value_counts()

    summary = "\n\n".join(
        [
            "📊 **CSV Sentiment Summary**",
            f"🧾 Total Records: {len(df)}",
            f"😊 Positive: {counts.get('positive', 0)}",
            f"😐 Neutral: {counts.get('neutral', 0)}",
            f"😑 Negative: {counts.get('negative', 0)}",
        ]
    )
    return summary, _sentiment_pie(counts)


# ===== UI =====
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# **Sentilytics**
### Social Media Sentiment Analysis using ML & BERT
Analyze sentiment from **Text or CSV data**
"""
    )

    with gr.Tabs():
        with gr.Tab("📝 Analyze Text"):
            text_input = gr.Textbox(
                lines=4,
                placeholder="Type your text here…",
                label="Input Text",
            )
            text_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
            text_output = gr.Markdown()
            text_btn.click(
                analyze_text,
                inputs=text_input,
                outputs=text_output,
            )

        with gr.Tab("📂 Analyze CSV"):
            csv_file = gr.File(label="Upload CSV file")
            text_column = gr.Textbox(
                value="text",
                label="Text Column Name",
            )
            csv_btn = gr.Button("📈 Analyze CSV", variant="primary")
            csv_text = gr.Markdown()
            csv_plot = gr.Plot()
            csv_btn.click(
                analyze_csv,
                inputs=[csv_file, text_column],
                outputs=[csv_text, csv_plot],
            )

    gr.Markdown(
        """
---

💡 **Powered by:** TF-IDF + Naive Bayes & BERT

🎓 **Made by:** Prasun Singh & Praman Jain
"""
    )

# ===== APP LAUNCH =====
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port)