Spaces:
Sleeping
Sleeping
# ===== IMPORTS =====
import os
import pickle
import re

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
from matplotlib.figure import Figure
from nltk.corpus import stopwords
from transformers import pipeline
# ===== NLTK SETUP =====
# Add the extra NLTK data directory to the search path, fetch the stopword
# corpus, and keep the English stopwords in a set so membership checks in
# clean_text are O(1).
NLTK_DATA_DIR = "/root/nltk_data"
nltk.data.path.append(NLTK_DATA_DIR)
nltk.download("stopwords")
stop_words = {word for word in stopwords.words("english")}
# ===== LOAD MODEL & VECTORIZER =====
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    # NOTE: pickle executes arbitrary code on load; only ship trusted artifacts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Classifier and the TF-IDF vectorizer whose output it is fed.
model = _load_pickle("nb_model.pkl")
tfidf = _load_pickle("tfidf.pkl")
# ===== LOAD BERT =====
# Transformer sentiment pipeline used alongside the pickled classifier.
BERT_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
bert = pipeline("sentiment-analysis", model=BERT_MODEL_NAME)
# ===== CLEAN TEXT =====
def clean_text(text):
    """Normalize *text* for the TF-IDF vectorizer.

    The input is stringified and lower-cased, then in order: URLs
    (``http...`` / ``www...``) are removed, ``@mentions`` and ``#hashtags``
    are removed, and every character that is not ``a-z`` or whitespace is
    dropped. Remaining tokens that appear in ``stop_words`` are discarded and
    the rest are joined with single spaces.
    """
    normalized = str(text).lower()
    # Order matters: URLs and @/# tokens must go before punctuation is
    # stripped, otherwise their marker characters would already be gone.
    for pattern in (r"http\S+|www\S+", r"@\w+|#\w+", r"[^a-z\s]"):
        normalized = re.sub(pattern, "", normalized)
    kept_tokens = (token for token in normalized.split() if token not in stop_words)
    return " ".join(kept_tokens)
# ===== TEXT ANALYSIS =====
def analyze_text(text_input):
    """Classify a single piece of text with both models.

    Args:
        text_input: Raw text from the UI. ``None``, empty, and
            whitespace-only values are treated as "no input".

    Returns:
        A Markdown string. For missing input it is a short prompt asking for
        text; otherwise it reports the label predicted by the pickled
        classifier on the cleaned text, plus the label and the score
        (rounded to 3 decimals) returned by the BERT pipeline on the raw text.
    """
    # Guard against None as well as blank strings; calling .strip() on None
    # would raise AttributeError.
    if not text_input or not text_input.strip():
        return "β Please enter some text"

    cleaned = clean_text(text_input)
    features = tfidf.transform([cleaned])
    ml_pred = model.predict(features)[0]

    # truncation=True keeps long inputs within the model's maximum sequence
    # length instead of failing inside the transformer.
    bert_out = bert(text_input, truncation=True)[0]

    return f"""
β¨ **Sentiment Analysis Result**
πΉ **ML Sentiment:** {ml_pred}
πΉ **BERT Sentiment:** {bert_out['label']}
πΉ **Confidence Score:** {round(bert_out['score'], 3)}
"""
# ===== CSV ANALYSIS =====
def analyze_csv(csv_file, text_column):
    """Classify every row of an uploaded CSV and chart the label distribution.

    Args:
        csv_file: The upload from the UI. Either an object exposing the file
            path as ``.name`` or a plain path string; ``None`` means nothing
            was uploaded.
        text_column: Name of the column that holds the text to classify.

    Returns:
        A ``(summary, figure)`` tuple. ``summary`` is a Markdown string; on
        any input problem (no file, unreadable file, missing column, no rows)
        it is an error message and ``figure`` is ``None``. Otherwise
        ``figure`` is a Matplotlib pie chart of the predicted labels.
    """
    if csv_file is None:
        return "β Please upload a CSV file", None

    # Accept both an upload object (path in ``.name``) and a bare path string.
    csv_path = getattr(csv_file, "name", csv_file)
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return f"Could not read CSV file: {exc}", None

    if text_column not in df.columns:
        return f"β Column '{text_column}' not found in CSV", None
    if df.empty:
        # Nothing to vectorize or plot; bail out before calling the model.
        return "CSV file contains no rows", None

    # fillna("") first: astype(str) alone would turn missing cells into the
    # literal word "nan", which would then be classified as if it were text.
    df["clean_text"] = df[text_column].fillna("").astype(str).apply(clean_text)
    features = tfidf.transform(df["clean_text"])
    df["sentiment"] = model.predict(features)
    counts = df["sentiment"].value_counts()

    # Build the figure without pyplot so it is not registered in pyplot's
    # global figure list; figures created via plt.subplots() stay alive until
    # explicitly closed and would accumulate across requests.
    fig = Figure()
    ax = fig.subplots()
    ax.pie(
        counts.values,
        labels=counts.index,
        autopct="%1.1f%%",
        startangle=90,
    )
    ax.set_title("π¬ Sentiment Distribution")

    summary = f"""
π **CSV Sentiment Summary**
π§Ύ Total Records: {len(df)}
π Positive: {counts.get("positive", 0)}
π Neutral: {counts.get("neutral", 0)}
π‘ Negative: {counts.get("negative", 0)}
"""
    return summary, fig
# ===== UI =====
# Gradio Blocks app with a header, two tabs (single-text analysis and CSV
# analysis), and a footer. Components render in creation order.
# NOTE(review): the nesting below is reconstructed from a flattened copy of
# the file; the footer is assumed to sit inside the Blocks context after the
# tabs -- confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("""
# **Sentilytics**
### Social Media Sentiment Analysis using ML & BERT
Analyze sentiment from **Text or CSV data**
""")
    with gr.Tabs():
        # Tab 1: free-text input -> analyze_text -> Markdown result.
        with gr.Tab("π Analyze Text"):
            text_input = gr.Textbox(
                lines=4,
                placeholder="Type your text hereβ¦",
                label="Input Text"
            )
            text_btn = gr.Button("π Analyze Sentiment", variant="primary")
            text_output = gr.Markdown()
            text_btn.click(
                analyze_text,
                inputs=text_input,
                outputs=text_output
            )
        # Tab 2: CSV upload + column name -> analyze_csv -> summary and plot.
        with gr.Tab("π Analyze CSV"):
            csv_file = gr.File(label="Upload CSV file")
            text_column = gr.Textbox(
                value="text",
                label="Text Column Name"
            )
            csv_btn = gr.Button("π Analyze CSV", variant="primary")
            csv_text = gr.Markdown()
            csv_plot = gr.Plot()
            # analyze_csv returns (markdown, figure), matching the two outputs.
            csv_btn.click(
                analyze_csv,
                inputs=[csv_file, text_column],
                outputs=[csv_text, csv_plot]
            )
    # Page footer.
    gr.Markdown("""
---
π‘ **Powered by:** TF-IDF + Naive Bayes & BERT
π **Made by:** Prasun Singh & Praman Jain
""")
# ===== APP LAUNCH =====
if __name__ == "__main__":
    # Bind to all interfaces; the PORT environment variable overrides the
    # default port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", 7860)),
    )