# ===== IMPORTS =====
import gradio as gr
import pandas as pd
import pickle
import re
import nltk
import matplotlib.pyplot as plt
import os

from nltk.corpus import stopwords
from transformers import pipeline

# ===== NLTK SETUP =====
# Add an extra directory to the list of places NLTK searches for corpora.
nltk.data.path.append("/root/nltk_data")
try:
    # Fast path: the corpus is already installed, so no network access is needed.
    stop_words = set(stopwords.words("english"))
except LookupError:
    # The stopwords corpus is missing: fetch it once, then load it.
    nltk.download("stopwords")
    stop_words = set(stopwords.words("english"))

# ===== LOAD MODEL & VECTORIZER =====
# NOTE(security): pickle.load executes arbitrary code embedded in the file.
# Only ship .pkl artifacts that were produced by a trusted training run.
# Paths are relative to the current working directory.
with open("nb_model.pkl", "rb") as f:
    model = pickle.load(f)

with open("tfidf.pkl", "rb") as f:
    tfidf = pickle.load(f)

# ===== LOAD BERT =====
# Hugging Face sentiment pipeline; instantiated once at import time and
# reused by every request handler below.
bert = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

# ===== CLEAN TEXT =====
def clean_text(text):
    """Return *text* lower-cased, stripped of noise, and without stop words.

    The value is coerced to ``str`` first. URLs, @mentions and #hashtags are
    removed, then every character that is not a lowercase ASCII letter or
    whitespace is dropped. The remaining tokens are filtered against the
    module-level ``stop_words`` set and re-joined with single spaces.
    """
    cleaned = str(text).lower()

    # Order matters: URLs and mentions/hashtags must be removed while their
    # punctuation is still present, before the final letters-only filter.
    for pattern in (r"http\S+|www\S+", r"@\w+|#\w+", r"[^a-z\s]"):
        cleaned = re.sub(pattern, "", cleaned)

    kept_tokens = (token for token in cleaned.split() if token not in stop_words)
    return " ".join(kept_tokens)

# ===== TEXT ANALYSIS =====
def analyze_text(text_input):
    """Classify a single piece of text with both the ML model and BERT.

    Args:
        text_input: Raw text to analyze. ``None`` and whitespace-only input
            are treated as "nothing entered".

    Returns:
        A Markdown string containing the prediction of the pickled model, the
        BERT label and the BERT confidence score, or an error message when no
        text was supplied.
    """
    # Guard against None as well as blank text; calling .strip() on None
    # would raise AttributeError.
    if text_input is None or not str(text_input).strip():
        return "❌ Please enter some text"

    text_input = str(text_input)

    # The classical model sees the cleaned text; BERT gets the raw text.
    clean = clean_text(text_input)
    vec = tfidf.transform([clean])
    ml_pred = model.predict(vec)[0]

    # truncation=True is forwarded to the tokenizer so inputs longer than the
    # model's maximum sequence length are cut instead of raising an error.
    bert_out = bert(text_input, truncation=True)[0]

    return f"""
✨ **Sentiment Analysis Result**

🔹 **ML Sentiment:** {ml_pred}  
🔹 **BERT Sentiment:** {bert_out['label']}  
🔹 **Confidence Score:** {round(bert_out['score'], 3)}
"""

# ===== CSV ANALYSIS =====
def analyze_csv(csv_file, text_column):
    """Run the ML sentiment model over one column of an uploaded CSV.

    Args:
        csv_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string is accepted; ``None`` means
            nothing was uploaded.
        text_column: Name of the CSV column that holds the text to classify.

    Returns:
        A ``(summary, figure)`` tuple: a Markdown summary of the label counts
        and a matplotlib pie chart of the distribution. On any input problem
        the tuple is ``(error_message, None)``.
    """
    if csv_file is None:
        return "❌ Please upload a CSV file", None

    # Accept both a file wrapper (path in `.name`) and a bare path string.
    csv_path = getattr(csv_file, "name", csv_file)

    try:
        df = pd.read_csv(csv_path)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
        OSError,
    ) as exc:
        # Report unreadable uploads in the UI instead of crashing the handler.
        return f"❌ Could not read CSV: {exc}", None

    if text_column not in df.columns:
        return f"❌ Column '{text_column}' not found in CSV", None

    if df.empty:
        # Nothing to classify; the model cannot predict on zero rows.
        return "❌ CSV file contains no rows to analyze", None

    # Missing cells become empty text rather than the literal string "nan".
    df["clean_text"] = [
        clean_text("" if pd.isna(value) else value) for value in df[text_column]
    ]
    vec = tfidf.transform(df["clean_text"])
    df["sentiment"] = model.predict(vec)

    counts = df["sentiment"].value_counts()

    fig, ax = plt.subplots()
    ax.pie(
        counts.values,
        labels=counts.index,
        autopct="%1.1f%%",
        startangle=90
    )
    ax.set_title("💬 Sentiment Distribution")
    # Unregister the figure from pyplot's global state so repeated requests
    # do not accumulate open figures; the Figure object itself stays usable.
    plt.close(fig)

    # NOTE(review): the label keys below assume the model emits "positive",
    # "neutral" and "negative" — confirm against the training labels.
    summary = f"""
📊 **CSV Sentiment Summary**

🧾 Total Records: {len(df)}

😊 Positive: {counts.get("positive", 0)}
😐 Neutral: {counts.get("neutral", 0)}
😡 Negative: {counts.get("negative", 0)}
"""

    return summary, fig

# ===== UI =====
# Build the Gradio interface. Components are rendered in the order they are
# created inside the nested context managers, so statement order here defines
# the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:

    # Page header.
    gr.Markdown("""
# **Sentilytics**
### Social Media Sentiment Analysis using ML & BERT
Analyze sentiment from **Text or CSV data**
""")

    with gr.Tabs():

        # Tab 1: free-text input analyzed by analyze_text.
        with gr.Tab("📝 Analyze Text"):
            text_input = gr.Textbox(
                lines=4,
                placeholder="Type your text here…",
                label="Input Text"
            )
            text_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
            text_output = gr.Markdown()

            # Clicking the button passes the textbox value to analyze_text and
            # shows the returned Markdown in text_output.
            text_btn.click(
                analyze_text,
                inputs=text_input,
                outputs=text_output
            )

        # Tab 2: CSV upload analyzed by analyze_csv.
        with gr.Tab("📂 Analyze CSV"):
            csv_file = gr.File(label="Upload CSV file")
            # Name of the column to classify; pre-filled with "text".
            text_column = gr.Textbox(
                value="text",
                label="Text Column Name"
            )
            csv_btn = gr.Button("📈 Analyze CSV", variant="primary")
            csv_text = gr.Markdown()
            csv_plot = gr.Plot()

            # analyze_csv returns (summary, figure), mapped in order onto the
            # Markdown and Plot components.
            csv_btn.click(
                analyze_csv,
                inputs=[csv_file, text_column],
                outputs=[csv_text, csv_plot]
            )

    # Page footer with credits.
    gr.Markdown("""
---
💡 **Powered by:** TF-IDF + Naive Bayes & BERT  
🎓 **Made by:** Prasun Singh & Praman Jain
""")

# ===== APP LAUNCH =====
if __name__ == "__main__":
    # Bind to all interfaces; the port comes from the PORT environment
    # variable when set and falls back to 7860 otherwise.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )