Spaces:

prasu16
/

Sentilytics

Sleeping

App Files Files Community

Sentilytics / app.py

prasu16

Update app.py

65f63c3 verified 23 days ago

raw

history blame contribute delete

3.82 kB

	# ===== IMPORTS =====
	import gradio as gr
	import pandas as pd
	import pickle
	import re
	import nltk
	import matplotlib.pyplot as plt
	import os

	from nltk.corpus import stopwords
	from transformers import pipeline

	# ===== NLTK SETUP =====
	# Register an additional directory for NLTK to search for its data files.
	nltk.data.path.append("/root/nltk_data")

	# Fetch the stop-word corpus only when it is not already installed, so a
	# restart with the data in place does not trigger another download attempt.
	try:
	    nltk.data.find("corpora/stopwords")
	except LookupError:
	    nltk.download("stopwords")

	# English stop words held in a set for fast membership tests.
	stop_words = set(stopwords.words("english"))

	# ===== LOAD MODEL & VECTORIZER =====
	def _load_pickle(path):
	    """Unpickle and return the object stored in the file at *path*."""
	    with open(path, "rb") as handle:
	        return pickle.load(handle)


	# Both objects are unpickled once at import time from files in the working
	# directory. Only load pickle files that ship with the app itself.
	model = _load_pickle("nb_model.pkl")
	tfidf = _load_pickle("tfidf.pkl")

	# ===== LOAD BERT =====
	# Identifier of the model handed to the sentiment-analysis pipeline.
	_BERT_MODEL_ID = "distilbert-base-uncased-finetuned-sst-2-english"

	# Built once at import time; called later with the raw input text.
	bert = pipeline("sentiment-analysis", model=_BERT_MODEL_ID)

	# ===== CLEAN TEXT =====
	def clean_text(text, stopword_set=None):
	    """Normalise raw text before it is vectorised.

	    The value is converted to a lowercase string, then URLs, @mentions and
	    #hashtags are removed, every character other than a-z and whitespace is
	    dropped, and finally stop words are filtered out.

	    Args:
	        text: Value to clean; it is passed through ``str()`` first.
	        stopword_set: Optional collection of words to drop. When omitted,
	            the module-level ``stop_words`` set is used.

	    Returns:
	        The remaining words joined by single spaces (possibly empty).
	    """
	    if stopword_set is None:
	        stopword_set = stop_words

	    lowered = str(text).lower()
	    # A bare `|` is regex alternation. The previous patterns escaped it
	    # (`\|`), which matches a literal pipe character, so URLs, mentions and
	    # hashtags were never stripped.
	    without_urls = re.sub(r"http\S+|www\S+", "", lowered)
	    without_tags = re.sub(r"@\w+|#\w+", "", without_urls)
	    letters_only = re.sub(r"[^a-z\s]", "", without_tags)
	    return " ".join(
	        word for word in letters_only.split() if word not in stopword_set
	    )

	# ===== TEXT ANALYSIS =====
	def analyze_text(text_input):
	    """Classify one piece of text with both models and format the result.

	    The cleaned text is vectorised with ``tfidf`` and classified by
	    ``model``; the raw text is additionally passed to the ``bert`` pipeline.

	    Args:
	        text_input: Text to analyse. ``None``, an empty string or a
	            whitespace-only string is treated as missing input.

	    Returns:
	        A formatted report containing the ML label, the BERT label and the
	        BERT score rounded to three decimals, or an error message when no
	        text was supplied.
	    """
	    # `not text_input` also covers None, which would otherwise fail on .strip().
	    if not text_input or not text_input.strip():
	        return "❌ Please enter some text"

	    cleaned = clean_text(text_input)
	    features = tfidf.transform([cleaned])
	    ml_pred = model.predict(features)[0]

	    # Truncate over-long input to the model's maximum sequence length
	    # instead of letting the pipeline fail on it.
	    bert_out = bert(text_input, truncation=True)[0]

	    return f"""
	✨ Sentiment Analysis Result

	🔹 ML Sentiment: {ml_pred}
	🔹 BERT Sentiment: {bert_out['label']}
	🔹 Confidence Score: {round(bert_out['score'], 3)}
	"""

	# ===== CSV ANALYSIS =====
	def analyze_csv(csv_file, text_column):
	    """Classify every row of an uploaded CSV and summarise the outcome.

	    Args:
	        csv_file: The upload, given either as an object exposing the file
	            path through ``.name`` or as the path itself. ``None`` means
	            nothing was uploaded.
	        text_column: Name of the column that holds the text to classify.

	    Returns:
	        A ``(summary, figure)`` pair. ``summary`` reports the number of
	        records and the counts found under the labels "positive",
	        "neutral" and "negative" (0 when a label does not occur);
	        ``figure`` is a Matplotlib pie chart of the label distribution.
	        When the input cannot be analysed, ``summary`` is an error message
	        and ``figure`` is ``None``.
	    """
	    if csv_file is None:
	        return "❌ Please upload a CSV file", None

	    # Accept both a file-like upload object and a plain path string.
	    csv_path = getattr(csv_file, "name", csv_file)
	    try:
	        df = pd.read_csv(csv_path)
	    except pd.errors.EmptyDataError:
	        # Raised by pandas for a file without any header or data.
	        return "❌ CSV file is empty or has no columns", None

	    if text_column not in df.columns:
	        return f"❌ Column '{text_column}' not found in CSV", None

	    # A header-only file would hand zero samples to the classifier.
	    if df.empty:
	        return "❌ CSV file contains no data rows", None

	    df["clean_text"] = df[text_column].astype(str).apply(clean_text)
	    features = tfidf.transform(df["clean_text"])
	    df["sentiment"] = model.predict(features)

	    counts = df["sentiment"].value_counts()

	    fig, ax = plt.subplots()
	    ax.pie(
	        counts.values,
	        labels=counts.index,
	        autopct="%1.1f%%",
	        startangle=90,
	    )
	    ax.set_title("💬 Sentiment Distribution")
	    # Drop pyplot's own reference so figures do not pile up across
	    # requests; the returned Figure object itself stays usable.
	    plt.close(fig)

	    summary = f"""
	📊 CSV Sentiment Summary

	🧾 Total Records: {len(df)}

	😊 Positive: {counts.get("positive", 0)}
	😐 Neutral: {counts.get("neutral", 0)}
	😡 Negative: {counts.get("negative", 0)}
	"""

	    return summary, fig

	# ===== UI =====
	# Markdown shown at the top and bottom of the page.
	_HEADER_MD = """
	# Sentilytics
	### Social Media Sentiment Analysis using ML & BERT
	Analyze sentiment from Text or CSV data
	"""
	_FOOTER_MD = """
	---
	💡 Powered by: TF-IDF + Naive Bayes & BERT
	🎓 Made by: Prasun Singh & Praman Jain
	"""

	# NOTE(review): the nesting below is reconstructed from a source whose
	# indentation was lost; confirm the footer belongs inside the Blocks
	# context and after the tabs.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown(_HEADER_MD)

	    with gr.Tabs():
	        # Tab 1: free-text input wired to analyze_text.
	        with gr.Tab("📝 Analyze Text"):
	            text_input = gr.Textbox(
	                label="Input Text",
	                placeholder="Type your text here…",
	                lines=4,
	            )
	            text_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
	            text_output = gr.Markdown()

	            text_btn.click(
	                fn=analyze_text,
	                inputs=text_input,
	                outputs=text_output,
	            )

	        # Tab 2: CSV upload plus column name wired to analyze_csv.
	        with gr.Tab("📂 Analyze CSV"):
	            csv_file = gr.File(label="Upload CSV file")
	            text_column = gr.Textbox(label="Text Column Name", value="text")
	            csv_btn = gr.Button("📈 Analyze CSV", variant="primary")
	            csv_text = gr.Markdown()
	            csv_plot = gr.Plot()

	            csv_btn.click(
	                fn=analyze_csv,
	                inputs=[csv_file, text_column],
	                outputs=[csv_text, csv_plot],
	            )

	    gr.Markdown(_FOOTER_MD)

	# ===== APP LAUNCH =====
	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from the PORT environment
	    # variable and falls back to 7860 when it is not set.
	    listen_port = int(os.getenv("PORT", "7860"))
	    demo.launch(server_name="0.0.0.0", server_port=listen_port)