Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| from PIL import Image | |
| import pytesseract | |
| import difflib | |
| from io import BytesIO | |
| from transformers import pipeline | |
| import trafilatura | |
| from nltk.tokenize import sent_tokenize | |
| import nltk | |
# Fetch the Punkt sentence-tokenizer data that `sent_tokenize` relies on.
nltk.download("punkt")
# Newer NLTK releases load the tokenizer from the "punkt_tab" resource
# instead of "punkt"; request it as well so sentence splitting works on both.
# NOTE(review): on releases that predate "punkt_tab" this call is expected to
# report an unknown resource and return False rather than raise - confirm.
nltk.download("punkt_tab")

# === Load AI model ===
# The device is chosen first and handed to the pipeline so that the message
# printed below describes what the model was actually asked to run on.
device = "cpu"
reviewer = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    max_new_tokens=200,
    device=device,
)
print(f"Device set to use {device}")
| # === Utility: Highlight diffs === | |
def highlight_diff(original, suggestion):
    """Return an HTML fragment marking word-level changes between two texts.

    Both texts are split on whitespace and compared with ``difflib.ndiff``.
    Words only present in ``original`` are wrapped in a red strike-through
    span, words only present in ``suggestion`` in a green bold span, and
    unchanged words are emitted as-is. Words are joined by single spaces.

    Every word is HTML-escaped before it is inserted, because the text comes
    from fetched pages / OCR / model output and the result is rendered as
    HTML; without escaping, a word such as ``<script>`` would become markup.

    Args:
        original: The source sentence.
        suggestion: The proposed replacement sentence.

    Returns:
        The highlighted fragment, or an empty string when both inputs contain
        no words.
    """
    import html  # local import so the block does not depend on file-level imports

    removed = "<span style='color:red;text-decoration:line-through'>{}</span>"
    added = "<span style='color:green;font-weight:bold'>{}</span>"

    pieces = []
    for entry in difflib.ndiff(original.split(), suggestion.split()):
        # ndiff prefixes every entry with a two-character code.
        code = entry[:2]
        word = html.escape(entry[2:], quote=False)
        if code == "- ":
            pieces.append(removed.format(word))
        elif code == "+ ":
            pieces.append(added.format(word))
        elif code == "  ":
            pieces.append(word)
        # "? " hint entries carry no words of their own and are skipped.
    return " ".join(pieces)
| # === Extract blog content from URL === | |
def extract_text_from_url(url):
    """Download a web page and return its main text content.

    The page is fetched with a browser-like User-Agent and a 10 second
    timeout, then passed to ``trafilatura.extract``.

    Args:
        url: Address of the blog post to fetch.

    Returns:
        The extracted text on success. On any failure (non-200 response,
        network/parsing exception, or a page from which no text could be
        extracted) a human-readable "Blog Error" message is returned instead
        of raising, because the result is shown directly in a textbox.
    """
    # NOTE(review): the leading "β" in the messages below looks like a
    # mis-decoded emoji; it is kept unchanged here - confirm against the
    # original file.
    try:
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
        if response.status_code != 200:
            return f"β Blog Error: HTTP {response.status_code} on URL {url}"
        extracted = trafilatura.extract(response.text)
    except Exception as e:
        return f"β Blog Error: {e}"

    # trafilatura returns None when it cannot find any main content; surface
    # that as an error message instead of handing None to the caller.
    if not extracted:
        return f"β Blog Error: No extractable text found at {url}"
    return extracted
| # === Extract text from image URL (OCR) === | |
def extract_text_from_image(image_url):
    """Download an image and return the text recognised in it by Tesseract.

    The image is fetched with the same User-Agent header and 10 second
    timeout as ``extract_text_from_url``, converted to greyscale ("L" mode)
    and passed to ``pytesseract.image_to_string``.

    Args:
        image_url: Address of the image to run OCR on.

    Returns:
        The recognised text. If the download does not return HTTP 200, OCR
        finds only whitespace, or any exception occurs, a human-readable
        "OCR Error" message is returned instead of raising.
    """
    # NOTE(review): the leading "β" in the messages below looks like a
    # mis-decoded emoji; it is kept unchanged here - confirm against the
    # original file.
    try:
        # A timeout keeps a stalled host from blocking the handler forever.
        response = requests.get(
            image_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
        # Do not hand an error page to PIL: report the HTTP status instead.
        if response.status_code != 200:
            return f"β OCR Error: HTTP {response.status_code} on URL {image_url}"
        image = Image.open(BytesIO(response.content)).convert("L")
        text = pytesseract.image_to_string(image)
        return text if text.strip() else "β OCR Error: No readable text found."
    except Exception as e:
        return f"β OCR Error: {e}"
| # === Suggestion generator === | |
def generate_suggestions(text):
    """Split ``text`` into sentences and ask the model to rewrite each one.

    Each sentence is embedded in a fixed instruction prompt and sent to the
    module-level ``reviewer`` pipeline. Every occurrence of the prompt is
    removed from the generated text and the remainder is stripped; if nothing
    is left, the original sentence is used as its own suggestion.

    Args:
        text: The content to review.

    Returns:
        A ``(sentences, suggestions)`` tuple of two equally long lists.
    """

    def _rewrite(sentence):
        # One model call per sentence; the first candidate's text is used.
        prompt = f"Improve the tone, grammar, clarity and flag any sensitive content:\n\n{sentence}"
        generated = reviewer(prompt, max_new_tokens=200)[0]["generated_text"]
        remainder = generated.replace(prompt, "").strip()
        return remainder or sentence

    sentences = sent_tokenize(text)
    return sentences, [_rewrite(sentence) for sentence in sentences]
| # === Final approval handler === | |
def collect_decisions(originals, suggestions, *choices):
    """Assemble the final text from per-sentence accept/reject decisions.

    The three sequences are walked in lockstep, stopping at the shortest one.
    A sentence's suggestion is used when its choice equals ``"Accept"``;
    any other choice keeps the original sentence.

    Args:
        originals: The original sentences.
        suggestions: The suggested rewrites, aligned with ``originals``.
        *choices: One decision string per sentence.

    Returns:
        The selected sentences joined with newlines.
    """
    selected = [
        suggested if decision == "Accept" else original
        for original, suggested, decision in zip(originals, suggestions, choices)
    ]
    return "\n".join(selected)
| # === Gradio UI === | |
# Number of review rows built up front. Components created inside an event
# handler are not rendered, and the component lists given to `.click()` /
# `.change()` are captured when the listener is registered, so a fixed pool of
# hidden rows is created here and filled/revealed on every review. Sentences
# beyond the pool are not shown and keep their original text in the output.
MAX_REVIEW_ROWS = 50

# NOTE(review): several labels below contain characters that look like
# mis-decoded emoji/punctuation; they are kept unchanged here - confirm
# against the original file.
with gr.Blocks() as demo:
    gr.Markdown("# β¨ Blog Reviewer AI")
    gr.Markdown("Detect tone issues, errors, and sensitive content β and clean them interactively!")

    with gr.Tab("π From Blog URL"):
        blog_url = gr.Textbox(label="Enter blog URL")
        fetch_btn = gr.Button("Fetch & Review")
    with gr.Tab("πΌοΈ From Image URL (OCR)"):
        image_url = gr.Textbox(label="Enter Image URL")
        image_btn = gr.Button("Extract & Review")
    with gr.Tab("π Paste Text"):
        pasted_text = gr.Textbox(label="Paste blog content here", lines=10)
        paste_btn = gr.Button("Review Text")

    # Per-session copies of the sentences under review.
    originals = gr.State([])
    suggestions = gr.State([])

    # Hidden container holding the pre-built review rows.
    sentence_blocks = []  # one (orig_md, sugg_md, diff_md) tuple per row
    decision_radios = []  # one Accept/Reject radio per row
    with gr.Column(visible=False) as output_section:
        for i in range(MAX_REVIEW_ROWS):
            orig_md = gr.Markdown(visible=False)
            sugg_md = gr.Markdown(visible=False)
            diff_md = gr.Markdown(visible=False)
            radio = gr.Radio(
                ["Accept", "Reject"],
                value="Accept",
                label=f"Suggestion {i+1}",
                visible=False,
            )
            sentence_blocks.append((orig_md, sugg_md, diff_md))
            decision_radios.append(radio)

    view_mode = gr.Radio(["Original", "Suggestion", "Side-by-Side"], value="Side-by-Side", label="Choose View")
    final_output = gr.Textbox(label="β Final Output", lines=12)
    finalize_btn = gr.Button("Generate Clean Version")

    # Flat component lists in the exact order the handlers emit updates.
    markdown_outputs = [md for row in sentence_blocks for md in row]
    row_outputs = [
        component
        for row, radio in zip(sentence_blocks, decision_radios)
        for component in (*row, radio)
    ]

    def _view_flags(view):
        """Return (show_original, show_suggestion, show_diff) for a view mode."""
        if view == "Original":
            return True, False, False
        if view == "Suggestion":
            return False, True, False
        # "Side-by-Side" (and anything unexpected) shows the highlighted diff.
        return False, False, True

    # === Populate sentence review rows ===
    def populate_review_ui(origs, suggs, view="Side-by-Side"):
        """Build updates that fill the row pool with the given sentence pairs.

        Returns four updates per pooled row (original, suggestion, diff,
        radio), in the order of ``row_outputs``. Rows without a sentence are
        cleared and hidden; every used row's radio is reset to "Accept".
        """
        show_orig, show_sugg, show_diff = _view_flags(view)
        pairs = list(zip(origs, suggs))[:MAX_REVIEW_ROWS]

        updates = []
        for orig, sugg in pairs:
            updates.extend([
                gr.update(value=f"<b>{orig}</b>", visible=show_orig),
                gr.update(value=f"<b>{sugg}</b>", visible=show_sugg),
                gr.update(value=highlight_diff(orig, sugg), visible=show_diff),
                gr.update(value="Accept", visible=True),
            ])
        for _ in range(MAX_REVIEW_ROWS - len(pairs)):
            updates.extend([
                gr.update(value="", visible=False),
                gr.update(value="", visible=False),
                gr.update(value="", visible=False),
                gr.update(value="Accept", visible=False),
            ])
        return updates

    # === Show suggestions UI ===
    def show_review(text, view="Side-by-Side"):
        """Generate suggestions for ``text`` and reveal the filled review rows.

        The sentence lists are returned as the new State values (rather than
        assigned to ``State.value``, which is the default shared by every
        session), followed by a visibility update for the review column and
        the per-row updates.
        """
        origs, suggs = generate_suggestions(text)
        return [origs, suggs, gr.update(visible=True)] + populate_review_ui(origs, suggs, view)

    # === Toggle view mode ===
    def toggle_view(view, origs=()):
        """Show only the Markdown matching ``view`` in each row that is in use.

        Returns three visibility updates per pooled row, in the order of
        ``markdown_outputs``; rows beyond ``len(origs)`` stay hidden.
        """
        show_orig, show_sugg, show_diff = _view_flags(view)
        active_rows = min(len(origs), MAX_REVIEW_ROWS)

        updates = []
        for index in range(MAX_REVIEW_ROWS):
            in_use = index < active_rows
            updates.extend([
                gr.update(visible=in_use and show_orig),
                gr.update(visible=in_use and show_sugg),
                gr.update(visible=in_use and show_diff),
            ])
        return updates

    # === Final output handler ===
    def finalize_output(origs, suggs, *choices):
        """Join the accepted suggestions / kept originals into the final text.

        ``collect_decisions`` stops at the shortest input, so sentences that
        did not fit in the row pool get an implicit "Reject" to keep their
        original wording instead of being dropped.
        """
        missing = len(origs) - len(choices)
        if missing > 0:
            choices += ("Reject",) * missing
        return collect_decisions(origs, suggs, *choices)

    # Button click handlers
    fetch_btn.click(fn=extract_text_from_url, inputs=blog_url, outputs=pasted_text)
    image_btn.click(fn=extract_text_from_image, inputs=image_url, outputs=pasted_text)
    paste_btn.click(
        fn=show_review,
        inputs=[pasted_text, view_mode],
        outputs=[originals, suggestions, output_section] + row_outputs,
    )
    view_mode.change(fn=toggle_view, inputs=[view_mode, originals], outputs=markdown_outputs)
    finalize_btn.click(
        fn=finalize_output,
        inputs=[originals, suggestions] + decision_radios,
        outputs=final_output,
    )

demo.launch()