"""
Skybox generator: text → 2:1 equirectangular image (Stable Diffusion, local).
Uses FP16 to reduce VRAM. Output 1024x512 or 2048x1024.
"""

import os
import time
from pathlib import Path

import torch

# Hub id used when no local weights are found and SD_MODEL_ID is not set.
# v1.5 works without license acceptance. Use SD_MODEL_ID to prefer SD 2.1.
DEFAULT_MODEL_ID = "runwayml/stable-diffusion-v1-5"
# Hub id tried as an alternate when loading the primary Hub id fails.
# Currently the same id as DEFAULT_MODEL_ID.
FALLBACK_MODEL_ID = "runwayml/stable-diffusion-v1-5"


def get_device() -> str:
    """Return the torch device name to run on: "cuda" if CUDA is available, else "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


def _is_complete_sd_dir(path: Path) -> bool:
    """Report whether *path* looks like a complete Stable Diffusion pipeline folder.

    "Complete" here means: *path* is a directory, it contains a ``unet``
    sub-directory, and that sub-directory holds a UNet weights file in either
    safetensors or .bin form. Nothing else in the folder is inspected.
    """
    unet_dir = path / "unet"
    if not (path.is_dir() and unet_dir.is_dir()):
        return False

    # Either weights format is accepted; the first one found is enough.
    for weights_name in (
        "diffusion_pytorch_model.safetensors",
        "diffusion_pytorch_model.bin",
    ):
        if (unet_dir / weights_name).exists():
            return True
    return False


def _default_local_weights_dir() -> str | None:
    """First complete SD folder under weights/ (sd-v1-5 or stable-diffusion-2-1-base)."""
    try:
        root = Path(__file__).resolve().parent.parent
        for name in ("sd-v1-5", "stable-diffusion-2-1-base"):
            local = root / "weights" / name
            if _is_complete_sd_dir(local):
                return str(local)
        return None
    except Exception:
        return None


def _resolve_model_path_and_token():
    """Use local path if set or default weights/ folder exists, else Hub id. Token from HF_TOKEN or huggingface-cli login."""
    local = os.environ.get("SD_MODEL_PATH", "").strip()
    if local and os.path.isdir(local):
        return local, None
    default_local = _default_local_weights_dir()
    if default_local:
        return default_local, None
    model_id = os.environ.get("SD_MODEL_ID", DEFAULT_MODEL_ID)
    token = os.environ.get("HF_TOKEN") or True  # True = use cached login
    return model_id, token


def generate_skybox(
    prompt: str,
    output_dir: str = "outputs",
    width: int = 1024,
    height: int = 512,
    seed: int | None = None,
    model_id: str | None = None,
    num_inference_steps: int = 50,
) -> tuple[str, float, float]:
    """
    Generate a 2:1 equirectangular skybox image from a text prompt.

    Loads a Stable Diffusion pipeline (float16 on CUDA, float32 on CPU), runs
    it once for the prompt and saves the result as a PNG in output_dir. The
    filename is derived from the prompt, so two runs with the same prompt map
    to the same file.

    Args:
        prompt: Text prompt; also used (sanitized, cut to 60 characters) to
            build the output filename.
        output_dir: Directory for the PNG; created if it does not exist.
        width: Image width in pixels, passed to the pipeline.
        height: Image height in pixels, passed to the pipeline.
        seed: Seed for the torch generator; None leaves generation unseeded.
        model_id: Local directory or Hub id to load instead of the one
            resolved from the environment / weights folder.
        num_inference_steps: Number of denoising steps passed to the pipeline.

    Returns:
        (path_to_image, inference_time_sec, peak_vram_mb). peak_vram_mb is
        0.0 when running on CPU.

    Raises:
        RuntimeError: If the model could not be loaded. The exception's cause
            is the error from the primary load attempt.
    """
    from diffusers import StableDiffusionPipeline

    device = get_device()
    dtype = torch.float16 if device == "cuda" else torch.float32

    Path(output_dir).mkdir(parents=True, exist_ok=True)

    pretrained, token = _resolve_model_path_and_token()
    load_id = model_id or pretrained
    local_only = os.path.isdir(load_id)

    def _try_load(pid: str, local: bool):
        """Return (pipeline, None) on success or (None, error) on failure."""
        try:
            loaded = StableDiffusionPipeline.from_pretrained(
                pid,
                torch_dtype=dtype,
                safety_checker=None,
                # token is None when the resolved default was a local folder
                # but model_id names a Hub model; fall back to cached login.
                token=None if local else (token or True),
                local_files_only=local,
            )
        except Exception as err:
            return None, err
        return loaded, None

    pipe, load_error = _try_load(load_id, local_only)
    # Retry with the fallback only when it is a different Hub model: repeating
    # the identical load cannot succeed and just doubles the wait. The primary
    # error is kept as the cause because it describes the requested model.
    if pipe is None and not local_only and load_id != FALLBACK_MODEL_ID:
        pipe, _ = _try_load(FALLBACK_MODEL_ID, False)
    if pipe is None:
        raise RuntimeError(
            "Could not load Stable Diffusion. Need internet to download the model (first run).\n"
            "  - Set HF_TOKEN=your_token if behind firewall (huggingface.co/settings/tokens)\n"
            "  - Or download once: huggingface-cli download runwayml/stable-diffusion-v1-5 --local-dir ./weights/sd-v1-5"
        ) from load_error

    pipe = pipe.to(device)

    # Optional: enable xformers for lower VRAM (uncomment if installed)
    # if device == "cuda":
    #     pipe.enable_xformers_memory_efficient_attention()

    if device == "cuda":
        # Reset after the model is on the device so the peak covers this run,
        # and synchronize so pending work does not leak into the timing.
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()

    generator = None
    if seed is not None:
        generator = torch.Generator(device=device).manual_seed(seed)

    t0 = time.perf_counter()
    image = pipe(
        prompt=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
    ).images[0]

    if device == "cuda":
        # Wait for queued GPU work before stopping the clock.
        torch.cuda.synchronize()
    inference_time = time.perf_counter() - t0
    peak_vram_mb = (
        torch.cuda.max_memory_allocated() / 1024 / 1024
        if device == "cuda"
        else 0.0
    )

    # Save with safe filename: keep alphanumerics, space, '-' and '_';
    # everything else becomes '_'. Truncate before stripping whitespace.
    safe_name = "".join(c if c.isalnum() or c in " -_" else "_" for c in prompt)[:60]
    out_path = os.path.join(output_dir, f"skybox_{safe_name.strip()}.png")
    image.save(out_path)

    return out_path, inference_time, peak_vram_mb