"""Face and mouth region detection using Gemini Vision."""
import json
from pathlib import Path

from google.genai import types

from .config import GEMINI_MODEL
from .gemini_client import get_client


def detect_face_regions(image_path: str | Path) -> dict:
    """
    Detect face and mouth regions in an image using Gemini Vision.

    Args:
        image_path: Path to the image file

    Returns:
        dict with a "success" flag and, on success, the detected "regions"
        (all coordinates normalized to 0-1) plus the source "image_path"
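
    Example (illustrative only; assumes a configured Gemini client via
    get_client() and a hypothetical local file "portrait.png"):
        result = detect_face_regions("portrait.png")
        if result["success"]:
            mouth = result["regions"]["mouth"]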
    """
    client = get_client()
    image_path = Path(image_path)

    with open(image_path, "rb") as f:
        image_data = f.read()

    # Map the extension to a MIME type (".jpg" must become "image/jpeg").
    suffix = image_path.suffix.lower().lstrip(".")
    mime_type = "image/jpeg" if suffix in ("jpg", "jpeg") else f"image/{suffix}"
    image_part = types.Part.from_bytes(
        data=image_data,
        mime_type=mime_type
    )

    prompt = """Analyze this face image and identify the regions.

Return a JSON object with the following structure:
{
    "face": {
        "x": <left edge 0-1>,
        "y": <top edge 0-1>,
        "width": <width 0-1>,
        "height": <height 0-1>
    },
    "mouth": {
        "x": <left edge 0-1>,
        "y": <top edge 0-1>,
        "width": <width 0-1>,
        "height": <height 0-1>
    },
    "eyes": {
        "left": {"x": <center x>, "y": <center y>},
        "right": {"x": <center x>, "y": <center y>}
    },
    "face_angle": "frontal" | "left_profile" | "right_profile" | "three_quarter_left" | "three_quarter_right",
    "art_style": "realistic" | "anime" | "cartoon" | "pixel" | "other",
    "confidence": <0-1>
}

All coordinates should be normalized between 0 and 1, where (0,0) is top-left.
Only return the JSON, no other text."""

    response = client.models.generate_content(
        model=GEMINI_MODEL,
        contents=[prompt, image_part]
    )

    # Parse JSON from the response, stripping any markdown code fence first.
    text = (response.text or "").strip()
    if text.startswith("```"):
        text = text.split("```")[1]
        if text.startswith("json"):
            text = text[len("json"):]
        text = text.strip()

    try:
        regions = json.loads(text)
        return {
            "success": True,
            "regions": regions,
            "image_path": str(image_path)
        }
    except json.JSONDecodeError as e:
        return {
            "success": False,
            "error": f"Failed to parse regions: {e}",
            "raw_response": response.text,
            "image_path": str(image_path)
        }


def crop_mouth_region(image_path: str | Path, output_path: str | Path, padding: float = 0.2) -> Path:
    """
    Crop the mouth region from an image.

    Args:
        image_path: Path to the source image
        output_path: Path to save the cropped image
        padding: Extra padding around the mouth box, as a fraction of its
            width/height (e.g. 0.2 adds 20% on each side)

    Returns:
        Path to the cropped image
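
    Example (illustrative only; file paths are hypothetical):
        crop_mouth_region("portrait.png", "out/mouth.png", padding=0.2)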
    """
    # Imported lazily so Pillow is only required when cropping is used.
    from PIL import Image

    image_path = Path(image_path)
    output_path = Path(output_path)

    # Detect regions
    result = detect_face_regions(image_path)
    if not result["success"]:
        raise ValueError(f"Failed to detect regions: {result.get('error')}")

    regions = result["regions"]
    mouth = regions.get("mouth", {})

    if not mouth:
        raise ValueError("No mouth region detected")

    # Load image
    img = Image.open(image_path)
    width, height = img.size

    # Calculate crop box with padding
    x = mouth.get("x", 0.3)
    y = mouth.get("y", 0.6)
    w = mouth.get("width", 0.4)
    h = mouth.get("height", 0.2)

    # Add padding
    x = max(0, x - padding * w)
    y = max(0, y - padding * h)
    w = min(1 - x, w * (1 + 2 * padding))
    h = min(1 - y, h * (1 + 2 * padding))

    # Convert to pixel coordinates
    left = int(x * width)
    top = int(y * height)
    right = int((x + w) * width)
    bottom = int((y + h) * height)

    # Crop and save
    cropped = img.crop((left, top, right, bottom))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    cropped.save(output_path)

    return output_path


def validate_face_image(image_path: str | Path) -> dict:
    """
    Validate that an image is suitable for lip sync generation.

    Args:
        image_path: Path to the image file

    Returns:
        dict with validation results and recommendations
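
    Example (illustrative only; the file path is hypothetical):
        report = validate_face_image("portrait.png")
        if not report["valid"]:
            print(report["recommendations"])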
    """
    result = detect_face_regions(image_path)

    if not result["success"]:
        return {
            "valid": False,
            "reason": "Could not detect face regions",
            "recommendations": ["Use a clearer image with a visible face"]
        }

    regions = result["regions"]
    recommendations = []

    # Check face angle
    face_angle = regions.get("face_angle", "unknown")
    if face_angle not in ["frontal", "three_quarter_left", "three_quarter_right"]:
        recommendations.append("Use a frontal or 3/4 view face image for best results")

    # Check confidence
    confidence = regions.get("confidence", 0)
    if confidence < 0.7:
        recommendations.append("Image quality or face visibility may affect results")

    # Check mouth region
    mouth = regions.get("mouth", {})
    if not mouth or mouth.get("width", 0) < 0.1:
        recommendations.append("Mouth region is too small or not visible")

    return {
        # Usable when nothing was flagged, or when detection confidence is
        # still reasonably high despite the warnings collected above.
        "valid": len(recommendations) == 0 or confidence > 0.5,
        "regions": regions,
        "recommendations": recommendations,
        "confidence": confidence
    }
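

if __name__ == "__main__":
    # Minimal manual smoke test: a sketch only, not part of the library API.
    # Assumes get_client() can reach Gemini (API key configured) and that a
    # local "portrait.png" exists; adjust the paths before running.
    import pprint

    report = validate_face_image("portrait.png")
    pprint.pprint(report)
    if report["valid"]:
        crop_mouth_region("portrait.png", "output/mouth.png")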
