"""Output writers, LRC parsing, and metadata extraction for txtlyric-to-lrc.

Writers:

- :func:`result_to_lrc` -- emits classic LRC, inserting a clear-display
  marker whenever the gap between consecutive segments exceeds a threshold
  so that lines do not visually persist through instrumental pauses.
- :func:`result_to_lyricsfile` -- emits the YAML-based Lyricsfile format
  used by lrcget/lrclib, with word-level timings.

LRC input:

- :func:`parse_lrc_file` -- parses an LRC file into metadata tags and
  timed segments (with ``start``/``end``/``text``) suitable for
  ``stable_whisper.align_words``.
"""

from __future__ import annotations

import os
import re
import sys
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

import mutagen
import stable_whisper
import yaml


# Audio container/codec extensions accepted as input, stored lower-case and
# without the leading dot.
song_file_extensions = {
    "mp3",
    "wav",
    "mp4",
    "mpeg",
    "mpga",
    "m4a",
    "webm",
    "flac",
    "opus",
}


def get_file_extension(filename: str) -> str:
    """Return the extension of ``filename`` without the leading dot.

    The original letter case is preserved. A name without an extension
    yields the empty string.
    """
    return os.path.splitext(filename)[1][1:]


def file_is_audio(filename: str) -> bool:
    """Return ``True`` when ``filename`` carries a known audio extension.

    The comparison is case-insensitive so that names such as ``SONG.MP3``
    or ``Track.Flac`` are recognised as audio; the entries of
    ``song_file_extensions`` are all lower-case.
    """
    return get_file_extension(filename).lower() in song_file_extensions


# ---------------------------------------------------------------------------
# Metadata
# ---------------------------------------------------------------------------

@dataclass
class TrackMetadata:
    """Track-level metadata written into the Lyricsfile ``metadata`` mapping.

    ``title`` and ``artist`` are required; every other field defaults to
    ``None``.
    """
    # Track title (required).
    title: str
    # Performing artist (required).
    artist: str
    # Album name, if known.
    album: Optional[str] = None
    # Track length in milliseconds, if known.
    duration_ms: Optional[int] = None
    # Language value passed through from the caller, if any.
    language: Optional[str] = None
    # Timing offset in milliseconds, if any.
    offset_ms: Optional[int] = None


def extract_metadata(audio_path: str, language: Optional[str] = None) -> TrackMetadata:
    """Build a :class:`TrackMetadata` from the audio file's tags and length.

    The file is opened through ``mutagen.File(..., easy=True)``. Because
    Lyricsfile requires ``metadata.title`` and ``metadata.artist``, a
    missing title becomes ``"Unknown"`` and a missing artist becomes
    ``"Unknown Artist"``. ``album`` and ``duration_ms`` stay ``None`` when
    unavailable, and ``language`` is copied from the argument unchanged.
    """
    audio = mutagen.File(audio_path, easy=True)

    title = _first_tag(audio, "title") or "Unknown"
    artist = _first_tag(audio, "artist") or "Unknown Artist"
    album = _first_tag(audio, "album")
    duration = _duration_ms(audio)

    return TrackMetadata(
        title=title,
        artist=artist,
        album=album,
        duration_ms=duration,
        language=language,
    )


def _first_tag(f, key: str) -> Optional[str]:
    if f is None:
        return None
    val = f.get(key)
    if not val:
        return None
    if isinstance(val, list):
        val = val[0] if val else None
    if val is None:
        return None
    val = str(val).strip()
    return val or None


def _duration_ms(f) -> Optional[int]:
    info = getattr(f, "info", None) if f is not None else None
    length = getattr(info, "length", None) if info is not None else None
    if length is None:
        return None
    return int(round(length * 1000))


# ---------------------------------------------------------------------------
# LRC parsing
# ---------------------------------------------------------------------------

# Maps lower-case LRC tag names (the part before ":" inside the brackets)
# to the Lyricsfile metadata key they populate. Several spellings of the
# language tag are accepted and all map to "language".
_LRC_METADATA_KEYS_TO_LYRICSFILE = {
    "ti": "title",
    "ar": "artist",
    "al": "album",
    "la": "language",
    "lang": "language",
    "language": "language",
}

# Content of a time tag: 1-3 minute digits, ":", exactly two second digits,
# and an optional 2-3 digit fraction introduced by "." or ":".
_TIMESTAMP_RE = re.compile(r"^(\d{1,3}):(\d{2})(?:[.:](\d{2,3}))?$")
# A line beginning with one "[...]" tag: group 1 is the text inside the
# brackets, group 2 is everything after the closing bracket.
_LINE_RE = re.compile(r"^\[([^\]]+)\](.*)$")


@dataclass
class ParsedLrc:
    """Result of parsing an LRC file.

    All three fields default to empty containers, created per instance.
    """
    # Timed lyric lines, one dict per line.
    segments: List[dict] = field(default_factory=list)
    # Recognised metadata, keyed by Lyricsfile-style names; values are strings.
    metadata_tags: Dict[str, str] = field(default_factory=dict)
    # Metadata tags that were read but could not be mapped, keyed by LRC tag name.
    dropped_tags: Dict[str, str] = field(default_factory=dict)


def _parse_lrc_timestamp(token: str) -> Optional[float]:
    """Convert the inside of an LRC time tag (e.g. ``01:23.45``) to seconds.

    Surrounding whitespace is ignored. A two-digit fraction is read as
    hundredths of a second and a three-digit fraction as milliseconds.
    Returns ``None`` when ``token`` does not look like a timestamp.
    """
    match = _TIMESTAMP_RE.match(token.strip())
    if match is None:
        return None

    minute_part, second_part, fraction = match.groups()
    whole_seconds = int(minute_part) * 60 + int(second_part)

    if not fraction:
        millis = 0
    elif len(fraction) == 2:
        # Centiseconds -> milliseconds.
        millis = int(fraction) * 10
    else:
        millis = int(fraction)

    return whole_seconds + millis / 1000.0


def _parse_lrc_length(value: str) -> Optional[int]:
    """Parse an LRC [length:...] value to milliseconds."""
    value = value.strip()
    m = re.match(r"^(\d+):(\d{2})(?:[.:](\d{2,3}))?$", value)
    if not m:
        return None
    minutes = int(m.group(1))
    seconds = int(m.group(2))
    frac_raw = m.group(3) or "0"
    if len(frac_raw) == 2:
        frac_ms = int(frac_raw) * 10
    else:
        frac_ms = int(frac_raw)
    return (minutes * 60 + seconds) * 1000 + frac_ms


def _parse_lrc_offset(value: str) -> Optional[int]:
    """Parse an LRC [offset:...] value to milliseconds (integer, may be negative)."""
    value = value.strip()
    m = re.match(r"^([+-]?\d+)$", value)
    if not m:
        return None
    return int(m.group(1))


def parse_lrc_file(content: str) -> ParsedLrc:
    """Parse LRC content into timed segments and metadata.

    Returns a :class:`ParsedLrc` containing:

    - ``segments``: list of ``{"start": float, "end": float, "text": str}``
      suitable for ``model.align_words()``.  Gap markers (empty-text
      timestamps) contribute an ``end`` to the preceding segment but do not
      appear as segments themselves.
    - ``metadata_tags``: dict of recognised LRC metadata mapped to
      Lyricsfile-compatible keys.
    - ``dropped_tags``: dict of LRC metadata tags that have no Lyricsfile
      equivalent (warned about by the caller).

    A line may carry several leading time tags
    (``[00:10.00][00:40.00]text``); it yields one entry per timestamp, all
    sharing the same text.  A leading byte-order mark is ignored.  Lines
    that do not start with a ``[...]`` tag are skipped.

    If the LRC contains an ``[offset:...]`` tag, the offset is applied to
    all parsed timestamps so that the returned times are absolute; results
    are clamped at zero.
    """
    result = ParsedLrc()

    raw_timed: List[Tuple[float, str]] = []

    # A byte-order mark left in by the caller's decoding would sit in front
    # of the first "[" and make _LINE_RE silently reject the first line.
    for raw_line in content.lstrip("\ufeff").splitlines():
        raw_line = raw_line.strip()
        if not raw_line:
            continue
        m = _LINE_RE.match(raw_line)
        if not m:
            continue

        bracket_content = m.group(1)
        after_bracket = m.group(2)

        timed = _split_leading_timestamps(bracket_content, after_bracket)
        if timed is not None:
            timestamps, text = timed
            raw_timed.extend((ts, text) for ts in timestamps)
            continue

        # Metadata tag: [key:value]
        if ":" in bracket_content:
            key, _, value = bracket_content.partition(":")
            key = key.strip().lower()
            value = value.strip()
            if not value:
                continue

            lyricsfile_key = _LRC_METADATA_KEYS_TO_LYRICSFILE.get(key)
            if lyricsfile_key is not None:
                result.metadata_tags[lyricsfile_key] = value
            elif key == "length":
                length_ms = _parse_lrc_length(value)
                if length_ms is not None:
                    result.metadata_tags["duration_ms"] = str(length_ms)
                else:
                    result.dropped_tags[key] = value
            elif key == "offset":
                offset_ms = _parse_lrc_offset(value)
                if offset_ms is not None:
                    result.metadata_tags["offset_ms"] = str(offset_ms)
                else:
                    result.dropped_tags[key] = value
            else:
                result.dropped_tags[key] = value

    if not raw_timed:
        return result

    # Stable sort: entries sharing a timestamp keep their file order.
    raw_timed.sort(key=lambda t: t[0])

    # Apply offset: shift all timestamps so output is absolute.
    # NOTE(review): the offset is *added* here; some LRC descriptions define
    # a positive offset as "lyrics appear sooner" (i.e. subtract) -- confirm
    # which convention the input files follow.
    offset_s = 0.0
    if "offset_ms" in result.metadata_tags:
        offset_s = int(result.metadata_tags["offset_ms"]) / 1000.0

    adjusted: List[Tuple[float, str]] = [
        (max(0.0, ts + offset_s), text) for ts, text in raw_timed
    ]

    # Build segments: non-empty text lines become segments; empty-text
    # lines (gap markers) contribute an end time to the preceding segment.
    pending_segments: List[dict] = []
    for ts, text in adjusted:
        if text:
            pending_segments.append({"start": ts, "end": None, "text": text})
        elif pending_segments and pending_segments[-1]["end"] is None:
            # Only the first gap marker after a line ends it; later
            # consecutive markers must not push the end further out.
            pending_segments[-1]["end"] = ts

    # Fill in missing end times: end of seg N = start of seg N+1
    for i in range(len(pending_segments) - 1):
        if pending_segments[i]["end"] is None:
            pending_segments[i]["end"] = pending_segments[i + 1]["start"]

    # Last segment: if no explicit end (no trailing gap marker), use
    # start + 5s as a reasonable upper bound; align_words will confine
    # within whatever audio is available.
    if pending_segments and pending_segments[-1]["end"] is None:
        pending_segments[-1]["end"] = pending_segments[-1]["start"] + 5.0

    result.segments = pending_segments
    return result


def _split_leading_timestamps(
    bracket_content: str,
    remainder: str,
) -> Optional[Tuple[List[float], str]]:
    """Collect every leading time tag of one LRC line.

    ``bracket_content`` is the inside of the line's first ``[...]`` tag and
    ``remainder`` is everything after it.  Returns ``None`` when the first
    tag is not a timestamp; otherwise ``(timestamps, text)`` where
    ``timestamps`` holds the first tag plus any directly following time
    tags, and ``text`` is the stripped lyric text after them.
    """
    first = _parse_lrc_timestamp(bracket_content)
    if first is None:
        return None

    timestamps = [first]
    while True:
        follow = _LINE_RE.match(remainder.lstrip())
        if not follow:
            break
        extra = _parse_lrc_timestamp(follow.group(1))
        if extra is None:
            # A bracket that is not a timestamp belongs to the lyric text.
            break
        timestamps.append(extra)
        remainder = follow.group(2)
    return timestamps, remainder.strip()


def merge_metadata(
    audio_path: str,
    lrc_parsed: ParsedLrc,
    cli_language: Optional[str] = None,
) -> TrackMetadata:
    """Combine LRC metadata with the audio file's own tags.

    For title, artist, album and duration the LRC value wins when present;
    otherwise the value from :func:`extract_metadata` (which already
    includes its placeholders) is used.  For language, ``cli_language``
    wins over the LRC tag; the audio-side value is just ``cli_language``
    passed through ``extract_metadata``.

    An ``offset_ms`` entry in the LRC tags is not forwarded to the result.

    Every entry of ``lrc_parsed.dropped_tags`` is reported on stderr as a
    warning.
    """
    from_audio = extract_metadata(audio_path, language=cli_language)
    lrc_tags = lrc_parsed.metadata_tags

    # Start from the audio duration and let a parseable LRC value replace it.
    duration_ms = from_audio.duration_ms
    if "duration_ms" in lrc_tags:
        try:
            duration_ms = int(lrc_tags["duration_ms"])
        except ValueError:
            pass

    merged = {
        "title": lrc_tags.get("title") or from_audio.title,
        "artist": lrc_tags.get("artist") or from_audio.artist,
        "album": lrc_tags.get("album") or from_audio.album,
        "duration_ms": duration_ms,
        "language": cli_language or lrc_tags.get("language") or from_audio.language,
    }

    for key, value in lrc_parsed.dropped_tags.items():
        print(
            f"  Warning: LRC tag [{key}:{value}] has no Lyricsfile equivalent; skipped",
            file=sys.stderr,
        )

    return TrackMetadata(**merged)


def _format_lrc_timestamp(seconds: float) -> str:
    if seconds < 0:
        seconds = 0.0
    minutes = int(seconds // 60)
    remainder = seconds - minutes * 60
    return f"{minutes:02d}:{remainder:05.2f}"


def result_to_lrc(
    result: "stable_whisper.WhisperResult",
    gap_threshold: float = 1.5,
) -> str:
    """Render ``result`` as LRC text.

    Each segment becomes one ``[mm:ss.xx]text`` line.  An empty timestamp
    line is written at a segment's end when the following segment starts
    more than ``gap_threshold`` seconds later, so consumers clear the line
    during the pause; the last segment is always followed by such a line.
    Returns the empty string when there are no segments; otherwise the
    output ends with a newline.
    """
    segments = list(result.segments)
    if not segments:
        return ""

    final_index = len(segments) - 1
    lines: List[str] = []
    for index, seg in enumerate(segments):
        text = (seg.text or "").strip()
        lines.append(f"[{_format_lrc_timestamp(seg.start)}]{text}")

        # The last segment is always closed; others only before a long pause.
        needs_clear = (
            index == final_index
            or segments[index + 1].start - seg.end > gap_threshold
        )
        if needs_clear:
            lines.append(f"[{_format_lrc_timestamp(seg.end)}]")

    return "\n".join(lines) + "\n"


def result_to_lyricsfile(
    result: "stable_whisper.WhisperResult",
    metadata: TrackMetadata,
) -> str:
    """Render ``result`` as a Lyricsfile YAML string.

    The document has three top-level keys, in this order: ``version``,
    ``metadata`` and ``lines``.  No ``plain`` block is written; each line
    carries its text, start/end in milliseconds and, when the segment has
    words, a ``words`` list.  Optional metadata fields are emitted only
    when set, and ``instrumental`` is always ``False``.
    """

    def render_line(seg) -> dict:
        # One entry of the "lines" array.
        entry = {
            "text": (seg.text or "").strip(),
            "start_ms": _to_ms(seg.start),
            "end_ms": _to_ms(seg.end),
        }
        words = _words_to_lyricsfile_words(seg.words) if seg.words else None
        if words:
            entry["words"] = words
        return entry

    header = {"title": metadata.title, "artist": metadata.artist}
    # (key, value, include?) -- text fields are skipped when empty, numeric
    # fields only when they are None.
    optional_fields = (
        ("album", metadata.album, bool(metadata.album)),
        ("duration_ms", metadata.duration_ms, metadata.duration_ms is not None),
        ("language", metadata.language, bool(metadata.language)),
        ("offset_ms", metadata.offset_ms, metadata.offset_ms is not None),
    )
    for name, field_value, include in optional_fields:
        if include:
            header[name] = field_value
    header["instrumental"] = False

    document = {
        "version": "1.0",
        "metadata": header,
        "lines": [render_line(seg) for seg in result.segments],
    }

    return yaml.safe_dump(
        document,
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )


def _to_ms(seconds: float) -> int:
    return int(round(seconds * 1000))


def _words_to_lyricsfile_words(word_timings) -> List[dict]:
    """Convert stable-ts word objects to Lyricsfile word objects.

    Whisper-style words carry their separator as *leading* whitespace
    (``" club"``), while Lyricsfile wants it *trailing* on the previous
    word.  Each word therefore has its leading whitespace removed and
    gains a single trailing space when the word after it started with
    whitespace.  The final word never gets a trailing space, and words
    that come without leading whitespace (e.g. CJK output) are joined
    without any.
    """
    raw_texts = [w.word or "" for w in word_timings]
    trimmed = [raw.lstrip() for raw in raw_texts]
    # True where the original word began with whitespace.
    starts_with_space = [
        len(raw) > len(body) for raw, body in zip(raw_texts, trimmed)
    ]

    final_index = len(raw_texts) - 1
    converted: List[dict] = []
    for index, timing in enumerate(word_timings):
        follower_spaced = index < final_index and starts_with_space[index + 1]
        suffix = " " if follower_spaced else ""
        converted.append(
            {
                "text": trimmed[index] + suffix,
                "start_ms": _to_ms(timing.start),
                "end_ms": _to_ms(timing.end),
            }
        )
    return converted