#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
import sys
from dataclasses import dataclass, asdict
from pathlib import Path

from known_issues import classify_issue_ids, load_known_issue_ids


# Any non-empty double-quoted token on a line; group 1 is the text between the quotes.
QUOTED_STRING_RE = re.compile(r'"([^"]+)"')
# Line beginning with `meshfilename "<value>"` (case-insensitive); the value must be non-empty.
MESH_FILENAME_RE = re.compile(r'^meshfilename\s+"([^"]+)"', re.IGNORECASE)
# Line beginning with `bombeffect "<value>"` (case-insensitive); the quoted value may be empty.
BOMB_EFFECT_RE = re.compile(r'^bombeffect\s+"([^"]*)"', re.IGNORECASE)
# Line beginning with `attachfile "<value>"` (case-insensitive); the quoted value may be empty.
ATTACH_FILE_RE = re.compile(r'^attachfile\s+"([^"]*)"', re.IGNORECASE)
# Line of the form `SoundData<digits> <digits/dots> "<value>"` (case-insensitive); group 1 is the quoted value.
SOUND_DATA_RE = re.compile(r'^SoundData\d+\s+[0-9.]+\s+"([^"]+)"', re.IGNORECASE)
# NOTE(review): not referenced by any code in the visible portion of this file.
YMIR_PREFIX = "d:/ymir work/"


@dataclass
class EffectCheck:
    """Outcome of checking the references made by a single effect-related file."""
    # Path of the checked file, exactly as handed to the validator that built this record.
    file: str
    # File type tag; the validators in this file use "mse", "msf" or "mss".
    kind: str
    # Raw reference strings that were not found in the asset lookup
    # (the validators in this file store them de-duplicated and sorted).
    missing_references: list[str]


def parse_args() -> argparse.Namespace:
    """Parse the command line of the effect reference validator.

    Returns:
        Namespace with ``runtime_root`` (required ``Path``), ``json`` (bool),
        ``known_issues`` (``str`` or ``None``) and ``strict_known_issues`` (bool).
    """
    cli = argparse.ArgumentParser(
        description="Validate effect graph references for text-based .mse/.msf/.mss assets."
    )
    cli.add_argument("--runtime-root", type=Path, required=True, help="Client runtime root containing assets/.")
    cli.add_argument("--json", action="store_true", help="Emit JSON output.")
    cli.add_argument(
        "--known-issues",
        type=str,
        default=None,
        help="Optional known issues baseline JSON. Defaults to repo known_issues/runtime_known_issues.json if present.",
    )
    cli.add_argument(
        "--strict-known-issues",
        action="store_true",
        help="Also fail when the known-issues baseline contains stale entries not observed anymore.",
    )
    return cli.parse_args()


def normalize_virtual_path(value: str) -> str:
    """Return *value* as a lowercase, forward-slash path without drive or leading slash.

    Surrounding whitespace and double quotes are removed, backslashes become
    forward slashes, a leading ``d:`` / ``d:/`` drive prefix is dropped and any
    remaining leading slashes are stripped.
    """
    cleaned = value.strip().strip('"')
    cleaned = cleaned.replace("\\", "/").lower()
    if cleaned.startswith("d:/"):
        cleaned = cleaned[3:]
    elif cleaned.startswith("d:"):
        cleaned = cleaned[2:]
    return cleaned.lstrip("/")


def build_asset_lookup(runtime_assets: Path) -> dict[str, Path]:
    """Index the files below *runtime_assets* by their lowercase virtual path.

    The key is the file's path relative to *runtime_assets* with the first
    directory component removed, in POSIX form and lowercased. Files that sit
    directly in *runtime_assets* are skipped. When several files share a key,
    the first one encountered during the walk is kept.
    """
    index: dict[str, Path] = {}
    for candidate in runtime_assets.rglob("*"):
        if candidate.is_file():
            # First component is the containing top-level directory; the rest is the key.
            _, *inner = candidate.relative_to(runtime_assets).parts
            if inner:
                key = Path(*inner).as_posix().lower()
                if key not in index:
                    index[key] = candidate
    return index


def virtual_dir_for_effect_file(effect_path: Path) -> str:
    """Return the lowercase virtual directory of *effect_path*, rooted at ``ymir work``.

    The first path component equal to ``ymir work`` (case-insensitive) marks the
    start of the virtual path; the parent directory of that sub-path is returned
    in POSIX form. An empty string is returned when no such component exists.
    """
    components = effect_path.parts
    for position, component in enumerate(components):
        if component.lower() == "ymir work":
            virtual_file = Path(*components[position:])
            return virtual_file.parent.as_posix().lower()
    return ""


def resolve_reference(raw: str, effect_virtual_dir: str) -> str:
    """Turn a reference found in an effect file into a lookup key.

    Args:
        raw: Reference text as written in the file (without surrounding quotes).
        effect_virtual_dir: Virtual directory of the referencing file; may be
            empty, in which case relative references are returned as-is.

    Returns:
        ``""`` for a blank reference; the normalized path for references that
        start with a ``d:`` drive prefix; otherwise the normalized path joined
        onto *effect_virtual_dir* and lowercased.
    """
    rel = raw.strip()
    if not rel:
        return ""
    normalized = normalize_virtual_path(rel)
    # Test the stripped text: normalization strips whitespace before removing the
    # drive, so a padded "  d:/..." must be classified as absolute here as well,
    # otherwise it would be wrongly re-rooted under the effect directory.
    if rel.lower().startswith("d:"):
        return normalized
    if effect_virtual_dir:
        return f"{effect_virtual_dir}/{normalized}".lower()
    return normalized


def derived_sound_script(effect_virtual_file: str) -> str | None:
    """Derive the virtual path of the sound script that accompanies an effect file.

    The input is lowercased, its suffix is removed and its first two path
    components are dropped; the remainder is placed under ``sound/`` with an
    ``.mss`` suffix.

    Returns:
        The derived ``sound/....mss`` path, or ``None`` when the input does not
        start with ``ymir work/`` or has nothing left after the first two
        components (which would otherwise produce the meaningless
        ``sound/..mss``).
    """
    effect_virtual_file = effect_virtual_file.lower()
    if not effect_virtual_file.startswith("ymir work/"):
        return None
    stem = Path(effect_virtual_file).with_suffix("")
    tail = stem.parts[2:]
    if not tail:
        # Path(*()) would collapse to "." and yield a bogus script name.
        return None
    return f"sound/{Path(*tail).as_posix()}.mss"


def validate_mse(effect_path: Path, rel_from_assets: str, asset_lookup: dict[str, Path]) -> EffectCheck:
    """Collect references in an ``.mse`` file that are absent from *asset_lookup*.

    The file is read as UTF-8 (undecodable bytes ignored) and scanned line by
    line for ``meshfilename`` entries and for quoted tokens inside a
    ``texturefiles`` section. If a sound script derived from *rel_from_assets*
    exists in the lookup, its missing references are included too.

    Returns:
        An ``EffectCheck`` of kind ``"mse"`` whose ``missing_references`` holds
        the raw reference strings, de-duplicated and sorted.
    """
    base_dir = virtual_dir_for_effect_file(effect_path)
    unresolved: list[str] = []
    section: str | None = None

    def record_if_missing(reference: str) -> None:
        target = resolve_reference(reference, base_dir)
        if target and target not in asset_lookup:
            unresolved.append(reference)

    content = effect_path.read_text(encoding="utf-8", errors="ignore")
    for entry in content.splitlines():
        stripped = entry.strip()
        if not stripped:
            continue
        lowered = stripped.lower()
        if lowered.startswith("group "):
            section = lowered.split()[1]
            continue
        mesh = MESH_FILENAME_RE.match(stripped)
        if mesh is not None:
            record_if_missing(mesh.group(1))
        elif "texturefiles" in lowered:
            section = "texturefiles"
        elif section == "texturefiles":
            if stripped.startswith("}"):
                # Closing brace ends the texture list.
                section = None
            else:
                for quoted in QUOTED_STRING_RE.findall(stripped):
                    record_if_missing(quoted)

    companion = derived_sound_script(rel_from_assets)
    if companion and companion in asset_lookup:
        unresolved.extend(validate_mss(companion, asset_lookup).missing_references)

    return EffectCheck(file=rel_from_assets, kind="mse", missing_references=sorted(set(unresolved)))


def validate_msf(effect_path: Path, rel_from_assets: str, asset_lookup: dict[str, Path]) -> EffectCheck:
    """Collect ``bombeffect`` / ``attachfile`` references in an ``.msf`` file that are absent from *asset_lookup*.

    The file is read as UTF-8 (undecodable bytes ignored). Entries whose quoted
    value is blank are ignored.

    Returns:
        An ``EffectCheck`` of kind ``"msf"`` whose ``missing_references`` holds
        the raw reference strings, de-duplicated and sorted.
    """
    base_dir = virtual_dir_for_effect_file(effect_path)
    unresolved: set[str] = set()
    content = effect_path.read_text(encoding="utf-8", errors="ignore")
    for entry in content.splitlines():
        stripped = entry.strip()
        if not stripped:
            continue
        # bombeffect is tried first; attachfile only when it yields no usable value.
        for pattern in (BOMB_EFFECT_RE, ATTACH_FILE_RE):
            found = pattern.match(stripped)
            if not found or not found.group(1).strip():
                continue
            target = resolve_reference(found.group(1), base_dir)
            if target and target not in asset_lookup:
                unresolved.add(found.group(1))
            break
    return EffectCheck(file=rel_from_assets, kind="msf", missing_references=sorted(unresolved))


def validate_mss(rel_from_assets: str, asset_lookup: dict[str, Path]) -> EffectCheck:
    """Collect ``SoundData`` references in a sound script that are absent from *asset_lookup*.

    The script is located by looking up the lowercased *rel_from_assets* in
    *asset_lookup*. If it is not found or is not a file, an empty result is
    returned. Otherwise the file is read as UTF-8 (undecodable bytes ignored).

    Returns:
        An ``EffectCheck`` of kind ``"mss"`` whose ``missing_references`` holds
        the raw reference strings, de-duplicated and sorted.
    """
    script_path = asset_lookup.get(rel_from_assets.lower())
    if not (script_path and script_path.is_file()):
        return EffectCheck(file=rel_from_assets, kind="mss", missing_references=[])
    content = script_path.read_text(encoding="utf-8", errors="ignore")
    matches = (SOUND_DATA_RE.match(entry.strip()) for entry in content.splitlines())
    unresolved = {
        found.group(1)
        for found in matches
        if found and normalize_virtual_path(found.group(1)) not in asset_lookup
    }
    return EffectCheck(file=rel_from_assets, kind="mss", missing_references=sorted(unresolved))


def main() -> int:
    """Validate every ``.mse`` / ``.msf`` file under ``<runtime-root>/assets/Effect``.

    Missing references are turned into issue ids, compared against the
    known-issues baseline, and reported as JSON or as plain text.

    Returns:
        ``0`` when there are no unexpected issues (and, with
        ``--strict-known-issues``, no stale baseline entries); ``1`` otherwise.

    Raises:
        SystemExit: if ``<runtime-root>/assets`` is not a directory.
    """
    options = parse_args()
    runtime_root = options.runtime_root.resolve()
    runtime_assets = runtime_root / "assets"
    if not runtime_assets.is_dir():
        raise SystemExit(f"assets dir not found: {runtime_assets}")

    asset_lookup = build_asset_lookup(runtime_assets)
    validators = {".mse": validate_mse, ".msf": validate_msf}
    effect_root = runtime_assets / "Effect"

    checks: list[EffectCheck] = []
    failures: list[str] = []
    issue_map: dict[str, str] = {}
    for candidate in sorted(effect_root.rglob("*")):
        if not candidate.is_file():
            continue
        validator = validators.get(candidate.suffix.lower())
        if validator is None:
            continue
        check = validator(candidate, candidate.relative_to(effect_root).as_posix(), asset_lookup)
        checks.append(check)
        owner_dir = Path(check.file).parent.as_posix().lower()
        for reference in check.missing_references:
            normalized_reference = resolve_reference(reference, owner_dir)
            issue_id = f"effect:reference:{check.file}:{normalized_reference}"
            message = f"{check.file}: missing reference {reference}"
            failures.append(message)
            issue_map[issue_id] = message

    observed = set(issue_map)
    known_path, known_ids = load_known_issue_ids(__file__, "effect", options.known_issues)
    known_observed, unexpected, stale = classify_issue_ids(observed, known_ids)

    # Stale baseline entries only fail the run in strict mode.
    passed = not unexpected and not (options.strict_known_issues and stale)
    unexpected_sorted = sorted(unexpected)
    result = {
        "ok": passed,
        "checked_files": len(checks),
        "failures": failures,
        "issue_ids": sorted(observed),
        "known_issue_ids": sorted(known_observed),
        "unexpected_issue_ids": unexpected_sorted,
        "stale_known_issue_ids": sorted(stale),
        "unexpected_failures": [issue_map[issue_id] for issue_id in unexpected_sorted],
        "stale_known_failures": sorted(stale),
        "known_issues_path": str(known_path) if known_path else None,
        "checks": [asdict(check) for check in checks],
    }

    if args_json := options.json:
        print(json.dumps(result, indent=2))
    else:
        print(f"ok={result['ok']} checked_files={result['checked_files']}")
        for failure in result["unexpected_failures"]:
            print(f"FAIL: {failure}")
        for issue_id in result["stale_known_issue_ids"]:
            print(f"STALE: {issue_id}")

    return 1 if not result["ok"] else 0


# Run the validator when executed as a script; main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())