def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
    """Poll the healthcheck wrapper until it succeeds or the timeout elapses.

    The healthcheck is always attempted at least once, so a zero or negative
    timeout behaves as a single check rather than failing without running
    anything.

    Args:
        timeout_seconds: Maximum number of seconds to keep retrying.
        interval_seconds: Pause between attempts; the final pause is clipped
            so it never overshoots the deadline.

    Returns:
        0 as soon as one healthcheck run exits with status 0.

    Raises:
        SystemExit: If the healthcheck wrapper is missing, the interval is
            negative, or no attempt succeeded before the deadline.
    """
    if not HEALTHCHECK_PATH.exists():
        raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
    if interval_seconds < 0:
        # time.sleep() rejects negative values with ValueError; fail early
        # with a readable message instead of a traceback mid-run.
        raise SystemExit(f"--interval must not be negative: {interval_seconds}")

    # Monotonic clock: wall-clock adjustments (NTP steps, manual changes)
    # must neither shorten nor extend the wait.
    deadline = time.monotonic() + timeout_seconds
    attempt = 0

    while True:
        attempt += 1
        # Flush so the progress line precedes the child's own output when
        # stdout is a pipe or file (block-buffered) rather than a terminal.
        print(f"Healthcheck attempt {attempt}...", flush=True)
        # NOTE(review): build_command(..., require_root=True) presumably adds
        # privilege escalation; it is defined elsewhere in this file - confirm.
        # The child itself is not time-limited, so a hung healthcheck can
        # still outlive the deadline.
        completed = subprocess.run(
            build_command([str(HEALTHCHECK_PATH)], require_root=True),
            check=False,
        )
        if completed.returncode == 0:
            return 0

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(interval_seconds, remaining))

    raise SystemExit(
        "Timed out waiting for login-ready state. "
        f"Last healthcheck exit code: {completed.returncode}"
    )