ops: split ready and full healthchecks
This commit is contained in:
@@ -56,11 +56,12 @@ def parse_args() -> argparse.Namespace:
|
||||
incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value")
|
||||
incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle")
|
||||
|
||||
wait_ready = subparsers.add_parser("wait-ready", help="Wait until the headless login healthcheck passes")
|
||||
wait_ready = subparsers.add_parser("wait-ready", help="Wait until the runtime passes the login-ready probe")
|
||||
wait_ready.add_argument("--timeout", type=int, default=120, help="Maximum seconds to wait")
|
||||
wait_ready.add_argument("--interval", type=float, default=5.0, help="Seconds between healthcheck attempts")
|
||||
|
||||
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
||||
healthcheck = subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
||||
healthcheck.add_argument("--mode", choices=("ready", "full"), default="full", help="Healthcheck depth")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
@@ -316,10 +317,10 @@ def run_logs(target: str, lines: int, follow: bool) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def run_healthcheck() -> int:
|
||||
def run_healthcheck(mode: str) -> int:
|
||||
if not HEALTHCHECK_PATH.exists():
|
||||
raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
|
||||
run([str(HEALTHCHECK_PATH)], require_root=True)
|
||||
run([str(HEALTHCHECK_PATH), "--mode", mode], require_root=True)
|
||||
return 0
|
||||
|
||||
|
||||
@@ -335,7 +336,7 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
|
||||
attempt += 1
|
||||
print(f"Healthcheck attempt {attempt}...")
|
||||
completed = subprocess.run(
|
||||
build_command([str(HEALTHCHECK_PATH)], require_root=True),
|
||||
build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True),
|
||||
check=False,
|
||||
text=True,
|
||||
)
|
||||
@@ -383,7 +384,7 @@ def main() -> int:
|
||||
if args.command == "incident-collect":
|
||||
return run_incident_collect(args.tag, args.since, args.include_cores)
|
||||
if args.command == "healthcheck":
|
||||
return run_healthcheck()
|
||||
return run_healthcheck(args.mode)
|
||||
if args.command == "wait-ready":
|
||||
return run_wait_ready(args.timeout, args.interval)
|
||||
raise SystemExit(f"Unsupported command: {args.command}")
|
||||
|
||||
@@ -15,6 +15,7 @@ import channel_inventory
|
||||
|
||||
TEMPLATES_DIR = SCRIPT_DIR / "templates"
|
||||
BIN_DIR = SCRIPT_DIR / "bin"
|
||||
HEALTHCHECK_DIR = REPO_ROOT / "deploy" / "healthcheck"
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
@@ -148,6 +149,11 @@ def main() -> int:
|
||||
render_template(BIN_DIR / "metin-collect-incident.in", template_values),
|
||||
0o700,
|
||||
)
|
||||
copy_file(
|
||||
HEALTHCHECK_DIR / "metin-login-healthcheck.sh",
|
||||
sbin_dir / "metin-login-healthcheck",
|
||||
0o700,
|
||||
)
|
||||
|
||||
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
|
||||
run(["systemd-analyze", "verify", *verify_units])
|
||||
|
||||
Reference in New Issue
Block a user