ops: add login-ready wait helper
This commit is contained in:
@@ -7,6 +7,7 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
REPO_ROOT = Path("{{REPO_ROOT}}")
|
REPO_ROOT = Path("{{REPO_ROOT}}")
|
||||||
@@ -55,6 +56,10 @@ def parse_args() -> argparse.Namespace:
|
|||||||
incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value")
|
incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value")
|
||||||
incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle")
|
incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle")
|
||||||
|
|
||||||
|
wait_ready = subparsers.add_parser("wait-ready", help="Wait until the headless login healthcheck passes")
|
||||||
|
wait_ready.add_argument("--timeout", type=int, default=120, help="Maximum seconds to wait")
|
||||||
|
wait_ready.add_argument("--interval", type=float, default=5.0, help="Seconds between healthcheck attempts")
|
||||||
|
|
||||||
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -318,6 +323,33 @@ def run_healthcheck() -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
    """Poll the headless healthcheck until it passes or the timeout elapses.

    Args:
        timeout_seconds: Maximum number of seconds to keep retrying.
        interval_seconds: Pause between consecutive healthcheck attempts. The
            last pause is shortened so it never sleeps past the deadline.

    Returns:
        0 as soon as one healthcheck attempt exits with status 0.

    Raises:
        SystemExit: If the healthcheck wrapper does not exist, or if no
            attempt succeeded before the deadline (the message carries the
            exit code of the last attempt).
    """
    if not HEALTHCHECK_PATH.exists():
        raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")

    # Use the monotonic clock for the deadline: the wall clock can be stepped
    # (NTP sync, RTC correction) while we wait, which would stretch or cut
    # short the timeout.
    deadline = time.monotonic() + timeout_seconds
    attempt = 0
    # Reported in the timeout message; stays 1 if no attempt ever ran
    # (e.g. a non-positive timeout).
    last_returncode = 1

    while time.monotonic() < deadline:
        attempt += 1
        # Flush so this progress line is emitted before the child process
        # writes to the same (inherited) stdout when output is piped.
        print(f"Healthcheck attempt {attempt}...", flush=True)
        completed = subprocess.run(
            # NOTE(review): build_command is defined elsewhere in this file;
            # presumably require_root=True arranges privilege elevation.
            build_command([str(HEALTHCHECK_PATH)], require_root=True),
            check=False,  # a non-zero exit means "not ready yet", not an error
            text=True,
        )
        if completed.returncode == 0:
            return 0
        last_returncode = completed.returncode

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(interval_seconds, remaining))

    raise SystemExit(f"Timed out waiting for login-ready state. Last healthcheck exit code: {last_returncode}")
|
||||||
|
|
||||||
|
|
||||||
def run_incident_collect(tag: str, since: str, include_cores: bool) -> int:
|
def run_incident_collect(tag: str, since: str, include_cores: bool) -> int:
|
||||||
if not INCIDENT_COLLECTOR_PATH.exists():
|
if not INCIDENT_COLLECTOR_PATH.exists():
|
||||||
raise SystemExit(f"Missing incident collector: {INCIDENT_COLLECTOR_PATH}")
|
raise SystemExit(f"Missing incident collector: {INCIDENT_COLLECTOR_PATH}")
|
||||||
@@ -352,6 +384,8 @@ def main() -> int:
|
|||||||
return run_incident_collect(args.tag, args.since, args.include_cores)
|
return run_incident_collect(args.tag, args.since, args.include_cores)
|
||||||
if args.command == "healthcheck":
|
if args.command == "healthcheck":
|
||||||
return run_healthcheck()
|
return run_healthcheck()
|
||||||
|
if args.command == "wait-ready":
|
||||||
|
return run_wait_ready(args.timeout, args.interval)
|
||||||
raise SystemExit(f"Unsupported command: {args.command}")
|
raise SystemExit(f"Unsupported command: {args.command}")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user