From 4fccf13e0912ef70e554474db529d0c3e1a5cea4 Mon Sep 17 00:00:00 2001 From: server Date: Tue, 14 Apr 2026 13:58:13 +0200 Subject: [PATCH] ops: split ready and full healthchecks --- deploy/healthcheck/metin-login-healthcheck.sh | 73 +++++++++++++++---- deploy/systemd/bin/metinctl.in | 13 ++-- deploy/systemd/install_systemd.py | 6 ++ docs/healthchecks.md | 34 ++++++++- docs/server-management.md | 17 ++++- 5 files changed, 117 insertions(+), 26 deletions(-) diff --git a/deploy/healthcheck/metin-login-healthcheck.sh b/deploy/healthcheck/metin-login-healthcheck.sh index fa6c31d..cf265c0 100755 --- a/deploy/healthcheck/metin-login-healthcheck.sh +++ b/deploy/healthcheck/metin-login-healthcheck.sh @@ -8,6 +8,38 @@ if [[ "${EUID}" -ne 0 ]]; then exit 1 fi +MODE="full" + +while (($#)); do + case "$1" in + --mode) + shift + if (($# == 0)); then + echo "Missing value for --mode" >&2 + exit 1 + fi + MODE="$1" + ;; + --mode=*) + MODE="${1#*=}" + ;; + *) + echo "Unknown argument: $1" >&2 + exit 1 + ;; + esac + shift +done + +case "${MODE}" in + ready|full) + ;; + *) + echo "Unsupported mode: ${MODE} (expected ready or full)" >&2 + exit 1 + ;; +esac + : "${RUN_AS_USER:=mt2.jakubkadlec.dev}" : "${SERVER_HOST:=173.249.9.66}" : "${AUTH_PORT:=11000}" @@ -156,26 +188,35 @@ cleanup() { trap cleanup EXIT -DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")" -create_player_index "${DELETE_ACCOUNT_ID}" - FULL_ACCOUNT_ID="$(create_account "${FULL_LOGIN}" "${FULL_PASSWORD}" "${FULL_SOCIAL_ID}" "${FULL_EMAIL}")" create_player_index "${FULL_ACCOUNT_ID}" -echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}" -sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \ - "${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \ - --password-env=METIN_LOGIN_SMOKE_PASSWORD \ - --create-character-name="${DELETE_CHARACTER_NAME}" \ - --delete-private-code="${DELETE_PRIVATE_CODE}" \ +if [[ "${MODE}" == "full" ]]; then + DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")" + create_player_index "${DELETE_ACCOUNT_ID}" + + echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}" + sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \ + "${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \ + --password-env=METIN_LOGIN_SMOKE_PASSWORD \ + --create-character-name="${DELETE_CHARACTER_NAME}" \ + --delete-private-code="${DELETE_PRIVATE_CODE}" \ + --client-version="${CLIENT_VERSION}" +fi + +echo "Running ${MODE} login healthcheck for temporary account ${FULL_LOGIN}" +FULL_ARGS=( + "${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}" + --password-env=METIN_LOGIN_SMOKE_PASSWORD + --create-character-name="${FULL_CHARACTER_NAME}" --client-version="${CLIENT_VERSION}" +) + +if [[ "${MODE}" == "full" ]]; then + FULL_ARGS+=(--mall-password="${MALL_PASSWORD}") +fi -echo "Running full login healthcheck for temporary account ${FULL_LOGIN}" sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${FULL_PASSWORD}" \ - "${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}" \ - --password-env=METIN_LOGIN_SMOKE_PASSWORD \ - --create-character-name="${FULL_CHARACTER_NAME}" \ - --client-version="${CLIENT_VERSION}" \ - --mall-password="${MALL_PASSWORD}" + "${FULL_ARGS[@]}" -echo "Login healthcheck passed" +echo "${MODE^} login healthcheck passed" diff --git a/deploy/systemd/bin/metinctl.in b/deploy/systemd/bin/metinctl.in index e64e466..8864d0b 100644 --- a/deploy/systemd/bin/metinctl.in +++ b/deploy/systemd/bin/metinctl.in @@ -56,11 +56,12 @@ def parse_args() -> argparse.Namespace: incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value") incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle") - wait_ready = subparsers.add_parser("wait-ready", help="Wait until the headless login healthcheck passes") + wait_ready = subparsers.add_parser("wait-ready", help="Wait until the runtime passes the login-ready probe") wait_ready.add_argument("--timeout", type=int, default=120, help="Maximum seconds to wait") wait_ready.add_argument("--interval", type=float, default=5.0, help="Seconds between healthcheck attempts") - subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck") + healthcheck = subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck") + healthcheck.add_argument("--mode", choices=("ready", "full"), default="full", help="Healthcheck depth") return parser.parse_args() @@ -316,10 +317,10 @@ def run_logs(target: str, lines: int, follow: bool) -> int: return 0 -def run_healthcheck() -> int: +def run_healthcheck(mode: str) -> int: if not HEALTHCHECK_PATH.exists(): raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}") - run([str(HEALTHCHECK_PATH)], require_root=True) + run([str(HEALTHCHECK_PATH), "--mode", mode], require_root=True) return 0 @@ -335,7 +336,7 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int: attempt += 1 print(f"Healthcheck attempt {attempt}...") completed = subprocess.run( - build_command([str(HEALTHCHECK_PATH)], require_root=True), + build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True), check=False, text=True, ) @@ -383,7 +384,7 @@ def main() -> int: if args.command == "incident-collect": return run_incident_collect(args.tag, args.since, args.include_cores) if args.command == "healthcheck": - return run_healthcheck() + return run_healthcheck(args.mode) if args.command == "wait-ready": return run_wait_ready(args.timeout, args.interval) raise SystemExit(f"Unsupported command: {args.command}") diff --git a/deploy/systemd/install_systemd.py b/deploy/systemd/install_systemd.py index 0167feb..c18f0a4 100644 --- a/deploy/systemd/install_systemd.py +++ b/deploy/systemd/install_systemd.py @@ -15,6 +15,7 @@ import channel_inventory TEMPLATES_DIR = SCRIPT_DIR / "templates" BIN_DIR = SCRIPT_DIR / "bin" +HEALTHCHECK_DIR = REPO_ROOT / "deploy" / "healthcheck" def parse_args() -> argparse.Namespace: @@ -148,6 +149,11 @@ def main() -> int: render_template(BIN_DIR / "metin-collect-incident.in", template_values), 0o700, ) + copy_file( + HEALTHCHECK_DIR / "metin-login-healthcheck.sh", + sbin_dir / "metin-login-healthcheck", + 0o700, + ) verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names] run(["systemd-analyze", "verify", *verify_units]) diff --git a/docs/healthchecks.md b/docs/healthchecks.md index d80d56a..107c436 100644 --- a/docs/healthchecks.md +++ b/docs/healthchecks.md @@ -19,7 +19,12 @@ Installed on the VPS: ## What The Headless Healthcheck Verifies -The installed wrapper now performs two headless passes against the live server: +The installed wrapper supports two modes: + +- `--mode ready` +- `--mode full` + +The full mode performs two headless passes against the live server: 1. a select-screen create/delete pass 2. a full auth + channel + `ENTERGAME` + mall pass @@ -48,7 +53,7 @@ This is an end-to-end gameplay-path verification, not just a TCP port check. ## How The Wrapper Works -`metin-login-healthcheck.sh` does the following: +`metin-login-healthcheck.sh --mode full` does the following: - creates two temporary accounts in MariaDB - runs `metin_login_smoke` once in create/delete mode on the select screen @@ -58,6 +63,15 @@ This is an end-to-end gameplay-path verification, not just a TCP port check. - deletes both temporary accounts and any temporary character rows on exit - passes the configured client version expected by the server +`metin-login-healthcheck.sh --mode ready` is intentionally lighter: + +- creates one temporary account in MariaDB +- runs one headless login flow through auth + channel + character create + select + `ENTERGAME` +- does not run the delete pass +- does not open the mall + +This mode is the right readiness probe immediately after a service restart. It verifies that the server is login-ready without depending on the deeper post-login mall path. + It is intended for manual admin use on the VPS. ## Usage @@ -69,6 +83,12 @@ ssh mt2 /usr/local/sbin/metin-login-healthcheck ``` +Readiness-only mode: + +```bash +/usr/local/sbin/metin-login-healthcheck --mode ready +``` + The smoke binary can also be run directly: ```bash @@ -109,6 +129,16 @@ Useful direct flags: - `--mall-password=PASSWORD` after `ENTERGAME`, opens the in-game mall via encrypted chat command and verifies `MALL_OPEN` +Operational CLI: + +```bash +metinctl healthcheck --mode full +metinctl healthcheck --mode ready +metinctl wait-ready +``` + +`metinctl wait-ready` now uses the lighter `ready` mode on purpose. The deeper `full` mode remains available as an explicit admin healthcheck. + Example negative auth test: ```bash diff --git a/docs/server-management.md b/docs/server-management.md index 8244868..e2c68b6 100644 --- a/docs/server-management.md +++ b/docs/server-management.md @@ -41,6 +41,7 @@ The Debian deployment installs: - listing core files in the runtime tree - collecting incident bundles - running the root-only headless healthcheck +- waiting for login-ready state after restart ## Examples @@ -80,10 +81,22 @@ Tail auth logs: metinctl logs auth -n 200 -f ``` -Run the end-to-end healthcheck: +Run the deeper end-to-end healthcheck: ```bash -metinctl healthcheck +metinctl healthcheck --mode full +``` + +Run the lighter readiness probe: + +```bash +metinctl healthcheck --mode ready +``` + +Wait until a restarted stack is login-ready: + +```bash +metinctl wait-ready ``` List core files currently present in the runtime tree: