ops: split ready and full healthchecks

This commit is contained in:
server
2026-04-14 13:58:13 +02:00
parent 5b0da5a685
commit 4fccf13e09
5 changed files with 117 additions and 26 deletions

View File

@@ -8,6 +8,38 @@ if [[ "${EUID}" -ne 0 ]]; then
exit 1
fi
# Healthcheck depth; stays "full" when no --mode argument is given.
MODE="full"
# Parse command-line arguments. Both "--mode VALUE" and "--mode=VALUE" are
# accepted; any other argument aborts the script with exit status 1.
while (($#)); do
case "$1" in
--mode)
# Step past the flag itself so that $1 becomes its value.
shift
if (($# == 0)); then
echo "Missing value for --mode" >&2
exit 1
fi
MODE="$1"
;;
--mode=*)
# Strip everything up to and including the first "=" to keep only the value.
MODE="${1#*=}"
;;
*)
echo "Unknown argument: $1" >&2
exit 1
;;
esac
# Consume the argument (or flag value) that was just handled.
shift
done
# Validate the requested mode: only "ready" and "full" are supported.
case "${MODE}" in
ready|full)
;;
*)
echo "Unsupported mode: ${MODE} (expected ready or full)" >&2
exit 1
;;
esac
: "${RUN_AS_USER:=mt2.jakubkadlec.dev}"
: "${SERVER_HOST:=173.249.9.66}"
: "${AUTH_PORT:=11000}"
@@ -156,26 +188,35 @@ cleanup() {
trap cleanup EXIT
DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")"
create_player_index "${DELETE_ACCOUNT_ID}"
FULL_ACCOUNT_ID="$(create_account "${FULL_LOGIN}" "${FULL_PASSWORD}" "${FULL_SOCIAL_ID}" "${FULL_EMAIL}")"
create_player_index "${FULL_ACCOUNT_ID}"
echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${DELETE_CHARACTER_NAME}" \
--delete-private-code="${DELETE_PRIVATE_CODE}" \
if [[ "${MODE}" == "full" ]]; then
DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")"
create_player_index "${DELETE_ACCOUNT_ID}"
echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${DELETE_CHARACTER_NAME}" \
--delete-private-code="${DELETE_PRIVATE_CODE}" \
--client-version="${CLIENT_VERSION}"
fi
echo "Running ${MODE} login healthcheck for temporary account ${FULL_LOGIN}"
FULL_ARGS=(
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}"
--password-env=METIN_LOGIN_SMOKE_PASSWORD
--create-character-name="${FULL_CHARACTER_NAME}"
--client-version="${CLIENT_VERSION}"
)
if [[ "${MODE}" == "full" ]]; then
FULL_ARGS+=(--mall-password="${MALL_PASSWORD}")
fi
echo "Running full login healthcheck for temporary account ${FULL_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${FULL_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${FULL_CHARACTER_NAME}" \
--client-version="${CLIENT_VERSION}" \
--mall-password="${MALL_PASSWORD}"
"${FULL_ARGS[@]}"
echo "Login healthcheck passed"
echo "${MODE^} login healthcheck passed"

View File

@@ -56,11 +56,12 @@ def parse_args() -> argparse.Namespace:
incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value")
incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle")
wait_ready = subparsers.add_parser("wait-ready", help="Wait until the headless login healthcheck passes")
wait_ready = subparsers.add_parser("wait-ready", help="Wait until the runtime passes the login-ready probe")
wait_ready.add_argument("--timeout", type=int, default=120, help="Maximum seconds to wait")
wait_ready.add_argument("--interval", type=float, default=5.0, help="Seconds between healthcheck attempts")
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
healthcheck = subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
healthcheck.add_argument("--mode", choices=("ready", "full"), default="full", help="Healthcheck depth")
return parser.parse_args()
@@ -316,10 +317,10 @@ def run_logs(target: str, lines: int, follow: bool) -> int:
return 0
def run_healthcheck() -> int:
def run_healthcheck(mode: str) -> int:
if not HEALTHCHECK_PATH.exists():
raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
run([str(HEALTHCHECK_PATH)], require_root=True)
run([str(HEALTHCHECK_PATH), "--mode", mode], require_root=True)
return 0
@@ -335,7 +336,7 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
attempt += 1
print(f"Healthcheck attempt {attempt}...")
completed = subprocess.run(
build_command([str(HEALTHCHECK_PATH)], require_root=True),
build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True),
check=False,
text=True,
)
@@ -383,7 +384,7 @@ def main() -> int:
if args.command == "incident-collect":
return run_incident_collect(args.tag, args.since, args.include_cores)
if args.command == "healthcheck":
return run_healthcheck()
return run_healthcheck(args.mode)
if args.command == "wait-ready":
return run_wait_ready(args.timeout, args.interval)
raise SystemExit(f"Unsupported command: {args.command}")

View File

@@ -15,6 +15,7 @@ import channel_inventory
TEMPLATES_DIR = SCRIPT_DIR / "templates"
BIN_DIR = SCRIPT_DIR / "bin"
HEALTHCHECK_DIR = REPO_ROOT / "deploy" / "healthcheck"
def parse_args() -> argparse.Namespace:
@@ -148,6 +149,11 @@ def main() -> int:
render_template(BIN_DIR / "metin-collect-incident.in", template_values),
0o700,
)
copy_file(
HEALTHCHECK_DIR / "metin-login-healthcheck.sh",
sbin_dir / "metin-login-healthcheck",
0o700,
)
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
run(["systemd-analyze", "verify", *verify_units])

View File

@@ -19,7 +19,12 @@ Installed on the VPS:
## What The Headless Healthcheck Verifies
The installed wrapper now performs two headless passes against the live server:
The installed wrapper supports two modes:
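- `--mode ready`: a single, lighter login pass that skips the delete pass and the mall
- `--mode full`: both passes described below (the default when `--mode` is omitted)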
The full mode performs two headless passes against the live server:
1. a select-screen create/delete pass
2. a full auth + channel + `ENTERGAME` + mall pass
@@ -48,7 +53,7 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
## How The Wrapper Works
`metin-login-healthcheck.sh` does the following:
`metin-login-healthcheck.sh --mode full` does the following:
- creates two temporary accounts in MariaDB
- runs `metin_login_smoke` once in create/delete mode on the select screen
@@ -58,6 +63,15 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
- deletes both temporary accounts and any temporary character rows on exit
- passes the configured client version expected by the server
`metin-login-healthcheck.sh --mode ready` is intentionally lighter:
- creates one temporary account in MariaDB
- runs one headless login flow through auth + channel + character create + select + `ENTERGAME`
- does not run the delete pass
- does not open the mall
This mode is the right readiness probe immediately after a service restart. It verifies that the server is login-ready without depending on the deeper post-login mall path.
The wrapper script itself is intended for manual admin use on the VPS.
## Usage
@@ -69,6 +83,12 @@ ssh mt2
/usr/local/sbin/metin-login-healthcheck
```
Readiness-only mode:
```bash
/usr/local/sbin/metin-login-healthcheck --mode ready
```
The smoke binary can also be run directly:
```bash
@@ -109,6 +129,16 @@ Useful direct flags:
- `--mall-password=PASSWORD`
after `ENTERGAME`, opens the in-game mall via encrypted chat command and verifies `MALL_OPEN`
Operational CLI:
```bash
metinctl healthcheck --mode full
metinctl healthcheck --mode ready
metinctl wait-ready
```
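`metinctl wait-ready` deliberately uses the lighter `ready` mode. The deeper `full` mode remains available as an explicit admin healthcheck via `metinctl healthcheck --mode full`, and it is also what `metinctl healthcheck` runs when `--mode` is omitted.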
Example negative auth test:
```bash

View File

@@ -41,6 +41,7 @@ The Debian deployment installs:
- listing core files in the runtime tree
- collecting incident bundles
- running the root-only headless healthcheck
- waiting for login-ready state after restart
## Examples
@@ -80,10 +81,22 @@ Tail auth logs:
metinctl logs auth -n 200 -f
```
Run the end-to-end healthcheck:
Run the deeper end-to-end healthcheck:
```bash
metinctl healthcheck
metinctl healthcheck --mode full
```
Run the lighter readiness probe:
```bash
metinctl healthcheck --mode ready
```
Wait until a restarted stack is login-ready:
```bash
metinctl wait-ready
```
List core files currently present in the runtime tree: