diff --git a/channel_inventory.py b/channel_inventory.py index ef700b0..4a38946 100644 --- a/channel_inventory.py +++ b/channel_inventory.py @@ -51,6 +51,35 @@ def get_channel_ids() -> list[int]: return [int(channel["id"]) for channel in iter_channels()] +def get_public_channel_ids( + selected_channel_ids: Iterable[int] | None = None, + *, + client_visible_only: bool = False, +) -> list[int]: + selected = None if selected_channel_ids is None else {int(channel_id) for channel_id in selected_channel_ids} + result: list[int] = [] + + for channel in iter_channels(): + channel_id = int(channel["id"]) + if selected is not None and channel_id not in selected: + continue + if not channel.get("public"): + continue + if client_visible_only and not channel.get("client_visible"): + continue + result.append(channel_id) + + return result + + +def has_public_channel( + selected_channel_ids: Iterable[int] | None = None, + *, + client_visible_only: bool = False, +) -> bool: + return bool(get_public_channel_ids(selected_channel_ids, client_visible_only=client_visible_only)) + + def get_channel_map() -> dict[int, dict[int, str]]: result: dict[int, dict[int, str]] = {} for channel in iter_channels(): diff --git a/deploy/systemd/README.md b/deploy/systemd/README.md index 2a1c03a..1c9d1b5 100644 --- a/deploy/systemd/README.md +++ b/deploy/systemd/README.md @@ -20,6 +20,8 @@ python3 deploy/systemd/install_systemd.py \ `--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory. +By default the installer refuses channel selections that omit every client-visible public channel. If you intentionally want an auth/internal-only stack, pass `--allow-internal-only`. + The channel selection and port layout now come from the versioned inventory file: - [deploy/channel-inventory.json](../channel-inventory.json) diff --git a/deploy/systemd/bin/metinctl.in b/deploy/systemd/bin/metinctl.in index b9e6df8..b9b3963 100644 --- a/deploy/systemd/bin/metinctl.in +++ b/deploy/systemd/bin/metinctl.in @@ -97,6 +97,9 @@ def parse_args() -> argparse.Namespace: ports_parser = subparsers.add_parser("ports", help="Show declared listener ports") ports_parser.add_argument("--live", action="store_true", help="Also show whether the port is currently listening") + public_ready = subparsers.add_parser("public-ready", help="Verify enabled client-visible public channels are active and listening") + public_ready.add_argument("--json", action="store_true", help="Print raw JSON") + for action in ("start", "stop", "restart"): action_parser = subparsers.add_parser(action, help=f"{action.title()} a managed target") action_parser.add_argument("target", help="stack, db, auth, game, channel:, instance:") @@ -196,6 +199,7 @@ def iter_port_rows() -> list[dict[str, str]]: "p2p_port": "-", "unit": channel_inventory.DB_UNIT, "visibility": "internal", + "client_visible": False, }, { "scope": "auth", @@ -204,6 +208,7 @@ def iter_port_rows() -> list[dict[str, str]]: "p2p_port": str(channel_inventory.get_auth()["p2p_port"]), "unit": channel_inventory.AUTH_UNIT, "visibility": "public", + "client_visible": False, }, ] @@ -221,6 +226,7 @@ def iter_port_rows() -> list[dict[str, str]]: "p2p_port": str(core["p2p_port"]), "unit": channel_inventory.game_unit(instance), "visibility": visibility, + "client_visible": bool(channel.get("client_visible")), } ) @@ -593,6 +599,60 @@ def print_units() -> int: return 0 +def public_runtime_report() -> dict[str, object]: + listening = live_ports() + entries: list[dict[str, object]] = [] + + for row in iter_port_rows(): + if not row["scope"].startswith("channel:"): + continue + if row["visibility"] != "public": + continue + if not row["client_visible"]: + continue + + active, sub_state, enabled = get_unit_state(row["unit"]) + live = int(row["port"]) in listening + entries.append( + { + "scope": row["scope"], + "name": row["name"], + "port": int(row["port"]), + "p2p_port": int(row["p2p_port"]), + "unit": row["unit"], + "active": active, + "sub": sub_state, + "enabled": enabled, + "live": live, + } + ) + + enabled_entries = [entry for entry in entries if entry["enabled"] == "enabled"] + issues: list[str] = [] + + if not enabled_entries: + issues.append("No client-visible public channel units are enabled.") + + for entry in enabled_entries: + if entry["active"] != "active": + issues.append(f"{entry['name']} unit is {entry['active']}/{entry['sub']}.") + if not entry["live"]: + issues.append(f"{entry['name']} port {entry['port']} is not listening.") + + ready = not issues + live_enabled = sum(1 for entry in enabled_entries if entry["live"]) + active_enabled = sum(1 for entry in enabled_entries if entry["active"] == "active") + return { + "ready": ready, + "declared_count": len(entries), + "enabled_count": len(enabled_entries), + "active_enabled_count": active_enabled, + "live_enabled_count": live_enabled, + "entries": entries, + "issues": issues, + } + + def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int: units = [ channel_inventory.STACK_UNIT, @@ -622,6 +682,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int: } incident_count = count_incident_bundles() core_count = len(iter_core_files()) + public_runtime = public_runtime_report() payload = { "repos": repos, @@ -648,6 +709,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int: "orphan_count": stale_orphan_count, "total_count": stale_total_count, }, + "public_runtime": public_runtime, "core_count": core_count, "incident_count": incident_count, } @@ -688,6 +750,15 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int: print("Public Ports") print_table(["name", "port", "p2p", "live"], public_port_rows) + print( + "client-visible public channels ready: " + f"{'yes' if public_runtime['ready'] else 'no'} " + f"({public_runtime['active_enabled_count']}/{public_runtime['enabled_count']} enabled units active, " + f"{public_runtime['live_enabled_count']}/{public_runtime['enabled_count']} listening)" + ) + if public_runtime["issues"]: + for issue in public_runtime["issues"]: + print(f"warning: {issue}") print() print(f"Auth ({hours}h)") @@ -983,6 +1054,41 @@ def print_ports(show_live: bool) -> int: return 0 +def print_public_ready(as_json: bool) -> int: + payload = public_runtime_report() + + if as_json: + print(json.dumps(payload, indent=2)) + else: + if payload["entries"]: + rows = [ + [ + str(entry["name"]), + str(entry["port"]), + str(entry["active"]), + str(entry["sub"]), + str(entry["enabled"]), + "yes" if entry["live"] else "no", + ] + for entry in payload["entries"] + ] + print_table(["name", "port", "active", "sub", "enabled", "live"], rows) + else: + print("No client-visible public channels declared in the inventory.") + + print() + print( + "ready: " + f"{'yes' if payload['ready'] else 'no'} " + f"({payload['active_enabled_count']}/{payload['enabled_count']} enabled units active, " + f"{payload['live_enabled_count']}/{payload['enabled_count']} listening)" + ) + for issue in payload["issues"]: + print(f"issue: {issue}") + + return 0 if payload["ready"] else 1 + + def print_cores(as_json: bool) -> int: entries = [] for path in iter_core_files(): @@ -1172,6 +1278,15 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int: while time.time() < deadline: attempt += 1 print(f"Healthcheck attempt {attempt}...") + public_runtime = public_runtime_report() + if not public_runtime["ready"]: + for issue in public_runtime["issues"]: + print(f"Public runtime not ready: {issue}") + remaining = deadline - time.time() + if remaining <= 0: + break + time.sleep(min(interval_seconds, remaining)) + continue completed = subprocess.run( build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True), check=False, @@ -1233,6 +1348,8 @@ def main() -> int: return print_status(args.target) if args.command == "ports": return print_ports(args.live) + if args.command == "public-ready": + return print_public_ready(args.json) if args.command == "cores": return print_cores(args.json) if args.command == "incidents": diff --git a/deploy/systemd/install_systemd.py b/deploy/systemd/install_systemd.py index 1ef2205..e6d4a9a 100644 --- a/deploy/systemd/install_systemd.py +++ b/deploy/systemd/install_systemd.py @@ -32,6 +32,11 @@ def parse_args() -> argparse.Namespace: parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port") parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds") parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install") + parser.add_argument( + "--allow-internal-only", + action="store_true", + help="Allow installs that omit every client-visible public channel", + ) channel_group = parser.add_mutually_exclusive_group(required=True) channel_group.add_argument( @@ -76,7 +81,7 @@ def copy_file(source: Path, destination: Path, mode: int) -> None: def resolve_channels(args: argparse.Namespace) -> list[int]: try: - return channel_inventory.resolve_selected_channels( + selected_channels = channel_inventory.resolve_selected_channels( channel_limit=args.channel_limit, explicit_channels=args.channels, ) @@ -84,6 +89,20 @@ def resolve_channels(args: argparse.Namespace) -> list[int]: print(str(exc), file=sys.stderr) raise SystemExit(1) + if not args.allow_internal_only and not channel_inventory.has_public_channel( + selected_channels, + client_visible_only=True, + ): + print( + "Selected channels do not include any client-visible public channel. " + "Add a public channel such as --channel 1, or pass --allow-internal-only " + "if an auth/internal-only stack is intentional.", + file=sys.stderr, + ) + raise SystemExit(1) + + return selected_channels + def resolve_instances(selected_channels: list[int]) -> list[str]: return channel_inventory.get_instances(selected_channels) diff --git a/docs/healthchecks.md b/docs/healthchecks.md index 107c436..62f53df 100644 --- a/docs/healthchecks.md +++ b/docs/healthchecks.md @@ -132,12 +132,15 @@ Useful direct flags: Operational CLI: ```bash +metinctl public-ready metinctl healthcheck --mode full metinctl healthcheck --mode ready metinctl wait-ready ``` -`metinctl wait-ready` now uses the lighter `ready` mode on purpose. The deeper `full` mode remains available as an explicit admin healthcheck. +`metinctl public-ready` verifies that every enabled client-visible public channel unit is active and that its declared listener port is actually up. + +`metinctl wait-ready` now first waits for the public runtime to be up and only then runs the lighter `ready` login probe. The deeper `full` mode remains available as an explicit admin healthcheck. Example negative auth test: diff --git a/docs/server-management.md b/docs/server-management.md index c0bb25b..56cad16 100644 --- a/docs/server-management.md +++ b/docs/server-management.md @@ -41,6 +41,7 @@ The Debian deployment installs: - listing managed units - checking service status - listing declared ports +- verifying that enabled public client-facing channels are actually up - listing recent auth failures - listing recent login sessions - listing stale open sessions without logout @@ -78,6 +79,12 @@ Show declared ports and whether they are currently listening: metinctl ports --live ``` +Verify that enabled client-visible public channels are active and listening: + +```bash +metinctl public-ready +``` + Show recent real auth failures and skip smoke-test logins: ```bash @@ -219,6 +226,7 @@ It also reconciles enabled game instance units against the selected channels: - selected game units are enabled - stale game units are disabled - if `--restart` is passed, stale game units are disabled with `--now` +- installs now refuse an auth/internal-only channel selection unless you pass `--allow-internal-only` This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.