Guard public channel readiness in systemd tooling

This commit is contained in:
server
2026-04-15 17:46:56 +02:00
parent 6f16f66543
commit 2179c46ce0
6 changed files with 180 additions and 2 deletions

View File

@@ -51,6 +51,35 @@ def get_channel_ids() -> list[int]:
return [int(channel["id"]) for channel in iter_channels()]
def get_public_channel_ids(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> list[int]:
    """Return the IDs of channels flagged ``public``.

    Channels are taken from ``iter_channels()`` and reported in the order
    that function yields them.

    Args:
        selected_channel_ids: Optional restriction; when given, only channels
            whose integer ID appears in this iterable are considered.
            ``None`` means "consider every channel".
        client_visible_only: When true, additionally require the channel's
            ``client_visible`` entry to be truthy.

    Returns:
        The matching channel IDs as integers.
    """
    wanted: set[int] | None = None
    if selected_channel_ids is not None:
        wanted = {int(raw_id) for raw_id in selected_channel_ids}

    def _qualifies(channel_id: int, channel) -> bool:
        # An explicit selection narrows the candidates before any flag check.
        if wanted is not None and channel_id not in wanted:
            return False
        if not channel.get("public"):
            return False
        return not client_visible_only or bool(channel.get("client_visible"))

    # The ID is converted for every channel, including ones filtered out later.
    candidates = ((int(channel["id"]), channel) for channel in iter_channels())
    return [channel_id for channel_id, channel in candidates if _qualifies(channel_id, channel)]
def has_public_channel(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> bool:
    """Tell whether at least one public channel matches the given filters.

    Takes the same arguments as ``get_public_channel_ids`` and returns
    ``True`` exactly when that function yields a non-empty list.
    """
    public_ids = get_public_channel_ids(
        selected_channel_ids,
        client_visible_only=client_visible_only,
    )
    return len(public_ids) > 0
def get_channel_map() -> dict[int, dict[int, str]]:
result: dict[int, dict[int, str]] = {}
for channel in iter_channels():

View File

@@ -20,6 +20,8 @@ python3 deploy/systemd/install_systemd.py \
`--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory.
By default the installer refuses channel selections that omit every client-visible public channel. If you intentionally want an auth/internal-only stack, pass `--allow-internal-only`.
The channel selection and port layout now come from the versioned inventory file:
- [deploy/channel-inventory.json](../channel-inventory.json)

View File

@@ -97,6 +97,9 @@ def parse_args() -> argparse.Namespace:
ports_parser = subparsers.add_parser("ports", help="Show declared listener ports")
ports_parser.add_argument("--live", action="store_true", help="Also show whether the port is currently listening")
public_ready = subparsers.add_parser("public-ready", help="Verify enabled client-visible public channels are active and listening")
public_ready.add_argument("--json", action="store_true", help="Print raw JSON")
for action in ("start", "stop", "restart"):
action_parser = subparsers.add_parser(action, help=f"{action.title()} a managed target")
action_parser.add_argument("target", help="stack, db, auth, game, channel:<id>, instance:<name>")
@@ -196,6 +199,7 @@ def iter_port_rows() -> list[dict[str, str]]:
"p2p_port": "-",
"unit": channel_inventory.DB_UNIT,
"visibility": "internal",
"client_visible": False,
},
{
"scope": "auth",
@@ -204,6 +208,7 @@ def iter_port_rows() -> list[dict[str, str]]:
"p2p_port": str(channel_inventory.get_auth()["p2p_port"]),
"unit": channel_inventory.AUTH_UNIT,
"visibility": "public",
"client_visible": False,
},
]
@@ -221,6 +226,7 @@ def iter_port_rows() -> list[dict[str, str]]:
"p2p_port": str(core["p2p_port"]),
"unit": channel_inventory.game_unit(instance),
"visibility": visibility,
"client_visible": bool(channel.get("client_visible")),
}
)
@@ -593,6 +599,60 @@ def print_units() -> int:
return 0
def public_runtime_report() -> dict[str, object]:
    """Build a readiness report for client-visible public channel units.

    Only port rows whose scope starts with ``channel:``, whose visibility is
    ``public`` and whose ``client_visible`` flag is truthy are included. Each
    included row is paired with the state returned by ``get_unit_state`` and
    with whether its port appears in the result of ``live_ports()``.

    Returns:
        A dict with the keys ``ready``, ``declared_count``, ``enabled_count``,
        ``active_enabled_count``, ``live_enabled_count``, ``entries`` and
        ``issues``. ``ready`` is true only when no issue was recorded, which
        requires at least one enabled entry and every enabled entry to be
        both active and listening.
    """
    open_ports = live_ports()

    entries: list[dict[str, object]] = []
    for row in iter_port_rows():
        is_candidate = (
            row["scope"].startswith("channel:")
            and row["visibility"] == "public"
            and row["client_visible"]
        )
        if not is_candidate:
            continue

        active, sub_state, enabled = get_unit_state(row["unit"])
        is_live = int(row["port"]) in open_ports
        entry: dict[str, object] = {
            "scope": row["scope"],
            "name": row["name"],
            "port": int(row["port"]),
            "p2p_port": int(row["p2p_port"]),
            "unit": row["unit"],
            "active": active,
            "sub": sub_state,
            "enabled": enabled,
            "live": is_live,
        }
        entries.append(entry)

    # Only units reported as "enabled" are expected to be running.
    enabled_entries = [candidate for candidate in entries if candidate["enabled"] == "enabled"]

    issues: list[str] = []
    if len(enabled_entries) == 0:
        issues.append("No client-visible public channel units are enabled.")
    for candidate in enabled_entries:
        if candidate["active"] != "active":
            issues.append(f"{candidate['name']} unit is {candidate['active']}/{candidate['sub']}.")
        if not candidate["live"]:
            issues.append(f"{candidate['name']} port {candidate['port']} is not listening.")

    active_enabled = len([c for c in enabled_entries if c["active"] == "active"])
    live_enabled = len([c for c in enabled_entries if c["live"]])

    return {
        "ready": not issues,
        "declared_count": len(entries),
        "enabled_count": len(enabled_entries),
        "active_enabled_count": active_enabled,
        "live_enabled_count": live_enabled,
        "entries": entries,
        "issues": issues,
    }
def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
units = [
channel_inventory.STACK_UNIT,
@@ -622,6 +682,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
}
incident_count = count_incident_bundles()
core_count = len(iter_core_files())
public_runtime = public_runtime_report()
payload = {
"repos": repos,
@@ -648,6 +709,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
"orphan_count": stale_orphan_count,
"total_count": stale_total_count,
},
"public_runtime": public_runtime,
"core_count": core_count,
"incident_count": incident_count,
}
@@ -688,6 +750,15 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
print("Public Ports")
print_table(["name", "port", "p2p", "live"], public_port_rows)
print(
"client-visible public channels ready: "
f"{'yes' if public_runtime['ready'] else 'no'} "
f"({public_runtime['active_enabled_count']}/{public_runtime['enabled_count']} enabled units active, "
f"{public_runtime['live_enabled_count']}/{public_runtime['enabled_count']} listening)"
)
if public_runtime["issues"]:
for issue in public_runtime["issues"]:
print(f"warning: {issue}")
print()
print(f"Auth ({hours}h)")
@@ -983,6 +1054,41 @@ def print_ports(show_live: bool) -> int:
return 0
def print_public_ready(as_json: bool) -> int:
    """Print the public runtime readiness report and return an exit status.

    Args:
        as_json: When true, dump the report from ``public_runtime_report()``
            as indented JSON and print nothing else. Otherwise print a table
            of the entries (or a notice when there are none), a one-line
            readiness summary and one line per issue.

    Returns:
        ``0`` when the report's ``ready`` value is truthy, ``1`` otherwise.
    """
    report = public_runtime_report()

    if as_json:
        print(json.dumps(report, indent=2))
        return 0 if report["ready"] else 1

    entries = report["entries"]
    if not entries:
        print("No client-visible public channels declared in the inventory.")
    else:
        table_rows = []
        for entry in entries:
            live_label = "yes" if entry["live"] else "no"
            table_rows.append(
                [
                    str(entry["name"]),
                    str(entry["port"]),
                    str(entry["active"]),
                    str(entry["sub"]),
                    str(entry["enabled"]),
                    live_label,
                ]
            )
        print_table(["name", "port", "active", "sub", "enabled", "live"], table_rows)

    print()
    verdict = "yes" if report["ready"] else "no"
    enabled_total = report["enabled_count"]
    print(
        f"ready: {verdict} "
        f"({report['active_enabled_count']}/{enabled_total} enabled units active, "
        f"{report['live_enabled_count']}/{enabled_total} listening)"
    )
    for issue in report["issues"]:
        print(f"issue: {issue}")

    return 0 if report["ready"] else 1
def print_cores(as_json: bool) -> int:
entries = []
for path in iter_core_files():
@@ -1172,6 +1278,15 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
while time.time() < deadline:
attempt += 1
print(f"Healthcheck attempt {attempt}...")
public_runtime = public_runtime_report()
if not public_runtime["ready"]:
for issue in public_runtime["issues"]:
print(f"Public runtime not ready: {issue}")
remaining = deadline - time.time()
if remaining <= 0:
break
time.sleep(min(interval_seconds, remaining))
continue
completed = subprocess.run(
build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True),
check=False,
@@ -1233,6 +1348,8 @@ def main() -> int:
return print_status(args.target)
if args.command == "ports":
return print_ports(args.live)
if args.command == "public-ready":
return print_public_ready(args.json)
if args.command == "cores":
return print_cores(args.json)
if args.command == "incidents":

View File

@@ -32,6 +32,11 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds")
parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install")
parser.add_argument(
"--allow-internal-only",
action="store_true",
help="Allow installs that omit every client-visible public channel",
)
channel_group = parser.add_mutually_exclusive_group(required=True)
channel_group.add_argument(
@@ -76,7 +81,7 @@ def copy_file(source: Path, destination: Path, mode: int) -> None:
def resolve_channels(args: argparse.Namespace) -> list[int]:
try:
return channel_inventory.resolve_selected_channels(
selected_channels = channel_inventory.resolve_selected_channels(
channel_limit=args.channel_limit,
explicit_channels=args.channels,
)
@@ -84,6 +89,20 @@ def resolve_channels(args: argparse.Namespace) -> list[int]:
print(str(exc), file=sys.stderr)
raise SystemExit(1)
if not args.allow_internal_only and not channel_inventory.has_public_channel(
selected_channels,
client_visible_only=True,
):
print(
"Selected channels do not include any client-visible public channel. "
"Add a public channel such as --channel 1, or pass --allow-internal-only "
"if an auth/internal-only stack is intentional.",
file=sys.stderr,
)
raise SystemExit(1)
return selected_channels
def resolve_instances(selected_channels: list[int]) -> list[str]:
return channel_inventory.get_instances(selected_channels)

View File

@@ -132,12 +132,15 @@ Useful direct flags:
Operational CLI:
```bash
metinctl public-ready
metinctl healthcheck --mode full
metinctl healthcheck --mode ready
metinctl wait-ready
```
`metinctl wait-ready` now uses the lighter `ready` mode on purpose. The deeper `full` mode remains available as an explicit admin healthcheck.
`metinctl public-ready` verifies that every enabled client-visible public channel unit is active and that its declared listener port is actually up.
`metinctl wait-ready` now first waits for the public runtime to be up and only then runs the lighter `ready` login probe. The deeper `full` mode remains available as an explicit admin healthcheck.
Example negative auth test:

View File

@@ -41,6 +41,7 @@ The Debian deployment installs:
- listing managed units
- checking service status
- listing declared ports
- verifying that enabled client-visible public channels are active and listening
- listing recent auth failures
- listing recent login sessions
- listing stale open sessions without logout
@@ -78,6 +79,12 @@ Show declared ports and whether they are currently listening:
metinctl ports --live
```
Verify that enabled client-visible public channels are active and listening:
```bash
metinctl public-ready
```
Show recent real auth failures and skip smoke-test logins:
```bash
@@ -219,6 +226,7 @@ It also reconciles enabled game instance units against the selected channels:
- selected game units are enabled
- stale game units are disabled
- if `--restart` is passed, stale game units are disabled with `--now`
- installs now refuse an auth/internal-only channel selection unless you pass `--allow-internal-only`
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.