Guard public channel readiness in systemd tooling
This commit is contained in:
@@ -51,6 +51,35 @@ def get_channel_ids() -> list[int]:
|
|||||||
return [int(channel["id"]) for channel in iter_channels()]
|
return [int(channel["id"]) for channel in iter_channels()]
|
||||||
|
|
||||||
|
|
||||||
|
def get_public_channel_ids(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> list[int]:
    """Return the ids of public channels, in ``iter_channels()`` order.

    Args:
        selected_channel_ids: Optional restriction. When given, only channels
            whose integer id appears in this iterable are considered; ``None``
            means every channel is considered.
        client_visible_only: When true, a channel must additionally carry a
            truthy ``client_visible`` entry to be included.

    Returns:
        A list of integer channel ids whose ``public`` entry is truthy and
        that pass the optional filters above.
    """
    wanted: set[int] | None = None
    if selected_channel_ids is not None:
        # Normalise to ints once so membership checks compare like with like.
        wanted = {int(raw_id) for raw_id in selected_channel_ids}

    public_ids: list[int] = []
    for entry in iter_channels():
        entry_id = int(entry["id"])
        in_selection = wanted is None or entry_id in wanted
        if (
            in_selection
            and entry.get("public")
            and (not client_visible_only or entry.get("client_visible"))
        ):
            public_ids.append(entry_id)

    return public_ids
|
||||||
|
|
||||||
|
|
||||||
|
def has_public_channel(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> bool:
    """Report whether at least one public channel matches the given filters.

    The arguments are forwarded unchanged to ``get_public_channel_ids``; the
    result is true exactly when that function returns a non-empty list.
    """
    matching_ids = get_public_channel_ids(
        selected_channel_ids,
        client_visible_only=client_visible_only,
    )
    return len(matching_ids) > 0
|
||||||
|
|
||||||
|
|
||||||
def get_channel_map() -> dict[int, dict[int, str]]:
|
def get_channel_map() -> dict[int, dict[int, str]]:
|
||||||
result: dict[int, dict[int, str]] = {}
|
result: dict[int, dict[int, str]] = {}
|
||||||
for channel in iter_channels():
|
for channel in iter_channels():
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ python3 deploy/systemd/install_systemd.py \
|
|||||||
|
|
||||||
`--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory.
|
`--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory.
|
||||||
|
|
||||||
|
By default the installer refuses channel selections that omit every client-visible public channel. If you intentionally want an auth/internal-only stack, pass `--allow-internal-only`.
|
||||||
|
|
||||||
The channel selection and port layout now come from the versioned inventory file:
|
The channel selection and port layout now come from the versioned inventory file:
|
||||||
|
|
||||||
- [deploy/channel-inventory.json](../channel-inventory.json)
|
- [deploy/channel-inventory.json](../channel-inventory.json)
|
||||||
|
|||||||
@@ -97,6 +97,9 @@ def parse_args() -> argparse.Namespace:
|
|||||||
ports_parser = subparsers.add_parser("ports", help="Show declared listener ports")
|
ports_parser = subparsers.add_parser("ports", help="Show declared listener ports")
|
||||||
ports_parser.add_argument("--live", action="store_true", help="Also show whether the port is currently listening")
|
ports_parser.add_argument("--live", action="store_true", help="Also show whether the port is currently listening")
|
||||||
|
|
||||||
|
public_ready = subparsers.add_parser("public-ready", help="Verify enabled client-visible public channels are active and listening")
|
||||||
|
public_ready.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
for action in ("start", "stop", "restart"):
|
for action in ("start", "stop", "restart"):
|
||||||
action_parser = subparsers.add_parser(action, help=f"{action.title()} a managed target")
|
action_parser = subparsers.add_parser(action, help=f"{action.title()} a managed target")
|
||||||
action_parser.add_argument("target", help="stack, db, auth, game, channel:<id>, instance:<name>")
|
action_parser.add_argument("target", help="stack, db, auth, game, channel:<id>, instance:<name>")
|
||||||
@@ -196,6 +199,7 @@ def iter_port_rows() -> list[dict[str, str]]:
|
|||||||
"p2p_port": "-",
|
"p2p_port": "-",
|
||||||
"unit": channel_inventory.DB_UNIT,
|
"unit": channel_inventory.DB_UNIT,
|
||||||
"visibility": "internal",
|
"visibility": "internal",
|
||||||
|
"client_visible": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"scope": "auth",
|
"scope": "auth",
|
||||||
@@ -204,6 +208,7 @@ def iter_port_rows() -> list[dict[str, str]]:
|
|||||||
"p2p_port": str(channel_inventory.get_auth()["p2p_port"]),
|
"p2p_port": str(channel_inventory.get_auth()["p2p_port"]),
|
||||||
"unit": channel_inventory.AUTH_UNIT,
|
"unit": channel_inventory.AUTH_UNIT,
|
||||||
"visibility": "public",
|
"visibility": "public",
|
||||||
|
"client_visible": False,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -221,6 +226,7 @@ def iter_port_rows() -> list[dict[str, str]]:
|
|||||||
"p2p_port": str(core["p2p_port"]),
|
"p2p_port": str(core["p2p_port"]),
|
||||||
"unit": channel_inventory.game_unit(instance),
|
"unit": channel_inventory.game_unit(instance),
|
||||||
"visibility": visibility,
|
"visibility": visibility,
|
||||||
|
"client_visible": bool(channel.get("client_visible")),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -593,6 +599,60 @@ def print_units() -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def public_runtime_report() -> dict[str, object]:
    """Build a readiness report for client-visible public channel units.

    Only port rows whose scope starts with ``channel:``, whose visibility is
    ``"public"`` and whose ``client_visible`` flag is truthy are inspected.
    For each such row the unit state is queried via ``get_unit_state`` and the
    declared port is checked against the result of ``live_ports()``.

    Returns:
        A dict with the keys ``ready``, ``declared_count``, ``enabled_count``,
        ``active_enabled_count``, ``live_enabled_count``, ``entries`` and
        ``issues``. ``ready`` is true only when no issue was recorded, which
        requires at least one enabled unit and every enabled unit being both
        active and listening.
    """
    open_ports = live_ports()
    declared: list[dict[str, object]] = []

    for port_row in iter_port_rows():
        # Skip anything that is not a client-visible public channel listener.
        if (
            not port_row["scope"].startswith("channel:")
            or port_row["visibility"] != "public"
            or not port_row["client_visible"]
        ):
            continue

        unit_active, unit_sub, unit_enabled = get_unit_state(port_row["unit"])
        is_listening = int(port_row["port"]) in open_ports
        declared.append(
            {
                "scope": port_row["scope"],
                "name": port_row["name"],
                "port": int(port_row["port"]),
                "p2p_port": int(port_row["p2p_port"]),
                "unit": port_row["unit"],
                "active": unit_active,
                "sub": unit_sub,
                "enabled": unit_enabled,
                "live": is_listening,
            }
        )

    # Readiness is judged only against units that are enabled in systemd.
    enabled_only = [item for item in declared if item["enabled"] == "enabled"]

    problems: list[str] = []
    if not enabled_only:
        problems.append("No client-visible public channel units are enabled.")

    for item in enabled_only:
        if item["active"] != "active":
            problems.append(f"{item['name']} unit is {item['active']}/{item['sub']}.")
        if not item["live"]:
            problems.append(f"{item['name']} port {item['port']} is not listening.")

    listening_total = len([item for item in enabled_only if item["live"]])
    active_total = len([item for item in enabled_only if item["active"] == "active"])

    return {
        "ready": not problems,
        "declared_count": len(declared),
        "enabled_count": len(enabled_only),
        "active_enabled_count": active_total,
        "live_enabled_count": listening_total,
        "entries": declared,
        "issues": problems,
    }
|
||||||
|
|
||||||
|
|
||||||
def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
||||||
units = [
|
units = [
|
||||||
channel_inventory.STACK_UNIT,
|
channel_inventory.STACK_UNIT,
|
||||||
@@ -622,6 +682,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
}
|
}
|
||||||
incident_count = count_incident_bundles()
|
incident_count = count_incident_bundles()
|
||||||
core_count = len(iter_core_files())
|
core_count = len(iter_core_files())
|
||||||
|
public_runtime = public_runtime_report()
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"repos": repos,
|
"repos": repos,
|
||||||
@@ -648,6 +709,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
"orphan_count": stale_orphan_count,
|
"orphan_count": stale_orphan_count,
|
||||||
"total_count": stale_total_count,
|
"total_count": stale_total_count,
|
||||||
},
|
},
|
||||||
|
"public_runtime": public_runtime,
|
||||||
"core_count": core_count,
|
"core_count": core_count,
|
||||||
"incident_count": incident_count,
|
"incident_count": incident_count,
|
||||||
}
|
}
|
||||||
@@ -688,6 +750,15 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
|
|
||||||
print("Public Ports")
|
print("Public Ports")
|
||||||
print_table(["name", "port", "p2p", "live"], public_port_rows)
|
print_table(["name", "port", "p2p", "live"], public_port_rows)
|
||||||
|
print(
|
||||||
|
"client-visible public channels ready: "
|
||||||
|
f"{'yes' if public_runtime['ready'] else 'no'} "
|
||||||
|
f"({public_runtime['active_enabled_count']}/{public_runtime['enabled_count']} enabled units active, "
|
||||||
|
f"{public_runtime['live_enabled_count']}/{public_runtime['enabled_count']} listening)"
|
||||||
|
)
|
||||||
|
if public_runtime["issues"]:
|
||||||
|
for issue in public_runtime["issues"]:
|
||||||
|
print(f"warning: {issue}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print(f"Auth ({hours}h)")
|
print(f"Auth ({hours}h)")
|
||||||
@@ -983,6 +1054,41 @@ def print_ports(show_live: bool) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_public_ready(as_json: bool) -> int:
    """Print the public runtime readiness report and return an exit status.

    Args:
        as_json: When true, dump the raw report as indented JSON; otherwise
            print a table of declared entries followed by a summary line and
            one ``issue:`` line per recorded problem.

    Returns:
        ``0`` when the report's ``ready`` flag is truthy, ``1`` otherwise.
    """
    report = public_runtime_report()

    if as_json:
        print(json.dumps(report, indent=2))
        return 0 if report["ready"] else 1

    declared = report["entries"]
    if not declared:
        print("No client-visible public channels declared in the inventory.")
    else:
        table_rows = []
        for item in declared:
            table_rows.append(
                [
                    str(item["name"]),
                    str(item["port"]),
                    str(item["active"]),
                    str(item["sub"]),
                    str(item["enabled"]),
                    "yes" if item["live"] else "no",
                ]
            )
        print_table(["name", "port", "active", "sub", "enabled", "live"], table_rows)

    print()
    print(
        "ready: "
        f"{'yes' if report['ready'] else 'no'} "
        f"({report['active_enabled_count']}/{report['enabled_count']} enabled units active, "
        f"{report['live_enabled_count']}/{report['enabled_count']} listening)"
    )
    for problem in report["issues"]:
        print(f"issue: {problem}")

    return 0 if report["ready"] else 1
|
||||||
|
|
||||||
|
|
||||||
def print_cores(as_json: bool) -> int:
|
def print_cores(as_json: bool) -> int:
|
||||||
entries = []
|
entries = []
|
||||||
for path in iter_core_files():
|
for path in iter_core_files():
|
||||||
@@ -1172,6 +1278,15 @@ def run_wait_ready(timeout_seconds: int, interval_seconds: float) -> int:
|
|||||||
while time.time() < deadline:
|
while time.time() < deadline:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
print(f"Healthcheck attempt {attempt}...")
|
print(f"Healthcheck attempt {attempt}...")
|
||||||
|
public_runtime = public_runtime_report()
|
||||||
|
if not public_runtime["ready"]:
|
||||||
|
for issue in public_runtime["issues"]:
|
||||||
|
print(f"Public runtime not ready: {issue}")
|
||||||
|
remaining = deadline - time.time()
|
||||||
|
if remaining <= 0:
|
||||||
|
break
|
||||||
|
time.sleep(min(interval_seconds, remaining))
|
||||||
|
continue
|
||||||
completed = subprocess.run(
|
completed = subprocess.run(
|
||||||
build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True),
|
build_command([str(HEALTHCHECK_PATH), "--mode", "ready"], require_root=True),
|
||||||
check=False,
|
check=False,
|
||||||
@@ -1233,6 +1348,8 @@ def main() -> int:
|
|||||||
return print_status(args.target)
|
return print_status(args.target)
|
||||||
if args.command == "ports":
|
if args.command == "ports":
|
||||||
return print_ports(args.live)
|
return print_ports(args.live)
|
||||||
|
if args.command == "public-ready":
|
||||||
|
return print_public_ready(args.json)
|
||||||
if args.command == "cores":
|
if args.command == "cores":
|
||||||
return print_cores(args.json)
|
return print_cores(args.json)
|
||||||
if args.command == "incidents":
|
if args.command == "incidents":
|
||||||
|
|||||||
@@ -32,6 +32,11 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
|
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
|
||||||
parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds")
|
parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds")
|
||||||
parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install")
|
parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install")
|
||||||
|
parser.add_argument(
|
||||||
|
"--allow-internal-only",
|
||||||
|
action="store_true",
|
||||||
|
help="Allow installs that omit every client-visible public channel",
|
||||||
|
)
|
||||||
|
|
||||||
channel_group = parser.add_mutually_exclusive_group(required=True)
|
channel_group = parser.add_mutually_exclusive_group(required=True)
|
||||||
channel_group.add_argument(
|
channel_group.add_argument(
|
||||||
@@ -76,7 +81,7 @@ def copy_file(source: Path, destination: Path, mode: int) -> None:
|
|||||||
|
|
||||||
def resolve_channels(args: argparse.Namespace) -> list[int]:
|
def resolve_channels(args: argparse.Namespace) -> list[int]:
|
||||||
try:
|
try:
|
||||||
return channel_inventory.resolve_selected_channels(
|
selected_channels = channel_inventory.resolve_selected_channels(
|
||||||
channel_limit=args.channel_limit,
|
channel_limit=args.channel_limit,
|
||||||
explicit_channels=args.channels,
|
explicit_channels=args.channels,
|
||||||
)
|
)
|
||||||
@@ -84,6 +89,20 @@ def resolve_channels(args: argparse.Namespace) -> list[int]:
|
|||||||
print(str(exc), file=sys.stderr)
|
print(str(exc), file=sys.stderr)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
if not args.allow_internal_only and not channel_inventory.has_public_channel(
|
||||||
|
selected_channels,
|
||||||
|
client_visible_only=True,
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
"Selected channels do not include any client-visible public channel. "
|
||||||
|
"Add a public channel such as --channel 1, or pass --allow-internal-only "
|
||||||
|
"if an auth/internal-only stack is intentional.",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
return selected_channels
|
||||||
|
|
||||||
|
|
||||||
def resolve_instances(selected_channels: list[int]) -> list[str]:
|
def resolve_instances(selected_channels: list[int]) -> list[str]:
|
||||||
return channel_inventory.get_instances(selected_channels)
|
return channel_inventory.get_instances(selected_channels)
|
||||||
|
|||||||
@@ -132,12 +132,15 @@ Useful direct flags:
|
|||||||
Operational CLI:
|
Operational CLI:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
metinctl public-ready
|
||||||
metinctl healthcheck --mode full
|
metinctl healthcheck --mode full
|
||||||
metinctl healthcheck --mode ready
|
metinctl healthcheck --mode ready
|
||||||
metinctl wait-ready
|
metinctl wait-ready
|
||||||
```
|
```
|
||||||
|
|
||||||
`metinctl wait-ready` now uses the lighter `ready` mode on purpose. The deeper `full` mode remains available as an explicit admin healthcheck.
|
`metinctl public-ready` verifies that every enabled client-visible public channel unit is active and that its declared listener port is actually up.
|
||||||
|
|
||||||
|
`metinctl wait-ready` now first waits for the public runtime to be up and only then runs the lighter `ready` login probe. The deeper `full` mode remains available as an explicit admin healthcheck.
|
||||||
|
|
||||||
Example negative auth test:
|
Example negative auth test:
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ The Debian deployment installs:
|
|||||||
- listing managed units
|
- listing managed units
|
||||||
- checking service status
|
- checking service status
|
||||||
- listing declared ports
|
- listing declared ports
|
||||||
|
- verifying that enabled public client-facing channels are actually up
|
||||||
- listing recent auth failures
|
- listing recent auth failures
|
||||||
- listing recent login sessions
|
- listing recent login sessions
|
||||||
- listing stale open sessions without logout
|
- listing stale open sessions without logout
|
||||||
@@ -78,6 +79,12 @@ Show declared ports and whether they are currently listening:
|
|||||||
metinctl ports --live
|
metinctl ports --live
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Verify that enabled client-visible public channels are active and listening:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl public-ready
|
||||||
|
```
|
||||||
|
|
||||||
Show recent real auth failures and skip smoke-test logins:
|
Show recent real auth failures and skip smoke-test logins:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -219,6 +226,7 @@ It also reconciles enabled game instance units against the selected channels:
|
|||||||
- selected game units are enabled
|
- selected game units are enabled
|
||||||
- stale game units are disabled
|
- stale game units are disabled
|
||||||
- if `--restart` is passed, stale game units are disabled with `--now`
|
- if `--restart` is passed, stale game units are disabled with `--now`
|
||||||
|
- installs now refuse an auth/internal-only channel selection unless you pass `--allow-internal-only`
|
||||||
|
|
||||||
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
|
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user