diff --git a/deploy/systemd/bin/metinctl.in b/deploy/systemd/bin/metinctl.in
index 8daa945..f7f1935 100644
--- a/deploy/systemd/bin/metinctl.in
+++ b/deploy/systemd/bin/metinctl.in
@@ -58,6 +58,12 @@ def parse_args() -> argparse.Namespace:
     auth_activity.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
     auth_activity.add_argument("--json", action="store_true", help="Print raw JSON")
 
+    auth_ips = subparsers.add_parser("auth-ips", help="Summarize auth activity by source IP")
+    auth_ips.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
+    auth_ips.add_argument("--limit", type=int, default=20, help="Maximum IPs to show")
+    auth_ips.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
+    auth_ips.add_argument("--json", action="store_true", help="Print raw JSON")
+
     sessions = subparsers.add_parser("sessions", help="Show recent login sessions from loginlog2")
     sessions.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
     sessions.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
@@ -65,6 +71,13 @@ def parse_args() -> argparse.Namespace:
     sessions.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
     sessions.add_argument("--json", action="store_true", help="Print raw JSON")
 
+    session_audit = subparsers.add_parser("session-audit", help="Show stale open sessions without logout")
+    session_audit.add_argument("--hours", type=int, default=72, help="How many hours back to inspect")
+    session_audit.add_argument("--stale-minutes", type=int, default=30, help="Minimum age for an open session to be considered stale")
+    session_audit.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
+    session_audit.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
+    session_audit.add_argument("--json", action="store_true", help="Print raw JSON")
+
     status_parser = subparsers.add_parser("status", help="Show current unit state")
     status_parser.add_argument("target", nargs="?", default="all", help="stack, db, auth, game, channel:, instance:")
 
@@ -402,6 +415,96 @@ LIMIT {int(limit)}
     return entries
 
 
+def _stale_session_where(hours: int, stale_minutes: int, include_orphans: bool) -> str:
+    """Build the WHERE clause shared by the stale-session list and count queries.
+
+    Keeping the filter in one place guarantees both queries agree on what a
+    stale open session is. Numeric inputs are passed through int() before
+    being interpolated into the SQL text.
+    """
+    where_clauses = [
+        f"l.login_time >= NOW() - INTERVAL {int(hours)} HOUR",
+        "l.logout_time IS NULL",
+        f"TIMESTAMPDIFF(MINUTE, l.login_time, NOW()) >= {int(stale_minutes)}",
+    ]
+    if not include_orphans:
+        # Orphans are rows for which the LEFT JOIN finds no account login.
+        where_clauses.append("a.login IS NOT NULL")
+    return " AND ".join(where_clauses)
+
+
+def fetch_stale_sessions(hours: int, stale_minutes: int, limit: int, include_orphans: bool) -> list[dict[str, str]]:
+    """Return open loginlog2 sessions that are at least stale_minutes old.
+
+    Args:
+        hours: Only logins from the last `hours` hours are considered.
+        stale_minutes: Minimum age in minutes of a session without logout_time.
+        limit: Maximum number of rows to return, newest login first.
+        include_orphans: Also return rows whose account login is missing.
+
+    Returns:
+        One dict per session with the keys login_time, raw_type, login,
+        account_id, pid, ip and age_minutes.
+    """
+    query = f"""
+SELECT
+    DATE_FORMAT(l.login_time, '%Y-%m-%d %H:%i:%s'),
+    l.type,
+    COALESCE(a.login, ''),
+    l.account_id,
+    l.pid,
+    COALESCE(INET_NTOA(l.ip), ''),
+    TIMESTAMPDIFF(MINUTE, l.login_time, NOW())
+FROM log.loginlog2 l
+LEFT JOIN account.account a ON a.id = l.account_id
+WHERE {_stale_session_where(hours, stale_minutes, include_orphans)}
+ORDER BY l.login_time DESC
+LIMIT {int(limit)}
+""".strip()
+
+    entries: list[dict[str, str]] = []
+    for row in run_mariadb_query(query):
+        # Pad short rows so the seven-way unpacking below cannot fail.
+        while len(row) < 7:
+            row.append("")
+        login_time, raw_type, login, account_id, pid, ip, age_minutes = row[:7]
+        entries.append(
+            {
+                "login_time": login_time,
+                "raw_type": raw_type,
+                # Orphan rows have no login; show a placeholder, not a blank.
+                "login": login or f"<missing:{account_id}>",
+                "account_id": account_id,
+                "pid": pid,
+                "ip": ip or "-",
+                "age_minutes": age_minutes or "0",
+            }
+        )
+    return entries
+
+
+def count_stale_sessions(hours: int, stale_minutes: int, include_orphans: bool) -> int:
+    """Count open loginlog2 sessions that are at least stale_minutes old.
+
+    Applies the same filter as fetch_stale_sessions() without a row limit.
+    Returns 0 when the query yields no rows or a value that is not an integer.
+    """
+    query = f"""
+SELECT COUNT(*)
+FROM log.loginlog2 l
+LEFT JOIN account.account a ON a.id = l.account_id
+WHERE {_stale_session_where(hours, stale_minutes, include_orphans)}
+""".strip()
+
+    rows = run_mariadb_query(query)
+    if not rows or not rows[0]:
+        return 0
+    try:
+        return int(rows[0][0])
+    except (TypeError, ValueError):
+        return 0
+
+
 def live_ports() -> set[int]:
     if shutil.which("ss") is None:
         return set()
@@ -478,6 +581,9 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
     listening = live_ports()
     port_rows = iter_port_rows()
     auth_summary = summarize_auth_activity(hours, include_smoke)
+    stale_session_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=False)
+    stale_total_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=True)
+    stale_orphan_count = max(stale_total_count - stale_session_count, 0)
     repos = {
         "m2dev-server": git_summary(REPO_ROOT),
         "m2dev-server-src": git_summary(SOURCE_REPO_ROOT),
@@ -499,6 +605,11 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
             for row in port_rows
         ],
         "auth": auth_summary,
+        "stale_open_sessions": {
+            "user_count": stale_session_count,
+            "orphan_count": stale_orphan_count,
+            "total_count": stale_total_count,
+        },
         "core_count": core_count,
         "incident_count": incident_count,
     }
@@ -533,6 +644,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
     print(f"game instances active: {game_active}/{len(enabled_game_units)} enabled ({len(game_units)} declared)")
     print(f"core files: {core_count}")
     print(f"incident bundles: {incident_count}")
+    print(f"stale open sessions (>30m): {stale_session_count} user, {stale_orphan_count} orphan")
 
     print()
     print("Public Ports")
@@ -603,6 +715,92 @@ def print_auth_activity(hours: int, limit: int, status: str, include_smoke: bool
     return 0
 
 
+def print_auth_ips(hours: int, limit: int, include_smoke: bool, as_json: bool) -> int:
+    """Print auth activity of the last `hours` hours grouped by source IP.
+
+    IPs are ranked by failure count, then success count, then most recent
+    event, and at most `limit` of them are shown. Prints JSON when as_json
+    is set, otherwise a table. Always returns 0.
+    """
+    events = filter_auth_events(hours, include_smoke, "all")
+    grouped: dict[str, dict[str, object]] = {}
+
+    for event in events:
+        ip = str(event["ip"])
+        bucket = grouped.setdefault(
+            ip,
+            {
+                "ip": ip,
+                "success_count": 0,
+                "failure_count": 0,
+                "last_seen": event["time"],
+                "last_login": str(event["login"]),
+                "last_reason": str(event["reason"]),
+            },
+        )
+        if event["status"] == "success":
+            bucket["success_count"] = int(bucket["success_count"]) + 1
+        else:
+            bucket["failure_count"] = int(bucket["failure_count"]) + 1
+        # Keep the login/reason of the newest event seen for this IP.
+        if event["time"] >= bucket["last_seen"]:
+            bucket["last_seen"] = event["time"]
+            bucket["last_login"] = str(event["login"])
+            bucket["last_reason"] = str(event["reason"])
+
+    ranked = sorted(
+        grouped.values(),
+        key=lambda item: (
+            int(item["failure_count"]),
+            int(item["success_count"]),
+            item["last_seen"],
+        ),
+        reverse=True,
+    )
+    # Clamp the limit: a negative slice bound would drop rows from the end.
+    rows = ranked[: max(limit, 0)]
+
+    payload = {
+        "window_hours": hours,
+        "limit": limit,
+        "include_smoke": include_smoke,
+        "count": len(rows),
+        "entries": [
+            {
+                "ip": str(row["ip"]),
+                "success_count": int(row["success_count"]),
+                "failure_count": int(row["failure_count"]),
+                "last_seen": row["last_seen"].strftime("%Y-%m-%d %H:%M:%S"),
+                "last_login": str(row["last_login"]),
+                "last_reason": str(row["last_reason"]),
+            }
+            for row in rows
+        ],
+    }
+
+    if as_json:
+        print(json.dumps(payload, indent=2))
+        return 0
+
+    if not rows:
+        print(f"No auth IP activity in the last {hours}h.")
+        return 0
+
+    table_rows = [
+        [
+            str(row["ip"]),
+            str(row["success_count"]),
+            str(row["failure_count"]),
+            row["last_seen"].strftime("%Y-%m-%d %H:%M:%S"),
+            str(row["last_login"]),
+            str(row["last_reason"]),
+        ]
+        for row in rows
+    ]
+    print_table(["ip", "success", "failure", "last_seen", "last_login", "last_reason"], table_rows)
+    return 0
+
+
 def resolve_target_units(target: str) -> list[str]:
     normalized = target.strip().lower()
 
@@ -797,6 +995,46 @@ def print_sessions(hours: int, limit: int, active_only: bool, include_orphans: b
     return 0
 
 
+def print_session_audit(hours: int, stale_minutes: int, limit: int, include_orphans: bool, as_json: bool) -> int:
+    """Print open sessions that have had no logout for at least stale_minutes.
+
+    Rows come from fetch_stale_sessions(); output is JSON when as_json is
+    set, otherwise a table. Always returns 0.
+    """
+    entries = fetch_stale_sessions(hours, stale_minutes, limit, include_orphans)
+    payload = {
+        "window_hours": hours,
+        "stale_minutes": stale_minutes,
+        "limit": limit,
+        "include_orphans": include_orphans,
+        "count": len(entries),
+        "entries": entries,
+    }
+
+    if as_json:
+        print(json.dumps(payload, indent=2))
+        return 0
+
+    if not entries:
+        print(f"No stale open sessions older than {stale_minutes} minutes in the last {hours}h.")
+        return 0
+
+    rows = [
+        [
+            entry["login_time"],
+            entry["age_minutes"],
+            entry["login"],
+            entry["account_id"],
+            entry["pid"],
+            entry["ip"],
+            entry["raw_type"],
+        ]
+        for entry in entries
+    ]
+    print_table(["login_time", "age_min", "login", "account", "pid", "ip", "raw_type"], rows)
+    return 0
+
+
 def run_healthcheck(mode: str) -> int:
     if not HEALTHCHECK_PATH.exists():
         raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
@@ -853,6 +1091,8 @@ def main() -> int:
         return print_summary(args.hours, args.include_smoke, args.json)
     if args.command == "auth-activity":
         return print_auth_activity(args.hours, args.limit, args.status, args.include_smoke, args.json)
+    if args.command == "auth-ips":
+        return print_auth_ips(args.hours, args.limit, args.include_smoke, args.json)
     if args.command == "status":
         return print_status(args.target)
     if args.command == "ports":
@@ -865,6 +1105,8 @@ def main() -> int:
         return print_auth_failures(args.hours, args.limit, args.include_smoke, args.json)
     if args.command == "sessions":
         return print_sessions(args.hours, args.limit, args.active_only, args.include_orphans, args.json)
+    if args.command == "session-audit":
+        return print_session_audit(args.hours, args.stale_minutes, args.limit, args.include_orphans, args.json)
     if args.command in {"start", "stop", "restart"}:
         return run_unit_action(args.command, args.target)
     if args.command == "logs":
diff --git a/docs/server-management.md b/docs/server-management.md
index 336aa2e..9d19fea 100644
--- a/docs/server-management.md
+++ b/docs/server-management.md
@@ -34,12 +34,14 @@ The Debian deployment installs:
 
 - showing an operational summary
 - showing recent auth success/failure activity
+- showing auth activity grouped by source IP
 - viewing inventory
 - listing managed units
 - checking service status
 - listing declared ports
 - listing recent auth failures
 - listing recent login sessions
+- listing stale open sessions without logout
 - restarting the whole stack or specific channels/instances
 - viewing logs
 - listing core files in the runtime tree
@@ -91,6 +93,12 @@ Show only recent auth failures including smoke tests:
 metinctl auth-activity --status failure --include-smoke
 ```
 
+Show auth activity grouped by IP:
+
+```bash
+metinctl auth-ips
+```
+
 Include smoke-test failures too:
 
 ```bash
@@ -109,6 +117,18 @@ Show only sessions that still have no recorded logout:
 metinctl sessions --active-only
 ```
 
+Show stale open sessions older than 30 minutes:
+
+```bash
+metinctl session-audit
+```
+
+Use a different stale threshold:
+
+```bash
+metinctl session-audit --stale-minutes 10
+```
+
 Restart only channel 1 cores:
 
 ```bash