ops: add auth IP and stale session audit
This commit is contained in:
@@ -58,6 +58,12 @@ def parse_args() -> argparse.Namespace:
|
|||||||
auth_activity.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
|
auth_activity.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
|
||||||
auth_activity.add_argument("--json", action="store_true", help="Print raw JSON")
|
auth_activity.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
|
auth_ips = subparsers.add_parser("auth-ips", help="Summarize auth activity by source IP")
|
||||||
|
auth_ips.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
|
||||||
|
auth_ips.add_argument("--limit", type=int, default=20, help="Maximum IPs to show")
|
||||||
|
auth_ips.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
|
||||||
|
auth_ips.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
sessions = subparsers.add_parser("sessions", help="Show recent login sessions from loginlog2")
|
sessions = subparsers.add_parser("sessions", help="Show recent login sessions from loginlog2")
|
||||||
sessions.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
|
sessions.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
|
||||||
sessions.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
|
sessions.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
|
||||||
@@ -65,6 +71,13 @@ def parse_args() -> argparse.Namespace:
|
|||||||
sessions.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
|
sessions.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
|
||||||
sessions.add_argument("--json", action="store_true", help="Print raw JSON")
|
sessions.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
|
session_audit = subparsers.add_parser("session-audit", help="Show stale open sessions without logout")
|
||||||
|
session_audit.add_argument("--hours", type=int, default=72, help="How many hours back to inspect")
|
||||||
|
session_audit.add_argument("--stale-minutes", type=int, default=30, help="Minimum age for an open session to be considered stale")
|
||||||
|
session_audit.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
|
||||||
|
session_audit.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
|
||||||
|
session_audit.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
status_parser = subparsers.add_parser("status", help="Show current unit state")
|
status_parser = subparsers.add_parser("status", help="Show current unit state")
|
||||||
status_parser.add_argument("target", nargs="?", default="all", help="stack, db, auth, game, channel:<id>, instance:<name>")
|
status_parser.add_argument("target", nargs="?", default="all", help="stack, db, auth, game, channel:<id>, instance:<name>")
|
||||||
|
|
||||||
@@ -402,6 +415,75 @@ LIMIT {int(limit)}
|
|||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_stale_sessions(hours: int, stale_minutes: int, limit: int, include_orphans: bool) -> list[dict[str, str]]:
    """Return open sessions from log.loginlog2 that look stale.

    A session is selected when it started within the last ``hours`` hours,
    has no ``logout_time`` and is at least ``stale_minutes`` minutes old.
    Rows whose account no longer resolves to a login are dropped unless
    ``include_orphans`` is set. At most ``limit`` rows are returned, newest
    login first.
    """
    # Every interpolated value is forced through int() before reaching the SQL text.
    conditions = [
        f"l.login_time >= NOW() - INTERVAL {int(hours)} HOUR",
        "l.logout_time IS NULL",
        f"TIMESTAMPDIFF(MINUTE, l.login_time, NOW()) >= {int(stale_minutes)}",
    ]
    if not include_orphans:
        # The LEFT JOIN leaves a.login NULL when the account row is gone.
        conditions.append("a.login IS NOT NULL")
    where_sql = " AND ".join(conditions)
    row_cap = int(limit)

    query = f"""
SELECT
DATE_FORMAT(l.login_time, '%Y-%m-%d %H:%i:%s'),
l.type,
COALESCE(a.login, ''),
l.account_id,
l.pid,
COALESCE(INET_NTOA(l.ip), ''),
TIMESTAMPDIFF(MINUTE, l.login_time, NOW())
FROM log.loginlog2 l
LEFT JOIN account.account a ON a.id = l.account_id
WHERE {where_sql}
ORDER BY l.login_time DESC
LIMIT {row_cap}
""".strip()

    # Output keys, in the same order as the selected columns.
    fields = ("login_time", "raw_type", "login", "account_id", "pid", "ip", "age_minutes")

    sessions: list[dict[str, str]] = []
    for row in run_mariadb_query(query):
        # Pad short rows so every field has a value; zip() ignores extras.
        row.extend([""] * (len(fields) - len(row)))
        record = dict(zip(fields, row))
        record["login"] = record["login"] or f"<missing:{record['account_id']}>"
        record["ip"] = record["ip"] or "-"
        record["age_minutes"] = record["age_minutes"] or "0"
        sessions.append(record)
    return sessions
|
||||||
|
|
||||||
|
|
||||||
|
def count_stale_sessions(hours: int, stale_minutes: int, include_orphans: bool) -> int:
    """Count open sessions in log.loginlog2 that look stale.

    Counts rows that started within the last ``hours`` hours, have no
    ``logout_time`` and are at least ``stale_minutes`` minutes old. Rows
    without a matching account login are only counted when
    ``include_orphans`` is set. Returns 0 when the query yields nothing or
    the first value is not a valid integer.
    """
    conditions = [
        f"l.login_time >= NOW() - INTERVAL {int(hours)} HOUR",
        "l.logout_time IS NULL",
        f"TIMESTAMPDIFF(MINUTE, l.login_time, NOW()) >= {int(stale_minutes)}",
    ]
    if not include_orphans:
        # The LEFT JOIN leaves a.login NULL when the account row is gone.
        conditions.append("a.login IS NOT NULL")
    where_sql = " AND ".join(conditions)

    query = f"""
SELECT COUNT(*)
FROM log.loginlog2 l
LEFT JOIN account.account a ON a.id = l.account_id
WHERE {where_sql}
""".strip()

    result = run_mariadb_query(query)
    first_row = result[0] if result else None
    if not first_row:
        return 0

    try:
        total = int(first_row[0])
    except ValueError:
        # Unparseable count: report zero instead of failing the caller.
        total = 0
    return total
|
||||||
|
|
||||||
|
|
||||||
def live_ports() -> set[int]:
|
def live_ports() -> set[int]:
|
||||||
if shutil.which("ss") is None:
|
if shutil.which("ss") is None:
|
||||||
return set()
|
return set()
|
||||||
@@ -478,6 +560,9 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
listening = live_ports()
|
listening = live_ports()
|
||||||
port_rows = iter_port_rows()
|
port_rows = iter_port_rows()
|
||||||
auth_summary = summarize_auth_activity(hours, include_smoke)
|
auth_summary = summarize_auth_activity(hours, include_smoke)
|
||||||
|
stale_session_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=False)
|
||||||
|
stale_total_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=True)
|
||||||
|
stale_orphan_count = max(stale_total_count - stale_session_count, 0)
|
||||||
repos = {
|
repos = {
|
||||||
"m2dev-server": git_summary(REPO_ROOT),
|
"m2dev-server": git_summary(REPO_ROOT),
|
||||||
"m2dev-server-src": git_summary(SOURCE_REPO_ROOT),
|
"m2dev-server-src": git_summary(SOURCE_REPO_ROOT),
|
||||||
@@ -499,6 +584,11 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
for row in port_rows
|
for row in port_rows
|
||||||
],
|
],
|
||||||
"auth": auth_summary,
|
"auth": auth_summary,
|
||||||
|
"stale_open_sessions": {
|
||||||
|
"user_count": stale_session_count,
|
||||||
|
"orphan_count": stale_orphan_count,
|
||||||
|
"total_count": stale_total_count,
|
||||||
|
},
|
||||||
"core_count": core_count,
|
"core_count": core_count,
|
||||||
"incident_count": incident_count,
|
"incident_count": incident_count,
|
||||||
}
|
}
|
||||||
@@ -533,6 +623,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
|||||||
print(f"game instances active: {game_active}/{len(enabled_game_units)} enabled ({len(game_units)} declared)")
|
print(f"game instances active: {game_active}/{len(enabled_game_units)} enabled ({len(game_units)} declared)")
|
||||||
print(f"core files: {core_count}")
|
print(f"core files: {core_count}")
|
||||||
print(f"incident bundles: {incident_count}")
|
print(f"incident bundles: {incident_count}")
|
||||||
|
print(f"stale open sessions (>30m): {stale_session_count} user, {stale_orphan_count} orphan")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print("Public Ports")
|
print("Public Ports")
|
||||||
@@ -603,6 +694,83 @@ def print_auth_activity(hours: int, limit: int, status: str, include_smoke: bool
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_auth_ips(hours: int, limit: int, include_smoke: bool, as_json: bool) -> int:
    """Print auth activity from the last ``hours`` hours grouped by source IP.

    Each IP gets success and failure counters plus the time, login and
    reason of its most recent event. IPs are ranked by failure count, then
    success count, then last-seen time (all descending), and the result is
    cut to ``limit`` entries. Output is JSON when ``as_json`` is set,
    otherwise a table. Always returns 0.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    per_ip: dict[str, dict[str, object]] = {}

    for event in filter_auth_events(hours, include_smoke, "all"):
        source_ip = str(event["ip"])
        seen_at = event["time"]
        login = str(event["login"])
        reason = str(event["reason"])

        stats = per_ip.setdefault(
            source_ip,
            {
                "ip": source_ip,
                "success_count": 0,
                "failure_count": 0,
                "last_seen": seen_at,
                "last_login": login,
                "last_reason": reason,
            },
        )

        # Anything that is not an explicit success counts as a failure.
        counter = "success_count" if event["status"] == "success" else "failure_count"
        stats[counter] = int(stats[counter]) + 1

        if seen_at >= stats["last_seen"]:
            stats.update(last_seen=seen_at, last_login=login, last_reason=reason)

    def rank(stats: dict[str, object]) -> tuple:
        return (int(stats["failure_count"]), int(stats["success_count"]), stats["last_seen"])

    ranked = list(per_ip.values())
    ranked.sort(key=rank, reverse=True)
    top = ranked[:limit]

    # Format once; the JSON payload and the table show the same values.
    formatted = [
        {
            "ip": str(stats["ip"]),
            "success_count": int(stats["success_count"]),
            "failure_count": int(stats["failure_count"]),
            "last_seen": stats["last_seen"].strftime(timestamp_format),
            "last_login": str(stats["last_login"]),
            "last_reason": str(stats["last_reason"]),
        }
        for stats in top
    ]

    if as_json:
        payload = {
            "window_hours": hours,
            "limit": limit,
            "include_smoke": include_smoke,
            "count": len(top),
            "entries": formatted,
        }
        print(json.dumps(payload, indent=2))
        return 0

    if not top:
        print(f"No auth IP activity in the last {hours}h.")
        return 0

    table_rows = [[str(value) for value in entry.values()] for entry in formatted]
    print_table(["ip", "success", "failure", "last_seen", "last_login", "last_reason"], table_rows)
    return 0
|
||||||
|
|
||||||
|
|
||||||
def resolve_target_units(target: str) -> list[str]:
|
def resolve_target_units(target: str) -> list[str]:
|
||||||
normalized = target.strip().lower()
|
normalized = target.strip().lower()
|
||||||
|
|
||||||
@@ -797,6 +965,41 @@ def print_sessions(hours: int, limit: int, active_only: bool, include_orphans: b
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_session_audit(hours: int, stale_minutes: int, limit: int, include_orphans: bool, as_json: bool) -> int:
    """Print stale open sessions as JSON or as a table.

    Fetches the sessions via ``fetch_stale_sessions`` with the given window,
    staleness threshold, row limit and orphan setting. With ``as_json`` the
    query parameters and entries are dumped as JSON; otherwise a table is
    printed, or a one-line notice when nothing matched. Always returns 0.
    """
    entries = fetch_stale_sessions(hours, stale_minutes, limit, include_orphans)

    if as_json:
        report = {
            "window_hours": hours,
            "stale_minutes": stale_minutes,
            "limit": limit,
            "include_orphans": include_orphans,
            "count": len(entries),
            "entries": entries,
        }
        print(json.dumps(report, indent=2))
        return 0

    if not entries:
        print(f"No stale open sessions older than {stale_minutes} minutes in the last {hours}h.")
        return 0

    # Entry keys in display order; the printed headers below are shorter labels.
    columns = ("login_time", "age_minutes", "login", "account_id", "pid", "ip", "raw_type")
    table = [[entry[key] for key in columns] for entry in entries]
    print_table(["login_time", "age_min", "login", "account", "pid", "ip", "raw_type"], table)
    return 0
|
||||||
|
|
||||||
|
|
||||||
def run_healthcheck(mode: str) -> int:
|
def run_healthcheck(mode: str) -> int:
|
||||||
if not HEALTHCHECK_PATH.exists():
|
if not HEALTHCHECK_PATH.exists():
|
||||||
raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
|
raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
|
||||||
@@ -853,6 +1056,8 @@ def main() -> int:
|
|||||||
return print_summary(args.hours, args.include_smoke, args.json)
|
return print_summary(args.hours, args.include_smoke, args.json)
|
||||||
if args.command == "auth-activity":
|
if args.command == "auth-activity":
|
||||||
return print_auth_activity(args.hours, args.limit, args.status, args.include_smoke, args.json)
|
return print_auth_activity(args.hours, args.limit, args.status, args.include_smoke, args.json)
|
||||||
|
if args.command == "auth-ips":
|
||||||
|
return print_auth_ips(args.hours, args.limit, args.include_smoke, args.json)
|
||||||
if args.command == "status":
|
if args.command == "status":
|
||||||
return print_status(args.target)
|
return print_status(args.target)
|
||||||
if args.command == "ports":
|
if args.command == "ports":
|
||||||
@@ -865,6 +1070,8 @@ def main() -> int:
|
|||||||
return print_auth_failures(args.hours, args.limit, args.include_smoke, args.json)
|
return print_auth_failures(args.hours, args.limit, args.include_smoke, args.json)
|
||||||
if args.command == "sessions":
|
if args.command == "sessions":
|
||||||
return print_sessions(args.hours, args.limit, args.active_only, args.include_orphans, args.json)
|
return print_sessions(args.hours, args.limit, args.active_only, args.include_orphans, args.json)
|
||||||
|
if args.command == "session-audit":
|
||||||
|
return print_session_audit(args.hours, args.stale_minutes, args.limit, args.include_orphans, args.json)
|
||||||
if args.command in {"start", "stop", "restart"}:
|
if args.command in {"start", "stop", "restart"}:
|
||||||
return run_unit_action(args.command, args.target)
|
return run_unit_action(args.command, args.target)
|
||||||
if args.command == "logs":
|
if args.command == "logs":
|
||||||
|
|||||||
@@ -34,12 +34,14 @@ The Debian deployment installs:
|
|||||||
|
|
||||||
- showing an operational summary
|
- showing an operational summary
|
||||||
- showing recent auth success/failure activity
|
- showing recent auth success/failure activity
|
||||||
|
- showing auth activity grouped by source IP
|
||||||
- viewing inventory
|
- viewing inventory
|
||||||
- listing managed units
|
- listing managed units
|
||||||
- checking service status
|
- checking service status
|
||||||
- listing declared ports
|
- listing declared ports
|
||||||
- listing recent auth failures
|
- listing recent auth failures
|
||||||
- listing recent login sessions
|
- listing recent login sessions
|
||||||
|
- listing stale open sessions without logout
|
||||||
- restarting the whole stack or specific channels/instances
|
- restarting the whole stack or specific channels/instances
|
||||||
- viewing logs
|
- viewing logs
|
||||||
- listing core files in the runtime tree
|
- listing core files in the runtime tree
|
||||||
@@ -91,6 +93,12 @@ Show only recent auth failures including smoke tests:
|
|||||||
metinctl auth-activity --status failure --include-smoke
|
metinctl auth-activity --status failure --include-smoke
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Show auth activity grouped by IP:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl auth-ips
|
||||||
|
```
|
||||||
|
|
||||||
Include smoke-test failures too:
|
Include smoke-test failures too:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -109,6 +117,18 @@ Show only sessions that still have no recorded logout:
|
|||||||
metinctl sessions --active-only
|
metinctl sessions --active-only
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Show stale open sessions older than 30 minutes:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl session-audit
|
||||||
|
```
|
||||||
|
|
||||||
|
Use a different stale threshold:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl session-audit --stale-minutes 10
|
||||||
|
```
|
||||||
|
|
||||||
Restart only channel 1 cores:
|
Restart only channel 1 cores:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
Reference in New Issue
Block a user