ops: add auth IP and stale session audit
This commit is contained in:
@@ -58,6 +58,12 @@ def parse_args() -> argparse.Namespace:
|
||||
auth_activity.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
|
||||
auth_activity.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||
|
||||
auth_ips = subparsers.add_parser("auth-ips", help="Summarize auth activity by source IP")
|
||||
auth_ips.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
|
||||
auth_ips.add_argument("--limit", type=int, default=20, help="Maximum IPs to show")
|
||||
auth_ips.add_argument("--include-smoke", action="store_true", help="Include smoke-test logins")
|
||||
auth_ips.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||
|
||||
sessions = subparsers.add_parser("sessions", help="Show recent login sessions from loginlog2")
|
||||
sessions.add_argument("--hours", type=int, default=24, help="How many hours back to inspect")
|
||||
sessions.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
|
||||
@@ -65,6 +71,13 @@ def parse_args() -> argparse.Namespace:
|
||||
sessions.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
|
||||
sessions.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||
|
||||
session_audit = subparsers.add_parser("session-audit", help="Show stale open sessions without logout")
|
||||
session_audit.add_argument("--hours", type=int, default=72, help="How many hours back to inspect")
|
||||
session_audit.add_argument("--stale-minutes", type=int, default=30, help="Minimum age for an open session to be considered stale")
|
||||
session_audit.add_argument("--limit", type=int, default=20, help="Maximum sessions to show")
|
||||
session_audit.add_argument("--include-orphans", action="store_true", help="Include rows whose account login no longer exists")
|
||||
session_audit.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||
|
||||
status_parser = subparsers.add_parser("status", help="Show current unit state")
|
||||
status_parser.add_argument("target", nargs="?", default="all", help="stack, db, auth, game, channel:<id>, instance:<name>")
|
||||
|
||||
@@ -402,6 +415,75 @@ LIMIT {int(limit)}
|
||||
return entries
|
||||
|
||||
|
||||
def fetch_stale_sessions(hours: int, stale_minutes: int, limit: int, include_orphans: bool) -> list[dict[str, str]]:
    """Return open ``log.loginlog2`` sessions that have exceeded a staleness age.

    A row qualifies when its ``login_time`` falls inside the last ``hours``
    hours, its ``logout_time`` is NULL, and at least ``stale_minutes`` minutes
    have elapsed since ``login_time``.

    Args:
        hours: Size of the look-back window, in hours.
        stale_minutes: Minimum age, in minutes, for an open session to count.
        limit: Maximum number of rows requested from the database.
        include_orphans: When false, rows with no matching ``account.account``
            login are filtered out.

    Returns:
        One dict per session, newest login first, with the keys
        ``login_time``, ``raw_type``, ``login``, ``account_id``, ``pid``,
        ``ip`` and ``age_minutes``.
    """
    # Every interpolated value goes through int(), so nothing but digits
    # (and an optional sign) can reach the SQL text.
    conditions = [
        f"l.login_time >= NOW() - INTERVAL {int(hours)} HOUR",
        "l.logout_time IS NULL",
        f"TIMESTAMPDIFF(MINUTE, l.login_time, NOW()) >= {int(stale_minutes)}",
    ]
    if not include_orphans:
        # The LEFT JOIN yields a NULL login for rows without an account.
        conditions.append("a.login IS NOT NULL")
    where_sql = " AND ".join(conditions)

    # The SQL is kept flush-left so the query text sent to the server does
    # not pick up the function's indentation.
    query = f"""
SELECT
  DATE_FORMAT(l.login_time, '%Y-%m-%d %H:%i:%s'),
  l.type,
  COALESCE(a.login, ''),
  l.account_id,
  l.pid,
  COALESCE(INET_NTOA(l.ip), ''),
  TIMESTAMPDIFF(MINUTE, l.login_time, NOW())
FROM log.loginlog2 l
LEFT JOIN account.account a ON a.id = l.account_id
WHERE {where_sql}
ORDER BY l.login_time DESC
LIMIT {int(limit)}
""".strip()

    column_count = 7
    sessions: list[dict[str, str]] = []
    for row in run_mariadb_query(query):
        # Pad short rows in place so the unpacking below always succeeds.
        # NOTE(review): presumably rows arrive as lists of strings - confirm
        # against run_mariadb_query.
        row.extend([""] * (column_count - len(row)))
        started_at, session_type, login, account_id, pid, ip, age = row[:column_count]
        session = {
            "login_time": started_at,
            "raw_type": session_type,
            # An empty login means the join found no account for this row.
            "login": login or f"<missing:{account_id}>",
            "account_id": account_id,
            "pid": pid,
            "ip": ip or "-",
            "age_minutes": age or "0",
        }
        sessions.append(session)
    return sessions
|
||||
|
||||
|
||||
def count_stale_sessions(hours: int, stale_minutes: int, include_orphans: bool) -> int:
    """Count open ``log.loginlog2`` sessions that have exceeded a staleness age.

    Applies the same filter as the stale-session listing: ``login_time``
    within the last ``hours`` hours, ``logout_time`` NULL, and at least
    ``stale_minutes`` minutes elapsed since login.

    Args:
        hours: Size of the look-back window, in hours.
        stale_minutes: Minimum age, in minutes, for an open session to count.
        include_orphans: When false, rows with no matching ``account.account``
            login are left out of the count.

    Returns:
        The number of matching rows, or 0 when the query returns nothing or a
        value that cannot be parsed as an integer.
    """
    # Every interpolated value goes through int() before reaching the SQL.
    conditions = [
        f"l.login_time >= NOW() - INTERVAL {int(hours)} HOUR",
        "l.logout_time IS NULL",
        f"TIMESTAMPDIFF(MINUTE, l.login_time, NOW()) >= {int(stale_minutes)}",
    ]
    if not include_orphans:
        # The LEFT JOIN yields a NULL login for rows without an account.
        conditions.append("a.login IS NOT NULL")
    where_sql = " AND ".join(conditions)

    # The SQL is kept flush-left so the query text sent to the server does
    # not pick up the function's indentation.
    query = f"""
SELECT COUNT(*)
FROM log.loginlog2 l
LEFT JOIN account.account a ON a.id = l.account_id
WHERE {where_sql}
""".strip()

    result = run_mariadb_query(query)
    if result and result[0]:
        try:
            return int(result[0][0])
        except ValueError:
            # Unparseable output is reported as "no stale sessions".
            return 0
    return 0
|
||||
|
||||
|
||||
def live_ports() -> set[int]:
|
||||
if shutil.which("ss") is None:
|
||||
return set()
|
||||
@@ -478,6 +560,9 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
||||
listening = live_ports()
|
||||
port_rows = iter_port_rows()
|
||||
auth_summary = summarize_auth_activity(hours, include_smoke)
|
||||
stale_session_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=False)
|
||||
stale_total_count = count_stale_sessions(hours=max(hours, 1), stale_minutes=30, include_orphans=True)
|
||||
stale_orphan_count = max(stale_total_count - stale_session_count, 0)
|
||||
repos = {
|
||||
"m2dev-server": git_summary(REPO_ROOT),
|
||||
"m2dev-server-src": git_summary(SOURCE_REPO_ROOT),
|
||||
@@ -499,6 +584,11 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
||||
for row in port_rows
|
||||
],
|
||||
"auth": auth_summary,
|
||||
"stale_open_sessions": {
|
||||
"user_count": stale_session_count,
|
||||
"orphan_count": stale_orphan_count,
|
||||
"total_count": stale_total_count,
|
||||
},
|
||||
"core_count": core_count,
|
||||
"incident_count": incident_count,
|
||||
}
|
||||
@@ -533,6 +623,7 @@ def print_summary(hours: int, include_smoke: bool, as_json: bool) -> int:
|
||||
print(f"game instances active: {game_active}/{len(enabled_game_units)} enabled ({len(game_units)} declared)")
|
||||
print(f"core files: {core_count}")
|
||||
print(f"incident bundles: {incident_count}")
|
||||
print(f"stale open sessions (>30m): {stale_session_count} user, {stale_orphan_count} orphan")
|
||||
print()
|
||||
|
||||
print("Public Ports")
|
||||
@@ -603,6 +694,83 @@ def print_auth_activity(hours: int, limit: int, status: str, include_smoke: bool
|
||||
return 0
|
||||
|
||||
|
||||
def print_auth_ips(hours: int, limit: int, include_smoke: bool, as_json: bool) -> int:
    """Print auth activity aggregated per source IP.

    Events come from ``filter_auth_events(hours, include_smoke, "all")``.
    Each IP gets a success counter, a failure counter, and the time, login
    and reason of its most recent event. IPs are ranked by failure count,
    then success count, then last-seen time, all descending, and the list is
    cut to ``limit`` entries.

    Args:
        hours: Size of the look-back window, in hours.
        limit: Maximum number of IPs to report.
        include_smoke: Forwarded to ``filter_auth_events``.
        as_json: Emit the payload as indented JSON instead of a table.

    Returns:
        Always 0.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    per_ip: dict[str, dict[str, object]] = {}

    for event in filter_auth_events(hours, include_smoke, "all"):
        ip = str(event["ip"])
        seen_at = event["time"]
        login = str(event["login"])
        reason = str(event["reason"])

        stats = per_ip.get(ip)
        if stats is None:
            stats = {
                "ip": ip,
                "success_count": 0,
                "failure_count": 0,
                "last_seen": seen_at,
                "last_login": login,
                "last_reason": reason,
            }
            per_ip[ip] = stats

        # Anything that is not an explicit success is counted as a failure.
        counter = "success_count" if event["status"] == "success" else "failure_count"
        stats[counter] = int(stats[counter]) + 1

        # ">=" lets a later event with an equal timestamp replace the details.
        if seen_at >= stats["last_seen"]:
            stats["last_seen"] = seen_at
            stats["last_login"] = login
            stats["last_reason"] = reason

    def rank(item: dict[str, object]) -> tuple:
        return (
            int(item["failure_count"]),
            int(item["success_count"]),
            item["last_seen"],
        )

    ranked = list(per_ip.values())
    ranked.sort(key=rank, reverse=True)
    top = ranked[:limit]

    entries = []
    for item in top:
        entries.append(
            {
                "ip": str(item["ip"]),
                "success_count": int(item["success_count"]),
                "failure_count": int(item["failure_count"]),
                "last_seen": item["last_seen"].strftime(timestamp_format),
                "last_login": str(item["last_login"]),
                "last_reason": str(item["last_reason"]),
            }
        )

    payload = {
        "window_hours": hours,
        "limit": limit,
        "include_smoke": include_smoke,
        "count": len(top),
        "entries": entries,
    }

    if as_json:
        print(json.dumps(payload, indent=2))
        return 0

    if not top:
        print(f"No auth IP activity in the last {hours}h.")
        return 0

    # The table shows the same values as the JSON entries, with counters
    # rendered as strings.
    table_rows = [
        [
            entry["ip"],
            str(entry["success_count"]),
            str(entry["failure_count"]),
            entry["last_seen"],
            entry["last_login"],
            entry["last_reason"],
        ]
        for entry in entries
    ]
    print_table(["ip", "success", "failure", "last_seen", "last_login", "last_reason"], table_rows)
    return 0
|
||||
|
||||
|
||||
def resolve_target_units(target: str) -> list[str]:
|
||||
normalized = target.strip().lower()
|
||||
|
||||
@@ -797,6 +965,41 @@ def print_sessions(hours: int, limit: int, active_only: bool, include_orphans: b
|
||||
return 0
|
||||
|
||||
|
||||
def print_session_audit(hours: int, stale_minutes: int, limit: int, include_orphans: bool, as_json: bool) -> int:
    """Print stale open sessions as JSON or as a table.

    The sessions come from ``fetch_stale_sessions`` called with the same
    ``hours``, ``stale_minutes``, ``limit`` and ``include_orphans`` values.

    Args:
        hours: Size of the look-back window, in hours.
        stale_minutes: Minimum age, in minutes, for an open session to count.
        limit: Maximum number of sessions to show.
        include_orphans: Whether rows without a matching account login are kept.
        as_json: Emit the payload as indented JSON instead of a table.

    Returns:
        Always 0.
    """
    entries = fetch_stale_sessions(hours, stale_minutes, limit, include_orphans)

    if as_json:
        report = {
            "window_hours": hours,
            "stale_minutes": stale_minutes,
            "limit": limit,
            "include_orphans": include_orphans,
            "count": len(entries),
            "entries": entries,
        }
        print(json.dumps(report, indent=2))
        return 0

    if not entries:
        print(f"No stale open sessions older than {stale_minutes} minutes in the last {hours}h.")
        return 0

    # Entry keys in the order the table columns are displayed.
    column_keys = ("login_time", "age_minutes", "login", "account_id", "pid", "ip", "raw_type")
    table_rows = [[entry[key] for key in column_keys] for entry in entries]
    print_table(["login_time", "age_min", "login", "account", "pid", "ip", "raw_type"], table_rows)
    return 0
|
||||
|
||||
|
||||
def run_healthcheck(mode: str) -> int:
|
||||
if not HEALTHCHECK_PATH.exists():
|
||||
raise SystemExit(f"Missing healthcheck wrapper: {HEALTHCHECK_PATH}")
|
||||
@@ -853,6 +1056,8 @@ def main() -> int:
|
||||
return print_summary(args.hours, args.include_smoke, args.json)
|
||||
if args.command == "auth-activity":
|
||||
return print_auth_activity(args.hours, args.limit, args.status, args.include_smoke, args.json)
|
||||
if args.command == "auth-ips":
|
||||
return print_auth_ips(args.hours, args.limit, args.include_smoke, args.json)
|
||||
if args.command == "status":
|
||||
return print_status(args.target)
|
||||
if args.command == "ports":
|
||||
@@ -865,6 +1070,8 @@ def main() -> int:
|
||||
return print_auth_failures(args.hours, args.limit, args.include_smoke, args.json)
|
||||
if args.command == "sessions":
|
||||
return print_sessions(args.hours, args.limit, args.active_only, args.include_orphans, args.json)
|
||||
if args.command == "session-audit":
|
||||
return print_session_audit(args.hours, args.stale_minutes, args.limit, args.include_orphans, args.json)
|
||||
if args.command in {"start", "stop", "restart"}:
|
||||
return run_unit_action(args.command, args.target)
|
||||
if args.command == "logs":
|
||||
|
||||
@@ -34,12 +34,14 @@ The Debian deployment installs:
|
||||
|
||||
- showing an operational summary
|
||||
- showing recent auth success/failure activity
|
||||
- showing auth activity grouped by source IP
|
||||
- viewing inventory
|
||||
- listing managed units
|
||||
- checking service status
|
||||
- listing declared ports
|
||||
- listing recent auth failures
|
||||
- listing recent login sessions
|
||||
- listing stale open sessions without logout
|
||||
- restarting the whole stack or specific channels/instances
|
||||
- viewing logs
|
||||
- listing core files in the runtime tree
|
||||
@@ -91,6 +93,12 @@ Show only recent auth failures including smoke tests:
|
||||
metinctl auth-activity --status failure --include-smoke
|
||||
```
|
||||
|
||||
Show auth activity grouped by IP:
|
||||
|
||||
```bash
|
||||
metinctl auth-ips
|
||||
```
|
||||
|
||||
Include smoke-test failures too:
|
||||
|
||||
```bash
|
||||
@@ -109,6 +117,18 @@ Show only sessions that still have no recorded logout:
|
||||
metinctl sessions --active-only
|
||||
```
|
||||
|
||||
Show stale open sessions older than 30 minutes:
|
||||
|
||||
```bash
|
||||
metinctl session-audit
|
||||
```
|
||||
|
||||
Use a different stale threshold:
|
||||
|
||||
```bash
|
||||
metinctl session-audit --stale-minutes 10
|
||||
```
|
||||
|
||||
Restart only channel 1 cores:
|
||||
|
||||
```bash
|
||||
|
||||
Reference in New Issue
Block a user