forked from metin-server/m2dev-server
ops: add incident collection pipeline
This commit is contained in:
@@ -34,11 +34,14 @@ The channel selection and port layout now come from the versioned inventory file
|
|||||||
- `/usr/local/libexec/metin-game-instance-start`
|
- `/usr/local/libexec/metin-game-instance-start`
|
||||||
- `/usr/local/libexec/metin-wait-port`
|
- `/usr/local/libexec/metin-wait-port`
|
||||||
- `/usr/local/bin/metinctl`
|
- `/usr/local/bin/metinctl`
|
||||||
|
- `/usr/local/sbin/metin-collect-incident`
|
||||||
|
|
||||||
The `metin-db-ready.service` gate waits until the DB socket is actually accepting connections before `auth` and `game` units start.
|
The `metin-db-ready.service` gate waits until the DB socket is actually accepting connections before `auth` and `game` units start.
|
||||||
|
|
||||||
The installer also reconciles enabled `metin-game@...` instances against the selected channel set so stale units do not stay enabled forever.
|
The installer also reconciles enabled `metin-game@...` instances against the selected channel set so stale units do not stay enabled forever.
|
||||||
|
|
||||||
|
The runtime unit templates now also set `LimitCORE=infinity` for `db`, `auth`, and `game` services.
|
||||||
|
|
||||||
## Optional Environment File
|
## Optional Environment File
|
||||||
|
|
||||||
The runtime units support an optional `EnvironmentFile` for host-local overrides:
|
The runtime units support an optional `EnvironmentFile` for host-local overrides:
|
||||||
|
|||||||
191
deploy/systemd/bin/metin-collect-incident.in
Normal file
191
deploy/systemd/bin/metin-collect-incident.in
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO_ROOT = Path("{{REPO_ROOT}}")
|
||||||
|
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
|
||||||
|
INCIDENT_ROOT_DEFAULT = Path("/var/lib/metin/incidents")
|
||||||
|
|
||||||
|
sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
|
||||||
|
import channel_inventory
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the collector's command-line options.

    Returns:
        Namespace exposing ``tag``, ``since``, ``output_root`` and
        ``include_cores``.
    """
    cli = argparse.ArgumentParser(description="Collect a Metin runtime incident bundle")
    # (flag, keyword arguments) pairs, registered in the order --help lists them.
    option_specs = (
        ("--tag", {"default": "manual", "help": "Short incident tag used in the bundle directory name"}),
        ("--since", {"default": "-30 minutes", "help": "journalctl --since value"}),
        ("--output-root", {"default": str(INCIDENT_ROOT_DEFAULT), "help": "Incident bundle root directory"}),
        ("--include-cores", {"action": "store_true", "help": "Copy matching core files into the bundle"}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_root() -> None:
    """Abort unless the process runs with an effective UID of 0.

    Raises:
        SystemExit: with the message ``Run as root.`` for any other user.
    """
    running_as_root = os.geteuid() == 0
    if running_as_root:
        return
    raise SystemExit("Run as root.")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_tag(value: str) -> str:
    """Reduce a user-supplied tag to characters safe for a directory name.

    Surrounding whitespace is stripped; every remaining character that is not
    an ASCII letter, an ASCII digit, ``-`` or ``_`` is replaced with ``-``.

    Args:
        value: Raw tag text, typically from ``--tag``.

    Returns:
        The sanitized tag, or ``"manual"`` when nothing is left after
        stripping.
    """
    # str.isalnum() alone is also true for non-ASCII letters and digits, so the
    # ASCII check is what actually keeps the bundle name to the intended set.
    filtered = "".join(
        char if (char.isascii() and char.isalnum()) or char in "-_" else "-"
        for char in value.strip()
    )
    return filtered or "manual"
|
||||||
|
|
||||||
|
|
||||||
|
def run(command: list[str], check: bool = True) -> subprocess.CompletedProcess[str]:
    """Run *command* and capture its stdout/stderr as text.

    Output bytes that cannot be decoded are replaced instead of raising, so a
    log file containing non-UTF-8 bytes cannot abort the collection.

    Args:
        command: Program and arguments; executed without a shell.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError`` and a program that cannot be
            started raises ``OSError``.

    Returns:
        The completed process. With ``check=False`` a program that cannot be
        started (missing binary, permission denied) is reported as a
        synthetic result with return code 127 and the OS error in ``stderr``.
    """
    try:
        return subprocess.run(
            command,
            check=check,
            capture_output=True,
            text=True,
            errors="replace",
        )
    except OSError as exc:
        if check:
            raise
        # Best-effort callers still get a result they can write into the bundle.
        return subprocess.CompletedProcess(command, 127, stdout="", stderr=f"{exc}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def write_text(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating missing parent directories.

    An existing file at *path* is overwritten.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def write_command_output(bundle_dir: Path, filename: str, command: list[str], check: bool = False) -> None:
    """Run *command* and save a transcript of it inside the bundle.

    The transcript starts with a ``$ <command>`` header line, followed by the
    captured stdout and, when present, a ``[stderr]`` section.

    Args:
        bundle_dir: Directory the transcript is written into.
        filename: Name of the transcript file, relative to *bundle_dir*.
        command: Program and arguments to execute.
        check: Forwarded to ``run``.
    """
    result = run(command, check=check)
    sections = [f"$ {' '.join(command)}\n\n"]
    if result.stdout:
        sections.append(result.stdout)
    if result.stderr:
        sections.append("\n[stderr]\n" + result.stderr)
    write_text(bundle_dir / filename, "".join(sections))
|
||||||
|
|
||||||
|
|
||||||
|
def copy_log_tails(bundle_dir: Path) -> None:
    """Store the last 400 lines of every channel ``syslog.log`` / ``syserr.log``.

    Logs are discovered below ``RUNTIME_ROOT/channels`` and written under
    ``<bundle_dir>/logs`` at the same path they have relative to
    ``RUNTIME_ROOT``. All ``syslog.log`` files are processed before the
    ``syserr.log`` files, each group in sorted order.
    """
    candidates = [
        *sorted(RUNTIME_ROOT.glob("channels/**/syslog.log")),
        *sorted(RUNTIME_ROOT.glob("channels/**/syserr.log")),
    ]
    for log_path in candidates:
        if not log_path.is_file():
            continue
        target = bundle_dir / "logs" / log_path.relative_to(RUNTIME_ROOT)
        target.parent.mkdir(parents=True, exist_ok=True)
        tail = run(["tail", "-n", "400", str(log_path)], check=False)
        pieces = [f"# tail -n 400 {log_path}\n\n"]
        if tail.stdout:
            pieces.append(tail.stdout)
        if tail.stderr:
            pieces.append("\n[stderr]\n" + tail.stderr)
        target.write_text("".join(pieces), encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def find_core_files() -> list[Path]:
    """Return, in sorted order, the regular files matching ``core*`` below
    ``RUNTIME_ROOT/channels``.
    """
    candidates = sorted(RUNTIME_ROOT.glob("channels/**/core*"))
    return [candidate for candidate in candidates if candidate.is_file()]
|
||||||
|
|
||||||
|
|
||||||
|
def write_core_metadata(bundle_dir: Path, core_files: list[Path]) -> None:
    """Write ``core-files.json`` describing each core file.

    Every entry records the file's path, its size in bytes and its
    modification time as an ISO 8601 UTC timestamp. An empty *core_files*
    list produces an empty JSON array.
    """

    def describe(core_path: Path) -> dict[str, object]:
        info = core_path.stat()
        modified = datetime.fromtimestamp(info.st_mtime, tz=timezone.utc)
        return {
            "path": str(core_path),
            "size_bytes": info.st_size,
            "mtime": modified.isoformat(),
        }

    records = [describe(core_path) for core_path in core_files]
    write_text(bundle_dir / "core-files.json", json.dumps(records, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
def copy_core_files(bundle_dir: Path, core_files: list[Path]) -> None:
    """Copy each core file into ``<bundle_dir>/cores``.

    The path relative to ``RUNTIME_ROOT`` is preserved below ``cores`` and the
    copy is made with ``shutil.copy2``.
    """
    for source in core_files:
        target = bundle_dir / "cores" / source.relative_to(RUNTIME_ROOT)
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, target)
|
||||||
|
|
||||||
|
|
||||||
|
def git_summary(repo_path: Path) -> dict[str, object]:
    """Describe the git state of *repo_path* for the bundle metadata.

    Args:
        repo_path: Directory expected to contain a git checkout.

    Returns:
        A dict with ``path`` and ``present``. When the path exists it also
        holds ``head``, ``branch``, ``dirty`` and ``status`` taken from git.
        If any git invocation exits non-zero, an ``errors`` list records the
        failing sub-command, its return code and its stderr, so an empty
        ``head`` or a false ``dirty`` is not mistaken for a clean checkout.
    """
    summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()}
    if not summary["present"]:
        return summary

    def git(*arguments: str):
        # Failures are collected below rather than raised: the summary is best-effort.
        return run(["git", "-C", str(repo_path), *arguments], check=False)

    queries = {
        "head": ("rev-parse", "HEAD"),
        "status": ("status", "--short"),
        "branch": ("rev-parse", "--abbrev-ref", "HEAD"),
    }
    results = {label: git(*arguments) for label, arguments in queries.items()}

    status_output = results["status"].stdout
    summary.update(
        {
            "head": results["head"].stdout.strip(),
            "branch": results["branch"].stdout.strip(),
            "dirty": bool(status_output.strip()),
            "status": status_output.splitlines(),
        }
    )

    errors = [
        {
            "command": " ".join(queries[label]),
            "returncode": result.returncode,
            "stderr": result.stderr.strip(),
        }
        for label, result in results.items()
        if result.returncode != 0
    ]
    if errors:
        summary["errors"] = errors
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Collect one incident bundle and print its directory.

    The bundle is created as ``<output-root>/<UTC timestamp>-<tag>`` and filled
    with: ``meta.json`` (host, tag, repository revisions), host command
    transcripts (``uname``, ``df``, ``free``, ``ss``, ``systemctl status``),
    one ``journalctl`` transcript per unit, tails of the channel logs and
    ``core-files.json``. Core files themselves are copied only with
    ``--include-cores``.

    Returns:
        0 on success.

    Raises:
        SystemExit: when not run as root, or when the bundle directory for
            this timestamp and tag already exists.
    """
    args = parse_args()
    ensure_root()

    # One clock reading, so the directory name and meta.json agree.
    now = datetime.now(timezone.utc)
    timestamp = now.strftime("%Y%m%dT%H%M%SZ")
    tag = sanitize_tag(args.tag)
    output_root = Path(args.output_root)
    bundle_dir = output_root / f"{timestamp}-{tag}"
    try:
        # Create the directory private from the start; the chmod below then
        # only normalises whatever the umask left.
        bundle_dir.mkdir(mode=0o700, parents=True, exist_ok=False)
    except FileExistsError:
        raise SystemExit(f"Incident bundle already exists: {bundle_dir}") from None
    os.chmod(bundle_dir, 0o700)

    units = [
        channel_inventory.STACK_UNIT,
        channel_inventory.DB_UNIT,
        channel_inventory.DB_READY_UNIT,
        channel_inventory.AUTH_UNIT,
        *channel_inventory.get_game_units(),
    ]

    source_repo = RUNTIME_ROOT.parent.parent / "repos" / "m2dev-server-src"
    runtime_repo = REPO_ROOT
    meta = {
        "created_at": now.isoformat(),
        "hostname": socket.gethostname(),
        "runtime_root": str(RUNTIME_ROOT),
        "output_root": str(output_root),
        "tag": tag,
        "since": args.since,
        "repos": {
            "m2dev-server": git_summary(runtime_repo),
            "m2dev-server-src": git_summary(source_repo),
        },
    }
    write_text(bundle_dir / "meta.json", json.dumps(meta, indent=2))

    write_command_output(bundle_dir, "uname.txt", ["uname", "-a"])
    write_command_output(bundle_dir, "df.txt", ["df", "-h"])
    write_command_output(bundle_dir, "free.txt", ["free", "-h"], check=False)
    write_command_output(bundle_dir, "ports.txt", ["ss", "-ltnp"], check=False)
    write_command_output(bundle_dir, "systemctl-status.txt", ["systemctl", "status", "--no-pager", *units], check=False)

    journal_dir = bundle_dir / "journal"
    for unit in units:
        # "@" and "." are replaced to build the per-unit transcript file name.
        safe_name = unit.replace("@", "_").replace(".", "_")
        write_command_output(
            journal_dir,
            f"{safe_name}.log",
            ["journalctl", "--no-pager", "--since", args.since, "-u", unit],
            check=False,
        )

    copy_log_tails(bundle_dir)

    core_files = find_core_files()
    write_core_metadata(bundle_dir, core_files)
    if args.include_cores and core_files:
        copy_core_files(bundle_dir, core_files)

    print(bundle_dir)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -12,6 +12,8 @@ from pathlib import Path
|
|||||||
REPO_ROOT = Path("{{REPO_ROOT}}")
|
REPO_ROOT = Path("{{REPO_ROOT}}")
|
||||||
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
|
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
|
||||||
HEALTHCHECK_PATH = Path("/usr/local/sbin/metin-login-healthcheck")
|
HEALTHCHECK_PATH = Path("/usr/local/sbin/metin-login-healthcheck")
|
||||||
|
INCIDENT_COLLECTOR_PATH = Path("/usr/local/sbin/metin-collect-incident")
|
||||||
|
INCIDENT_ROOT = Path("/var/lib/metin/incidents")
|
||||||
|
|
||||||
sys.path.insert(0, str(REPO_ROOT))
|
sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
|
||||||
@@ -42,6 +44,17 @@ def parse_args() -> argparse.Namespace:
|
|||||||
logs_parser.add_argument("-n", "--lines", type=int, default=100, help="Number of journal lines")
|
logs_parser.add_argument("-n", "--lines", type=int, default=100, help="Number of journal lines")
|
||||||
logs_parser.add_argument("-f", "--follow", action="store_true", help="Follow the journal")
|
logs_parser.add_argument("-f", "--follow", action="store_true", help="Follow the journal")
|
||||||
|
|
||||||
|
cores_parser = subparsers.add_parser("cores", help="List core files under the runtime tree")
|
||||||
|
cores_parser.add_argument("--json", action="store_true", help="Print raw JSON")
|
||||||
|
|
||||||
|
incidents_parser = subparsers.add_parser("incidents", help="List collected incident bundles")
|
||||||
|
incidents_parser.add_argument("--limit", type=int, default=10, help="Maximum number of bundles to show")
|
||||||
|
|
||||||
|
incident_collect = subparsers.add_parser("incident-collect", help="Collect an incident bundle")
|
||||||
|
incident_collect.add_argument("--tag", default="manual", help="Short incident tag")
|
||||||
|
incident_collect.add_argument("--since", default="-30 minutes", help="journalctl --since value")
|
||||||
|
incident_collect.add_argument("--include-cores", action="store_true", help="Copy matching core files into the bundle")
|
||||||
|
|
||||||
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
subparsers.add_parser("healthcheck", help="Run the root-only headless healthcheck")
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
@@ -121,6 +134,10 @@ def iter_port_rows() -> list[dict[str, str]]:
|
|||||||
return rows
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def iter_core_files() -> list[Path]:
    """Return the regular files matching ``core*`` below
    ``RUNTIME_ROOT/channels``, in sorted order.
    """
    found: list[Path] = []
    for candidate in sorted(RUNTIME_ROOT.glob("channels/**/core*")):
        if candidate.is_file():
            found.append(candidate)
    return found
|
||||||
|
|
||||||
|
|
||||||
def live_ports() -> set[int]:
|
def live_ports() -> set[int]:
|
||||||
if shutil.which("ss") is None:
|
if shutil.which("ss") is None:
|
||||||
return set()
|
return set()
|
||||||
@@ -236,6 +253,47 @@ def print_ports(show_live: bool) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_cores(as_json: bool) -> int:
    """Print the core files found under the runtime tree.

    Args:
        as_json: Emit the entries as indented JSON instead of a table.

    Returns:
        Always 0, including when no core files exist.
    """

    def describe(core_path):
        info = core_path.stat()
        return {
            "path": str(core_path),
            "relative_path": str(core_path.relative_to(RUNTIME_ROOT)),
            "size_bytes": info.st_size,
            "mtime_epoch": int(info.st_mtime),
        }

    entries = [describe(core_path) for core_path in iter_core_files()]

    if as_json:
        # JSON mode prints the list even when it is empty.
        print(json.dumps(entries, indent=2))
    elif not entries:
        print("No core files found under the runtime tree.")
    else:
        table_rows = [
            [entry["relative_path"], str(entry["size_bytes"]), str(entry["mtime_epoch"])]
            for entry in entries
        ]
        print_table(["path", "size_bytes", "mtime_epoch"], table_rows)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_incidents(limit: int) -> int:
    """List collected incident bundles, sorted by path in descending order.

    Args:
        limit: Upper bound applied by slicing the sorted directory list.

    Returns:
        Always 0; a missing or empty incident directory only prints a notice.
    """
    if not INCIDENT_ROOT.exists():
        print(f"No incident directory: {INCIDENT_ROOT}")
        return 0

    directories = [entry for entry in INCIDENT_ROOT.iterdir() if entry.is_dir()]
    directories.sort(reverse=True)
    selected = directories[:limit]
    if not selected:
        print(f"No incident bundles in {INCIDENT_ROOT}")
        return 0

    table_rows = []
    for bundle in selected:
        table_rows.append([bundle.name, str(bundle)])
    print_table(["bundle", "path"], table_rows)
    return 0
|
||||||
|
|
||||||
|
|
||||||
def run_unit_action(action: str, target: str) -> int:
|
def run_unit_action(action: str, target: str) -> int:
|
||||||
units = resolve_target_units(target)
|
units = resolve_target_units(target)
|
||||||
run(["systemctl", action, *units], require_root=True)
|
run(["systemctl", action, *units], require_root=True)
|
||||||
@@ -260,6 +318,17 @@ def run_healthcheck() -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def run_incident_collect(tag: str, since: str, include_cores: bool) -> int:
    """Invoke the installed incident collector with the given options.

    Args:
        tag: Value passed as ``--tag``.
        since: Value passed as ``--since``.
        include_cores: Append ``--include-cores`` when true.

    Returns:
        0 once the collector has been run.

    Raises:
        SystemExit: when the collector script is not installed.
    """
    if not INCIDENT_COLLECTOR_PATH.exists():
        raise SystemExit(f"Missing incident collector: {INCIDENT_COLLECTOR_PATH}")

    optional_flags = ["--include-cores"] if include_cores else []
    run(
        [str(INCIDENT_COLLECTOR_PATH), "--tag", tag, "--since", since, *optional_flags],
        require_root=True,
    )
    return 0
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
@@ -271,10 +340,16 @@ def main() -> int:
|
|||||||
return print_status(args.target)
|
return print_status(args.target)
|
||||||
if args.command == "ports":
|
if args.command == "ports":
|
||||||
return print_ports(args.live)
|
return print_ports(args.live)
|
||||||
|
if args.command == "cores":
|
||||||
|
return print_cores(args.json)
|
||||||
|
if args.command == "incidents":
|
||||||
|
return print_incidents(args.limit)
|
||||||
if args.command in {"start", "stop", "restart"}:
|
if args.command in {"start", "stop", "restart"}:
|
||||||
return run_unit_action(args.command, args.target)
|
return run_unit_action(args.command, args.target)
|
||||||
if args.command == "logs":
|
if args.command == "logs":
|
||||||
return run_logs(args.target, args.lines, args.follow)
|
return run_logs(args.target, args.lines, args.follow)
|
||||||
|
if args.command == "incident-collect":
|
||||||
|
return run_incident_collect(args.tag, args.since, args.include_cores)
|
||||||
if args.command == "healthcheck":
|
if args.command == "healthcheck":
|
||||||
return run_healthcheck()
|
return run_healthcheck()
|
||||||
raise SystemExit(f"Unsupported command: {args.command}")
|
raise SystemExit(f"Unsupported command: {args.command}")
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser.add_argument("--systemd-dir", default="/etc/systemd/system", help="systemd unit destination")
|
parser.add_argument("--systemd-dir", default="/etc/systemd/system", help="systemd unit destination")
|
||||||
parser.add_argument("--libexec-dir", default="/usr/local/libexec", help="Helper script destination")
|
parser.add_argument("--libexec-dir", default="/usr/local/libexec", help="Helper script destination")
|
||||||
parser.add_argument("--bin-dir", default="/usr/local/bin", help="Binary/script destination")
|
parser.add_argument("--bin-dir", default="/usr/local/bin", help="Binary/script destination")
|
||||||
|
parser.add_argument("--sbin-dir", default="/usr/local/sbin", help="Root-only binary/script destination")
|
||||||
parser.add_argument("--env-file", default="/etc/metin/metin.env", help="Optional EnvironmentFile path for runtime overrides")
|
parser.add_argument("--env-file", default="/etc/metin/metin.env", help="Optional EnvironmentFile path for runtime overrides")
|
||||||
parser.add_argument("--wait-host", default="127.0.0.1", help="DB readiness host")
|
parser.add_argument("--wait-host", default="127.0.0.1", help="DB readiness host")
|
||||||
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
|
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
|
||||||
@@ -100,6 +101,7 @@ def main() -> int:
|
|||||||
systemd_dir = Path(args.systemd_dir)
|
systemd_dir = Path(args.systemd_dir)
|
||||||
libexec_dir = Path(args.libexec_dir)
|
libexec_dir = Path(args.libexec_dir)
|
||||||
bin_dir = Path(args.bin_dir)
|
bin_dir = Path(args.bin_dir)
|
||||||
|
sbin_dir = Path(args.sbin_dir)
|
||||||
|
|
||||||
selected_channels = resolve_channels(args)
|
selected_channels = resolve_channels(args)
|
||||||
instances = resolve_instances(selected_channels)
|
instances = resolve_instances(selected_channels)
|
||||||
@@ -141,6 +143,11 @@ def main() -> int:
|
|||||||
render_template(BIN_DIR / "metinctl.in", template_values),
|
render_template(BIN_DIR / "metinctl.in", template_values),
|
||||||
0o755,
|
0o755,
|
||||||
)
|
)
|
||||||
|
write_text(
|
||||||
|
sbin_dir / "metin-collect-incident",
|
||||||
|
render_template(BIN_DIR / "metin-collect-incident.in", template_values),
|
||||||
|
0o700,
|
||||||
|
)
|
||||||
|
|
||||||
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
|
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
|
||||||
run(["systemd-analyze", "verify", *verify_units])
|
run(["systemd-analyze", "verify", *verify_units])
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ RestartSec=5
|
|||||||
KillSignal=SIGTERM
|
KillSignal=SIGTERM
|
||||||
TimeoutStopSec=60
|
TimeoutStopSec=60
|
||||||
LimitNOFILE=65535
|
LimitNOFILE=65535
|
||||||
|
LimitCORE=infinity
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
RequiredBy=metin-server.service
|
RequiredBy=metin-server.service
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ RestartSec=5
|
|||||||
KillSignal=SIGTERM
|
KillSignal=SIGTERM
|
||||||
TimeoutStopSec=180
|
TimeoutStopSec=180
|
||||||
LimitNOFILE=65535
|
LimitNOFILE=65535
|
||||||
|
LimitCORE=infinity
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
RequiredBy=metin-server.service
|
RequiredBy=metin-server.service
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ RestartSec=5
|
|||||||
KillSignal=SIGTERM
|
KillSignal=SIGTERM
|
||||||
TimeoutStopSec=60
|
TimeoutStopSec=60
|
||||||
LimitNOFILE=65535
|
LimitNOFILE=65535
|
||||||
|
LimitCORE=infinity
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
RequiredBy=metin-server.service
|
RequiredBy=metin-server.service
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ The Debian deployment installs:
|
|||||||
- listing declared ports
|
- listing declared ports
|
||||||
- restarting the whole stack or specific channels/instances
|
- restarting the whole stack or specific channels/instances
|
||||||
- viewing logs
|
- viewing logs
|
||||||
|
- listing core files in the runtime tree
|
||||||
|
- collecting incident bundles
|
||||||
- running the root-only headless healthcheck
|
- running the root-only headless healthcheck
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
@@ -84,6 +86,24 @@ Run the end-to-end healthcheck:
|
|||||||
metinctl healthcheck
|
metinctl healthcheck
|
||||||
```
|
```
|
||||||
|
|
||||||
|
List core files currently present in the runtime tree:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl cores
|
||||||
|
```
|
||||||
|
|
||||||
|
Collect an incident bundle with logs, unit status, port state and repository revisions:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl incident-collect --tag auth-timeout --since "-20 minutes"
|
||||||
|
```
|
||||||
|
|
||||||
|
List the most recent incident bundles:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
metinctl incidents
|
||||||
|
```
|
||||||
|
|
||||||
## systemd installer behavior
|
## systemd installer behavior
|
||||||
|
|
||||||
`deploy/systemd/install_systemd.py` now uses the same inventory and installs `metinctl`.
|
`deploy/systemd/install_systemd.py` now uses the same inventory and installs `metinctl`.
|
||||||
@@ -95,3 +115,26 @@ It also reconciles enabled game instance units against the selected channels:
|
|||||||
- if `--restart` is passed, stale game units are disabled with `--now`
|
- if `--restart` is passed, stale game units are disabled with `--now`
|
||||||
|
|
||||||
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
|
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
|
||||||
|
|
||||||
|
## Crash / Incident Pipeline
|
||||||
|
|
||||||
|
The Debian deployment now also installs:
|
||||||
|
|
||||||
|
- `/usr/local/sbin/metin-collect-incident`
|
||||||
|
|
||||||
|
The collector creates a timestamped bundle under:
|
||||||
|
|
||||||
|
- `/var/lib/metin/incidents`
|
||||||
|
|
||||||
|
Each bundle contains:
|
||||||
|
|
||||||
|
- repo revisions for `m2dev-server` and `m2dev-server-src`
|
||||||
|
- `systemctl status` for the whole stack
|
||||||
|
- recent `journalctl` output per unit
|
||||||
|
- listener state from `ss -ltnp`
|
||||||
|
- tailed runtime `syslog.log` and `syserr.log` files
|
||||||
|
- metadata for any `core*` files found under `runtime/server/channels`
|
||||||
|
|
||||||
|
If you call it with `--include-cores`, matching core files are copied into the bundle as well.
|
||||||
|
|
||||||
|
The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes may write core dumps, provided the host's kernel core-dump policy (for example `kernel.core_pattern`) permits it.
|
||||||
|
|||||||
Reference in New Issue
Block a user