ops: snapshot crash executables

This commit is contained in:
server
2026-04-14 17:05:11 +02:00
parent 0bc6559283
commit 6f16f66543
3 changed files with 107 additions and 6 deletions

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import argparse import argparse
import json import json
import os import os
import re
import shutil import shutil
import socket import socket
import subprocess import subprocess
@@ -106,6 +107,78 @@ def copy_core_files(bundle_dir: Path, core_files: list[Path]) -> None:
shutil.copy2(path, destination) shutil.copy2(path, destination)
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Return the executable named in a core file's ``execfn`` field, if usable.

    Runs ``file <core_path>`` through the module's ``run`` helper and searches
    its standard output for ``execfn: '<path>'``.

    Args:
        core_path: Core dump to inspect.

    Returns:
        The resolved executable path, or ``None`` when ``file`` exits
        non-zero, prints no ``execfn`` field, or the recorded path is not a
        regular file on this host.
    """
    # NOTE(review): assumes `run` returns an object with `returncode` and a
    # text `stdout` -- confirm against the helper's definition.
    completed = run(["file", str(core_path)], check=False)
    if completed.returncode != 0:
        return None

    match = re.search(r"execfn: '([^']+)'", completed.stdout)
    if match is None:
        return None

    candidate = Path(match.group(1))
    # is_file() rather than exists(): a directory at the recorded path is not
    # an executable, and accepting it would make callers that copy the result
    # with shutil.copy2 fail. The name-based fallbacks use is_file() as well.
    if candidate.is_file():
        return candidate.resolve()
    return None
def infer_executable_for_core(core_path: Path) -> Path | None:
    """Guess which executable produced ``core_path``.

    The ``execfn`` value reported by ``file`` is preferred. Otherwise the
    binary name is derived from the directory holding the core file:

    * ``db/``                  -> ``db``
    * ``auth/``                -> ``game_auth``
    * ``channel*/core*/``      -> ``<channel dir>_<core dir>``

    Returns:
        The resolved executable path, or ``None`` when no rule applies or the
        derived path is not a regular file.
    """
    from_core = infer_execfn_from_file_output(core_path)
    if from_core:
        return from_core

    runtime_dir = core_path.parent
    dir_name = runtime_dir.name
    parent_dir_name = runtime_dir.parent.name if runtime_dir.parent else ""

    if dir_name == "db":
        binary_name = "db"
    elif dir_name == "auth":
        binary_name = "game_auth"
    elif dir_name.startswith("core") and parent_dir_name.startswith("channel"):
        binary_name = f"{parent_dir_name}_{dir_name}"
    else:
        return None

    resolved = (runtime_dir / binary_name).resolve()
    if not resolved.is_file():
        return None
    return resolved
def executable_metadata(path: Path) -> dict[str, object]:
    """Describe an executable by path, size and modification time.

    Args:
        path: File to stat.

    Returns:
        A dict with ``path`` (string form of ``path``), ``size_bytes`` and
        ``mtime`` (ISO 8601 timestamp in UTC).
    """
    info = path.stat()
    modified_at = datetime.fromtimestamp(info.st_mtime, tz=timezone.utc)
    metadata: dict[str, object] = {"path": str(path)}
    metadata["size_bytes"] = info.st_size
    metadata["mtime"] = modified_at.isoformat()
    return metadata
def write_core_executable_metadata(bundle_dir: Path, core_files: list[Path]) -> None:
    """Write ``core-executables.json`` into the bundle.

    The file holds one entry per core file, in input order. Each entry maps
    ``core`` to the core path and ``executable`` to the inferred executable's
    metadata, or ``null`` when no executable could be inferred.
    """

    def describe(core_path: Path) -> dict[str, object]:
        found = infer_executable_for_core(core_path)
        return {
            "core": str(core_path),
            "executable": executable_metadata(found) if found else None,
        }

    payload = json.dumps([describe(core_path) for core_path in core_files], indent=2)
    write_text(bundle_dir / "core-executables.json", payload)
def copy_core_executables(bundle_dir: Path, core_files: list[Path]) -> None:
    """Snapshot the executable inferred for each core file into the bundle.

    Executables are copied once each into ``<bundle_dir>/executables``. A
    binary under ``RUNTIME_ROOT`` keeps its runtime-relative path. Any other
    binary is stored under ``_external/`` followed by its absolute path
    without the filesystem anchor.

    Args:
        bundle_dir: Root directory of the incident bundle.
        core_files: Core dumps whose executables should be preserved.
    """
    executables_dir = bundle_dir / "executables"
    copied: set[Path] = set()
    for core_path in core_files:
        executable = infer_executable_for_core(core_path)
        if not executable or executable in copied:
            continue
        copied.add(executable)
        try:
            relative = executable.relative_to(RUNTIME_ROOT)
        except ValueError:
            # The inferred path is resolved, so it can land outside
            # RUNTIME_ROOT (an execfn elsewhere on the host, or a symlink
            # target). Keep the snapshot instead of aborting the bundle.
            tail = executable.parts[1:] if executable.anchor else executable.parts
            relative = Path("_external", *tail)
        destination = executables_dir / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(executable, destination)
def git_summary(repo_path: Path) -> dict[str, object]: def git_summary(repo_path: Path) -> dict[str, object]:
summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()} summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()}
if not repo_path.exists(): if not repo_path.exists():
@@ -180,8 +253,10 @@ def main() -> int:
core_files = find_core_files() core_files = find_core_files()
write_core_metadata(bundle_dir, core_files) write_core_metadata(bundle_dir, core_files)
write_core_executable_metadata(bundle_dir, core_files)
if args.include_cores and core_files: if args.include_cores and core_files:
copy_core_files(bundle_dir, core_files) copy_core_files(bundle_dir, core_files)
copy_core_executables(bundle_dir, core_files)
print(bundle_dir) print(bundle_dir)
return 0 return 0

View File

@@ -6,6 +6,7 @@ import os
import re import re
import shutil import shutil
import subprocess import subprocess
from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}") RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
@@ -75,10 +76,15 @@ def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
raise SystemExit(f"Executable not found: {exe_path}") raise SystemExit(f"Executable not found: {exe_path}")
return exe_path return exe_path
execfn_candidate = infer_execfn_from_file_output(core_path)
parent_name = core_path.parent.name parent_name = core_path.parent.name
grandparent_name = core_path.parent.parent.name if core_path.parent.parent else "" grandparent_name = core_path.parent.parent.name if core_path.parent.parent else ""
candidates: list[Path] = [] candidates: list[Path] = []
if execfn_candidate:
candidates.append(execfn_candidate)
if parent_name == "db": if parent_name == "db":
candidates.append(core_path.parent / "db") candidates.append(core_path.parent / "db")
elif parent_name == "auth": elif parent_name == "auth":
@@ -86,10 +92,6 @@ def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
elif parent_name.startswith("core") and grandparent_name.startswith("channel"): elif parent_name.startswith("core") and grandparent_name.startswith("channel"):
candidates.append(core_path.parent / f"{grandparent_name}_{parent_name}") candidates.append(core_path.parent / f"{grandparent_name}_{parent_name}")
execfn_candidate = infer_execfn_from_file_output(core_path)
if execfn_candidate:
candidates.append(execfn_candidate)
for candidate in candidates: for candidate in candidates:
if candidate.is_file(): if candidate.is_file():
return candidate.resolve() return candidate.resolve()
@@ -114,6 +116,28 @@ def render_file_info(path: Path) -> str:
return format_section(f"file {path}", body) return format_section(f"file {path}", body)
def render_executable_freshness(core_path: Path, exe_path: Path) -> str:
    """Render a section comparing core and executable modification times.

    Both mtimes are printed as ISO 8601 UTC timestamps. A warning line is
    added when the executable is more than one second newer than the core
    file; otherwise a status line is added.
    """
    core_modified = core_path.stat().st_mtime
    exe_modified = exe_path.stat().st_mtime

    def utc_iso(timestamp: float) -> str:
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()

    if exe_modified > core_modified + 1:
        verdict = (
            "warning: executable is newer than the core file; symbols may not match. "
            "Prefer an executable snapshot from an incident bundle or pass --exe explicitly."
        )
    else:
        verdict = "status: executable is not newer than the core file"

    body = "\n".join(
        [
            f"core_mtime: {utc_iso(core_modified)}",
            f"exe_mtime: {utc_iso(exe_modified)}",
            verdict,
        ]
    )
    return format_section("core/executable freshness", body)
def render_readelf_notes(core_path: Path) -> str: def render_readelf_notes(core_path: Path) -> str:
if not shutil.which("readelf"): if not shutil.which("readelf"):
return "" return ""
@@ -173,6 +197,7 @@ def main() -> int:
), ),
render_file_info(core_path), render_file_info(core_path),
render_file_info(exe_path), render_file_info(exe_path),
render_executable_freshness(core_path, exe_path),
] ]
readelf_section = render_readelf_notes(core_path) readelf_section = render_readelf_notes(core_path)

View File

@@ -241,8 +241,9 @@ Each bundle contains:
- listener state from `ss -ltnp` - listener state from `ss -ltnp`
- tailed runtime `syslog.log` and `syserr.log` files - tailed runtime `syslog.log` and `syserr.log` files
- metadata for any `core*` files found under `runtime/server/channels` - metadata for any `core*` files found under `runtime/server/channels`
- metadata for the executable inferred for each core file
If you call it with `--include-cores`, matching core files are copied into the bundle as well. If you call it with `--include-cores`, matching core files are copied into the bundle as well. In the same mode, the inferred executable files are copied too, so a later redeploy does not destroy your ability to symbolicate the crash with the original binary snapshot.
The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes are allowed to emit core dumps when the host kernel/core policy permits it. The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes are allowed to emit core dumps when the host kernel/core policy permits it.
@@ -252,4 +253,4 @@ For quick manual crash triage outside the incident bundle flow, use:
metinctl backtrace metinctl backtrace
``` ```
It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable. It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable. If the current executable is newer than the core file, the helper prints an explicit warning because the backtrace may no longer match the crashed binary.