ops: snapshot crash executables
This commit is contained in:
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
@@ -106,6 +107,78 @@ def copy_core_files(bundle_dir: Path, core_files: list[Path]) -> None:
|
||||
shutil.copy2(path, destination)
|
||||
|
||||
|
||||
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Return the executable named in the ``execfn`` field of ``file`` output.

    Invokes ``file <core_path>`` through the module's ``run`` helper and
    searches the captured output for an ``execfn: '...'`` entry.

    Args:
        core_path: Path of the core file to inspect.

    Returns:
        The resolved path taken from ``execfn`` when it names an existing
        regular file; ``None`` when ``file`` exits non-zero, prints no
        ``execfn`` entry, or the named path is missing or not a regular file.
    """
    completed = run(["file", str(core_path)], check=False)
    if completed.returncode != 0:
        return None

    # Treat missing output like empty output instead of letting re.search
    # raise TypeError on None.
    match = re.search(r"execfn: '([^']+)'", completed.stdout or "")
    if not match:
        return None

    candidate = Path(match.group(1))
    # Require a regular file, not merely an existing path: the result is
    # later stat()-ed and copied as the crashed binary, so a directory that
    # happens to match the recorded name must be rejected here.
    if candidate.is_file():
        return candidate.resolve()
    return None
|
||||
|
||||
|
||||
def infer_executable_for_core(core_path: Path) -> Path | None:
    """Guess which executable produced ``core_path``.

    The ``execfn`` reported by ``file`` wins when it is available. Otherwise
    the binary name is derived from the directory that holds the core:

    * ``db/``                 -> ``db``
    * ``auth/``               -> ``game_auth``
    * ``channel*/core*/``     -> ``<channel dir>_<core dir>``

    Args:
        core_path: Path of the core file.

    Returns:
        The resolved executable path, or ``None`` when no rule applies or the
        derived path is not a regular file.
    """
    from_execfn = infer_execfn_from_file_output(core_path)
    if from_execfn:
        return from_execfn

    core_dir = core_path.parent
    dir_name = core_dir.name
    parent_dir_name = core_dir.parent.name

    # Pick the expected binary name for this directory layout.
    if dir_name == "db":
        binary_name = "db"
    elif dir_name == "auth":
        binary_name = "game_auth"
    elif dir_name.startswith("core") and parent_dir_name.startswith("channel"):
        binary_name = f"{parent_dir_name}_{dir_name}"
    else:
        return None

    # The binary is expected to sit next to the core file.
    resolved = (core_dir / binary_name).resolve()
    return resolved if resolved.is_file() else None
|
||||
|
||||
|
||||
def executable_metadata(path: Path) -> dict[str, object]:
    """Describe an executable by path, size and modification time.

    Args:
        path: File to describe; it is ``stat()``-ed, so it must exist.

    Returns:
        A dict with ``path`` (string form of ``path``), ``size_bytes`` and
        ``mtime`` (modification time as an ISO-8601 string in UTC).
    """
    info = path.stat()
    modified_at = datetime.fromtimestamp(info.st_mtime, tz=timezone.utc)
    return dict(
        path=str(path),
        size_bytes=info.st_size,
        mtime=modified_at.isoformat(),
    )
|
||||
|
||||
|
||||
def write_core_executable_metadata(bundle_dir: Path, core_files: list[Path]) -> None:
    """Write ``core-executables.json`` into the bundle directory.

    The file holds one JSON object per core file, in input order, with the
    core path under ``core`` and the inferred executable's metadata under
    ``executable`` (``null`` when no executable could be inferred).

    Args:
        bundle_dir: Directory that receives ``core-executables.json``.
        core_files: Core files to describe.
    """

    def describe(core_path: Path) -> dict[str, object]:
        # Build the row for one core file.
        found = infer_executable_for_core(core_path)
        return {
            "core": str(core_path),
            "executable": executable_metadata(found) if found else None,
        }

    payload = json.dumps([describe(core_path) for core_path in core_files], indent=2)
    write_text(bundle_dir / "core-executables.json", payload)
|
||||
|
||||
|
||||
def copy_core_executables(bundle_dir: Path, core_files: list[Path]) -> None:
    """Snapshot the executables inferred for the given core files.

    Each distinct executable is copied once (with ``shutil.copy2``) into
    ``bundle_dir / "executables"``. Executables under ``RUNTIME_ROOT`` keep
    their path relative to that root; any other executable is stored under
    ``_external/`` followed by its absolute path without the filesystem
    anchor.

    Args:
        bundle_dir: Bundle directory that receives the ``executables`` tree.
        core_files: Core files whose executables should be copied. Cores with
            no inferable executable are skipped.
    """
    executables_dir = bundle_dir / "executables"
    copied: set[Path] = set()
    for core_path in core_files:
        executable = infer_executable_for_core(core_path)
        if not executable or executable in copied:
            continue
        copied.add(executable)
        try:
            relative = executable.relative_to(RUNTIME_ROOT)
        except ValueError:
            # The path recorded in the core can point outside the runtime
            # tree (for example a system binary). Keep the snapshot instead
            # of aborting the whole bundle, and file it separately so it
            # cannot be mistaken for a runtime-relative path.
            relative = Path("_external") / executable.relative_to(executable.anchor)
        destination = executables_dir / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(executable, destination)
|
||||
|
||||
|
||||
def git_summary(repo_path: Path) -> dict[str, object]:
|
||||
summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()}
|
||||
if not repo_path.exists():
|
||||
@@ -180,8 +253,10 @@ def main() -> int:
|
||||
|
||||
core_files = find_core_files()
|
||||
write_core_metadata(bundle_dir, core_files)
|
||||
write_core_executable_metadata(bundle_dir, core_files)
|
||||
if args.include_cores and core_files:
|
||||
copy_core_files(bundle_dir, core_files)
|
||||
copy_core_executables(bundle_dir, core_files)
|
||||
|
||||
print(bundle_dir)
|
||||
return 0
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
|
||||
@@ -75,10 +76,15 @@ def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
|
||||
raise SystemExit(f"Executable not found: {exe_path}")
|
||||
return exe_path
|
||||
|
||||
execfn_candidate = infer_execfn_from_file_output(core_path)
|
||||
|
||||
parent_name = core_path.parent.name
|
||||
grandparent_name = core_path.parent.parent.name if core_path.parent.parent else ""
|
||||
|
||||
candidates: list[Path] = []
|
||||
if execfn_candidate:
|
||||
candidates.append(execfn_candidate)
|
||||
|
||||
if parent_name == "db":
|
||||
candidates.append(core_path.parent / "db")
|
||||
elif parent_name == "auth":
|
||||
@@ -86,10 +92,6 @@ def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
|
||||
elif parent_name.startswith("core") and grandparent_name.startswith("channel"):
|
||||
candidates.append(core_path.parent / f"{grandparent_name}_{parent_name}")
|
||||
|
||||
execfn_candidate = infer_execfn_from_file_output(core_path)
|
||||
if execfn_candidate:
|
||||
candidates.append(execfn_candidate)
|
||||
|
||||
for candidate in candidates:
|
||||
if candidate.is_file():
|
||||
return candidate.resolve()
|
||||
@@ -114,6 +116,28 @@ def render_file_info(path: Path) -> str:
|
||||
return format_section(f"file {path}", body)
|
||||
|
||||
|
||||
def render_executable_freshness(core_path: Path, exe_path: Path) -> str:
    """Render a section comparing the core file's and executable's mtimes.

    The section lists both modification times as ISO-8601 UTC strings. A
    warning line follows when the executable was modified more than one
    second after the core file, otherwise a status line.

    Args:
        core_path: Core file being analysed.
        exe_path: Executable that will be used to read the core.

    Returns:
        The text produced by ``format_section`` for the
        ``core/executable freshness`` section.
    """
    core_ts = core_path.stat().st_mtime
    exe_ts = exe_path.stat().st_mtime

    def as_utc_iso(timestamp: float) -> str:
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()

    # One second of slack before the executable counts as newer.
    if exe_ts > core_ts + 1:
        verdict = (
            "warning: executable is newer than the core file; symbols may not match. "
            "Prefer an executable snapshot from an incident bundle or pass --exe explicitly."
        )
    else:
        verdict = "status: executable is not newer than the core file"

    body = "\n".join(
        (
            f"core_mtime: {as_utc_iso(core_ts)}",
            f"exe_mtime: {as_utc_iso(exe_ts)}",
            verdict,
        )
    )
    return format_section("core/executable freshness", body)
|
||||
|
||||
|
||||
def render_readelf_notes(core_path: Path) -> str:
|
||||
if not shutil.which("readelf"):
|
||||
return ""
|
||||
@@ -173,6 +197,7 @@ def main() -> int:
|
||||
),
|
||||
render_file_info(core_path),
|
||||
render_file_info(exe_path),
|
||||
render_executable_freshness(core_path, exe_path),
|
||||
]
|
||||
|
||||
readelf_section = render_readelf_notes(core_path)
|
||||
|
||||
@@ -241,8 +241,9 @@ Each bundle contains:
|
||||
- listener state from `ss -ltnp`
|
||||
- tailed runtime `syslog.log` and `syserr.log` files
|
||||
- metadata for any `core*` files found under `runtime/server/channels`
|
||||
- metadata for the executable inferred for each core file
|
||||
|
||||
If you call it with `--include-cores`, matching core files are copied into the bundle as well.
|
||||
If you call it with `--include-cores`, matching core files are copied into the bundle as well. In the same mode, the inferred executable files are copied too, so a later redeploy does not destroy your ability to symbolicate the crash with the original binary snapshot.
|
||||
|
||||
The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes are allowed to emit core dumps when the host kernel/core policy permits it.
|
||||
|
||||
@@ -252,4 +253,4 @@ For quick manual crash triage outside the incident bundle flow, use:
|
||||
metinctl backtrace
|
||||
```
|
||||
|
||||
It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable.
|
||||
It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable. If the current executable is newer than the core file, the helper prints an explicit warning because the backtrace may no longer match the crashed binary.
|
||||
|
||||
Reference in New Issue
Block a user