Compare commits
2 Commits
main
...
claude/ser
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2b55949de | ||
|
|
5b65928007 |
@@ -51,35 +51,6 @@ def get_channel_ids() -> list[int]:
|
||||
return [int(channel["id"]) for channel in iter_channels()]
|
||||
|
||||
|
||||
def get_public_channel_ids(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> list[int]:
    """Return the ids of public channels, in the order ``iter_channels()`` yields them.

    Args:
        selected_channel_ids: Optional restriction. When given, only channels
            whose id (compared as ``int``) appears in it are considered;
            ``None`` means every channel is considered.
        client_visible_only: When true, additionally require the channel's
            ``client_visible`` entry to be truthy.

    Returns:
        The ids of all channels whose ``public`` entry is truthy and that pass
        the optional filters above.
    """
    wanted = None
    if selected_channel_ids is not None:
        wanted = {int(raw_id) for raw_id in selected_channel_ids}

    public_ids: list[int] = []
    for entry in iter_channels():
        entry_id = int(entry["id"])
        if (
            (wanted is None or entry_id in wanted)
            and entry.get("public")
            and (not client_visible_only or entry.get("client_visible"))
        ):
            public_ids.append(entry_id)
    return public_ids
|
||||
|
||||
|
||||
def has_public_channel(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> bool:
    """Tell whether at least one public channel matches the given filters.

    The arguments are forwarded unchanged to ``get_public_channel_ids``; the
    result is true exactly when that call returns a non-empty list.
    """
    matching_ids = get_public_channel_ids(
        selected_channel_ids,
        client_visible_only=client_visible_only,
    )
    return len(matching_ids) > 0
|
||||
|
||||
|
||||
def get_channel_map() -> dict[int, dict[int, str]]:
|
||||
result: dict[int, dict[int, str]] = {}
|
||||
for channel in iter_channels():
|
||||
|
||||
@@ -8,38 +8,6 @@ if [[ "${EUID}" -ne 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
MODE="full"
|
||||
|
||||
while (($#)); do
|
||||
case "$1" in
|
||||
--mode)
|
||||
shift
|
||||
if (($# == 0)); then
|
||||
echo "Missing value for --mode" >&2
|
||||
exit 1
|
||||
fi
|
||||
MODE="$1"
|
||||
;;
|
||||
--mode=*)
|
||||
MODE="${1#*=}"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown argument: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
case "${MODE}" in
|
||||
ready|full)
|
||||
;;
|
||||
*)
|
||||
echo "Unsupported mode: ${MODE} (expected ready or full)" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
: "${RUN_AS_USER:=mt2.jakubkadlec.dev}"
|
||||
: "${SERVER_HOST:=173.249.9.66}"
|
||||
: "${AUTH_PORT:=11000}"
|
||||
@@ -58,26 +26,19 @@ if ! id "${RUN_AS_USER}" >/dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Print a short per-invocation suffix with no trailing newline: the tail of
# the `date +%s%N` clock reading followed by the output of
# `openssl rand -hex 2`.
unique_suffix() {
  printf '%s%s' \
    "$(date +%s%N | tail -c 9)" \
    "$(openssl rand -hex 2)"
}
|
||||
|
||||
DELETE_SUFFIX="$(unique_suffix)"
|
||||
FULL_SUFFIX="$(unique_suffix)"
|
||||
|
||||
DELETE_LOGIN="smkd${DELETE_SUFFIX}"
|
||||
DELETE_LOGIN="smkdel$(date +%s)"
|
||||
DELETE_PASSWORD="$(openssl rand -hex 6)"
|
||||
DELETE_SOCIAL_ID="$(date +%s%N | tail -c 14)"
|
||||
DELETE_EMAIL="${DELETE_LOGIN}@example.invalid"
|
||||
DELETE_CHARACTER_NAME="d${DELETE_SUFFIX}"
|
||||
DELETE_CHARACTER_NAME="c${DELETE_LOGIN}"
|
||||
DELETE_PRIVATE_CODE="${DELETE_SOCIAL_ID: -7}"
|
||||
DELETE_ACCOUNT_ID=""
|
||||
|
||||
FULL_LOGIN="smkf${FULL_SUFFIX}"
|
||||
FULL_LOGIN="smkfull$(date +%s)"
|
||||
FULL_PASSWORD="$(openssl rand -hex 6)"
|
||||
FULL_SOCIAL_ID="$(date +%s%N | tail -c 14)"
|
||||
FULL_EMAIL="${FULL_LOGIN}@example.invalid"
|
||||
FULL_CHARACTER_NAME="f${FULL_SUFFIX}"
|
||||
FULL_CHARACTER_NAME="c${FULL_LOGIN}"
|
||||
FULL_ACCOUNT_ID=""
|
||||
|
||||
cleanup_account() {
|
||||
@@ -123,8 +84,6 @@ create_account() {
|
||||
local social_id="$3"
|
||||
local email="$4"
|
||||
|
||||
cleanup_account "" "${login}"
|
||||
|
||||
mysql -N account <<SQL
|
||||
INSERT INTO account (
|
||||
login,
|
||||
@@ -197,13 +156,12 @@ cleanup() {
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
FULL_ACCOUNT_ID="$(create_account "${FULL_LOGIN}" "${FULL_PASSWORD}" "${FULL_SOCIAL_ID}" "${FULL_EMAIL}")"
|
||||
create_player_index "${FULL_ACCOUNT_ID}"
|
||||
|
||||
if [[ "${MODE}" == "full" ]]; then
|
||||
DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")"
|
||||
create_player_index "${DELETE_ACCOUNT_ID}"
|
||||
|
||||
FULL_ACCOUNT_ID="$(create_account "${FULL_LOGIN}" "${FULL_PASSWORD}" "${FULL_SOCIAL_ID}" "${FULL_EMAIL}")"
|
||||
create_player_index "${FULL_ACCOUNT_ID}"
|
||||
|
||||
echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}"
|
||||
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \
|
||||
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \
|
||||
@@ -211,21 +169,13 @@ if [[ "${MODE}" == "full" ]]; then
|
||||
--create-character-name="${DELETE_CHARACTER_NAME}" \
|
||||
--delete-private-code="${DELETE_PRIVATE_CODE}" \
|
||||
--client-version="${CLIENT_VERSION}"
|
||||
fi
|
||||
|
||||
echo "Running ${MODE} login healthcheck for temporary account ${FULL_LOGIN}"
|
||||
FULL_ARGS=(
|
||||
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}"
|
||||
--password-env=METIN_LOGIN_SMOKE_PASSWORD
|
||||
--create-character-name="${FULL_CHARACTER_NAME}"
|
||||
--client-version="${CLIENT_VERSION}"
|
||||
)
|
||||
|
||||
if [[ "${MODE}" == "full" ]]; then
|
||||
FULL_ARGS+=(--mall-password="${MALL_PASSWORD}")
|
||||
fi
|
||||
|
||||
echo "Running full login healthcheck for temporary account ${FULL_LOGIN}"
|
||||
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${FULL_PASSWORD}" \
|
||||
"${FULL_ARGS[@]}"
|
||||
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}" \
|
||||
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
|
||||
--create-character-name="${FULL_CHARACTER_NAME}" \
|
||||
--client-version="${CLIENT_VERSION}" \
|
||||
--mall-password="${MALL_PASSWORD}"
|
||||
|
||||
echo "${MODE^} login healthcheck passed"
|
||||
echo "Login healthcheck passed"
|
||||
|
||||
@@ -20,8 +20,6 @@ python3 deploy/systemd/install_systemd.py \
|
||||
|
||||
`--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory.
|
||||
|
||||
By default the installer refuses channel selections that omit every client-visible public channel. If you intentionally want an auth/internal-only stack, pass `--allow-internal-only`.
|
||||
|
||||
The channel selection and port layout now come from the versioned inventory file:
|
||||
|
||||
- [deploy/channel-inventory.json](../channel-inventory.json)
|
||||
@@ -37,7 +35,6 @@ The channel selection and port layout now come from the versioned inventory file
|
||||
- `/usr/local/libexec/metin-wait-port`
|
||||
- `/usr/local/bin/metinctl`
|
||||
- `/usr/local/sbin/metin-collect-incident`
|
||||
- `/usr/local/sbin/metin-core-backtrace`
|
||||
|
||||
The `metin-db-ready.service` gate waits until the DB socket is actually accepting connections before `auth` and `game` units start.
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
@@ -107,78 +106,6 @@ def copy_core_files(bundle_dir: Path, core_files: list[Path]) -> None:
|
||||
shutil.copy2(path, destination)
|
||||
|
||||
|
||||
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Look up the crashed executable via the ``execfn`` field printed by ``file``.

    Returns the resolved path when ``file`` exits with status 0, its output
    contains an ``execfn: '...'`` entry, and that path exists on disk;
    otherwise returns ``None``.
    """
    file_result = run(["file", str(core_path)], check=False)
    if file_result.returncode == 0:
        execfn = re.search(r"execfn: '([^']+)'", file_result.stdout)
        if execfn:
            reported = Path(execfn.group(1))
            if reported.exists():
                return reported.resolve()
    return None
|
||||
|
||||
|
||||
def infer_executable_for_core(core_path: Path) -> Path | None:
    """Guess which executable produced ``core_path``.

    The ``execfn`` reported by ``file`` wins when available. Otherwise the
    binary name is derived from the directory layout around the core file:

    * ``db/``                   -> ``db``
    * ``auth/``                 -> ``game_auth``
    * ``channel*/core*/``       -> ``<channel dir>_<core dir>``

    Returns the resolved path of the guessed binary if it is a regular file,
    else ``None``.
    """
    from_execfn = infer_execfn_from_file_output(core_path)
    if from_execfn:
        return from_execfn

    core_dir = core_path.parent
    dir_name = core_dir.name
    channel_name = core_dir.parent.name

    if dir_name == "db":
        binary_name = "db"
    elif dir_name == "auth":
        binary_name = "game_auth"
    elif dir_name.startswith("core") and channel_name.startswith("channel"):
        binary_name = f"{channel_name}_{dir_name}"
    else:
        return None

    guessed = (core_dir / binary_name).resolve()
    return guessed if guessed.is_file() else None
|
||||
|
||||
|
||||
def executable_metadata(path: Path) -> dict[str, object]:
    """Describe ``path`` for the bundle: its path, byte size and UTC mtime.

    The modification time is rendered as an ISO-8601 string in UTC.
    """
    info = path.stat()
    modified_at = datetime.fromtimestamp(info.st_mtime, tz=timezone.utc)
    return dict(
        path=str(path),
        size_bytes=info.st_size,
        mtime=modified_at.isoformat(),
    )
|
||||
|
||||
|
||||
def write_core_executable_metadata(bundle_dir: Path, core_files: list[Path]) -> None:
    """Write ``core-executables.json`` into the bundle.

    The file holds one entry per core file: the core's path plus the metadata
    of the executable inferred for it, or ``null`` when none could be inferred.
    """

    def describe(core_path: Path) -> dict[str, object]:
        found = infer_executable_for_core(core_path)
        return {
            "core": str(core_path),
            "executable": executable_metadata(found) if found else None,
        }

    entries = [describe(core_path) for core_path in core_files]
    write_text(bundle_dir / "core-executables.json", json.dumps(entries, indent=2))
|
||||
|
||||
|
||||
def copy_core_executables(bundle_dir: Path, core_files: list[Path]) -> None:
    """Copy the executable inferred for each core file into ``<bundle>/executables``.

    Each distinct executable is copied once. Executables located under
    ``RUNTIME_ROOT`` keep their path relative to that root; anything else is
    stored under ``executables/external/<absolute path without the root>`` so
    that an out-of-tree binary does not abort the whole collection.
    """
    executables_dir = bundle_dir / "executables"
    copied: set[Path] = set()
    for core_path in core_files:
        executable = infer_executable_for_core(core_path)
        if not executable or executable in copied:
            continue
        copied.add(executable)
        try:
            relative = executable.relative_to(RUNTIME_ROOT)
        except ValueError:
            # relative_to() raises when the (resolved) executable is not below
            # RUNTIME_ROOT, e.g. an execfn path elsewhere on the host or a
            # symlink that resolves outside the runtime tree.
            relative = Path("external") / executable.relative_to(executable.anchor)
        destination = executables_dir / relative
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(executable, destination)
|
||||
|
||||
|
||||
def git_summary(repo_path: Path) -> dict[str, object]:
|
||||
summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()}
|
||||
if not repo_path.exists():
|
||||
@@ -253,10 +180,8 @@ def main() -> int:
|
||||
|
||||
core_files = find_core_files()
|
||||
write_core_metadata(bundle_dir, core_files)
|
||||
write_core_executable_metadata(bundle_dir, core_files)
|
||||
if args.include_cores and core_files:
|
||||
copy_core_files(bundle_dir, core_files)
|
||||
copy_core_executables(bundle_dir, core_files)
|
||||
|
||||
print(bundle_dir)
|
||||
return 0
|
||||
|
||||
@@ -1,222 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Root directory searched for core files and used to resolve relative --core
# arguments. NOTE(review): "{{RUNTIME_ROOT}}" looks like a template placeholder
# that is substituted when the installer renders this script — confirm.
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line; ``--core`` and ``--exe`` are both optional."""
    cli = argparse.ArgumentParser(description="Generate a backtrace for a Metin runtime core file")
    options = (
        ("--core", "Core file path. Defaults to the newest core under the runtime tree."),
        ("--exe", "Executable path override. If omitted, infer it from the core path."),
    )
    for flag, help_text in options:
        cli.add_argument(flag, help=help_text)
    return cli.parse_args()
|
||||
|
||||
|
||||
def ensure_root() -> None:
    """Exit with the message ``Run as root.`` unless the effective uid is 0."""
    running_as_root = os.geteuid() == 0
    if not running_as_root:
        raise SystemExit("Run as root.")
|
||||
|
||||
|
||||
def run(command: list[str], check: bool = False) -> subprocess.CompletedProcess[str]:
    """Run ``command`` and capture its stdout/stderr as text.

    With ``check=True`` a non-zero exit status raises
    ``subprocess.CalledProcessError``; by default the caller inspects
    ``returncode`` itself.
    """
    options = {"check": check, "capture_output": True, "text": True}
    return subprocess.run(command, **options)
|
||||
|
||||
|
||||
def iter_core_files() -> list[Path]:
    """List regular files matching ``channels/**/core*`` under ``RUNTIME_ROOT``, newest first."""
    found = [entry for entry in RUNTIME_ROOT.glob("channels/**/core*") if entry.is_file()]
    found.sort(key=lambda entry: entry.stat().st_mtime, reverse=True)
    return found
|
||||
|
||||
|
||||
def resolve_core_path(core_arg: str | None) -> Path:
    """Decide which core file to analyse.

    Without ``core_arg`` the newest core found by ``iter_core_files()`` is
    used. A relative ``core_arg`` is first tried relative to ``RUNTIME_ROOT``
    and otherwise left relative to the current directory before resolving.

    Raises:
        SystemExit: if the requested core is not a regular file, or if no
            core files exist at all.
    """
    if not core_arg:
        newest_first = iter_core_files()
        if newest_first:
            return newest_first[0]
        raise SystemExit(f"No core files found under {RUNTIME_ROOT}")

    requested = Path(core_arg)
    if not requested.is_absolute():
        under_runtime = RUNTIME_ROOT / core_arg
        if under_runtime.exists():
            requested = under_runtime

    requested = requested.resolve()
    if requested.is_file():
        return requested
    raise SystemExit(f"Core file not found: {requested}")
|
||||
|
||||
|
||||
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Return the ``execfn`` path that ``file`` reports for the core, if usable.

    Yields ``None`` when ``file`` fails, prints no ``execfn: '...'`` entry, or
    names something that is not a regular file. The path is returned as
    reported, without resolving it.
    """
    file_result = run(["file", str(core_path)])
    if file_result.returncode == 0:
        execfn = re.search(r"execfn: '([^']+)'", file_result.stdout)
        if execfn:
            reported = Path(execfn.group(1))
            if reported.is_file():
                return reported
    return None
|
||||
|
||||
|
||||
def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
    """Pick the executable to pair with ``core_path``.

    An explicit ``exe_arg`` always wins. Otherwise candidates are tried in
    order: the ``execfn`` reported by ``file``, then a binary name derived
    from the directory holding the core (``db``, ``game_auth`` for ``auth``,
    or ``<channel dir>_<core dir>``). The first candidate that is a regular
    file is returned resolved.

    Raises:
        SystemExit: if ``exe_arg`` is not a regular file, or no candidate exists.
    """
    if exe_arg:
        override = Path(exe_arg).resolve()
        if override.is_file():
            return override
        raise SystemExit(f"Executable not found: {override}")

    guesses: list[Path] = []
    from_execfn = infer_execfn_from_file_output(core_path)
    if from_execfn:
        guesses.append(from_execfn)

    core_dir = core_path.parent
    dir_name = core_dir.name
    channel_name = core_dir.parent.name

    if dir_name == "db":
        guesses.append(core_dir / "db")
    elif dir_name == "auth":
        guesses.append(core_dir / "game_auth")
    elif dir_name.startswith("core") and channel_name.startswith("channel"):
        guesses.append(core_dir / f"{channel_name}_{dir_name}")

    chosen = next((guess for guess in guesses if guess.is_file()), None)
    if chosen is None:
        raise SystemExit(f"Could not infer executable for core file: {core_path}")
    return chosen.resolve()
|
||||
|
||||
|
||||
def preferred_debugger() -> str | None:
    """Return ``"gdb"`` or ``"lldb"`` (in that preference order) if found on PATH, else ``None``."""
    available = (tool for tool in ("gdb", "lldb") if shutil.which(tool))
    return next(available, None)
|
||||
|
||||
|
||||
def format_section(title: str, body: str) -> str:
    """Render a report section: a ``== title ==`` header, the body with
    trailing whitespace stripped, and a final newline."""
    header = f"== {title} =="
    return "\n".join((header, body.rstrip(), ""))
|
||||
|
||||
|
||||
def render_file_info(path: Path) -> str:
    """Return a report section holding the output of ``file <path>``.

    stdout is preferred; stderr is used when stdout is empty, and a
    ``<no output>`` marker when both are empty.
    """
    result = run(["file", str(path)])
    text = result.stdout
    if not text:
        text = result.stderr
    if not text:
        text = "<no output>"
    return format_section(f"file {path}", text)
|
||||
|
||||
|
||||
def render_executable_freshness(core_path: Path, exe_path: Path) -> str:
    """Return a section comparing the mtimes of the core file and the executable.

    A warning line is emitted when the executable was modified more than one
    second after the core file, since its symbols may then no longer match
    the binary that crashed; otherwise a plain status line is emitted.
    """
    core_ts = core_path.stat().st_mtime
    exe_ts = exe_path.stat().st_mtime

    def as_utc_iso(timestamp: float) -> str:
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).isoformat()

    if exe_ts > core_ts + 1:
        verdict = (
            "warning: executable is newer than the core file; symbols may not match. "
            "Prefer an executable snapshot from an incident bundle or pass --exe explicitly."
        )
    else:
        verdict = "status: executable is not newer than the core file"

    report = "\n".join(
        (
            f"core_mtime: {as_utc_iso(core_ts)}",
            f"exe_mtime: {as_utc_iso(exe_ts)}",
            verdict,
        )
    )
    return format_section("core/executable freshness", report)
|
||||
|
||||
|
||||
def render_readelf_notes(core_path: Path) -> str:
    """Return a section with ``readelf -n`` output for the core file.

    Returns an empty string when ``readelf`` is not on PATH, so the caller can
    leave the section out.
    """
    if shutil.which("readelf") is None:
        return ""
    result = run(["readelf", "-n", str(core_path)])
    text = result.stdout
    if not text:
        text = result.stderr
    if not text:
        text = "<no output>"
    return format_section(f"readelf -n {core_path}", text)
|
||||
|
||||
|
||||
def render_debugger_backtrace(debugger: str, exe_path: Path, core_path: Path) -> str:
    """Run the chosen debugger in batch mode and return its output as a section.

    ``gdb`` is asked for ``thread apply all bt full``; ``lldb`` for
    ``thread backtrace all``. The section starts with the command line that
    was run, followed by stdout (or stderr, or ``<no output>`` if both are
    empty).

    Raises:
        SystemExit: if ``debugger`` is neither ``"gdb"`` nor ``"lldb"``.
    """
    exe = str(exe_path)
    core = str(core_path)
    command_lines = {
        "gdb": [
            "gdb",
            "-batch",
            "-ex",
            "set pagination off",
            "-ex",
            "thread apply all bt full",
            exe,
            core,
        ],
        "lldb": [
            "lldb",
            "--batch",
            "-o",
            "thread backtrace all",
            "-c",
            core,
            exe,
        ],
    }
    if debugger not in command_lines:
        raise SystemExit(f"Unsupported debugger: {debugger}")
    command = command_lines[debugger]

    result = run(command)
    output = result.stdout or result.stderr or "<no output>"
    shown_command = " ".join(command)
    return format_section("backtrace", f"$ {shown_command}\n\n{output}")
|
||||
|
||||
|
||||
def main() -> int:
    """Build and print the backtrace report for one core file.

    The report consists of a summary, ``file`` output for the core and the
    executable, the freshness comparison, ``readelf`` notes when available,
    and either the debugger backtrace or a hint to install a debugger.

    Returns:
        ``0`` on success; failures exit earlier through ``SystemExit``.
    """
    args = parse_args()
    ensure_root()

    core_path = resolve_core_path(args.core)
    exe_path = infer_executable(core_path, args.exe)
    debugger = preferred_debugger()

    summary_lines = [
        f"core: {core_path}",
        f"executable: {exe_path}",
        f"debugger: {debugger or '<none>'}",
    ]
    sections = [format_section("summary", "\n".join(summary_lines))]
    sections.append(render_file_info(core_path))
    sections.append(render_file_info(exe_path))
    sections.append(render_executable_freshness(core_path, exe_path))

    # render_readelf_notes() returns "" when readelf is unavailable.
    readelf_section = render_readelf_notes(core_path)
    if readelf_section:
        sections.append(readelf_section)

    if debugger:
        backtrace_section = render_debugger_backtrace(debugger, exe_path, core_path)
    else:
        backtrace_section = format_section(
            "backtrace",
            "No supported debugger found. Install gdb or lldb on the host to generate a stack trace.",
        )
    sections.append(backtrace_section)

    report = "\n".join(section.rstrip() for section in sections if section).rstrip()
    print(report)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,6 @@ import channel_inventory
|
||||
|
||||
TEMPLATES_DIR = SCRIPT_DIR / "templates"
|
||||
BIN_DIR = SCRIPT_DIR / "bin"
|
||||
HEALTHCHECK_DIR = REPO_ROOT / "deploy" / "healthcheck"
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
@@ -32,11 +31,6 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
|
||||
parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds")
|
||||
parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install")
|
||||
parser.add_argument(
|
||||
"--allow-internal-only",
|
||||
action="store_true",
|
||||
help="Allow installs that omit every client-visible public channel",
|
||||
)
|
||||
|
||||
channel_group = parser.add_mutually_exclusive_group(required=True)
|
||||
channel_group.add_argument(
|
||||
@@ -81,7 +75,7 @@ def copy_file(source: Path, destination: Path, mode: int) -> None:
|
||||
|
||||
def resolve_channels(args: argparse.Namespace) -> list[int]:
|
||||
try:
|
||||
selected_channels = channel_inventory.resolve_selected_channels(
|
||||
return channel_inventory.resolve_selected_channels(
|
||||
channel_limit=args.channel_limit,
|
||||
explicit_channels=args.channels,
|
||||
)
|
||||
@@ -89,20 +83,6 @@ def resolve_channels(args: argparse.Namespace) -> list[int]:
|
||||
print(str(exc), file=sys.stderr)
|
||||
raise SystemExit(1)
|
||||
|
||||
if not args.allow_internal_only and not channel_inventory.has_public_channel(
|
||||
selected_channels,
|
||||
client_visible_only=True,
|
||||
):
|
||||
print(
|
||||
"Selected channels do not include any client-visible public channel. "
|
||||
"Add a public channel such as --channel 1, or pass --allow-internal-only "
|
||||
"if an auth/internal-only stack is intentional.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
raise SystemExit(1)
|
||||
|
||||
return selected_channels
|
||||
|
||||
|
||||
def resolve_instances(selected_channels: list[int]) -> list[str]:
    """Return what ``channel_inventory.get_instances`` reports for the selected channels."""
    instances = channel_inventory.get_instances(selected_channels)
    return instances
|
||||
@@ -168,16 +148,6 @@ def main() -> int:
|
||||
render_template(BIN_DIR / "metin-collect-incident.in", template_values),
|
||||
0o700,
|
||||
)
|
||||
write_text(
|
||||
sbin_dir / "metin-core-backtrace",
|
||||
render_template(BIN_DIR / "metin-core-backtrace.in", template_values),
|
||||
0o700,
|
||||
)
|
||||
copy_file(
|
||||
HEALTHCHECK_DIR / "metin-login-healthcheck.sh",
|
||||
sbin_dir / "metin-login-healthcheck",
|
||||
0o700,
|
||||
)
|
||||
|
||||
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
|
||||
run(["systemd-analyze", "verify", *verify_units])
|
||||
|
||||
@@ -19,12 +19,7 @@ Installed on the VPS:
|
||||
|
||||
## What The Headless Healthcheck Verifies
|
||||
|
||||
The installed wrapper supports two modes:
|
||||
|
||||
- `--mode ready`
|
||||
- `--mode full`
|
||||
|
||||
The full mode performs two headless passes against the live server:
|
||||
The installed wrapper now performs two headless passes against the live server:
|
||||
|
||||
1. a select-screen create/delete pass
|
||||
2. a full auth + channel + `ENTERGAME` + mall pass
|
||||
@@ -53,7 +48,7 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
|
||||
|
||||
## How The Wrapper Works
|
||||
|
||||
`metin-login-healthcheck.sh --mode full` does the following:
|
||||
`metin-login-healthcheck.sh` does the following:
|
||||
|
||||
- creates two temporary accounts in MariaDB
|
||||
- runs `metin_login_smoke` once in create/delete mode on the select screen
|
||||
@@ -63,15 +58,6 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
|
||||
- deletes both temporary accounts and any temporary character rows on exit
|
||||
- passes the configured client version expected by the server
|
||||
|
||||
`metin-login-healthcheck.sh --mode ready` is intentionally lighter:
|
||||
|
||||
- creates one temporary account in MariaDB
|
||||
- runs one headless login flow through auth + channel + character create + select + `ENTERGAME`
|
||||
- does not run the delete pass
|
||||
- does not open the mall
|
||||
|
||||
This mode is the right readiness probe immediately after a service restart. It verifies that the server is login-ready without depending on the deeper post-login mall path.
|
||||
|
||||
It is intended for manual admin use on the VPS.
|
||||
|
||||
## Usage
|
||||
@@ -83,12 +69,6 @@ ssh mt2
|
||||
/usr/local/sbin/metin-login-healthcheck
|
||||
```
|
||||
|
||||
Readiness-only mode:
|
||||
|
||||
```bash
|
||||
/usr/local/sbin/metin-login-healthcheck --mode ready
|
||||
```
|
||||
|
||||
The smoke binary can also be run directly:
|
||||
|
||||
```bash
|
||||
@@ -129,19 +109,6 @@ Useful direct flags:
|
||||
- `--mall-password=PASSWORD`
|
||||
after `ENTERGAME`, opens the in-game mall via encrypted chat command and verifies `MALL_OPEN`
|
||||
|
||||
Operational CLI:
|
||||
|
||||
```bash
|
||||
metinctl public-ready
|
||||
metinctl healthcheck --mode full
|
||||
metinctl healthcheck --mode ready
|
||||
metinctl wait-ready
|
||||
```
|
||||
|
||||
`metinctl public-ready` verifies that every enabled client-visible public channel unit is active and that its declared listener port is actually up.
|
||||
|
||||
`metinctl wait-ready` now first waits for the public runtime to be up and only then runs the lighter `ready` login probe. The deeper `full` mode remains available as an explicit admin healthcheck.
|
||||
|
||||
Example negative auth test:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -32,26 +32,15 @@ The Debian deployment installs:
|
||||
|
||||
`metinctl` is a lightweight operational CLI for:
|
||||
|
||||
- showing an operational summary
|
||||
- showing recent auth success/failure activity
|
||||
- showing auth activity grouped by source IP
|
||||
- showing recent `syserr.log` entries
|
||||
- summarizing recurring `syserr.log` entries
|
||||
- viewing inventory
|
||||
- listing managed units
|
||||
- checking service status
|
||||
- listing declared ports
|
||||
- verifying that enabled public client-facing channels are actually up
|
||||
- listing recent auth failures
|
||||
- listing recent login sessions
|
||||
- listing stale open sessions without logout
|
||||
- restarting the whole stack or specific channels/instances
|
||||
- viewing logs
|
||||
- listing core files in the runtime tree
|
||||
- generating a backtrace for the newest or selected core file
|
||||
- collecting incident bundles
|
||||
- running the root-only headless healthcheck
|
||||
- waiting for login-ready state after restart
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -67,90 +56,12 @@ Show current unit state:
|
||||
metinctl status
|
||||
```
|
||||
|
||||
Show a quick operational summary:
|
||||
|
||||
```bash
|
||||
metinctl summary
|
||||
```
|
||||
|
||||
Show declared ports and whether they are currently listening:
|
||||
|
||||
```bash
|
||||
metinctl ports --live
|
||||
```
|
||||
|
||||
Verify that enabled client-visible public channels are active and listening:
|
||||
|
||||
```bash
|
||||
metinctl public-ready
|
||||
```
|
||||
|
||||
Show recent real auth failures and skip smoke-test logins:
|
||||
|
||||
```bash
|
||||
metinctl auth-failures
|
||||
```
|
||||
|
||||
Show recent auth success/failure flow:
|
||||
|
||||
```bash
|
||||
metinctl auth-activity
|
||||
```
|
||||
|
||||
Show only recent auth failures including smoke tests:
|
||||
|
||||
```bash
|
||||
metinctl auth-activity --status failure --include-smoke
|
||||
```
|
||||
|
||||
Show auth activity grouped by IP:
|
||||
|
||||
```bash
|
||||
metinctl auth-ips
|
||||
```
|
||||
|
||||
Show the latest runtime errors collected from all `syserr.log` files:
|
||||
|
||||
```bash
|
||||
metinctl recent-errors
|
||||
```
|
||||
|
||||
Show the most repeated runtime errors in the last 24 hours:
|
||||
|
||||
```bash
|
||||
metinctl error-summary
|
||||
```
|
||||
|
||||
Include smoke-test failures too:
|
||||
|
||||
```bash
|
||||
metinctl auth-failures --include-smoke
|
||||
```
|
||||
|
||||
Show recent login sessions from `log.loginlog2`:
|
||||
|
||||
```bash
|
||||
metinctl sessions
|
||||
```
|
||||
|
||||
Show only sessions that still have no recorded logout:
|
||||
|
||||
```bash
|
||||
metinctl sessions --active-only
|
||||
```
|
||||
|
||||
Show stale open sessions older than 30 minutes:
|
||||
|
||||
```bash
|
||||
metinctl session-audit
|
||||
```
|
||||
|
||||
Use a different stale threshold:
|
||||
|
||||
```bash
|
||||
metinctl session-audit --stale-minutes 10
|
||||
```
|
||||
|
||||
Restart only channel 1 cores:
|
||||
|
||||
```bash
|
||||
@@ -169,22 +80,10 @@ Tail auth logs:
|
||||
metinctl logs auth -n 200 -f
|
||||
```
|
||||
|
||||
Run the deeper end-to-end healthcheck:
|
||||
Run the end-to-end healthcheck:
|
||||
|
||||
```bash
|
||||
metinctl healthcheck --mode full
|
||||
```
|
||||
|
||||
Run the lighter readiness probe:
|
||||
|
||||
```bash
|
||||
metinctl healthcheck --mode ready
|
||||
```
|
||||
|
||||
Wait until a restarted stack is login-ready:
|
||||
|
||||
```bash
|
||||
metinctl wait-ready
|
||||
metinctl healthcheck
|
||||
```
|
||||
|
||||
List core files currently present in the runtime tree:
|
||||
@@ -193,18 +92,6 @@ List core files currently present in the runtime tree:
|
||||
metinctl cores
|
||||
```
|
||||
|
||||
Generate a backtrace for the newest core file:
|
||||
|
||||
```bash
|
||||
metinctl backtrace
|
||||
```
|
||||
|
||||
Generate a backtrace for one specific core file:
|
||||
|
||||
```bash
|
||||
metinctl backtrace --core channels/channel1/core1/core.2255450
|
||||
```
|
||||
|
||||
Collect an incident bundle with logs, unit status, port state and repository revisions:
|
||||
|
||||
```bash
|
||||
@@ -226,7 +113,6 @@ It also reconciles enabled game instance units against the selected channels:
|
||||
- selected game units are enabled
|
||||
- stale game units are disabled
|
||||
- if `--restart` is passed, stale game units are disabled with `--now`
|
||||
- installs now refuse an auth/internal-only channel selection unless you pass `--allow-internal-only`
|
||||
|
||||
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
|
||||
|
||||
@@ -235,7 +121,6 @@ This makes channel enablement declarative instead of depending on whatever happe
|
||||
The Debian deployment now also installs:
|
||||
|
||||
- `/usr/local/sbin/metin-collect-incident`
|
||||
- `/usr/local/sbin/metin-core-backtrace`
|
||||
|
||||
The collector creates a timestamped bundle under:
|
||||
|
||||
@@ -249,16 +134,7 @@ Each bundle contains:
|
||||
- listener state from `ss -ltnp`
|
||||
- tailed runtime `syslog.log` and `syserr.log` files
|
||||
- metadata for any `core*` files found under `runtime/server/channels`
|
||||
- metadata for the executable inferred for each core file
|
||||
|
||||
If you call it with `--include-cores`, matching core files are copied into the bundle as well. In the same mode, the inferred executable files are copied too, so a later redeploy does not destroy your ability to symbolicate the crash with the original binary snapshot.
|
||||
If you call it with `--include-cores`, matching core files are copied into the bundle as well.
|
||||
|
||||
The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes are allowed to emit core dumps when the host kernel/core policy permits it.
|
||||
|
||||
For quick manual crash triage outside the incident bundle flow, use:
|
||||
|
||||
```bash
|
||||
metinctl backtrace
|
||||
```
|
||||
|
||||
It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable. If the current executable is newer than the core file, the helper prints an explicit warning because the backtrace may no longer match the crashed binary.
|
||||
|
||||
424
docs/server-runtime.md
Normal file
424
docs/server-runtime.md
Normal file
@@ -0,0 +1,424 @@
|
||||
# Server runtime audit
|
||||
|
||||
Engineer-to-engineer writeup of what the VPS `mt2.jakubkadlec.dev` is actually
|
||||
running as of 2026-04-14. Existing docs under `docs/` describe the intended
|
||||
layout (`debian-runtime.md`, `database-bootstrap.md`, `config-and-secrets.md`);
|
||||
this document is a ground-truth snapshot from a live recon session, with PIDs,
|
||||
paths, versions and surprises.
|
||||
|
||||
Companion: `docs/server-topology.md` for the ASCII diagram and port table.
|
||||
|
||||
## TL;DR
|
||||
|
||||
- Only one metin binary is alive right now: the **`db`** helper on port `9000`
|
||||
(PID `1788997` at audit time, cwd
|
||||
`/home/mt2.jakubkadlec.dev/metin/runtime/server/channels/db`).
|
||||
- **`game_auth` and all `channel*_core*` processes are NOT running.** The listing
|
||||
in the original prompt (auth `:11000/12000`, channel1 cores `:11011/12011`
|
||||
etc.) reflects *intended* state from the systemd units, not the current live
|
||||
process table. `ss -tlnp` only shows `0.0.0.0:9000` for m2.
|
||||
- The per-instance game/auth binaries are **not present on disk either**. Only
|
||||
`share/bin/db` and the shared `share/bin/game` exist; there is no `share/bin/game_auth` and no
|
||||
`share/bin/channel*_core*`. Those channels cannot start even if requested.
|
||||
- The `db` unit is currently **flapping / crash-looping**. `systemctl` reports
|
||||
`deactivating (stop-sigterm)`; syserr.log shows repeated
|
||||
`Connection reset by peer` from client peers (auth/game trying to reconnect
|
||||
is the usual culprit, but here nobody is connecting — cause needs
|
||||
verification). Two fresh `core.<pid>` files (97 MB each) sit in the db
|
||||
channel dir from 13:24 and 13:25 today.
|
||||
- Orchestration is **pure systemd**, not the upstream `start.py` / tmux setup.
|
||||
The README still documents `start.py`, so the README is stale for the Debian
|
||||
VPS; `deploy/systemd/` + `docs/debian-runtime.md` are authoritative.
|
||||
- MariaDB 11.8.6 is the backing store on `127.0.0.1:3306`. The DB user the
|
||||
stack is configured to use is `bootstrap` (from `share/conf/db.txt` /
|
||||
`game.txt`). The actual password is injected via `/etc/metin/metin.env`,
|
||||
which is `root:root 600` and intentionally unreadable by the runtime user and the
|
||||
inspector account.
|
||||
|
||||
## Host
|
||||
|
||||
- Hostname: `vmi3229987` (Contabo), public name `mt2.jakubkadlec.dev`.
|
||||
- OS: Debian 13 (trixie).
|
||||
- MariaDB: `mariadbd` 11.8.6, PID `103624`, listening on `127.0.0.1:3306`.
|
||||
- All metin services run as the unprivileged user
|
||||
`mt2.jakubkadlec.dev:mt2.jakubkadlec.dev`.
|
||||
- Runtime root: `/home/mt2.jakubkadlec.dev/metin/runtime/server` (755 MB across
|
||||
`channels/`, 123 MB across `share/`, total metin workspace on the box
|
||||
~1.7 GB).
|
||||
|
||||
## Processes currently alive
|
||||
|
||||
From `ps auxf` + `ss -tlnp` at audit time:
|
||||
|
||||
```
|
||||
mysql 103624 /usr/sbin/mariadbd — 127.0.0.1:3306
|
||||
mt2.j+ 1788997 /home/.../channels/db/db — 0.0.0.0:9000
|
||||
```
|
||||
|
||||
No other m2 binaries show up. `ps` has **zero** matches for `game_auth`,
|
||||
`channel1_core1`, `channel1_core2`, `channel1_core3`, `channel99_core1`.
|
||||
|
||||
Per-process inspection:
|
||||
|
||||
| PID | cwd | exe (resolved) | fds of interest |
|
||||
| ------- | ----------------------------------------------- | ------------------------------------------------- | --------------- |
|
||||
| 1788997 | `.../runtime/server/channels/db` | `.../share/bin/db` (via `./db` symlink) | fd 3→syslog.log, fd 4→syserr.log, fd 11 TCP `*:9000`, fd 17 `[eventpoll]` (epoll fdwatch) |
|
||||
|
||||
The `db` symlink inside the channel dir resolves to `../../share/bin/db`,
|
||||
which is an `ELF 64-bit LSB pie executable, x86-64, dynamically linked,
|
||||
BuildID fc049d0f..., not stripped`. Build identifier from
|
||||
`channels/db/VERSION.txt`: **`db revision: b2b037f-dirty`** — the dirty tag is
|
||||
a red flag, the build wasn't from a clean checkout of `m2dev-server-src`.
|
||||
|
||||
The `usage.txt` in the same directory shows hourly heartbeat rows with
|
||||
`| 0 | 0 |` since 2026-04-13 21:00 (the "sessions / active" columns are
|
||||
stuck at zero — consistent with no game channels being connected).
|
||||
|
||||
## Binaries actually present on disk
|
||||
|
||||
```
|
||||
/home/mt2.jakubkadlec.dev/metin/runtime/server/share/bin/
|
||||
├── db ← present, used
|
||||
└── game ← present (shared game binary, but not launched under any
|
||||
instance name that the systemd generator expects)
|
||||
```
|
||||
|
||||
What is NOT present:
|
||||
|
||||
- `share/bin/game_auth`
|
||||
- `share/bin/channel1_core1`, `channel1_core2`, `channel1_core3`
|
||||
- `share/bin/channel99_core1`
|
||||
|
||||
The `metin-game-instance-start` helper (`/usr/local/libexec/...`) is a bash
|
||||
wrapper that `cd`s into `channels/<channel>/<core>/` and execs `./<instance>`,
|
||||
e.g. `./channel1_core1`. Those per-instance binaries don't exist yet. The
|
||||
channel dirs themselves (`channel1/core1/`, etc.) already contain the
|
||||
scaffolding (`CONFIG`, `conf`, `data`, `log`, `mark`, `package`,
|
||||
`p2p_packet_info.txt`, `packet_info.txt`, `syserr.log`, `syslog.log`,
|
||||
`version.txt`), but `version.txt` says `game revision: unknown` and the
|
||||
per-instance executable file is missing. The log directory has a single
|
||||
stale `syslog_2026-04-13.log`.
|
||||
|
||||
Interpretation: the deploy pipeline that builds `m2dev-server-src` and drops
|
||||
instance binaries into `share/bin/` has not yet been run (or has not been
|
||||
re-run since the tree was laid out on 2026-04-13). Once Jakub's
|
||||
`debian-foundation` build produces per-instance symlinked/hardlinked
|
||||
binaries, the `metin-game@*` units should come up automatically on the next
|
||||
`systemctl restart metin-server`.
|
||||
|
||||
## How things are started
|
||||
|
||||
All orchestration goes through systemd units under `/etc/systemd/system/`,
|
||||
installed from `deploy/systemd/` via `deploy/systemd/install_systemd.py`.
|
||||
|
||||
Unit list and roles:
|
||||
|
||||
| Unit | Type | Role |
|
||||
| ----------------------------------------- | -------- | -------------------------------------------- |
|
||||
| `metin-server.service` | oneshot | top-level grouping, `Requires=mariadb.service`. `ExecStart=/bin/true`, `RemainAfterExit=yes`. All sub-units are `PartOf=metin-server.service` so restarting `metin-server` cycles everything. |
|
||||
| `metin-db.service` | simple | launches `.../channels/db/db` as runtime user, `Restart=on-failure`, `LimitCORE=infinity`, env file `/etc/metin/metin.env`. |
|
||||
| `metin-db-ready.service` | oneshot | runs `/usr/local/libexec/metin-wait-port 127.0.0.1 9000 30` — gate that blocks auth+game until the DB socket is listening. |
|
||||
| `metin-auth.service` | simple | launches `.../channels/auth/game_auth`. Requires db-ready. |
|
||||
| `metin-game@channel1_core1..3.service` | template | each runs `/usr/local/libexec/metin-game-instance-start <instance>` which execs `./<instance>` in that channel dir. |
|
||||
| `metin-game@channel99_core1.service` | template | same, for channel 99. |
|
||||
|
||||
Dependency chain:
|
||||
|
||||
```
|
||||
mariadb.service
|
||||
│
|
||||
▼
|
||||
metin-db.service ──► metin-db-ready.service ──► metin-auth.service
|
||||
└► metin-game@*.service
|
||||
│
|
||||
▼
|
||||
metin-server.service (oneshot umbrella)
|
||||
```
|
||||
|
||||
All units have `PartOf=metin-server.service`, `Restart=on-failure`,
|
||||
`LimitNOFILE=65535`, `LimitCORE=infinity`. None run in Docker. None use tmux,
|
||||
screen or the upstream `start.py`. **The upstream `start.py` / `stop.py` in
|
||||
the repo are NOT wired up on this host** and should be treated as FreeBSD-era
|
||||
legacy.
|
||||
|
||||
The per-instance launcher `/usr/local/libexec/metin-game-instance-start`
|
||||
(installed by `install_systemd.py`) is:
|
||||
|
||||
```bash
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
instance="${1:?missing instance name}"
|
||||
root_dir="/home/mt2.jakubkadlec.dev/metin/runtime/server/channels"
|
||||
channel_dir="${instance%_*}" # e.g. channel1 from channel1_core2
|
||||
core_dir="${instance##*_}" # e.g. core2
|
||||
workdir="${root_dir}/${channel_dir}/${core_dir}"
|
||||
cd "$workdir"
|
||||
exec "./${instance}"
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- the `%_*` / `##*_` parse is brittle — an instance name with more than one
|
||||
underscore would misbehave. For current naming (`channelN_coreM`) it works.
|
||||
- the helper does not redirect stdout/stderr; both go to the journal via
|
||||
systemd.
|
||||
|
||||
## Config files the binaries actually read
|
||||
|
||||
All m2 config files referenced by the running/installed stack, resolved to
|
||||
their real path on disk:
|
||||
|
||||
| Config file | Read by | Purpose |
|
||||
| ------------------------------------------------------------------------ | ------------- | --------------------------------------------------- |
|
||||
| `share/conf/db.txt` | `db` | SQL hosts, BIND_PORT=9000, item id range, hotbackup |
|
||||
| `share/conf/game.txt` | game cores | DB_ADDR=127.0.0.1, DB_PORT=9000, SQL creds, flags |
|
||||
| `share/conf/CMD` | game cores | in-game command ACL (notice, warp, item, …) |
|
||||
| `share/conf/item_proto.txt`, `mob_proto.txt`, `item_names*.txt`, `mob_names*.txt` | both db and game | static content tables |
|
||||
| `channels/db/conf` (symlink → `share/conf`) | `db` | every db channel looks into this flat conf tree |
|
||||
| `channels/db/data` (symlink → `share/data`) | `db`/`game` | mob/pc/dungeon/spawn data |
|
||||
| `channels/db/locale` (symlink → `share/locale`) | all | locale assets |
|
||||
| `channels/auth/CONFIG` | `game_auth` | `HOSTNAME: auth`, `CHANNEL: 1`, `PORT: 11000`, `P2P_PORT: 12000`, `AUTH_SERVER: master` |
|
||||
| `channels/channel1/core1/CONFIG` | core1 | `HOSTNAME: channel1_1`, `CHANNEL: 1`, `PORT: 11011`, `P2P_PORT: 12011`, `MAP_ALLOW: 1 4 5 6 3 23 43 112 107 67 68 72 208 302 304` |
|
||||
| `channels/channel1/core2/CONFIG` | core2 | `PORT: 11012`, `P2P_PORT: 12012` |
|
||||
| `channels/channel1/core3/CONFIG` | core3 | `PORT: 11013`, `P2P_PORT: 12013` |
|
||||
| `channels/channel99/core1/CONFIG` | ch99 core1 | `HOSTNAME: channel99_1`, `CHANNEL: 99`, `PORT: 11991`, `P2P_PORT: 12991`, `MAP_ALLOW: 113 81 100 101 103 105 110 111 114 118 119 120 121 122 123 124 125 126 127 128 181 182 183 200` |
|
||||
| `/etc/metin/metin.env` | all systemd units via `EnvironmentFile=-` | host-local secrets/overrides, root:root mode 600. Contents not readable during this audit. |
|
||||
|
||||
Flat `share/conf/db.txt` (verbatim, with bootstrap secrets):
|
||||
|
||||
```
|
||||
WELCOME_MSG = "Database connector is running..."
|
||||
SQL_ACCOUNT = "127.0.0.1 account bootstrap change-me 0"
|
||||
SQL_PLAYER = "127.0.0.1 player bootstrap change-me 0"
|
||||
SQL_COMMON = "127.0.0.1 common bootstrap change-me 0"
|
||||
SQL_HOTBACKUP= "127.0.0.1 hotbackup bootstrap change-me 0"
|
||||
TABLE_POSTFIX = ""
|
||||
BIND_PORT = 9000
|
||||
CLIENT_HEART_FPS = 60
|
||||
HASH_PLAYER_LIFE_SEC = 600
|
||||
BACKUP_LIMIT_SEC = 3600
|
||||
PLAYER_ID_START = 100
|
||||
PLAYER_DELETE_LEVEL_LIMIT = 70
|
||||
PLAYER_DELETE_CHECK_SIMPLE = 1
|
||||
ITEM_ID_RANGE = 2000000000 2100000000
|
||||
MIN_LENGTH_OF_SOCIAL_ID = 6
|
||||
SIMPLE_SOCIALID = 1
|
||||
```
|
||||
|
||||
The `bootstrap` / `change-me` values are git-tracked placeholders.
|
||||
`config-and-secrets.md` explicitly says these are templates, and real values
|
||||
are expected to come from `/etc/metin/metin.env`. This presumably works because the
|
||||
server source re-reads credentials from the environment when injected; verify
|
||||
by grepping `m2dev-server-src` for the SQL env var names used by `db`/`game`.
|
||||
(**Open question**: confirm which env var names override the in-file creds;
|
||||
the audit session couldn't read `metin.env` directly.)
|
||||
|
||||
## Database
|
||||
|
||||
- Engine: **MariaDB 11.8.6** (`mariadb --version`).
|
||||
- PID: 103624, listening on `127.0.0.1:3306` only. No external TCP
|
||||
exposure, no unix socket checked (likely `/run/mysqld/mysqld.sock`).
|
||||
- Expected databases from `docs/database-bootstrap.md`: `account`, `player`,
|
||||
`common`, `log`, `hotbackup`.
|
||||
- Stack-side DB user: `bootstrap` (placeholder in git, real password in
|
||||
`/etc/metin/metin.env`).
|
||||
- Could not enumerate actual tables during the audit — both `mysql -uroot`
|
||||
and `sudo -u mt2.jakubkadlec.dev mariadb` failed (Access denied), since
|
||||
root uses unix-socket auth for `root@localhost` and the runtime user has
|
||||
no CLI credentials outside the systemd environment.
|
||||
- **To inspect the DB read-only:** either run as root with
|
||||
`sudo mariadb` (unix socket auth — needs confirmation it's enabled), or
|
||||
open `/etc/metin/metin.env` as root, grab the `bootstrap` password, then
|
||||
`mariadb -ubootstrap -p account` etc. Do not attempt writes.
|
||||
|
||||
## Logging
|
||||
|
||||
Every m2 process writes two files in its channel dir, via fd 3 / fd 4:
|
||||
|
||||
- `syslog.log` — verbose info stream (rotated by date in some dirs:
|
||||
`channel1/core1/log/syslog_2026-04-13.log`).
|
||||
- `syserr.log` — error stream. Look here first on crash.
|
||||
|
||||
The `db` channel writes a particularly large `syslog.log` (36 MB today; rotation
|
||||
appears to be manual — there is a `log/` dir with a daily file but the
|
||||
current `syslog.log` is at the top level) and drops `core.<pid>` ELF cores
|
||||
into the channel dir on SIGSEGV/SIGABRT because `LimitCORE=infinity` is set.
|
||||
|
||||
systemd journal captures stdout/stderr as well, so `journalctl -u metin-db
|
||||
--since '1 hour ago'` is the fastest way to see startup banners and
|
||||
`systemd`-observed restarts. Example from this audit:
|
||||
|
||||
```
|
||||
Apr 14 13:26:40 vmi3229987 db[1788997]: Real Server
|
||||
Apr 14 13:26:40 vmi3229987 db[1788997]: Success ACCOUNT
|
||||
Apr 14 13:26:40 vmi3229987 db[1788997]: Success COMMON
|
||||
Apr 14 13:26:40 vmi3229987 db[1788997]: Success HOTBACKUP
|
||||
Apr 14 13:26:40 vmi3229987 db[1788997]: mysql_real_connect: Lost connection
|
||||
to server at 'sending authentication information', system error: 104
|
||||
```
|
||||
|
||||
On every start, `db` opens *more than a dozen* AsyncSQL pools ("AsyncSQL:
|
||||
connected to 127.0.0.1 (reconnect 1)" repeated ~12 times), suggesting a large
|
||||
per-instance pool size. Worth checking if that needs tuning.
|
||||
|
||||
The current `syserr.log` in `channels/db/` is dominated by:
|
||||
|
||||
```
|
||||
[error] [int CPeerBase::Recv()()] socket_read failed Connection reset by peer
|
||||
[error] [int CClientManager::Process()()] Recv failed
|
||||
```
|
||||
|
||||
which is the peer disconnect path. Since no auth/game peers should be
|
||||
connecting right now, this is either a leftover from an earlier start or
|
||||
something else (maybe a healthcheck probe) is touching 9000 and aborting.
|
||||
See open questions.
|
||||
|
||||
## Ports
|
||||
|
||||
Live `ss -tlnp` on the VPS (m2-relevant lines only):
|
||||
|
||||
| Address:port | Who | Exposure |
|
||||
| ---------------- | ------------ | -------------- |
|
||||
| `0.0.0.0:9000` | `db` | **INADDR_ANY** — listens on all interfaces. Look at this. |
|
||||
| `127.0.0.1:3306` | `mariadbd` | localhost only |
|
||||
|
||||
Not currently listening (would be if auth/game were up):
|
||||
|
||||
- `11000` / `12000` — auth client + p2p
|
||||
- `11011..11013` / `12011..12013` — channel1 cores + p2p
|
||||
- `11991` / `12991` — channel99 core1 + p2p
|
||||
|
||||
Other listeners on the host (not m2): `:22`, `:2222` (gitea ssh), `:25`
|
||||
(postfix loopback), `:80/:443` (Caddy), `:3000` (Gitea), `:2019` (Caddy
|
||||
admin), `:33891` (unknown loopback), `:5355` / `:53` (resolver).
|
||||
|
||||
**Firewalling note:** `db` binding to `0.0.0.0:9000` is a concern. In the
|
||||
normal m2 architecture, `db` only talks to auth/game cores on the same host
|
||||
and should bind to `127.0.0.1` only. Current binding is set by the
|
||||
`BIND_PORT = 9000` line in `share/conf/db.txt`, which in this server fork
|
||||
apparently defaults to `INADDR_ANY`. If the Contabo firewall or iptables/nft
|
||||
rules don't block 9000 from the outside, this is exposed. **Open question:
|
||||
verify iptables/nftables on the host, or move `db` to `127.0.0.1` explicitly
|
||||
in source / config.**
|
||||
|
||||
## Data directory layout
|
||||
|
||||
All under `/home/mt2.jakubkadlec.dev/metin/runtime/server/share/`:
|
||||
|
||||
```
|
||||
share/
|
||||
├── bin/ ← compiled binaries (only db + game present today)
|
||||
├── conf/ ← db.txt, game.txt, CMD, item_proto.txt, mob_proto.txt,
|
||||
│ item_names_*.txt, mob_names_*.txt (17 locales each)
|
||||
├── data/ ← DTA/, dungeon/, easterevent/, mob_spawn/, monster/,
|
||||
│ pc/, pc2/ (27 MB total)
|
||||
├── locale/ ← 86 MB, per-locale strings + binary quest outputs
|
||||
├── mark/
|
||||
└── package/
|
||||
```
|
||||
|
||||
Per-channel scaffolding under `channels/` symlinks `conf`, `data`, `locale`
|
||||
back into `share/`, so each channel reads from a single canonical content
|
||||
tree.
|
||||
|
||||
## Disk usage footprint
|
||||
|
||||
```
|
||||
/home/mt2.jakubkadlec.dev/metin/ 1.7 G (total metin workspace)
|
||||
runtime/server/share/ 123 M
|
||||
runtime/server/share/data/ 27 M
|
||||
runtime/server/share/locale/ 86 M
|
||||
runtime/server/channels/ 755 M
|
||||
channels/db/core.178508{2,8} ~194 M (two 97 MB coredumps)
|
||||
channels/db/syslog.log 36 M (grows fast)
|
||||
```
|
||||
|
||||
Core dumps dominate the channel dir footprint right now. Cleaning up old
|
||||
`core.*` files is safe when the db is not actively crashing (and only after
|
||||
Jakub has looked at them).
|
||||
|
||||
## How to restart channel1_core2 cleanly
|
||||
|
||||
Pre-flight checklist:
|
||||
|
||||
1. Confirm `share/bin/channel1_core2` actually exists on disk — right now it
|
||||
does **not**, so the instance cannot start. Skip straight to the
|
||||
"rebuild / redeploy" section in Jakub's `docs/deploy-workflow.md`
|
||||
before trying.
|
||||
2. Confirm `metin-db.service` and `metin-auth.service` are `active (running)`
|
||||
(`systemctl is-active metin-db metin-auth`). If not, fix upstream first —
|
||||
a clean restart of core2 requires a healthy auth + db.
|
||||
3. Check that no player is currently online on that core. With `usage.txt`
|
||||
at 0/0 this is trivially true today, but in prod do
|
||||
`cat channels/channel1/core2/usage.txt` first.
|
||||
4. Look at recent logs so you have a baseline:
|
||||
`journalctl -u metin-game@channel1_core2 -n 50 --no-pager`
|
||||
|
||||
Clean restart:
|
||||
|
||||
```bash
|
||||
# on the VPS as root or with sudo
|
||||
systemctl restart metin-game@channel1_core2.service
|
||||
systemctl status metin-game@channel1_core2.service --no-pager
|
||||
journalctl -u metin-game@channel1_core2.service -n 100 --no-pager -f
|
||||
```
|
||||
|
||||
Because the unit is `Type=simple` with `Restart=on-failure`, `systemctl
|
||||
restart` sends SIGTERM, waits up to `TimeoutStopSec=60`, then brings the
|
||||
process back up. The binary's own `hupsig()` handler logs the SIGTERM into
|
||||
`syserr.log` and shuts down gracefully.
|
||||
|
||||
Post-restart verification:
|
||||
|
||||
```bash
|
||||
ss -tlnp | grep -E ':(11012|12012)\b' # expect both ports listening
|
||||
tail -n 30 /home/mt2.jakubkadlec.dev/metin/runtime/server/channels/channel1/core2/syserr.log
|
||||
```
|
||||
|
||||
If the process refuses to stay up (`Restart=on-failure` loops it), **do not**
|
||||
just bump `RestartSec`; grab the last 200 journal lines and the last 200
|
||||
syserr lines and open an issue in `metin-server/m2dev-server-src` against
|
||||
Jakub. Do not edit the unit file ad-hoc on the host.
|
||||
|
||||
## Open questions
|
||||
|
||||
These are things the audit could not determine without making changes or
|
||||
getting more access. They need a human operator to resolve.
|
||||
|
||||
1. **Who produces the per-instance binaries** (`channel1_core1`,
|
||||
`channel1_core2`, `channel1_core3`, `channel99_core1`, `game_auth`)?
|
||||
The deploy flow expects them in `share/bin/` and channel dirs but they
|
||||
are missing. Is this still hand-built, or is there a make target that
|
||||
hardlinks `share/bin/game` into each `channel*/core*/<instance>` name?
|
||||
2. **Why is `db` currently flapping** (`deactivating (stop-sigterm)` in
|
||||
systemctl, plus two fresh core dumps on 2026-04-14 13:24/13:25 and
|
||||
dozens of `CPeerBase::Recv()` errors)? Nothing should be connecting to
|
||||
port 9000 right now.
|
||||
3. **What the real `metin.env` contains** — specifically, the actual
|
||||
`bootstrap` DB password, and whether there is a separate admin-page
|
||||
password override. Audit did not touch `/etc/metin/metin.env`.
|
||||
4. **Exact override-variable contract** between `share/conf/db.txt`
|
||||
placeholders and the env file. We need to verify which env var names
|
||||
the `db`/`game` source actually reads so we know whether the
|
||||
`change-me` literal is ever used at runtime.
|
||||
5. **Is `db` intended to bind `0.0.0.0:9000`?** From a defense-in-depth
|
||||
standpoint it should be `127.0.0.1`. Needs either a source fix or a
|
||||
host firewall rule. Check current nftables state.
|
||||
6. **`VERSION.txt` says `db revision: b2b037f-dirty`.** Which tree was this
|
||||
built from and why "dirty"? Point back at the `m2dev-server-src`
|
||||
commit and confirm the build artefact is reproducible.
|
||||
7. **Log rotation**: `channels/db/syslog.log` is already 36 MB today with
|
||||
nothing connected. There is a `channels/channel1/core1/log/` dated
|
||||
subdir convention that suggests daily rotation, but `db`'s own syslog
|
||||
is not rotating. Confirm whether `logrotate` or an in-process rotator
|
||||
is expected to own this.
|
||||
8. **Where does the hourly heartbeat in `usage.txt` come from?** Every ~1 h a row
|
||||
is appended — this is probably the `db` backup tick, but confirm it's
|
||||
not some cron job.
|
||||
9. **`mariadbd`'s live databases**: could not enumerate table names without
|
||||
credentials. `docs/database-bootstrap.md` lists the expected set;
|
||||
someone with `metin.env` access should confirm `account`, `player`,
|
||||
`common`, `log`, `hotbackup` are all present and populated.
|
||||
10. **Stale README**: top-level `README.md` still documents FreeBSD +
|
||||
`start.py`. Not urgent, but worth a `docs:` sweep to point readers at
|
||||
`docs/debian-runtime.md` as the canonical layout.
|
||||
89
docs/server-topology.md
Normal file
89
docs/server-topology.md
Normal file
@@ -0,0 +1,89 @@
|
||||
# Server topology
|
||||
|
||||
Companion diagram + port table for `docs/server-runtime.md`. Describes the
|
||||
*intended* production layout of the Debian VPS m2 stack. What is live today is
|
||||
only a subset (see the runtime audit for the actual state).
|
||||
|
||||
## ASCII diagram
|
||||
|
||||
```
|
||||
┌─────────────────────────────┐
|
||||
│ Players │
|
||||
│ (Metin2.exe + launcher) │
|
||||
└──────────────┬──────────────┘
|
||||
│ TCP (11000 auth, 11011..11013
|
||||
│ channel1, 11991 channel99)
|
||||
▼
|
||||
════════════════════════════ mt2.jakubkadlec.dev ════════════════════════════
|
||||
║ ║
|
||||
║ ┌──────────────────┐ ┌──────────────────┐ ┌────────────────┐ ║
|
||||
║ │ metin-auth │ │ metin-game@ch1_c1│ │ metin-game@ │ ║
|
||||
║ │ (game_auth) │ p2p │ (channel1_core1) │ p2p │ ch99_c1 │ ║
|
||||
║ │ :11000 client │◄────►│ :11011 client │◄────►│ :11991 client │ ║
|
||||
║ │ :12000 p2p │ │ :12011 p2p │ │ :12991 p2p │ ║
|
||||
║ └────────┬─────────┘ └────────┬─────────┘ └────────┬───────┘ ║
|
||||
║ │ │ metin-game@ch1_c2/c3 │ ║
|
||||
║ │ │ :11012/12012 :11013/12013 ║
|
||||
║ │ │ │ ║
|
||||
║ │ DB proxy/cache layer │ │ ║
|
||||
║ └────────────┐ ┌─────────┴─────────────┐ ┌────────┘ ║
|
||||
║ ▼ ▼ ▼ ▼ ║
|
||||
║ ┌──────────────────────────────────┐ ║
|
||||
║ │ metin-db (db) │ ║
|
||||
║ │ bind 0.0.0.0:9000 │ ║
|
||||
║ │ (gated by metin-db-ready.svc) │ ║
|
||||
║ └──────────────┬───────────────────┘ ║
|
||||
║ │ SQL ║
|
||||
║ ▼ ║
|
||||
║ ┌──────────────────────────────────┐ ║
|
||||
║ │ mariadbd 11.8.6 │ ║
|
||||
║ │ 127.0.0.1:3306 │ ║
|
||||
║ │ DBs: account, player, common, │ ║
|
||||
║ │ log, hotbackup │ ║
|
||||
║ └──────────────────────────────────┘ ║
|
||||
║ ║
|
||||
║ systemd orchestration: ║
|
||||
║ metin-server.service (oneshot umbrella, PartOf everything) ║
|
||||
║ ├─ Requires mariadb.service ║
|
||||
║ └─ metin-db → metin-db-ready → metin-auth + metin-game@<instance> ║
|
||||
║ ║
|
||||
║ Secrets: ║
|
||||
║ /etc/metin/metin.env (root:root 600, EnvironmentFile= for all units) ║
|
||||
║ ║
|
||||
════════════════════════════════════════════════════════════════════════════
|
||||
```
|
||||
|
||||
## Process / port table
|
||||
|
||||
| Process name | systemd unit | Client port | P2P port | Binds to | Role | Config file |
|
||||
| ---------------- | ------------------------------------- | ----------- | -------- | ---------- | ----------------------------------------------------- | ----------------------------------------------------- |
|
||||
| `db` | `metin-db.service` | 9000 | — | `0.0.0.0` | DB proxy/cache; talks to MariaDB, serves auth+game | `share/conf/db.txt` (+ `/etc/metin/metin.env`) |
|
||||
| `game_auth` | `metin-auth.service` | 11000 | 12000 | (default) | account login, token handoff to channels | `channels/auth/CONFIG` |
|
||||
| `channel1_core1` | `metin-game@channel1_core1.service` | 11011 | 12011 | (default) | ch1 core1, MAP_ALLOW 1 4 5 6 3 23 43 112 107 67 68 72 208 302 304 | `channels/channel1/core1/CONFIG` |
|
||||
| `channel1_core2` | `metin-game@channel1_core2.service` | 11012 | 12012 | (default) | ch1 core2, same channel different core | `channels/channel1/core2/CONFIG` |
|
||||
| `channel1_core3` | `metin-game@channel1_core3.service` | 11013 | 12013 | (default) | ch1 core3 | `channels/channel1/core3/CONFIG` |
|
||||
| `channel99_core1`| `metin-game@channel99_core1.service` | 11991 | 12991 | (default) | ch99 core1, event/test channel. MAP_ALLOW 113 81 100 101 103 105 110 111 114 118 119 120 121 122 123 124 125 126 127 128 181 182 183 200 | `channels/channel99/core1/CONFIG` |
|
||||
| `mariadbd` | `mariadb.service` (distro pkg) | 3306 | — | `127.0.0.1`| relational store | `/etc/mysql/mariadb.conf.d/*` |
|
||||
|
||||
Supporting, not a process:
|
||||
|
||||
| Unit | Type | Purpose |
|
||||
| -------------------------- | ------- | ---------------------------------------------------------------- |
|
||||
| `metin-server.service` | oneshot | umbrella. Restarting it cycles all sub-units via `PartOf=`. |
|
||||
| `metin-db-ready.service` | oneshot | `metin-wait-port 127.0.0.1 9000 30` — gates auth+game on db up. |
|
||||
|
||||
## Data flow
|
||||
|
||||
1. Player connects to `mt2.jakubkadlec.dev:11000` (auth).
|
||||
2. `game_auth` authenticates against `account` via `db` (`127.0.0.1:9000`)
|
||||
which proxies to MariaDB.
|
||||
3. `game_auth` hands the player a token and the channel pick.
|
||||
4. Player connects to e.g. `:11011` (channel1 core1). The core reads player
|
||||
state via `db` from `player` + `common`, loads maps per `MAP_ALLOW` and
|
||||
quest binaries from `share/locale/`.
|
||||
5. Cross-core traffic (channel switch, whisper, guild) uses the P2P ports
|
||||
(`12000`, `12011`..`12013`, `12991`) on loopback.
|
||||
6. `db` persists to MariaDB asynchronously (`AsyncSQL` pools, batch-writes
|
||||
at `SAVE_EVENT_SECOND_CYCLE=180`s from `game.txt`).
|
||||
7. `log` DB receives audit/event rows (item trades, combat, etc.) through
|
||||
a dedicated `AsyncSQL` connection.
|
||||
Reference in New Issue
Block a user