10 Commits

Author SHA1 Message Date
server
2179c46ce0 Guard public channel readiness in systemd tooling 2026-04-15 17:46:56 +02:00
server
6f16f66543 ops: snapshot crash executables 2026-04-14 17:05:11 +02:00
server
0bc6559283 ops: add core backtrace helper 2026-04-14 17:01:50 +02:00
server
c5bb515781 healthcheck: avoid temporary account collisions 2026-04-14 16:25:46 +02:00
server
84625652fe ops: add syserr triage views 2026-04-14 16:18:02 +02:00
server
cd2e1d61ca ops: add auth IP and stale session audit 2026-04-14 16:13:47 +02:00
server
f722475f17 ops: add auth activity and session views 2026-04-14 16:05:49 +02:00
server
825cfbc19b ops: add summary and auth failure views 2026-04-14 16:00:14 +02:00
server
4fccf13e09 ops: split ready and full healthchecks 2026-04-14 13:58:13 +02:00
server
5b0da5a685 ops: add login-ready wait helper 2026-04-14 13:43:23 +02:00
11 changed files with 1616 additions and 543 deletions

View File

@@ -51,6 +51,35 @@ def get_channel_ids() -> list[int]:
return [int(channel["id"]) for channel in iter_channels()]
def get_public_channel_ids(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> list[int]:
    """Return the ids of public channels from the inventory.

    When *selected_channel_ids* is given, only ids in that set are
    considered; with *client_visible_only* the channel must also be
    flagged client-visible.
    """
    wanted = (
        None
        if selected_channel_ids is None
        else {int(channel_id) for channel_id in selected_channel_ids}
    )

    def _accepted(channel) -> bool:
        cid = int(channel["id"])
        if wanted is not None and cid not in wanted:
            return False
        if not channel.get("public"):
            return False
        return not (client_visible_only and not channel.get("client_visible"))

    return [int(channel["id"]) for channel in iter_channels() if _accepted(channel)]
def has_public_channel(
    selected_channel_ids: Iterable[int] | None = None,
    *,
    client_visible_only: bool = False,
) -> bool:
    """True when at least one public channel survives the given filters."""
    matching = get_public_channel_ids(
        selected_channel_ids,
        client_visible_only=client_visible_only,
    )
    return len(matching) > 0
def get_channel_map() -> dict[int, dict[int, str]]:
result: dict[int, dict[int, str]] = {}
for channel in iter_channels():

View File

@@ -8,6 +8,38 @@ if [[ "${EUID}" -ne 0 ]]; then
exit 1
fi
MODE="full"
while (($#)); do
case "$1" in
--mode)
shift
if (($# == 0)); then
echo "Missing value for --mode" >&2
exit 1
fi
MODE="$1"
;;
--mode=*)
MODE="${1#*=}"
;;
*)
echo "Unknown argument: $1" >&2
exit 1
;;
esac
shift
done
case "${MODE}" in
ready|full)
;;
*)
echo "Unsupported mode: ${MODE} (expected ready or full)" >&2
exit 1
;;
esac
: "${RUN_AS_USER:=mt2.jakubkadlec.dev}"
: "${SERVER_HOST:=173.249.9.66}"
: "${AUTH_PORT:=11000}"
@@ -26,19 +58,26 @@ if ! id "${RUN_AS_USER}" >/dev/null 2>&1; then
exit 1
fi
DELETE_LOGIN="smkdel$(date +%s)"
unique_suffix() {
printf '%s%s' "$(date +%s%N | tail -c 9)" "$(openssl rand -hex 2)"
}
DELETE_SUFFIX="$(unique_suffix)"
FULL_SUFFIX="$(unique_suffix)"
DELETE_LOGIN="smkd${DELETE_SUFFIX}"
DELETE_PASSWORD="$(openssl rand -hex 6)"
DELETE_SOCIAL_ID="$(date +%s%N | tail -c 14)"
DELETE_EMAIL="${DELETE_LOGIN}@example.invalid"
DELETE_CHARACTER_NAME="c${DELETE_LOGIN}"
DELETE_CHARACTER_NAME="d${DELETE_SUFFIX}"
DELETE_PRIVATE_CODE="${DELETE_SOCIAL_ID: -7}"
DELETE_ACCOUNT_ID=""
FULL_LOGIN="smkfull$(date +%s)"
FULL_LOGIN="smkf${FULL_SUFFIX}"
FULL_PASSWORD="$(openssl rand -hex 6)"
FULL_SOCIAL_ID="$(date +%s%N | tail -c 14)"
FULL_EMAIL="${FULL_LOGIN}@example.invalid"
FULL_CHARACTER_NAME="c${FULL_LOGIN}"
FULL_CHARACTER_NAME="f${FULL_SUFFIX}"
FULL_ACCOUNT_ID=""
cleanup_account() {
@@ -84,6 +123,8 @@ create_account() {
local social_id="$3"
local email="$4"
cleanup_account "" "${login}"
mysql -N account <<SQL
INSERT INTO account (
login,
@@ -156,26 +197,35 @@ cleanup() {
trap cleanup EXIT
DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")"
create_player_index "${DELETE_ACCOUNT_ID}"
FULL_ACCOUNT_ID="$(create_account "${FULL_LOGIN}" "${FULL_PASSWORD}" "${FULL_SOCIAL_ID}" "${FULL_EMAIL}")"
create_player_index "${FULL_ACCOUNT_ID}"
echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${DELETE_CHARACTER_NAME}" \
--delete-private-code="${DELETE_PRIVATE_CODE}" \
if [[ "${MODE}" == "full" ]]; then
DELETE_ACCOUNT_ID="$(create_account "${DELETE_LOGIN}" "${DELETE_PASSWORD}" "${DELETE_SOCIAL_ID}" "${DELETE_EMAIL}")"
create_player_index "${DELETE_ACCOUNT_ID}"
echo "Running create/delete healthcheck for temporary account ${DELETE_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${DELETE_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${DELETE_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${DELETE_CHARACTER_NAME}" \
--delete-private-code="${DELETE_PRIVATE_CODE}" \
--client-version="${CLIENT_VERSION}"
fi
echo "Running ${MODE} login healthcheck for temporary account ${FULL_LOGIN}"
FULL_ARGS=(
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}"
--password-env=METIN_LOGIN_SMOKE_PASSWORD
--create-character-name="${FULL_CHARACTER_NAME}"
--client-version="${CLIENT_VERSION}"
)
if [[ "${MODE}" == "full" ]]; then
FULL_ARGS+=(--mall-password="${MALL_PASSWORD}")
fi
echo "Running full login healthcheck for temporary account ${FULL_LOGIN}"
sudo -iu "${RUN_AS_USER}" env METIN_LOGIN_SMOKE_PASSWORD="${FULL_PASSWORD}" \
"${SMOKE_BIN}" "${SERVER_HOST}" "${AUTH_PORT}" "${CHANNEL_PORT}" "${FULL_LOGIN}" \
--password-env=METIN_LOGIN_SMOKE_PASSWORD \
--create-character-name="${FULL_CHARACTER_NAME}" \
--client-version="${CLIENT_VERSION}" \
--mall-password="${MALL_PASSWORD}"
"${FULL_ARGS[@]}"
echo "Login healthcheck passed"
echo "${MODE^} login healthcheck passed"

View File

@@ -20,6 +20,8 @@ python3 deploy/systemd/install_systemd.py \
`--channel-limit 1` is also supported and will auto-include channel `99` when present in the channel inventory.
By default the installer refuses channel selections that omit every client-visible public channel. If you intentionally want an auth/internal-only stack, pass `--allow-internal-only`.
The channel selection and port layout now come from the versioned inventory file:
- [deploy/channel-inventory.json](../channel-inventory.json)
@@ -35,6 +37,7 @@ The channel selection and port layout now come from the versioned inventory file
- `/usr/local/libexec/metin-wait-port`
- `/usr/local/bin/metinctl`
- `/usr/local/sbin/metin-collect-incident`
- `/usr/local/sbin/metin-core-backtrace`
The `metin-db-ready.service` gate waits until the DB socket is actually accepting connections before `auth` and `game` units start.

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import argparse
import json
import os
import re
import shutil
import socket
import subprocess
@@ -106,6 +107,78 @@ def copy_core_files(bundle_dir: Path, core_files: list[Path]) -> None:
shutil.copy2(path, destination)
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Extract the crashed binary's path from ``file``'s core description.

    Runs ``file`` on *core_path* and parses the ``execfn: '...'`` field.
    Returns the resolved path when it still exists as a regular file,
    otherwise None.
    """
    completed = run(["file", str(core_path)], check=False)
    if completed.returncode != 0:
        return None
    match = re.search(r"execfn: '([^']+)'", completed.stdout)
    if not match:
        return None
    candidate = Path(match.group(1))
    # Require a regular file (same check as the backtrace helper's twin):
    # a directory or stale path would break the later metadata/copy steps.
    if candidate.is_file():
        return candidate.resolve()
    return None
def infer_executable_for_core(core_path: Path) -> Path | None:
    """Best-effort mapping from a core file to the binary that produced it."""
    from_file_tool = infer_execfn_from_file_output(core_path)
    if from_file_tool:
        return from_file_tool

    # Fall back to the runtime directory-layout conventions.
    parent = core_path.parent
    parent_name = parent.name
    grandparent_name = parent.parent.name if parent.parent else ""

    if parent_name == "db":
        binary_name = "db"
    elif parent_name == "auth":
        binary_name = "game_auth"
    elif parent_name.startswith("core") and grandparent_name.startswith("channel"):
        binary_name = f"{grandparent_name}_{parent_name}"
    else:
        return None

    candidate = (parent / binary_name).resolve()
    return candidate if candidate.is_file() else None
def executable_metadata(path: Path) -> dict[str, object]:
    """Describe a binary (path, size, UTC mtime) for the bundle manifest."""
    info = path.stat()
    modified = datetime.fromtimestamp(info.st_mtime, tz=timezone.utc)
    return {
        "path": str(path),
        "size_bytes": info.st_size,
        "mtime": modified.isoformat(),
    }
def write_core_executable_metadata(bundle_dir: Path, core_files: list[Path]) -> None:
    """Record, per core file, which executable (if any) was inferred for it."""
    manifest: list[dict[str, object]] = []
    for core_path in core_files:
        binary = infer_executable_for_core(core_path)
        manifest.append(
            {
                "core": str(core_path),
                # None when no binary could be inferred for this core.
                "executable": executable_metadata(binary) if binary else None,
            }
        )
    write_text(bundle_dir / "core-executables.json", json.dumps(manifest, indent=2))
def copy_core_executables(bundle_dir: Path, core_files: list[Path]) -> None:
    """Snapshot each inferred crash binary into <bundle>/executables.

    Deduplicates so a binary shared by several cores is copied once; the
    runtime-relative directory layout is preserved inside the bundle.
    """
    target_root = bundle_dir / "executables"
    seen: set[Path] = set()
    for core_path in core_files:
        binary = infer_executable_for_core(core_path)
        if binary is None or binary in seen:
            continue
        seen.add(binary)
        destination = target_root / binary.relative_to(RUNTIME_ROOT)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(binary, destination)
def git_summary(repo_path: Path) -> dict[str, object]:
summary: dict[str, object] = {"path": str(repo_path), "present": repo_path.exists()}
if not repo_path.exists():
@@ -180,8 +253,10 @@ def main() -> int:
core_files = find_core_files()
write_core_metadata(bundle_dir, core_files)
write_core_executable_metadata(bundle_dir, core_files)
if args.include_cores and core_files:
copy_core_files(bundle_dir, core_files)
copy_core_executables(bundle_dir, core_files)
print(bundle_dir)
return 0

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import shutil
import subprocess
from datetime import datetime, timezone
from pathlib import Path
RUNTIME_ROOT = Path("{{RUNTIME_ROOT}}")
def parse_args() -> argparse.Namespace:
    """Parse the CLI options for the backtrace helper."""
    parser = argparse.ArgumentParser(
        description="Generate a backtrace for a Metin runtime core file",
    )
    parser.add_argument(
        "--core",
        help="Core file path. Defaults to the newest core under the runtime tree.",
    )
    parser.add_argument(
        "--exe",
        help="Executable path override. If omitted, infer it from the core path.",
    )
    return parser.parse_args()
def ensure_root() -> None:
    """Abort with an error message unless running with root privileges."""
    if os.geteuid() == 0:
        return
    raise SystemExit("Run as root.")
def run(command: list[str], check: bool = False) -> subprocess.CompletedProcess[str]:
    """Execute *command*, capturing decoded stdout/stderr text."""
    return subprocess.run(
        command,
        check=check,
        capture_output=True,
        text=True,
    )
def iter_core_files() -> list[Path]:
    """All core files under the runtime channel tree, newest first."""
    cores = [
        path
        for path in RUNTIME_ROOT.glob("channels/**/core*")
        if path.is_file()
    ]
    cores.sort(key=lambda candidate: candidate.stat().st_mtime, reverse=True)
    return cores
def resolve_core_path(core_arg: str | None) -> Path:
    """Resolve the core file to analyse.

    An explicit argument may be absolute, cwd-relative, or relative to the
    runtime root; without one, the newest core under the runtime tree is
    chosen. Raises SystemExit when nothing suitable exists.
    """
    if not core_arg:
        cores = iter_core_files()
        if not cores:
            raise SystemExit(f"No core files found under {RUNTIME_ROOT}")
        return cores[0]

    candidate = Path(core_arg)
    if not candidate.is_absolute():
        # Prefer a runtime-rooted interpretation when that path exists.
        runtime_relative = RUNTIME_ROOT / core_arg
        if runtime_relative.exists():
            candidate = runtime_relative
    candidate = candidate.resolve()
    if not candidate.is_file():
        raise SystemExit(f"Core file not found: {candidate}")
    return candidate
def infer_execfn_from_file_output(core_path: Path) -> Path | None:
    """Pull the execfn path out of ``file``'s description of the core."""
    completed = run(["file", str(core_path)])
    if completed.returncode != 0:
        return None
    found = re.search(r"execfn: '([^']+)'", completed.stdout)
    if found is None:
        return None
    reported = Path(found.group(1))
    return reported if reported.is_file() else None
def infer_executable(core_path: Path, exe_arg: str | None) -> Path:
    """Determine which binary produced *core_path*.

    An explicit --exe override wins; otherwise the path reported by ``file``
    is tried first, then the runtime directory-layout conventions. Raises
    SystemExit when no usable executable is found.
    """
    if exe_arg:
        override = Path(exe_arg).resolve()
        if not override.is_file():
            raise SystemExit(f"Executable not found: {override}")
        return override

    candidates: list[Path] = []
    reported = infer_execfn_from_file_output(core_path)
    if reported:
        candidates.append(reported)

    parent = core_path.parent
    grandparent_name = parent.parent.name if parent.parent else ""
    if parent.name == "db":
        candidates.append(parent / "db")
    elif parent.name == "auth":
        candidates.append(parent / "game_auth")
    elif parent.name.startswith("core") and grandparent_name.startswith("channel"):
        candidates.append(parent / f"{grandparent_name}_{parent.name}")

    for candidate in candidates:
        if candidate.is_file():
            return candidate.resolve()
    raise SystemExit(f"Could not infer executable for core file: {core_path}")
def preferred_debugger() -> str | None:
    """Name of the first available debugger, preferring gdb over lldb."""
    return next(
        (tool for tool in ("gdb", "lldb") if shutil.which(tool)),
        None,
    )
def format_section(title: str, body: str) -> str:
    """Render one report section: a '== title ==' header plus trimmed body."""
    trimmed = body.rstrip()
    return f"== {title} ==\n{trimmed}\n"
def render_file_info(path: Path) -> str:
    """Section containing ``file``'s description of *path*."""
    completed = run(["file", str(path)])
    description = completed.stdout or completed.stderr or "<no output>"
    return format_section(f"file {path}", description)
def render_executable_freshness(core_path: Path, exe_path: Path) -> str:
    """Compare core vs executable mtimes and warn on likely symbol mismatch."""
    core_mtime_s = core_path.stat().st_mtime
    exe_mtime_s = exe_path.stat().st_mtime

    def _utc_iso(seconds: float) -> str:
        return datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat()

    lines = [
        f"core_mtime: {_utc_iso(core_mtime_s)}",
        f"exe_mtime: {_utc_iso(exe_mtime_s)}",
    ]
    # One second of grace avoids warning on same-deploy timestamp jitter.
    if exe_mtime_s > core_mtime_s + 1:
        lines.append(
            "warning: executable is newer than the core file; symbols may not match. "
            "Prefer an executable snapshot from an incident bundle or pass --exe explicitly."
        )
    else:
        lines.append("status: executable is not newer than the core file")
    return format_section("core/executable freshness", "\n".join(lines))
def render_readelf_notes(core_path: Path) -> str:
    """Section with the core's ELF notes, or '' when readelf is unavailable."""
    if shutil.which("readelf") is None:
        return ""
    completed = run(["readelf", "-n", str(core_path)])
    notes = completed.stdout or completed.stderr or "<no output>"
    return format_section(f"readelf -n {core_path}", notes)
def render_debugger_backtrace(debugger: str, exe_path: Path, core_path: Path) -> str:
    """Run the chosen debugger in batch mode and capture the full backtrace."""
    commands = {
        "gdb": [
            "gdb",
            "-batch",
            "-ex", "set pagination off",
            "-ex", "thread apply all bt full",
            str(exe_path),
            str(core_path),
        ],
        "lldb": [
            "lldb",
            "--batch",
            "-o", "thread backtrace all",
            "-c", str(core_path),
            str(exe_path),
        ],
    }
    try:
        command = commands[debugger]
    except KeyError:
        raise SystemExit(f"Unsupported debugger: {debugger}") from None
    completed = run(command)
    output = completed.stdout or completed.stderr or "<no output>"
    return format_section("backtrace", f"$ {' '.join(command)}\n\n{output}")
def main() -> int:
    """Entry point: assemble and print the crash-triage report.

    Resolves the core file and its executable, then prints a report with a
    summary, ``file`` output for both paths, an mtime-freshness check,
    optional ELF notes, and a debugger backtrace (or a hint when no
    debugger is installed). Returns 0 on success; helpers raise SystemExit
    on unrecoverable errors (not root, missing core/executable).
    """
    args = parse_args()
    ensure_root()
    core_path = resolve_core_path(args.core)
    exe_path = infer_executable(core_path, args.exe)
    debugger = preferred_debugger()
    # Fixed report skeleton; optional sections are appended below.
    sections = [
        format_section(
            "summary",
            "\n".join(
                [
                    f"core: {core_path}",
                    f"executable: {exe_path}",
                    f"debugger: {debugger or '<none>'}",
                ]
            ),
        ),
        render_file_info(core_path),
        render_file_info(exe_path),
        render_executable_freshness(core_path, exe_path),
    ]
    # Empty string when readelf is not installed — skip the section then.
    readelf_section = render_readelf_notes(core_path)
    if readelf_section:
        sections.append(readelf_section)
    if debugger:
        sections.append(render_debugger_backtrace(debugger, exe_path, core_path))
    else:
        sections.append(
            format_section(
                "backtrace",
                "No supported debugger found. Install gdb or lldb on the host to generate a stack trace.",
            )
        )
    print("\n".join(section.rstrip() for section in sections if section).rstrip())
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())

File diff suppressed because it is too large Load Diff

View File

@@ -15,6 +15,7 @@ import channel_inventory
TEMPLATES_DIR = SCRIPT_DIR / "templates"
BIN_DIR = SCRIPT_DIR / "bin"
HEALTHCHECK_DIR = REPO_ROOT / "deploy" / "healthcheck"
def parse_args() -> argparse.Namespace:
@@ -31,6 +32,11 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--wait-port", type=int, default=9000, help="DB readiness port")
parser.add_argument("--wait-timeout", type=int, default=30, help="DB readiness timeout in seconds")
parser.add_argument("--restart", action="store_true", help="Restart metin-server.service after install")
parser.add_argument(
"--allow-internal-only",
action="store_true",
help="Allow installs that omit every client-visible public channel",
)
channel_group = parser.add_mutually_exclusive_group(required=True)
channel_group.add_argument(
@@ -75,7 +81,7 @@ def copy_file(source: Path, destination: Path, mode: int) -> None:
def resolve_channels(args: argparse.Namespace) -> list[int]:
try:
return channel_inventory.resolve_selected_channels(
selected_channels = channel_inventory.resolve_selected_channels(
channel_limit=args.channel_limit,
explicit_channels=args.channels,
)
@@ -83,6 +89,20 @@ def resolve_channels(args: argparse.Namespace) -> list[int]:
print(str(exc), file=sys.stderr)
raise SystemExit(1)
if not args.allow_internal_only and not channel_inventory.has_public_channel(
selected_channels,
client_visible_only=True,
):
print(
"Selected channels do not include any client-visible public channel. "
"Add a public channel such as --channel 1, or pass --allow-internal-only "
"if an auth/internal-only stack is intentional.",
file=sys.stderr,
)
raise SystemExit(1)
return selected_channels
def resolve_instances(selected_channels: list[int]) -> list[str]:
return channel_inventory.get_instances(selected_channels)
@@ -148,6 +168,16 @@ def main() -> int:
render_template(BIN_DIR / "metin-collect-incident.in", template_values),
0o700,
)
write_text(
sbin_dir / "metin-core-backtrace",
render_template(BIN_DIR / "metin-core-backtrace.in", template_values),
0o700,
)
copy_file(
HEALTHCHECK_DIR / "metin-login-healthcheck.sh",
sbin_dir / "metin-login-healthcheck",
0o700,
)
verify_units = [str(systemd_dir / unit_name) for unit_name in unit_names]
run(["systemd-analyze", "verify", *verify_units])

View File

@@ -19,7 +19,12 @@ Installed on the VPS:
## What The Headless Healthcheck Verifies
The installed wrapper now performs two headless passes against the live server:
The installed wrapper supports two modes:
- `--mode ready`
- `--mode full`
The full mode performs two headless passes against the live server:
1. a select-screen create/delete pass
2. a full auth + channel + `ENTERGAME` + mall pass
@@ -48,7 +53,7 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
## How The Wrapper Works
`metin-login-healthcheck.sh` does the following:
`metin-login-healthcheck.sh --mode full` does the following:
- creates two temporary accounts in MariaDB
- runs `metin_login_smoke` once in create/delete mode on the select screen
@@ -58,6 +63,15 @@ This is an end-to-end gameplay-path verification, not just a TCP port check.
- deletes both temporary accounts and any temporary character rows on exit
- passes the configured client version expected by the server
`metin-login-healthcheck.sh --mode ready` is intentionally lighter:
- creates one temporary account in MariaDB
- runs one headless login flow through auth + channel + character create + select + `ENTERGAME`
- does not run the delete pass
- does not open the mall
This mode is the right readiness probe immediately after a service restart. It verifies that the server is login-ready without depending on the deeper post-login mall path.
It is intended for manual admin use on the VPS.
## Usage
@@ -69,6 +83,12 @@ ssh mt2
/usr/local/sbin/metin-login-healthcheck
```
Readiness-only mode:
```bash
/usr/local/sbin/metin-login-healthcheck --mode ready
```
The smoke binary can also be run directly:
```bash
@@ -109,6 +129,19 @@ Useful direct flags:
- `--mall-password=PASSWORD`
after `ENTERGAME`, opens the in-game mall via encrypted chat command and verifies `MALL_OPEN`
Operational CLI:
```bash
metinctl public-ready
metinctl healthcheck --mode full
metinctl healthcheck --mode ready
metinctl wait-ready
```
`metinctl public-ready` verifies that every enabled client-visible public channel unit is active and that its declared listener port is actually up.
`metinctl wait-ready` now first waits for the public runtime to be up and only then runs the lighter `ready` login probe. The deeper `full` mode remains available as an explicit admin healthcheck.
Example negative auth test:
```bash

View File

@@ -32,15 +32,26 @@ The Debian deployment installs:
`metinctl` is a lightweight operational CLI for:
- showing an operational summary
- showing recent auth success/failure activity
- showing auth activity grouped by source IP
- showing recent `syserr.log` entries
- summarizing recurring `syserr.log` entries
- viewing inventory
- listing managed units
- checking service status
- listing declared ports
- verifying that enabled public client-facing channels are actually up
- listing recent auth failures
- listing recent login sessions
- listing stale open sessions without logout
- restarting the whole stack or specific channels/instances
- viewing logs
- listing core files in the runtime tree
- generating a backtrace for the newest or selected core file
- collecting incident bundles
- running the root-only headless healthcheck
- waiting for login-ready state after restart
## Examples
@@ -56,12 +67,90 @@ Show current unit state:
metinctl status
```
Show a quick operational summary:
```bash
metinctl summary
```
Show declared ports and whether they are currently listening:
```bash
metinctl ports --live
```
Verify that enabled client-visible public channels are active and listening:
```bash
metinctl public-ready
```
Show recent real auth failures and skip smoke-test logins:
```bash
metinctl auth-failures
```
Show recent auth success/failure flow:
```bash
metinctl auth-activity
```
Show only recent auth failures including smoke tests:
```bash
metinctl auth-activity --status failure --include-smoke
```
Show auth activity grouped by IP:
```bash
metinctl auth-ips
```
Show the latest runtime errors collected from all `syserr.log` files:
```bash
metinctl recent-errors
```
Show the most repeated runtime errors in the last 24 hours:
```bash
metinctl error-summary
```
Include smoke-test failures too:
```bash
metinctl auth-failures --include-smoke
```
Show recent login sessions from `log.loginlog2`:
```bash
metinctl sessions
```
Show only sessions that still have no recorded logout:
```bash
metinctl sessions --active-only
```
Show stale open sessions older than 30 minutes:
```bash
metinctl session-audit
```
Use a different stale threshold:
```bash
metinctl session-audit --stale-minutes 10
```
Restart only channel 1 cores:
```bash
@@ -80,10 +169,22 @@ Tail auth logs:
metinctl logs auth -n 200 -f
```
Run the end-to-end healthcheck:
Run the deeper end-to-end healthcheck:
```bash
metinctl healthcheck
metinctl healthcheck --mode full
```
Run the lighter readiness probe:
```bash
metinctl healthcheck --mode ready
```
Wait until a restarted stack is login-ready:
```bash
metinctl wait-ready
```
List core files currently present in the runtime tree:
@@ -92,6 +193,18 @@ List core files currently present in the runtime tree:
metinctl cores
```
Generate a backtrace for the newest core file:
```bash
metinctl backtrace
```
Generate a backtrace for one specific core file:
```bash
metinctl backtrace --core channels/channel1/core1/core.2255450
```
Collect an incident bundle with logs, unit status, port state and repository revisions:
```bash
@@ -113,6 +226,7 @@ It also reconciles enabled game instance units against the selected channels:
- selected game units are enabled
- stale game units are disabled
- if `--restart` is passed, stale game units are disabled with `--now`
- installs now refuse an auth/internal-only channel selection unless you pass `--allow-internal-only`
This makes channel enablement declarative instead of depending on whatever happened to be enabled previously.
@@ -121,6 +235,7 @@ This makes channel enablement declarative instead of depending on whatever happe
The Debian deployment now also installs:
- `/usr/local/sbin/metin-collect-incident`
- `/usr/local/sbin/metin-core-backtrace`
The collector creates a timestamped bundle under:
@@ -134,7 +249,16 @@ Each bundle contains:
- listener state from `ss -ltnp`
- tailed runtime `syslog.log` and `syserr.log` files
- metadata for any `core*` files found under `runtime/server/channels`
- metadata for the executable inferred for each core file
If you call it with `--include-cores`, matching core files are copied into the bundle as well.
If you call it with `--include-cores`, matching core files are copied into the bundle as well. In the same mode, the inferred executable files are copied too, so a later redeploy does not destroy your ability to symbolicate the crash with the original binary snapshot.
The runtime units now also declare `LimitCORE=infinity`, so after the next service restart the processes are allowed to emit core dumps when the host kernel/core policy permits it.
For quick manual crash triage outside the incident bundle flow, use:
```bash
metinctl backtrace
```
It defaults to the newest core file under the runtime tree, infers the executable path, and uses `gdb` or `lldb` when present on the host. If no supported debugger is installed, it still prints file/readelf metadata for the core and executable. If the current executable is newer than the core file, the helper prints an explicit warning because the backtrace may no longer match the crashed binary.

View File

@@ -1,424 +0,0 @@
# Server runtime audit
Engineer-to-engineer writeup of what the VPS `mt2.jakubkadlec.dev` is actually
running as of 2026-04-14. Existing docs under `docs/` describe the intended
layout (`debian-runtime.md`, `database-bootstrap.md`, `config-and-secrets.md`);
this document is a ground-truth snapshot from a live recon session, with PIDs,
paths, versions and surprises.
Companion: `docs/server-topology.md` for the ASCII diagram and port table.
## TL;DR
- Only one metin binary is alive right now: the **`db`** helper on port `9000`
(PID `1788997` at audit time, cwd
`/home/mt2.jakubkadlec.dev/metin/runtime/server/channels/db`).
- **`game_auth` and all `channel*_core*` processes are NOT running.** The listing
in the original prompt (auth `:11000/12000`, channel1 cores `:11011/12011`
etc.) reflects *intended* state from the systemd units, not the current live
process table. `ss -tlnp` only shows `0.0.0.0:9000` for m2.
- The game/auth binaries are **not present on disk either**. Only
`share/bin/db` exists; there is no `share/bin/game_auth` and no
`share/bin/channel*_core*`. Those channels cannot start even if requested.
- The `db` unit is currently **flapping / crash-looping**. `systemctl` reports
`deactivating (stop-sigterm)`; syserr.log shows repeated
`Connection reset by peer` from client peers (auth/game trying to reconnect
is the usual culprit, but here nobody is connecting — cause needs
verification). Two fresh `core.<pid>` files (97 MB each) sit in the db
channel dir from 13:24 and 13:25 today.
- Orchestration is **pure systemd**, not the upstream `start.py` / tmux setup.
The README still documents `start.py`, so the README is stale for the Debian
VPS; `deploy/systemd/` + `docs/debian-runtime.md` are authoritative.
- MariaDB 11.8.6 is the backing store on `127.0.0.1:3306`. The DB user the
stack is configured to use is `bootstrap` (from `share/conf/db.txt` /
`game.txt`). The actual password is injected via `/etc/metin/metin.env`,
which is `root:root 600` and intentionally unreadable by the runtime user
inspector account.
## Host
- Hostname: `vmi3229987` (Contabo), public name `mt2.jakubkadlec.dev`.
- OS: Debian 13 (trixie).
- MariaDB: `mariadbd` 11.8.6, PID `103624`, listening on `127.0.0.1:3306`.
- All metin services run as the unprivileged user
`mt2.jakubkadlec.dev:mt2.jakubkadlec.dev`.
- Runtime root: `/home/mt2.jakubkadlec.dev/metin/runtime/server` (755 MB across
`channels/`, 123 MB across `share/`, total metin workspace on the box
~1.7 GB).
## Processes currently alive
From `ps auxf` + `ss -tlnp` at audit time:
```
mysql 103624 /usr/sbin/mariadbd — 127.0.0.1:3306
mt2.j+ 1788997 /home/.../channels/db/db — 0.0.0.0:9000
```
No other m2 binaries show up. `ps` has **zero** matches for `game_auth`,
`channel1_core1`, `channel1_core2`, `channel1_core3`, `channel99_core1`.
Per-process inspection:
| PID | cwd | exe (resolved) | fds of interest |
| ------- | ----------------------------------------------- | ------------------------------------------------- | --------------- |
| 1788997 | `.../runtime/server/channels/db` | `.../share/bin/db` (via `./db` symlink) | fd 3→syslog.log, fd 4→syserr.log, fd 11 TCP `*:9000`, fd 17 `[eventpoll]` (epoll fdwatch) |
The `db` symlink inside the channel dir resolves to `../../share/bin/db`,
which is an `ELF 64-bit LSB pie executable, x86-64, dynamically linked,
BuildID fc049d0f..., not stripped`. Build identifier from
`channels/db/VERSION.txt`: **`db revision: b2b037f-dirty`** — the dirty tag is
a red flag, the build wasn't from a clean checkout of `m2dev-server-src`.
The `usage.txt` in the same directory shows hourly heartbeat rows with
`| 0 | 0 |` since 2026-04-13 21:00 (the "sessions / active" columns are
stuck at zero — consistent with no game channels being connected).
## Binaries actually present on disk
```
/home/mt2.jakubkadlec.dev/metin/runtime/server/share/bin/
├── db ← present, used
└── game ← present (shared game binary, but not launched under any
instance name that the systemd generator expects)
```
What is NOT present:
- `share/bin/game_auth`
- `share/bin/channel1_core1`, `channel1_core2`, `channel1_core3`
- `share/bin/channel99_core1`
The `metin-game-instance-start` helper (`/usr/local/libexec/...`) is a bash
wrapper that `cd`s into `channels/<channel>/<core>/` and execs `./<instance>`,
e.g. `./channel1_core1`. Those per-instance binaries don't exist yet. The
channel dirs themselves (`channel1/core1/`, etc.) already contain the
scaffolding (`CONFIG`, `conf`, `data`, `log`, `mark`, `package`,
`p2p_packet_info.txt`, `packet_info.txt`, `syserr.log`, `syslog.log`,
`version.txt`), but `version.txt` says `game revision: unknown` and the
per-instance executable file is missing. The log directory has a single
stale `syslog_2026-04-13.log`.
Interpretation: the deploy pipeline that builds `m2dev-server-src` and drops
instance binaries into `share/bin/` has not yet been run (or has not been
re-run since the tree was laid out on 2026-04-13). Once Jakub's
`debian-foundation` build produces per-instance symlinked/hardlinked
binaries, the `metin-game@*` units should come up automatically on the next
`systemctl restart metin-server`.
## How things are started
All orchestration goes through systemd units under `/etc/systemd/system/`,
installed from `deploy/systemd/` via `deploy/systemd/install_systemd.py`.
Unit list and roles:
| Unit | Type | Role |
| ----------------------------------------- | -------- | -------------------------------------------- |
| `metin-server.service` | oneshot | top-level grouping, `Requires=mariadb.service`. `ExecStart=/bin/true`, `RemainAfterExit=yes`. All sub-units are `PartOf=metin-server.service` so restarting `metin-server` cycles everything. |
| `metin-db.service` | simple | launches `.../channels/db/db` as runtime user, `Restart=on-failure`, `LimitCORE=infinity`, env file `/etc/metin/metin.env`. |
| `metin-db-ready.service` | oneshot | runs `/usr/local/libexec/metin-wait-port 127.0.0.1 9000 30` — gate that blocks auth+game until the DB socket is listening. |
| `metin-auth.service` | simple | launches `.../channels/auth/game_auth`. Requires db-ready. |
| `metin-game@channel1_core1..3.service` | template | each runs `/usr/local/libexec/metin-game-instance-start <instance>` which execs `./<instance>` in that channel dir. |
| `metin-game@channel99_core1.service` | template | same, for channel 99. |
Dependency chain:
```
mariadb.service
metin-db.service ──► metin-db-ready.service ──► metin-auth.service
└► metin-game@*.service
metin-server.service (oneshot umbrella)
```
All units have `PartOf=metin-server.service`, `Restart=on-failure`,
`LimitNOFILE=65535`, `LimitCORE=infinity`. None run in Docker. None use tmux,
screen or the upstream `start.py`. **The upstream `start.py` / `stop.py` in
the repo are NOT wired up on this host** and should be treated as FreeBSD-era
legacy.
The per-instance launcher `/usr/local/libexec/metin-game-instance-start`
(installed by `install_systemd.py`) is:
```bash
#!/usr/bin/env bash
set -euo pipefail
instance="${1:?missing instance name}"
root_dir="/home/mt2.jakubkadlec.dev/metin/runtime/server/channels"
channel_dir="${instance%_*}" # e.g. channel1 from channel1_core2
core_dir="${instance##*_}" # e.g. core2
workdir="${root_dir}/${channel_dir}/${core_dir}"
cd "$workdir"
exec "./${instance}"
```
Notes:
- the `%_*` / `##*_` parse is brittle — an instance name with more than one
underscore would misbehave. For current naming (`channelN_coreM`) it works.
- the helper does not redirect stdout/stderr; both go to the journal via
systemd.
## Config files the binaries actually read
All m2 config files referenced by the running/installed stack, resolved to
their real path on disk:
| Config file | Read by | Purpose |
| ------------------------------------------------------------------------ | ------------- | --------------------------------------------------- |
| `share/conf/db.txt` | `db` | SQL hosts, BIND_PORT=9000, item id range, hotbackup |
| `share/conf/game.txt` | game cores | DB_ADDR=127.0.0.1, DB_PORT=9000, SQL creds, flags |
| `share/conf/CMD` | game cores | in-game command ACL (notice, warp, item, …) |
| `share/conf/item_proto.txt`, `mob_proto.txt`, `item_names*.txt`, `mob_names*.txt` | both db and game | static content tables |
| `channels/db/conf` (symlink → `share/conf`) | `db` | every db channel looks into this flat conf tree |
| `channels/db/data` (symlink → `share/data`) | `db`/`game` | mob/pc/dungeon/spawn data |
| `channels/db/locale` (symlink → `share/locale`) | all | locale assets |
| `channels/auth/CONFIG` | `game_auth` | `HOSTNAME: auth`, `CHANNEL: 1`, `PORT: 11000`, `P2P_PORT: 12000`, `AUTH_SERVER: master` |
| `channels/channel1/core1/CONFIG` | core1 | `HOSTNAME: channel1_1`, `CHANNEL: 1`, `PORT: 11011`, `P2P_PORT: 12011`, `MAP_ALLOW: 1 4 5 6 3 23 43 112 107 67 68 72 208 302 304` |
| `channels/channel1/core2/CONFIG` | core2 | `PORT: 11012`, `P2P_PORT: 12012` |
| `channels/channel1/core3/CONFIG` | core3 | `PORT: 11013`, `P2P_PORT: 12013` |
| `channels/channel99/core1/CONFIG` | ch99 core1 | `HOSTNAME: channel99_1`, `CHANNEL: 99`, `PORT: 11991`, `P2P_PORT: 12991`, `MAP_ALLOW: 113 81 100 101 103 105 110 111 114 118 119 120 121 122 123 124 125 126 127 128 181 182 183 200` |
| `/etc/metin/metin.env` | all systemd units via `EnvironmentFile=-` | host-local secrets/overrides, root:root mode 600. Contents not readable during this audit. |
Flat `share/conf/db.txt` (verbatim, with bootstrap secrets):
```
WELCOME_MSG = "Database connector is running..."
SQL_ACCOUNT = "127.0.0.1 account bootstrap change-me 0"
SQL_PLAYER = "127.0.0.1 player bootstrap change-me 0"
SQL_COMMON = "127.0.0.1 common bootstrap change-me 0"
SQL_HOTBACKUP= "127.0.0.1 hotbackup bootstrap change-me 0"
TABLE_POSTFIX = ""
BIND_PORT = 9000
CLIENT_HEART_FPS = 60
HASH_PLAYER_LIFE_SEC = 600
BACKUP_LIMIT_SEC = 3600
PLAYER_ID_START = 100
PLAYER_DELETE_LEVEL_LIMIT = 70
PLAYER_DELETE_CHECK_SIMPLE = 1
ITEM_ID_RANGE = 2000000000 2100000000
MIN_LENGTH_OF_SOCIAL_ID = 6
SIMPLE_SOCIALID = 1
```
The `bootstrap` / `change-me` values are git-tracked placeholders.
`config-and-secrets.md` explicitly says these are templates, and real values
are expected to come from `/etc/metin/metin.env`. This presumably works because
the server source re-reads credentials from the environment when they are
injected; verify by grepping `m2dev-server-src` for the SQL env var names used
by `db`/`game`.
(**Open question**: confirm which env var names override the in-file creds;
the audit session couldn't read `metin.env` directly.)
## Database
- Engine: **MariaDB 11.8.6** (`mariadb --version`).
- PID: 103624, listening on `127.0.0.1:3306` only. No external TCP
exposure, no unix socket checked (likely `/run/mysqld/mysqld.sock`).
- Expected databases from `docs/database-bootstrap.md`: `account`, `player`,
`common`, `log`, `hotbackup`.
- Stack-side DB user: `bootstrap` (placeholder in git, real password in
`/etc/metin/metin.env`).
- Could not enumerate actual tables during the audit — both `mysql -uroot`
and `sudo -u mt2.jakubkadlec.dev mariadb` failed (Access denied), since
root uses unix-socket auth for `root@localhost` and the runtime user has
no CLI credentials outside the systemd environment.
- **To inspect the DB read-only:** either run as root with
`sudo mariadb` (unix socket auth — needs confirmation it's enabled), or
open `/etc/metin/metin.env` as root, grab the `bootstrap` password, then
`mariadb -ubootstrap -p account` etc. Do not attempt writes.
## Logging
Every m2 process writes two files in its channel dir, via fd 3 / fd 4:
- `syslog.log` — verbose info stream (rotated by date in some dirs:
`channel1/core1/log/syslog_2026-04-13.log`).
- `syserr.log` — error stream. Look here first on crash.
The `db` channel additionally writes to `syslog.log` (36 MB today; rotation
appears to be manual — there is a `log/` dir with a daily file but the
current `syslog.log` is at the top level) and drops `core.<pid>` ELF cores
into the channel dir on SIGSEGV/SIGABRT because `LimitCORE=infinity` is set.
systemd journal captures stdout/stderr as well, so `journalctl -u metin-db
--since '1 hour ago'` is the fastest way to see startup banners and
`systemd`-observed restarts. Example from this audit:
```
Apr 14 13:26:40 vmi3229987 db[1788997]: Real Server
Apr 14 13:26:40 vmi3229987 db[1788997]: Success ACCOUNT
Apr 14 13:26:40 vmi3229987 db[1788997]: Success COMMON
Apr 14 13:26:40 vmi3229987 db[1788997]: Success HOTBACKUP
Apr 14 13:26:40 vmi3229987 db[1788997]: mysql_real_connect: Lost connection
to server at 'sending authentication information', system error: 104
```
Every `db` start it opens *more than a dozen* AsyncSQL pools ("AsyncSQL:
connected to 127.0.0.1 (reconnect 1)" repeated ~12 times), suggesting a large
per-instance pool size. Worth checking if that needs tuning.
The current `syserr.log` in `channels/db/` is dominated by:
```
[error] [int CPeerBase::Recv()()] socket_read failed Connection reset by peer
[error] [int CClientManager::Process()()] Recv failed
```
which is the peer-disconnect path. Since no auth/game peers should be
connecting right now, these entries are either leftovers from an earlier
start, or something else (perhaps a healthcheck probe) is touching port 9000
and then aborting the connection. See open questions.
See open questions.
## Ports
Live `ss -tlnp` on the VPS (m2-relevant lines only):
| L3:L4 | Who | Exposure |
| ---------------- | ------------ | -------------- |
| `0.0.0.0:9000`   | `db`         | **INADDR_ANY** — listens on all interfaces; security concern, see the firewalling note below. |
| `127.0.0.1:3306` | `mariadbd` | localhost only |
Not currently listening (would be if auth/game were up):
- `11000` / `12000` — auth client + p2p
- `11011..11013` / `12011..12013` — channel1 cores + p2p
- `11991` / `12991` — channel99 core1 + p2p
Other listeners on the host (not m2): `:22`, `:2222` (gitea ssh), `:25`
(postfix loopback), `:80/:443` (Caddy), `:3000` (Gitea), `:2019` (Caddy
admin), `:33891` (unknown loopback), `:5355` / `:53` (resolver).
**Firewalling note:** `db` binding to `0.0.0.0:9000` is a concern. In the
normal m2 architecture, `db` only talks to auth/game cores on the same host
and should bind to `127.0.0.1` only. Current binding is set by the
`BIND_PORT = 9000` line in `share/conf/db.txt`, which in this server fork
apparently defaults to `INADDR_ANY`. If the Contabo firewall or iptables/nft
rules don't block 9000 from the outside, this is exposed. **Open question:
verify iptables/nftables on the host, or move `db` to `127.0.0.1` explicitly
in source / config.**
## Data directory layout
All under `/home/mt2.jakubkadlec.dev/metin/runtime/server/share/`:
```
share/
├── bin/ ← compiled binaries (only db + game present today)
├── conf/ ← db.txt, game.txt, CMD, item_proto.txt, mob_proto.txt,
│ item_names_*.txt, mob_names_*.txt (17 locales each)
├── data/ ← DTA/, dungeon/, easterevent/, mob_spawn/, monster/,
│ pc/, pc2/ (27 MB total)
├── locale/ ← 86 MB, per-locale strings + binary quest outputs
├── mark/
└── package/
```
Per-channel scaffolding under `channels/` symlinks `conf`, `data`, `locale`
back into `share/`, so each channel reads from a single canonical content
tree.
## Disk usage footprint
```
/home/mt2.jakubkadlec.dev/metin/ 1.7 G (total metin workspace)
runtime/server/share/ 123 M
runtime/server/share/data/ 27 M
runtime/server/share/locale/ 86 M
runtime/server/channels/ 755 M
channels/db/core.178508{2,8} ~194 M (two 97 MB coredumps)
channels/db/syslog.log 36 M (grows fast)
```
Core dumps dominate the channel dir footprint right now. Cleaning up old
`core.*` files is safe when the db is not actively crashing (and only after
Jakub has looked at them).
## How to restart channel1_core2 cleanly
Pre-flight checklist:
1. Confirm `share/bin/channel1_core2` actually exists on disk — right now it
does **not**, so the instance cannot start. Skip straight to the
"rebuild / redeploy" section in Jakub's `docs/deploy-workflow.md`
before trying.
2. Confirm `metin-db.service` and `metin-auth.service` are `active (running)`
(`systemctl is-active metin-db metin-auth`). If not, fix upstream first —
a clean restart of core2 requires a healthy auth + db.
3. Check that no player is currently online on that core. With `usage.txt`
at 0/0 this is trivially true today, but in prod do
`cat channels/channel1/core2/usage.txt` first.
4. Look at recent logs so you have a baseline:
`journalctl -u metin-game@channel1_core2 -n 50 --no-pager`
Clean restart:
```bash
# on the VPS as root or with sudo
systemctl restart metin-game@channel1_core2.service
systemctl status metin-game@channel1_core2.service --no-pager
journalctl -u metin-game@channel1_core2.service -n 100 --no-pager -f
```
Because the unit is `Type=simple` with `Restart=on-failure`, `systemctl
restart` sends SIGTERM, waits up to `TimeoutStopSec=60`, then brings the
process back up. The binary's own `hupsig()` handler logs the SIGTERM into
`syserr.log` and shuts down gracefully.
Post-restart verification:
```bash
ss -tlnp | grep -E ':(11012|12012)\b' # expect both ports listening
tail -n 30 /home/mt2.jakubkadlec.dev/metin/runtime/server/channels/channel1/core2/syserr.log
```
If the process refuses to stay up (`Restart=on-failure` loops it), **do not**
just bump `RestartSec`; grab the last 200 journal lines and the last 200
syserr lines and open an issue in `metin-server/m2dev-server-src` against
Jakub. Do not edit the unit file ad-hoc on the host.
## Open questions
These are things the audit could not determine without making changes or
getting more access. They need a human operator to resolve.
1. **Who produces the per-instance binaries** (`channel1_core1`,
`channel1_core2`, `channel1_core3`, `channel99_core1`, `game_auth`)?
The deploy flow expects them in `share/bin/` and channel dirs but they
are missing. Is this still hand-built, or is there a make target that
hardlinks `share/bin/game` into each `channel*/core*/<instance>` name?
2. **Why is `db` currently flapping** (`deactivating (stop-sigterm)` in
systemctl, plus two fresh core dumps on 2026-04-14 13:24/13:25 and
dozens of `CPeerBase::Recv()` errors)? Nothing should be connecting to
port 9000 right now.
3. **What the real `metin.env` contains** — specifically, the actual
`bootstrap` DB password, and whether there is a separate admin-page
password override. Audit did not touch `/etc/metin/metin.env`.
4. **Exact override-variable contract** between `share/conf/db.txt`
placeholders and the env file. We need to verify which env var names
the `db`/`game` source actually reads so we know whether the
`change-me` literal is ever used at runtime.
5. **Is `db` intended to bind `0.0.0.0:9000`?** From a defense-in-depth
standpoint it should be `127.0.0.1`. Needs either a source fix or a
host firewall rule. Check current nftables state.
6. **`VERSION.txt` says `db revision: b2b037f-dirty`.** Which tree was this
built from and why "dirty"? Point back at the `m2dev-server-src`
commit and confirm the build artefact is reproducible.
7. **Log rotation**: `channels/db/syslog.log` is already 36 MB today with
nothing connected. There is a `channels/channel1/core1/log/` dated
subdir convention that suggests daily rotation, but `db`'s own syslog
is not rotating. Confirm whether `logrotate` or an in-process rotator
is expected to own this.
8. **Where does the hourly heartbeat in `usage.txt` come from?** A row is
   appended roughly every hour — most likely the `db` backup tick, but
   confirm it is not a cron job.
9. **`mysqld`'s live databases**: could not enumerate table names without
credentials. `docs/database-bootstrap.md` lists the expected set;
someone with `metin.env` access should confirm `account`, `player`,
`common`, `log`, `hotbackup` are all present and populated.
10. **Stale README**: top-level `README.md` still documents FreeBSD +
`start.py`. Not urgent, but worth a `docs:` sweep to point readers at
`docs/debian-runtime.md` as the canonical layout.

View File

@@ -1,89 +0,0 @@
# Server topology
Companion diagram + port table for `docs/server-runtime.md`. Describes the
*intended* production layout of the Debian VPS m2 stack. What is live today is
only a subset (see the runtime audit for the actual state).
## ASCII diagram
```
┌─────────────────────────────┐
│ Players │
│ (Metin2.exe + launcher) │
└──────────────┬──────────────┘
│ TCP (11000 auth, 11011..11013
│ channel1, 11991 channel99)
════════════════════════════ mt2.jakubkadlec.dev ════════════════════════════
║ ║
║ ┌──────────────────┐ ┌──────────────────┐ ┌────────────────┐ ║
║ │ metin-auth │ │ metin-game@ch1_c1│ │ metin-game@ │ ║
║ │ (game_auth) │ p2p │ (channel1_core1) │ p2p │ ch99_c1 │ ║
║ │ :11000 client │◄────►│ :11011 client │◄────►│ :11991 client │ ║
║ │ :12000 p2p │ │ :12011 p2p │ │ :12991 p2p │ ║
║ └────────┬─────────┘ └────────┬─────────┘ └────────┬───────┘ ║
║ │ │ metin-game@ch1_c2/c3 │ ║
║ │ │ :11012/12012 :11013/12013 ║
║ │ │ │ ║
║ │ DB proxy/cache layer │ │ ║
║ └────────────┐ ┌─────────┴─────────────┐ ┌────────┘ ║
║ ▼ ▼ ▼ ▼ ║
║ ┌──────────────────────────────────┐ ║
║ │ metin-db (db) │ ║
║ │ bind 0.0.0.0:9000 │ ║
║ │ (gated by metin-db-ready.svc) │ ║
║ └──────────────┬───────────────────┘ ║
║ │ SQL ║
║ ▼ ║
║ ┌──────────────────────────────────┐ ║
║ │ mariadbd 11.8.6 │ ║
║ │ 127.0.0.1:3306 │ ║
║ │ DBs: account, player, common, │ ║
║ │ log, hotbackup │ ║
║ └──────────────────────────────────┘ ║
║ ║
║ systemd orchestration: ║
║ metin-server.service (oneshot umbrella, PartOf everything) ║
║ ├─ Requires mariadb.service ║
║ └─ metin-db → metin-db-ready → metin-auth + metin-game@<instance> ║
║ ║
║ Secrets: ║
║ /etc/metin/metin.env (root:root 600, EnvironmentFile= for all units) ║
║ ║
════════════════════════════════════════════════════════════════════════════
```
## Process / port table
| Process name | systemd unit | Client port | P2P port | Binds to | Role | Config file |
| ---------------- | ------------------------------------- | ----------- | -------- | ---------- | ----------------------------------------------------- | ----------------------------------------------------- |
| `db` | `metin-db.service` | 9000 | — | `0.0.0.0` | DB proxy/cache; talks to MariaDB, serves auth+game | `share/conf/db.txt` (+ `/etc/metin/metin.env`) |
| `game_auth` | `metin-auth.service` | 11000 | 12000 | (default) | account login, token handoff to channels | `channels/auth/CONFIG` |
| `channel1_core1` | `metin-game@channel1_core1.service` | 11011 | 12011 | (default) | ch1 core1, MAP_ALLOW 1 4 5 6 3 23 43 112 107 67 68 72 208 302 304 | `channels/channel1/core1/CONFIG` |
| `channel1_core2` | `metin-game@channel1_core2.service` | 11012 | 12012 | (default) | ch1 core2, same channel different core | `channels/channel1/core2/CONFIG` |
| `channel1_core3` | `metin-game@channel1_core3.service` | 11013 | 12013 | (default) | ch1 core3 | `channels/channel1/core3/CONFIG` |
| `channel99_core1`| `metin-game@channel99_core1.service` | 11991 | 12991 | (default) | ch99 core1, event/test channel. MAP_ALLOW 113 81 100 101 103 105 110 111 114 118 119 120 121 122 123 124 125 126 127 128 181 182 183 200 | `channels/channel99/core1/CONFIG` |
| `mariadbd` | `mariadb.service` (distro pkg) | 3306 | — | `127.0.0.1`| relational store | `/etc/mysql/mariadb.conf.d/*` |
Supporting, not a process:
| Unit | Type | Purpose |
| -------------------------- | ------- | ---------------------------------------------------------------- |
| `metin-server.service` | oneshot | umbrella. Restarting it cycles all sub-units via `PartOf=`. |
| `metin-db-ready.service` | oneshot | `metin-wait-port 127.0.0.1 9000 30` — gates auth+game on db up. |
## Data flow
1. Player connects to `mt2.jakubkadlec.dev:11000` (auth).
2. `game_auth` authenticates against `account` via `db` (`127.0.0.1:9000`)
which proxies to MariaDB.
3. `game_auth` hands the player a token and the channel pick.
4. Player connects to e.g. `:11011` (channel1 core1). The core reads player
state via `db` from `player` + `common`, loads maps per `MAP_ALLOW` and
quest binaries from `share/locale/`.
5. Cross-core traffic (channel switch, whisper, guild) uses the P2P ports
(`12000`, `12011`..`12013`, `12991`) on loopback.
6. `db` persists to MariaDB asynchronously (`AsyncSQL` pools, batch-writes
at `SAVE_EVENT_SECOND_CYCLE=180`s from `game.txt`).
7. `log` DB receives audit/event rows (item trades, combat, etc.) through
a dedicated `AsyncSQL` connection.