From 100dd2b87ba3e5f37784a1392b2fdf1f4b0edbfa Mon Sep 17 00:00:00 2001 From: rtw1x1 Date: Tue, 20 Jan 2026 21:23:31 +0000 Subject: [PATCH] fix: Optimized UTF8, BiDi, Debug --- extern/include/utf8.h | 475 +++++++++++++++++++++--------- src/EterBase/Debug.cpp | 217 +++++++++++--- src/EterLib/GrpTextInstance.cpp | 240 ++++++++------- src/EterLocale/Arabic.cpp | 180 ++++++----- src/UserInterface/PythonSkill.cpp | 86 ++++-- 5 files changed, 817 insertions(+), 381 deletions(-) diff --git a/extern/include/utf8.h b/extern/include/utf8.h index 84f50d2..a7ce9d6 100644 --- a/extern/include/utf8.h +++ b/extern/include/utf8.h @@ -1,9 +1,11 @@ #pragma once #include +#include #include #include #include #include +#include #include @@ -37,6 +39,161 @@ constexpr size_t ARABIC_SHAPING_SAFETY_MARGIN_RETRY = 64; #define BIDI_LOG_SIMPLE(msg) ((void)0) #endif +// ============================================================================ +// OPTIMIZED CHARACTER CLASSIFICATION (Lookup Tables) +// ============================================================================ +// Replaces GetStringTypeW() API calls with O(1) table lookups over a fixed set of hard-coded ranges. +// The table is filled lazily on first use behind a plain bool flag, so the first call is NOT thread-safe. + +namespace BiDiTables +{ + // Character property flags + enum ECharFlags : uint8_t + { + CF_NONE = 0, + CF_ALPHA = 0x01, // Alphabetic (Latin, Cyrillic, Greek, etc.) + CF_DIGIT = 0x02, // Numeric digit (0-9, Arabic-Indic, etc.) 
+ CF_RTL = 0x04, // RTL script (Arabic, Hebrew) + CF_ARABIC = 0x08, // Arabic letter that needs shaping + }; + + // Returns the flags table (65536 entries, one per BMP code unit), filling it on the first call; s_initialized is a plain bool, so concurrent first calls race + inline const uint8_t* GetCharFlagsTable() + { + static uint8_t s_table[65536] = {0}; + static bool s_initialized = false; + + if (!s_initialized) + { + // ASCII digits + for (int i = '0'; i <= '9'; ++i) + s_table[i] |= CF_DIGIT; + + // ASCII letters + for (int i = 'A'; i <= 'Z'; ++i) + s_table[i] |= CF_ALPHA; + for (int i = 'a'; i <= 'z'; ++i) + s_table[i] |= CF_ALPHA; + + // Latin Extended-A/B (0x0100-0x024F) - NOTE(review): Latin-1 Supplement letters (0x00C0-0x00FF) get no CF_ALPHA anywhere in this table + for (int i = 0x0100; i <= 0x024F; ++i) + s_table[i] |= CF_ALPHA; + + // Latin Extended Additional (0x1E00-0x1EFF) + for (int i = 0x1E00; i <= 0x1EFF; ++i) + s_table[i] |= CF_ALPHA; + + // Greek (0x0370-0x03FF) + for (int i = 0x0370; i <= 0x03FF; ++i) + s_table[i] |= CF_ALPHA; + + // Cyrillic (0x0400-0x04FF) + for (int i = 0x0400; i <= 0x04FF; ++i) + s_table[i] |= CF_ALPHA; + + // Hebrew (0x0590-0x05FF) - RTL + for (int i = 0x0590; i <= 0x05FF; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // Arabic (0x0600-0x06FF) - RTL; the shaping flag is added by the next loop + for (int i = 0x0600; i <= 0x06FF; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + // Arabic letters that need shaping (0x0621-0x064A) + for (int i = 0x0621; i <= 0x064A; ++i) + s_table[i] |= CF_ARABIC; + + // Arabic Supplement (0x0750-0x077F) - NOTE(review): 0x0700-0x074F and 0x0780-0x089F are left unflagged, unlike the old 0x0590-0x08FF range check + for (int i = 0x0750; i <= 0x077F; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // Arabic Extended-A (0x08A0-0x08FF) + for (int i = 0x08A0; i <= 0x08FF; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // Arabic-Indic digits (0x0660-0x0669) - these keep CF_RTL | CF_ALPHA from the Arabic block loop as well + 
for (int i = 0x0660; i <= 0x0669; ++i) + s_table[i] |= CF_DIGIT; + + // Extended Arabic-Indic digits (0x06F0-0x06F9) + for (int i = 0x06F0; i <= 0x06F9; ++i) + s_table[i] |= CF_DIGIT; + + // Arabic Presentation Forms-A (0xFB50-0xFDFF) - already shaped + for (int i = 0xFB50; i <= 0xFDFF; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // Arabic Presentation Forms-B (0xFE70-0xFEFF) - already shaped + 
for (int i = 0xFE70; i <= 0xFEFF; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // Hebrew presentation forms (0xFB1D-0xFB4F) + for (int i = 0xFB1D; i <= 0xFB4F; ++i) + s_table[i] |= CF_RTL | CF_ALPHA; + + // CJK (0x4E00-0x9FFF) - treat as LTR alpha + for (int i = 0x4E00; i <= 0x9FFF; ++i) + s_table[i] |= CF_ALPHA; + + // Hangul (0xAC00-0xD7AF) + for (int i = 0xAC00; i <= 0xD7AF; ++i) + s_table[i] |= CF_ALPHA; + + // RTL marks and controls + s_table[0x200F] |= CF_RTL; // RLM + s_table[0x061C] |= CF_RTL; // ALM + for (int i = 0x202B; i <= 0x202E; ++i) + s_table[i] |= CF_RTL; // U+202B RLE, U+202C PDF, U+202D LRO, U+202E RLO (U+202A LRE is not in this range) + for (int i = 0x2066; i <= 0x2069; ++i) + s_table[i] |= CF_RTL; // U+2066 LRI, U+2067 RLI, U+2068 FSI, U+2069 PDI + + s_initialized = true; + } + + return s_table; + } + + // O(1) classification helpers; ch is cast to uint16_t to index the table + inline bool IsRTL(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_RTL; } + inline bool IsAlpha(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_ALPHA; } + inline bool IsDigit(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_DIGIT; } + inline bool IsArabicLetter(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_ARABIC; } + inline bool IsStrongLTR(wchar_t ch) + { + uint8_t flags = GetCharFlagsTable()[(uint16_t)ch]; + // Strong LTR = (Alpha OR Digit) AND NOT RTL + return (flags & (CF_ALPHA | CF_DIGIT)) && !(flags & CF_RTL); + } +} + +// ============================================================================ +// BUFFER POOLING (Avoid per-call allocations) +// ============================================================================ + +namespace BiDiBuffers +{ + struct TBufferPool + { + std::vector shaped; + + void EnsureCapacity(size_t n) + { + size_t needed = n * 2 + 64; + if (shaped.capacity() < needed) shaped.reserve(needed); + } + + void Clear() + { + shaped.clear(); + } + }; + + inline TBufferPool& Get() + { + thread_local static 
TBufferPool s_pool; // one pool instance per thread + return s_pool; + } +} + // 
============================================================================ // UNICODE VALIDATION HELPERS // ============================================================================ @@ -65,7 +222,70 @@ static inline void SanitizeWideString(std::wstring& ws) ws.end()); } +// ============================================================================ +// OPTIMIZED UTF-8 CONVERSION +// ============================================================================ +// Fast paths for ASCII-only text (very common in games). +// Falls back to Windows API for non-ASCII. + +namespace Utf8Fast +{ + // Check if string is pure ASCII (no bytes >= 128) + inline bool IsAsciiOnly(const char* s, size_t len) + { + // Process 8 bytes at a time; aligned_end is s + (len rounded down to a multiple of 8), not a memory-aligned address + const char* end = s + len; + const char* aligned_end = s + (len & ~7); + + while (s < aligned_end) + { + // Test the high bit of all 8 bytes at once with a bitwise AND mask + uint64_t chunk; + memcpy(&chunk, s, 8); + if (chunk & 0x8080808080808080ULL) + return false; + s += 8; + } + + // Check remaining bytes + while (s < end) + { + if ((unsigned char)*s >= 128) + return false; + ++s; + } + return true; + } + + // Widens each input byte to one output character (no API calls); presumably callers check IsAsciiOnly() first + inline std::wstring AsciiToWide(const char* s, size_t len) + { + std::wstring out; + out.reserve(len); + for (size_t i = 0; i < len; ++i) + out.push_back(static_cast(static_cast(s[i]))); + return out; + } + + // Narrows each character below 128 to one byte (no API calls); returns "" as soon as a character >= 128 is seen + inline std::string WideToAscii(const wchar_t* ws, size_t len) + { + std::string out; + out.reserve(len); + for (size_t i = 0; i < len; ++i) + { + wchar_t ch = ws[i]; + if (ch < 128) + out.push_back(static_cast(ch)); + else + return ""; // Not pure ASCII, caller should use full conversion (indistinguishable from an empty input) + } + return out; + } +} + // UTF-8 -> UTF-16 (Windows wide) +// OPTIMIZED: Fast path for ASCII-only strings (avoids 2x API calls) inline std::wstring Utf8ToWide(const std::string& s) { if (s.empty()) @@ -75,9 +295,14 @@ inline std::wstring Utf8ToWide(const 
std::string& s) if (s.size() > MAX_TEXT_LENGTH || s.size() > INT_MAX) { BIDI_LOG("Utf8ToWide: String too large (%zu bytes)", s.size()); - return L""; // String too large + return L""; } + // Fast path: ASCII-only strings (very common in games) + if (Utf8Fast::IsAsciiOnly(s.data(), s.size())) + return Utf8Fast::AsciiToWide(s.data(), s.size()); + + // Slow path: Use Windows API for non-ASCII int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), (int)s.size(), nullptr, 0); if (wlen <= 0) { @@ -90,29 +315,31 @@ inline std::wstring Utf8ToWide(const std::string& s) if (written <= 0 || written != wlen) { BIDI_LOG("Utf8ToWide: Second conversion failed (written=%d, expected=%d, error=%d)", written, wlen, GetLastError()); - return L""; // Conversion failed unexpectedly + return L""; } - // Optional: Sanitize to remove invalid Unicode codepoints (surrogates, non-characters) - // Uncomment if you want strict validation - // SanitizeWideString(out); - return out; } // Convenience overload for char* +// OPTIMIZED: Fast path for ASCII-only strings inline std::wstring Utf8ToWide(const char* s) { if (!s || !*s) return L""; + size_t len = strlen(s); + + // Fast path: ASCII-only strings + if (Utf8Fast::IsAsciiOnly(s, len)) + return Utf8Fast::AsciiToWide(s, len); + + // Slow path: Use Windows API int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, nullptr, 0); if (wlen <= 0) return L""; - // wlen includes terminating NUL std::wstring out(wlen, L'\0'); - int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, out.data(), wlen); if (written <= 0 || written != wlen) { @@ -124,13 +351,11 @@ inline std::wstring Utf8ToWide(const char* s) if (!out.empty() && out.back() == L'\0') out.pop_back(); - // Optional: Sanitize to remove invalid Unicode codepoints - // SanitizeWideString(out); - return out; } // UTF-16 (Windows wide) -> UTF-8 +// OPTIMIZED: Fast path for ASCII-only strings inline std::string WideToUtf8(const std::wstring& ws) { if 
(ws.empty()) @@ -138,8 +363,23 @@ inline std::string WideToUtf8(const std::wstring& ws) // Validate size limits (prevent DoS and INT_MAX overflow) if (ws.size() > MAX_TEXT_LENGTH || ws.size() > INT_MAX) - return ""; // String too large + return ""; + // Fast path: Check if all characters are ASCII + bool isAscii = true; + for (size_t i = 0; i < ws.size() && isAscii; ++i) + isAscii = (ws[i] < 128); + + if (isAscii) + { + std::string out; + out.reserve(ws.size()); + for (size_t i = 0; i < ws.size(); ++i) + out.push_back(static_cast(ws[i])); + return out; + } + + // Slow path: Use Windows API for non-ASCII int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, ws.data(), (int)ws.size(), nullptr, 0, nullptr, nullptr); if (len <= 0) return ""; @@ -149,7 +389,7 @@ inline std::string WideToUtf8(const std::wstring& ws) if (written <= 0 || written != len) { BIDI_LOG("WideToUtf8: Conversion failed (written=%d, expected=%d, error=%d)", written, len, GetLastError()); - return ""; // Conversion failed + return ""; } return out; } @@ -169,59 +409,22 @@ inline std::string WideToUtf8(const wchar_t* ws) enum class EBidiDir { LTR, RTL }; enum class ECharDir : unsigned char { Neutral, LTR, RTL }; -struct TBidiRun -{ - EBidiDir dir; - std::vector text; // logical order -}; - +// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls static inline bool IsRTLCodepoint(wchar_t ch) { - // Directional marks / isolates / embeddings that affect bidi - if (ch == 0x200F || ch == 0x061C) return true; // RLM, ALM - if (ch >= 0x202B && ch <= 0x202E) return true; // RLE/RLO/PDF/LRE/LRO - if (ch >= 0x2066 && ch <= 0x2069) return true; // isolates - - // Hebrew + Arabic blocks (BMP) - if (ch >= 0x0590 && ch <= 0x08FF) return true; - - // Presentation forms - if (ch >= 0xFB1D && ch <= 0xFDFF) return true; - if (ch >= 0xFE70 && ch <= 0xFEFF) return true; - - return false; + return BiDiTables::IsRTL(ch); } +// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls static 
inline bool IsStrongAlpha(wchar_t ch) { - // Use thread-local cache for BMP (Thread safety) - thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false - unsigned char& v = cache[(unsigned short)ch]; - if (v == 1) return true; - if (v == 2) return false; - - WORD type = 0; - bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_ALPHA); - v = ok ? 1 : 2; - return ok; + return BiDiTables::IsAlpha(ch); } +// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls static inline bool IsDigit(wchar_t ch) { - // Fast path for ASCII digits (90%+ of digit checks) - if (ch >= L'0' && ch <= L'9') - return true; - - // For non-ASCII, use cache (Arabic-Indic digits, etc.) - thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false - unsigned char& v = cache[(unsigned short)ch]; - if (v == 1) return true; - if (v == 2) return false; - - WORD type = 0; - bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_DIGIT); - v = ok ? 1 : 2; - return ok; + return BiDiTables::IsDigit(ch); } static inline bool IsNameTokenPunct(wchar_t ch) @@ -257,12 +460,10 @@ static inline bool IsNameTokenPunct(wchar_t ch) } } -// Check RTL first to avoid classifying Arabic as LTR +// Optimized: O(1) lookup - Check RTL first to avoid classifying Arabic as LTR static inline bool IsStrongLTR(wchar_t ch) { - if (IsRTLCodepoint(ch)) - return false; - return IsStrongAlpha(ch) || IsDigit(ch); + return BiDiTables::IsStrongLTR(ch); } static inline bool HasStrongLTRNeighbor(const wchar_t* s, int n, int i) @@ -561,33 +762,29 @@ static std::vector BuildVisualBidiText_Tagless(const wchar_t* s, int n, if (!s || n <= 0) return {}; + // Use buffer pool to avoid per-call allocations + BiDiBuffers::TBufferPool& buffers = BiDiBuffers::Get(); + buffers.EnsureCapacity((size_t)n); + // 1) base direction EBidiDir base = forceRTL ? 
EBidiDir::RTL : DetectBaseDir_FirstStrong(s, n); // Pre-compute strong character positions for O(1) neutral resolution TStrongDirCache strongCache(s, n, base); - // 2) split into runs - // Estimate runs based on text length (~1 per 50 chars, min 4) - std::vector runs; - const size_t estimatedRuns = (size_t)std::max(4, n / 50); - runs.reserve(estimatedRuns); - - auto push_run = [&](EBidiDir d) - { - if (runs.empty() || runs.back().dir != d) - runs.push_back(TBidiRun{ d, {} }); - }; - - // start with base so leading neutrals attach predictably - push_run(base); + // 2) split into runs - use a more efficient approach + // Instead of TBidiRun with vectors, use start/end indices + struct TRunInfo { int start; int end; EBidiDir dir; }; + thread_local static std::vector s_runs; + s_runs.clear(); + s_runs.reserve((size_t)std::max(4, n / 50)); EBidiDir lastStrong = base; + EBidiDir currentRunDir = base; + int runStart = 0; for (int i = 0; i < n; ++i) { - wchar_t ch = s[i]; - EBidiDir d; ECharDir cd = GetCharDirSmart(s, n, i); @@ -607,98 +804,84 @@ static std::vector BuildVisualBidiText_Tagless(const wchar_t* s, int n, d = ResolveNeutralDir(s, n, i, base, lastStrong, &strongCache); } -#ifdef DEBUG_BIDI - if (i < 50) // Only log first 50 chars to avoid spam + // Start a new run if direction changes + if (d != currentRunDir) { - BIDI_LOG("Char[%d] U+%04X '%lc' → CharDir=%s, RunDir=%s", - i, (unsigned int)ch, (ch >= 32 && ch < 127) ? ch : L'?', - cd == ECharDir::RTL ? "RTL" : (cd == ECharDir::LTR ? "LTR" : "Neutral"), - d == EBidiDir::RTL ? 
"RTL" : "LTR"); + if (i > runStart) + s_runs.push_back({runStart, i, currentRunDir}); + runStart = i; + currentRunDir = d; } -#endif - - push_run(d); - runs.back().text.push_back(ch); } + // Push final run + if (n > runStart) + s_runs.push_back({runStart, n, currentRunDir}); - // 3) shape RTL runs in logical order (Arabic shaping) - for (auto& r : runs) + // 3) shape RTL runs using pooled buffer + buffers.shaped.clear(); + + auto shapeRun = [&](int start, int end) -> std::pair { - if (r.dir != EBidiDir::RTL) - continue; + int len = end - start; + if (len <= 0) + return {nullptr, 0}; - if (r.text.empty()) - continue; + // Check for potential integer overflow + if ((size_t)len > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY) + return {s + start, len}; // Return unshaped - // Check for potential integer overflow before allocation - if (r.text.size() > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY) - { - BIDI_LOG("BuildVisualBidiText: RTL run too large for shaping (%zu chars)", r.text.size()); - continue; // Text too large to process safely - } + size_t neededSize = buffers.shaped.size() + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN; + if (buffers.shaped.capacity() < neededSize) + buffers.shaped.reserve(neededSize); - std::vector shaped(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN, 0); + size_t outStart = buffers.shaped.size(); + buffers.shaped.resize(outStart + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN); + + int outLen = Arabic_MakeShape(const_cast(s + start), len, + buffers.shaped.data() + outStart, + (int)(buffers.shaped.size() - outStart)); - int outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size()); if (outLen <= 0) - { - BIDI_LOG("Arabic_MakeShape FAILED for RTL run of %zu characters", r.text.size()); - BIDI_LOG(" WARNING: This RTL text 
segment will NOT be displayed!"); - BIDI_LOG(" First few characters: U+%04X U+%04X U+%04X U+%04X", - r.text.size() > 0 ? (unsigned int)r.text[0] : 0, - r.text.size() > 1 ? (unsigned int)r.text[1] : 0, - r.text.size() > 2 ? (unsigned int)r.text[2] : 0, - r.text.size() > 3 ? (unsigned int)r.text[3] : 0); - continue; - } + return {s + start, len}; // Return unshaped on failure - // Retry once if buffer too small - if (outLen >= (int)shaped.size()) - { - shaped.assign(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR_RETRY + ARABIC_SHAPING_SAFETY_MARGIN_RETRY, 0); - outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size()); - if (outLen <= 0) - continue; - // Add error check instead of silent truncation - if (outLen > (int)shaped.size()) - { - BIDI_LOG("Arabic_MakeShape: Buffer still too small after retry (%d > %zu)", outLen, shaped.size()); - // Shaping failed critically, use unshaped text - continue; - } - } + buffers.shaped.resize(outStart + (size_t)outLen); + return {buffers.shaped.data() + outStart, outLen}; + }; - r.text.assign(shaped.begin(), shaped.begin() + outLen); - } - - // 4) produce visual order: - // - reverse RTL runs internally - // - reverse run sequence if base RTL + // 4) produce visual order std::vector visual; visual.reserve((size_t)n); - auto emit_run = [&](const TBidiRun& r) + auto emitRun = [&](const TRunInfo& run) + { + if (run.dir == EBidiDir::RTL) { - if (r.dir == EBidiDir::RTL) + // Shape and reverse RTL runs + std::pair shaped = shapeRun(run.start, run.end); + const wchar_t* ptr = shaped.first; + int len = shaped.second; + if (ptr && len > 0) { - for (int k = (int)r.text.size() - 1; k >= 0; --k) - visual.push_back(r.text[(size_t)k]); + for (int k = len - 1; k >= 0; --k) + visual.push_back(ptr[k]); } - else - { - visual.insert(visual.end(), r.text.begin(), r.text.end()); - } - }; + } + else + { + // LTR runs: copy directly + visual.insert(visual.end(), s + run.start, s + run.end); + } + }; if (base == 
EBidiDir::LTR) { - for (const auto& r : runs) - emit_run(r); + for (const auto& run : s_runs) + emitRun(run); } else { - for (int i = (int)runs.size() - 1; i >= 0; --i) - emit_run(runs[(size_t)i]); + for (int i = (int)s_runs.size() - 1; i >= 0; --i) + emitRun(s_runs[(size_t)i]); } return visual; @@ -763,7 +946,7 @@ static inline std::vector BuildVisualChatMessage( { // Apply BiDi to message with auto-detection (don't force RTL) // Let the BiDi algorithm detect base direction from first strong character - std::vector msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false); + std::vector msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL); // base direction now follows forceRTL; the context comments above still describe the old auto-detect call visual.insert(visual.end(), msgVisual.begin(), msgVisual.end()); } visual.push_back(L' '); @@ -787,7 +970,7 @@ static inline std::vector BuildVisualChatMessage( { // Apply BiDi to message with auto-detection (don't force RTL) // Let the BiDi algorithm detect base direction from first strong character - std::vector msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false); + std::vector msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL); // base direction now follows forceRTL; the context comments above still describe the old auto-detect call visual.insert(visual.end(), msgVisual.begin(), msgVisual.end()); } } diff --git a/src/EterBase/Debug.cpp b/src/EterBase/Debug.cpp index a48dd9f..ef99f7c 100644 --- a/src/EterBase/Debug.cpp +++ b/src/EterBase/Debug.cpp @@ -16,34 +16,93 @@ const DWORD DEBUG_STRING_MAX_LEN = 1024; static int isLogFile = false; HWND g_PopupHwnd = NULL; -// Convert UTF-8 char* -> wide and send to debugger (NO helper function, just a macro) +// ============================================================================ +// OPTIMIZED LOGGING INFRASTRUCTURE +// ============================================================================ + +// Caches month/day/hour/minute so time()/localtime() are not called for every log line +// Refreshed by Update() when more than 100ms have passed; Format() still reads the millisecond field live +struct TCachedTimestamp +{ + DWORD lastUpdateMs = 0; + int month = 0; + int day = 0; + int hour = 0; + int minute 
= 0; + + void Update() + { + DWORD now = ELTimer_GetMSec(); + // Refresh only when more than 100ms have elapsed since the last refresh; lastUpdateMs starts at 0, so nothing is read while ELTimer_GetMSec() is still <= 100 + if (now - lastUpdateMs > 100) + { + time_t ct = time(0); + struct tm ctm = *localtime(&ct); + month = ctm.tm_mon + 1; + day = ctm.tm_mday; + hour = ctm.tm_hour; + minute = ctm.tm_min; + lastUpdateMs = now; + } + } + + void Format(char* buf, size_t bufSize) const + { + DWORD msec = ELTimer_GetMSec() % 60000; // read live, so it can wrap before the cached minute is refreshed + _snprintf_s(buf, bufSize, _TRUNCATE, "%02d%02d %02d:%02d:%05d :: ", + month, day, hour, minute, (int)msec); + } +}; + +static TCachedTimestamp g_cachedTimestamp; + +// Debug output: ASCII strings are widened into a 512-wchar_t stack buffer (longer text is cut at 511 chars), others go through Utf8ToWide #ifdef _DEBUG #define DBG_OUT_W_UTF8(psz) \ do { \ const char* __s = (psz) ? (psz) : ""; \ - std::wstring __w = Utf8ToWide(__s); \ - OutputDebugStringW(__w.c_str()); \ + size_t __len = strlen(__s); \ + if (Utf8Fast::IsAsciiOnly(__s, __len)) { \ + /* ASCII fast path: widen into a stack buffer, no heap allocation */ \ + wchar_t __wbuf[512]; \ + size_t __wlen = (__len < 511) ? 
__len : 511; \ + for (size_t __i = 0; __i < __wlen; ++__i) \ + __wbuf[__i] = (wchar_t)(unsigned char)__s[__i]; \ + __wbuf[__wlen] = L'\0'; \ + OutputDebugStringW(__wbuf); \ + } else { \ + /* Non-ASCII: use full conversion */ \ + std::wstring __w = Utf8ToWide(__s); \ + OutputDebugStringW(__w.c_str()); \ + } \ } while (0) #else #define DBG_OUT_W_UTF8(psz) do { (void)(psz); } while (0) #endif +// Buffered log file writer +// OPTIMIZATION: Buffered writes with periodic flush instead of per-write fflush() +// - Collects writes in an in-memory buffer +// - Flushes when a write would overflow the buffer or leaves it >75% full, when a Write() comes >500ms after the last flush, and from the destructor +// - NOTE(review): there is no timer, so the last lines stay buffered until the next Write()/Flush(); confirm this is acceptable for crash diagnostics class CLogFile : public CSingleton { public: - CLogFile() : m_fp(NULL) {} + CLogFile() : m_fp(NULL), m_bufferPos(0), m_lastFlushMs(0) {} virtual ~CLogFile() { + Flush(); // Ensure all buffered data is written if (m_fp) fclose(m_fp); - m_fp = NULL; } void Initialize() { m_fp = fopen("log/log.txt", "w"); + m_bufferPos = 0; + m_lastFlushMs = ELTimer_GetMSec(); } void Write(const char* c_pszMsg) @@ -51,22 +110,63 @@ class CLogFile : public CSingleton if (!m_fp) return; - time_t ct = time(0); - struct tm ctm = *localtime(&ct); + // Use cached timestamp (updated every ~100ms) + g_cachedTimestamp.Update(); + char timestamp[32]; + g_cachedTimestamp.Format(timestamp, sizeof(timestamp)); - fprintf(m_fp, "%02d%02d %02d:%02d:%05d :: %s", - ctm.tm_mon + 1, - ctm.tm_mday, - ctm.tm_hour, - ctm.tm_min, - ELTimer_GetMSec() % 60000, - c_pszMsg); + // Calculate total length needed + size_t timestampLen = strlen(timestamp); + size_t msgLen = c_pszMsg ? 
strlen(c_pszMsg) : 0; + size_t totalLen = timestampLen + msgLen; + // If this write would overflow the buffer, flush first + if (m_bufferPos + totalLen >= BUFFER_SIZE - 1) + Flush(); + + // If message is larger than buffer, write directly (rare case) + if (totalLen >= BUFFER_SIZE - 1) + { + fputs(timestamp, m_fp); + if (c_pszMsg) + fputs(c_pszMsg, m_fp); + fflush(m_fp); + return; + } + + // Append to buffer + memcpy(m_buffer + m_bufferPos, timestamp, timestampLen); + m_bufferPos += timestampLen; + if (msgLen > 0) + { + memcpy(m_buffer + m_bufferPos, c_pszMsg, msgLen); + m_bufferPos += msgLen; + } + + // Periodic flush: every 500ms or when buffer is >75% full + DWORD now = ELTimer_GetMSec(); + if (now - m_lastFlushMs > 500 || m_bufferPos > BUFFER_SIZE * 3 / 4) + Flush(); + } + + void Flush() + { + if (!m_fp || m_bufferPos == 0) + return; + + m_buffer[m_bufferPos] = '\0'; + fputs(m_buffer, m_fp); fflush(m_fp); + m_bufferPos = 0; + m_lastFlushMs = ELTimer_GetMSec(); } protected: + static const size_t BUFFER_SIZE = 8192; // 8KB buffer FILE* m_fp; + char m_buffer[BUFFER_SIZE]; + size_t m_bufferPos; + DWORD m_lastFlushMs; }; static CLogFile gs_logfile; @@ -220,9 +320,50 @@ void Tracef(const char* c_szFormat, ...) 
LogFile(szBuf); } +// Buffered stderr writer for syserr (same pattern as CLogFile) +// OPTIMIZATION: flushes only when a Write() lands >500ms after the last flush, the buffer passes 75% full, or Flush() is called explicitly +static struct TSyserrBuffer +{ + static const size_t BUFFER_SIZE = 4096; + char buffer[BUFFER_SIZE]; + size_t pos = 0; + DWORD lastFlushMs = 0; + + void Write(const char* msg, size_t len) + { + if (pos + len >= BUFFER_SIZE - 1) + Flush(); + + if (len >= BUFFER_SIZE - 1) + { + // Message cannot fit the buffer: write it straight to stderr (pending data was already flushed above) + fwrite(msg, 1, len, stderr); + fflush(stderr); + return; + } + + memcpy(buffer + pos, msg, len); + pos += len; + + DWORD now = ELTimer_GetMSec(); + if (now - lastFlushMs > 500 || pos > BUFFER_SIZE * 3 / 4) + Flush(); + } + + void Flush() + { + if (pos == 0) + return; + fwrite(buffer, 1, pos, stderr); + fflush(stderr); + pos = 0; + lastFlushMs = ELTimer_GetMSec(); + } +} g_syserrBuffer; + void TraceError(const char* c_szFormat, ...) { -//#ifndef _DISTRIBUTE +//#ifndef _DISTRIBUTE char szBuf[DEBUG_STRING_MAX_LEN + 2]; strncpy_s(szBuf, sizeof(szBuf), "SYSERR: ", _TRUNCATE); @@ -243,17 +384,14 @@ void TraceError(const char* c_szFormat, ...) szBuf[sizeof(szBuf) - 1] = '\0'; } - time_t ct = time(0); - struct tm ctm = *localtime(&ct); + // OPTIMIZED: Use cached timestamp instead of time()/localtime() per call + g_cachedTimestamp.Update(); + char timestamp[32]; + g_cachedTimestamp.Format(timestamp, sizeof(timestamp)); - fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s", - ctm.tm_mon + 1, - ctm.tm_mday, - ctm.tm_hour, - ctm.tm_min, - ELTimer_GetMSec() % 60000, - szBuf + 8); - fflush(stderr); + // OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call + g_syserrBuffer.Write(timestamp, strlen(timestamp)); + g_syserrBuffer.Write(szBuf + 8, strlen(szBuf + 8)); // Skip "SYSERR: " prefix for stderr #ifdef _DEBUG DBG_OUT_W_UTF8(szBuf); @@ -267,8 +405,7 @@ void TraceError(const char* c_szFormat, ...) void TraceErrorWithoutEnter(const char* c_szFormat, ...) 
{ -//#ifndef _DISTRIBUTE - +//#ifndef _DISTRIBUTE char szBuf[DEBUG_STRING_MAX_LEN]; va_list args; @@ -276,17 +413,14 @@ void TraceErrorWithoutEnter(const char* c_szFormat, ...) _vsnprintf_s(szBuf, sizeof(szBuf), _TRUNCATE, c_szFormat, args); va_end(args); - time_t ct = time(0); - struct tm ctm = *localtime(&ct); + // OPTIMIZED: Use cached timestamp instead of time()/localtime() per call + g_cachedTimestamp.Update(); + char timestamp[32]; + g_cachedTimestamp.Format(timestamp, sizeof(timestamp)); - fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s", - ctm.tm_mon + 1, - ctm.tm_mday, - ctm.tm_hour, - ctm.tm_min, - ELTimer_GetMSec() % 60000, - szBuf + 8); - fflush(stderr); + // OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call + g_syserrBuffer.Write(timestamp, strlen(timestamp)); + g_syserrBuffer.Write(szBuf, strlen(szBuf)); #ifdef _DEBUG DBG_OUT_W_UTF8(szBuf); @@ -349,7 +483,7 @@ void OpenLogFile(bool bUseLogFIle) std::filesystem::create_directory("log"); } -//#ifndef _DISTRIBUTE +//#ifndef _DISTRIBUTE _wfreopen(L"log/syserr.txt", L"w", stderr); if (bUseLogFIle) @@ -360,6 +494,13 @@ void OpenLogFile(bool bUseLogFIle) //#endif } +void CloseLogFile() +{ + // Flush all buffered output before shutdown + g_syserrBuffer.Flush(); + CLogFile::Instance().Flush(); +} + void OpenConsoleWindow() { AllocConsole(); diff --git a/src/EterLib/GrpTextInstance.cpp b/src/EterLib/GrpTextInstance.cpp index a33e8e8..214a0a8 100644 --- a/src/EterLib/GrpTextInstance.cpp +++ b/src/EterLib/GrpTextInstance.cpp @@ -238,16 +238,88 @@ void CGraphicTextInstance::Update() } // Tag-aware BiDi rendering: Parse tags, apply BiDi per segment, track colors/hyperlinks + // OPTIMIZED: Use helper lambda to eliminate code duplication (was repeated 5+ times) if (hasRTL || hasTags) { DWORD currentColor = dwColor; int hyperlinkStep = 0; // 0=normal, 1=collecting metadata, 2=visible hyperlink std::wstring hyperlinkMetadata; - std::vector currentSegment; + + // Use thread-local buffer to avoid 
per-call allocation + thread_local static std::vector s_currentSegment; + s_currentSegment.clear(); SHyperlink currentHyperlink; currentHyperlink.sx = currentHyperlink.ex = 0; + // In chat RTL, force RTL base direction so prefixes like "[hyperlink]" don't flip the paragraph to LTR. + const bool forceRTLForBidi = (m_isChatMessage && m_computedRTL); + + // OPTIMIZED: Single helper function for flushing segments (eliminates 5x code duplication) + auto FlushSegment = [&](DWORD segColor) -> int + { + if (s_currentSegment.empty()) + return 0; + + int totalWidth = 0; + + // Apply BiDi transformation using optimized BuildVisualBidiText_Tagless + std::vector visual = BuildVisualBidiText_Tagless( + s_currentSegment.data(), (int)s_currentSegment.size(), forceRTLForBidi); + + for (size_t j = 0; j < visual.size(); ++j) + { + int w = __DrawCharacter(pFontTexture, visual[j], segColor); + totalWidth += w; + } + + s_currentSegment.clear(); + return totalWidth; + }; + + // Prepend glyphs to the already-built draw list (used to place hyperlink before message in RTL chat). + auto PrependGlyphs = [&](CGraphicFontTexture* pFontTexture, + const std::vector& chars, + DWORD color, + int& outWidth) + { + outWidth = 0; + + // Use thread-local buffers to avoid allocation + thread_local static std::vector s_newCharInfos; + thread_local static std::vector s_newColors; + s_newCharInfos.clear(); + s_newColors.clear(); + s_newCharInfos.reserve(chars.size()); + s_newColors.reserve(chars.size()); + + for (size_t k = 0; k < chars.size(); ++k) + { + auto* pInfo = pFontTexture->GetCharacterInfomation(chars[k]); + if (!pInfo) + continue; + + s_newCharInfos.push_back(pInfo); + s_newColors.push_back(color); + + outWidth += pInfo->advance; + m_textHeight = std::max((WORD)pInfo->height, m_textHeight); + } + + // Insert at the beginning of the draw list. 
+ m_pCharInfoVector.insert(m_pCharInfoVector.begin(), s_newCharInfos.begin(), s_newCharInfos.end()); + m_dwColorInfoVector.insert(m_dwColorInfoVector.begin(), s_newColors.begin(), s_newColors.end()); + + // Shift any already-recorded hyperlinks to the right. + for (auto& link : m_hyperlinkVector) + { + link.sx += outWidth; + link.ex += outWidth; + } + + m_textWidth += outWidth; + }; + // Parse text with tags for (int i = 0; i < wTextLen;) { @@ -257,37 +329,15 @@ void CGraphicTextInstance::Update() if (tagType == TEXT_TAG_COLOR) { - // Flush current segment with BiDi before changing color - if (!currentSegment.empty()) - { - // Use auto-detection for BiDi (don't force RTL) - std::vector visual = BuildVisualBidiText_Tagless( - currentSegment.data(), (int)currentSegment.size(), false); - for (size_t j = 0; j < visual.size(); ++j) - { - int w = __DrawCharacter(pFontTexture, visual[j], currentColor); - currentHyperlink.ex += w; - } - currentSegment.clear(); - } + // Flush current segment before changing color + currentHyperlink.ex += FlushSegment(currentColor); currentColor = htoi(tagExtra.c_str(), 8); i += tagLen; } else if (tagType == TEXT_TAG_RESTORE_COLOR) { // Flush segment before restoring color - if (!currentSegment.empty()) - { - // Use auto-detection for BiDi (don't force RTL) - std::vector visual = BuildVisualBidiText_Tagless( - currentSegment.data(), (int)currentSegment.size(), false); - for (size_t j = 0; j < visual.size(); ++j) - { - int w = __DrawCharacter(pFontTexture, visual[j], currentColor); - currentHyperlink.ex += w; - } - currentSegment.clear(); - } + currentHyperlink.ex += FlushSegment(currentColor); currentColor = dwColor; i += tagLen; } @@ -303,18 +353,7 @@ void CGraphicTextInstance::Update() { // End of metadata, start visible section // Flush any pending non-hyperlink segment first - if (!currentSegment.empty()) - { - // Use auto-detection for BiDi (don't force RTL) - std::vector visual = BuildVisualBidiText_Tagless( - currentSegment.data(), 
(int)currentSegment.size(), false); - for (size_t j = 0; j < visual.size(); ++j) - { - int w = __DrawCharacter(pFontTexture, visual[j], currentColor); - currentHyperlink.ex += w; - } - currentSegment.clear(); - } + currentHyperlink.ex += FlushSegment(currentColor); hyperlinkStep = 2; currentHyperlink.text = hyperlinkMetadata; @@ -323,80 +362,85 @@ void CGraphicTextInstance::Update() else if (hyperlinkStep == 2) { // End of visible section - render hyperlink text with proper Arabic handling - // Format: [Arabic Text] or [English Text] - // Keep brackets in position, reverse Arabic content between them - if (!currentSegment.empty()) + // In RTL chat: we want the hyperlink chunk to appear BEFORE the message, even if logically appended. + if (!s_currentSegment.empty()) { - // Find bracket positions + // OPTIMIZED: Use thread-local buffer for visible rendering + thread_local static std::vector s_visibleToRender; + s_visibleToRender.clear(); + + // Find bracket positions: [ ... ] int openBracket = -1, closeBracket = -1; - for (size_t idx = 0; idx < currentSegment.size(); ++idx) + for (size_t idx = 0; idx < s_currentSegment.size(); ++idx) { - if (currentSegment[idx] == L'[' && openBracket == -1) + if (s_currentSegment[idx] == L'[' && openBracket == -1) openBracket = (int)idx; - else if (currentSegment[idx] == L']' && closeBracket == -1) + else if (s_currentSegment[idx] == L']' && closeBracket == -1) closeBracket = (int)idx; } if (openBracket >= 0 && closeBracket > openBracket) { - // Extract content between brackets - std::vector content( - currentSegment.begin() + openBracket + 1, - currentSegment.begin() + closeBracket); + // Keep '[' + s_visibleToRender.push_back(L'['); - // Apply Arabic shaping to content - std::vector shaped(content.size() * 2 + 16, 0); - int shapedLen = Arabic_MakeShape(content.data(), (int)content.size(), - shaped.data(), (int)shaped.size()); + // Extract inside content and apply BiDi + thread_local static std::vector s_content; + s_content.assign( 
+ s_currentSegment.begin() + openBracket + 1, + s_currentSegment.begin() + closeBracket); - // Render: "[" + reversed_arabic + "]" - // 1. Opening bracket - int w = __DrawCharacter(pFontTexture, L'[', currentColor); - currentHyperlink.ex += w; + // FIX: Use false to let BiDi auto-detect direction from content + // This ensures English items like [Sword+9] stay LTR + // while Arabic items like [درع فولاذي+9] are properly RTL + std::vector visual = BuildVisualBidiText_Tagless( + s_content.data(), (int)s_content.size(), false); - // 2. Arabic content (shaped and REVERSED for RTL display) - if (shapedLen > 0) - { - for (int j = shapedLen - 1; j >= 0; --j) - { - w = __DrawCharacter(pFontTexture, shaped[j], currentColor); - currentHyperlink.ex += w; - } - } - else - { - // Fallback: reverse original content - for (int j = (int)content.size() - 1; j >= 0; --j) - { - w = __DrawCharacter(pFontTexture, content[j], currentColor); - currentHyperlink.ex += w; - } - } + s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end()); - // 3. Closing bracket - w = __DrawCharacter(pFontTexture, L']', currentColor); - currentHyperlink.ex += w; - - // 4. 
Render any text after closing bracket (if any) - for (size_t idx = closeBracket + 1; idx < currentSegment.size(); ++idx) - { - w = __DrawCharacter(pFontTexture, currentSegment[idx], currentColor); - currentHyperlink.ex += w; - } + // Keep ']' + s_visibleToRender.push_back(L']'); } else { - // No brackets found - render as-is (shouldn't happen for hyperlinks) - for (size_t j = 0; j < currentSegment.size(); ++j) + // No brackets: apply BiDi to whole segment + // FIX: Use false to let BiDi auto-detect direction from content + std::vector visual = BuildVisualBidiText_Tagless( + s_currentSegment.data(), (int)s_currentSegment.size(), false); + + s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end()); + } + + // Ensure a space AFTER the hyperlink chunk (so it becomes "[hyperlink] اختبار...") + s_visibleToRender.push_back(L' '); + + // Key behavior: + // In RTL chat, place hyperlink BEFORE the message by prepending glyphs. + if (m_isChatMessage && m_computedRTL) + { + int addedWidth = 0; + PrependGlyphs(pFontTexture, s_visibleToRender, currentColor, addedWidth); + + // Record the hyperlink range at the beginning (0..addedWidth) + currentHyperlink.sx = 0; + currentHyperlink.ex = addedWidth; + m_hyperlinkVector.push_back(currentHyperlink); + } + else + { + // LTR or non-chat: keep original "append" behavior + currentHyperlink.sx = currentHyperlink.ex; + for (size_t j = 0; j < s_visibleToRender.size(); ++j) { - int w = __DrawCharacter(pFontTexture, currentSegment[j], currentColor); + int w = __DrawCharacter(pFontTexture, s_visibleToRender[j], currentColor); currentHyperlink.ex += w; } + m_hyperlinkVector.push_back(currentHyperlink); } - currentSegment.clear(); } - m_hyperlinkVector.push_back(currentHyperlink); + hyperlinkStep = 0; + s_currentSegment.clear(); } i += tagLen; } @@ -411,24 +455,14 @@ void CGraphicTextInstance::Update() { // Add to current segment // Will be BiDi-processed for normal text, or rendered directly for hyperlinks - 
currentSegment.push_back(wTextBuf[i]); + s_currentSegment.push_back(wTextBuf[i]); } i += tagLen; } } - // Flush any remaining segment - if (!currentSegment.empty()) - { - // Use auto-detection for BiDi (don't force RTL) - std::vector visual = BuildVisualBidiText_Tagless( - currentSegment.data(), (int)currentSegment.size(), false); - for (size_t j = 0; j < visual.size(); ++j) - { - int w = __DrawCharacter(pFontTexture, visual[j], currentColor); - currentHyperlink.ex += w; - } - } + // Flush any remaining segment using optimized helper + currentHyperlink.ex += FlushSegment(currentColor); pFontTexture->UpdateTexture(); m_isUpdate = true; diff --git a/src/EterLocale/Arabic.cpp b/src/EterLocale/Arabic.cpp index 4536e56..d77c97a 100644 --- a/src/EterLocale/Arabic.cpp +++ b/src/EterLocale/Arabic.cpp @@ -1,6 +1,7 @@ #include "StdAfx.h" #include "Arabic.h" #include +#include enum ARABIC_CODE { @@ -243,110 +244,151 @@ bool Arabic_IsComb2(wchar_t code) return false; } +// Helper: Check if a character can join to the right (has INITIAL or MEDIAL form) +static inline bool Arabic_CanJoinRight(wchar_t code) +{ + if (!Arabic_IsInMap(code)) + return false; + return Arabic_GetMap(code, INITIAL) != 0 || Arabic_GetMap(code, MEDIAL) != 0; +} + +// Helper: Check if a character can join to the left (has MEDIAL or FINAL form) +static inline bool Arabic_CanJoinLeft(wchar_t code) +{ + if (!Arabic_IsInMap(code)) + return false; + return Arabic_GetMap(code, MEDIAL) != 0 || Arabic_GetMap(code, FINAL) != 0 || Arabic_IsNext(code); +} + +// Optimized O(n) Arabic shaping algorithm +// Previous: O(n²) due to backward/forward scans for each character +// Now: O(n) single forward pass with state tracking. Returns the number of wchar_t written to dst, or 0 when src/dst is null, srcLen is 0, or dstLen < srcLen size_t Arabic_MakeShape(wchar_t* src, size_t srcLen, wchar_t* dst, size_t dstLen) { - // Runtime validation instead of assert (which is disabled in release builds) + // Runtime validation if (!src || !dst || srcLen == 0 || dstLen < srcLen) return 0; - const size_t srcLastIndex = srcLen - 1; + // 
Phase 1: Pre-scan to find the next non-composing character for each position (Arabic or not: only an immediate neighbour may join) + // This converts O(n) inner loops into O(1) lookups + // Use thread-local buffer to avoid per-call allocation + thread_local static std::vector<size_t> s_nextBase; + if (s_nextBase.size() < srcLen + 1) + s_nextBase.resize(srcLen + 1); + + // Build next-character lookup (reverse scan); a space or Latin letter must be recorded too, otherwise letters would join across it + size_t nextBaseIdx = srcLen; // Invalid index = no following character + for (size_t i = srcLen; i > 0; --i) + { + size_t idx = i - 1; + s_nextBase[idx] = nextBaseIdx; + + wchar_t ch = src[idx]; + if (!Arabic_IsInComposing(ch)) + nextBaseIdx = idx; + } + s_nextBase[srcLen] = srcLen; // Sentinel + + // Phase 2: Single forward pass with state tracking + size_t dstIndex = 0; + bool prevJoins = false; // Does previous Arabic letter join to the right? - unsigned dstIndex = 0; for (size_t srcIndex = 0; srcIndex < srcLen; ++srcIndex) { wchar_t cur = src[srcIndex]; - //printf("now %x\n", cur); + // Composing marks: copy directly, don't affect joining state + if (Arabic_IsInComposing(cur)) + { + if (dstIndex < dstLen) + dst[dstIndex++] = cur; + continue; + } if (Arabic_IsInMap(cur)) { - // 이전 글자 얻어내기 - wchar_t prev = 0; - { - size_t prevIndex = srcIndex; - while (prevIndex > 0) - { - prevIndex--; - prev = src[prevIndex]; - //printf("\tprev %d:%x\n", prevIndex, cur); - if (Arabic_IsInComposing(prev)) - continue; - else - break; - } - - if ((srcIndex == 0) || - (!Arabic_IsInMap(prev)) || - (!Arabic_GetMap(prev, INITIAL) && !Arabic_GetMap(prev, MEDIAL))) - { - //printf("\tprev not defined\n"); - prev = 0; - } - } - - // 다음 글자 얻어내기 + // Look up the immediately following non-composing character; it only counts as a joining neighbour if Arabic_CanJoinLeft accepts it wchar_t next = 0; + size_t nextIdx = s_nextBase[srcIndex]; + if (nextIdx < srcLen) { - size_t nextIndex = srcIndex; - while (nextIndex < srcLastIndex) - { - nextIndex++; - next = src[nextIndex]; - if (Arabic_IsInComposing(next)) - continue; - else - break; - } - - if ((nextIndex == srcLen) || - 
(!Arabic_IsInMap(next)) || - (!Arabic_GetMap(next, MEDIAL) && !Arabic_GetMap(next, FINAL) && !Arabic_IsNext(next))) - { - //printf("\tnext not defined\n"); - next = 0; - } + wchar_t nextChar = src[nextIdx]; + if (Arabic_CanJoinLeft(nextChar)) + next = nextChar; } - if (Arabic_IsComb1(cur) && Arabic_IsComb2(next)) + // Handle LAM-ALEF composition + if (Arabic_IsComb1(cur) && nextIdx < srcLen && Arabic_IsComb2(src[nextIdx])) { - if (prev) - dst[dstIndex] = Arabic_GetComposition(cur, next, FINAL); + wchar_t composed; + if (prevJoins) + composed = Arabic_GetComposition(cur, src[nextIdx], FINAL); else - dst[dstIndex] = Arabic_GetComposition(cur, next, ISOLATED); + composed = Arabic_GetComposition(cur, src[nextIdx], ISOLATED); - //printf("\tGot me a complex:%x\n", dst[dstIndex]); + if (dstIndex < dstLen) + dst[dstIndex++] = composed; - srcIndex++; - dstIndex++; + // Skip the ALEF that was combined + srcIndex = nextIdx; + // LAM-ALEF doesn't join to the right + prevJoins = false; + continue; } - else if (prev && next && (dst[dstIndex] = Arabic_GetMap(cur, MEDIAL))) + + // Determine form based on joining context + wchar_t shaped = 0; + bool curJoinsRight = false; + + if (prevJoins && next) { - //printf("\tGot prev & next:%x\n", dst[dstIndex]); - dstIndex++; + // Both sides join: MEDIAL + shaped = Arabic_GetMap(cur, MEDIAL); + if (shaped) + curJoinsRight = Arabic_CanJoinRight(cur); } - else if (prev && (dst[dstIndex] = Arabic_GetMap(cur, FINAL))) + + if (!shaped && prevJoins) { - //printf("\tGot prev:%x\n", dst[dstIndex]); - dstIndex++; + // Only left joins: FINAL + shaped = Arabic_GetMap(cur, FINAL); + // FINAL form doesn't extend to the right + curJoinsRight = false; } - else if (next && (dst[dstIndex] = Arabic_GetMap(cur, INITIAL))) + + if (!shaped && next) { - //printf("\tGot next:%x\n", dst[dstIndex]); - dstIndex++; + // Only right joins: INITIAL + shaped = Arabic_GetMap(cur, INITIAL); + if (shaped) + curJoinsRight = Arabic_CanJoinRight(cur); } - else + + if (!shaped) 
{ - dst[dstIndex] = Arabic_GetMap(cur, ISOLATED); - //printf("\tGot nothing:%x\n", dst[dstIndex]); - dstIndex++; + // No joining: ISOLATED + shaped = Arabic_GetMap(cur, ISOLATED); + curJoinsRight = false; } + + if (!shaped) + shaped = cur; // Fallback to original if no mapping + + if (dstIndex < dstLen) + dst[dstIndex++] = shaped; + + // Update state for next character + prevJoins = curJoinsRight; } else { - dst[dstIndex] = cur; - dstIndex++; + // Non-Arabic character: copy directly, breaks joining + if (dstIndex < dstLen) + dst[dstIndex++] = cur; + prevJoins = false; } } + return dstIndex; } diff --git a/src/UserInterface/PythonSkill.cpp b/src/UserInterface/PythonSkill.cpp index 81dfc50..dba2408 100644 --- a/src/UserInterface/PythonSkill.cpp +++ b/src/UserInterface/PythonSkill.cpp @@ -1279,11 +1279,60 @@ float CPythonSkill::SSkillData::ProcessFormula(CPoly * pPoly, float fSkillLevel, return pPoly->Eval(); } -static void ReplaceFirst(std::string& s, const char* needle, const std::string& repl) +// Format specifiers supported in skill descriptions +static const char* FORMAT_SPECIFIERS[] = { + "%.0f", // Integer (no decimals) + "%.1f", // 1 decimal place + "%.2f", // 2 decimal places + "%d", // Integer (alternative) +}; +static const size_t FORMAT_SPECIFIER_COUNT = sizeof(FORMAT_SPECIFIERS) / sizeof(FORMAT_SPECIFIERS[0]); + +// Find and replace the first occurrence of any format specifier with the given value +// Returns true if a replacement was made +static bool ReplaceNextFormatSpecifier(std::string& s, float value) { - size_t pos = s.find(needle); - if (pos != std::string::npos) - s.replace(pos, strlen(needle), repl); + size_t bestPos = std::string::npos; + size_t bestLen = 0; + const char* bestSpec = nullptr; + + // Find the first (leftmost) format specifier in the string + for (size_t i = 0; i < FORMAT_SPECIFIER_COUNT; ++i) + { + size_t pos = s.find(FORMAT_SPECIFIERS[i]); + if (pos != std::string::npos && (bestPos == std::string::npos || pos < bestPos)) + { + 
bestPos = pos; + bestLen = strlen(FORMAT_SPECIFIERS[i]); + bestSpec = FORMAT_SPECIFIERS[i]; + } + } + + if (bestPos == std::string::npos) + return false; + + // Format the value according to the specifier found: %.0f and %d both print floorf(value) with no decimals + char szValue[64]; + if (strcmp(bestSpec, "%.0f") == 0 || strcmp(bestSpec, "%d") == 0) + _snprintf(szValue, sizeof(szValue), "%.0f", floorf(value)); + else if (strcmp(bestSpec, "%.1f") == 0) + _snprintf(szValue, sizeof(szValue), "%.1f", value); + else // %.2f, the only remaining entry in FORMAT_SPECIFIERS + _snprintf(szValue, sizeof(szValue), "%.2f", value); + + s.replace(bestPos, bestLen, szValue); + return true; +} + +// Replace each "%%" pair with a single "%" (escaped percent sign), scanning left to right +static void UnescapePercent(std::string& s) +{ + size_t pos = 0; + while ((pos = s.find("%%", pos)) != std::string::npos) + { + s.replace(pos, 2, "%"); + ++pos; // Move past the replaced '%' so it is not paired with a following '%' (e.g. "%%%%" becomes "%%") + } } const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float fSkillLevel) @@ -1303,33 +1352,20 @@ const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float float fMinValue = ProcessFormula(&minPoly, fSkillLevel); float fMaxValue = ProcessFormula(&maxPoly, fSkillLevel); + // Take absolute values: negative formula results are shown as magnitudes if (fMinValue < 0.0f) fMinValue = -fMinValue; if (fMaxValue < 0.0f) fMaxValue = -fMaxValue; - const bool wantsInt = (desc.find("%.0f") != std::string::npos); - if (wantsInt) - { - fMinValue = floorf(fMinValue); - fMaxValue = floorf(fMaxValue); - } - - char szMin[64], szMax[64]; - if (wantsInt) - { - _snprintf(szMin, sizeof(szMin), "%.0f", fMinValue); - _snprintf(szMax, sizeof(szMax), "%.0f", fMaxValue); - } - else - { - _snprintf(szMin, sizeof(szMin), "%.2f", fMinValue); - _snprintf(szMax, sizeof(szMax), "%.2f", fMaxValue); - } - static std::string out; out = desc; - ReplaceFirst(out, "%.0f", szMin); - ReplaceFirst(out, "%.0f", szMax); + // Replace format specifiers in order of appearance + // First specifier gets min value, second gets max value. NOTE(review): substitution runs before unescaping, so an escaped percent directly followed by a specifier-like tail (e.g. "%%d") would be matched at its second '%'; confirm descriptions never contain that + ReplaceNextFormatSpecifier(out, 
fMinValue); + ReplaceNextFormatSpecifier(out, fMaxValue); + + // Convert escaped %% to single % (for display like "30%") + UnescapePercent(out); return out.c_str(); }