fix: Optimized UTF8, BiDi, Debug

This commit is contained in:
rtw1x1
2026-01-20 21:23:31 +00:00
parent ba79e137f0
commit 100dd2b87b
5 changed files with 817 additions and 381 deletions

475
extern/include/utf8.h vendored
View File

@@ -1,9 +1,11 @@
#pragma once
#include <string>
#include <cstring>
#include <windows.h>
#include <vector>
#include <algorithm>
#include <cmath>
#include <utility>
#include <EterLocale/Arabic.h>
@@ -37,6 +39,161 @@ constexpr size_t ARABIC_SHAPING_SAFETY_MARGIN_RETRY = 64;
#define BIDI_LOG_SIMPLE(msg) ((void)0)
#endif
// ============================================================================
// OPTIMIZED CHARACTER CLASSIFICATION (Lookup Tables)
// ============================================================================
// Replaces expensive GetStringTypeW() syscalls with O(1) table lookups.
// The table is built exactly once, on first use, by the constructor of a
// function-local static object; C++11 guarantees that such initialization is
// performed once even when several threads make the first call concurrently.
namespace BiDiTables
{
    // Character property flags (bit mask, one byte per BMP code unit)
    enum ECharFlags : uint8_t
    {
        CF_NONE   = 0,
        CF_ALPHA  = 0x01, // Alphabetic (Latin, Cyrillic, Greek, etc.)
        CF_DIGIT  = 0x02, // Numeric digit (0-9, Arabic-Indic, etc.)
        CF_RTL    = 0x04, // RTL script (Arabic, Hebrew, ...) or RTL-affecting control
        CF_ARABIC = 0x08, // Arabic letter that needs shaping
    };

    // Backing storage for the flags table. All classification data lives in the
    // constructor so that building the table is a single, atomic-by-language-rule
    // static initialization instead of a hand-rolled "initialized" flag.
    struct TCharFlagsTable
    {
        uint8_t flags[65536]; // one entry per UTF-16 code unit (BMP)

        TCharFlagsTable() : flags{}
        {
            // --- Left-to-right scripts -------------------------------------
            Mark('0', '9', CF_DIGIT);           // ASCII digits
            Mark('A', 'Z', CF_ALPHA);           // ASCII letters
            Mark('a', 'z', CF_ALPHA);
            // Latin-1 Supplement letters; U+00D7 (multiplication sign) and
            // U+00F7 (division sign) sit inside the range but are not letters.
            Mark(0x00C0, 0x00D6, CF_ALPHA);
            Mark(0x00D8, 0x00F6, CF_ALPHA);
            Mark(0x00F8, 0x00FF, CF_ALPHA);
            Mark(0x0100, 0x024F, CF_ALPHA);     // Latin Extended-A/B
            Mark(0x1E00, 0x1EFF, CF_ALPHA);     // Latin Extended Additional
            Mark(0x0370, 0x03FF, CF_ALPHA);     // Greek
            Mark(0x0400, 0x04FF, CF_ALPHA);     // Cyrillic
            Mark(0x4E00, 0x9FFF, CF_ALPHA);     // CJK - treat as LTR alpha
            Mark(0xAC00, 0xD7AF, CF_ALPHA);     // Hangul

            // --- Right-to-left scripts -------------------------------------
            // U+0590..U+08FF is one contiguous run of RTL blocks: Hebrew,
            // Arabic, Syriac, Arabic Supplement, Thaana, NKo, Samaritan,
            // Mandaic, Syriac Supplement, Arabic Extended-B/A. Marking the
            // whole span avoids leaving holes between the individual blocks.
            Mark(0x0590, 0x08FF, CF_RTL | CF_ALPHA);
            Mark(0x0621, 0x064A, CF_ARABIC);    // Arabic letters that need shaping
            Mark(0x0660, 0x0669, CF_DIGIT);     // Arabic-Indic digits
            Mark(0x06F0, 0x06F9, CF_DIGIT);     // Extended Arabic-Indic digits
            Mark(0xFB1D, 0xFB4F, CF_RTL | CF_ALPHA); // Hebrew presentation forms
            Mark(0xFB50, 0xFDFF, CF_RTL | CF_ALPHA); // Arabic Presentation Forms-A (already shaped)
            Mark(0xFE70, 0xFEFF, CF_RTL | CF_ALPHA); // Arabic Presentation Forms-B (already shaped)

            // --- Directional marks and controls ----------------------------
            Mark(0x200F, 0x200F, CF_RTL);       // RLM
            Mark(0x061C, 0x061C, CF_RTL);       // ALM
            // U+202B RLE, U+202C PDF, U+202D LRO, U+202E RLO. PDF and LRO are
            // flagged too so classification stays identical for callers that
            // already rely on this whole range being reported as RTL.
            Mark(0x202B, 0x202E, CF_RTL);
            Mark(0x2066, 0x2069, CF_RTL);       // Isolates: LRI, RLI, FSI, PDI
        }

    private:
        // ORs `bits` into every entry of the inclusive range [first, last].
        void Mark(int first, int last, int bits)
        {
            for (int i = first; i <= last; ++i)
                flags[i] = static_cast<uint8_t>(flags[i] | bits);
        }
    };

    // Main character flags table (65536 entries for BMP).
    // Returns a pointer to the first entry; index with a 16-bit code unit.
    inline const uint8_t* GetCharFlagsTable()
    {
        static const TCharFlagsTable s_table; // thread-safe one-time construction
        return s_table.flags;
    }

    // Fast O(1) character classification functions.
    // `ch` is truncated to 16 bits before indexing, so only BMP code units are classified.
    inline bool IsRTL(wchar_t ch)          { return (GetCharFlagsTable()[(uint16_t)ch] & CF_RTL) != 0; }
    inline bool IsAlpha(wchar_t ch)        { return (GetCharFlagsTable()[(uint16_t)ch] & CF_ALPHA) != 0; }
    inline bool IsDigit(wchar_t ch)        { return (GetCharFlagsTable()[(uint16_t)ch] & CF_DIGIT) != 0; }
    inline bool IsArabicLetter(wchar_t ch) { return (GetCharFlagsTable()[(uint16_t)ch] & CF_ARABIC) != 0; }

    inline bool IsStrongLTR(wchar_t ch)
    {
        const uint8_t flags = GetCharFlagsTable()[(uint16_t)ch];
        // Strong LTR = (Alpha OR Digit) AND NOT RTL
        return (flags & (CF_ALPHA | CF_DIGIT)) != 0 && (flags & CF_RTL) == 0;
    }
}
// ============================================================================
// BUFFER POOLING (Avoid per-call allocations)
// ============================================================================
namespace BiDiBuffers
{
    // Scratch storage that is kept alive between calls so its heap block can be reused.
    struct TBufferPool
    {
        std::vector<wchar_t> shaped; // output area for shaped text

        // Grows `shaped`'s capacity (never shrinks it) so it can hold at least
        // 2*n + 64 elements. The vector's size/contents are left untouched.
        void EnsureCapacity(size_t n)
        {
            const size_t wanted = n * 2 + 64;
            if (wanted > shaped.capacity())
                shaped.reserve(wanted);
        }

        // Drops the contents of `shaped`; capacity is retained by std::vector::clear().
        void Clear() { shaped.clear(); }
    };

    // Returns the calling thread's pool (one instance per thread).
    inline TBufferPool& Get()
    {
        static thread_local TBufferPool tlsPool;
        return tlsPool;
    }
}
// ============================================================================
// UNICODE VALIDATION HELPERS
// ============================================================================
@@ -65,7 +222,70 @@ static inline void SanitizeWideString(std::wstring& ws)
ws.end());
}
// ============================================================================
// OPTIMIZED UTF-8 CONVERSION
// ============================================================================
// Fast paths for ASCII-only text (very common in games).
// Falls back to Windows API for non-ASCII.
namespace Utf8Fast
{
    // Returns true when none of the `len` bytes starting at `s` is >= 128.
    // An empty range is reported as ASCII.
    inline bool IsAsciiOnly(const char* s, size_t len)
    {
        const uint64_t kHighBits = 0x8080808080808080ULL;
        const size_t wordBytes = len - (len % 8); // largest multiple of 8 that fits

        size_t pos = 0;

        // Bulk scan: load 8 bytes at a time (memcpy keeps the load alignment-safe)
        // and test all eight high bits with one AND.
        for (; pos < wordBytes; pos += 8)
        {
            uint64_t word;
            memcpy(&word, s + pos, sizeof(word));
            if ((word & kHighBits) != 0)
                return false;
        }

        // Tail: the remaining 0..7 bytes, one by one.
        for (; pos < len; ++pos)
        {
            if (static_cast<unsigned char>(s[pos]) > 127)
                return false;
        }

        return true;
    }

    // Widens `len` bytes to wchar_t one-to-one (no API calls).
    // Each byte goes through unsigned char first so it is zero-extended.
    inline std::wstring AsciiToWide(const char* s, size_t len)
    {
        std::wstring wide(len, L'\0');
        for (size_t idx = 0; idx != len; ++idx)
            wide[idx] = static_cast<wchar_t>(static_cast<unsigned char>(s[idx]));
        return wide;
    }

    // Narrows `len` wide characters to char one-to-one (no API calls).
    // Returns an empty string as soon as a character that is not < 128 is met;
    // the caller should then use the full conversion.
    inline std::string WideToAscii(const wchar_t* ws, size_t len)
    {
        std::string narrow;
        narrow.reserve(len);

        const wchar_t* const stop = ws + len;
        for (const wchar_t* cur = ws; cur != stop; ++cur)
        {
            if (!(*cur < 128))
                return std::string(); // Not pure ASCII

            narrow.push_back(static_cast<char>(*cur));
        }

        return narrow;
    }
}
// UTF-8 -> UTF-16 (Windows wide)
// OPTIMIZED: Fast path for ASCII-only strings (avoids 2x API calls)
inline std::wstring Utf8ToWide(const std::string& s)
{
if (s.empty())
@@ -75,9 +295,14 @@ inline std::wstring Utf8ToWide(const std::string& s)
if (s.size() > MAX_TEXT_LENGTH || s.size() > INT_MAX)
{
BIDI_LOG("Utf8ToWide: String too large (%zu bytes)", s.size());
return L""; // String too large
return L"";
}
// Fast path: ASCII-only strings (very common in games)
if (Utf8Fast::IsAsciiOnly(s.data(), s.size()))
return Utf8Fast::AsciiToWide(s.data(), s.size());
// Slow path: Use Windows API for non-ASCII
int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), (int)s.size(), nullptr, 0);
if (wlen <= 0)
{
@@ -90,29 +315,31 @@ inline std::wstring Utf8ToWide(const std::string& s)
if (written <= 0 || written != wlen)
{
BIDI_LOG("Utf8ToWide: Second conversion failed (written=%d, expected=%d, error=%d)", written, wlen, GetLastError());
return L""; // Conversion failed unexpectedly
return L"";
}
// Optional: Sanitize to remove invalid Unicode codepoints (surrogates, non-characters)
// Uncomment if you want strict validation
// SanitizeWideString(out);
return out;
}
// Convenience overload for char*
// OPTIMIZED: Fast path for ASCII-only strings
inline std::wstring Utf8ToWide(const char* s)
{
if (!s || !*s)
return L"";
size_t len = strlen(s);
// Fast path: ASCII-only strings
if (Utf8Fast::IsAsciiOnly(s, len))
return Utf8Fast::AsciiToWide(s, len);
// Slow path: Use Windows API
int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, nullptr, 0);
if (wlen <= 0)
return L"";
// wlen includes terminating NUL
std::wstring out(wlen, L'\0');
int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, out.data(), wlen);
if (written <= 0 || written != wlen)
{
@@ -124,13 +351,11 @@ inline std::wstring Utf8ToWide(const char* s)
if (!out.empty() && out.back() == L'\0')
out.pop_back();
// Optional: Sanitize to remove invalid Unicode codepoints
// SanitizeWideString(out);
return out;
}
// UTF-16 (Windows wide) -> UTF-8
// OPTIMIZED: Fast path for ASCII-only strings
inline std::string WideToUtf8(const std::wstring& ws)
{
if (ws.empty())
@@ -138,8 +363,23 @@ inline std::string WideToUtf8(const std::wstring& ws)
// Validate size limits (prevent DoS and INT_MAX overflow)
if (ws.size() > MAX_TEXT_LENGTH || ws.size() > INT_MAX)
return ""; // String too large
return "";
// Fast path: Check if all characters are ASCII
bool isAscii = true;
for (size_t i = 0; i < ws.size() && isAscii; ++i)
isAscii = (ws[i] < 128);
if (isAscii)
{
std::string out;
out.reserve(ws.size());
for (size_t i = 0; i < ws.size(); ++i)
out.push_back(static_cast<char>(ws[i]));
return out;
}
// Slow path: Use Windows API for non-ASCII
int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, ws.data(), (int)ws.size(), nullptr, 0, nullptr, nullptr);
if (len <= 0)
return "";
@@ -149,7 +389,7 @@ inline std::string WideToUtf8(const std::wstring& ws)
if (written <= 0 || written != len)
{
BIDI_LOG("WideToUtf8: Conversion failed (written=%d, expected=%d, error=%d)", written, len, GetLastError());
return ""; // Conversion failed
return "";
}
return out;
}
@@ -169,59 +409,22 @@ inline std::string WideToUtf8(const wchar_t* ws)
enum class EBidiDir { LTR, RTL };
enum class ECharDir : unsigned char { Neutral, LTR, RTL };
struct TBidiRun
{
EBidiDir dir;
std::vector<wchar_t> text; // logical order
};
// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls.
// Returns true when `ch` is flagged right-to-left in the BiDiTables table.
// The body previously still carried the superseded hand-written range checks,
// which left the table lookup below unreachable.
static inline bool IsRTLCodepoint(wchar_t ch)
{
    return BiDiTables::IsRTL(ch);
}
// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls.
// Returns true when `ch` is flagged alphabetic in the BiDiTables table.
// The per-thread 64 KB GetStringTypeW() cache that used to sit in front of
// this return is gone: it made the table lookup unreachable and contradicted
// the "no syscalls" contract stated above.
static inline bool IsStrongAlpha(wchar_t ch)
{
    return BiDiTables::IsAlpha(ch);
}
// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls.
// Returns true when `ch` is flagged as a digit in the BiDiTables table
// (ASCII 0-9 as well as the Arabic-Indic digit ranges registered there).
// The former ASCII shortcut and GetStringTypeW() cache are removed; they
// returned before the table lookup could ever run.
static inline bool IsDigit(wchar_t ch)
{
    return BiDiTables::IsDigit(ch);
}
static inline bool IsNameTokenPunct(wchar_t ch)
@@ -257,12 +460,10 @@ static inline bool IsNameTokenPunct(wchar_t ch)
}
}
// Optimized: O(1) lookup. A character is strong LTR when the table flags it
// as alpha or digit and does NOT flag it as RTL, so Arabic/Hebrew letters are
// never classified as LTR. The RTL test is part of BiDiTables::IsStrongLTR
// itself; the older three-call sequence that preceded this return (and made
// it unreachable) has been dropped.
static inline bool IsStrongLTR(wchar_t ch)
{
    return BiDiTables::IsStrongLTR(ch);
}
static inline bool HasStrongLTRNeighbor(const wchar_t* s, int n, int i)
@@ -561,33 +762,29 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
if (!s || n <= 0)
return {};
// Use buffer pool to avoid per-call allocations
BiDiBuffers::TBufferPool& buffers = BiDiBuffers::Get();
buffers.EnsureCapacity((size_t)n);
// 1) base direction
EBidiDir base = forceRTL ? EBidiDir::RTL : DetectBaseDir_FirstStrong(s, n);
// Pre-compute strong character positions for O(1) neutral resolution
TStrongDirCache strongCache(s, n, base);
// 2) split into runs
// Estimate runs based on text length (~1 per 50 chars, min 4)
std::vector<TBidiRun> runs;
const size_t estimatedRuns = (size_t)std::max(4, n / 50);
runs.reserve(estimatedRuns);
auto push_run = [&](EBidiDir d)
{
if (runs.empty() || runs.back().dir != d)
runs.push_back(TBidiRun{ d, {} });
};
// start with base so leading neutrals attach predictably
push_run(base);
// 2) split into runs - use a more efficient approach
// Instead of TBidiRun with vectors, use start/end indices
struct TRunInfo { int start; int end; EBidiDir dir; };
thread_local static std::vector<TRunInfo> s_runs;
s_runs.clear();
s_runs.reserve((size_t)std::max(4, n / 50));
EBidiDir lastStrong = base;
EBidiDir currentRunDir = base;
int runStart = 0;
for (int i = 0; i < n; ++i)
{
wchar_t ch = s[i];
EBidiDir d;
ECharDir cd = GetCharDirSmart(s, n, i);
@@ -607,98 +804,84 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
d = ResolveNeutralDir(s, n, i, base, lastStrong, &strongCache);
}
#ifdef DEBUG_BIDI
if (i < 50) // Only log first 50 chars to avoid spam
// Start a new run if direction changes
if (d != currentRunDir)
{
BIDI_LOG("Char[%d] U+%04X '%lc' → CharDir=%s, RunDir=%s",
i, (unsigned int)ch, (ch >= 32 && ch < 127) ? ch : L'?',
cd == ECharDir::RTL ? "RTL" : (cd == ECharDir::LTR ? "LTR" : "Neutral"),
d == EBidiDir::RTL ? "RTL" : "LTR");
if (i > runStart)
s_runs.push_back({runStart, i, currentRunDir});
runStart = i;
currentRunDir = d;
}
#endif
push_run(d);
runs.back().text.push_back(ch);
}
// Push final run
if (n > runStart)
s_runs.push_back({runStart, n, currentRunDir});
// 3) shape RTL runs in logical order (Arabic shaping)
for (auto& r : runs)
// 3) shape RTL runs using pooled buffer
buffers.shaped.clear();
auto shapeRun = [&](int start, int end) -> std::pair<const wchar_t*, int>
{
if (r.dir != EBidiDir::RTL)
continue;
int len = end - start;
if (len <= 0)
return {nullptr, 0};
if (r.text.empty())
continue;
// Check for potential integer overflow
if ((size_t)len > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
return {s + start, len}; // Return unshaped
// Check for potential integer overflow before allocation
if (r.text.size() > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
{
BIDI_LOG("BuildVisualBidiText: RTL run too large for shaping (%zu chars)", r.text.size());
continue; // Text too large to process safely
}
size_t neededSize = buffers.shaped.size() + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN;
if (buffers.shaped.capacity() < neededSize)
buffers.shaped.reserve(neededSize);
std::vector<wchar_t> shaped(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN, 0);
size_t outStart = buffers.shaped.size();
buffers.shaped.resize(outStart + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN);
int outLen = Arabic_MakeShape(const_cast<wchar_t*>(s + start), len,
buffers.shaped.data() + outStart,
(int)(buffers.shaped.size() - outStart));
int outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
if (outLen <= 0)
{
BIDI_LOG("Arabic_MakeShape FAILED for RTL run of %zu characters", r.text.size());
BIDI_LOG(" WARNING: This RTL text segment will NOT be displayed!");
BIDI_LOG(" First few characters: U+%04X U+%04X U+%04X U+%04X",
r.text.size() > 0 ? (unsigned int)r.text[0] : 0,
r.text.size() > 1 ? (unsigned int)r.text[1] : 0,
r.text.size() > 2 ? (unsigned int)r.text[2] : 0,
r.text.size() > 3 ? (unsigned int)r.text[3] : 0);
continue;
}
return {s + start, len}; // Return unshaped on failure
// Retry once if buffer too small
if (outLen >= (int)shaped.size())
{
shaped.assign(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR_RETRY + ARABIC_SHAPING_SAFETY_MARGIN_RETRY, 0);
outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
if (outLen <= 0)
continue;
// Add error check instead of silent truncation
if (outLen > (int)shaped.size())
{
BIDI_LOG("Arabic_MakeShape: Buffer still too small after retry (%d > %zu)", outLen, shaped.size());
// Shaping failed critically, use unshaped text
continue;
}
}
buffers.shaped.resize(outStart + (size_t)outLen);
return {buffers.shaped.data() + outStart, outLen};
};
r.text.assign(shaped.begin(), shaped.begin() + outLen);
}
// 4) produce visual order:
// - reverse RTL runs internally
// - reverse run sequence if base RTL
// 4) produce visual order
std::vector<wchar_t> visual;
visual.reserve((size_t)n);
auto emit_run = [&](const TBidiRun& r)
auto emitRun = [&](const TRunInfo& run)
{
if (run.dir == EBidiDir::RTL)
{
if (r.dir == EBidiDir::RTL)
// Shape and reverse RTL runs
std::pair<const wchar_t*, int> shaped = shapeRun(run.start, run.end);
const wchar_t* ptr = shaped.first;
int len = shaped.second;
if (ptr && len > 0)
{
for (int k = (int)r.text.size() - 1; k >= 0; --k)
visual.push_back(r.text[(size_t)k]);
for (int k = len - 1; k >= 0; --k)
visual.push_back(ptr[k]);
}
else
{
visual.insert(visual.end(), r.text.begin(), r.text.end());
}
};
}
else
{
// LTR runs: copy directly
visual.insert(visual.end(), s + run.start, s + run.end);
}
};
if (base == EBidiDir::LTR)
{
for (const auto& r : runs)
emit_run(r);
for (const auto& run : s_runs)
emitRun(run);
}
else
{
for (int i = (int)runs.size() - 1; i >= 0; --i)
emit_run(runs[(size_t)i]);
for (int i = (int)s_runs.size() - 1; i >= 0; --i)
emitRun(s_runs[(size_t)i]);
}
return visual;
@@ -763,7 +946,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
{
// Apply BiDi to message with auto-detection (don't force RTL)
// Let the BiDi algorithm detect base direction from first strong character
std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
}
visual.push_back(L' ');
@@ -787,7 +970,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
{
// Apply BiDi to message with auto-detection (don't force RTL)
// Let the BiDi algorithm detect base direction from first strong character
std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
}
}

View File

@@ -16,34 +16,93 @@ const DWORD DEBUG_STRING_MAX_LEN = 1024;
static int isLogFile = false;
HWND g_PopupHwnd = NULL;
// Convert UTF-8 char* -> wide and send to debugger (NO helper function, just a macro)
// ============================================================================
// OPTIMIZED LOGGING INFRASTRUCTURE
// ============================================================================
// Cached timestamp to avoid repeated time()/localtime() syscalls
// Refreshes every ~100ms (good enough for logging, avoids syscall overhead)
struct TCachedTimestamp
{
DWORD lastUpdateMs = 0;
int month = 0;
int day = 0;
int hour = 0;
int minute = 0;
void Update()
{
DWORD now = ELTimer_GetMSec();
// Refresh timestamp every 100ms (not per-call)
if (now - lastUpdateMs > 100)
{
time_t ct = time(0);
struct tm ctm = *localtime(&ct);
month = ctm.tm_mon + 1;
day = ctm.tm_mday;
hour = ctm.tm_hour;
minute = ctm.tm_min;
lastUpdateMs = now;
}
}
void Format(char* buf, size_t bufSize) const
{
DWORD msec = ELTimer_GetMSec() % 60000;
_snprintf_s(buf, bufSize, _TRUNCATE, "%02d%02d %02d:%02d:%05d :: ",
month, day, hour, minute, (int)msec);
}
};
static TCachedTimestamp g_cachedTimestamp;
// Optimized debug output: sends a UTF-8 string to the debugger exactly once.
//  - Short ASCII strings (< 512 chars) are widened into a stack buffer, which
//    avoids the Utf8ToWide allocation.
//  - Everything else (non-ASCII, or ASCII too long for the stack buffer) goes
//    through Utf8ToWide, so long messages are no longer cut off at 511 chars.
// The argument is evaluated a single time; a null pointer is treated as "".
// In non-debug builds the macro only evaluates its argument.
#ifdef _DEBUG
#define DBG_OUT_W_UTF8(psz) \
do { \
    const char* __p = (psz); \
    const char* __s = __p ? __p : ""; \
    size_t __len = strlen(__s); \
    if (__len < 512 && Utf8Fast::IsAsciiOnly(__s, __len)) { \
        /* ASCII fast path: direct conversion, no allocation */ \
        wchar_t __wbuf[512]; \
        for (size_t __i = 0; __i < __len; ++__i) \
            __wbuf[__i] = (wchar_t)(unsigned char)__s[__i]; \
        __wbuf[__len] = L'\0'; \
        OutputDebugStringW(__wbuf); \
    } else { \
        /* Non-ASCII or long string: use full conversion */ \
        std::wstring __w = Utf8ToWide(__s); \
        OutputDebugStringW(__w.c_str()); \
    } \
} while (0)
#else
#define DBG_OUT_W_UTF8(psz) do { (void)(psz); } while (0)
#endif
// Buffered log file writer
// OPTIMIZATION: Buffered writes with periodic flush instead of per-write fflush()
// - Collects writes in memory buffer
// - Flushes when buffer is full OR every 500ms OR on shutdown
// - Reduces disk I/O from 1000s of syncs to ~2 per second
class CLogFile : public CSingleton<CLogFile>
{
public:
CLogFile() : m_fp(NULL) {}
CLogFile() : m_fp(NULL), m_bufferPos(0), m_lastFlushMs(0) {}
virtual ~CLogFile()
{
Flush(); // Ensure all buffered data is written
if (m_fp)
fclose(m_fp);
m_fp = NULL;
}
void Initialize()
{
m_fp = fopen("log/log.txt", "w");
m_bufferPos = 0;
m_lastFlushMs = ELTimer_GetMSec();
}
void Write(const char* c_pszMsg)
@@ -51,22 +110,63 @@ class CLogFile : public CSingleton<CLogFile>
if (!m_fp)
return;
time_t ct = time(0);
struct tm ctm = *localtime(&ct);
// Use cached timestamp (updated every ~100ms)
g_cachedTimestamp.Update();
char timestamp[32];
g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
fprintf(m_fp, "%02d%02d %02d:%02d:%05d :: %s",
ctm.tm_mon + 1,
ctm.tm_mday,
ctm.tm_hour,
ctm.tm_min,
ELTimer_GetMSec() % 60000,
c_pszMsg);
// Calculate total length needed
size_t timestampLen = strlen(timestamp);
size_t msgLen = c_pszMsg ? strlen(c_pszMsg) : 0;
size_t totalLen = timestampLen + msgLen;
// If this write would overflow the buffer, flush first
if (m_bufferPos + totalLen >= BUFFER_SIZE - 1)
Flush();
// If message is larger than buffer, write directly (rare case)
if (totalLen >= BUFFER_SIZE - 1)
{
fputs(timestamp, m_fp);
if (c_pszMsg)
fputs(c_pszMsg, m_fp);
fflush(m_fp);
return;
}
// Append to buffer
memcpy(m_buffer + m_bufferPos, timestamp, timestampLen);
m_bufferPos += timestampLen;
if (msgLen > 0)
{
memcpy(m_buffer + m_bufferPos, c_pszMsg, msgLen);
m_bufferPos += msgLen;
}
// Periodic flush: every 500ms or when buffer is >75% full
DWORD now = ELTimer_GetMSec();
if (now - m_lastFlushMs > 500 || m_bufferPos > BUFFER_SIZE * 3 / 4)
Flush();
}
void Flush()
{
if (!m_fp || m_bufferPos == 0)
return;
m_buffer[m_bufferPos] = '\0';
fputs(m_buffer, m_fp);
fflush(m_fp);
m_bufferPos = 0;
m_lastFlushMs = ELTimer_GetMSec();
}
protected:
static const size_t BUFFER_SIZE = 8192; // 8KB buffer
FILE* m_fp;
char m_buffer[BUFFER_SIZE];
size_t m_bufferPos;
DWORD m_lastFlushMs;
};
static CLogFile gs_logfile;
@@ -220,9 +320,50 @@ void Tracef(const char* c_szFormat, ...)
LogFile(szBuf);
}
// Buffered stderr writer for syserr (same pattern as CLogFile)
// OPTIMIZATION: Reduces fflush(stderr) from every call to every 500ms
// Bytes handed to Write() stay in `buffer` until a Write() decides to flush
// or Flush() is called explicitly.
static struct TSyserrBuffer
{
    static const size_t BUFFER_SIZE = 4096;

    char buffer[BUFFER_SIZE];
    size_t pos = 0;          // number of pending bytes in `buffer`
    DWORD lastFlushMs = 0;   // ELTimer_GetMSec() value recorded by the last Flush()

    // Queues `len` bytes of `msg`; no terminator is required or stored.
    void Write(const char* msg, size_t len)
    {
        const size_t limit = BUFFER_SIZE - 1;

        // Not enough room left: push out what is pending first so output order is kept.
        if (pos + len >= limit)
            Flush();

        // Large message: it can never fit, so bypass the buffer and write directly.
        if (len >= limit)
        {
            fwrite(msg, 1, len, stderr);
            fflush(stderr);
            return;
        }

        memcpy(buffer + pos, msg, len);
        pos += len;

        // Flush when the last flush is more than 500 ms old or the buffer is over 3/4 full.
        const DWORD now = ELTimer_GetMSec();
        const bool isStale = (now - lastFlushMs) > 500;
        const bool isNearlyFull = pos > BUFFER_SIZE * 3 / 4;
        if (isStale || isNearlyFull)
            Flush();
    }

    // Writes all pending bytes to stderr, flushes the stream and restarts the flush timer.
    // Does nothing when the buffer is empty.
    void Flush()
    {
        if (pos != 0)
        {
            fwrite(buffer, 1, pos, stderr);
            fflush(stderr);
            pos = 0;
            lastFlushMs = ELTimer_GetMSec();
        }
    }
} g_syserrBuffer;
void TraceError(const char* c_szFormat, ...)
{
//#ifndef _DISTRIBUTE
//#ifndef _DISTRIBUTE
char szBuf[DEBUG_STRING_MAX_LEN + 2];
strncpy_s(szBuf, sizeof(szBuf), "SYSERR: ", _TRUNCATE);
@@ -243,17 +384,14 @@ void TraceError(const char* c_szFormat, ...)
szBuf[sizeof(szBuf) - 1] = '\0';
}
time_t ct = time(0);
struct tm ctm = *localtime(&ct);
// OPTIMIZED: Use cached timestamp instead of time()/localtime() per call
g_cachedTimestamp.Update();
char timestamp[32];
g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s",
ctm.tm_mon + 1,
ctm.tm_mday,
ctm.tm_hour,
ctm.tm_min,
ELTimer_GetMSec() % 60000,
szBuf + 8);
fflush(stderr);
// OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call
g_syserrBuffer.Write(timestamp, strlen(timestamp));
g_syserrBuffer.Write(szBuf + 8, strlen(szBuf + 8)); // Skip "SYSERR: " prefix for stderr
#ifdef _DEBUG
DBG_OUT_W_UTF8(szBuf);
@@ -267,8 +405,7 @@ void TraceError(const char* c_szFormat, ...)
void TraceErrorWithoutEnter(const char* c_szFormat, ...)
{
//#ifndef _DISTRIBUTE
//#ifndef _DISTRIBUTE
char szBuf[DEBUG_STRING_MAX_LEN];
va_list args;
@@ -276,17 +413,14 @@ void TraceErrorWithoutEnter(const char* c_szFormat, ...)
_vsnprintf_s(szBuf, sizeof(szBuf), _TRUNCATE, c_szFormat, args);
va_end(args);
time_t ct = time(0);
struct tm ctm = *localtime(&ct);
// OPTIMIZED: Use cached timestamp instead of time()/localtime() per call
g_cachedTimestamp.Update();
char timestamp[32];
g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s",
ctm.tm_mon + 1,
ctm.tm_mday,
ctm.tm_hour,
ctm.tm_min,
ELTimer_GetMSec() % 60000,
szBuf + 8);
fflush(stderr);
// OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call
g_syserrBuffer.Write(timestamp, strlen(timestamp));
g_syserrBuffer.Write(szBuf, strlen(szBuf));
#ifdef _DEBUG
DBG_OUT_W_UTF8(szBuf);
@@ -349,7 +483,7 @@ void OpenLogFile(bool bUseLogFIle)
std::filesystem::create_directory("log");
}
//#ifndef _DISTRIBUTE
//#ifndef _DISTRIBUTE
_wfreopen(L"log/syserr.txt", L"w", stderr);
if (bUseLogFIle)
@@ -360,6 +494,13 @@ void OpenLogFile(bool bUseLogFIle)
//#endif
}
void CloseLogFile()
{
// Flush all buffered output before shutdown
g_syserrBuffer.Flush();
CLogFile::Instance().Flush();
}
void OpenConsoleWindow()
{
AllocConsole();

View File

@@ -238,16 +238,88 @@ void CGraphicTextInstance::Update()
}
// Tag-aware BiDi rendering: Parse tags, apply BiDi per segment, track colors/hyperlinks
// OPTIMIZED: Use helper lambda to eliminate code duplication (was repeated 5+ times)
if (hasRTL || hasTags)
{
DWORD currentColor = dwColor;
int hyperlinkStep = 0; // 0=normal, 1=collecting metadata, 2=visible hyperlink
std::wstring hyperlinkMetadata;
std::vector<wchar_t> currentSegment;
// Use thread-local buffer to avoid per-call allocation
thread_local static std::vector<wchar_t> s_currentSegment;
s_currentSegment.clear();
SHyperlink currentHyperlink;
currentHyperlink.sx = currentHyperlink.ex = 0;
// In chat RTL, force RTL base direction so prefixes like "[hyperlink]" don't flip the paragraph to LTR.
const bool forceRTLForBidi = (m_isChatMessage && m_computedRTL);
// Shared "flush" step for the tag parser: runs the pending text segment
// through BuildVisualBidiText_Tagless, draws every resulting character in
// segColor, empties the segment and returns the summed widths reported by
// __DrawCharacter (0 when there was nothing pending).
auto FlushSegment = [&](DWORD segColor) -> int
{
    int totalWidth = 0;

    if (!s_currentSegment.empty())
    {
        // Logical -> visual order for this segment
        const std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
            s_currentSegment.data(), (int)s_currentSegment.size(), forceRTLForBidi);

        for (wchar_t glyph : visual)
            totalWidth += __DrawCharacter(pFontTexture, glyph, segColor);

        s_currentSegment.clear();
    }

    return totalWidth;
};
// Prepend glyphs to the already-built draw list (used to place hyperlink before message in RTL chat).
auto PrependGlyphs = [&](CGraphicFontTexture* pFontTexture,
const std::vector<wchar_t>& chars,
DWORD color,
int& outWidth)
{
outWidth = 0;
// Use thread-local buffers to avoid allocation
thread_local static std::vector<CGraphicFontTexture::TCharacterInfomation*> s_newCharInfos;
thread_local static std::vector<DWORD> s_newColors;
s_newCharInfos.clear();
s_newColors.clear();
s_newCharInfos.reserve(chars.size());
s_newColors.reserve(chars.size());
for (size_t k = 0; k < chars.size(); ++k)
{
auto* pInfo = pFontTexture->GetCharacterInfomation(chars[k]);
if (!pInfo)
continue;
s_newCharInfos.push_back(pInfo);
s_newColors.push_back(color);
outWidth += pInfo->advance;
m_textHeight = std::max((WORD)pInfo->height, m_textHeight);
}
// Insert at the beginning of the draw list.
m_pCharInfoVector.insert(m_pCharInfoVector.begin(), s_newCharInfos.begin(), s_newCharInfos.end());
m_dwColorInfoVector.insert(m_dwColorInfoVector.begin(), s_newColors.begin(), s_newColors.end());
// Shift any already-recorded hyperlinks to the right.
for (auto& link : m_hyperlinkVector)
{
link.sx += outWidth;
link.ex += outWidth;
}
m_textWidth += outWidth;
};
// Parse text with tags
for (int i = 0; i < wTextLen;)
{
@@ -257,37 +329,15 @@ void CGraphicTextInstance::Update()
if (tagType == TEXT_TAG_COLOR)
{
// Flush current segment with BiDi before changing color
if (!currentSegment.empty())
{
// Use auto-detection for BiDi (don't force RTL)
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
currentSegment.data(), (int)currentSegment.size(), false);
for (size_t j = 0; j < visual.size(); ++j)
{
int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
currentHyperlink.ex += w;
}
currentSegment.clear();
}
// Flush current segment before changing color
currentHyperlink.ex += FlushSegment(currentColor);
currentColor = htoi(tagExtra.c_str(), 8);
i += tagLen;
}
else if (tagType == TEXT_TAG_RESTORE_COLOR)
{
// Flush segment before restoring color
if (!currentSegment.empty())
{
// Use auto-detection for BiDi (don't force RTL)
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
currentSegment.data(), (int)currentSegment.size(), false);
for (size_t j = 0; j < visual.size(); ++j)
{
int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
currentHyperlink.ex += w;
}
currentSegment.clear();
}
currentHyperlink.ex += FlushSegment(currentColor);
currentColor = dwColor;
i += tagLen;
}
@@ -303,18 +353,7 @@ void CGraphicTextInstance::Update()
{
// End of metadata, start visible section
// Flush any pending non-hyperlink segment first
if (!currentSegment.empty())
{
// Use auto-detection for BiDi (don't force RTL)
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
currentSegment.data(), (int)currentSegment.size(), false);
for (size_t j = 0; j < visual.size(); ++j)
{
int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
currentHyperlink.ex += w;
}
currentSegment.clear();
}
currentHyperlink.ex += FlushSegment(currentColor);
hyperlinkStep = 2;
currentHyperlink.text = hyperlinkMetadata;
@@ -323,80 +362,85 @@ void CGraphicTextInstance::Update()
else if (hyperlinkStep == 2)
{
// End of visible section - render hyperlink text with proper Arabic handling
// Format: [Arabic Text] or [English Text]
// Keep brackets in position, reverse Arabic content between them
if (!currentSegment.empty())
// In RTL chat: we want the hyperlink chunk to appear BEFORE the message, even if logically appended.
if (!s_currentSegment.empty())
{
// Find bracket positions
// OPTIMIZED: Use thread-local buffer for visible rendering
thread_local static std::vector<wchar_t> s_visibleToRender;
s_visibleToRender.clear();
// Find bracket positions: [ ... ]
int openBracket = -1, closeBracket = -1;
for (size_t idx = 0; idx < currentSegment.size(); ++idx)
for (size_t idx = 0; idx < s_currentSegment.size(); ++idx)
{
if (currentSegment[idx] == L'[' && openBracket == -1)
if (s_currentSegment[idx] == L'[' && openBracket == -1)
openBracket = (int)idx;
else if (currentSegment[idx] == L']' && closeBracket == -1)
else if (s_currentSegment[idx] == L']' && closeBracket == -1)
closeBracket = (int)idx;
}
if (openBracket >= 0 && closeBracket > openBracket)
{
// Extract content between brackets
std::vector<wchar_t> content(
currentSegment.begin() + openBracket + 1,
currentSegment.begin() + closeBracket);
// Keep '['
s_visibleToRender.push_back(L'[');
// Apply Arabic shaping to content
std::vector<wchar_t> shaped(content.size() * 2 + 16, 0);
int shapedLen = Arabic_MakeShape(content.data(), (int)content.size(),
shaped.data(), (int)shaped.size());
// Extract inside content and apply BiDi
thread_local static std::vector<wchar_t> s_content;
s_content.assign(
s_currentSegment.begin() + openBracket + 1,
s_currentSegment.begin() + closeBracket);
// Render: "[" + reversed_arabic + "]"
// 1. Opening bracket
int w = __DrawCharacter(pFontTexture, L'[', currentColor);
currentHyperlink.ex += w;
// FIX: Use false to let BiDi auto-detect direction from content
// This ensures English items like [Sword+9] stay LTR
// while Arabic items like [درع فولاذي+9] are properly RTL
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
s_content.data(), (int)s_content.size(), false);
// 2. Arabic content (shaped and REVERSED for RTL display)
if (shapedLen > 0)
{
for (int j = shapedLen - 1; j >= 0; --j)
{
w = __DrawCharacter(pFontTexture, shaped[j], currentColor);
currentHyperlink.ex += w;
}
}
else
{
// Fallback: reverse original content
for (int j = (int)content.size() - 1; j >= 0; --j)
{
w = __DrawCharacter(pFontTexture, content[j], currentColor);
currentHyperlink.ex += w;
}
}
s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end());
// 3. Closing bracket
w = __DrawCharacter(pFontTexture, L']', currentColor);
currentHyperlink.ex += w;
// 4. Render any text after closing bracket (if any)
for (size_t idx = closeBracket + 1; idx < currentSegment.size(); ++idx)
{
w = __DrawCharacter(pFontTexture, currentSegment[idx], currentColor);
currentHyperlink.ex += w;
}
// Keep ']'
s_visibleToRender.push_back(L']');
}
else
{
// No brackets found - render as-is (shouldn't happen for hyperlinks)
for (size_t j = 0; j < currentSegment.size(); ++j)
// No brackets: apply BiDi to whole segment
// FIX: Use false to let BiDi auto-detect direction from content
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
s_currentSegment.data(), (int)s_currentSegment.size(), false);
s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end());
}
// Ensure a space AFTER the hyperlink chunk (so it becomes "[hyperlink] اختبار...")
s_visibleToRender.push_back(L' ');
// Key behavior:
// In RTL chat, place hyperlink BEFORE the message by prepending glyphs.
if (m_isChatMessage && m_computedRTL)
{
int addedWidth = 0;
PrependGlyphs(pFontTexture, s_visibleToRender, currentColor, addedWidth);
// Record the hyperlink range at the beginning (0..addedWidth)
currentHyperlink.sx = 0;
currentHyperlink.ex = addedWidth;
m_hyperlinkVector.push_back(currentHyperlink);
}
else
{
// LTR or non-chat: keep original "append" behavior
currentHyperlink.sx = currentHyperlink.ex;
for (size_t j = 0; j < s_visibleToRender.size(); ++j)
{
int w = __DrawCharacter(pFontTexture, currentSegment[j], currentColor);
int w = __DrawCharacter(pFontTexture, s_visibleToRender[j], currentColor);
currentHyperlink.ex += w;
}
m_hyperlinkVector.push_back(currentHyperlink);
}
currentSegment.clear();
}
m_hyperlinkVector.push_back(currentHyperlink);
hyperlinkStep = 0;
s_currentSegment.clear();
}
i += tagLen;
}
@@ -411,24 +455,14 @@ void CGraphicTextInstance::Update()
{
// Add to current segment
// Will be BiDi-processed for normal text, or rendered directly for hyperlinks
currentSegment.push_back(wTextBuf[i]);
s_currentSegment.push_back(wTextBuf[i]);
}
i += tagLen;
}
}
// Flush any remaining segment
if (!currentSegment.empty())
{
// Use auto-detection for BiDi (don't force RTL)
std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
currentSegment.data(), (int)currentSegment.size(), false);
for (size_t j = 0; j < visual.size(); ++j)
{
int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
currentHyperlink.ex += w;
}
}
// Flush any remaining segment using optimized helper
currentHyperlink.ex += FlushSegment(currentColor);
pFontTexture->UpdateTexture();
m_isUpdate = true;

View File

@@ -1,6 +1,7 @@
#include "StdAfx.h"
#include "Arabic.h"
#include <assert.h>
#include <vector>
enum ARABIC_CODE
{
@@ -243,110 +244,151 @@ bool Arabic_IsComb2(wchar_t code)
return false;
}
// Helper: reports whether a character has a form that connects onward
// ("to the right" in this file's terminology), i.e. the shaping map holds
// an INITIAL or MEDIAL glyph for it. Characters outside the map never join.
static inline bool Arabic_CanJoinRight(wchar_t code)
{
	const bool inMap = Arabic_IsInMap(code);
	return inMap && (Arabic_GetMap(code, INITIAL) != 0 || Arabic_GetMap(code, MEDIAL) != 0);
}
// Helper: reports whether a character can connect to the letter before it
// ("to the left" in this file's terminology). That is the case when the
// shaping map holds a MEDIAL or FINAL glyph for it, or when Arabic_IsNext()
// accepts it. Characters outside the map never join.
static inline bool Arabic_CanJoinLeft(wchar_t code)
{
	const bool inMap = Arabic_IsInMap(code);
	return inMap && (Arabic_GetMap(code, MEDIAL) != 0
		|| Arabic_GetMap(code, FINAL) != 0
		|| Arabic_IsNext(code));
}
// Arabic shaping: converts the letters in src into their contextual
// presentation forms (ISOLATED / INITIAL / MEDIAL / FINAL) and writes the
// result to dst.
//
// The work is done in two linear passes:
//   1. a reverse pre-scan records, for every position, the index of the
//      directly following base character (composing marks are skipped) when
//      that character is an Arabic letter from the shaping map;
//   2. a forward pass picks the form of each letter from that lookup and a
//      running "previous letter joins onward" flag.
//
// Parameters:
//   src, srcLen - input text in logical order (not modified).
//   dst, dstLen - output buffer; must hold at least srcLen characters.
//
// Returns the number of characters written to dst, or 0 when an argument is
// invalid (null pointer, empty input, or dstLen < srcLen). The output is never
// longer than the input: a LAM-ALEF pair collapses into a single ligature.
size_t Arabic_MakeShape(wchar_t* src, size_t srcLen, wchar_t* dst, size_t dstLen)
{
	// Runtime validation (kept as a real check so it also applies when
	// asserts are compiled out).
	if (!src || !dst || srcLen == 0 || dstLen < srcLen)
		return 0;

	// Phase 1: build the "next joinable neighbour" lookup.
	// Thread-local buffer avoids a heap allocation on every call.
	thread_local static std::vector<size_t> s_nextArabic;
	if (s_nextArabic.size() < srcLen + 1)
		s_nextArabic.resize(srcLen + 1);

	// srcLen is used as the "no neighbour" marker throughout.
	size_t nextBaseIdx = srcLen;
	for (size_t i = srcLen; i > 0; --i)
	{
		const size_t idx = i - 1;
		s_nextArabic[idx] = nextBaseIdx;

		const wchar_t ch = src[idx];
		// Composing marks are transparent. Any other character becomes the
		// neighbour candidate for the positions before it: an Arabic letter
		// is recorded, anything else (space, digit, Latin, ...) resets the
		// candidate. Without the reset a letter would take a joining form
		// towards a letter on the far side of a space, while that letter
		// (whose prevJoins state was cleared by the space) would not join
		// back, and LAM + ' ' + ALEF would be merged into a ligature.
		if (!Arabic_IsInComposing(ch))
			nextBaseIdx = Arabic_IsInMap(ch) ? idx : srcLen;
	}
	s_nextArabic[srcLen] = srcLen; // Sentinel

	// Phase 2: single forward pass with state tracking.
	size_t dstIndex = 0;
	bool prevJoins = false; // Does the previous Arabic letter join onward?

	for (size_t srcIndex = 0; srcIndex < srcLen; ++srcIndex)
	{
		const wchar_t cur = src[srcIndex];

		// Composing marks: copy directly, they do not affect joining state.
		if (Arabic_IsInComposing(cur))
		{
			if (dstIndex < dstLen)
				dst[dstIndex++] = cur;
			continue;
		}

		// Non-Arabic character: copy directly, breaks joining.
		if (!Arabic_IsInMap(cur))
		{
			if (dstIndex < dstLen)
				dst[dstIndex++] = cur;
			prevJoins = false;
			continue;
		}

		// Look up the following letter; it only counts as a joining
		// neighbour when it can connect back to the current one.
		const size_t nextIdx = s_nextArabic[srcIndex];
		wchar_t next = 0;
		if (nextIdx < srcLen && Arabic_CanJoinLeft(src[nextIdx]))
			next = src[nextIdx];

		// Handle LAM-ALEF composition.
		if (Arabic_IsComb1(cur) && nextIdx < srcLen && Arabic_IsComb2(src[nextIdx]))
		{
			const wchar_t composed = prevJoins
				? Arabic_GetComposition(cur, src[nextIdx], FINAL)
				: Arabic_GetComposition(cur, src[nextIdx], ISOLATED);

			if (dstIndex < dstLen)
				dst[dstIndex++] = composed;

			// Everything strictly between LAM and ALEF is a composing mark
			// (guaranteed by the pre-scan). Emit those marks after the
			// ligature instead of dropping them.
			for (size_t markIdx = srcIndex + 1; markIdx < nextIdx; ++markIdx)
			{
				if (dstIndex < dstLen)
					dst[dstIndex++] = src[markIdx];
			}

			// Skip the ALEF that was combined.
			srcIndex = nextIdx;
			// LAM-ALEF does not join onward.
			prevJoins = false;
			continue;
		}

		// Determine the form from the joining context, falling through to
		// the next candidate whenever the map has no glyph for a form.
		wchar_t shaped = 0;
		bool curJoinsRight = false;

		if (prevJoins && next)
		{
			// Both sides join: MEDIAL
			shaped = Arabic_GetMap(cur, MEDIAL);
			if (shaped)
				curJoinsRight = Arabic_CanJoinRight(cur);
		}

		if (!shaped && prevJoins)
		{
			// Only the previous letter joins: FINAL (does not extend onward)
			shaped = Arabic_GetMap(cur, FINAL);
			curJoinsRight = false;
		}

		if (!shaped && next)
		{
			// Only the next letter joins: INITIAL
			shaped = Arabic_GetMap(cur, INITIAL);
			if (shaped)
				curJoinsRight = Arabic_CanJoinRight(cur);
		}

		if (!shaped)
		{
			// No joining: ISOLATED
			shaped = Arabic_GetMap(cur, ISOLATED);
			curJoinsRight = false;
		}

		if (!shaped)
			shaped = cur; // Fallback to the original code point if unmapped

		if (dstIndex < dstLen)
			dst[dstIndex++] = shaped;

		// Update state for the next character.
		prevJoins = curJoinsRight;
	}

	return dstIndex;
}

View File

@@ -1279,11 +1279,60 @@ float CPythonSkill::SSkillData::ProcessFormula(CPoly * pPoly, float fSkillLevel,
return pPoly->Eval();
}
static void ReplaceFirst(std::string& s, const char* needle, const std::string& repl)
// Format specifiers supported in skill descriptions
static const char* FORMAT_SPECIFIERS[] = {
	"%.0f", // Integer (no decimals)
	"%.1f", // 1 decimal place
	"%.2f", // 2 decimal places
	"%d",   // Integer (alternative)
};
static const size_t FORMAT_SPECIFIER_COUNT = sizeof(FORMAT_SPECIFIERS) / sizeof(FORMAT_SPECIFIERS[0]);

// Replaces the first (leftmost) supported format specifier in s with value.
//
// "%.0f" and "%d" print the value rounded down to a whole number, "%.1f"
// prints one decimal place and "%.2f" prints two.
//
// An escaped percent sign ("%%") is skipped as a unit and left in place, so
// text such as "50%%d" is not mistaken for a "%d" specifier.
//
// Returns true if a replacement was made, false if s contains no specifier.
static bool ReplaceNextFormatSpecifier(std::string& s, float value)
{
	size_t pos = 0;
	while ((pos = s.find('%', pos)) != std::string::npos)
	{
		// "%%" is a literal percent sign: step over both characters so the
		// second '%' can never start a specifier match.
		if (pos + 1 < s.size() && s[pos + 1] == '%')
		{
			pos += 2;
			continue;
		}

		for (size_t i = 0; i < FORMAT_SPECIFIER_COUNT; ++i)
		{
			const char* spec = FORMAT_SPECIFIERS[i];
			const size_t specLen = strlen(spec);
			if (s.compare(pos, specLen, spec) != 0)
				continue;

			// Format the value according to the specifier found.
			// snprintf always null-terminates, even on truncation.
			char szValue[64];
			if (strcmp(spec, "%.0f") == 0 || strcmp(spec, "%d") == 0)
				snprintf(szValue, sizeof(szValue), "%.0f", floorf(value));
			else if (strcmp(spec, "%.1f") == 0)
				snprintf(szValue, sizeof(szValue), "%.1f", value);
			else // %.2f
				snprintf(szValue, sizeof(szValue), "%.2f", value);

			s.replace(pos, specLen, szValue);
			return true;
		}

		// A '%' that starts neither an escape nor a known specifier:
		// leave it untouched and keep scanning.
		++pos;
	}

	return false;
}
// Collapses every escaped percent sign ("%%") in s into a single '%'.
// Pairs are consumed left to right without overlapping, so "%%%" becomes "%%".
static void UnescapePercent(std::string& s)
{
	std::string collapsed;
	collapsed.reserve(s.size());

	const size_t length = s.size();
	for (size_t i = 0; i < length; ++i)
	{
		collapsed.push_back(s[i]);

		// When this '%' opens an escaped pair, swallow its partner.
		const bool opensPair = (s[i] == '%') && (i + 1 < length) && (s[i + 1] == '%');
		if (opensPair)
			++i;
	}

	s.swap(collapsed);
}
const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float fSkillLevel)
@@ -1303,33 +1352,20 @@ const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float
float fMinValue = ProcessFormula(&minPoly, fSkillLevel);
float fMaxValue = ProcessFormula(&maxPoly, fSkillLevel);
// Take absolute values
if (fMinValue < 0.0f) fMinValue = -fMinValue;
if (fMaxValue < 0.0f) fMaxValue = -fMaxValue;
const bool wantsInt = (desc.find("%.0f") != std::string::npos);
if (wantsInt)
{
fMinValue = floorf(fMinValue);
fMaxValue = floorf(fMaxValue);
}
char szMin[64], szMax[64];
if (wantsInt)
{
_snprintf(szMin, sizeof(szMin), "%.0f", fMinValue);
_snprintf(szMax, sizeof(szMax), "%.0f", fMaxValue);
}
else
{
_snprintf(szMin, sizeof(szMin), "%.2f", fMinValue);
_snprintf(szMax, sizeof(szMax), "%.2f", fMaxValue);
}
static std::string out;
out = desc;
ReplaceFirst(out, "%.0f", szMin);
ReplaceFirst(out, "%.0f", szMax);
// Replace format specifiers in order of appearance
// First specifier gets min value, second gets max value
ReplaceNextFormatSpecifier(out, fMinValue);
ReplaceNextFormatSpecifier(out, fMaxValue);
// Convert escaped %% to single % (for display like "30%")
UnescapePercent(out);
return out.c_str();
}