fix: Optimized UTF8, BiDi, Debug

2026-01-20 21:23:31 +00:00
parent ba79e137f0
commit 100dd2b87b
5 changed files with 817 additions and 381 deletions
--- a/extern/include/utf8.h
+++ b/extern/include/utf8.h
@@ -1,9 +1,11 @@
 #pragma once
 #include <string>
+#include <cstring>
 #include <windows.h>
 #include <vector>
 #include <algorithm>
 #include <cmath>
+#include <utility>

 #include <EterLocale/Arabic.h>

@@ -37,6 +39,161 @@ constexpr size_t ARABIC_SHAPING_SAFETY_MARGIN_RETRY = 64;
 	#define BIDI_LOG_SIMPLE(msg) ((void)0)
 #endif

+// ============================================================================
+// OPTIMIZED CHARACTER CLASSIFICATION (Lookup Tables)
+// ============================================================================
+// Replaces expensive GetStringTypeW() syscalls with O(1) table lookups.
+// Tables are initialized once on first use (thread-safe via static init).
+
+namespace BiDiTables
+{
+	// Character property flags
+	enum ECharFlags : uint8_t
+	{
+		CF_NONE   = 0,
+		CF_ALPHA  = 0x01,  // Alphabetic (Latin, Cyrillic, Greek, etc.)
+		CF_DIGIT  = 0x02,  // Numeric digit (0-9, Arabic-Indic, etc.)
+		CF_RTL    = 0x04,  // RTL script (Arabic, Hebrew)
+		CF_ARABIC = 0x08,  // Arabic letter that needs shaping
+	};
+
+	// Main character flags table (65536 entries for BMP)
+	inline const uint8_t* GetCharFlagsTable()
+	{
+		static uint8_t s_table[65536] = {0};
+		static bool s_initialized = false;
+
+		if (!s_initialized)
+		{
+			// ASCII digits
+			for (int i = '0'; i <= '9'; ++i)
+				s_table[i] |= CF_DIGIT;
+
+			// ASCII letters
+			for (int i = 'A'; i <= 'Z'; ++i)
+				s_table[i] |= CF_ALPHA;
+			for (int i = 'a'; i <= 'z'; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Latin Extended-A/B (0x0100-0x024F)
+			for (int i = 0x0100; i <= 0x024F; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Latin Extended Additional (0x1E00-0x1EFF)
+			for (int i = 0x1E00; i <= 0x1EFF; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Greek (0x0370-0x03FF)
+			for (int i = 0x0370; i <= 0x03FF; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Cyrillic (0x0400-0x04FF)
+			for (int i = 0x0400; i <= 0x04FF; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Hebrew (0x0590-0x05FF) - RTL
+			for (int i = 0x0590; i <= 0x05FF; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// Arabic (0x0600-0x06FF) - RTL + needs shaping
+			for (int i = 0x0600; i <= 0x06FF; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+			// Arabic letters that need shaping (0x0621-0x064A)
+			for (int i = 0x0621; i <= 0x064A; ++i)
+				s_table[i] |= CF_ARABIC;
+
+			// Arabic Supplement (0x0750-0x077F)
+			for (int i = 0x0750; i <= 0x077F; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// Arabic Extended-A (0x08A0-0x08FF)
+			for (int i = 0x08A0; i <= 0x08FF; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// Arabic-Indic digits (0x0660-0x0669)
+			for (int i = 0x0660; i <= 0x0669; ++i)
+				s_table[i] |= CF_DIGIT;
+
+			// Extended Arabic-Indic digits (0x06F0-0x06F9)
+			for (int i = 0x06F0; i <= 0x06F9; ++i)
+				s_table[i] |= CF_DIGIT;
+
+			// Arabic Presentation Forms-A (0xFB50-0xFDFF) - already shaped
+			for (int i = 0xFB50; i <= 0xFDFF; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// Arabic Presentation Forms-B (0xFE70-0xFEFF) - already shaped
+			for (int i = 0xFE70; i <= 0xFEFF; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// Hebrew presentation forms (0xFB1D-0xFB4F)
+			for (int i = 0xFB1D; i <= 0xFB4F; ++i)
+				s_table[i] |= CF_RTL | CF_ALPHA;
+
+			// CJK (0x4E00-0x9FFF) - treat as LTR alpha
+			for (int i = 0x4E00; i <= 0x9FFF; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// Hangul (0xAC00-0xD7AF)
+			for (int i = 0xAC00; i <= 0xD7AF; ++i)
+				s_table[i] |= CF_ALPHA;
+
+			// RTL marks and controls
+			s_table[0x200F] |= CF_RTL; // RLM
+			s_table[0x061C] |= CF_RTL; // ALM
+			for (int i = 0x202B; i <= 0x202E; ++i)
+				s_table[i] |= CF_RTL; // RLE/RLO/PDF/LRE/LRO
+			for (int i = 0x2066; i <= 0x2069; ++i)
+				s_table[i] |= CF_RTL; // Isolates
+
+			s_initialized = true;
+		}
+
+		return s_table;
+	}
+
+	// Fast O(1) character classification functions
+	inline bool IsRTL(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_RTL; }
+	inline bool IsAlpha(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_ALPHA; }
+	inline bool IsDigit(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_DIGIT; }
+	inline bool IsArabicLetter(wchar_t ch) { return GetCharFlagsTable()[(uint16_t)ch] & CF_ARABIC; }
+	inline bool IsStrongLTR(wchar_t ch)
+	{
+		uint8_t flags = GetCharFlagsTable()[(uint16_t)ch];
+		// Strong LTR = (Alpha OR Digit) AND NOT RTL
+		return (flags & (CF_ALPHA | CF_DIGIT)) && !(flags & CF_RTL);
+	}
+}
+
+// ============================================================================
+// BUFFER POOLING (Avoid per-call allocations)
+// ============================================================================
+
+namespace BiDiBuffers
+{
+	struct TBufferPool
+	{
+		std::vector<wchar_t> shaped;
+
+		void EnsureCapacity(size_t n)
+		{
+			size_t needed = n * 2 + 64;
+			if (shaped.capacity() < needed) shaped.reserve(needed);
+		}
+
+		void Clear()
+		{
+			shaped.clear();
+		}
+	};
+
+	inline TBufferPool& Get()
+	{
+		thread_local static TBufferPool s_pool;
+		return s_pool;
+	}
+}
+
 // ============================================================================
 // UNICODE VALIDATION HELPERS
 // ============================================================================
@@ -65,7 +222,70 @@ static inline void SanitizeWideString(std::wstring& ws)
 		ws.end());
 }

+// ============================================================================
+// OPTIMIZED UTF-8 CONVERSION
+// ============================================================================
+// Fast paths for ASCII-only text (very common in games).
+// Falls back to Windows API for non-ASCII.
+
+namespace Utf8Fast
+{
+	// Check if string is pure ASCII (no bytes >= 128)
+	inline bool IsAsciiOnly(const char* s, size_t len)
+	{
+		// Process 8 bytes at a time for speed
+		const char* end = s + len;
+		const char* aligned_end = s + (len & ~7);
+
+		while (s < aligned_end)
+		{
+			// Check 8 bytes at once using bitwise OR
+			uint64_t chunk;
+			memcpy(&chunk, s, 8);
+			if (chunk & 0x8080808080808080ULL)
+				return false;
+			s += 8;
+		}
+
+		// Check remaining bytes
+		while (s < end)
+		{
+			if ((unsigned char)*s >= 128)
+				return false;
+			++s;
+		}
+		return true;
+	}
+
+	// Fast ASCII-only conversion (no API calls)
+	inline std::wstring AsciiToWide(const char* s, size_t len)
+	{
+		std::wstring out;
+		out.reserve(len);
+		for (size_t i = 0; i < len; ++i)
+			out.push_back(static_cast<wchar_t>(static_cast<unsigned char>(s[i])));
+		return out;
+	}
+
+	// Fast ASCII-only conversion (no API calls)
+	inline std::string WideToAscii(const wchar_t* ws, size_t len)
+	{
+		std::string out;
+		out.reserve(len);
+		for (size_t i = 0; i < len; ++i)
+		{
+			wchar_t ch = ws[i];
+			if (ch < 128)
+				out.push_back(static_cast<char>(ch));
+			else
+				return ""; // Not pure ASCII, caller should use full conversion
+		}
+		return out;
+	}
+}
+
 // UTF-8 -> UTF-16 (Windows wide)
+// OPTIMIZED: Fast path for ASCII-only strings (avoids 2x API calls)
 inline std::wstring Utf8ToWide(const std::string& s)
 {
 	if (s.empty())
@@ -75,9 +295,14 @@ inline std::wstring Utf8ToWide(const std::string& s)
 	if (s.size() > MAX_TEXT_LENGTH || s.size() > INT_MAX)
 	{
 		BIDI_LOG("Utf8ToWide: String too large (%zu bytes)", s.size());
-		return L""; // String too large
+		return L"";
 	}

+	// Fast path: ASCII-only strings (very common in games)
+	if (Utf8Fast::IsAsciiOnly(s.data(), s.size()))
+		return Utf8Fast::AsciiToWide(s.data(), s.size());
+
+	// Slow path: Use Windows API for non-ASCII
 	int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), (int)s.size(), nullptr, 0);
 	if (wlen <= 0)
 	{
@@ -90,29 +315,31 @@ inline std::wstring Utf8ToWide(const std::string& s)
 	if (written <= 0 || written != wlen)
 	{
 		BIDI_LOG("Utf8ToWide: Second conversion failed (written=%d, expected=%d, error=%d)", written, wlen, GetLastError());
-		return L""; // Conversion failed unexpectedly
+		return L"";
 	}

-	// Optional: Sanitize to remove invalid Unicode codepoints (surrogates, non-characters)
-	// Uncomment if you want strict validation
-	// SanitizeWideString(out);
-
 	return out;
 }

 // Convenience overload for char*
+// OPTIMIZED: Fast path for ASCII-only strings
 inline std::wstring Utf8ToWide(const char* s)
 {
 	if (!s || !*s)
 		return L"";

+	size_t len = strlen(s);
+
+	// Fast path: ASCII-only strings
+	if (Utf8Fast::IsAsciiOnly(s, len))
+		return Utf8Fast::AsciiToWide(s, len);
+
+	// Slow path: Use Windows API
 	int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, nullptr, 0);
 	if (wlen <= 0)
 		return L"";

-	// wlen includes terminating NUL
 	std::wstring out(wlen, L'\0');
-
 	int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, out.data(), wlen);
 	if (written <= 0 || written != wlen)
 	{
@@ -124,13 +351,11 @@ inline std::wstring Utf8ToWide(const char* s)
 	if (!out.empty() && out.back() == L'\0')
 		out.pop_back();

-	// Optional: Sanitize to remove invalid Unicode codepoints
-	// SanitizeWideString(out);
-
 	return out;
 }

 // UTF-16 (Windows wide) -> UTF-8
+// OPTIMIZED: Fast path for ASCII-only strings
 inline std::string WideToUtf8(const std::wstring& ws)
 {
 	if (ws.empty())
@@ -138,8 +363,23 @@ inline std::string WideToUtf8(const std::wstring& ws)

 	// Validate size limits (prevent DoS and INT_MAX overflow)
 	if (ws.size() > MAX_TEXT_LENGTH || ws.size() > INT_MAX)
-		return ""; // String too large
+		return "";

+	// Fast path: Check if all characters are ASCII
+	bool isAscii = true;
+	for (size_t i = 0; i < ws.size() && isAscii; ++i)
+		isAscii = (ws[i] < 128);
+
+	if (isAscii)
+	{
+		std::string out;
+		out.reserve(ws.size());
+		for (size_t i = 0; i < ws.size(); ++i)
+			out.push_back(static_cast<char>(ws[i]));
+		return out;
+	}
+
+	// Slow path: Use Windows API for non-ASCII
 	int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, ws.data(), (int)ws.size(), nullptr, 0, nullptr, nullptr);
 	if (len <= 0)
 		return "";
@@ -149,7 +389,7 @@ inline std::string WideToUtf8(const std::wstring& ws)
 	if (written <= 0 || written != len)
 	{
 		BIDI_LOG("WideToUtf8: Conversion failed (written=%d, expected=%d, error=%d)", written, len, GetLastError());
-		return ""; // Conversion failed
+		return "";
 	}
 	return out;
 }
@@ -169,59 +409,22 @@ inline std::string WideToUtf8(const wchar_t* ws)
 enum class EBidiDir { LTR, RTL };
 enum class ECharDir : unsigned char { Neutral, LTR, RTL };

-struct TBidiRun
-{
-	EBidiDir dir;
-	std::vector<wchar_t> text; // logical order
-};
-
+// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 static inline bool IsRTLCodepoint(wchar_t ch)
 {
-	// Directional marks / isolates / embeddings that affect bidi
-	if (ch == 0x200F || ch == 0x061C) return true; // RLM, ALM
-	if (ch >= 0x202B && ch <= 0x202E) return true; // RLE/RLO/PDF/LRE/LRO
-	if (ch >= 0x2066 && ch <= 0x2069) return true; // isolates
-
-	// Hebrew + Arabic blocks (BMP)
-	if (ch >= 0x0590 && ch <= 0x08FF) return true;
-
-	// Presentation forms
-	if (ch >= 0xFB1D && ch <= 0xFDFF) return true;
-	if (ch >= 0xFE70 && ch <= 0xFEFF) return true;
-
-	return false;
+	return BiDiTables::IsRTL(ch);
 }

+// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 static inline bool IsStrongAlpha(wchar_t ch)
 {
-	// Use thread-local cache for BMP (Thread safety)
-	thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false
-	unsigned char& v = cache[(unsigned short)ch];
-	if (v == 1) return true;
-	if (v == 2) return false;
-
-	WORD type = 0;
-	bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_ALPHA);
-	v = ok ? 1 : 2;
-	return ok;
+	return BiDiTables::IsAlpha(ch);
 }

+// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 static inline bool IsDigit(wchar_t ch)
 {
-	// Fast path for ASCII digits (90%+ of digit checks)
-	if (ch >= L'0' && ch <= L'9')
-		return true;
-
-	// For non-ASCII, use cache (Arabic-Indic digits, etc.)
-	thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false
-	unsigned char& v = cache[(unsigned short)ch];
-	if (v == 1) return true;
-	if (v == 2) return false;
-
-	WORD type = 0;
-	bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_DIGIT);
-	v = ok ? 1 : 2;
-	return ok;
+	return BiDiTables::IsDigit(ch);
 }

 static inline bool IsNameTokenPunct(wchar_t ch)
@@ -257,12 +460,10 @@ static inline bool IsNameTokenPunct(wchar_t ch)
 	}
 }

-// Check RTL first to avoid classifying Arabic as LTR
+// Optimized: O(1) lookup - Check RTL first to avoid classifying Arabic as LTR
 static inline bool IsStrongLTR(wchar_t ch)
 {
-	if (IsRTLCodepoint(ch))
-		return false;
-	return IsStrongAlpha(ch) || IsDigit(ch);
+	return BiDiTables::IsStrongLTR(ch);
 }

 static inline bool HasStrongLTRNeighbor(const wchar_t* s, int n, int i)
@@ -561,33 +762,29 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
 	if (!s || n <= 0)
 		return {};

+	// Use buffer pool to avoid per-call allocations
+	BiDiBuffers::TBufferPool& buffers = BiDiBuffers::Get();
+	buffers.EnsureCapacity((size_t)n);
+
 	// 1) base direction
 	EBidiDir base = forceRTL ? EBidiDir::RTL : DetectBaseDir_FirstStrong(s, n);

 	// Pre-compute strong character positions for O(1) neutral resolution
 	TStrongDirCache strongCache(s, n, base);

-	// 2) split into runs
-	// Estimate runs based on text length (~1 per 50 chars, min 4)
-	std::vector<TBidiRun> runs;
-	const size_t estimatedRuns = (size_t)std::max(4, n / 50);
-	runs.reserve(estimatedRuns);
-
-	auto push_run = [&](EBidiDir d)
-		{
-			if (runs.empty() || runs.back().dir != d)
-				runs.push_back(TBidiRun{ d, {} });
-		};
-
-	// start with base so leading neutrals attach predictably
-	push_run(base);
+	// 2) split into runs - use a more efficient approach
+	// Instead of TBidiRun with vectors, use start/end indices
+	struct TRunInfo { int start; int end; EBidiDir dir; };
+	thread_local static std::vector<TRunInfo> s_runs;
+	s_runs.clear();
+	s_runs.reserve((size_t)std::max(4, n / 50));

 	EBidiDir lastStrong = base;
+	EBidiDir currentRunDir = base;
+	int runStart = 0;

 	for (int i = 0; i < n; ++i)
 	{
-		wchar_t ch = s[i];
-
 		EBidiDir d;
 		ECharDir cd = GetCharDirSmart(s, n, i);

@@ -607,98 +804,84 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
 			d = ResolveNeutralDir(s, n, i, base, lastStrong, &strongCache);
 		}

-#ifdef DEBUG_BIDI
-		if (i < 50) // Only log first 50 chars to avoid spam
+		// Start a new run if direction changes
+		if (d != currentRunDir)
 		{
-			BIDI_LOG("Char[%d] U+%04X '%lc' → CharDir=%s, RunDir=%s",
-				i, (unsigned int)ch, (ch >= 32 && ch < 127) ? ch : L'?',
-				cd == ECharDir::RTL ? "RTL" : (cd == ECharDir::LTR ? "LTR" : "Neutral"),
-				d == EBidiDir::RTL ? "RTL" : "LTR");
+			if (i > runStart)
+				s_runs.push_back({runStart, i, currentRunDir});
+			runStart = i;
+			currentRunDir = d;
 		}
-#endif
-
-		push_run(d);
-		runs.back().text.push_back(ch);
 	}
+	// Push final run
+	if (n > runStart)
+		s_runs.push_back({runStart, n, currentRunDir});

-	// 3) shape RTL runs in logical order (Arabic shaping)
-	for (auto& r : runs)
+	// 3) shape RTL runs using pooled buffer
+	buffers.shaped.clear();
+
+	auto shapeRun = [&](int start, int end) -> std::pair<const wchar_t*, int>
 	{
-		if (r.dir != EBidiDir::RTL)
-			continue;
+		int len = end - start;
+		if (len <= 0)
+			return {nullptr, 0};

-		if (r.text.empty())
-			continue;
+		// Check for potential integer overflow
+		if ((size_t)len > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
+			return {s + start, len}; // Return unshaped

-		// Check for potential integer overflow before allocation
-		if (r.text.size() > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
-		{
-			BIDI_LOG("BuildVisualBidiText: RTL run too large for shaping (%zu chars)", r.text.size());
-			continue; // Text too large to process safely
-		}
+		size_t neededSize = buffers.shaped.size() + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN;
+		if (buffers.shaped.capacity() < neededSize)
+			buffers.shaped.reserve(neededSize);

-		std::vector<wchar_t> shaped(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN, 0);
+		size_t outStart = buffers.shaped.size();
+		buffers.shaped.resize(outStart + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN);
+
+		int outLen = Arabic_MakeShape(const_cast<wchar_t*>(s + start), len,
+		                               buffers.shaped.data() + outStart,
+		                               (int)(buffers.shaped.size() - outStart));

-		int outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
 		if (outLen <= 0)
-		{
-			BIDI_LOG("Arabic_MakeShape FAILED for RTL run of %zu characters", r.text.size());
-			BIDI_LOG("  WARNING: This RTL text segment will NOT be displayed!");
-			BIDI_LOG("  First few characters: U+%04X U+%04X U+%04X U+%04X",
-				r.text.size() > 0 ? (unsigned int)r.text[0] : 0,
-				r.text.size() > 1 ? (unsigned int)r.text[1] : 0,
-				r.text.size() > 2 ? (unsigned int)r.text[2] : 0,
-				r.text.size() > 3 ? (unsigned int)r.text[3] : 0);
-			continue;
-		}
+			return {s + start, len}; // Return unshaped on failure

-		// Retry once if buffer too small
-		if (outLen >= (int)shaped.size())
-		{
-			shaped.assign(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR_RETRY + ARABIC_SHAPING_SAFETY_MARGIN_RETRY, 0);
-			outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
-			if (outLen <= 0)
-				continue;
-			// Add error check instead of silent truncation
-			if (outLen > (int)shaped.size())
-			{
-				BIDI_LOG("Arabic_MakeShape: Buffer still too small after retry (%d > %zu)", outLen, shaped.size());
-				// Shaping failed critically, use unshaped text
-				continue;
-			}
-		}
+		buffers.shaped.resize(outStart + (size_t)outLen);
+		return {buffers.shaped.data() + outStart, outLen};
+	};

-		r.text.assign(shaped.begin(), shaped.begin() + outLen);
-	}
-
-	// 4) produce visual order:
-	// - reverse RTL runs internally
-	// - reverse run sequence if base RTL
+	// 4) produce visual order
 	std::vector<wchar_t> visual;
 	visual.reserve((size_t)n);

-	auto emit_run = [&](const TBidiRun& r)
+	auto emitRun = [&](const TRunInfo& run)
+	{
+		if (run.dir == EBidiDir::RTL)
 		{
-			if (r.dir == EBidiDir::RTL)
+			// Shape and reverse RTL runs
+			std::pair<const wchar_t*, int> shaped = shapeRun(run.start, run.end);
+			const wchar_t* ptr = shaped.first;
+			int len = shaped.second;
+			if (ptr && len > 0)
 			{
-				for (int k = (int)r.text.size() - 1; k >= 0; --k)
-					visual.push_back(r.text[(size_t)k]);
+				for (int k = len - 1; k >= 0; --k)
+					visual.push_back(ptr[k]);
 			}
-			else
-			{
-				visual.insert(visual.end(), r.text.begin(), r.text.end());
-			}
-		};
+		}
+		else
+		{
+			// LTR runs: copy directly
+			visual.insert(visual.end(), s + run.start, s + run.end);
+		}
+	};

 	if (base == EBidiDir::LTR)
 	{
-		for (const auto& r : runs)
-			emit_run(r);
+		for (const auto& run : s_runs)
+			emitRun(run);
 	}
 	else
 	{
-		for (int i = (int)runs.size() - 1; i >= 0; --i)
-			emit_run(runs[(size_t)i]);
+		for (int i = (int)s_runs.size() - 1; i >= 0; --i)
+			emitRun(s_runs[(size_t)i]);
 	}

 	return visual;
@@ -763,7 +946,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
 		{
 			// Apply BiDi to message with auto-detection (don't force RTL)
 			// Let the BiDi algorithm detect base direction from first strong character
-			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
+			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
 			visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
 		}
 		visual.push_back(L' ');
@@ -787,7 +970,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
 		{
 			// Apply BiDi to message with auto-detection (don't force RTL)
 			// Let the BiDi algorithm detect base direction from first strong character
-			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
+			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
 			visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
 		}
 	}