Merge pull request #83 from rtw1x1/main

Optimization for BiDi algorithm, UTF8, Debug, Arabic. Fixed skill desc
2026-01-20 21:25:14 +00:00
parent ba79e137f0 100dd2b87b
commit e38dfaca45
5 changed files with 817 additions and 381 deletions
--- a/extern/include/utf8.h
+++ b/extern/include/utf8.h
@@ -1,9 +1,11 @@
 #pragma once
 #include <string>
 #include <cstring>
 #include <windows.h>
 #include <vector>
 #include <algorithm>
 #include <cmath>
 #include <utility>
 #include <EterLocale/Arabic.h>
@@ -37,6 +39,161 @@ constexpr size_t ARABIC_SHAPING_SAFETY_MARGIN_RETRY = 64;
 	#define BIDI_LOG_SIMPLE(msg) ((void)0)
 #endif
 // ============================================================================
 // OPTIMIZED CHARACTER CLASSIFICATION (Lookup Tables)
 // ============================================================================
 // Replaces expensive GetStringTypeW() syscalls with O(1) table lookups.
 // The table is filled exactly once, on first use, by the constructor of a
 // function-local static object, so concurrent first calls are synchronized by
 // the language's static-initialization guarantee rather than by a hand-rolled flag.
 namespace BiDiTables
 {
 	// Character property flags (bit mask stored per BMP code unit)
 	enum ECharFlags : uint8_t
 	{
 		CF_NONE   = 0,
 		CF_ALPHA  = 0x01,  // Alphabetic (Latin, Cyrillic, Greek, etc.)
 		CF_DIGIT  = 0x02,  // Numeric digit (0-9, Arabic-Indic, etc.)
 		CF_RTL    = 0x04,  // RTL script (Arabic, Hebrew)
 		CF_ARABIC = 0x08,  // Arabic letter that needs shaping
 	};
 	// Owns the 65536-entry flag table (one byte per BMP code unit) and fills it
 	// in its constructor. Only ever instantiated as the static inside
 	// GetCharFlagsTable().
 	struct TCharFlagsTable
 	{
 		uint8_t flags[65536];
 		TCharFlagsTable() : flags{}
 		{
 			const uint8_t kRtlAlpha = static_cast<uint8_t>(CF_RTL | CF_ALPHA);
 			// ASCII digits and letters
 			Mark('0', '9', CF_DIGIT);
 			Mark('A', 'Z', CF_ALPHA);
 			Mark('a', 'z', CF_ALPHA);
 			// Left-to-right alphabetic blocks
 			Mark(0x0100, 0x024F, CF_ALPHA); // Latin Extended-A/B
 			Mark(0x1E00, 0x1EFF, CF_ALPHA); // Latin Extended Additional
 			Mark(0x0370, 0x03FF, CF_ALPHA); // Greek
 			Mark(0x0400, 0x04FF, CF_ALPHA); // Cyrillic
 			Mark(0x4E00, 0x9FFF, CF_ALPHA); // CJK - treated as LTR alpha
 			Mark(0xAC00, 0xD7AF, CF_ALPHA); // Hangul
 			// Right-to-left blocks.
 			// NOTE(review): the legacy range check in IsRTLCodepoint treated all of
 			// U+0590..U+08FF as RTL; the blocks below leave gaps (U+0700..U+074F,
 			// U+0780..U+089F). Confirm that dropping those ranges is intended.
 			Mark(0x0590, 0x05FF, kRtlAlpha); // Hebrew
 			Mark(0x0600, 0x06FF, kRtlAlpha); // Arabic
 			Mark(0x0750, 0x077F, kRtlAlpha); // Arabic Supplement
 			Mark(0x08A0, 0x08FF, kRtlAlpha); // Arabic Extended-A
 			Mark(0xFB1D, 0xFB4F, kRtlAlpha); // Hebrew presentation forms
 			Mark(0xFB50, 0xFDFF, kRtlAlpha); // Arabic Presentation Forms-A - already shaped
 			Mark(0xFE70, 0xFEFF, kRtlAlpha); // Arabic Presentation Forms-B - already shaped
 			// Arabic letters that need shaping
 			Mark(0x0621, 0x064A, CF_ARABIC);
 			// Digits inside the Arabic block (these keep the RTL/alpha bits set above)
 			Mark(0x0660, 0x0669, CF_DIGIT); // Arabic-Indic digits
 			Mark(0x06F0, 0x06F9, CF_DIGIT); // Extended Arabic-Indic digits
 			// Directional marks and controls that are reported as RTL
 			Mark(0x200F, 0x200F, CF_RTL); // RLM
 			Mark(0x061C, 0x061C, CF_RTL); // ALM
 			Mark(0x202B, 0x202E, CF_RTL); // RLE, PDF, LRO, RLO (LRE is U+202A and is not included)
 			Mark(0x2066, 0x2069, CF_RTL); // Isolates: LRI, RLI, FSI, PDI
 		}
 		// ORs `mask` into every entry of the inclusive range [first, last].
 		void Mark(int first, int last, uint8_t mask)
 		{
 			for (int cp = first; cp <= last; ++cp)
 				flags[cp] |= mask;
 		}
 	};
 	// Returns a pointer to the 65536-entry flag table, building it on the first call.
 	// Index it with a 16-bit code unit; each entry is a combination of ECharFlags bits.
 	inline const uint8_t* GetCharFlagsTable()
 	{
 		static const TCharFlagsTable s_table;
 		return s_table.flags;
 	}
 	// Fast O(1) character classification functions.
 	// Each one truncates `ch` to 16 bits and tests a single flag bit.
 	inline bool IsRTL(wchar_t ch) { return (GetCharFlagsTable()[static_cast<uint16_t>(ch)] & CF_RTL) != 0; }
 	inline bool IsAlpha(wchar_t ch) { return (GetCharFlagsTable()[static_cast<uint16_t>(ch)] & CF_ALPHA) != 0; }
 	inline bool IsDigit(wchar_t ch) { return (GetCharFlagsTable()[static_cast<uint16_t>(ch)] & CF_DIGIT) != 0; }
 	inline bool IsArabicLetter(wchar_t ch) { return (GetCharFlagsTable()[static_cast<uint16_t>(ch)] & CF_ARABIC) != 0; }
 	// Strong LTR = (Alpha OR Digit) AND NOT RTL
 	inline bool IsStrongLTR(wchar_t ch)
 	{
 		const uint8_t flags = GetCharFlagsTable()[static_cast<uint16_t>(ch)];
 		const bool alphaOrDigit = (flags & (CF_ALPHA | CF_DIGIT)) != 0;
 		const bool rtl = (flags & CF_RTL) != 0;
 		return alphaOrDigit && !rtl;
 	}
 }
 // ============================================================================
 // BUFFER POOLING (Avoid per-call allocations)
 // ============================================================================
 namespace BiDiBuffers
 {
 	// Reusable scratch storage so shaping output does not need a fresh vector per call.
 	struct TBufferPool
 	{
 		std::vector<wchar_t> shaped;
 		// Reserves room for at least 2*n + 64 elements in `shaped`.
 		// Only ever grows the reservation; the stored elements are untouched.
 		void EnsureCapacity(size_t n)
 		{
 			const size_t wanted = 64 + 2 * n;
 			if (wanted > shaped.capacity())
 				shaped.reserve(wanted);
 		}
 		// Empties `shaped`.
 		void Clear()
 		{
 			shaped.clear();
 		}
 	};
 	// Returns the pool belonging to the calling thread (one instance per thread).
 	inline TBufferPool& Get()
 	{
 		static thread_local TBufferPool tls_pool;
 		return tls_pool;
 	}
 }
 // ============================================================================
 // UNICODE VALIDATION HELPERS
 // ============================================================================
@@ -65,7 +222,70 @@ static inline void SanitizeWideString(std::wstring& ws)
 		ws.end());
 }
 // ============================================================================
 // OPTIMIZED UTF-8 CONVERSION
 // ============================================================================
 // Fast paths for ASCII-only text (very common in games).
 // Falls back to Windows API for non-ASCII.
 namespace Utf8Fast
 {
 	// Returns true when none of the `len` bytes at `s` has its high bit set
 	// (i.e. every byte is < 128). An empty range counts as ASCII.
 	inline bool IsAsciiOnly(const char* s, size_t len)
 	{
 		const uint64_t kHighBits = 0x8080808080808080ULL;
 		// Largest multiple of 8 that fits in len: scanned one 64-bit word at a time.
 		const size_t blockBytes = len & ~static_cast<size_t>(7);
 		size_t pos = 0;
 		for (; pos < blockBytes; pos += 8)
 		{
 			// memcpy keeps the load well-defined for unaligned input
 			uint64_t word;
 			memcpy(&word, s + pos, sizeof(word));
 			if ((word & kHighBits) != 0)
 				return false;
 		}
 		// Up to 7 trailing bytes are checked individually
 		for (; pos < len; ++pos)
 		{
 			if ((static_cast<unsigned char>(s[pos]) & 0x80) != 0)
 				return false;
 		}
 		return true;
 	}
 	// Widens `len` bytes to wchar_t one-to-one, treating each byte as unsigned.
 	// Performs no validation; intended for input already known to be ASCII.
 	inline std::wstring AsciiToWide(const char* s, size_t len)
 	{
 		std::wstring wide(len, L'\0');
 		for (size_t idx = 0; idx != len; ++idx)
 			wide[idx] = static_cast<wchar_t>(static_cast<unsigned char>(s[idx]));
 		return wide;
 	}
 	// Narrows `len` wide characters to char. Returns an empty string as soon as a
 	// character that is not < 128 is met, signalling that the caller should use
 	// the full conversion (an empty input also yields an empty string).
 	inline std::string WideToAscii(const wchar_t* ws, size_t len)
 	{
 		std::string narrow;
 		narrow.reserve(len);
 		for (const wchar_t* cur = ws; cur != ws + len; ++cur)
 		{
 			if (!(*cur < 128))
 				return std::string();
 			narrow.push_back(static_cast<char>(*cur));
 		}
 		return narrow;
 	}
 }
 // UTF-8 -> UTF-16 (Windows wide)
 // OPTIMIZED: Fast path for ASCII-only strings (avoids 2x API calls)
 inline std::wstring Utf8ToWide(const std::string& s)
 {
 	if (s.empty())
@@ -75,9 +295,14 @@ inline std::wstring Utf8ToWide(const std::string& s)
 	if (s.size() > MAX_TEXT_LENGTH || s.size() > INT_MAX)
 	{
 		BIDI_LOG("Utf8ToWide: String too large (%zu bytes)", s.size());
-		return L""; // String too large
+		return L"";
 	}
 	// Fast path: ASCII-only strings (very common in games)
 	if (Utf8Fast::IsAsciiOnly(s.data(), s.size()))
 		return Utf8Fast::AsciiToWide(s.data(), s.size());
 	// Slow path: Use Windows API for non-ASCII
 	int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), (int)s.size(), nullptr, 0);
 	if (wlen <= 0)
 	{
@@ -90,29 +315,31 @@ inline std::wstring Utf8ToWide(const std::string& s)
 	if (written <= 0 || written != wlen)
 	{
 		BIDI_LOG("Utf8ToWide: Second conversion failed (written=%d, expected=%d, error=%d)", written, wlen, GetLastError());
-		return L""; // Conversion failed unexpectedly
+		return L"";
 	}
 	// Optional: Sanitize to remove invalid Unicode codepoints (surrogates, non-characters)
 	// Uncomment if you want strict validation
 	// SanitizeWideString(out);
 	return out;
 }
 // Convenience overload for char*
 // OPTIMIZED: Fast path for ASCII-only strings
 inline std::wstring Utf8ToWide(const char* s)
 {
 	if (!s || !*s)
 		return L"";
 	size_t len = strlen(s);
 	// Fast path: ASCII-only strings
 	if (Utf8Fast::IsAsciiOnly(s, len))
 		return Utf8Fast::AsciiToWide(s, len);
 	// Slow path: Use Windows API
 	int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, nullptr, 0);
 	if (wlen <= 0)
 		return L"";
 	// wlen includes terminating NUL
 	std::wstring out(wlen, L'\0');
 	int written = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s, -1, out.data(), wlen);
 	if (written <= 0 || written != wlen)
 	{
@@ -124,13 +351,11 @@ inline std::wstring Utf8ToWide(const char* s)
 	if (!out.empty() && out.back() == L'\0')
 		out.pop_back();
 	// Optional: Sanitize to remove invalid Unicode codepoints
 	// SanitizeWideString(out);
 	return out;
 }
 // UTF-16 (Windows wide) -> UTF-8
 // OPTIMIZED: Fast path for ASCII-only strings
 inline std::string WideToUtf8(const std::wstring& ws)
 {
 	if (ws.empty())
@@ -138,8 +363,23 @@ inline std::string WideToUtf8(const std::wstring& ws)
 	// Validate size limits (prevent DoS and INT_MAX overflow)
 	if (ws.size() > MAX_TEXT_LENGTH || ws.size() > INT_MAX)
-		return ""; // String too large
+		return "";
 	// Fast path: Check if all characters are ASCII
 	bool isAscii = true;
 	for (size_t i = 0; i < ws.size() && isAscii; ++i)
 		isAscii = (ws[i] < 128);
 	if (isAscii)
 	{
 		std::string out;
 		out.reserve(ws.size());
 		for (size_t i = 0; i < ws.size(); ++i)
 			out.push_back(static_cast<char>(ws[i]));
 		return out;
 	}
 	// Slow path: Use Windows API for non-ASCII
 	int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, ws.data(), (int)ws.size(), nullptr, 0, nullptr, nullptr);
 	if (len <= 0)
 		return "";
@@ -149,7 +389,7 @@ inline std::string WideToUtf8(const std::wstring& ws)
 	if (written <= 0 || written != len)
 	{
 		BIDI_LOG("WideToUtf8: Conversion failed (written=%d, expected=%d, error=%d)", written, len, GetLastError());
-		return ""; // Conversion failed
+		return "";
 	}
 	return out;
 }
@@ -169,59 +409,22 @@ inline std::string WideToUtf8(const wchar_t* ws)
 // Resolved text direction, used for both the paragraph base direction and the
 // direction assigned to each run.
 enum class EBidiDir { LTR, RTL };
 // Direction class of a single character; Neutral characters have no direction
 // of their own and get one resolved from their context.
 enum class ECharDir : unsigned char { Neutral, LTR, RTL };
-struct TBidiRun
+// Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 {
 	EBidiDir dir;
 	std::vector<wchar_t> text; // logical order
 };
 static inline bool IsRTLCodepoint(wchar_t ch)
 {
-	// Directional marks / isolates / embeddings that affect bidi
+	return BiDiTables::IsRTL(ch);
 	if (ch == 0x200F || ch == 0x061C) return true; // RLM, ALM
 	if (ch >= 0x202B && ch <= 0x202E) return true; // RLE/RLO/PDF/LRE/LRO
 	if (ch >= 0x2066 && ch <= 0x2069) return true; // isolates
 	// Hebrew + Arabic blocks (BMP)
 	if (ch >= 0x0590 && ch <= 0x08FF) return true;
 	// Presentation forms
 	if (ch >= 0xFB1D && ch <= 0xFDFF) return true;
 	if (ch >= 0xFE70 && ch <= 0xFEFF) return true;
 	return false;
 }
 // Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 static inline bool IsStrongAlpha(wchar_t ch)
 {
-	// Use thread-local cache for BMP (Thread safety)
+	return BiDiTables::IsAlpha(ch);
 	thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false
 	unsigned char& v = cache[(unsigned short)ch];
 	if (v == 1) return true;
 	if (v == 2) return false;
 	WORD type = 0;
 	bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_ALPHA);
 	v = ok ? 1 : 2;
 	return ok;
 }
 // Optimized: O(1) lookup table instead of GetStringTypeW() syscalls
 static inline bool IsDigit(wchar_t ch)
 {
-	// Fast path for ASCII digits (90%+ of digit checks)
+	return BiDiTables::IsDigit(ch);
 	if (ch >= L'0' && ch <= L'9')
 		return true;
 	// For non-ASCII, use cache (Arabic-Indic digits, etc.)
 	thread_local static unsigned char cache[65536] = {}; // 0=unknown, 1=true, 2=false
 	unsigned char& v = cache[(unsigned short)ch];
 	if (v == 1) return true;
 	if (v == 2) return false;
 	WORD type = 0;
 	bool ok = GetStringTypeW(CT_CTYPE1, &ch, 1, &type) && (type & C1_DIGIT);
 	v = ok ? 1 : 2;
 	return ok;
 }
 static inline bool IsNameTokenPunct(wchar_t ch)
@@ -257,12 +460,10 @@ static inline bool IsNameTokenPunct(wchar_t ch)
 	}
 }
-// Check RTL first to avoid classifying Arabic as LTR
+// Optimized: O(1) lookup - Check RTL first to avoid classifying Arabic as LTR
 static inline bool IsStrongLTR(wchar_t ch)
 {
-	if (IsRTLCodepoint(ch))
+	return BiDiTables::IsStrongLTR(ch);
 		return false;
 	return IsStrongAlpha(ch) || IsDigit(ch);
 }
 static inline bool HasStrongLTRNeighbor(const wchar_t* s, int n, int i)
@@ -561,33 +762,29 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
 	if (!s || n <= 0)
 		return {};
 	// Use buffer pool to avoid per-call allocations
 	BiDiBuffers::TBufferPool& buffers = BiDiBuffers::Get();
 	buffers.EnsureCapacity((size_t)n);
 	// 1) base direction
 	EBidiDir base = forceRTL ? EBidiDir::RTL : DetectBaseDir_FirstStrong(s, n);
 	// Pre-compute strong character positions for O(1) neutral resolution
 	TStrongDirCache strongCache(s, n, base);
-	// 2) split into runs
+	// 2) split into runs - use a more efficient approach
-	// Estimate runs based on text length (~1 per 50 chars, min 4)
+	// Instead of TBidiRun with vectors, use start/end indices
-	std::vector<TBidiRun> runs;
+	struct TRunInfo { int start; int end; EBidiDir dir; };
-	const size_t estimatedRuns = (size_t)std::max(4, n / 50);
+	thread_local static std::vector<TRunInfo> s_runs;
-	runs.reserve(estimatedRuns);
+	s_runs.clear();
-
+	s_runs.reserve((size_t)std::max(4, n / 50));
 	auto push_run = [&](EBidiDir d)
 		{
 			if (runs.empty() || runs.back().dir != d)
 				runs.push_back(TBidiRun{ d, {} });
 		};
 	// start with base so leading neutrals attach predictably
 	push_run(base);
 	EBidiDir lastStrong = base;
 	EBidiDir currentRunDir = base;
 	int runStart = 0;
 	for (int i = 0; i < n; ++i)
 	{
 		wchar_t ch = s[i];
 		EBidiDir d;
 		ECharDir cd = GetCharDirSmart(s, n, i);
@@ -607,98 +804,84 @@ static std::vector<wchar_t> BuildVisualBidiText_Tagless(const wchar_t* s, int n,
 			d = ResolveNeutralDir(s, n, i, base, lastStrong, &strongCache);
 		}
-#ifdef DEBUG_BIDI
+		// Start a new run if direction changes
-		if (i < 50) // Only log first 50 chars to avoid spam
+		if (d != currentRunDir)
 		{
-			BIDI_LOG("Char[%d] U+%04X '%lc' → CharDir=%s, RunDir=%s",
+			if (i > runStart)
-				i, (unsigned int)ch, (ch >= 32 && ch < 127) ? ch : L'?',
+				s_runs.push_back({runStart, i, currentRunDir});
-				cd == ECharDir::RTL ? "RTL" : (cd == ECharDir::LTR ? "LTR" : "Neutral"),
+			runStart = i;
-				d == EBidiDir::RTL ? "RTL" : "LTR");
+			currentRunDir = d;
 		}
 #endif
 		push_run(d);
 		runs.back().text.push_back(ch);
 	}
 	// Push final run
 	if (n > runStart)
 		s_runs.push_back({runStart, n, currentRunDir});
-	// 3) shape RTL runs in logical order (Arabic shaping)
+	// 3) shape RTL runs using pooled buffer
-	for (auto& r : runs)
+	buffers.shaped.clear();
 	auto shapeRun = [&](int start, int end) -> std::pair<const wchar_t*, int>
 	{
-		if (r.dir != EBidiDir::RTL)
+		int len = end - start;
-			continue;
+		if (len <= 0)
 			return {nullptr, 0};
-		if (r.text.empty())
+		// Check for potential integer overflow
-			continue;
+		if ((size_t)len > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
 			return {s + start, len}; // Return unshaped
-		// Check for potential integer overflow before allocation
+		size_t neededSize = buffers.shaped.size() + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN;
-		if (r.text.size() > SIZE_MAX / ARABIC_SHAPING_EXPANSION_FACTOR_RETRY - ARABIC_SHAPING_SAFETY_MARGIN_RETRY)
+		if (buffers.shaped.capacity() < neededSize)
-		{
+			buffers.shaped.reserve(neededSize);
 			BIDI_LOG("BuildVisualBidiText: RTL run too large for shaping (%zu chars)", r.text.size());
 			continue; // Text too large to process safely
 		}
-		std::vector<wchar_t> shaped(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN, 0);
+		size_t outStart = buffers.shaped.size();
 		buffers.shaped.resize(outStart + (size_t)len * ARABIC_SHAPING_EXPANSION_FACTOR + ARABIC_SHAPING_SAFETY_MARGIN);
 		int outLen = Arabic_MakeShape(const_cast<wchar_t*>(s + start), len,
 		                               buffers.shaped.data() + outStart,
 		                               (int)(buffers.shaped.size() - outStart));
 		int outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
 		if (outLen <= 0)
-		{
+			return {s + start, len}; // Return unshaped on failure
 			BIDI_LOG("Arabic_MakeShape FAILED for RTL run of %zu characters", r.text.size());
 			BIDI_LOG("  WARNING: This RTL text segment will NOT be displayed!");
 			BIDI_LOG("  First few characters: U+%04X U+%04X U+%04X U+%04X",
 				r.text.size() > 0 ? (unsigned int)r.text[0] : 0,
 				r.text.size() > 1 ? (unsigned int)r.text[1] : 0,
 				r.text.size() > 2 ? (unsigned int)r.text[2] : 0,
 				r.text.size() > 3 ? (unsigned int)r.text[3] : 0);
 			continue;
 		}
-		// Retry once if buffer too small
+		buffers.shaped.resize(outStart + (size_t)outLen);
-		if (outLen >= (int)shaped.size())
+		return {buffers.shaped.data() + outStart, outLen};
-		{
+	};
 			shaped.assign(r.text.size() * ARABIC_SHAPING_EXPANSION_FACTOR_RETRY + ARABIC_SHAPING_SAFETY_MARGIN_RETRY, 0);
 			outLen = Arabic_MakeShape(r.text.data(), (int)r.text.size(), shaped.data(), (int)shaped.size());
 			if (outLen <= 0)
 				continue;
 			// Add error check instead of silent truncation
 			if (outLen > (int)shaped.size())
 			{
 				BIDI_LOG("Arabic_MakeShape: Buffer still too small after retry (%d > %zu)", outLen, shaped.size());
 				// Shaping failed critically, use unshaped text
 				continue;
 			}
 		}
-		r.text.assign(shaped.begin(), shaped.begin() + outLen);
+	// 4) produce visual order
 	}
 	// 4) produce visual order:
 	// - reverse RTL runs internally
 	// - reverse run sequence if base RTL
 	std::vector<wchar_t> visual;
 	visual.reserve((size_t)n);
-	auto emit_run = [&](const TBidiRun& r)
+	auto emitRun = [&](const TRunInfo& run)
 	{
 		if (run.dir == EBidiDir::RTL)
 		{
-			if (r.dir == EBidiDir::RTL)
+			// Shape and reverse RTL runs
 			std::pair<const wchar_t*, int> shaped = shapeRun(run.start, run.end);
 			const wchar_t* ptr = shaped.first;
 			int len = shaped.second;
 			if (ptr && len > 0)
 			{
-				for (int k = (int)r.text.size() - 1; k >= 0; --k)
+				for (int k = len - 1; k >= 0; --k)
-					visual.push_back(r.text[(size_t)k]);
+					visual.push_back(ptr[k]);
 			}
-			else
+		}
-			{
+		else
-				visual.insert(visual.end(), r.text.begin(), r.text.end());
+		{
-			}
+			// LTR runs: copy directly
-		};
+			visual.insert(visual.end(), s + run.start, s + run.end);
 		}
 	};
 	if (base == EBidiDir::LTR)
 	{
-		for (const auto& r : runs)
+		for (const auto& run : s_runs)
-			emit_run(r);
+			emitRun(run);
 	}
 	else
 	{
-		for (int i = (int)runs.size() - 1; i >= 0; --i)
+		for (int i = (int)s_runs.size() - 1; i >= 0; --i)
-			emit_run(runs[(size_t)i]);
+			emitRun(s_runs[(size_t)i]);
 	}
 	return visual;
@@ -763,7 +946,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
 		{
 			// Apply BiDi to message with auto-detection (don't force RTL)
 			// Let the BiDi algorithm detect base direction from first strong character
-			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
+			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
 			visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
 		}
 		visual.push_back(L' ');
@@ -787,7 +970,7 @@ static inline std::vector<wchar_t> BuildVisualChatMessage(
 		{
 			// Apply BiDi to message with auto-detection (don't force RTL)
 			// Let the BiDi algorithm detect base direction from first strong character
-			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, false);
+			std::vector<wchar_t> msgVisual = BuildVisualBidiText_Tagless(msg, msgLen, forceRTL);
 			visual.insert(visual.end(), msgVisual.begin(), msgVisual.end());
 		}
 	}
--- a/src/EterBase/Debug.cpp
+++ b/src/EterBase/Debug.cpp
@@ -16,34 +16,93 @@ const DWORD DEBUG_STRING_MAX_LEN = 1024;
 static int isLogFile = false;
 HWND g_PopupHwnd = NULL;
-// Convert UTF-8 char* -> wide and send to debugger (NO helper function, just a macro)
+// ============================================================================
 // OPTIMIZED LOGGING INFRASTRUCTURE
 // ============================================================================
 // Cached timestamp to avoid repeated time()/localtime() syscalls
 // Refreshes every ~100ms (good enough for logging, avoids syscall overhead)
 // Caches the month/day/hour/minute fields of the local time so that log calls
 // do not hit time()/localtime() each time. Call Update() before Format().
 struct TCachedTimestamp
 {
     DWORD lastUpdateMs = 0; // ELTimer_GetMSec() value at the last successful refresh
     int month = 0;          // 1..12 after a refresh; 0 means "never refreshed"
     int day = 0;
     int hour = 0;
     int minute = 0;
     // Refreshes the cached fields when they were never populated or are older
     // than 100ms according to ELTimer_GetMSec(); otherwise does nothing.
     void Update()
     {
         const DWORD now = ELTimer_GetMSec();
         // Without this check the very first call is skipped whenever the timer
         // still reads <= 100 (lastUpdateMs starts at 0), and Format() would
         // print month 00 / day 00 until the next refresh.
         const bool neverPopulated = (month == 0);
         if (!neverPopulated && now - lastUpdateMs <= 100)
             return;
         const time_t ct = time(0);
         const struct tm* pTm = localtime(&ct);
         // localtime() can return a null pointer on failure: keep the previous
         // fields and leave lastUpdateMs alone so the next call retries.
         if (!pTm)
             return;
         month = pTm->tm_mon + 1;
         day = pTm->tm_mday;
         hour = pTm->tm_hour;
         minute = pTm->tm_min;
         lastUpdateMs = now;
     }
     // Writes "MMDD HH:MM:mmmmm :: " into buf (truncating to bufSize).
     // The last field is ELTimer_GetMSec() % 60000 read at call time, not a cached value.
     void Format(char* buf, size_t bufSize) const
     {
         DWORD msec = ELTimer_GetMSec() % 60000;
         _snprintf_s(buf, bufSize, _TRUNCATE, "%02d%02d %02d:%02d:%05d :: ",
             month, day, hour, minute, (int)msec);
     }
 };
 static TCachedTimestamp g_cachedTimestamp;
 // Optimized debug output: Fast path for ASCII strings (avoids Utf8ToWide allocation)
 #ifdef _DEBUG
 #define DBG_OUT_W_UTF8(psz)                                                   \
    do {                                                                      \
        const char* __s = (psz) ? (psz) : "";                                 \
-        std::wstring __w = Utf8ToWide(__s);                                   \
+        size_t __len = strlen(__s);                                           \
-        OutputDebugStringW(__w.c_str());                                      \
+        if (Utf8Fast::IsAsciiOnly(__s, __len)) {                              \
            /* ASCII fast path: direct conversion, no allocation */           \
            wchar_t __wbuf[512];                                              \
            size_t __wlen = (__len < 511) ? __len : 511;                      \
            for (size_t __i = 0; __i < __wlen; ++__i)                         \
                __wbuf[__i] = (wchar_t)(unsigned char)__s[__i];               \
            __wbuf[__wlen] = L'\0';                                           \
            OutputDebugStringW(__wbuf);                                       \
        } else {                                                              \
            /* Non-ASCII: use full conversion */                              \
            std::wstring __w = Utf8ToWide(__s);                               \
            OutputDebugStringW(__w.c_str());                                  \
        }                                                                     \
    } while (0)
 #else
 #define DBG_OUT_W_UTF8(psz) do { (void)(psz); } while (0)
 #endif
 // Buffered log file writer
 // OPTIMIZATION: Buffered writes with periodic flush instead of per-write fflush()
 // - Collects writes in memory buffer
 // - Flushes when buffer is full OR every 500ms OR on shutdown
 // - Reduces disk I/O from 1000s of syncs to ~2 per second
 class CLogFile : public CSingleton<CLogFile>
 {
    public:
-        CLogFile() : m_fp(NULL) {}
+        CLogFile() : m_fp(NULL), m_bufferPos(0), m_lastFlushMs(0) {}
        virtual ~CLogFile()
        {
            Flush(); // Ensure all buffered data is written
            if (m_fp)
                fclose(m_fp);
            m_fp = NULL;
        }
        void Initialize()
        {
            m_fp = fopen("log/log.txt", "w");
            m_bufferPos = 0;
            m_lastFlushMs = ELTimer_GetMSec();
        }
        void Write(const char* c_pszMsg)
@@ -51,22 +110,63 @@ class CLogFile : public CSingleton<CLogFile>
            if (!m_fp)
                return;
-            time_t ct = time(0);
+            // Use cached timestamp (updated every ~100ms)
-            struct tm ctm = *localtime(&ct);
+            g_cachedTimestamp.Update();
            char timestamp[32];
            g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
-            fprintf(m_fp, "%02d%02d %02d:%02d:%05d :: %s",
+            // Calculate total length needed
-                ctm.tm_mon + 1,
+            size_t timestampLen = strlen(timestamp);
-                ctm.tm_mday,
+            size_t msgLen = c_pszMsg ? strlen(c_pszMsg) : 0;
-                ctm.tm_hour,
+            size_t totalLen = timestampLen + msgLen;
                ctm.tm_min,
                ELTimer_GetMSec() % 60000,
                c_pszMsg);
            // If this write would overflow the buffer, flush first
            if (m_bufferPos + totalLen >= BUFFER_SIZE - 1)
                Flush();
            // If message is larger than buffer, write directly (rare case)
            if (totalLen >= BUFFER_SIZE - 1)
            {
                fputs(timestamp, m_fp);
                if (c_pszMsg)
                    fputs(c_pszMsg, m_fp);
                fflush(m_fp);
                return;
            }
            // Append to buffer
            memcpy(m_buffer + m_bufferPos, timestamp, timestampLen);
            m_bufferPos += timestampLen;
            if (msgLen > 0)
            {
                memcpy(m_buffer + m_bufferPos, c_pszMsg, msgLen);
                m_bufferPos += msgLen;
            }
            // Periodic flush: every 500ms or when buffer is >75% full
            DWORD now = ELTimer_GetMSec();
            if (now - m_lastFlushMs > 500 || m_bufferPos > BUFFER_SIZE * 3 / 4)
                Flush();
        }
        void Flush()
        {
            if (!m_fp || m_bufferPos == 0)
                return;
            m_buffer[m_bufferPos] = '\0';
            fputs(m_buffer, m_fp);
            fflush(m_fp);
            m_bufferPos = 0;
            m_lastFlushMs = ELTimer_GetMSec();
        }
    protected:
        static const size_t BUFFER_SIZE = 8192; // 8KB buffer
        FILE* m_fp;
        char m_buffer[BUFFER_SIZE];
        size_t m_bufferPos;
        DWORD m_lastFlushMs;
 };
 static CLogFile gs_logfile;
@@ -220,9 +320,50 @@ void Tracef(const char* c_szFormat, ...)
        LogFile(szBuf);
 }
 // Buffered stderr writer for syserr (same pattern as CLogFile)
 // OPTIMIZATION: Reduces fflush(stderr) from every call to every 500ms
 static struct TSyserrBuffer
 {
    static const size_t BUFFER_SIZE = 4096;
    char buffer[BUFFER_SIZE];
    size_t pos = 0;
    DWORD lastFlushMs = 0;
    void Write(const char* msg, size_t len)
    {
        if (pos + len >= BUFFER_SIZE - 1)
            Flush();
        if (len >= BUFFER_SIZE - 1)
        {
            // Large message: write directly
            fwrite(msg, 1, len, stderr);
            fflush(stderr);
            return;
        }
        memcpy(buffer + pos, msg, len);
        pos += len;
        DWORD now = ELTimer_GetMSec();
        if (now - lastFlushMs > 500 || pos > BUFFER_SIZE * 3 / 4)
            Flush();
    }
    void Flush()
    {
        if (pos == 0)
            return;
        fwrite(buffer, 1, pos, stderr);
        fflush(stderr);
        pos = 0;
        lastFlushMs = ELTimer_GetMSec();
    }
 } g_syserrBuffer;
 void TraceError(const char* c_szFormat, ...)
 {
-//#ifndef _DISTRIBUTE 
+//#ifndef _DISTRIBUTE
    char szBuf[DEBUG_STRING_MAX_LEN + 2];
    strncpy_s(szBuf, sizeof(szBuf), "SYSERR: ", _TRUNCATE);
@@ -243,17 +384,14 @@ void TraceError(const char* c_szFormat, ...)
        szBuf[sizeof(szBuf) - 1] = '\0';
    }
-    time_t ct = time(0);
+    // OPTIMIZED: Use cached timestamp instead of time()/localtime() per call
-    struct tm ctm = *localtime(&ct);
+    g_cachedTimestamp.Update();
    char timestamp[32];
    g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
-    fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s",
+    // OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call
-        ctm.tm_mon + 1,
+    g_syserrBuffer.Write(timestamp, strlen(timestamp));
-        ctm.tm_mday,
+    g_syserrBuffer.Write(szBuf + 8, strlen(szBuf + 8)); // Skip "SYSERR: " prefix for stderr
        ctm.tm_hour,
        ctm.tm_min,
        ELTimer_GetMSec() % 60000,
        szBuf + 8);
    fflush(stderr);
 #ifdef _DEBUG
    DBG_OUT_W_UTF8(szBuf);
@@ -267,8 +405,7 @@ void TraceError(const char* c_szFormat, ...)
 void TraceErrorWithoutEnter(const char* c_szFormat, ...)
 {
-//#ifndef _DISTRIBUTE 
+//#ifndef _DISTRIBUTE
    char szBuf[DEBUG_STRING_MAX_LEN];
    va_list args;
@@ -276,17 +413,14 @@ void TraceErrorWithoutEnter(const char* c_szFormat, ...)
    _vsnprintf_s(szBuf, sizeof(szBuf), _TRUNCATE, c_szFormat, args);
    va_end(args);
-    time_t ct = time(0);
+    // OPTIMIZED: Use cached timestamp instead of time()/localtime() per call
-    struct tm ctm = *localtime(&ct);
+    g_cachedTimestamp.Update();
    char timestamp[32];
    g_cachedTimestamp.Format(timestamp, sizeof(timestamp));
-    fprintf(stderr, "%02d%02d %02d:%02d:%05d :: %s",
+    // OPTIMIZED: Write to buffered stderr instead of fprintf+fflush per call
-        ctm.tm_mon + 1,
+    g_syserrBuffer.Write(timestamp, strlen(timestamp));
-        ctm.tm_mday,
+    g_syserrBuffer.Write(szBuf, strlen(szBuf));
        ctm.tm_hour,
        ctm.tm_min,
        ELTimer_GetMSec() % 60000,
        szBuf + 8);
    fflush(stderr);
 #ifdef _DEBUG
    DBG_OUT_W_UTF8(szBuf);
@@ -349,7 +483,7 @@ void OpenLogFile(bool bUseLogFIle)
        std::filesystem::create_directory("log");
    }
-//#ifndef _DISTRIBUTE 
+//#ifndef _DISTRIBUTE
    _wfreopen(L"log/syserr.txt", L"w", stderr);
    if (bUseLogFIle)
@@ -360,6 +494,13 @@ void OpenLogFile(bool bUseLogFIle)
 //#endif
 }
 // Writes out whatever the buffered loggers are still holding: first the
 // pending stderr (syserr) bytes, then the CLogFile singleton's buffer.
 // Neither stream is closed here.
 void CloseLogFile()
 {
     // Flush all buffered output before shutdown
     g_syserrBuffer.Flush();
     CLogFile::Instance().Flush();
 }
 void OpenConsoleWindow()
 {
    AllocConsole();
--- a/src/EterLib/GrpTextInstance.cpp
+++ b/src/EterLib/GrpTextInstance.cpp
@@ -238,16 +238,88 @@ void CGraphicTextInstance::Update()
 	}
 	// Tag-aware BiDi rendering: Parse tags, apply BiDi per segment, track colors/hyperlinks
 	// OPTIMIZED: Use helper lambda to eliminate code duplication (was repeated 5+ times)
 	if (hasRTL || hasTags)
 	{
 		DWORD currentColor = dwColor;
 		int hyperlinkStep = 0; // 0=normal, 1=collecting metadata, 2=visible hyperlink
 		std::wstring hyperlinkMetadata;
-		std::vector<wchar_t> currentSegment;
+
 		// Use thread-local buffer to avoid per-call allocation
 		thread_local static std::vector<wchar_t> s_currentSegment;
 		s_currentSegment.clear();
 		SHyperlink currentHyperlink;
 		currentHyperlink.sx = currentHyperlink.ex = 0;
 		// In chat RTL, force RTL base direction so prefixes like "[hyperlink]" don't flip the paragraph to LTR.
 		const bool forceRTLForBidi = (m_isChatMessage && m_computedRTL);
 		// Shared segment-flush helper used by the tag handlers below.
 		// Passes the pending characters in s_currentSegment through BuildVisualBidiText_Tagless
 		// (with forceRTLForBidi as the direction flag), draws each resulting character in
 		// segColor, empties the segment, and returns the sum of the values __DrawCharacter returned.
 		// When nothing is pending it returns 0 and draws nothing.
 		auto FlushSegment = [&](DWORD segColor) -> int
 		{
 			if (s_currentSegment.empty())
 				return 0;
 			// Reorder the logical segment into visual order before drawing.
 			std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 				s_currentSegment.data(), (int)s_currentSegment.size(), forceRTLForBidi);
 			int drawnWidth = 0;
 			for (const wchar_t glyph : visual)
 			{
 				const int glyphWidth = __DrawCharacter(pFontTexture, glyph, segColor);
 				drawnWidth += glyphWidth;
 			}
 			// The segment has been consumed; start the next one empty.
 			s_currentSegment.clear();
 			return drawnWidth;
 		};
 		// Inserts the glyphs for `chars` at the FRONT of the draw list built so far
 		// (used to place a hyperlink before the message in RTL chat).
 		// Characters with no glyph info from the font texture are skipped.
 		// outWidth receives the summed advance of the inserted glyphs; the same amount is
 		// added to m_textWidth and to the sx/ex of every hyperlink already recorded.
 		auto PrependGlyphs = [&](CGraphicFontTexture* pFontTexture,
 		                         const std::vector<wchar_t>& chars,
 		                         DWORD color,
 		                         int& outWidth)
 		{
 			// Per-thread scratch vectors: cleared on every call so their capacity is reused.
 			thread_local static std::vector<CGraphicFontTexture::TCharacterInfomation*> s_newCharInfos;
 			thread_local static std::vector<DWORD> s_newColors;
 			s_newCharInfos.clear();
 			s_newColors.clear();
 			s_newCharInfos.reserve(chars.size());
 			s_newColors.reserve(chars.size());
 			outWidth = 0;
 			for (const wchar_t ch : chars)
 			{
 				auto* pInfo = pFontTexture->GetCharacterInfomation(ch);
 				if (pInfo)
 				{
 					s_newCharInfos.push_back(pInfo);
 					s_newColors.push_back(color);
 					outWidth += pInfo->advance;
 					// Keep the line height at least as tall as the tallest inserted glyph.
 					m_textHeight = std::max((WORD)pInfo->height, m_textHeight);
 				}
 			}
 			// Splice the new glyphs and their colors in ahead of everything drawn so far.
 			m_pCharInfoVector.insert(m_pCharInfoVector.begin(), s_newCharInfos.begin(), s_newCharInfos.end());
 			m_dwColorInfoVector.insert(m_dwColorInfoVector.begin(), s_newColors.begin(), s_newColors.end());
 			// Existing hyperlink ranges now sit further right by the inserted width.
 			for (auto& recordedLink : m_hyperlinkVector)
 			{
 				recordedLink.sx += outWidth;
 				recordedLink.ex += outWidth;
 			}
 			m_textWidth += outWidth;
 		};
 		// Parse text with tags
 		for (int i = 0; i < wTextLen;)
 		{
@@ -257,37 +329,15 @@ void CGraphicTextInstance::Update()
 			if (tagType == TEXT_TAG_COLOR)
 			{
-				// Flush current segment with BiDi before changing color
+				// Flush current segment before changing color
-				if (!currentSegment.empty())
+				currentHyperlink.ex += FlushSegment(currentColor);
 				{
 					// Use auto-detection for BiDi (don't force RTL)
 					std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 						currentSegment.data(), (int)currentSegment.size(), false);
 					for (size_t j = 0; j < visual.size(); ++j)
 					{
 						int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
 						currentHyperlink.ex += w;
 					}
 					currentSegment.clear();
 				}
 				currentColor = htoi(tagExtra.c_str(), 8);
 				i += tagLen;
 			}
 			else if (tagType == TEXT_TAG_RESTORE_COLOR)
 			{
 				// Flush segment before restoring color
-				if (!currentSegment.empty())
+				currentHyperlink.ex += FlushSegment(currentColor);
 				{
 					// Use auto-detection for BiDi (don't force RTL)
 					std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 						currentSegment.data(), (int)currentSegment.size(), false);
 					for (size_t j = 0; j < visual.size(); ++j)
 					{
 						int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
 						currentHyperlink.ex += w;
 					}
 					currentSegment.clear();
 				}
 				currentColor = dwColor;
 				i += tagLen;
 			}
@@ -303,18 +353,7 @@ void CGraphicTextInstance::Update()
 				{
 					// End of metadata, start visible section
 					// Flush any pending non-hyperlink segment first
-					if (!currentSegment.empty())
+					currentHyperlink.ex += FlushSegment(currentColor);
 					{
 						// Use auto-detection for BiDi (don't force RTL)
 						std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 							currentSegment.data(), (int)currentSegment.size(), false);
 						for (size_t j = 0; j < visual.size(); ++j)
 						{
 							int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
 							currentHyperlink.ex += w;
 						}
 						currentSegment.clear();
 					}
 					hyperlinkStep = 2;
 					currentHyperlink.text = hyperlinkMetadata;
@@ -323,80 +362,85 @@ void CGraphicTextInstance::Update()
 				else if (hyperlinkStep == 2)
 				{
 					// End of visible section - render hyperlink text with proper Arabic handling
-					// Format: [Arabic Text] or [English Text]
+					// In RTL chat: we want the hyperlink chunk to appear BEFORE the message, even if logically appended.
-					// Keep brackets in position, reverse Arabic content between them
+					if (!s_currentSegment.empty())
 					if (!currentSegment.empty())
 					{
-						// Find bracket positions
+						// OPTIMIZED: Use thread-local buffer for visible rendering
 						thread_local static std::vector<wchar_t> s_visibleToRender;
 						s_visibleToRender.clear();
 						// Find bracket positions: [ ... ]
 						int openBracket = -1, closeBracket = -1;
-						for (size_t idx = 0; idx < currentSegment.size(); ++idx)
+						for (size_t idx = 0; idx < s_currentSegment.size(); ++idx)
 						{
-							if (currentSegment[idx] == L'[' && openBracket == -1)
+							if (s_currentSegment[idx] == L'[' && openBracket == -1)
 								openBracket = (int)idx;
-							else if (currentSegment[idx] == L']' && closeBracket == -1)
+							else if (s_currentSegment[idx] == L']' && closeBracket == -1)
 								closeBracket = (int)idx;
 						}
 						if (openBracket >= 0 && closeBracket > openBracket)
 						{
-							// Extract content between brackets
+							// Keep '['
-							std::vector<wchar_t> content(
+							s_visibleToRender.push_back(L'[');
 								currentSegment.begin() + openBracket + 1,
 								currentSegment.begin() + closeBracket);
-							// Apply Arabic shaping to content
+							// Extract inside content and apply BiDi
-							std::vector<wchar_t> shaped(content.size() * 2 + 16, 0);
+							thread_local static std::vector<wchar_t> s_content;
-							int shapedLen = Arabic_MakeShape(content.data(), (int)content.size(),
+							s_content.assign(
-							                                 shaped.data(), (int)shaped.size());
+								s_currentSegment.begin() + openBracket + 1,
 								s_currentSegment.begin() + closeBracket);
-							// Render: "[" + reversed_arabic + "]"
+							// FIX: Use false to let BiDi auto-detect direction from content
-							// 1. Opening bracket
+							// This ensures English items like [Sword+9] stay LTR
-							int w = __DrawCharacter(pFontTexture, L'[', currentColor);
+							// while Arabic items like [درع فولاذي+9] are properly RTL
-							currentHyperlink.ex += w;
+							std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 								s_content.data(), (int)s_content.size(), false);
-							// 2. Arabic content (shaped and REVERSED for RTL display)
+							s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end());
 							if (shapedLen > 0)
 							{
 								for (int j = shapedLen - 1; j >= 0; --j)
 								{
 									w = __DrawCharacter(pFontTexture, shaped[j], currentColor);
 									currentHyperlink.ex += w;
 								}
 							}
 							else
 							{
 								// Fallback: reverse original content
 								for (int j = (int)content.size() - 1; j >= 0; --j)
 								{
 									w = __DrawCharacter(pFontTexture, content[j], currentColor);
 									currentHyperlink.ex += w;
 								}
 							}
-							// 3. Closing bracket
+							// Keep ']'
-							w = __DrawCharacter(pFontTexture, L']', currentColor);
+							s_visibleToRender.push_back(L']');
 							currentHyperlink.ex += w;
 							// 4. Render any text after closing bracket (if any)
 							for (size_t idx = closeBracket + 1; idx < currentSegment.size(); ++idx)
 							{
 								w = __DrawCharacter(pFontTexture, currentSegment[idx], currentColor);
 								currentHyperlink.ex += w;
 							}
 						}
 						else
 						{
-							// No brackets found - render as-is (shouldn't happen for hyperlinks)
+							// No brackets: apply BiDi to whole segment
-							for (size_t j = 0; j < currentSegment.size(); ++j)
+							// FIX: Use false to let BiDi auto-detect direction from content
 							std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 								s_currentSegment.data(), (int)s_currentSegment.size(), false);
 							s_visibleToRender.insert(s_visibleToRender.end(), visual.begin(), visual.end());
 						}
 						// Ensure a space AFTER the hyperlink chunk (so it becomes "[hyperlink] اختبار...")
 						s_visibleToRender.push_back(L' ');
 						// Key behavior:
 						// In RTL chat, place hyperlink BEFORE the message by prepending glyphs.
 						if (m_isChatMessage && m_computedRTL)
 						{
 							int addedWidth = 0;
 							PrependGlyphs(pFontTexture, s_visibleToRender, currentColor, addedWidth);
 							// Record the hyperlink range at the beginning (0..addedWidth)
 							currentHyperlink.sx = 0;
 							currentHyperlink.ex = addedWidth;
 							m_hyperlinkVector.push_back(currentHyperlink);
 						}
 						else
 						{
 							// LTR or non-chat: keep original "append" behavior
 							currentHyperlink.sx = currentHyperlink.ex;
 							for (size_t j = 0; j < s_visibleToRender.size(); ++j)
 							{
-								int w = __DrawCharacter(pFontTexture, currentSegment[j], currentColor);
+								int w = __DrawCharacter(pFontTexture, s_visibleToRender[j], currentColor);
 								currentHyperlink.ex += w;
 							}
 							m_hyperlinkVector.push_back(currentHyperlink);
 						}
 						currentSegment.clear();
 					}
-					m_hyperlinkVector.push_back(currentHyperlink);
+
 					hyperlinkStep = 0;
 					s_currentSegment.clear();
 				}
 				i += tagLen;
 			}
@@ -411,24 +455,14 @@ void CGraphicTextInstance::Update()
 				{
 					// Add to current segment
 					// Will be BiDi-processed for normal text, or rendered directly for hyperlinks
-					currentSegment.push_back(wTextBuf[i]);
+					s_currentSegment.push_back(wTextBuf[i]);
 				}
 				i += tagLen;
 			}
 		}
-		// Flush any remaining segment
+		// Flush any remaining segment using optimized helper
-		if (!currentSegment.empty())
+		currentHyperlink.ex += FlushSegment(currentColor);
 		{
 			// Use auto-detection for BiDi (don't force RTL)
 			std::vector<wchar_t> visual = BuildVisualBidiText_Tagless(
 				currentSegment.data(), (int)currentSegment.size(), false);
 			for (size_t j = 0; j < visual.size(); ++j)
 			{
 				int w = __DrawCharacter(pFontTexture, visual[j], currentColor);
 				currentHyperlink.ex += w;
 			}
 		}
 		pFontTexture->UpdateTexture();
 		m_isUpdate = true;
--- a/src/EterLocale/Arabic.cpp
+++ b/src/EterLocale/Arabic.cpp
@@ -1,6 +1,7 @@
 #include "StdAfx.h"
 #include "Arabic.h"
 #include <assert.h>
 #include <vector>
 enum ARABIC_CODE
 {
@@ -243,110 +244,151 @@ bool Arabic_IsComb2(wchar_t code)
 	return false;
 }
 // Helper: true when `code` is in the Arabic map and has a non-zero INITIAL or MEDIAL
 // mapping, i.e. a form that connects to the following letter; false otherwise.
 static inline bool Arabic_CanJoinRight(wchar_t code)
 {
 	return Arabic_IsInMap(code)
 		&& (Arabic_GetMap(code, INITIAL) != 0 || Arabic_GetMap(code, MEDIAL) != 0);
 }
 // Helper: true when `code` is in the Arabic map and either has a non-zero MEDIAL or
 // FINAL mapping or is accepted by Arabic_IsNext; false for anything outside the map.
 static inline bool Arabic_CanJoinLeft(wchar_t code)
 {
 	return Arabic_IsInMap(code)
 		&& (Arabic_GetMap(code, MEDIAL) != 0
 			|| Arabic_GetMap(code, FINAL) != 0
 			|| Arabic_IsNext(code));
 }
 // Optimized O(n) Arabic shaping algorithm
 // Previous: O(n²) due to backward/forward scans for each character
 // Now: O(n) single forward pass with state tracking
 size_t Arabic_MakeShape(wchar_t* src, size_t srcLen, wchar_t* dst, size_t dstLen)
 {
-	// Runtime validation instead of assert (which is disabled in release builds)
+	// Runtime validation
 	if (!src || !dst || srcLen == 0 || dstLen < srcLen)
 		return 0;
-	const size_t srcLastIndex = srcLen - 1;
+	// Phase 1: Pre-scan to find the next non-composing Arabic letter for each position
 	// This converts O(n) inner loops into O(1) lookups
 	// Use thread-local buffer to avoid per-call allocation
 	thread_local static std::vector<size_t> s_nextArabic;
 	if (s_nextArabic.size() < srcLen + 1)
 		s_nextArabic.resize(srcLen + 1);
 	// Build next-arabic lookup (reverse scan)
 	size_t nextArabicIdx = srcLen; // Invalid index = no next arabic
 	for (size_t i = srcLen; i > 0; --i)
 	{
 		size_t idx = i - 1;
 		s_nextArabic[idx] = nextArabicIdx;
 		wchar_t ch = src[idx];
 		if (Arabic_IsInMap(ch) && !Arabic_IsInComposing(ch))
 			nextArabicIdx = idx;
 	}
 	s_nextArabic[srcLen] = srcLen; // Sentinel
 	// Phase 2: Single forward pass with state tracking
 	size_t dstIndex = 0;
 	bool prevJoins = false; // Does previous Arabic letter join to the right?
 	unsigned dstIndex = 0;	
 	for (size_t srcIndex = 0; srcIndex < srcLen; ++srcIndex)
 	{
 		wchar_t cur = src[srcIndex];
-		//printf("now %x\n", cur);
+		// Composing marks: copy directly, don't affect joining state
 		if (Arabic_IsInComposing(cur))
 		{
 			if (dstIndex < dstLen)
 				dst[dstIndex++] = cur;
 			continue;
 		}
 		if (Arabic_IsInMap(cur))
 		{
-			// Get the previous character
+			// Find next joinable Arabic letter using pre-computed lookup
 			wchar_t prev = 0;			
 			{
 				size_t prevIndex = srcIndex;
 				while (prevIndex > 0)
 				{
 					prevIndex--;
 					prev = src[prevIndex];
 					//printf("\tprev %d:%x\n", prevIndex, cur);
 					if (Arabic_IsInComposing(prev))
 						continue;
 					else
 						break;
 				}
 				if ((srcIndex == 0) || 
 					(!Arabic_IsInMap(prev)) || 
 					(!Arabic_GetMap(prev, INITIAL) && !Arabic_GetMap(prev, MEDIAL)))
 				{
 					//printf("\tprev not defined\n");
 					prev = 0;
 				}
 			}
 			// Get the next character
 			wchar_t next = 0;
 			size_t nextIdx = s_nextArabic[srcIndex];
 			if (nextIdx < srcLen)
 			{
-				size_t nextIndex = srcIndex;
+				wchar_t nextChar = src[nextIdx];
-				while (nextIndex < srcLastIndex)
+				if (Arabic_CanJoinLeft(nextChar))
-				{
+					next = nextChar;
 					nextIndex++;
 					next = src[nextIndex];
 					if (Arabic_IsInComposing(next))
 						continue;
 					else
 						break;
 				}
 				if ((nextIndex == srcLen) || 
 					(!Arabic_IsInMap(next)) ||
 					(!Arabic_GetMap(next, MEDIAL) && !Arabic_GetMap(next, FINAL) && !Arabic_IsNext(next)))
 				{
 					//printf("\tnext not defined\n");
 					next = 0;
 				}
 			}
-			if (Arabic_IsComb1(cur) && Arabic_IsComb2(next))
+			// Handle LAM-ALEF composition
 			if (Arabic_IsComb1(cur) && nextIdx < srcLen && Arabic_IsComb2(src[nextIdx]))
 			{
-				if (prev)
+				wchar_t composed;
-					dst[dstIndex] = Arabic_GetComposition(cur, next, FINAL);
+				if (prevJoins)
 					composed = Arabic_GetComposition(cur, src[nextIdx], FINAL);
 				else
-					dst[dstIndex] = Arabic_GetComposition(cur, next, ISOLATED);
+					composed = Arabic_GetComposition(cur, src[nextIdx], ISOLATED);
-				//printf("\tGot me a complex:%x\n", dst[dstIndex]);
+				if (dstIndex < dstLen)
 					dst[dstIndex++] = composed;
-				srcIndex++;
+				// Skip the ALEF that was combined
-				dstIndex++;				
+				srcIndex = nextIdx;
 				// LAM-ALEF doesn't join to the right
 				prevJoins = false;
 				continue;
 			}
-			else if (prev && next && (dst[dstIndex] = Arabic_GetMap(cur, MEDIAL)))
+
 			// Determine form based on joining context
 			wchar_t shaped = 0;
 			bool curJoinsRight = false;
 			if (prevJoins && next)
 			{
-				//printf("\tGot prev & next:%x\n", dst[dstIndex]);
+				// Both sides join: MEDIAL
-				dstIndex++;				
+				shaped = Arabic_GetMap(cur, MEDIAL);
 				if (shaped)
 					curJoinsRight = Arabic_CanJoinRight(cur);
 			}
-			else if (prev && (dst[dstIndex] = Arabic_GetMap(cur, FINAL)))
+
 			if (!shaped && prevJoins)
 			{
-				//printf("\tGot prev:%x\n", dst[dstIndex]);
+				// Only left joins: FINAL
-				dstIndex++;				
+				shaped = Arabic_GetMap(cur, FINAL);
 				// FINAL form doesn't extend to the right
 				curJoinsRight = false;
 			}
-			else if (next && (dst[dstIndex] = Arabic_GetMap(cur, INITIAL)))
+
 			if (!shaped && next)
 			{
-				//printf("\tGot next:%x\n", dst[dstIndex]);
+				// Only right joins: INITIAL
-				dstIndex++;				
+				shaped = Arabic_GetMap(cur, INITIAL);
 				if (shaped)
 					curJoinsRight = Arabic_CanJoinRight(cur);
 			}
-			else
+
 			if (!shaped)
 			{
-				dst[dstIndex] = Arabic_GetMap(cur, ISOLATED);
+				// No joining: ISOLATED
-				//printf("\tGot nothing:%x\n", dst[dstIndex]);
+				shaped = Arabic_GetMap(cur, ISOLATED);
-				dstIndex++;
+				curJoinsRight = false;
 			}
 			if (!shaped)
 				shaped = cur; // Fallback to original if no mapping
 			if (dstIndex < dstLen)
 				dst[dstIndex++] = shaped;
 			// Update state for next character
 			prevJoins = curJoinsRight;
 		}
 		else
 		{
-			dst[dstIndex] = cur;
+			// Non-Arabic character: copy directly, breaks joining
-			dstIndex++;
+			if (dstIndex < dstLen)
 				dst[dstIndex++] = cur;
 			prevJoins = false;
 		}
 	}
 	return dstIndex;
 }
--- a/src/UserInterface/PythonSkill.cpp
+++ b/src/UserInterface/PythonSkill.cpp
@@ -1279,11 +1279,60 @@ float CPythonSkill::SSkillData::ProcessFormula(CPoly * pPoly, float fSkillLevel,
 	return pPoly->Eval();
 }
-static void ReplaceFirst(std::string& s, const char* needle, const std::string& repl)
+// Format specifiers supported in skill descriptions
 // Lookup table of the printf-style placeholders searched for in description strings.
 // The element pointers are const as well as the characters, so no code can re-point
 // an entry of this file-scope table at runtime.
 static const char* const FORMAT_SPECIFIERS[] = {
 	"%.0f",  // Integer (no decimals)
 	"%.1f",  // 1 decimal place
 	"%.2f",  // 2 decimal places
 	"%d",    // Integer (alternative)
 };
 // Entry count derived from the array itself so it cannot drift from the table above.
 static const size_t FORMAT_SPECIFIER_COUNT = sizeof(FORMAT_SPECIFIERS) / sizeof(FORMAT_SPECIFIERS[0]);
 // Find and replace the first occurrence of any format specifier with the given value
 // Returns true if a replacement was made
 static bool ReplaceNextFormatSpecifier(std::string& s, float value)
 {
-	size_t pos = s.find(needle);
+	size_t bestPos = std::string::npos;
-	if (pos != std::string::npos)
+	size_t bestLen = 0;
-		s.replace(pos, strlen(needle), repl);
+	const char* bestSpec = nullptr;
 	// Find the first (leftmost) format specifier in the string
 	for (size_t i = 0; i < FORMAT_SPECIFIER_COUNT; ++i)
 	{
 		size_t pos = s.find(FORMAT_SPECIFIERS[i]);
 		if (pos != std::string::npos && (bestPos == std::string::npos || pos < bestPos))
 		{
 			bestPos = pos;
 			bestLen = strlen(FORMAT_SPECIFIERS[i]);
 			bestSpec = FORMAT_SPECIFIERS[i];
 		}
 	}
 	if (bestPos == std::string::npos)
 		return false;
 	// Format the value according to the specifier found
 	char szValue[64];
 	if (strcmp(bestSpec, "%.0f") == 0 || strcmp(bestSpec, "%d") == 0)
 		_snprintf(szValue, sizeof(szValue), "%.0f", floorf(value));
 	else if (strcmp(bestSpec, "%.1f") == 0)
 		_snprintf(szValue, sizeof(szValue), "%.1f", value);
 	else // %.2f or other
 		_snprintf(szValue, sizeof(szValue), "%.2f", value);
 	s.replace(bestPos, bestLen, szValue);
 	return true;
 }
 // Collapses every escaped "%%" pair in s into a single '%', editing s in place.
 // The search resumes just past each kept '%', so a run such as "%%%%" becomes "%%".
 static void UnescapePercent(std::string& s)
 {
 	std::string::size_type cursor = s.find("%%");
 	while (cursor != std::string::npos)
 	{
 		// Removing one character of the pair leaves the surviving '%' at cursor.
 		s.erase(cursor, 1);
 		cursor = s.find("%%", cursor + 1);
 	}
 }
 const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float fSkillLevel)
@@ -1303,33 +1352,20 @@ const char* CPythonSkill::SSkillData::GetAffectDescription(DWORD dwIndex, float
 	float fMinValue = ProcessFormula(&minPoly, fSkillLevel);
 	float fMaxValue = ProcessFormula(&maxPoly, fSkillLevel);
 	// Take absolute values
 	if (fMinValue < 0.0f) fMinValue = -fMinValue;
 	if (fMaxValue < 0.0f) fMaxValue = -fMaxValue;
 	const bool wantsInt = (desc.find("%.0f") != std::string::npos);
 	if (wantsInt)
 	{
 		fMinValue = floorf(fMinValue);
 		fMaxValue = floorf(fMaxValue);
 	}
 	char szMin[64], szMax[64];
 	if (wantsInt)
 	{
 		_snprintf(szMin, sizeof(szMin), "%.0f", fMinValue);
 		_snprintf(szMax, sizeof(szMax), "%.0f", fMaxValue);
 	}
 	else
 	{
 		_snprintf(szMin, sizeof(szMin), "%.2f", fMinValue);
 		_snprintf(szMax, sizeof(szMax), "%.2f", fMaxValue);
 	}
 	static std::string out;
 	out = desc;
-	ReplaceFirst(out, "%.0f", szMin);
+	// Replace format specifiers in order of appearance
-	ReplaceFirst(out, "%.0f", szMax);
+	// First specifier gets min value, second gets max value
 	ReplaceNextFormatSpecifier(out, fMinValue);
 	ReplaceNextFormatSpecifier(out, fMaxValue);
 	// Convert escaped %% to single % (for display like "30%")
 	UnescapePercent(out);
 	return out.c_str();
 }