Full Unicode patch with RTL Support & BiDi logic.

This commit is well documented, so no need to tell you my life story.

Full Unicode patch with RTL Support & BiDi logic.

Removed the legacy code-page handling; all text is now normalised to UTF-8 (Windows code page 65001).

It also adds the following keyboard shortcuts:

CTRL + A : select text (highlighted)
CTRL + C : copy
CTRL + V : paste
CTRL + X : cut
CTRL + Y : redo
CTRL + Z : undo
This commit is contained in:
rtw1x1
2025-12-26 12:32:43 +00:00
parent d37607baa1
commit a955c50744
86 changed files with 4076 additions and 3839 deletions

View File

@@ -3,34 +3,48 @@
using namespace script;
// True when the top three bits of ch, (ch & 0xE0), are 0xA0, 0xC0 or 0xE0,
// i.e. for a byte value of 0xA0 or above.
// NOTE(review): presumably flags bytes of a double-byte (Hangul/Han) character
// in the legacy code pages — confirm.
#define ishan(ch) (((ch) & 0xE0) > 0x90)
// True when ch is whitespace according to isspace() and is not flagged by ishan().
#define isnhspace(ch) (!ishan(ch) && isspace(ch))
// Returns a pointer to the start of the character that follows the one at p,
// never advancing past end.
//
// p   : current position inside the buffer (may be null).
// end : one-past-the-last byte of the buffer.
//
// Returns end when p is null or already at/after end. Otherwise the result is
// always strictly greater than p, so callers that loop on it cannot stall.
//
// The lead byte only announces how many continuation bytes (10xxxxxx) MAY
// follow; each one is verified before it is consumed. A malformed or truncated
// sequence therefore ends at the first byte that is not a continuation byte,
// instead of swallowing following ASCII such as a closing ']'. This also keeps
// the forward step consistent with Utf8Prev, which only steps back over real
// continuation bytes.
static const char* Utf8Next(const char* p, const char* end)
{
    if (!p || p >= end)
        return end;

    const unsigned char lead = static_cast<unsigned char>(*p);

    // Number of continuation bytes announced by the lead byte.
    int expected = 0;
    if ((lead >> 5) == 0x06)        // 110xxxxx -> 2-byte sequence
        expected = 1;
    else if ((lead >> 4) == 0x0E)   // 1110xxxx -> 3-byte sequence
        expected = 2;
    else if ((lead >> 3) == 0x1E)   // 11110xxx -> 4-byte sequence
        expected = 3;
    // ASCII, stray continuation bytes and invalid lead bytes advance by one.

    const char* next = p + 1;
    while (expected > 0 && next < end &&
           (static_cast<unsigned char>(*next) & 0xC0) == 0x80)
    {
        ++next;
        --expected;
    }
    return next;
}
extern DWORD GetDefaultCodePage();
// Steps back from p to the start of the preceding character inside the buffer
// that begins at base. Returns base when either pointer is null or when p is
// already at (or before) base.
static const char* Utf8Prev(const char* base, const char* p)
{
    const bool canStepBack = (base != nullptr) && (p != nullptr) && (p > base);
    if (!canStepBack)
        return base;

    const char* cursor = p;
    do
    {
        // Always retreat at least one byte, then keep going while the byte we
        // landed on is a continuation byte (10xxxxxx) and we are not at base.
        --cursor;
    } while (cursor > base && (static_cast<unsigned char>(*cursor) & 0xC0) == 0x80);

    return cursor;
}
// Scans the first len bytes of base one character at a time and returns a
// pointer to the first single-byte character equal to test. Multi-byte
// characters are stepped over without being compared. Returns a null pointer
// when base is null or when no match is found.
// NOTE(review): this listing is a rendered diff — each replaced statement
// appears twice, first in its old CharNextExA/code-page form and then in its
// new Utf8Next form, and the hunk header below hides part of the final else.
const char* LocaleString_FindChar(const char* base, int len, char test)
{
if (!base)
return NULL;
return nullptr;
DWORD codePage = GetDefaultCodePage();
int pos = 0;
while (pos < len)
{
const char* cur = base + pos;
const char* next = CharNextExA(codePage, cur, 0);
int cur_len = next - cur;
// New form: the step is bounded by base + len.
const char* next = Utf8Next(cur, base + len);
int cur_len = int(next - cur);
// Multi-byte character: advance past it without comparing.
if (cur_len > 1)
{
pos += cur_len;
}
else if (1 == cur_len)
else if (cur_len == 1)
{
// Single-byte character: the only place a match can be reported.
if (*cur == test)
return cur;
++pos;
}
else
@@ -38,36 +52,31 @@ const char* LocaleString_FindChar(const char* base, int len, char test)
// A step of zero or less ends the scan.
break;
}
}
return NULL;
return nullptr;
}
// Removes trailing whitespace from the first len bytes of base by overwriting
// each trailing single-byte whitespace, '\n' or '\r' character with '\0'.
// Trimming stops at the first character (counted from the end) that is
// multi-byte or not whitespace. Returns the remaining length, never negative.
// NOTE(review): this listing is a rendered diff — replaced statements appear
// twice, first in the old CharPrevExA form and then in the new Utf8Prev form.
int LocaleString_RightTrim(char* base, int len)
{
DWORD codePage = GetDefaultCodePage();
int pos = len;
while (pos > 0)
{
// cur is one past the character being examined.
char* cur = base + pos;
char* prev = CharPrevExA(codePage, base, cur , 0);
int prev_len = cur - prev;
char* prev = (char*)Utf8Prev(base, cur);
int prev_len = int(cur - prev);
// Only single-byte characters are candidates for trimming.
if (prev_len != 1)
break;
if (!isspace((unsigned char) *prev) && *prev != '\n' && *prev != '\r')
break;
if (!isspace((unsigned char)*prev) && *prev != '\n' && *prev != '\r')
break;
*prev = '\0';
pos -= prev_len;
}
if (pos > 0)
return pos;
return 0;
return (pos > 0) ? pos : 0;
}
// Convenience overload: right-trims the NUL-terminated string base over its
// full strlen() length by forwarding to the (base, len) overload.
void LocaleString_RightTrim(char* base)
@@ -75,52 +84,10 @@ void LocaleString_RightTrim(char* base)
LocaleString_RightTrim(base, strlen(base));
}
// Legacy in-place right trim whose behaviour depends on the code page returned
// by GetDefaultCodePage(). Does nothing when base is null.
// NOTE(review): in this diff listing the function appears only in its
// code-page form with no UTF-8 replacement lines, which suggests the patch
// removes it — confirm against the repository.
void OLD_rtrim(char* base)
{
if (!base)
return;
DWORD codePage = GetDefaultCodePage();
// Code pages 949 and 936 take a dedicated path.
if (949 == codePage || 936 == codePage)
{
// NOTE(review): for an empty string end starts at base - 1, so the
// end != base loop condition is true on entry — verify callers never pass "".
char* end = base + strlen(base) - 1;
while (end != base)
{
// Stop when the byte is neither isnhspace() whitespace nor '\n'/'\r', or when
// the byte before it is above 0xa0.
if (!isnhspace((unsigned char) *end) && *end != '\n' && *end != '\r' || (end!=base && *((unsigned char*)end-1)>0xa0))
break;
*end = '\0';
end = CharPrevExA(codePage, base, end, 0);
}
}
else
{
// Other code pages: clear trailing single-byte whitespace, walking back one
// character at a time with CharPrevExA.
char* end = base + strlen(base);
while (end != base)
{
char* prev = CharPrevExA(codePage, base, end, 0);
int prev_len = end - prev;
// A multi-byte character ends the trim.
if (prev_len != 1)
break;
if (!isspace((unsigned char) *prev) && *prev != '\n' && *prev != '\r')
break;
*prev = '\0';
end = prev;
}
}
}
// Advances through the NUL-terminated string cur and returns a pointer to the
// first single-byte character that is not whitespace, '\n' or '\r'. If no such
// character is found, returns the position where scanning stopped.
// NOTE(review): multi-byte characters are stepped over (cur = next) rather
// than returned, so leading multi-byte text is skipped as well — confirm this
// is intended.
// NOTE(review): this listing is a rendered diff — the old (codePage, cur)
// signature and the new (cur) signature both appear, replaced statements
// appear twice, and the loopCount guard is hidden behind the hunk header.
const char* LocaleString_Skip(DWORD codePage, const char* cur)
const char* LocaleString_Skip(const char* cur)
{
int loopCount = 0;
// Upper bound handed to Utf8Next so a step never runs past the terminator.
const char* end = cur + strlen(cur);
while (*cur)
{
@@ -130,44 +97,42 @@ const char* LocaleString_Skip(DWORD codePage, const char* cur)
break;
}
const char* next = CharNextExA(codePage, cur, 0);
int cur_len = next - cur;
const char* next = Utf8Next(cur, end);
int cur_len = int(next - cur);
// Multi-byte character: step over it.
if (cur_len > 1)
{
cur = next;
}
else if (1 == cur_len)
else if (cur_len == 1)
{
// First single-byte non-whitespace character ends the skip.
if (!isspace((unsigned char) *cur) && *cur != '\n' && *cur != '\r')
if (!isspace((unsigned char)*cur) && *cur != '\n' && *cur != '\r')
return cur;
++cur;
}
else
{
break;
}
}
return cur;
}
// Parses the first arg_len bytes of c_arg_base into name/value pairs and
// appends them to argList as TArg entries.
//
// Names are accumulated in szName (32 bytes + terminator) and values in
// szValue (64 bytes + terminator); bytes are copied one character at a time,
// with iCharLen holding the byte length of the current character.
//
// Returns false after an overflow is reported (the visible message is
// "argument name overflow: must be shorter than 32 letters"); returns true
// otherwise. After the loop, a pending pair is right-trimmed and appended when
// both name and value are non-empty.
//
// NOTE(review): this listing is a rendered diff — replaced or re-indented
// statements appear twice, and the hunk headers hide several branch
// conditions, including the declaration of c and the overflow checks.
bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
{
char szName[32 + 1];
char szValue[64 + 1];
char szName[32 + 1];
char szValue[64 + 1];
int iNameLen = 0;
int iValueLen = 0;
int iNameLen = 0;
int iValueLen = 0;
int iCharLen = 0;
int pos = 0;
bool isValue = false;
bool isValue = false;
DWORD codePage = GetDefaultCodePage();
while (pos < arg_len)
{
while (pos < arg_len)
{
// end bounds the UTF-8 step so it cannot run past the argument text.
const char* end = c_arg_base + arg_len;
const char* cur = c_arg_base + pos;
const char* next = CharNextExA(codePage, cur, 0);
const char* next = Utf8Next(cur, end);
iCharLen = next - cur;
// Multi-byte character: copied whole into the value or name buffer below.
if (iCharLen > 1)
@@ -180,7 +145,7 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
return false;
}
memcpy(szValue+iValueLen, cur, iCharLen);
memcpy(szValue+iValueLen, cur, iCharLen);
iValueLen += iCharLen;
szValue[iValueLen] = '\0';
}
@@ -191,7 +156,7 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
TraceError("argument name overflow: must be shorter than 32 letters");
return false;
}
memcpy(szName+iNameLen, cur, iCharLen);
memcpy(szName+iNameLen, cur, iCharLen);
iNameLen += iCharLen;
szName[iNameLen] = '\0';
}
@@ -220,11 +185,9 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
{
// From here on, characters are collected into the value buffer.
isValue = true;
}
// Not inside a value and the name has not started yet: skip whitespace.
else if (!isValue && iNameLen == 0 && isspace((unsigned char) c))
{
}
// Skip line breaks.
else if (c == '\r' || c == '\n')
{
}
@@ -238,9 +201,9 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
return false;
}
memcpy(szValue+iValueLen, cur, iCharLen);
memcpy(szValue+iValueLen, cur, iCharLen);
iValueLen += iCharLen;
szValue[iValueLen] = '\0';
szValue[iValueLen] = '\0';
}
else
{
@@ -249,10 +212,10 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
TraceError("argument name overflow: must be shorter than 32 letters");
return false;
}
memcpy(szName+iNameLen, cur, iCharLen);
memcpy(szName+iNameLen, cur, iCharLen);
iNameLen += iCharLen;
szName[iNameLen] = '\0';
}
szName[iNameLen] = '\0';
}
}
}
else
@@ -261,122 +224,128 @@ bool Group::GetArg(const char *c_arg_base, int arg_len, TArgList & argList)
}
// Advance by the byte length of the character just handled.
pos += iCharLen;
}
}
// Flush the pair still pending when the input ends.
if (iNameLen != 0 && iValueLen != 0)
{
if (iNameLen != 0 && iValueLen != 0)
{
iNameLen = LocaleString_RightTrim(szName, iNameLen);
iValueLen = LocaleString_RightTrim(szValue, iValueLen);
argList.push_back(TArg(szName, szValue));
}
argList.push_back(TArg(szName, szValue));
}
return true;
return true;
}
// Tokenises stSource into m_cmdList, which is cleared first.
//
// - A multi-byte character becomes a "LETTER" command whose "value" argument
//   holds the bytes of that character.
// - A '[' starts a bracketed command that runs to the next ']' found by
//   LocaleString_FindChar. The content is left-skipped and right-trimmed; the
//   text before the first space is the command name and the rest is parsed by
//   GetArg. Without a space, the whole content is the name.
// - Any other single-byte character becomes a "LETTER" command as well.
//
// Returns false when stSource is empty, when a ']' is missing, when the
// bracket content is 1024 bytes or longer, or when GetArg fails; returns true
// otherwise.
//
// NOTE(review): this listing is a rendered diff — the old and new signatures
// both appear, replaced or re-indented statements appear twice, and the hunk
// header near the end hides the lines between the '[' branch and its else.
bool Group::Create(const std::string & stSource)
bool Group::Create(const std::string& stSource)
{
m_cmdList.clear();
if (stSource.empty())
return false;
const char *str_base = stSource.c_str();
if (!str_base || !*str_base)
{
TraceError("Source file has no content");
return false;
}
int str_len = stSource.length();
int str_pos = 0;
DWORD codePage = GetDefaultCodePage();
const char* str_base = stSource.c_str();
if (!str_base || !*str_base)
{
TraceError("Source file has no content");
return false;
}
char box_data[1024 + 1];
const int str_len = (int)stSource.size();
int str_pos = 0;
// Scratch buffer for the text between '[' and ']'.
char box_data[1024 + 1];
// Function-local static, so the same string object is reused across calls.
static std::string stLetter;
while (str_pos < str_len)
{
TCmd cmd;
while (str_pos < str_len)
{
TCmd cmd;
const char* word = str_base + str_pos;
const char* word_next = CharNextExA(codePage, word, 0);
int word_len = word_next - word;
const char* end = str_base + str_len;
const char* word_next = Utf8Next(word, end);
if (!word_next || word_next <= word)
{
// Invalid UTF-8 sequence or broken helper -> advance 1 byte to avoid infinite loop
word_next = word + 1;
}
const int word_len = (int)(word_next - word);
// Multi-byte character: emitted as one LETTER command.
if (word_len > 1)
{
str_pos += word_len;
{
stLetter.assign(word, word_next);
cmd.name.assign("LETTER");
cmd.argList.push_back(TArg("value", stLetter));
m_cmdList.push_back(cmd);
}
stLetter.assign(word, word_next);
cmd.name.assign("LETTER");
cmd.argList.push_back(TArg("value", stLetter));
m_cmdList.push_back(cmd);
}
else if (word_len == 1)
{
const char cur = *word;
// '[' opens a bracketed command.
if ('[' == cur)
if (cur == '[')
{
++str_pos;
const char* box_begin = str_base + str_pos;
const char* box_end = LocaleString_FindChar(box_begin, str_len - str_pos, ']');
const char* box_end = LocaleString_FindChar(box_begin, str_len - str_pos, ']');
if (!box_end)
{
TraceError(" !! PARSING ERROR - Syntax Error : %s\n", box_begin);
return false;
}
// Resume scanning just after the closing ']'.
str_pos += box_end - box_begin + 1;
str_pos += (int)(box_end - box_begin) + 1;
int data_len = 0;
{
// Copy the bracket content, minus what LocaleString_Skip steps over, into box_data.
const char* data_begin = LocaleString_Skip(codePage, box_begin);
const char* data_begin = LocaleString_Skip(box_begin);
const char* data_end = box_end;
data_len = data_end - data_begin;
data_len = (int)(data_end - data_begin);
if (data_len >= 1024)
{
TraceError(" !! PARSING ERROR - Buffer Overflow : %d, %s\n", data_len, str_base);
return false;
}
memcpy(box_data, data_begin, data_len);
memcpy(box_data, data_begin, (size_t)data_len);
box_data[data_len] = '\0';
data_len = LocaleString_RightTrim(box_data, data_len); // trim trailing whitespace
data_len = LocaleString_RightTrim(box_data, data_len);
}
{
// The first space separates the command name from its arguments.
const char* space = LocaleString_FindChar(box_data, data_len, ' ');
if (space) // arguments are present
if (space)
{
int name_len = space - box_data;
const int name_len = (int)(space - box_data);
cmd.name.assign(box_data, name_len);
const char* space_next = CharNextExA(codePage, space, 0);
const char* arg = LocaleString_Skip(codePage, space_next);
int arg_len = data_len - (arg - box_data);
const char* data_end = box_data + data_len;
const char* space_next = Utf8Next(space, data_end);
// Fall back to a one-byte step if the helper did not advance.
if (!space_next || space_next <= space)
space_next = space + 1;
const char* arg = LocaleString_Skip(space_next);
const int arg_len = (int)(data_len - (arg - box_data));
if (!GetArg(arg, arg_len, cmd.argList))
{
TraceError(" !! PARSING ERROR - Unknown Arguments : %d, %s\n", arg_len, arg);
return false;
}
}
else // no arguments, so the whole string is the command
else
{
cmd.name.assign(box_data);
cmd.argList.clear();
}
m_cmdList.push_back(cmd);
}
}
@@ -387,51 +356,49 @@ bool Group::Create(const std::string & stSource)
else
{
// Plain single-byte character: emitted as one LETTER command.
++str_pos;
{
stLetter.assign(1, cur);
cmd.name.assign("LETTER");
cmd.argList.push_back(TArg("value", stLetter));
m_cmdList.push_back(cmd);
}
stLetter.assign(1, cur);
cmd.name.assign("LETTER");
cmd.argList.push_back(TArg("value", stLetter));
m_cmdList.push_back(cmd);
}
}
else
{
// A character length of zero or less ends the scan.
break;
}
}
}
return true;
return true;
}
// Copies the front command of m_cmdList into cmd and removes it from the list.
// Returns false, leaving cmd untouched, when the list is empty; true otherwise.
// NOTE(review): each statement appears twice in this rendered diff, presumably
// the old line followed by its re-indented replacement — confirm.
bool Group::GetCmd(TCmd & cmd)
{
if (m_cmdList.empty())
return false;
if (m_cmdList.empty())
return false;
cmd = m_cmdList.front();
m_cmdList.pop_front();
return true;
cmd = m_cmdList.front();
m_cmdList.pop_front();
return true;
}
// Copies the front command of m_cmdList into cmd without removing it.
// Returns false, leaving cmd untouched, when the list is empty; true otherwise.
// NOTE(review): each statement appears twice in this rendered diff, presumably
// the old line followed by its re-indented replacement — confirm.
bool Group::ReadCmd(TCmd & cmd)
{
if (m_cmdList.empty())
return false;
if (m_cmdList.empty())
return false;
cmd = m_cmdList.front();
return true;
cmd = m_cmdList.front();
return true;
}
// Returns a mutable reference to the stored error string m_stError.
// NOTE(review): the return statement appears twice in this rendered diff,
// presumably the old line followed by its re-indented replacement — confirm.
std::string & Group::GetError()
{
return m_stError;
return m_stError;
}
// Replaces the stored error string m_stError with the C string c_pszError.
// NOTE(review): c_pszError is passed straight to std::string::assign, so it is
// presumably expected to be non-null — verify callers.
// NOTE(review): the assignment appears twice in this rendered diff, presumably
// the old line followed by its re-indented replacement — confirm.
void Group::SetError(const char * c_pszError)
{
m_stError.assign(c_pszError);
m_stError.assign(c_pszError);
}
Group::Group()