Merge pull request #65 from savisxss/main

A bit late, but Happy New Year!
This commit is contained in:
rtw1x1
2026-01-04 09:29:02 +00:00
committed by GitHub
22 changed files with 1298 additions and 17 deletions

103
src/EterLib/BufferPool.cpp Normal file
View File

@@ -0,0 +1,103 @@
#include "StdAfx.h"
#include "BufferPool.h"
#include <algorithm>
// Constructs an empty pool; the counter of freshly allocated buffers starts at zero.
CBufferPool::CBufferPool()
: m_totalAllocated(0)
{
}
// Drops every pooled buffer (via Clear()) before the pool is destroyed.
CBufferPool::~CBufferPool()
{
Clear();
}
// Hands out an empty buffer whose capacity is at least minSize.
//
// The pool is searched for the smallest pooled buffer that is large enough
// (best fit). That buffer is removed from the pool, cleared and returned.
// When nothing fits, a fresh buffer is reserved and the allocation counter
// is incremented.
//
// The fresh allocation happens with the mutex released, so a large reserve()
// does not stall other threads that are calling Acquire()/Release().
std::vector<uint8_t> CBufferPool::Acquire(size_t minSize)
{
    {
        std::lock_guard<std::mutex> lock(m_mutex);

        size_t bestIndex = SIZE_MAX;
        size_t bestCapacity = SIZE_MAX;
        for (size_t i = 0; i < m_pool.size(); ++i)
        {
            const size_t capacity = m_pool[i].capacity;
            if (capacity >= minSize && capacity < bestCapacity)
            {
                bestIndex = i;
                bestCapacity = capacity;
                if (bestCapacity == minSize)
                    break; // exact fit, cannot do better
            }
        }

        if (bestIndex != SIZE_MAX)
        {
            std::vector<uint8_t> result = std::move(m_pool[bestIndex].buffer);
            m_pool.erase(m_pool.begin() + bestIndex);
            result.clear(); // keep the capacity, drop stale contents
            return result;
        }
    }

    // Pool miss: allocate outside the critical section.
    std::vector<uint8_t> newBuffer;
    newBuffer.reserve(minSize);

    {
        // Counted only after reserve() succeeded, as before.
        std::lock_guard<std::mutex> lock(m_mutex);
        m_totalAllocated++;
    }
    return newBuffer;
}
// Returns a buffer to the pool so a later Acquire() can reuse its storage.
//
// Buffers without capacity, or larger than MAX_BUFFER_SIZE, are left with
// the caller. When the pool already holds MAX_POOL_SIZE buffers, the incoming
// buffer replaces the smallest pooled one only if it is strictly larger;
// otherwise it is likewise left with the caller.
void CBufferPool::Release(std::vector<uint8_t>&& buffer)
{
    const size_t incoming = buffer.capacity();
    const bool poolable = (incoming != 0) && !(incoming > MAX_BUFFER_SIZE);
    if (!poolable)
        return;

    std::lock_guard<std::mutex> guard(m_mutex);

    if (m_pool.size() < MAX_POOL_SIZE)
    {
        m_pool.emplace_back(std::move(buffer));
        return;
    }

    // Pool is full: locate the first entry with the smallest capacity.
    auto victim = m_pool.begin();
    for (auto it = m_pool.begin(); it != m_pool.end(); ++it)
    {
        if (it->capacity < victim->capacity)
            victim = it;
    }

    if (victim != m_pool.end() && victim->capacity < incoming)
        *victim = TPooledBuffer(std::move(buffer));
}
// Number of buffers currently parked in the pool (read under the mutex).
size_t CBufferPool::GetPoolSize() const
{
    std::lock_guard<std::mutex> guard(m_mutex);
    const size_t pooledCount = m_pool.size();
    return pooledCount;
}
// Number of times Acquire() had to allocate a fresh buffer (read under the mutex).
size_t CBufferPool::GetTotalAllocated() const
{
    std::lock_guard<std::mutex> guard(m_mutex);
    const size_t allocations = m_totalAllocated;
    return allocations;
}
// Sum of the recorded capacities of all pooled buffers, in bytes.
size_t CBufferPool::GetTotalMemoryPooled() const
{
    std::lock_guard<std::mutex> guard(m_mutex);

    size_t pooledBytes = 0;
    for (size_t i = 0; i < m_pool.size(); ++i)
        pooledBytes += m_pool[i].capacity;
    return pooledBytes;
}
// Discards every pooled buffer. The allocation counter is left untouched.
void CBufferPool::Clear()
{
    std::lock_guard<std::mutex> guard(m_mutex);
    m_pool.clear();
}

50
src/EterLib/BufferPool.h Normal file
View File

@@ -0,0 +1,50 @@
#ifndef __INC_ETERLIB_BUFFERPOOL_H__
#define __INC_ETERLIB_BUFFERPOOL_H__
#include <vector>
#include <mutex>
#include <cstdint>
// Buffer pool for file I/O operations
// Mutex-protected pool of reusable byte buffers.
// Acquire() hands out a buffer with at least the requested capacity and
// Release() gives one back so its storage can be reused.
class CBufferPool
{
public:
CBufferPool();
~CBufferPool();
// Get buffer with minimum size
std::vector<uint8_t> Acquire(size_t minSize);
// Return buffer to pool
void Release(std::vector<uint8_t>&& buffer);
// Get statistics
size_t GetPoolSize() const;
size_t GetTotalAllocated() const;
size_t GetTotalMemoryPooled() const; // Total bytes held in pool
// Clear pool
void Clear();
private:
// A pooled buffer together with the capacity it had when it was pooled.
struct TPooledBuffer
{
std::vector<uint8_t> buffer;
size_t capacity;
// `buffer` is declared before `capacity`, so it is already initialized
// when `capacity` reads buffer.capacity().
TPooledBuffer(std::vector<uint8_t>&& buf)
: buffer(std::move(buf))
, capacity(buffer.capacity())
{
}
};
std::vector<TPooledBuffer> m_pool;
mutable std::mutex m_mutex; // mutable so the const getters can lock it
size_t m_totalAllocated;
static const size_t MAX_POOL_SIZE = 64; // maximum number of pooled buffers
static const size_t MAX_BUFFER_SIZE = 64 * 1024 * 1024; // larger buffers are never pooled
};
#endif // __INC_ETERLIB_BUFFERPOOL_H__

View File

@@ -0,0 +1,59 @@
#ifndef __INC_ETERLIB_DECODEDIMAGEDATA_H__
#define __INC_ETERLIB_DECODEDIMAGEDATA_H__
#include <vector>
#include <cstdint>
#include <d3d9.h>
// Decoded image data for GPU upload
// Result of decoding an image file, ready to be turned into a texture.
struct TDecodedImageData
{
    // Layout of the bytes stored in `pixels`.
    enum EFormat
    {
        FORMAT_UNKNOWN = 0,
        FORMAT_RGBA8,
        FORMAT_RGB8,
        FORMAT_DDS,
    };

    std::vector<uint8_t> pixels;
    int width = 0;
    int height = 0;
    EFormat format = FORMAT_UNKNOWN;
    D3DFORMAT d3dFormat = D3DFMT_UNKNOWN;
    bool isDDS = false;
    int mipLevels = 1;

    // All members start from the defaults declared above.
    TDecodedImageData()
    {
    }

    // Resets every field to its default; the pixel vector is emptied in place.
    void Clear()
    {
        mipLevels = 1;
        isDDS = false;
        d3dFormat = D3DFMT_UNKNOWN;
        format = FORMAT_UNKNOWN;
        height = 0;
        width = 0;
        pixels.clear();
    }

    // True when both dimensions are positive and pixel data is present.
    bool IsValid() const
    {
        if (width <= 0 || height <= 0)
            return false;
        return !pixels.empty();
    }

    // Number of bytes stored in `pixels`.
    size_t GetDataSize() const
    {
        return pixels.size();
    }
};
#endif // __INC_ETERLIB_DECODEDIMAGEDATA_H__

View File

@@ -0,0 +1,270 @@
#include "StdAfx.h"
#include "FileLoaderThreadPool.h"
#include "BufferPool.h"
#include "ImageDecoder.h"
#include "PackLib/PackManager.h"
#include <algorithm>
static const bool USE_STAGED_TEXTURE_LOADING = true;
// Builds an idle pool: no completed-queue, shutdown flag cleared, counters at zero.
// No thread is started here; that happens in Initialize().
CFileLoaderThreadPool::CFileLoaderThreadPool()
: m_pCompletedQueue(nullptr)
, m_bShutdown(false)
, m_nextRequestID(0)
, m_activeTasks(0)
, m_threadCount(0)
{
}
// Stops and joins the workers and frees the queues by calling Shutdown().
CFileLoaderThreadPool::~CFileLoaderThreadPool()
{
Shutdown();
}
bool CFileLoaderThreadPool::Initialize(unsigned int threadCount)
{
if (!m_workers.empty())
{
TraceError("CFileLoaderThreadPool::Initialize: Already initialized");
return false;
}
if (threadCount == 0)
{
threadCount = std::thread::hardware_concurrency();
if (threadCount == 0)
threadCount = 4;
else
threadCount = std::max(4u, threadCount / 2);
}
threadCount = std::max(4u, std::min(16u, threadCount));
m_threadCount = threadCount;
Tracenf("CFileLoaderThreadPool: Initializing with %u worker threads", threadCount);
m_pCompletedQueue = new SPSCQueue<TLoadResult>(COMPLETED_QUEUE_SIZE);
m_workers.reserve(threadCount);
for (unsigned int i = 0; i < threadCount; ++i)
{
TWorkerThread worker;
worker.pRequestQueue = new SPSCQueue<TLoadRequest>(REQUEST_QUEUE_SIZE);
worker.bBusy.store(false, std::memory_order_relaxed);
try
{
worker.thread = std::thread(&CFileLoaderThreadPool::WorkerThreadFunction, this, i);
}
catch (const std::exception& e)
{
TraceError("CFileLoaderThreadPool::Initialize: Failed to create thread %u: %s", i, e.what());
delete worker.pRequestQueue;
worker.pRequestQueue = nullptr;
Shutdown();
return false;
}
m_workers.push_back(std::move(worker));
}
return true;
}
void CFileLoaderThreadPool::Shutdown()
{
if (m_workers.empty())
return;
// Signal shutdown
m_bShutdown.store(true, std::memory_order_release);
// Wait for all workers to finish
for (auto& worker : m_workers)
{
if (worker.thread.joinable())
worker.thread.join();
// Cleanup request queue
if (worker.pRequestQueue)
{
delete worker.pRequestQueue;
worker.pRequestQueue = nullptr;
}
}
m_workers.clear();
// Cleanup completed queue
if (m_pCompletedQueue)
{
delete m_pCompletedQueue;
m_pCompletedQueue = nullptr;
}
m_threadCount = 0;
}
// Queues an asynchronous load of fileName.
//
// The request goes to the worker with the shortest queue; if that queue is
// full, the remaining workers are tried once each in round-robin order.
// Files with a .dds/.png/.jpg/.tga/.bmp/.jpeg extension (case-insensitive)
// are flagged to be decoded on the worker thread.
//
// Returns false when the pool is not initialized or every queue is full.
bool CFileLoaderThreadPool::Request(const std::string& fileName)
{
    if (m_workers.empty())
    {
        TraceError("CFileLoaderThreadPool::Request: Thread pool not initialized");
        return false;
    }

    TLoadRequest request;
    request.stFileName = fileName;
    request.requestID = m_nextRequestID.fetch_add(1, std::memory_order_relaxed);
    request.decodeImage = false;

    if (USE_STAGED_TEXTURE_LOADING)
    {
        const size_t dotPos = fileName.find_last_of('.');
        if (dotPos != std::string::npos && dotPos + 1 < fileName.size())
        {
            const char* ext = fileName.c_str() + dotPos;
            const size_t extLen = fileName.size() - dotPos; // includes the dot
            if ((extLen == 4 && (_stricmp(ext, ".dds") == 0 || _stricmp(ext, ".png") == 0 ||
                _stricmp(ext, ".jpg") == 0 || _stricmp(ext, ".tga") == 0 || _stricmp(ext, ".bmp") == 0)) ||
                (extLen == 5 && _stricmp(ext, ".jpeg") == 0))
            {
                request.decodeImage = true;
            }
        }
    }

    // Offset 0 is the preferred (least busy) worker. Each queue is tried
    // exactly once; the queue that just reported "full" is not retried.
    const unsigned int firstWorker = SelectLeastBusyWorker();
    for (unsigned int offset = 0; offset < m_threadCount; ++offset)
    {
        const unsigned int workerIdx = (firstWorker + offset) % m_threadCount;
        if (m_workers[workerIdx].pRequestQueue->Push(request))
        {
            m_activeTasks.fetch_add(1, std::memory_order_relaxed);
            return true;
        }
    }

    TraceError("CFileLoaderThreadPool::Request: All worker queues full for file: %s", fileName.c_str());
    return false;
}
// Pops one finished load into `result`.
// Returns false when the pool has no completed-queue or the queue is empty.
// Every successful pop decrements the outstanding-task counter.
bool CFileLoaderThreadPool::Fetch(TLoadResult& result)
{
    const bool popped = (m_pCompletedQueue != nullptr) && m_pCompletedQueue->Pop(result);
    if (popped)
        m_activeTasks.fetch_sub(1, std::memory_order_relaxed);
    return popped;
}
size_t CFileLoaderThreadPool::GetPendingCount() const
{
size_t total = 0;
for (const auto& worker : m_workers)
{
if (worker.pRequestQueue)
total += worker.pRequestQueue->Size();
}
return total;
}
// True when the outstanding-task counter is zero, i.e. every request that was
// accepted by Request() has also been taken out again through Fetch().
bool CFileLoaderThreadPool::IsIdle() const
{
return m_activeTasks.load(std::memory_order_acquire) == 0;
}
// Index of the worker whose request queue is currently shortest; ties go to
// the lowest index. Reads m_workers[0] unconditionally, so the caller must
// make sure at least one worker exists.
unsigned int CFileLoaderThreadPool::SelectLeastBusyWorker() const
{
    unsigned int bestWorker = 0;
    size_t shortestQueue = m_workers[0].pRequestQueue->Size();

    unsigned int candidate = 1;
    while (candidate < m_threadCount)
    {
        const size_t depth = m_workers[candidate].pRequestQueue->Size();
        if (depth < shortestQueue)
        {
            bestWorker = candidate;
            shortestQueue = depth;
        }
        ++candidate;
    }
    return bestWorker;
}
// Body of one worker thread.
//
// Loops until the shutdown flag is set: pops a request from this worker's
// queue, loads the file through CPackManager (using the shared buffer pool),
// optionally decodes it as an image, and pushes the result onto the
// completed-queue. While no request is available the thread backs off:
// it first spins, then yields, and sleeps 1 ms after 1000 empty polls.
void CFileLoaderThreadPool::WorkerThreadFunction(unsigned int workerIndex)
{
    TWorkerThread& worker = m_workers[workerIndex];
    SPSCQueue<TLoadRequest>* pRequestQueue = worker.pRequestQueue;
    CBufferPool* pBufferPool = CPackManager::instance().GetBufferPool();

    Tracenf("CFileLoaderThreadPool: Worker thread %u started", workerIndex);

    int idleCount = 0;
    while (!m_bShutdown.load(std::memory_order_acquire))
    {
        TLoadRequest request;
        if (!pRequestQueue->Pop(request))
        {
            // Nothing to do: progressive back-off.
            idleCount++;
            if (idleCount > 1000)
            {
                Sleep(1);
                idleCount = 0;
            }
            else if (idleCount > 10)
            {
                std::this_thread::yield();
            }
            continue;
        }

        idleCount = 0;
        worker.bBusy.store(true, std::memory_order_release);

        TLoadResult result;
        result.stFileName = request.stFileName;
        result.requestID = request.requestID;
        result.File.clear();
        result.hasDecodedImage = false;

        // A failed load may leave `File` resized but only partially filled;
        // forward an empty buffer instead so the consumer never sees that as
        // valid file data.
        if (!CPackManager::instance().GetFileWithPool(request.stFileName, result.File, pBufferPool))
            result.File.clear();

        if (request.decodeImage && !result.File.empty())
        {
            if (CImageDecoder::DecodeImage(result.File.data(), result.File.size(), result.decodedImage))
            {
                result.hasDecodedImage = true;
                result.File.clear(); // raw bytes are no longer needed
            }
        }

        // Completed-queue full: retry until there is room or shutdown starts
        // (in which case the result is dropped).
        while (!m_pCompletedQueue->Push(result))
        {
            std::this_thread::yield();
            if (m_bShutdown.load(std::memory_order_acquire))
                break;
        }

        worker.bBusy.store(false, std::memory_order_release);
    }

    Tracenf("CFileLoaderThreadPool: Worker thread %u stopped", workerIndex);
}

View File

@@ -0,0 +1,90 @@
#ifndef __INC_ETERLIB_FILELOADERTHREADPOOL_H__
#define __INC_ETERLIB_FILELOADERTHREADPOOL_H__
#include <vector>
#include <thread>
#include <atomic>
#include "SPSCQueue.h"
#include "PackLib/PackManager.h"
#include "DecodedImageData.h"
// Pool of worker threads that load (and optionally decode) files in the
// background. Each worker has its own request queue; all workers push their
// results into one shared completed-queue that is drained through Fetch().
class CFileLoaderThreadPool
{
public:
// One queued load request.
struct TLoadRequest
{
std::string stFileName;
uint32_t requestID;
bool decodeImage; // when set, the worker also tries to decode the file as an image
};
// Outcome of one request.
struct TLoadResult
{
std::string stFileName;
TPackFile File; // raw file bytes; cleared by the worker once decoding succeeded
uint32_t requestID;
TDecodedImageData decodedImage;
bool hasDecodedImage; // true when decodedImage holds the decoded file
};
public:
CFileLoaderThreadPool();
~CFileLoaderThreadPool();
// threadCount == 0 lets the pool derive a thread count from the hardware concurrency.
bool Initialize(unsigned int threadCount = 0);
void Shutdown();
bool Request(const std::string& fileName);
bool Fetch(TLoadResult& result);
size_t GetPendingCount() const;
bool IsIdle() const;
private:
// Per-worker state. Move-only: moving transfers the thread handle and the
// ownership of the request queue pointer.
struct TWorkerThread
{
std::thread thread;
SPSCQueue<TLoadRequest>* pRequestQueue;
std::atomic<bool> bBusy;
TWorkerThread() : pRequestQueue(nullptr), bBusy(false) {}
// std::atomic is not movable, so the flag value is copied by hand.
TWorkerThread(TWorkerThread&& other) noexcept
: thread(std::move(other.thread))
, pRequestQueue(other.pRequestQueue)
, bBusy(other.bBusy.load())
{
other.pRequestQueue = nullptr;
}
TWorkerThread& operator=(TWorkerThread&& other) noexcept
{
if (this != &other)
{
thread = std::move(other.thread);
pRequestQueue = other.pRequestQueue;
bBusy.store(other.bBusy.load());
other.pRequestQueue = nullptr;
}
return *this;
}
TWorkerThread(const TWorkerThread&) = delete;
TWorkerThread& operator=(const TWorkerThread&) = delete;
};
void WorkerThreadFunction(unsigned int workerIndex);
unsigned int SelectLeastBusyWorker() const;
private:
std::vector<TWorkerThread> m_workers;
SPSCQueue<TLoadResult>* m_pCompletedQueue;
std::atomic<bool> m_bShutdown; // set by Shutdown(), polled by the workers
std::atomic<uint32_t> m_nextRequestID;
std::atomic<int> m_activeTasks; // Fast IsIdle check
unsigned int m_threadCount;
static const size_t REQUEST_QUEUE_SIZE = 16384; // Doubled from 8192
static const size_t COMPLETED_QUEUE_SIZE = 32768; // Doubled from 16384
};
#endif // __INC_ETERLIB_FILELOADERTHREADPOOL_H__

View File

@@ -1,5 +1,6 @@
#include "StdAfx.h"
#include "GrpImage.h"
#include "DecodedImageData.h"
CGraphicImage::CGraphicImage(const char * c_szFileName, DWORD dwFilter) :
CResource(c_szFileName),
@@ -79,6 +80,23 @@ bool CGraphicImage::OnLoad(int iSize, const void * c_pvBuf)
return true;
}
// Builds the image texture from already decoded pixel data.
// On success the image rectangle is set to cover the whole texture.
// Returns false when the decoded data is invalid or the texture cannot be created.
bool CGraphicImage::OnLoadFromDecodedData(const TDecodedImageData& decodedImage)
{
    if (decodedImage.IsValid())
    {
        m_imageTexture.SetFileName(CResource::GetFileName());

        const bool created = m_imageTexture.CreateFromDecodedData(decodedImage, D3DFMT_UNKNOWN, m_dwFilter);
        if (created)
        {
            m_rect.left = 0;
            m_rect.top = 0;
            m_rect.right = m_imageTexture.GetWidth();
            m_rect.bottom = m_imageTexture.GetHeight();
            return true;
        }
    }
    return false;
}
void CGraphicImage::OnClear()
{
// Tracef("Image Destroy : %s\n", m_pszFileName);

View File

@@ -5,6 +5,8 @@
#include "Resource.h"
#include "GrpImageTexture.h"
struct TDecodedImageData;
class CGraphicImage : public CResource
{
public:
@@ -28,6 +30,8 @@ class CGraphicImage : public CResource
const CGraphicTexture & GetTextureReference() const;
CGraphicTexture * GetTexturePointer();
bool OnLoadFromDecodedData(const TDecodedImageData& decodedImage);
protected:
bool OnLoad(int iSize, const void * c_pvBuf);

View File

@@ -2,9 +2,15 @@
#include "PackLib/PackManager.h"
#include "GrpImageTexture.h"
#include "EterImageLib/DDSTextureLoader9.h"
#include "DecodedImageData.h"
#include <stb_image.h>
#if defined(_M_IX86) || defined(_M_X64)
#include <emmintrin.h> // SSE2
#include <tmmintrin.h> // SSSE3 (for _mm_shuffle_epi8)
#endif
bool CGraphicImageTexture::Lock(int* pRetPitch, void** ppRetPixels, int level)
{
D3DLOCKED_RECT lockedRect;
@@ -110,17 +116,41 @@ bool CGraphicImageTexture::CreateFromSTB(UINT bufSize, const void* c_pvBuf)
unsigned char* data = stbi_load_from_memory((stbi_uc*)c_pvBuf, bufSize, &width, &height, &channels, 4); // force RGBA
if (data) {
LPDIRECT3DTEXTURE9 texture;
if (SUCCEEDED(ms_lpd3dDevice->CreateTexture(width, height, 1, 0, channels == 4 ? D3DFMT_A8R8G8B8 : D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT, &texture, nullptr))) {
if (SUCCEEDED(ms_lpd3dDevice->CreateTexture(width, height, 1, 0, channels == 4 ? D3DFMT_A8R8G8B8 : D3DFMT_X8R8G8B8, D3DPOOL_MANAGED, &texture, nullptr))) {
D3DLOCKED_RECT rect;
if (SUCCEEDED(texture->LockRect(0, &rect, nullptr, 0))) {
uint8_t* dstData = (uint8_t*)rect.pBits;
uint8_t* srcData = (uint8_t*)data;
for (size_t i = 0; i < width * height; ++i, dstData += 4, srcData += 4) {
dstData[0] = srcData[2];
dstData[1] = srcData[1];
dstData[2] = srcData[0];
dstData[3] = srcData[3];
size_t pixelCount = width * height;
#if defined(_M_IX86) || defined(_M_X64)
{
size_t simdPixels = pixelCount & ~3;
__m128i shuffle_mask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
for (size_t i = 0; i < simdPixels; i += 4) {
__m128i pixels = _mm_loadu_si128((__m128i*)(srcData + i * 4));
pixels = _mm_shuffle_epi8(pixels, shuffle_mask);
_mm_storeu_si128((__m128i*)(dstData + i * 4), pixels);
}
for (size_t i = simdPixels; i < pixelCount; ++i) {
size_t idx = i * 4;
dstData[idx + 0] = srcData[idx + 2];
dstData[idx + 1] = srcData[idx + 1];
dstData[idx + 2] = srcData[idx + 0];
dstData[idx + 3] = srcData[idx + 3];
}
}
#else
for (size_t i = 0; i < pixelCount; ++i) {
size_t idx = i * 4;
dstData[idx + 0] = srcData[idx + 2];
dstData[idx + 1] = srcData[idx + 1];
dstData[idx + 2] = srcData[idx + 0];
dstData[idx + 3] = srcData[idx + 3];
}
#endif
texture->UnlockRect(0);
m_width = width;
@@ -228,6 +258,98 @@ bool CGraphicImageTexture::CreateFromDiskFile(const char * c_szFileName, D3DFORM
return CreateDeviceObjects();
}
bool CGraphicImageTexture::CreateFromDecodedData(const TDecodedImageData& decodedImage, D3DFORMAT d3dFmt, DWORD dwFilter)
{
assert(ms_lpd3dDevice != NULL);
assert(m_lpd3dTexture == NULL);
if (!decodedImage.IsValid())
return false;
m_bEmpty = true;
if (decodedImage.isDDS)
{
// DDS format - use DirectX loader
if (!CreateFromDDSTexture(decodedImage.pixels.size(), decodedImage.pixels.data()))
return false;
}
else if (decodedImage.format == TDecodedImageData::FORMAT_RGBA8)
{
LPDIRECT3DTEXTURE9 texture;
D3DFORMAT format = D3DFMT_A8R8G8B8;
if (FAILED(ms_lpd3dDevice->CreateTexture(
decodedImage.width,
decodedImage.height,
1,
0,
format,
D3DPOOL_MANAGED,
&texture,
nullptr)))
{
return false;
}
D3DLOCKED_RECT rect;
if (SUCCEEDED(texture->LockRect(0, &rect, nullptr, 0)))
{
uint8_t* dstData = (uint8_t*)rect.pBits;
const uint8_t* srcData = decodedImage.pixels.data();
size_t pixelCount = decodedImage.width * decodedImage.height;
#if defined(_M_IX86) || defined(_M_X64)
{
size_t simdPixels = pixelCount & ~3;
__m128i shuffle_mask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
for (size_t i = 0; i < simdPixels; i += 4) {
__m128i pixels = _mm_loadu_si128((__m128i*)(srcData + i * 4));
pixels = _mm_shuffle_epi8(pixels, shuffle_mask);
_mm_storeu_si128((__m128i*)(dstData + i * 4), pixels);
}
for (size_t i = simdPixels; i < pixelCount; ++i) {
size_t idx = i * 4;
dstData[idx + 0] = srcData[idx + 2];
dstData[idx + 1] = srcData[idx + 1];
dstData[idx + 2] = srcData[idx + 0];
dstData[idx + 3] = srcData[idx + 3];
}
}
#else
for (size_t i = 0; i < pixelCount; ++i) {
size_t idx = i * 4;
dstData[idx + 0] = srcData[idx + 2];
dstData[idx + 1] = srcData[idx + 1];
dstData[idx + 2] = srcData[idx + 0];
dstData[idx + 3] = srcData[idx + 3];
}
#endif
texture->UnlockRect(0);
m_width = decodedImage.width;
m_height = decodedImage.height;
m_lpd3dTexture = texture;
m_bEmpty = false;
}
else
{
texture->Release();
return false;
}
}
else
{
TraceError("CreateFromDecodedData: Unsupported decoded image format");
return false;
}
return !m_bEmpty;
}
CGraphicImageTexture::CGraphicImageTexture()
{
Initialize();

View File

@@ -2,6 +2,8 @@
#include "GrpTexture.h"
struct TDecodedImageData;
class CGraphicImageTexture : public CGraphicTexture
{
public:
@@ -18,6 +20,7 @@ class CGraphicImageTexture : public CGraphicTexture
bool CreateFromMemoryFile(UINT bufSize, const void* c_pvBuf, D3DFORMAT d3dFmt, DWORD dwFilter = D3DX_FILTER_LINEAR);
bool CreateFromDDSTexture(UINT bufSize, const void* c_pvBuf);
bool CreateFromSTB(UINT bufSize, const void* c_pvBuf);
bool CreateFromDecodedData(const TDecodedImageData& decodedImage, D3DFORMAT d3dFmt, DWORD dwFilter);
void SetFileName(const char * c_szFileName);

View File

@@ -0,0 +1,92 @@
#include "StdAfx.h"
#include "ImageDecoder.h"
#include "EterImageLib/DDSTextureLoader9.h"
#include <stb_image.h>
// Decodes an in-memory image file into outImage.
// The DDS path is tried first; everything else is handed to stb_image.
// outImage is reset before decoding. Returns false for empty input or when
// neither decoder accepts the data.
bool CImageDecoder::DecodeImage(const void* pData, size_t dataSize, TDecodedImageData& outImage)
{
    const bool hasInput = (pData != nullptr) && (dataSize != 0);
    if (!hasInput)
        return false;

    outImage.Clear();

    // Short-circuit: the stb decoder only runs when the DDS check declines.
    return DecodeDDS(pData, dataSize, outImage) || DecodeSTB(pData, dataSize, outImage);
}
// Recognizes a DDS file and stores it, undecoded, in outImage.
//
// Only the magic number and the leading header fields (width, height, mip
// count) are inspected. The complete file is copied into outImage.pixels so
// the DirectX DDS loader can create the texture later.
//
// Returns false when the data does not start with the DDS magic or is shorter
// than the 128-byte magic + header.
//
// Header fields are read with memcpy instead of casting pData to a struct
// pointer: pData carries no alignment guarantee and the cast would also
// break the aliasing rules.
bool CImageDecoder::DecodeDDS(const void* pData, size_t dataSize, TDecodedImageData& outImage)
{
    if (dataSize < 4)
        return false;

    const uint32_t DDS_MAGIC = 0x20534444; // "DDS " read as little-endian
    uint32_t magic = 0;
    memcpy(&magic, pData, sizeof(magic));
    if (magic != DDS_MAGIC)
        return false;

    if (dataSize < 128)
        return false;

    struct DDSHeader
    {
        uint32_t magic;
        uint32_t size;
        uint32_t flags;
        uint32_t height;
        uint32_t width;
        uint32_t pitchOrLinearSize;
        uint32_t depth;
        uint32_t mipMapCount;
        uint32_t reserved1[11];
    };
    static_assert(sizeof(DDSHeader) <= 128, "header prefix must fit in the validated range");

    DDSHeader header;
    memcpy(&header, pData, sizeof(header));

    outImage.width = header.width;
    outImage.height = header.height;
    outImage.mipLevels = (header.mipMapCount > 0) ? header.mipMapCount : 1;
    outImage.isDDS = true;
    outImage.format = TDecodedImageData::FORMAT_DDS;

    // assign() copies directly, without zero-filling the buffer first.
    const uint8_t* bytes = static_cast<const uint8_t*>(pData);
    outImage.pixels.assign(bytes, bytes + dataSize);
    return true;
}
bool CImageDecoder::DecodeSTB(const void* pData, size_t dataSize, TDecodedImageData& outImage)
{
int width, height, channels;
unsigned char* imageData = stbi_load_from_memory(
(const stbi_uc*)pData,
(int)dataSize,
&width,
&height,
&channels,
4
);
if (!imageData)
return false;
outImage.width = width;
outImage.height = height;
outImage.format = TDecodedImageData::FORMAT_RGBA8;
outImage.isDDS = false;
outImage.mipLevels = 1;
size_t pixelDataSize = width * height * 4;
outImage.pixels.resize(pixelDataSize);
memcpy(outImage.pixels.data(), imageData, pixelDataSize);
stbi_image_free(imageData);
return true;
}

View File

@@ -0,0 +1,18 @@
#ifndef __INC_ETERLIB_IMAGEDECODER_H__
#define __INC_ETERLIB_IMAGEDECODER_H__
#include "DecodedImageData.h"
// Image decoder for worker threads
// Stateless helper: every member is static.
class CImageDecoder
{
public:
// Decode image from memory (DDS, PNG, JPG, TGA, BMP)
// Returns true and fills outImage on success.
static bool DecodeImage(const void* pData, size_t dataSize, TDecodedImageData& outImage);
private:
// Format-specific back ends used by DecodeImage().
static bool DecodeDDS(const void* pData, size_t dataSize, TDecodedImageData& outImage);
static bool DecodeSTB(const void* pData, size_t dataSize, TDecodedImageData& outImage);
};
#endif // __INC_ETERLIB_IMAGEDECODER_H__

View File

@@ -7,6 +7,8 @@
#include "ResourceManager.h"
#include "GrpImage.h"
#include "TextureCache.h"
#include "DecodedImageData.h"
int g_iLoadingDelayTime = 1; // Reduced from 20ms to 1ms for faster async loading
@@ -68,7 +70,16 @@ void CResourceManager::ProcessBackgroundLoading()
}
//printf("REQ %s\n", stFileName.c_str());
ms_loadingThread.Request(stFileName);
if (m_pLoaderThreadPool)
{
m_pLoaderThreadPool->Request(stFileName);
}
else
{
ms_loadingThread.Request(stFileName);
}
m_WaitingMap.insert(TResourceRequestMap::value_type(dwFileCRC, stFileName));
itor = m_RequestMap.erase(itor);
//break; // NOTE: 여기서 break 하면 천천히 로딩 된다.
@@ -76,6 +87,44 @@ void CResourceManager::ProcessBackgroundLoading()
DWORD dwCurrentTime = ELTimer_GetMSec();
if (m_pLoaderThreadPool)
{
CFileLoaderThreadPool::TLoadResult result;
while (m_pLoaderThreadPool->Fetch(result))
{
CResource * pResource = GetResourcePointer(result.stFileName.c_str());
if (pResource)
{
if (pResource->IsEmpty())
{
if (result.hasDecodedImage)
{
CGraphicImage* pImage = dynamic_cast<CGraphicImage*>(pResource);
if (pImage)
{
pImage->OnLoadFromDecodedData(result.decodedImage);
}
else
{
pResource->OnLoad(result.File.size(), result.File.data());
}
}
else
{
pResource->OnLoad(result.File.size(), result.File.data());
}
pResource->AddReferenceOnly();
m_pResRefDecreaseWaitingMap.insert(TResourceRefDecreaseWaitingMap::value_type(dwCurrentTime, pResource));
}
}
m_WaitingMap.erase(GetCRC32(result.stFileName.c_str(), result.stFileName.size()));
}
}
// Process old thread results
CFileLoaderThread::TData * pData;
while (ms_loadingThread.Fetch(&pData))
{
@@ -528,12 +577,36 @@ void CResourceManager::ReserveDeletingResource(CResource * pResource)
}
// Starts the legacy loading thread, then tries to bring up the loader thread
// pool; if the pool cannot be initialized the pointer stays null.
// Finally creates the texture cache with a limit of 512 (MB).
CResourceManager::CResourceManager()
    : m_pLoaderThreadPool(nullptr)
    , m_pTextureCache(nullptr)
{
    ms_loadingThread.Create(0);

    m_pLoaderThreadPool = new CFileLoaderThreadPool();
    const bool poolReady = m_pLoaderThreadPool->Initialize();
    if (!poolReady)
    {
        TraceError("CResourceManager: Failed to initialize FileLoaderThreadPool");
        delete m_pLoaderThreadPool;
        m_pLoaderThreadPool = nullptr;
    }

    m_pTextureCache = new CTextureCache(512);
}
// Tears down in order: resources, legacy loading thread, loader thread pool,
// texture cache.
CResourceManager::~CResourceManager()
{
    Destroy();
    ms_loadingThread.Shutdown();

    // delete on a null pointer is a no-op, so no explicit checks are needed.
    delete m_pLoaderThreadPool;
    m_pLoaderThreadPool = nullptr;

    delete m_pTextureCache;
    m_pTextureCache = nullptr;
}

View File

@@ -2,11 +2,14 @@
#include "Resource.h"
#include "FileLoaderThread.h"
#include "FileLoaderThreadPool.h"
#include <set>
#include <map>
#include <string>
class CTextureCache;
class CResourceManager : public CSingleton<CResourceManager>
{
public:
@@ -42,6 +45,9 @@ class CResourceManager : public CSingleton<CResourceManager>
void ProcessBackgroundLoading();
void PushBackgroundLoadingSet(std::set<std::string> & LoadingSet);
CTextureCache* GetTextureCache() { return m_pTextureCache; }
CFileLoaderThreadPool* GetLoaderThreadPool() { return m_pLoaderThreadPool; }
protected:
void __DestroyDeletingResourceMap();
void __DestroyResourceMap();
@@ -68,6 +74,8 @@ class CResourceManager : public CSingleton<CResourceManager>
TResourceRefDecreaseWaitingMap m_pResRefDecreaseWaitingMap;
static CFileLoaderThread ms_loadingThread;
CFileLoaderThreadPool* m_pLoaderThreadPool;
CTextureCache* m_pTextureCache;
};
extern int g_iLoadingDelayTime;

79
src/EterLib/SPSCQueue.h Normal file
View File

@@ -0,0 +1,79 @@
#ifndef __INC_ETERLIB_SPSCQUEUE_H__
#define __INC_ETERLIB_SPSCQUEUE_H__
#include <atomic>
#include <vector>
#include <cassert>
// Lock-free queue for single producer/consumer pairs
// Bounded ring-buffer queue for exactly one producer thread (Push) and one
// consumer thread (Pop). Synchronization relies on two atomic indices:
//   m_head - next slot to write, modified only by the producer
//   m_tail - next slot to read,  modified only by the consumer
// One slot is always kept free so that "full" and "empty" can be told apart.
//
// Pop() moves the element out of its slot. A popped slot therefore holds a
// moved-from value and no longer keeps the element's resources (e.g. large
// buffers) alive until the slot is overwritten.
template<typename T>
class SPSCQueue
{
public:
    // capacity: maximum number of elements the queue can hold (> 0).
    explicit SPSCQueue(size_t capacity)
        : m_capacity(capacity + 1) // +1 to distinguish full from empty
        , m_buffer(m_capacity)
        , m_head(0)
        , m_tail(0)
    {
        assert(capacity > 0);
    }

    ~SPSCQueue()
    {
    }

    // Push a copy of item (returns false if full)
    bool Push(const T& item)
    {
        const size_t head = m_head.load(std::memory_order_relaxed);
        const size_t next_head = (head + 1) % m_capacity;
        if (next_head == m_tail.load(std::memory_order_acquire))
            return false; // Queue is full
        m_buffer[head] = item;
        // release: the slot write above becomes visible before the new head
        m_head.store(next_head, std::memory_order_release);
        return true;
    }

    // Push by moving from item (returns false if full; item is untouched then)
    bool Push(T&& item)
    {
        const size_t head = m_head.load(std::memory_order_relaxed);
        const size_t next_head = (head + 1) % m_capacity;
        if (next_head == m_tail.load(std::memory_order_acquire))
            return false; // Queue is full
        m_buffer[head] = static_cast<T&&>(item);
        m_head.store(next_head, std::memory_order_release);
        return true;
    }

    // Pop item (returns false if empty)
    bool Pop(T& item)
    {
        const size_t tail = m_tail.load(std::memory_order_relaxed);
        if (tail == m_head.load(std::memory_order_acquire))
            return false; // Queue is empty
        // Move instead of copy; falls back to copy for copy-only types.
        item = static_cast<T&&>(m_buffer[tail]);
        m_tail.store((tail + 1) % m_capacity, std::memory_order_release);
        return true;
    }

    // Check if empty
    bool IsEmpty() const
    {
        return m_tail.load(std::memory_order_acquire) == m_head.load(std::memory_order_acquire);
    }

    // Get queue size. The two indices are read separately, so the value is
    // only a snapshot while the other thread is active.
    size_t Size() const
    {
        const size_t head = m_head.load(std::memory_order_acquire);
        const size_t tail = m_tail.load(std::memory_order_acquire);
        if (head >= tail)
            return head - tail;
        else
            return m_capacity - tail + head;
    }

private:
    const size_t m_capacity; // declared first: m_buffer is sized from it
    std::vector<T> m_buffer;
    // Each index is aligned to 64 bytes so the two start on different 64-byte boundaries.
    alignas(64) std::atomic<size_t> m_head;
    alignas(64) std::atomic<size_t> m_tail;
};
#endif // __INC_ETERLIB_SPSCQUEUE_H__

View File

@@ -0,0 +1,109 @@
#include "StdAfx.h"
#include "TextureCache.h"
// Creates an empty cache. maxMemoryMB is converted to bytes for the internal limit;
// the hit and miss counters start at zero.
CTextureCache::CTextureCache(size_t maxMemoryMB)
: m_maxMemory(maxMemoryMB * 1024 * 1024)
, m_currentMemory(0)
, m_hits(0)
, m_misses(0)
{
}
// Empties the cache (via Clear()) before destruction.
CTextureCache::~CTextureCache()
{
Clear();
}
// Looks up `filename`. On a hit the entry becomes the most recently used one,
// a copy of the cached texture is written to outTexture and true is returned.
// On a miss outTexture is left untouched. Hit/miss counters are updated
// either way.
bool CTextureCache::Get(const std::string& filename, TCachedTexture& outTexture)
{
    std::lock_guard<std::mutex> lock(m_mutex);

    auto it = m_cache.find(filename);
    if (it == m_cache.end())
    {
        m_misses.fetch_add(1);
        return false;
    }

    // Move to back of LRU (most recently used).
    // splice() relinks the existing node: it does not allocate and cannot
    // throw, and the stored iterator stays valid. The previous erase +
    // push_back could throw after the erase and leave the entry with a
    // dangling iterator.
    m_lruList.splice(m_lruList.end(), m_lruList, it->second.second);

    // Copy texture data
    outTexture = it->second.first;
    m_hits.fetch_add(1);
    return true;
}
// Stores a copy of `texture` under `filename` as the most recently used entry.
//
// - An existing entry with the same name is removed first.
// - Textures larger than a quarter of the memory limit are not cached.
// - Otherwise least recently used entries are evicted until the new texture
//   fits (or the cache is empty).
//
// The size check runs BEFORE eviction: a texture that is going to be rejected
// anyway must not push other entries out of the cache.
void CTextureCache::Put(const std::string& filename, const TCachedTexture& texture)
{
    std::lock_guard<std::mutex> lock(m_mutex);

    // Check if already cached
    auto it = m_cache.find(filename);
    if (it != m_cache.end())
    {
        // Drop the outdated entry; it is re-added below if it qualifies.
        m_currentMemory -= it->second.first.memorySize;
        m_lruList.erase(it->second.second);
        m_cache.erase(it);
    }

    // Don't cache if too large
    if (texture.memorySize > m_maxMemory / 4)
    {
        return; // Skip caching huge textures
    }

    // Evict if needed
    while (m_currentMemory + texture.memorySize > m_maxMemory && !m_cache.empty())
    {
        Evict();
    }

    // Add to cache
    m_lruList.push_back(filename);
    auto lruIt = std::prev(m_lruList.end());
    m_cache[filename] = {texture, lruIt};
    m_currentMemory += texture.memorySize;
}
// Removes every cached texture and resets the memory accounting.
// The hit/miss counters are not changed.
void CTextureCache::Clear()
{
    std::lock_guard<std::mutex> guard(m_mutex);

    m_currentMemory = 0;
    m_lruList.clear();
    m_cache.clear();
}
float CTextureCache::GetHitRate() const
{
size_t hits = m_hits.load();
size_t misses = m_misses.load();
size_t total = hits + misses;
if (total == 0)
return 0.0f;
return (float)hits / (float)total;
}
// Drops the least recently used entry (front of the LRU list) and subtracts
// its size from the memory total. Does nothing when the list is empty.
// Takes no lock itself.
void CTextureCache::Evict()
{
    if (!m_lruList.empty())
    {
        auto entry = m_cache.find(m_lruList.front());
        if (entry != m_cache.end())
        {
            m_currentMemory -= entry->second.first.memorySize;
            m_cache.erase(entry);
        }
        // Popped last: the name at the front is still needed for the lookup above.
        m_lruList.pop_front();
    }
}

View File

@@ -0,0 +1,55 @@
#ifndef __INC_ETERLIB_TEXTURECACHE_H__
#define __INC_ETERLIB_TEXTURECACHE_H__
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <list>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
// LRU cache for decoded textures
// Mutex-protected LRU cache of decoded textures, bounded by a total memory budget.
class CTextureCache
{
public:
    // One cached texture. memorySize is the value used for the cache's
    // memory accounting.
    struct TCachedTexture
    {
        std::vector<uint8_t> pixels;
        int width;
        int height;
        size_t memorySize;
        std::string filename;
    };

    // maxMemoryMB: memory budget in megabytes.
    CTextureCache(size_t maxMemoryMB = 256);
    ~CTextureCache();

    // Get cached texture
    bool Get(const std::string& filename, TCachedTexture& outTexture);
    // Add texture to cache
    void Put(const std::string& filename, const TCachedTexture& texture);
    // Clear cache
    void Clear();

    // Get statistics
    // The two getters below read state that Put()/Get()/Clear() modify under
    // m_mutex, so they take the same lock instead of reading it unguarded.
    size_t GetMemoryUsage() const
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        return m_currentMemory;
    }
    // m_maxMemory is assigned by the constructor and not modified by any member shown in this declaration.
    size_t GetMaxMemory() const { return m_maxMemory; }
    size_t GetCachedCount() const
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        return m_cache.size();
    }
    float GetHitRate() const;

private:
    // Removes the least recently used entry.
    void Evict();

private:
    size_t m_maxMemory;     // budget in bytes
    size_t m_currentMemory; // sum of memorySize over all cached entries
    std::list<std::string> m_lruList; // front = least recently used, back = most recently used
    // filename -> (texture, position of the filename in m_lruList)
    std::unordered_map<std::string, std::pair<TCachedTexture, std::list<std::string>::iterator>> m_cache;
    mutable std::mutex m_mutex; // mutable so const getters can lock it
    std::atomic<size_t> m_hits;
    std::atomic<size_t> m_misses;
};
#endif // __INC_ETERLIB_TEXTURECACHE_H__

View File

@@ -56,6 +56,27 @@ bool CTextureSet::Load(const char * c_szTextureSetFileName, float fTerrainTexCoo
m_Textures.resize(lCount + 1);
std::vector<std::string> textureFiles;
textureFiles.reserve(lCount);
for (long i = 0; i < lCount; ++i)
{
_snprintf(szTextureName, sizeof(szTextureName), "texture%03d", i + 1);
if (stTokenVectorMap.end() == stTokenVectorMap.find(szTextureName))
continue;
const CTokenVector & rVector = stTokenVectorMap[szTextureName];
const std::string & c_rstrFileName = rVector[0].c_str();
textureFiles.push_back(c_rstrFileName);
}
for (const auto& filename : textureFiles)
{
CResourceManager::Instance().GetResourcePointer(filename.c_str());
}
for (long i = 0; i < lCount; ++i)
{
_snprintf(szTextureName, sizeof(szTextureName), "texture%03d", i + 1);

View File

@@ -1,6 +1,18 @@
#include "Pack.h"
#include "EterLib/BufferPool.h"
#include <zstd.h>
// Per-thread zstd decompression context, created on first use.
static thread_local ZSTD_DCtx* g_zstdDCtx = nullptr;

// Returns the calling thread's decompression context, creating it on the
// first call. Reusing one context per thread avoids setting up a new one for
// every decompressed file.
//
// The context is released with ZSTD_freeDCtx() when the thread exits; without
// that, every thread that ever decompressed a file would leak its context.
static ZSTD_DCtx* GetThreadLocalZSTDContext()
{
    // The destructor of this thread-local object runs at thread exit.
    struct TContextReleaser
    {
        ~TContextReleaser()
        {
            if (g_zstdDCtx)
            {
                ZSTD_freeDCtx(g_zstdDCtx);
                g_zstdDCtx = nullptr;
            }
        }
    };
    static thread_local TContextReleaser s_releaser;
    (void)s_releaser;

    if (!g_zstdDCtx)
    {
        g_zstdDCtx = ZSTD_createDCtx();
    }
    return g_zstdDCtx;
}
bool CPack::Open(const std::string& path, TPackFileMap& entries)
{
std::error_code ec;
@@ -38,27 +50,44 @@ bool CPack::Open(const std::string& path, TPackFileMap& entries)
}
// Reads one pack entry into `result` without a buffer pool:
// forwards to GetFileWithPool() with a null pool pointer.
bool CPack::GetFile(const TPackFileEntry& entry, TPackFile& result)
{
return GetFileWithPool(entry, result, nullptr);
}
bool CPack::GetFileWithPool(const TPackFileEntry& entry, TPackFile& result, CBufferPool* pPool)
{
result.resize(entry.file_size);
size_t offset = m_header.data_begin + entry.offset;
ZSTD_DCtx* dctx = GetThreadLocalZSTDContext();
switch (entry.encryption)
{
case 0: {
size_t decompressed_size = ZSTD_decompress(result.data(), result.size(), m_file.data() + offset, entry.compressed_size);
size_t decompressed_size = ZSTD_decompressDCtx(dctx, result.data(), result.size(), m_file.data() + offset, entry.compressed_size);
if (decompressed_size != entry.file_size) {
return false;
}
} break;
case 1: {
std::vector<uint8_t> compressed_data(entry.compressed_size);
std::vector<uint8_t> compressed_data;
if (pPool) {
compressed_data = pPool->Acquire(entry.compressed_size);
}
compressed_data.resize(entry.compressed_size);
memcpy(compressed_data.data(), m_file.data() + offset, entry.compressed_size);
m_decryption.Resynchronize(entry.iv, sizeof(entry.iv));
m_decryption.ProcessData(compressed_data.data(), compressed_data.data(), entry.compressed_size);
size_t decompressed_size = ZSTD_decompress(result.data(), result.size(), compressed_data.data(), compressed_data.size());
size_t decompressed_size = ZSTD_decompressDCtx(dctx, result.data(), result.size(), compressed_data.data(), compressed_data.size());
if (pPool) {
pPool->Release(std::move(compressed_data));
}
if (decompressed_size != entry.file_size) {
return false;
}

View File

@@ -4,6 +4,8 @@
#include "config.h"
class CBufferPool;
class CPack : public std::enable_shared_from_this<CPack>
{
public:
@@ -12,6 +14,7 @@ public:
bool Open(const std::string& path, TPackFileMap& entries);
bool GetFile(const TPackFileEntry& entry, TPackFile& result);
bool GetFileWithPool(const TPackFileEntry& entry, TPackFile& result, CBufferPool* pPool);
private:
TPackFileHeader m_header;

View File

@@ -1,14 +1,38 @@
#include "PackManager.h"
#include "EterLib/BufferPool.h"
#include <fstream>
#include <filesystem>
// Constructs the manager in pack-load mode and allocates the buffer pool it owns;
// the pool is released again in the destructor.
CPackManager::CPackManager()
	: m_load_from_pack(true)
	, m_pBufferPool(new CBufferPool())
{
}
// Releases the buffer pool allocated by the constructor.
CPackManager::~CPackManager()
{
	// `delete` on a null pointer is a no-op, so no explicit null check is required.
	delete m_pBufferPool;
	m_pBufferPool = nullptr;
}
bool CPackManager::AddPack(const std::string& path)
{
std::shared_ptr<CPack> pack = std::make_shared<CPack>();
std::lock_guard<std::mutex> lock(m_mutex);
return pack->Open(path, m_entries);
}
// Loads `path` into `result` using the manager's own buffer pool
// (forwards to GetFileWithPool with m_pBufferPool).
bool CPackManager::GetFile(std::string_view path, TPackFile& result)
{
	const bool loaded = GetFileWithPool(path, result, m_pBufferPool);
	return loaded;
}
bool CPackManager::GetFileWithPool(std::string_view path, TPackFile& result, CBufferPool* pPool)
{
thread_local std::string buf;
NormalizePath(path, buf);
@@ -16,7 +40,7 @@ bool CPackManager::GetFile(std::string_view path, TPackFile& result)
if (m_load_from_pack) {
auto it = m_entries.find(buf);
if (it != m_entries.end()) {
return it->second.first->GetFile(it->second.second, result);
return it->second.first->GetFileWithPool(it->second.second, result, pPool);
}
}
else {
@@ -25,7 +49,14 @@ bool CPackManager::GetFile(std::string_view path, TPackFile& result)
ifs.seekg(0, std::ios::end);
size_t size = ifs.tellg();
ifs.seekg(0, std::ios::beg);
result.resize(size);
if (pPool) {
result = pPool->Acquire(size);
result.resize(size);
} else {
result.resize(size);
}
if (ifs.read((char*)result.data(), size)) {
return true;
}

View File

@@ -1,26 +1,34 @@
#pragma once
#include <unordered_map>
#include <mutex>
#include "EterBase/Singleton.h"
#include "Pack.h"
class CBufferPool;
// Singleton that maps file paths to pack entries and loads file contents,
// either out of opened packs or (in file load mode) through a file stream.
// It owns a CBufferPool that GetFile() passes along to the pack readers.
class CPackManager : public CSingleton<CPackManager>
{
public:
// NOTE(review): both the defaulted and the user-declared constructor/destructor
// appear below; this looks like old/new diff residue — confirm which pair is current.
CPackManager() = default;
virtual ~CPackManager() = default;
CPackManager();
virtual ~CPackManager();
// Opens the pack at `path` and registers its entries; holds m_mutex while doing so.
bool AddPack(const std::string& path);
// Loads `path` into `result` using the manager's own buffer pool.
bool GetFile(std::string_view path, TPackFile& result);
// Same as GetFile(), but with a caller-supplied pool; `pPool` may be null.
bool GetFileWithPool(std::string_view path, TPackFile& result, CBufferPool* pPool);
bool IsExist(std::string_view path) const;
// Switch between looking files up in the registered packs (pack mode) and
// reading them through a file stream (file mode).
void SetPackLoadMode() { m_load_from_pack = true; }
void SetFileLoadMode() { m_load_from_pack = false; }
// Non-owning access to the pool; the manager creates it in its constructor
// and deletes it in its destructor.
CBufferPool* GetBufferPool() { return m_pBufferPool; }
private:
void NormalizePath(std::string_view in, std::string& out) const;
private:
// true: resolve files through m_entries; false: read through a file stream.
bool m_load_from_pack = true;
// Entries registered by AddPack(), keyed by normalized path.
TPackFileMap m_entries;
// Owned buffer pool (allocated with new in the constructor).
CBufferPool* m_pBufferPool;
mutable std::mutex m_mutex; // Thread safety for parallel pack loading
};

View File

@@ -16,6 +16,8 @@
#include <filesystem>
#include <format>
#include <thread>
#include <atomic>
#include <stdlib.h>
#include <utf8.h>
@@ -166,11 +168,45 @@ bool PackInitialize(const char * c_pszFolder)
"uiloading",
};
CPackManager::instance().AddPack(std::format("{}/root.pck", c_pszFolder));
for (const std::string& packFileName : packFiles) {
CPackManager::instance().AddPack(std::format("{}/{}.pck", c_pszFolder, packFileName));
Tracef("PackInitialize: Loading root.pck...");
if (!CPackManager::instance().AddPack(std::format("{}/root.pck", c_pszFolder)))
{
TraceError("Failed to load root.pck");
return false;
}
// Load the remaining packs in parallel: the list is split into contiguous
// chunks and each chunk is handled by its own worker thread.
const size_t packCount = packFiles.size();
// Tracef is used printf-style here; cast so the argument really matches %d
// (passing a size_t to %d is a type mismatch on 64-bit builds).
Tracef("PackInitialize: Loading %d pack files in parallel...", static_cast<int>(packCount));

// hardware_concurrency() is allowed to return 0 when the value is not
// computable. Fall back to one worker so the chunk-size computation below
// can never divide by zero.
size_t hwThreads = std::thread::hardware_concurrency();
if (hwThreads == 0)
{
	hwThreads = 1;
}
const size_t numThreads = std::min<size_t>(hwThreads, packCount);
const size_t packsPerThread = (numThreads == 0) ? 0 : (packCount + numThreads - 1) / numThreads;

std::vector<std::thread> threads;
threads.reserve(numThreads);
std::atomic<size_t> failedCount(0);

for (size_t t = 0; t < numThreads; ++t)
{
	const size_t start = t * packsPerThread;
	const size_t end = std::min(start + packsPerThread, packCount);
	if (start >= end)
	{
		// Rounding the chunk size up can leave the trailing chunks empty;
		// every later chunk is empty too, so stop spawning workers.
		break;
	}

	// Everything captured by reference outlives the workers because all
	// threads are joined below before this scope is left.
	threads.emplace_back([&, start, end]() {
		for (size_t i = start; i < end; ++i)
		{
			std::string packPath = std::format("{}/{}.pck", c_pszFolder, packFiles[i]);
			if (!CPackManager::instance().AddPack(packPath))
			{
				TraceError("Failed to load %s", packPath.c_str());
				failedCount++;
			}
		}
	});
}

// Wait for all threads to complete
for (auto& thread : threads)
{
	thread.join();
}

Tracef("PackInitialize: Completed! Failed: %d / %d", static_cast<int>(failedCount.load()), static_cast<int>(packCount));
return true;
}