From 09d51fdf2fe64d8e020f348b87b4a2af3424b5bc Mon Sep 17 00:00:00 2001
From: server
Date: Wed, 15 Apr 2026 15:43:25 +0200
Subject: [PATCH] Optimize archive compression selection

---
 docs/client-integration.md |  5 +++++
 docs/format.md             |  7 ++++---
 src/archive.cpp            | 29 +++++++++++++++++++++++++++--
 3 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/docs/client-integration.md b/docs/client-integration.md
index 59292dc..ba56103 100644
--- a/docs/client-integration.md
+++ b/docs/client-integration.md
@@ -54,6 +54,7 @@ Minimum validation:
 - reject duplicate paths
 - reject path traversal
 - verify AEAD tag before decompression
+- verify plaintext hash only in debug or explicit strict mode
 
 ## Launcher key delivery
 
@@ -73,6 +74,7 @@ Supported inputs, highest priority last:
 --m2pack-pubkey-hex <64-hex-public-key>
 --m2pack-key-map
 --m2pack-key-id
+--m2pack-strict-hash [0|1]
 ```
 
 ### Environment
@@ -82,6 +84,7 @@ M2PACK_MASTER_KEY_HEX
 M2PACK_SIGN_PUBKEY_HEX
 M2PACK_KEY_MAP
 M2PACK_KEY_ID
+M2PACK_STRICT_HASH
 ```
 
 ### Shared memory
@@ -119,6 +122,8 @@ Recommended production path:
 - Keep a path-to-entry map in lowercase normalized form.
 - Decrypt per request, not by unpacking the full archive.
 - Keep a small decompression scratch buffer pool if the client reads in parallel.
+- Skip plaintext hash verification in release hot paths unless `M2PACK_STRICT_HASH`
+  or `--m2pack-strict-hash` explicitly enables it.
 
 ## Release model
 
diff --git a/docs/format.md b/docs/format.md
index 52028cd..8e410fa 100644
--- a/docs/format.md
+++ b/docs/format.md
@@ -43,9 +43,10 @@ this order:
 
 Each file payload is:
 
-1. compressed with `zstd`
-2. encrypted with `XChaCha20-Poly1305`
-3. stored in-place in the archive
+1. compressed with `zstd` when it produces a meaningful size win
+2. otherwise left uncompressed (the plaintext bytes are passed through as-is)
+3. encrypted with `XChaCha20-Poly1305`
+4. stored in-place in the archive
 
 The file path is used as associated data so path tampering invalidates the
 payload authentication tag.
diff --git a/src/archive.cpp b/src/archive.cpp
index b76843f..b27b340 100644
--- a/src/archive.cpp
+++ b/src/archive.cpp
@@ -16,12 +16,36 @@ namespace
 
 constexpr char kArchiveMagic[kMagicSize] = {'M', '2', 'P', 'A', 'C', 'K', '2', '\0'};
 constexpr std::uint32_t kArchiveVersion = 1;
+constexpr std::size_t kCompressionMinSavingsBytes = 64;  // Absolute threshold: saving at least this many bytes counts as a size win.
+constexpr std::size_t kCompressionMinSavingsPercent = 5;  // Relative threshold: saving at least this percent of the plaintext size counts as a ratio win.
 
 std::array require_master_key(const KeyMaterial& keys)
 {
     return keys.master_key;
 }
 
+Compression select_compression_mode(  // Returns Compression::Zstd when `compressed` beats `plain` by at least one of the two thresholds above, otherwise Compression::None.
+    const std::vector& plain,
+    const std::vector& compressed)
+{
+    if (compressed.size() >= plain.size())  // No savings at all; this guard also keeps the unsigned subtraction below from wrapping.
+    {
+        return Compression::None;
+    }
+
+    const auto savings = plain.size() - compressed.size();  // Strictly positive because of the guard above.
+    const bool meaningful_size_win = savings >= kCompressionMinSavingsBytes;
+    const bool meaningful_ratio_win = !plain.empty() &&  // plain cannot be empty here (its size exceeds compressed.size()); the check is purely defensive.
+        (savings * 100 >= plain.size() * kCompressionMinSavingsPercent);  // Integer form of savings / plain.size() >= kCompressionMinSavingsPercent / 100.
+
+    if (!meaningful_size_win && !meaningful_ratio_win)  // Either win alone keeps compression. NOTE(review): a 64-byte saving therefore selects Zstd regardless of file size - confirm this is intended.
+    {
+        return Compression::None;
+    }
+
+    return Compression::Zstd;
+}
+
 } // namespace
 
 std::string normalize_path(const std::filesystem::path& root, const std::filesystem::path& file)
@@ -143,13 +167,14 @@ BuildResult build_archive(
         const auto compressed = compress_zstd(plain);
         ManifestEntry entry;
         entry.path = normalize_path(input_dir, path);
-        entry.compression = Compression::Zstd;
+        entry.compression = select_compression_mode(plain, compressed);  // Mode is decided per file from the already-compressed buffer and recorded on the manifest entry.
         entry.data_offset = payload_bytes.size();
         entry.original_size = plain.size();
         entry.nonce = random_nonce();
         entry.plaintext_hash = hash_bytes(plain);
 
-        const auto encrypted = encrypt_payload(compressed, require_master_key(keys), entry.nonce, entry.path);
+        const auto& payload = entry.compression == Compression::Zstd ? compressed : plain;  // Encrypt whichever buffer matches the mode recorded above.
+        const auto encrypted = encrypt_payload(payload, require_master_key(keys), entry.nonce, entry.path);
         entry.stored_size = encrypted.size();
 
         payload_bytes.insert(payload_bytes.end(), encrypted.begin(), encrypted.end());