diff --git a/czi-format/czi-parser/compression/compressors.h b/czi-format/czi-parser/compression/compressors.h new file mode 100644 index 0000000000000000000000000000000000000000..69e5975eec12199528775ce55a40020449912613 --- /dev/null +++ b/czi-format/czi-parser/compression/compressors.h @@ -0,0 +1,267 @@ +#pragma once +#include "../custom_types.h" +#include "../utilities/vector_utilities.h" +#include "../image/z_order.h" + +enum CompressionMethod +{ + CompressionMethod_RLE, + CompressionMethod_LZ +}; + +constexpr size_t MAX_LITERAL_COUNT = 255; +constexpr size_t MAX_RUN_COUNT = 255; +constexpr size_t MAX_LOOKBACK_COUNT = 255; + +inline float compression_ratio(float uncompressedSize, float compressedSize) +{ + return (uncompressedSize / compressedSize); +} + +// Run-Length encode bytes and return compressed bytes. +std::vector<byte> rle_encode(const std::vector<byte> &bytes) +{ + std::vector<byte> compressed; + byte literalBuffer[MAX_LITERAL_COUNT]; + size_t literalCount = 0; + size_t runCount = 0; + + size_t uncompresseddBufferSize = bytes.size(); + + for (size_t bufferIndex = 0; bufferIndex < uncompresseddBufferSize;) + { + byte symbol = bytes[bufferIndex]; + runCount = 1; + + // Encode run. + while ((runCount < MAX_RUN_COUNT) && + (bytes[bufferIndex + runCount] == symbol) && + (runCount < (uncompresseddBufferSize - bufferIndex))) + { + ++runCount; + } + + // Maybe we want to encode runs of bigger size than 1. + if ((runCount > 1) || + (literalCount == MAX_LITERAL_COUNT) || + ((bufferIndex == (uncompresseddBufferSize - 1)) && literalCount > 0)) + { + // Write literal buffer. + byte literalCountBYTE = (byte)literalCount; + always_assert(literalCountBYTE == literalCount); + + compressed.push_back(literalCountBYTE); + for (size_t literalBufferIndex = 0; literalBufferIndex < literalCount; literalBufferIndex++) + { + compressed.push_back(literalBuffer[literalBufferIndex]); + } + literalCount = 0; + + // Write run sequence. 
+ byte runCountBYTE = (byte)runCount; + always_assert(runCountBYTE == runCount); + compressed.push_back(runCountBYTE); + compressed.push_back(symbol); + + bufferIndex += runCount; + } + else + { + // Encode literal symbol. + literalBuffer[literalCount++] = symbol; + ++bufferIndex; + } + } + + return compressed; +} + +// Decode Run-Length encoded bytes. +std::vector<byte> rle_decode(const std::vector<byte> &compressed) +{ + std::vector<byte> uncompressed; + uncompressed.reserve(compressed.size()); + + size_t compressedBufferSize = compressed.size(); + size_t bufferIndex = 0; + + byte literalCount, runCount, runSymbol; + while (bufferIndex < compressedBufferSize) + { + literalCount = compressed[bufferIndex++]; + while (literalCount--) + { + uncompressed.push_back(compressed[bufferIndex++]); + } + + runCount = compressed[bufferIndex++]; + runSymbol = compressed[bufferIndex++]; + while (runCount--) + { + uncompressed.push_back(runSymbol); + } + } + + return uncompressed; +} + +std::vector<byte> lz_encode(const std::vector<byte> &bytes) +{ + std::vector<byte> compressed; + byte literalBuffer[MAX_LITERAL_COUNT]; + size_t literalCount = 0; + + size_t uncompresseddBufferSize = bytes.size(); + + for (size_t bufferIndex = 0; bufferIndex <= uncompresseddBufferSize;) + { + size_t bestRun = 0; + size_t bestDistance = 0; + + if (bufferIndex < uncompresseddBufferSize) + { + //TODO: In future we really need to upgrade MAX_LOOKBACK_COUNT to 16 or 32 bits. + size_t windowStartIndex = bufferIndex - (bufferIndex > MAX_LOOKBACK_COUNT ? MAX_LOOKBACK_COUNT : bufferIndex); + size_t windowEndIndex = windowStartIndex + ((bufferIndex - windowStartIndex) > MAX_RUN_COUNT) ? 
MAX_RUN_COUNT : bufferIndex - windowStartIndex; + for (size_t windowIndex = windowStartIndex; windowIndex < bufferIndex; windowIndex++) + { + size_t testIndex = bufferIndex; + size_t windowTestIndex = windowIndex; + size_t testRun = 0; + + while ((windowTestIndex < windowEndIndex) && bytes[testIndex++] == bytes[windowTestIndex++]) + { + ++testRun; + } + + if (bestRun < testRun) + { + bestRun = testRun; + bestDistance = bufferIndex - windowIndex; + } + } + } + + // Maybe we want to encode runs of bigger size than 1. + bool writeRun = false; + + if (literalCount > 0) + { + writeRun = bestRun > 4; + } + else + { + writeRun = bestRun > 2; + } + + if ((writeRun) || + (literalCount == MAX_LITERAL_COUNT) || + ((bufferIndex == uncompresseddBufferSize) && literalCount > 0)) + { + // Write literal buffer. + byte literalCountBYTE = (byte)literalCount; + always_assert(literalCountBYTE == literalCount); + + if (literalCountBYTE > 0) + { + compressed.push_back(literalCountBYTE); + compressed.push_back(0); + for (size_t literalBufferIndex = 0; literalBufferIndex < literalCount; literalBufferIndex++) + { + compressed.push_back(literalBuffer[literalBufferIndex]); + } + literalCount = 0; + } + + if (writeRun) + { + // Write run sequence. + byte bestRunBYTE = (byte)bestRun; + always_assert(bestRunBYTE == bestRun); + byte bestDistanceBYTE = bestDistance; + always_assert(bestDistanceBYTE == bestDistance); + + compressed.push_back(bestRunBYTE); + compressed.push_back(bestDistanceBYTE); + + bufferIndex += bestRun; + } + } + else + { + // Encode literal symbol. 
+ literalBuffer[literalCount++] = bytes[bufferIndex]; + ++bufferIndex; + } + } + + return compressed; +} + +std::vector<byte> lz_decode(std::vector<byte> &compressed) +{ + std::vector<byte> uncompressed; + uncompressed.reserve(compressed.size()); + + size_t compressedBufferSize = compressed.size(); + size_t bufferIndex = 0; + + //int count; + byte count, copyDistance; + while (bufferIndex < compressedBufferSize) + { + count = compressed[bufferIndex++]; + copyDistance = compressed[bufferIndex++]; + byte *copyPtr = vecUtil::last_element_pointer(&uncompressed) - ((int)copyDistance - 1); + if (copyDistance == 0) + { + copyPtr = compressed.data() + bufferIndex; + bufferIndex += count; + } + + while (count--) + { + uncompressed.push_back(*copyPtr++); + } + + // Version 2. + /* + if (copyDistance == 0) + { + while (count--) + { + uncompressed.push_back(compressed[bufferIndex++]); + } + } + else + { + size_t source = (uncompressed.size() - 1) - copyDistance; + while (count--) + { + uncompressed.push_back(uncompressed[source++]); + } + } + */ + + //TODO: Maybe it would be better to use pairs to encode like in RLE. 
+ } + + return uncompressed; +} + +void comp_test() +{ + std::vector<byte> data = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 20, 30, 40, 45, 48, 46, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 0, 0, 1, 2, 3, 8, 8, 8, 8, 9, 6, 5, 4}; + + // 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 20 30 40 45 48 46 50 50 50 50 50 50 50 50 50 50 0 0 1 2 3 8 8 8 8 9 6 5 4 + // 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 20 30 40 45 48 46 50 50 50 50 50 50 50 50 50 50 0 0 1 2 3 8 8 8 8 9 6 5 4 + + auto compressed = lz_encode(data); + auto uncompressed = lz_decode(compressed); + + bool same = vecUtil::vector_eq(data, uncompressed); + + always_assert(same && "Error in compression!"); +} \ No newline at end of file diff --git a/czi-format/czi-parser/compression/rle.h b/czi-format/czi-parser/compression/rle.h deleted file mode 100644 index 96d5f1d698af1c026d6df6408be062f3a63ee3eb..0000000000000000000000000000000000000000 --- a/czi-format/czi-parser/compression/rle.h +++ /dev/null @@ -1,115 +0,0 @@ -#pragma once -#include "../custom_types.h" -#include "../utilities/vector_utilities.h" -#include "../image/z_order.h" - -constexpr size_t MAX_LITERAL_COUNT = 255; -constexpr size_t MAX_RUN_COUNT = 255; - -inline float compression_ratio(float uncompressedSize, float compressedSize) -{ - return (uncompressedSize / compressedSize); -} - -// Run-Length encode bytes and return compressed bytes. -std::vector<byte> rle_encode(const std::vector<byte> &bytes) -{ - std::vector<byte> compressed; - byte literalBuffer[MAX_LITERAL_COUNT]; - size_t literalCount = 0; - size_t runCount = 0; - - size_t uncompresseddBufferSize = bytes.size(); - - for (size_t bufferIndex = 0; bufferIndex < uncompresseddBufferSize;) - { - byte symbol = bytes[bufferIndex]; - runCount = 1; - - // Encode run. 
- while ((runCount < MAX_RUN_COUNT) && - (bytes[bufferIndex + runCount] == symbol) && - (runCount < (uncompresseddBufferSize - bufferIndex))) - { - ++runCount; - } - - if ((runCount > 1) || - (literalCount == MAX_LITERAL_COUNT) || - ((bufferIndex == (uncompresseddBufferSize - 1)) && literalCount > 0)) - { - // Write literal buffer. - byte literalCountBYTE = (byte)literalCount; - always_assert(literalCountBYTE == literalCount); - - compressed.push_back(literalCountBYTE); - for (size_t literalBufferIndex = 0; literalBufferIndex < literalCount; literalBufferIndex++) - { - compressed.push_back(literalBuffer[literalBufferIndex]); - } - literalCount = 0; - - // Write run sequence. - byte runCountBYTE = (byte)runCount; - always_assert(runCountBYTE == runCount); - compressed.push_back(runCountBYTE); - compressed.push_back(symbol); - - bufferIndex += runCount; - } - else - { - // Encode literal symbol. - literalBuffer[literalCount++] = symbol; - ++bufferIndex; - } - } - - return compressed; -} - -// Decode Run-Length encoded bytes. 
-std::vector<byte> rle_decode(const std::vector<byte> &compressed) -{ - std::vector<byte> uncompressed; - uncompressed.reserve(compressed.size()); - - size_t compressedBufferSize = compressed.size(); - size_t bufferIndex = 0; - - byte literalCount, runCount, runSymbol; - while (bufferIndex < compressedBufferSize) - { - literalCount = compressed[bufferIndex++]; - while (literalCount--) - { - uncompressed.push_back(compressed[bufferIndex++]); - } - - runCount = compressed[bufferIndex++]; - runSymbol = compressed[bufferIndex++]; - while (runCount--) - { - uncompressed.push_back(runSymbol); - } - } - - return uncompressed; -} - -void rle_test() -{ - std::vector<byte> data = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 20, 30, 40, 45, 48, 46, 50, 50, 50, 50, 50, 50, 50, 50, 50, - 50, 0, 0, 1, 2, 3, 8, 8, 8, 8, 9, 6, 5, 4}; - - // 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 30, 40, 45, 48, 46, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 0, 0, 1, 2, 3, 8, 8, 8, 8, 9, 6, 5, 4 - // 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 30, 40, 45, 48, 46, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 0, 0, 1, 2, 3, 8, 8, 8, 8, 9, 6, 5, 4 - - auto compressed = rle_encode(data); - auto uncompressed = rle_decode(compressed); - - bool same = vecUtil::vector_eq(data, uncompressed); - - always_assert(same); -} \ No newline at end of file diff --git a/czi-format/czi-parser/custom_types.h b/czi-format/czi-parser/custom_types.h index b8862bb26282cd2b0dbf4277c5881f10e703a6c4..f35dae1254fa46aec6f30b94cc22a4ba6d4a73ba 100644 --- a/czi-format/czi-parser/custom_types.h +++ b/czi-format/czi-parser/custom_types.h @@ -13,6 +13,7 @@ constexpr bool NOT_IMPLEMENTED_YET = false; #define MUST_BE_IMPLEMENTED always_assert(false); +#define INVALID_CASE always_assert(false && "Wrong/missing case in switch statement."); typedef unsigned char byte; typedef unsigned short ushort; diff --git a/czi-format/czi-parser/czi_file.cpp b/czi-format/czi-parser/czi_file.cpp 
index 5f8273828d5cffdebe3ab675f0fce92c05baf0ed..2d03417fc319a8a369a5f9a092381b7ddcfde7a8 100644 --- a/czi-format/czi-parser/czi_file.cpp +++ b/czi-format/czi-parser/czi_file.cpp @@ -409,9 +409,21 @@ void CziFile::dump_image_data(const std::string &baseName) const } } -void CziFile::test_rle_encode() const +void CziFile::test_compression(CompressionMethod method, bool verbose) const { - float dataSize, dataZSize, rleDataSize, rleDataZSize, ratio, ratioZ; + switch (method) + { + case CompressionMethod_LZ: + printf("Selected compression: LZ\n"); + break; + case CompressionMethod_RLE: + printf("Selected compression: RLE\n"); + break; + default: + INVALID_CASE; + return; + } + float dataSize, dataZSize, compressedDataSize, compressedDataZSize, ratio, ratioZ; float overall = 0; float overallZ = 0; @@ -423,32 +435,55 @@ void CziFile::test_rle_encode() const auto data = get_image_data(i, false); auto dataZ = get_image_data(i, true); - auto rle_data = rle_encode(data); - auto rle_dataZ = rle_encode(dataZ); + std::vector<byte> compressedData; + std::vector<byte> compressedDataZ; + + switch (method) + { + case CompressionMethod_LZ: + { + compressedData = lz_encode(data); + compressedDataZ = lz_encode(dataZ); + } + break; + case CompressionMethod_RLE: + { + compressedData = rle_encode(data); + compressedDataZ = rle_encode(dataZ); + } + break; + default: + INVALID_CASE; + return; + } dataSize = (float)data.size(); dataZSize = (float)dataZ.size(); - rleDataSize = (float)rle_data.size(); - rleDataZSize = (float)rle_dataZ.size(); + compressedDataSize = (float)compressedData.size(); + compressedDataZSize = (float)compressedDataZ.size(); - ratio = compression_ratio(dataSize, rleDataSize); - ratioZ = compression_ratio(dataZSize, rleDataZSize); + ratio = compression_ratio(dataSize, compressedDataSize); + ratioZ = compression_ratio(dataZSize, compressedDataZSize); overall += ratio; overallZ += ratioZ; totalSize += dataSize; - totalRleSize += rleDataSize; - totalRleSizeZ += 
rleDataZSize; - if (ratio > 1 || ratioZ > 1) - printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); - else - printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); + totalRleSize += compressedDataSize; + totalRleSizeZ += compressedDataZSize; + + if (verbose) + { + if (ratio > 1 || ratioZ > 1) + printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); + else + printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); + } } float dataCount = (float)subBlockDirectory.entries.size(); overall /= dataCount; overallZ /= dataCount; - printf("Overall Normal %8f Z-Ordered: %8f\n", overall, overallZ); + printf("Overall compression ratios: Normal %8f Z-Ordered: %8f\n", overall, overallZ); printf("Original size: %8lu B Rle size: %8lu B RleZOrder size: %8lu B\n", totalSize, totalRleSize, totalRleSizeZ); } \ No newline at end of file diff --git a/czi-format/czi-parser/czi_file.h b/czi-format/czi-parser/czi_file.h index 05d0a9d0df34ad9faccd263d256154d176f7586e..3f276703fc497aee1d76d40c8c3bc48ea0625fd2 100644 --- a/czi-format/czi-parser/czi_file.h +++ b/czi-format/czi-parser/czi_file.h @@ -6,7 +6,7 @@ #include "stream/binary_writer.h" #include "image/image_matrix.h" #include "image/z_order.h" -#include "compression/rle.h" +#include "compression/compressors.h" //TODO: Handle multi-file scenarios. class CziFile @@ -48,8 +48,8 @@ public: void extract_images(const std::string &baseName) const; // Save differences between next images, so 0x1;1x2;... void differences_between_next(const std::string baseName) const; - // Report compression ratios for images if RLE is used, also tries to do RLE in Z order. - void test_rle_encode() const; + // Report compression ratios of images in normal and Z order. 
+ void test_compression(CompressionMethod method, bool verbose) const; }; #include "czi_file.cpp" \ No newline at end of file diff --git a/czi-format/czi-parser/main.cpp b/czi-format/czi-parser/main.cpp index 132d6fe8c21fbd33a7450ae36f7aff20e97d9c67..466176b1ce25586c8ef2e7412254d5c2f34d52dd 100644 --- a/czi-format/czi-parser/main.cpp +++ b/czi-format/czi-parser/main.cpp @@ -1,45 +1,64 @@ #include "czi_parser.h" #include "file_system.h" -#include "compression/rle.h" #include "utilities/args.hxx" int main(int argc, char **argv) { - args::ArgumentParser argParser("CZI file tools", "Optional arguments are in [ ] necessary in < >."); + args::ArgumentParser argParser("CZI file tools", "Optional arguments are in `[]` necessary in `<>`."); + // Groups args::Group cziFileGroup(argParser, "CZI file input - necessary.", args::Group::Validators::All); - args::Group mainMethodGroup(argParser, "Main method - what to do with parsed file", args::Group::Validators::AtMostOne); - args::Group parserOptionsGroup(argParser, "CZI parser options", args::Group::Validators::DontCare); - args::Group optionsGroup(argParser, "Program options", args::Group::Validators::DontCare); + args::Group mainMethodGroup(argParser, "Methods:", args::Group::Validators::AtMostOne); + args::Group optionsGroup(argParser, "Program options:", args::Group::Validators::DontCare); + args::Group compressionGroup(argParser, "Avaible compressions", args::Group::Validators::Xor); + // Main option, has to be set. Czi file. args::ValueFlag<std::string> cziFile(cziFileGroup, "czi file", "CZI file to load.", {'i', "input"}); + // Program methods. args::HelpFlag helpMethod(mainMethodGroup, "help", "Print help", {'h', "help"}); args::Flag reportMethod(mainMethodGroup, "report", "Basic information about CZI file. 
[verbose]", {"report"}); + args::Flag compressionTestMethod(mainMethodGroup, "compression-test", "Compress subblocks and output compression ratios <compression method>", {"compression-test"}); - args::Flag dontParseMetadataOption(parserOptionsGroup, "no metadata", "Dont read metadata byte", {"no-metadata"}); - + // Options + args::Flag dontParseMetadataOption(optionsGroup, "no metadata", "Dont read metadata byte", {"no-metadata"}); args::Flag verboseOption(optionsGroup, "verbose", "Extend output of method", {'v', "verbose"}); + args::Flag rleCompressionOption(compressionGroup, "RLE", "RLE compression", {"rle"}); + args::Flag lzCompressionOption(compressionGroup, "LZ", "LZ compression", {"lz"}); try { + argParser.helpParams.width *= 2; argParser.ParseCLI(argc, argv); } - catch (args::Help) + catch (args::Help &) { std::cout << argParser; return 0; } - catch (args::ValidationError e) + catch (args::ValidationError &e) { - std::cout << e.what() << std::endl; - std::cout << argParser; + printf(RED "%s\n" RESET, e.what()); return 1; } - catch (args::ParseError pe) + catch (args::ParseError &pe) { - std::cout << pe.what() << std::endl; - std::cout << argParser; + printf(RED "%s\n" RESET, pe.what()); return 1; } + + CziParser parser(dontParseMetadataOption.Get()); + auto parsedFile = parser.parse_czi_file(cziFile.Get()); + + // Test compression method. + if (compressionTestMethod) + { + CompressionMethod cm = CompressionMethod_RLE; + + if (lzCompressionOption) + cm = CompressionMethod_LZ; + + parsedFile.test_compression(cm, verboseOption.Matched()); + } + /* std::string cziFile = (argc > 1) ? 
// Returns a pointer to the last element of *vector.
// For an empty vector the result is vector->data(), which must not be
// dereferenced. A null `vector` yields nullptr.
// The returned pointer is invalidated by any operation that reallocates the
// vector (e.g. push_back beyond capacity), so do not keep it across insertions.
template <typename T>
inline T *last_element_pointer(std::vector<T> *vector)
{
    if (vector == nullptr)
    {
        return nullptr;
    }
    return (vector->empty() ? vector->data() : &vector->back());
}

// Read-only overload with the same contract, usable on const vectors.
template <typename T>
inline const T *last_element_pointer(const std::vector<T> *vector)
{
    if (vector == nullptr)
    {
        return nullptr;
    }
    return (vector->empty() ? vector->data() : &vector->back());
}