/**
 * Size statistics for one compression run, also used as an accumulator
 * for several runs (sum the fields, then call divide()).
 *
 * All fields start at zero.
 */
struct CompressionResult
{
    /// Size of the uncompressed input, in bytes.
    size_t originalSize = 0;
    /// Size of the compressed output, in bytes.
    size_t compressedSize = 0;
    /// Uncompressed size divided by compressed size.
    float compressionRatio = 0.0f;
    /// Compressed size expressed as a percentage of the uncompressed size.
    float percentageOfOriginalSize = 0.0f;

    /**
     * Divide every field by `x`, e.g. to turn accumulated sums into averages.
     *
     * The byte counts are divided in double precision and truncated toward
     * zero. Dividing them as `float` would round the count itself to 24
     * significant bits and corrupt sizes above ~16 MiB.
     *
     * A divisor of zero leaves the object unchanged: the quotient would be
     * inf/NaN, and converting that back to `size_t` is undefined behaviour.
     *
     * @param x divisor; expected to be a positive count.
     */
    void divide(float x)
    {
        if (x == 0.0f)
            return;

        const double divisor = static_cast<double>(x);
        originalSize = static_cast<size_t>(static_cast<double>(originalSize) / divisor);
        compressedSize = static_cast<size_t>(static_cast<double>(compressedSize) / divisor);
        compressionRatio /= x;
        percentageOfOriginalSize /= x;
    }
};
std::vector<byte> rle_encode(const std::vector<byte> &bytes) { @@ -264,4 +326,31 @@ void comp_test() bool same = vecUtil::vector_eq(data, uncompressed); always_assert(same && "Error in compression!"); -} \ No newline at end of file +} + +CompressionResult test_compression_method(const ByteArray &data, CompressionMethod method) +{ + ByteArray compressedData; + switch (method) + { + case CompressionMethod_RLE: + compressedData = rle_encode(data); + break; + case CompressionMethod_LZ: + compressedData = lz_encode(data); + break; + case CompressionMethod_GZIP: + compressedData = gzip_encode(data); + break; + default: + INVALID_CASE; + } + + CompressionResult result = {}; + result.originalSize = data.size(); + result.compressedSize = compressedData.size(); + result.compressionRatio = compression_ratio((float)result.originalSize, (float)result.compressedSize); + result.percentageOfOriginalSize = ((float)result.compressedSize / (float)result.originalSize) * 100.0f; + + return result; +} diff --git a/czi-format/czi-parser/custom_types.h b/czi-format/czi-parser/custom_types.h index f35dae1254fa46aec6f30b94cc22a4ba6d4a73ba..d29883dc64bc399aea582e99b1173dabc4714e52 100644 --- a/czi-format/czi-parser/custom_types.h +++ b/czi-format/czi-parser/custom_types.h @@ -23,6 +23,8 @@ typedef unsigned long ulong; #define TagType std::string, std::string typedef std::pair<TagType> Tag; +typedef std::vector<byte> ByteArray; + // Terminal colors #define RESET "\033[0m" #define RED "\033[31m" /* Red */ diff --git a/czi-format/czi-parser/czi_file.cpp b/czi-format/czi-parser/czi_file.cpp index 2d03417fc319a8a369a5f9a092381b7ddcfde7a8..40e95c07425c305eec2a6009b88dc1ba03a8cede 100644 --- a/czi-format/czi-parser/czi_file.cpp +++ b/czi-format/czi-parser/czi_file.cpp @@ -411,6 +411,7 @@ void CziFile::dump_image_data(const std::string &baseName) const void CziFile::test_compression(CompressionMethod method, bool verbose) const { + switch (method) { case CompressionMethod_LZ: @@ -419,71 +420,49 @@ 
void CziFile::test_compression(CompressionMethod method, bool verbose) const case CompressionMethod_RLE: printf("Selected compression: RLE\n"); break; + case CompressionMethod_GZIP: + printf("Selected compression: GZIP (zlib)\n"); + break; default: INVALID_CASE; return; } - float dataSize, dataZSize, compressedDataSize, compressedDataZSize, ratio, ratioZ; - float overall = 0; - float overallZ = 0; - size_t totalSize = 0; - size_t totalRleSize = 0; - size_t totalRleSizeZ = 0; + CompressionResult overallN; + CompressionResult overallZ; + for (size_t i = 0; i < subBlockDirectory.entries.size(); i++) { auto data = get_image_data(i, false); auto dataZ = get_image_data(i, true); - std::vector<byte> compressedData; - std::vector<byte> compressedDataZ; + CompressionResult nResult = test_compression_method(data, method); + CompressionResult zResult = test_compression_method(dataZ, method); - switch (method) - { - case CompressionMethod_LZ: - { - compressedData = lz_encode(data); - compressedDataZ = lz_encode(dataZ); - } - break; - case CompressionMethod_RLE: - { - compressedData = rle_encode(data); - compressedDataZ = rle_encode(dataZ); - } - break; - default: - INVALID_CASE; - return; - } - - dataSize = (float)data.size(); - dataZSize = (float)dataZ.size(); - compressedDataSize = (float)compressedData.size(); - compressedDataZSize = (float)compressedDataZ.size(); + overallN.compressionRatio += nResult.compressionRatio; + overallZ.compressionRatio += zResult.compressionRatio; - ratio = compression_ratio(dataSize, compressedDataSize); - ratioZ = compression_ratio(dataZSize, compressedDataZSize); - - overall += ratio; - overallZ += ratioZ; - - totalSize += dataSize; - totalRleSize += compressedDataSize; - totalRleSizeZ += compressedDataZSize; + overallN.originalSize += data.size(); + overallN.compressedSize += nResult.compressedSize; + overallZ.compressedSize += zResult.compressedSize; + overallN.percentageOfOriginalSize += nResult.percentageOfOriginalSize; + 
overallZ.percentageOfOriginalSize += zResult.percentageOfOriginalSize; if (verbose) { - if (ratio > 1 || ratioZ > 1) - printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); + if (nResult.compressionRatio > 1.0f || zResult.compressionRatio > 1.0f) + printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f; Size(N): %5.3f%%; Size(Z): %5.3f%%\n", + (int)i, nResult.compressionRatio, zResult.compressionRatio, nResult.percentageOfOriginalSize, zResult.percentageOfOriginalSize); else - printf("Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f\n", (int)i, ratio, ratioZ); + printf(RED "Subblock %-3i Compression ratios: Normal: %8f Z-Ordered: %8f; Size(N): %5.3f%%; Size(Z): %5.3f%%\n" RESET, (int)i, nResult.compressionRatio, zResult.compressionRatio, nResult.percentageOfOriginalSize, zResult.percentageOfOriginalSize); } } float dataCount = (float)subBlockDirectory.entries.size(); - overall /= dataCount; - overallZ /= dataCount; - printf("Overall compression ratios: Normal %8f Z-Ordered: %8f\n", overall, overallZ); - printf("Original size: %8lu B Rle size: %8lu B RleZOrder size: %8lu B\n", totalSize, totalRleSize, totalRleSizeZ); + overallN.divide(dataCount); + overallZ.divide(dataCount); + + printf("Overall compression ratios: Normal %8f Z-Ordered: %8f\n", overallN.compressionRatio, overallZ.compressionRatio); + printf("Original size: %8lu B Compressed size: %8lu B Compressed Z-Order size: %8lu B\n", overallN.originalSize, overallN.compressedSize, overallZ.compressedSize); + printf("Original size (%%): 100%% Compressed: %5.5f%% Compressed Z-Order: %5.5f%%\n", overallN.percentageOfOriginalSize, overallZ.percentageOfOriginalSize); } \ No newline at end of file diff --git a/czi-format/czi-parser/main.cpp b/czi-format/czi-parser/main.cpp index 466176b1ce25586c8ef2e7412254d5c2f34d52dd..e40c3368a177d2fed85fb9c2d72f9ffaf5b27509 100644 --- a/czi-format/czi-parser/main.cpp +++ b/czi-format/czi-parser/main.cpp @@ -23,6 
+23,7 @@ int main(int argc, char **argv) args::Flag verboseOption(optionsGroup, "verbose", "Extend output of method", {'v', "verbose"}); args::Flag rleCompressionOption(compressionGroup, "RLE", "RLE compression", {"rle"}); args::Flag lzCompressionOption(compressionGroup, "LZ", "LZ compression", {"lz"}); + args::Flag gzipCompressionOption(compressionGroup, "GZIP", "GZIP (zlib) compression", {"gzip"}); try { @@ -36,12 +37,12 @@ int main(int argc, char **argv) } catch (args::ValidationError &e) { - printf(RED "%s\n" RESET, e.what()); + printf(RED "%s Check help with -h or --help.\n" RESET, e.what()); return 1; } catch (args::ParseError &pe) { - printf(RED "%s\n" RESET, pe.what()); + printf(RED "%s Check help with -h or --help.\n" RESET, pe.what()); return 1; } @@ -55,6 +56,8 @@ int main(int argc, char **argv) if (lzCompressionOption) cm = CompressionMethod_LZ; + else if (gzipCompressionOption) + cm = CompressionMethod_GZIP; parsedFile.test_compression(cm, verboseOption.Matched()); }