Skip to content
Snippets Groups Projects
compressors.h 12.3 KiB
Newer Older
  • Learn to ignore specific revisions
  • theazgra's avatar
    theazgra committed
    #pragma once
    #include "../custom_types.h"
    #include "../utilities/vector_utilities.h"
    #include "../image/z_order.h"
    
    
    namespace library_zlib
    {
    #include <zlib.h>
    };
    
    
    theazgra's avatar
    theazgra committed
    namespace library_lzma
    {
    #include <lzma.h>
    };
    
    
    theazgra's avatar
    theazgra committed
    enum CompressionMethod
    {
        CompressionMethod_RLE,
    
        CompressionMethod_LZ,
    
    theazgra's avatar
    theazgra committed
        CompressionMethod_GZIP,
        CompressionMethod_LZMA,
    };
    
    struct CompressionSettings
    {
        size_t lzmaPreset = LZMA_PRESET_DEFAULT; //LZMA_PRESET_EXTREME;
        bool lzmaExtreme = false;
    
    };
    
    struct CompressionResult
    {
        size_t originalSize;
        size_t compressedSize;
        float compressionRatio;
        float percentageOfOriginalSize;
    
        CompressionResult()
        {
            originalSize = 0;
            compressedSize = 0;
            compressionRatio = 0.0f;
            percentageOfOriginalSize = 0.0f;
        }
    
        void divide(float x)
        {
            originalSize /= x;
            compressedSize /= x;
            compressionRatio /= x;
            percentageOfOriginalSize /= x;
        }
    
    theazgra's avatar
    theazgra committed
    };
    
    constexpr size_t MAX_LITERAL_COUNT = 255;
    constexpr size_t MAX_RUN_COUNT = 255;
    constexpr size_t MAX_LOOKBACK_COUNT = 255;
    
    inline float compression_ratio(float uncompressedSize, float compressedSize)
    {
        return (uncompressedSize / compressedSize);
    }
    
    
    theazgra's avatar
    theazgra committed
    ByteArray gzip_encode(const std::vector<byte> &data, CompressionSettings settings)
    
    {
        size_t compressedSize = library_zlib::compressBound(data.size());
        ByteArray compressedBuffer;
        // Maybe try reserve or normal array.
        compressedBuffer.resize(compressedSize);
    
        //int compressionResult = library_zlib::compress(compressedBuffer.data(), &compressedSize, data.data(), data.size());
        int compressionResult = library_zlib::compress2(compressedBuffer.data(), &compressedSize, data.data(), data.size(), Z_BEST_COMPRESSION);
    
        switch (compressionResult)
        {
        case Z_OK:
            break;
        case Z_MEM_ERROR:
            printf(RED "There wasn't enaugh memory!\n" RESET);
            break;
        case Z_BUF_ERROR:
            printf(RED "Output buffer was too small!\n" RESET);
            break;
        default:
            INVALID_CASE;
        }
        always_assert(compressionResult == Z_OK);
    
        ByteArray actualCompressedData(compressedBuffer.begin(), compressedBuffer.begin() + compressedSize);
    
        always_assert(actualCompressedData.size() == compressedSize);
    
        return actualCompressedData;
    }
    
    
    theazgra's avatar
    theazgra committed
    ByteArray lzma_encode(const std::vector<byte> &data, CompressionSettings settings)
    {
        size_t maximumSize = library_lzma::lzma_stream_buffer_bound(data.size());
        ByteArray buffer;
        buffer.resize(maximumSize);
        size_t finalSize = 0;
        auto compressionResult = library_lzma::lzma_easy_buffer_encode(settings.lzmaPreset, library_lzma::LZMA_CHECK_NONE,
                                                                       nullptr, data.data(), data.size(),
                                                                       buffer.data(), &finalSize, maximumSize);
        switch (compressionResult)
        {
        case library_lzma::LZMA_MEM_ERROR:
            printf(RED "Unable to allocate memory!\n" RESET);
            break;
        case library_lzma::LZMA_MEMLIMIT_ERROR:
            printf(RED "Memory limit was reached!\n" RESET);
            break;
        case library_lzma::LZMA_BUF_ERROR:
            printf(RED "Cannot consume input buffer!\n" RESET);
            break;
        case library_lzma::LZMA_PROG_ERROR:
            printf(RED "Wrong arguments!\n" RESET);
            break;
            // To make GCC with -Wall happy.
        case library_lzma::LZMA_OK:
        case library_lzma::LZMA_STREAM_END:
        case library_lzma::LZMA_NO_CHECK:
        case library_lzma::LZMA_UNSUPPORTED_CHECK:
        case library_lzma::LZMA_GET_CHECK:
        case library_lzma::LZMA_FORMAT_ERROR:
        case library_lzma::LZMA_OPTIONS_ERROR:
        case library_lzma::LZMA_DATA_ERROR:
            break;
        }
        always_assert(compressionResult == library_lzma::LZMA_OK);
    
        ByteArray result(buffer.begin(), buffer.begin() + finalSize);
        always_assert(result.size() == finalSize);
    
        return result;
    }
    
    
    theazgra's avatar
    theazgra committed
    // Run-Length encode bytes and return compressed bytes.
    std::vector<byte> rle_encode(const std::vector<byte> &bytes)
    {
        std::vector<byte> compressed;
        byte literalBuffer[MAX_LITERAL_COUNT];
        size_t literalCount = 0;
        size_t runCount = 0;
    
        size_t uncompresseddBufferSize = bytes.size();
    
        for (size_t bufferIndex = 0; bufferIndex < uncompresseddBufferSize;)
        {
            byte symbol = bytes[bufferIndex];
            runCount = 1;
    
            // Encode run.
            while ((runCount < MAX_RUN_COUNT) &&
                   (bytes[bufferIndex + runCount] == symbol) &&
                   (runCount < (uncompresseddBufferSize - bufferIndex)))
            {
                ++runCount;
            }
    
            // Maybe we want to encode runs of bigger size than 1.
            if ((runCount > 1) ||
                (literalCount == MAX_LITERAL_COUNT) ||
                ((bufferIndex == (uncompresseddBufferSize - 1)) && literalCount > 0))
            {
                // Write literal buffer.
                byte literalCountBYTE = (byte)literalCount;
                always_assert(literalCountBYTE == literalCount);
    
                compressed.push_back(literalCountBYTE);
                for (size_t literalBufferIndex = 0; literalBufferIndex < literalCount; literalBufferIndex++)
                {
                    compressed.push_back(literalBuffer[literalBufferIndex]);
                }
                literalCount = 0;
    
                // Write run sequence.
                byte runCountBYTE = (byte)runCount;
                always_assert(runCountBYTE == runCount);
                compressed.push_back(runCountBYTE);
                compressed.push_back(symbol);
    
                bufferIndex += runCount;
            }
            else
            {
                // Encode literal symbol.
                literalBuffer[literalCount++] = symbol;
                ++bufferIndex;
            }
        }
    
        return compressed;
    }
    
    // Decode Run-Length encoded bytes.
    std::vector<byte> rle_decode(const std::vector<byte> &compressed)
    {
        std::vector<byte> uncompressed;
        uncompressed.reserve(compressed.size());
    
        size_t compressedBufferSize = compressed.size();
        size_t bufferIndex = 0;
    
        byte literalCount, runCount, runSymbol;
        while (bufferIndex < compressedBufferSize)
        {
            literalCount = compressed[bufferIndex++];
            while (literalCount--)
            {
                uncompressed.push_back(compressed[bufferIndex++]);
            }
    
            runCount = compressed[bufferIndex++];
            runSymbol = compressed[bufferIndex++];
            while (runCount--)
            {
                uncompressed.push_back(runSymbol);
            }
        }
    
        return uncompressed;
    }
    
    std::vector<byte> lz_encode(const std::vector<byte> &bytes)
    {
        std::vector<byte> compressed;
        byte literalBuffer[MAX_LITERAL_COUNT];
        size_t literalCount = 0;
    
        size_t uncompresseddBufferSize = bytes.size();
    
        for (size_t bufferIndex = 0; bufferIndex <= uncompresseddBufferSize;)
        {
            size_t bestRun = 0;
            size_t bestDistance = 0;
    
            if (bufferIndex < uncompresseddBufferSize)
            {
                //TODO: In future we really need to upgrade MAX_LOOKBACK_COUNT to 16 or 32 bits.
                size_t windowStartIndex = bufferIndex - (bufferIndex > MAX_LOOKBACK_COUNT ? MAX_LOOKBACK_COUNT : bufferIndex);
                size_t windowEndIndex = windowStartIndex + ((bufferIndex - windowStartIndex) > MAX_RUN_COUNT) ? MAX_RUN_COUNT : bufferIndex - windowStartIndex;
                for (size_t windowIndex = windowStartIndex; windowIndex < bufferIndex; windowIndex++)
                {
                    size_t testIndex = bufferIndex;
                    size_t windowTestIndex = windowIndex;
                    size_t testRun = 0;
    
                    while ((windowTestIndex < windowEndIndex) && bytes[testIndex++] == bytes[windowTestIndex++])
                    {
                        ++testRun;
                    }
    
                    if (bestRun < testRun)
                    {
                        bestRun = testRun;
                        bestDistance = bufferIndex - windowIndex;
                    }
                }
            }
    
            // Maybe we want to encode runs of bigger size than 1.
            bool writeRun = false;
    
            if (literalCount > 0)
            {
                writeRun = bestRun > 4;
            }
            else
            {
                writeRun = bestRun > 2;
            }
    
            if ((writeRun) ||
                (literalCount == MAX_LITERAL_COUNT) ||
                ((bufferIndex == uncompresseddBufferSize) && literalCount > 0))
            {
                // Write literal buffer.
                byte literalCountBYTE = (byte)literalCount;
                always_assert(literalCountBYTE == literalCount);
    
                if (literalCountBYTE > 0)
                {
                    compressed.push_back(literalCountBYTE);
                    compressed.push_back(0);
                    for (size_t literalBufferIndex = 0; literalBufferIndex < literalCount; literalBufferIndex++)
                    {
                        compressed.push_back(literalBuffer[literalBufferIndex]);
                    }
                    literalCount = 0;
                }
    
                if (writeRun)
                {
                    // Write run sequence.
                    byte bestRunBYTE = (byte)bestRun;
                    always_assert(bestRunBYTE == bestRun);
                    byte bestDistanceBYTE = bestDistance;
                    always_assert(bestDistanceBYTE == bestDistance);
    
                    compressed.push_back(bestRunBYTE);
                    compressed.push_back(bestDistanceBYTE);
    
                    bufferIndex += bestRun;
                }
            }
            else
            {
                // Encode literal symbol.
                literalBuffer[literalCount++] = bytes[bufferIndex];
                ++bufferIndex;
            }
        }
    
        return compressed;
    }
    
    std::vector<byte> lz_decode(std::vector<byte> &compressed)
    {
        std::vector<byte> uncompressed;
        uncompressed.reserve(compressed.size());
    
        size_t compressedBufferSize = compressed.size();
        size_t bufferIndex = 0;
    
        //int count;
        byte count, copyDistance;
        while (bufferIndex < compressedBufferSize)
        {
            count = compressed[bufferIndex++];
            copyDistance = compressed[bufferIndex++];
            byte *copyPtr = vecUtil::last_element_pointer(&uncompressed) - ((int)copyDistance - 1);
            if (copyDistance == 0)
            {
                copyPtr = compressed.data() + bufferIndex;
                bufferIndex += count;
            }
    
            while (count--)
            {
                uncompressed.push_back(*copyPtr++);
            }
    
            // Version 2.
            /*
            if (copyDistance == 0)
            {  
                while (count--)
                {
                    uncompressed.push_back(compressed[bufferIndex++]);
                }
            }
            else
            {
                size_t source = (uncompressed.size() - 1) - copyDistance;
                while (count--)
                {
                    uncompressed.push_back(uncompressed[source++]);
                }
            }
            */
    
            //TODO: Maybe it would be better to use pairs to encode like in RLE.
        }
    
        return uncompressed;
    }
    
    void comp_test()
    {
        std::vector<byte> data = {10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
                                  20, 30, 40, 45, 48, 46, 50, 50, 50, 50, 50, 50, 50, 50, 50,
                                  50, 0, 0, 1, 2, 3, 8, 8, 8, 8, 9, 6, 5, 4};
    
        // 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 20 30 40 45 48 46 50 50 50 50 50 50 50 50 50 50 0 0 1 2 3 8 8 8 8 9 6 5 4
        // 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 20 30 40 45 48 46 50 50 50 50 50 50 50 50 50 50 0 0 1 2 3 8 8 8 8 9 6 5 4
    
        auto compressed = lz_encode(data);
        auto uncompressed = lz_decode(compressed);
    
        bool same = vecUtil::vector_eq(data, uncompressed);
    
        always_assert(same && "Error in compression!");
    
    theazgra's avatar
    theazgra committed
    CompressionResult test_compression_method(const ByteArray &data, CompressionMethod method, CompressionSettings settings)
    
    {
        ByteArray compressedData;
        switch (method)
        {
        case CompressionMethod_RLE:
            compressedData = rle_encode(data);
            break;
        case CompressionMethod_LZ:
            compressedData = lz_encode(data);
            break;
        case CompressionMethod_GZIP:
    
    theazgra's avatar
    theazgra committed
            compressedData = gzip_encode(data, settings);
            break;
        case CompressionMethod_LZMA:
            compressedData = lzma_encode(data, settings);
    
            break;
        default:
            INVALID_CASE;
        }
    
        CompressionResult result = {};
        result.originalSize = data.size();
        result.compressedSize = compressedData.size();
        result.compressionRatio = compression_ratio((float)result.originalSize, (float)result.compressedSize);
        result.percentageOfOriginalSize = ((float)result.compressedSize / (float)result.originalSize) * 100.0f;
    
        return result;
    }