From c1d45d0d7c5e2dfbcdbe8b07ec4a73933d3f3d7f Mon Sep 17 00:00:00 2001 From: Vojtech Moravec <vojtech.moravec.st@vsb.cz> Date: Sat, 21 Mar 2020 19:23:10 +0100 Subject: [PATCH] Huffman encoding test. --- .../ScalarQuantizationBenchmark.java | 3 +- .../CompressorDecompressorBase.java | 8 + .../compression/SQImageCompressor.java | 28 +++- .../java/azgracompress/huffman/Huffman.java | 144 ++++++++++++++++++ .../java/azgracompress/io/OutBitStream.java | 16 +- .../scalar/ScalarQuantizationCodebook.java | 2 +- .../quantization/scalar/ScalarQuantizer.java | 4 +- 7 files changed, 192 insertions(+), 13 deletions(-) create mode 100644 src/main/java/azgracompress/huffman/Huffman.java diff --git a/src/main/java/azgracompress/benchmark/ScalarQuantizationBenchmark.java b/src/main/java/azgracompress/benchmark/ScalarQuantizationBenchmark.java index 3a3a5c4..4cf0060 100644 --- a/src/main/java/azgracompress/benchmark/ScalarQuantizationBenchmark.java +++ b/src/main/java/azgracompress/benchmark/ScalarQuantizationBenchmark.java @@ -2,8 +2,7 @@ package azgracompress.benchmark; import azgracompress.U16; import azgracompress.cli.ParsedCliOptions; -import azgracompress.io.IPlaneLoader; -import azgracompress.io.PlaneLoaderFactory; +import azgracompress.data.V3i; import azgracompress.quantization.QTrainIteration; import azgracompress.quantization.QuantizationValueCache; import azgracompress.quantization.scalar.LloydMaxU16ScalarQuantization; diff --git a/src/main/java/azgracompress/compression/CompressorDecompressorBase.java b/src/main/java/azgracompress/compression/CompressorDecompressorBase.java index 1b369aa..15a1790 100644 --- a/src/main/java/azgracompress/compression/CompressorDecompressorBase.java +++ b/src/main/java/azgracompress/compression/CompressorDecompressorBase.java @@ -13,6 +13,14 @@ public abstract class CompressorDecompressorBase { this.codebookSize = (int) Math.pow(2, this.options.getBitsPerPixel()); } + protected int[] createHuffmanSymbols() { + int[] symbols = new int[codebookSize]; + for (int i = 0; i < codebookSize; i++) { + symbols[i] = i; + } + return symbols; + } + protected int[] getPlaneIndicesForCompression() { if (options.hasPlaneIndexSet()) { return new int[]{options.getPlaneIndex()}; diff --git a/src/main/java/azgracompress/compression/SQImageCompressor.java b/src/main/java/azgracompress/compression/SQImageCompressor.java index 62fed9e..be249be 100644 --- a/src/main/java/azgracompress/compression/SQImageCompressor.java +++ b/src/main/java/azgracompress/compression/SQImageCompressor.java @@ -4,10 +4,12 @@ import azgracompress.U16; import azgracompress.cli.ParsedCliOptions; import azgracompress.compression.exception.ImageCompressionException; import azgracompress.data.ImageU16; +import azgracompress.huffman.Huffman; import azgracompress.io.OutBitStream; import azgracompress.io.RawDataIO; import azgracompress.quantization.QuantizationValueCache; import azgracompress.quantization.scalar.LloydMaxU16ScalarQuantization; +import azgracompress.quantization.scalar.ScalarQuantizationCodebook; import azgracompress.quantization.scalar.ScalarQuantizer; import azgracompress.utilities.Stopwatch; @@ -43,11 +45,16 @@ public class SQImageCompressor extends CompressorDecompressorBase implements IIm */ private void writeCodebookToOutputStream(final ScalarQuantizer quantizer, DataOutputStream compressStream) throws ImageCompressionException { - final int[] centroids = quantizer.getCentroids(); + final ScalarQuantizationCodebook codebook = quantizer.getCodebook(); + final int[] centroids = codebook.getCentroids(); + final long[] frequencies = codebook.getSymbolFrequencies(); try { for (final int quantizationValue : centroids) { compressStream.writeShort(quantizationValue); } + for (final long symbolFrequency : frequencies) { + compressStream.writeLong(symbolFrequency); + } } catch (IOException ioEx) { throw new ImageCompressionException("Unable to write codebook to compress stream.", ioEx); } @@ -65,7 +72,8 @@ public class SQImageCompressor extends CompressorDecompressorBase implements IIm private ScalarQuantizer loadQuantizerFromCache() throws ImageCompressionException { QuantizationValueCache cache = new QuantizationValueCache(options.getCodebookCacheFolder()); try { - final int[] quantizationValues = cache.readCachedValues(options.getInputFileInfo().getFilePath(), + + final int[] quantizationValues = cache.readCachedValues(options.getInputFile(), codebookSize); // TODO(Moravec): FIXME the null value. return new ScalarQuantizer(U16.Min, U16.Max, null); @@ -85,7 +93,10 @@ public class SQImageCompressor extends CompressorDecompressorBase implements IIm final boolean hasGeneralQuantizer = options.hasCodebookCacheFolder() || options.hasReferencePlaneIndex(); ScalarQuantizer quantizer = null; + Huffman huffman = null; + final int[] huffmanSymbols = createHuffmanSymbols(); if (options.hasCodebookCacheFolder()) { + // TODO(Moravec): Create huffman. Log("Loading codebook from cache file."); quantizer = loadQuantizerFromCache(); Log("Cached quantizer created."); @@ -98,6 +109,7 @@ public class SQImageCompressor extends CompressorDecompressorBase implements IIm referencePlane = RawDataIO.loadImageU16(options.getInputFile(), options.getImageDimension(), options.getReferencePlaneIndex()); + // TODO(Moravec): Create huffman. } catch (Exception ex) { throw new ImageCompressionException("Unable to load reference plane data.", ex); } @@ -131,15 +143,21 @@ public class SQImageCompressor extends CompressorDecompressorBase implements IIm Log(String.format("Training scalar quantizer from plane %d.", planeIndex)); quantizer = trainScalarQuantizerFromData(plane.getData()); writeCodebookToOutputStream(quantizer, compressStream); - } - assert (quantizer != null); + huffman = new Huffman(huffmanSymbols, quantizer.getCodebook().getSymbolFrequencies()); + huffman.buildHuffmanTree(); + } + assert (quantizer != null) : "Scalar Quantizer wasn't initialized."; + assert (huffman != null) : "Huffman wasn't initialized."; Log("Compressing plane..."); final int[] indices = quantizer.quantizeIntoIndices(plane.getData(), 1); try (OutBitStream outBitStream = new OutBitStream(compressStream, options.getBitsPerPixel(), 2048)) { - outBitStream.write(indices); + for (final int index : indices) { + outBitStream.write(huffman.getCode(index)); + } + //outBitStream.write(indices); } catch (Exception ex) { throw new ImageCompressionException("Unable to write indices to OutBitStream.", ex); } diff --git a/src/main/java/azgracompress/huffman/Huffman.java b/src/main/java/azgracompress/huffman/Huffman.java new file mode 100644 index 0000000..5586088 --- /dev/null +++ b/src/main/java/azgracompress/huffman/Huffman.java @@ -0,0 +1,144 @@ +package azgracompress.huffman; + +import org.jetbrains.annotations.NotNull; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.PriorityQueue; + +public class Huffman { + + class Node implements Comparable<Node> { + private int symbol = -1; + private long symbolFrequency = -1; + + private boolean bit; + private boolean leaf = false; + private double probability = 0.0; + + final Node subNodeA; + final Node subNodeB; + + public Node(final int symbol, final double probability, final long frequency) { + this.symbol = symbol; + this.probability = probability; + this.symbolFrequency = frequency; + subNodeA = null; + subNodeB = null; + this.leaf = true; + } + + public Node(final double probability, Node parentA, Node parentB) { + this.probability = probability; + this.subNodeA = parentA; + this.subNodeB = parentB; + } + + Node traverse(final boolean bit) { + if (subNodeA != null && subNodeA.bit == bit) + return subNodeA; + if (subNodeB != null && subNodeB.bit == bit) + return subNodeB; + + assert (false) : "Corrupted huffman tree"; + return null; + } + + @Override + public int compareTo(@NotNull Huffman.Node otherNode) { + return Double.compare(probability, otherNode.probability); + } + } + + Node root = null; + HashMap<Integer, boolean[]> symbolCodes; + final int[] symbols; + final long[] symbolFrequencies; + + public Huffman(int[] symbols, long[] symbolFrequencies) { + assert (symbols.length == symbolFrequencies.length) : "Array lengths mismatch"; + this.symbols = symbols; + this.symbolFrequencies = symbolFrequencies; + } + + public void buildHuffmanTree() { + PriorityQueue<Node> queue = buildPriorityQueue(); + + + while (queue.size() != 1) { + final Node parentA = queue.poll(); + final Node parentB = queue.poll(); + assert (parentA.probability <= parentB.probability); + assert (parentA != null && parentB != null); + + parentA.bit = true; + parentB.bit = false; + + final double mergedProbabilities = parentA.probability + parentB.probability; + final Node mergedNode = new Node(mergedProbabilities, parentA, parentB); + queue.add(mergedNode); + } + root = queue.poll(); + buildHuffmanCodes(); + } + + private void buildHuffmanCodes() { + symbolCodes = new HashMap<>(symbols.length); + + traverseSymbolCodes(root, new ArrayList<Boolean>()); + } + + private void traverseSymbolCodes(Node currentNode, ArrayList<Boolean> currentCode) { + boolean inLeaf = true; + if (!currentNode.leaf) { + currentCode.add(currentNode.bit); + } + + if (currentNode.subNodeA != null) { + ArrayList<Boolean> codeCopy = new ArrayList<Boolean>(currentCode); + traverseSymbolCodes(currentNode.subNodeA, codeCopy); + inLeaf = false; + } + if (currentNode.subNodeB != null) { + ArrayList<Boolean> codeCopy = new ArrayList<Boolean>(currentCode); + traverseSymbolCodes(currentNode.subNodeB, codeCopy); + inLeaf = false; + } + + if (inLeaf) { + assert (currentNode.leaf); + + boolean[] finalSymbolCode = new boolean[currentCode.size()]; + for (int i = 0; i < finalSymbolCode.length; i++) { + finalSymbolCode[i] = currentCode.get(i); + } + symbolCodes.put(currentNode.symbol, finalSymbolCode); + } + + } + + private PriorityQueue<Node> buildPriorityQueue() { + double totalFrequency = 0.0; + for (final long symbolFrequency : symbolFrequencies) { + totalFrequency += symbolFrequency; + } + + PriorityQueue<Node> queue = new PriorityQueue<>(symbols.length); + + for (int sIndex = 0; sIndex < symbols.length; sIndex++) { + final double symbolProbability = (double) symbolFrequencies[sIndex] / totalFrequency; + queue.add(new Node(symbols[sIndex], symbolProbability, symbolFrequencies[sIndex])); + } + + return queue; + } + + + public boolean[] getCode(final int symbol) { + return symbolCodes.get(symbol); + } + + public Node getRoot() { + return root; + } +} diff --git a/src/main/java/azgracompress/io/OutBitStream.java b/src/main/java/azgracompress/io/OutBitStream.java index 9a46c76..6a840e6 100644 --- a/src/main/java/azgracompress/io/OutBitStream.java +++ b/src/main/java/azgracompress/io/OutBitStream.java @@ -62,8 +62,13 @@ public class OutBitStream implements AutoCloseable { * @param bit True for 1 */ private void writeBit(final int bit) throws IOException { + writeBit(bit > 0); + } + + private void writeBit(final boolean bit) throws IOException { ++bitBufferSize; - if (bit > 0) { + + if (bit) { bitBuffer |= (1 << (8 - bitBufferSize)); } @@ -76,10 +81,14 @@ public class OutBitStream implements AutoCloseable { int bit; for (int shift = 0; shift < bitsPerValue; shift++) { - bit = (value & (1 << shift)); + writeBit(bit); + } + } - //bit = (value & (1 << (31 - shift))); + + public void write(final boolean[] bits) throws IOException { + for (final boolean bit : bits) { writeBit(bit); } } @@ -92,6 +101,7 @@ public class OutBitStream implements AutoCloseable { /** * Flush the bitsteam on close. + * * @throws Exception when flush fails. */ @Override diff --git a/src/main/java/azgracompress/quantization/scalar/ScalarQuantizationCodebook.java b/src/main/java/azgracompress/quantization/scalar/ScalarQuantizationCodebook.java index 82dc681..94378bc 100644 --- a/src/main/java/azgracompress/quantization/scalar/ScalarQuantizationCodebook.java +++ b/src/main/java/azgracompress/quantization/scalar/ScalarQuantizationCodebook.java @@ -28,7 +28,7 @@ public class ScalarQuantizationCodebook { return centroids; } - public long[] getIndicesFrequency() { + public long[] getSymbolFrequencies() { return indexFrequencies; } diff --git a/src/main/java/azgracompress/quantization/scalar/ScalarQuantizer.java b/src/main/java/azgracompress/quantization/scalar/ScalarQuantizer.java index 58c80f1..1caf067 100644 --- a/src/main/java/azgracompress/quantization/scalar/ScalarQuantizer.java +++ b/src/main/java/azgracompress/quantization/scalar/ScalarQuantizer.java @@ -93,7 +93,7 @@ public class ScalarQuantizer { return mse; } - public int[] getCentroids() { - return codebook.getCentroids(); + public ScalarQuantizationCodebook getCodebook() { + return codebook; } } -- GitLab