From 37b14afa986a91f81cca5407bb67605ee039c2ef Mon Sep 17 00:00:00 2001 From: Vojtech Moravec <vojtech.moravec.st@vsb.cz> Date: Sat, 15 Feb 2020 11:35:45 +0100 Subject: [PATCH] Fix bug in VQ and move isZeroVector to VQ. The bug was caused by the LBG algorithm modifying the underlying vectors. --- .../vector/LBGVectorQuantizer.java | 40 ++----------- .../quantization/vector/VectorQuantizer.java | 59 +++++++++++++++---- 2 files changed, 53 insertions(+), 46 deletions(-) diff --git a/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java b/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java index 98dd5ab..6ccf86e 100644 --- a/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java +++ b/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java @@ -26,7 +26,7 @@ public class LBGVectorQuantizer { this.trainingVectors = new TrainingVector[vectors.length]; for (int i = 0; i < vectors.length; i++) { - trainingVectors[i] = new TrainingVector(vectors[i]); + trainingVectors[i] = new TrainingVector(Arrays.copyOf(vectors[i], vectors[i].length)); } this.vectorSize = vectors[0].length; @@ -245,7 +245,7 @@ public class LBGVectorQuantizer { } // We always want to carry zero vector to next iteration. - if (isZeroVector(entryToSplit.getVector())) { + if (VectorQuantizer.isZeroVector(entryToSplit.getVector())) { // Use zero vector in next iteration. newCodebook[cbIndex++] = entryToSplit; @@ -261,7 +261,7 @@ public class LBGVectorQuantizer { continue; } - if (isZeroVector(prtV)) { + if (VectorQuantizer.isZeroVector(prtV)) { // Zero perturbation vector can't create two different entries. // The original entry is going to be moved to the next codebook with the new // random entry, which will get improved in the LBG algorithm. @@ -321,36 +321,6 @@ public class LBGVectorQuantizer { return randomVector; } - /** - * Check whether all vector elements are equal to 0.0 - * - * @param vector Vector array. 
- * @return True if all elements are zeros. - */ - private boolean isZeroVector(final double[] vector) { - for (final double value : vector) { - if (value != 0.0) { - return false; - } - } - return true; - } - - /** - * Check whether all vector elements are equal to 0 - * - * @param vector Vector array. - * @return True if all elements are zeros. - */ - private boolean isZeroVector(final int[] vector) { - for (final double value : vector) { - if (value != 0.0) { - return false; - } - } - return true; - } - /** * Execute the LBG algorithm with default epsilon value. @@ -657,7 +627,9 @@ public class LBGVectorQuantizer { int largestEntrySize = codebook[emptyEntryIndex].getVectorCount(); // NOTE(Moravec): We can't select random training vector, because zero vector would create another zero vector. for (int i = 0; i < codebook.length; i++) { - if ((codebook[i].getVectorCount() > largestEntrySize) && !isZeroVector(codebook[i].getVector())) { + if ((codebook[i].getVectorCount() > largestEntrySize) && + !VectorQuantizer.isZeroVector(codebook[i].getVector())) { + largestEntryIndex = i; largestEntrySize = codebook[i].getVectorCount(); } diff --git a/src/main/java/azgracompress/quantization/vector/VectorQuantizer.java b/src/main/java/azgracompress/quantization/vector/VectorQuantizer.java index 22ad150..44318aa 100644 --- a/src/main/java/azgracompress/quantization/vector/VectorQuantizer.java +++ b/src/main/java/azgracompress/quantization/vector/VectorQuantizer.java @@ -2,6 +2,7 @@ package azgracompress.quantization.vector; public class VectorQuantizer { + private final VectorDistanceMetric metric = VectorDistanceMetric.Euclidean; private final CodebookEntry[] codebook; private final int vectorSize; @@ -12,7 +13,7 @@ public class VectorQuantizer { public int[] quantize(final int[] dataVector) { assert (dataVector.length > 0 && dataVector.length % vectorSize == 0) : "Wrong vector size"; - final CodebookEntry closestEntry = findClosestCodebookEntry(dataVector, 
VectorDistanceMetric.Euclidean); + final CodebookEntry closestEntry = findClosestCodebookEntry(dataVector, metric); return closestEntry.getVector(); } @@ -22,8 +23,7 @@ public class VectorQuantizer { if (workerCount == 1) { for (int vectorIndex = 0; vectorIndex < dataVectors.length; vectorIndex++) { - final CodebookEntry closestEntry = findClosestCodebookEntry(dataVectors[vectorIndex], - VectorDistanceMetric.Euclidean); + final CodebookEntry closestEntry = findClosestCodebookEntry(dataVectors[vectorIndex], metric); result[vectorIndex] = closestEntry.getVector(); } } else { @@ -48,8 +48,7 @@ public class VectorQuantizer { if (maxWorkerCount == 1) { for (int vectorIndex = 0; vectorIndex < dataVectors.length; vectorIndex++) { - indices[vectorIndex] = findClosestCodebookEntryIndex(dataVectors[vectorIndex], - VectorDistanceMetric.Euclidean); + indices[vectorIndex] = findClosestCodebookEntryIndex(dataVectors[vectorIndex], metric); } } else { // Cap the worker count on 8 @@ -63,8 +62,7 @@ public class VectorQuantizer { workers[wId] = new Thread(() -> { for (int vectorIndex = fromIndex; vectorIndex < toIndex; vectorIndex++) { - indices[vectorIndex] = findClosestCodebookEntryIndex(dataVectors[vectorIndex], - VectorDistanceMetric.Euclidean); + indices[vectorIndex] = findClosestCodebookEntryIndex(dataVectors[vectorIndex], metric); } }); @@ -117,7 +115,7 @@ public class VectorQuantizer { } private CodebookEntry findClosestCodebookEntry(final int[] dataVector) { - return findClosestCodebookEntry(dataVector, VectorDistanceMetric.Euclidean); + return findClosestCodebookEntry(dataVector, metric); } private CodebookEntry findClosestCodebookEntry(final int[] dataVector, final VectorDistanceMetric metric) { @@ -125,21 +123,58 @@ public class VectorQuantizer { } private int findClosestCodebookEntryIndex(final int[] dataVector, final VectorDistanceMetric metric) { + boolean closesIsZero = false; double minDist = Double.MAX_VALUE; int closestEntryIndex = 0; - final int codebookSize = 
codebook.length; - for (int i = 0; i < codebookSize; i++) { - final double dist = distanceBetweenVectors(dataVector, codebook[i].getVector(), metric); + for (int entryIndex = 0; entryIndex < codebook.length; entryIndex++) { + + + final double dist = distanceBetweenVectors(dataVector, codebook[entryIndex].getVector(), metric); if (dist < minDist) { minDist = dist; - closestEntryIndex = i; + closestEntryIndex = entryIndex; + closesIsZero = isZeroVector(codebook[entryIndex].getVector()); } } + + if (closesIsZero) { +// System.out.println("One of zero vectors."); + } return closestEntryIndex; } public CodebookEntry[] getCodebook() { return codebook; } + + /** + * Check whether all vector elements are equal to 0.0 + * + * @param vector Vector array. + * @return True if all elements are zeros. + */ + public static boolean isZeroVector(final double[] vector) { + for (final double value : vector) { + if (value != 0.0) { + return false; + } + } + return true; + } + + /** + * Check whether all vector elements are equal to 0 + * + * @param vector Vector array. + * @return True if all elements are zeros. + */ + public static boolean isZeroVector(final int[] vector) { + for (final double value : vector) { + if (value != 0.0) { + return false; + } + } + return true; + } } -- GitLab