From 235b181cd7f514a2cd9da2c7816b50a1a2771563 Mon Sep 17 00:00:00 2001
From: Vojtech Moravec <vojtech.moravec.st@vsb.cz>
Date: Tue, 18 Feb 2020 19:47:15 +0100
Subject: [PATCH] Fix VQ for images with small amount of unique vectors.

---
 .../vector/LBGVectorQuantizer.java            | 145 ++++++++++++++++--
 .../quantization/vector/TrainingVector.java   |   6 +
 2 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java b/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java
index 4adf3f0..c05137b 100644
--- a/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java
+++ b/src/main/java/azgracompress/quantization/vector/LBGVectorQuantizer.java
@@ -4,6 +4,7 @@ import azgracompress.U16;
 import azgracompress.utilities.Stopwatch;
 import azgracompress.utilities.Utils;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Random;
 
@@ -14,6 +15,8 @@ public class LBGVectorQuantizer {
     private final int codebookSize;
     private final int workerCount;
 
+    private int uniqueVectorCount = 0;
+    private ArrayList<TrainingVector> uniqueTrainingVectors;
     private final TrainingVector[] trainingVectors;
     private final VectorDistanceMetric metric = VectorDistanceMetric.Euclidean;
 
@@ -24,16 +27,72 @@ public class LBGVectorQuantizer {
         assert (vectors.length > 0) : "No training vectors provided";
 
         this.vectorSize = vectors[0].length;
-//        final int[][] vectorsCopy = new int[vectors.length][vectorSize];
-//        System.arraycopy(vectors, 0, vectorsCopy, 0, vectors.length);
+
 
         this.trainingVectors = new TrainingVector[vectors.length];
         for (int i = 0; i < vectors.length; i++) {
-            trainingVectors[i] = new TrainingVector(Arrays.copyOf(vectors[i],vectors[i].length));
+            trainingVectors[i] = new TrainingVector(Arrays.copyOf(vectors[i], vectorSize));
         }
 
+        //        boolean allzero = true;
+        //        for (int i = 0; i < vectors.length; i++) {
+        //            if (!VectorQuantizer.isZeroVector(trainingVectors[i].getVector())) {
+        //                allzero = false;
+        //            }
+        //        }
+
         this.codebookSize = codebookSize;
         this.workerCount = workerCount;
+
+        findUniqueVectors();
+    }
+
+    private void findUniqueVectors() {
+        uniqueVectorCount = 0;
+        uniqueTrainingVectors = new ArrayList<>(codebookSize);
+        boolean unique;
+        for (final TrainingVector trainingVector : trainingVectors) {
+            unique = true;
+            for (final TrainingVector uniqueVector : uniqueTrainingVectors) {
+                if (uniqueVector.vectorEqual(trainingVector)) {
+                    unique = false;
+                    break;
+                }
+            }
+            if (unique) {
+                uniqueTrainingVectors.add(trainingVector);
+                ++uniqueVectorCount;
+                // If there is more than codebookSize training vectors, then we must use the LBG algorithm.
+                if (uniqueVectorCount > codebookSize) {
+                    uniqueTrainingVectors = null;
+                    break;
+                }
+            }
+        }
+    }
+
+    private LBGResult createCodebookFromUniqueVectors() {
+        assert (uniqueTrainingVectors != null) : "uniqueTrainingVectors aren't initialized.";
+        if (verbose) {
+            System.out.println("Creating codebook from unique vectors.");
+        }
+        CodebookEntry[] codebook = new CodebookEntry[codebookSize];
+        int[] zeros = new int[vectorSize];
+        Arrays.fill(zeros, 0);
+        CodebookEntry zeroEntry = new CodebookEntry(zeros);
+        for (int i = 0; i < codebookSize; i++) {
+            if (i < uniqueVectorCount) {
+                codebook[i] = new CodebookEntry(uniqueTrainingVectors.get(i).getVector());
+            } else {
+                codebook[i] = zeroEntry;
+            }
+        }
+        final double mse = averageMse(codebook);
+        final double psnr = Utils.calculatePsnr(mse, U16.Max);
+        if (verbose) {
+            System.out.println(String.format("Final MSE: %.4f\nFinal PSNR: %.4f (dB)", mse, psnr));
+        }
+        return new LBGResult(codebook, mse, psnr);
     }
 
     /**
@@ -45,6 +104,7 @@ public class LBGVectorQuantizer {
         return findOptimalCodebook(false);
     }
 
+
     /**
      * Find the optimal codebook of vectors, used for vector quantization.
      *
@@ -55,6 +115,10 @@ public class LBGVectorQuantizer {
         Stopwatch stopwatch = Stopwatch.startNew("findOptimalCodebook");
         this.verbose = isVerbose;
 
+        if (uniqueVectorCount < codebookSize) {
+            return createCodebookFromUniqueVectors();
+        }
+
         LearningCodebookEntry[] codebook = initializeCodebook();
         if (verbose) {
             System.out.println("Got initial codebook. Improving codebook...");
@@ -103,7 +167,7 @@ public class LBGVectorQuantizer {
      * @param codebook Codebook of vectors.
      * @return Mean square error.
      */
-    private double averageMse(final LearningCodebookEntry[] codebook) {
+    private double averageMse(final CodebookEntry[] codebook) {
         Stopwatch s = Stopwatch.startNew("averageMse");
         double mse = 0.0;
         if (workerCount > 1) {
@@ -116,7 +180,7 @@ public class LBGVectorQuantizer {
                 final int toIndex = (wId == workerCount - 1) ? trainingVectors.length : (workSize + (wId * workSize));
 
                 workers[wId] = new Thread(() -> {
-                    VectorQuantizer quantizer = new VectorQuantizer(learningCodebookToCodebook(codebook));
+                    VectorQuantizer quantizer = new VectorQuantizer(codebook);
                     double threadMse = 0.0;
                     int cnt = 0;
                     int[] vector;
@@ -149,7 +213,7 @@ public class LBGVectorQuantizer {
             }
             mse = _mse / (double) workerCount;
         } else {
-            VectorQuantizer quantizer = new VectorQuantizer(learningCodebookToCodebook(codebook));
+            VectorQuantizer quantizer = new VectorQuantizer(codebook);
             for (final TrainingVector trV : trainingVectors) {
                 int[] quantizedV = quantizer.quantize(trV.getVector());
 
@@ -240,10 +304,16 @@ public class LBGVectorQuantizer {
                             "There are no vectors from which to create perturbation " + "vector";
                     prtV = getPerturbationVector(trainingVectors);
                 } else {
-                    assert (entryToSplit.getVectorCount() > 0) :
-                            "There are no vectors from which to create perturbation vector";
+                    //                    assert (entryToSplit.getVectorCount() > 0) :
+                    //                            "There are no vectors from which to create perturbation vector";
+
+                    if (entryToSplit.getVectorCount() > 0) {
+                        prtV = entryToSplit.getPerturbationVector();
+                    } else {
+                        prtV = generateRandomVectorDouble();
+                    }
+
 
-                    prtV = entryToSplit.getPerturbationVector();
                 }
 
                 // We always want to carry zero vector to next iteration.
@@ -323,6 +393,15 @@ public class LBGVectorQuantizer {
         return randomVector;
     }
 
+    private double[] generateRandomVectorDouble() {
+        double[] randomVector = new double[vectorSize];
+        Random rnd = new Random();
+        for (int i = 0; i < vectorSize; i++) {
+            randomVector[i] = rnd.nextInt(U16.Max + 1);
+        }
+        return randomVector;
+    }
+
 
     /**
      * Execute the LBG algorithm with default epsilon value.
@@ -340,6 +419,7 @@ public class LBGVectorQuantizer {
      * @param epsilon  Epsilon value.
      */
     private void LBG(LearningCodebookEntry[] codebook, final double epsilon) {
+        //this.verbose = true;
         double previousDistortion = Double.POSITIVE_INFINITY;
         int iteration = 1;
         double lastDist = Double.POSITIVE_INFINITY;
@@ -363,6 +443,14 @@ public class LBGVectorQuantizer {
                 System.out.println(String.format("---- It: %d Distortion: %.5f", iteration++, dist));
                 System.out.println(String.format("Last Dist: %.5f Current dist: %.5f", lastDist, dist));
             }
+
+            if (Double.isNaN(dist)) {
+                if (verbose) {
+                    System.out.println("Distortion is NaN.");
+                }
+                break;
+            }
+
             if (dist > lastDist) {
                 if (verbose) {
                     System.out.println("Previous distortion was better. Ending LBG...");
@@ -602,8 +690,12 @@ public class LBGVectorQuantizer {
             }
         }
 
+        boolean ableToFix = true;
         while (emptyEntryIndex != -1) {
-            fixSingleEmptyEntry(codebook, emptyEntryIndex);
+            ableToFix = fixSingleEmptyEntry(codebook, emptyEntryIndex);
+            if (!ableToFix) {
+                break;
+            }
             emptyEntryIndex = -1;
             for (int i = 0; i < codebook.length; i++) {
                 if (codebook[i].getVectorCount() < 2) {
@@ -612,8 +704,10 @@ public class LBGVectorQuantizer {
             }
         }
 
-        for (final LearningCodebookEntry lce : codebook) {
-            assert (lce.getVectorCount() > 0) : "LearningCodebookEntry is empty!";
+        if (ableToFix) {
+            for (final LearningCodebookEntry lce : codebook) {
+                assert (lce.getVectorCount() > 0) : "LearningCodebookEntry is empty!";
+            }
         }
     }
 
@@ -623,22 +717,31 @@ public class LBGVectorQuantizer {
      * @param codebook        Vector codebook.
      * @param emptyEntryIndex Index of the empty entry.
      */
-    private void fixSingleEmptyEntry(LearningCodebookEntry[] codebook, final int emptyEntryIndex) {
+    private boolean fixSingleEmptyEntry(LearningCodebookEntry[] codebook, final int emptyEntryIndex) {
         // Find biggest partition.
         int largestEntryIndex = emptyEntryIndex;
         int largestEntrySize = codebook[emptyEntryIndex].getVectorCount();
+
         // NOTE(Moravec): We can't select random training vector, because zero vector would create another zero vector.
         for (int i = 0; i < codebook.length; i++) {
-            if ((codebook[i].getVectorCount() > largestEntrySize) &&
-                    !VectorQuantizer.isZeroVector(codebook[i].getVector())) {
+            if ((codebook[i].getVectorCount() > largestEntrySize) && !VectorQuantizer.isZeroVector(codebook[i].getVector())) {
+                /*
+
+                if (VectorQuantizer.isZeroVector(codebook[i].getVector())) {
+                    System.out.println("ZERO vector is biggest??");
+                }
+                */
 
                 largestEntryIndex = i;
                 largestEntrySize = codebook[i].getVectorCount();
             }
         }
-
+        if (largestEntryIndex == emptyEntryIndex) {
+            // Unable to find empty entry.
+            return false;
+        }
         // Assert that we have found some non empty codebook entry.
-        assert (largestEntryIndex != emptyEntryIndex) : "Unable to find biggest partition.";
+        //assert (largestEntryIndex != emptyEntryIndex) : "Unable to find biggest partition.";
         assert (codebook[largestEntryIndex].getVectorCount() > 0) : "Biggest partitions was empty before split";
 
         // Get training vectors assigned to the largest codebook entry.
@@ -647,6 +750,13 @@ public class LBGVectorQuantizer {
 
         // Choose random trainingVector from biggest partition and set it as new entry.
         int randomIndex = new Random().nextInt(largestPartitionVectors.length);
+        //        int counter = 0;
+        //        while (VectorQuantizer.isZeroVector(largestPartitionVectors[randomIndex].getVector())) {
+        //            randomIndex = new Random().nextInt(largestPartitionVectors.length);
+        //            ++counter;
+        //            //            System.out.println("x");
+        //        }
+        //        System.out.println("FOund non zero random after " + counter);
         // Plane the new entry on the index of the empty entry.
         codebook[emptyEntryIndex] = new LearningCodebookEntry(largestPartitionVectors[randomIndex].getVector());
 
@@ -708,6 +818,7 @@ public class LBGVectorQuantizer {
 
         codebook[largestEntryIndex].setInfo(oldEntryInfo);
         codebook[emptyEntryIndex].setInfo(newEntryInfo);
+        return true;
     }
 
     /**
diff --git a/src/main/java/azgracompress/quantization/vector/TrainingVector.java b/src/main/java/azgracompress/quantization/vector/TrainingVector.java
index 1d94825..8746cdd 100644
--- a/src/main/java/azgracompress/quantization/vector/TrainingVector.java
+++ b/src/main/java/azgracompress/quantization/vector/TrainingVector.java
@@ -1,5 +1,7 @@
 package azgracompress.quantization.vector;
 
+import java.util.Arrays;
+
 /**
  * Training image vector.
  */
@@ -13,6 +15,10 @@ public class TrainingVector {
         this.vector = vector;
     }
 
+    public boolean vectorEqual(final TrainingVector other) {
+        return Arrays.equals(vector, other.vector);
+    }
+
     /**
      * Set the closest codebook entry index and its distance.
      *
-- 
GitLab