From dd23d26d9ce16b0ba8c35cc73183cd9591dbb824 Mon Sep 17 00:00:00 2001
From: Vojtech Moravec <vojtech.moravec.st@vsb.cz>
Date: Thu, 3 Sep 2020 14:28:32 +0200
Subject: [PATCH] k-DTree data structure.

Implemented the basic k-DTree data structure. The main feature, nearest
neighbor search, is not implemented yet.
The tree can be constructed from the provided vectors.
This is an improved variant of the tree presented in the paper:

'An Algorithm for Finding Best Matches in Logarithmic Expected Time'
---
 .../java/azgracompress/kdtree/KDNode.java     |  44 ++++++
 .../azgracompress/kdtree/KDTreeBuilder.java   | 128 ++++++++++++++++++
 .../azgracompress/kdtree/TerminalKDNode.java  |  20 +++
 3 files changed, 192 insertions(+)
 create mode 100644 src/main/java/azgracompress/kdtree/KDNode.java
 create mode 100644 src/main/java/azgracompress/kdtree/KDTreeBuilder.java
 create mode 100644 src/main/java/azgracompress/kdtree/TerminalKDNode.java

diff --git a/src/main/java/azgracompress/kdtree/KDNode.java b/src/main/java/azgracompress/kdtree/KDNode.java
new file mode 100644
index 0000000..dfa09f3
--- /dev/null
+++ b/src/main/java/azgracompress/kdtree/KDNode.java
@@ -0,0 +1,44 @@
+package azgracompress.kdtree;
+
+/** Node of a k-d tree: a split coordinate index, a split value and two child nodes. */
+public class KDNode {
+    private final int keyIndex;
+    private final int median;
+    private final KDNode loSon;
+    private final KDNode hiSon;
+
+    /** Creates a node without split data: key index and median are -1, both sons null. */
+    protected KDNode() {
+        this(-1, -1, null, null);
+    }
+
+    /** Creates a node that splits coordinate {@code keyIndex} at value {@code median}. */
+    public KDNode(final int keyIndex, final int median, final KDNode loSon, final KDNode hiSon) {
+        this.keyIndex = keyIndex;
+        this.median = median;
+        this.loSon = loSon;
+        this.hiSon = hiSon;
+    }
+
+    /** @return index of the coordinate this node splits on (-1 if never set). */
+    public final int getKeyIndex() {
+        return keyIndex;
+    }
+    /** @return split value passed at construction (-1 if never set). */
+    public final int getMedian() {
+        return median;
+    }
+    /** @return son passed as {@code loSon}, or {@code null} if none was given. */
+    public final KDNode getLoSon() {
+        return loSon;
+    }
+    /** @return son passed as {@code hiSon}, or {@code null} if none was given. */
+    public final KDNode getHiSon() {
+        return hiSon;
+    }
+
+    /** @return {@code false}; subclasses representing terminal nodes override this. */
+    public boolean isTerminal() {
+        return false;
+    }
+}
diff --git a/src/main/java/azgracompress/kdtree/KDTreeBuilder.java b/src/main/java/azgracompress/kdtree/KDTreeBuilder.java
new file mode 100644
index 0000000..91ac720
--- /dev/null
+++ b/src/main/java/azgracompress/kdtree/KDTreeBuilder.java
@@ -0,0 +1,128 @@
+package azgracompress.kdtree;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Builds a k-d tree from integer records: inner nodes split on the coordinate with the greatest
+ * spread, terminal nodes keep a bucket of records. The node counters are never reset, so they
+ * accumulate over all calls made on one builder instance.
+ */
+public class KDTreeBuilder {
+    /** Outcome of splitting a record set around a median: a low part and a high part. */
+    private static class DividedRecords {
+        private final int[][] loRecords;
+        private final int[][] hiRecords;
+        DividedRecords(final int[][] loRecords, final int[][] hiRecords) {
+            this.loRecords = loRecords;
+            this.hiRecords = hiRecords;
+        }
+    }
+
+    private final int bucketSize;
+    private final int dimension;
+    private int nodeCount = 0;
+    private int terminalNodeCount = 0;
+
+    /** Creates a builder using the first {@code dimension} coordinates and a target bucket size of {@code bucketSize}. */
+    public KDTreeBuilder(final int dimension, final int bucketSize) {
+        this.bucketSize = bucketSize;
+        this.dimension = dimension;
+    }
+
+    /**
+     * Recursively builds the (sub)tree for {@code records}. Records that fit into one bucket, or
+     * that the median of the chosen coordinate cannot separate (e.g. identical records), are
+     * kept together in a terminal node; recursing on the latter would never terminate.
+     * @param records records to index; each needs at least {@code dimension} values.
+     * @return root node of the built (sub)tree.
+     */
+    public KDNode buildTree(final int[][] records) {
+        // Fewer than two records can never be split, whatever the bucket size is.
+        if (records.length <= bucketSize || records.length < 2) {
+            return makeTerminalNode(records);
+        }
+        // Split on the coordinate with the greatest spread.
+        double maxSpread = -1.0;
+        int keyIndex = 0;
+        for (int j = 0; j < dimension; j++) {
+            final double spread = calculateKeySpread(records, j);
+            if (spread > maxSpread) {
+                maxSpread = spread;
+                keyIndex = j;
+            }
+        }
+        final int median = calculateKeyMedian(records, keyIndex);
+        final DividedRecords dividedRecords = divideRecords(records, median, keyIndex);
+        if (dividedRecords.loRecords.length == 0 || dividedRecords.hiRecords.length == 0) {
+            // The split made no progress, so keep all records together in one bucket.
+            return makeTerminalNode(records);
+        }
+        return makeNonTerminalNode(keyIndex, median, dividedRecords);
+    }
+
+    /** Puts records whose key is {@code <= median} into the low part, the rest into the high part. */
+    private DividedRecords divideRecords(final int[][] records, final int median, final int keyIndex) {
+        final ArrayList<int[]> loRecords = new ArrayList<>();
+        final ArrayList<int[]> hiRecords = new ArrayList<>();
+        for (final int[] record : records) {
+            (record[keyIndex] <= median ? loRecords : hiRecords).add(record);
+        }
+        return new DividedRecords(loRecords.toArray(new int[0][]), hiRecords.toArray(new int[0][]));
+    }
+
+    /** Builds both subtrees, then wraps them in a counted non-terminal node. */
+    private KDNode makeNonTerminalNode(final int keyIndex, final int median, final DividedRecords dividedRecords) {
+        final KDNode loSon = buildTree(dividedRecords.loRecords);
+        final KDNode hiSon = buildTree(dividedRecords.hiRecords);
+        ++nodeCount;
+        return new KDNode(keyIndex, median, loSon, hiSon);
+    }
+
+    /** Creates a terminal node with {@code records} as its bucket and updates both counters. */
+    public KDNode makeTerminalNode(final int[][] records) {
+        ++nodeCount;
+        ++terminalNodeCount;
+        return new TerminalKDNode(records);
+    }
+
+    /** Median of coordinate {@code keyIndex}; for an even count, the floored mean of the middle pair. */
+    private int calculateKeyMedian(final int[][] records, final int keyIndex) {
+        assert (records.length > 1);
+        final int[] sortedArray = new int[records.length];
+        for (int i = 0; i < records.length; i++) {
+            sortedArray[i] = records[i][keyIndex];
+        }
+        Arrays.sort(sortedArray);
+        final int midIndex = sortedArray.length / 2;
+        if ((sortedArray.length % 2) != 0) {
+            return sortedArray[midIndex];
+        }
+        // Arithmetic shift floors the overflow-free long sum, so negative means round down too.
+        return (int) (((long) sortedArray[midIndex - 1] + (long) sortedArray[midIndex]) >> 1);
+    }
+
+    /** Spread of coordinate {@code keyIndex}: square root of the summed squared deviations from the mean. */
+    private double calculateKeySpread(final int[][] records, final int keyIndex) {
+        double center = 0.0;
+        for (final int[] record : records) {
+            center += record[keyIndex];
+        }
+        center /= (double) records.length;
+        double spread = 0.0;
+        for (final int[] record : records) {
+            spread += Math.pow(center - (double) record[keyIndex], 2);
+        }
+        return Math.sqrt(spread);
+    }
+
+    /** @return number of nodes, terminal ones included, created by this builder so far. */
+    public int getNodeCount() {
+        return nodeCount;
+    }
+
+    /** @return number of terminal nodes created by this builder so far. */
+    public int getTerminalNodeCount() {
+        return terminalNodeCount;
+    }
+}
diff --git a/src/main/java/azgracompress/kdtree/TerminalKDNode.java b/src/main/java/azgracompress/kdtree/TerminalKDNode.java
new file mode 100644
index 0000000..6dc239b
--- /dev/null
+++ b/src/main/java/azgracompress/kdtree/TerminalKDNode.java
@@ -0,0 +1,20 @@
+package azgracompress.kdtree;
+
+/** Terminal (leaf) node of the k-d tree; holds a bucket of records and no split data. */
+public class TerminalKDNode extends KDNode {
+    private final int[][] bucket;
+    /** Stores {@code records} as this leaf's bucket; the array is kept by reference. */
+    public TerminalKDNode(final int[][] records) {
+        bucket = records;
+    }
+
+    /** @return the bucket passed to the constructor (not a copy). */
+    public int[][] getBucket() {
+        return bucket;
+    }
+
+    @Override
+    public boolean isTerminal() {
+        return true;
+    }
+}
-- 
GitLab