Skip to content
Snippets Groups Projects
Commit 4c35ade6 authored by Vojtech Moravec
Browse files

Initial commit of compress utility.

parents
Branches
No related tags found
No related merge requests found
.idea/
target/
class/
out/
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_9">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
pom.xml 0 → 100644
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the DataCompressor utility. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates (group, artifact, version). -->
<groupId>org.scijava</groupId>
<artifactId>DataCompressor</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<!-- Compile as Java 9: the sources call InputStream.readAllBytes(), which was added in Java 9. -->
<source>9</source>
<target>9</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
import quantization.LloydMaxU16ScalarQuantization;
import quantization.Utils;
import java.io.FileNotFoundException;
/**
 * Command-line entry point of the compress utility.
 *
 * <p>Reads a file of big-endian unsigned 16-bit samples, prints how the sample values are
 * distributed (zero, at most 1000, above 1000) and trains a Lloyd-Max scalar quantizer on them.
 */
public class DataCompressor {
    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_SOURCE_FILE = "D:\\tmp\\server-dump\\small.bin";

    /** Quantizer bit count used when none is given on the command line. */
    private static final int DEFAULT_NUMBER_OF_BITS = 3;

    /** Upper bound (inclusive) of the "small value" bucket in the printed statistics. */
    private static final int SMALL_VALUE_LIMIT = 1000;

    /**
     * Runs the statistics report and the quantizer training.
     *
     * @param args optional arguments: {@code args[0]} is the input file path and {@code args[1]}
     *             the number of quantizer bits; both fall back to the built-in defaults
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws NumberFormatException if {@code args[1]} is present but is not an integer
     */
    public static void main(String[] args) throws FileNotFoundException {
        final String sourceFile = args.length > 0 ? args[0] : DEFAULT_SOURCE_FILE;
        final int numberOfBits = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_NUMBER_OF_BITS;

        char[] values = Utils.convertBytesToU16(Utils.readFileBytes(sourceFile));
        if (values.length == 0) {
            // Nothing to report or to train on; the ratios below would all be 0/0.
            System.err.println("No 16-bit samples could be read from " + sourceFile);
            return;
        }

        long zero = 0;
        long leq1000 = 0;
        long gt1000 = 0;
        for (char value : values) {
            if (value == 0) {
                ++zero;
            } else if (value <= SMALL_VALUE_LIMIT) {
                ++leq1000;
            } else {
                ++gt1000;
            }
        }

        // Scale the ratios to real percentages, because the format string prints a '%' sign.
        final double total = (double) values.length;
        double zeroPerc = 100.0 * zero / total;
        double leq1000Perc = 100.0 * leq1000 / total;
        double gt1000Perc = 100.0 * gt1000 / total;
        System.out.println(String.format(" =0:\t%f%%\n<=1000:\t%f%%\n >1000:\t%f%%", zeroPerc, leq1000Perc, gt1000Perc));

        LloydMaxU16ScalarQuantization quantization = new LloydMaxU16ScalarQuantization(values, numberOfBits);
        quantization.train();
        System.out.println("Finished learning...");
    }
}
Manifest-Version: 1.0
Main-Class: DataCompressor
package quantization;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
/**
 * Lloyd-Max scalar quantizer for unsigned 16-bit samples.
 *
 * <p>The value range {@code [0, 65535]} is split into {@code 2^bitCount} intervals. Training
 * alternates between placing each interval boundary halfway between the neighbouring centroids
 * and moving each centroid to the mean of the training samples that fall into its interval,
 * until the relative improvement of the mean squared error drops to the convergence threshold.
 *
 * <p>Interval {@code j} covers {@code boundaryPoints[j] <= v < boundaryPoints[j + 1]}; the last
 * interval additionally contains the maximum value 65535.
 */
public class LloydMaxU16ScalarQuantization {
    /** Smallest representable sample value. */
    private static final int MIN_VALUE = 0;

    /** Largest representable sample value. */
    private static final int MAX_VALUE = 65535;

    /** Largest bit count for which {@code 2^bitCount} still fits into an {@code int}. */
    private static final int MAX_BIT_COUNT = 30;

    /** Training stops once the relative MSE improvement is not above this value. */
    private static final double CONVERGENCE_THRESHOLD = 0.001;

    private final char[] trainingData;
    private final int bitCount;
    private final int intervalCount;

    /** Reconstruction value of each interval; {@code null} until {@link #train()} has run. */
    private double[] centroids;

    /** Interval borders; {@code intervalCount + 1} entries, first is 0 and last is 65535. */
    private double[] boundaryPoints;

    /** Occurrence count (not normalized) of every possible sample value in the training data. */
    private double[] pdf;

    /**
     * Creates a quantizer trained on the samples stored in a file.
     *
     * @param trainDataset path of a file holding big-endian unsigned 16-bit samples
     * @param bitCount     number of bits per quantized value, {@code 0..30}
     * @throws FileNotFoundException    if the file cannot be opened
     * @throws IllegalArgumentException if {@code bitCount} is out of range
     */
    public LloydMaxU16ScalarQuantization(final String trainDataset, final int bitCount) throws FileNotFoundException {
        this(Utils.convertBytesToU16(Utils.readFileBytes(trainDataset)), bitCount);
    }

    /**
     * Creates a quantizer trained on the given samples. The array is not copied.
     *
     * @param trainData training samples, each an unsigned 16-bit value
     * @param bitCount  number of bits per quantized value, {@code 0..30}
     * @throws NullPointerException     if {@code trainData} is {@code null}
     * @throws IllegalArgumentException if {@code bitCount} is out of range
     */
    public LloydMaxU16ScalarQuantization(final char[] trainData, final int bitCount) {
        if (trainData == null) {
            throw new NullPointerException("trainData must not be null");
        }
        if (bitCount < 0 || bitCount > MAX_BIT_COUNT) {
            throw new IllegalArgumentException(
                    "bitCount must be between 0 and " + MAX_BIT_COUNT + ", got " + bitCount);
        }
        this.trainingData = trainData;
        this.bitCount = bitCount;
        this.intervalCount = 1 << bitCount;
    }

    /**
     * Creates a quantizer trained on samples stored as {@code short}; every element is
     * reinterpreted as an unsigned 16-bit value.
     *
     * @param trainData training samples
     * @param bitCount  number of bits per quantized value, {@code 0..30}
     * @throws NullPointerException     if {@code trainData} is {@code null}
     * @throws IllegalArgumentException if {@code bitCount} is out of range
     */
    public LloydMaxU16ScalarQuantization(final short[] trainData, final int bitCount) {
        this(toUnsigned(trainData), bitCount);
    }

    /** Reinterprets every {@code short} as an unsigned 16-bit {@code char}. */
    private static char[] toUnsigned(final short[] data) {
        final char[] converted = new char[data.length];
        for (int i = 0; i < data.length; i++) {
            converted[i] = (char) data[i];
        }
        return converted;
    }

    /** Allocates the codebook and spreads the initial centroids uniformly over the value range. */
    private void initialize() {
        centroids = new double[intervalCount];
        boundaryPoints = new double[intervalCount + 1];
        boundaryPoints[0] = MIN_VALUE;
        boundaryPoints[intervalCount] = MAX_VALUE;
        final double intervalSize = (double) (MAX_VALUE - MIN_VALUE) / (double) intervalCount;
        for (int i = 0; i < intervalCount; i++) {
            centroids[i] = ((double) i + 0.5) * intervalSize;
        }
    }

    /** Counts how often every possible value occurs in the training data. */
    private void initializeProbabilityDensityFunction() {
        pdf = new double[MAX_VALUE + 1];
        for (int i = 0; i < trainingData.length; i++) {
            pdf[trainingData[i]] += 1;
        }
    }

    /** Moves every inner boundary halfway between its two neighbouring centroids. */
    private void recalculateBoundaryPoints() {
        for (int j = 1; j < intervalCount; j++) {
            boundaryPoints[j] = (centroids[j] + centroids[j - 1]) / 2.0;
        }
    }

    /**
     * Moves every centroid to the mean of the training values inside its interval.
     *
     * <p>The sums are restarted for each interval, and the integer range of an interval matches
     * the membership test of {@link #quantizeChar(char)}, so no value is counted twice. An
     * interval without training samples keeps its previous centroid instead of becoming NaN.
     */
    private void recalculateCentroids() {
        final int lastInterval = intervalCount - 1;
        for (int j = 0; j < intervalCount; j++) {
            // Smallest integer >= lower boundary.
            final int from = (int) Math.ceil(boundaryPoints[j]);
            // Largest integer < upper boundary; the last interval also owns MAX_VALUE itself.
            final int to = (j == lastInterval) ? MAX_VALUE : (int) Math.ceil(boundaryPoints[j + 1]) - 1;

            double numerator = 0.0;
            double denominator = 0.0;
            for (int n = from; n <= to; n++) {
                numerator += (double) n * pdf[n];
                denominator += pdf[n];
            }
            if (denominator > 0.0) {
                centroids[j] = numerator / denominator;
            }
        }
    }

    /** Alternative centroid update (currently unused): the midpoint of each interval. */
    private void recalculateCentroids2() {
        for (int j = 0; j < intervalCount; j++) {
            centroids[j] = boundaryPoints[j] + ((boundaryPoints[j + 1] - boundaryPoints[j]) / 2.0);
        }
    }

    /**
     * Returns the centroid of the interval that contains {@code v}.
     *
     * <p>The boundaries are non-decreasing and start at 0, so the first interval whose upper
     * boundary is above {@code v} is the containing one; anything not below the last inner
     * boundary, including 65535, belongs to the last interval.
     */
    private double quantizeChar(char v) {
        final double dv = (double) v;
        for (int intervalId = 1; intervalId < intervalCount; intervalId++) {
            if (dv < boundaryPoints[intervalId]) {
                return centroids[intervalId - 1];
            }
        }
        return centroids[intervalCount - 1];
    }

    /** Quantizes a sample stored as {@code short}, reinterpreting it as unsigned. */
    private double quantizeShort(short v) {
        return quantizeChar((char) v);
    }

    /**
     * Computes the mean squared error between the training data and its quantized form.
     *
     * @return the MSE, or NaN when there is no training data
     */
    private double getCurrentMse() {
        double mse = 0.0;
        for (int i = 0; i < trainingData.length; i++) {
            final double error = trainingData[i] - quantizeChar(trainingData[i]);
            mse += error * error;
        }
        mse /= (double) trainingData.length;
        return mse;
    }

    /**
     * Trains the codebook on the training data and prints the codebook and the MSE after every
     * iteration. At least two iterations are always performed.
     */
    public void train() {
        initialize();
        initializeProbabilityDensityFunction();

        double currentMse = runIteration();
        double improvement;
        do {
            final double prevMse = currentMse;
            currentMse = runIteration();
            // A zero (perfect) or NaN (no data) error cannot improve further; this also avoids
            // dividing by zero.
            improvement = currentMse > 0.0 ? (prevMse - currentMse) / currentMse : 0.0;
        } while (improvement > CONVERGENCE_THRESHOLD);
    }

    /** Performs one boundary/centroid update, prints the result and returns the new MSE. */
    private double runIteration() {
        recalculateBoundaryPoints();
        recalculateCentroids();
        printCurrentConfiguration();
        final double mse = getCurrentMse();
        System.out.println(String.format("Current MSE: %f", mse));
        return mse;
    }

    /** Prints all centroids and boundaries, rounded to two decimal places. */
    private void printCurrentConfiguration() {
        StringBuilder sb = new StringBuilder();
        sb.append("Centroids: ");
        for (int i = 0; i < centroids.length; i++) {
            sb.append(String.format("a[%d]=%.5f;", i, (Math.round(centroids[i] * 100.0) / 100.0)));
        }
        sb.append("\n");
        sb.append("Boundaries: ");
        for (int i = 0; i < boundaryPoints.length; i++) {
            sb.append(String.format("b[%d]=%.5f;", i, (Math.round(boundaryPoints[i] * 100.0) / 100.0)));
        }
        System.out.println(sb);
    }

    /**
     * Replaces every sample by the (floored) centroid of its interval.
     *
     * @param data samples to quantize, each reinterpreted as an unsigned 16-bit value
     * @return a new array of the same length holding the quantized samples
     * @throws IllegalStateException if {@link #train()} has not been called yet
     */
    public short[] quantizeArray(short[] data) {
        if (centroids == null || boundaryPoints == null) {
            throw new IllegalStateException("train() must be called before quantizeArray()");
        }
        short[] result = new short[data.length];
        for (int i = 0; i < data.length; i++) {
            result[i] = (short) Math.floor(quantizeShort(data[i]));
        }
        return result;
    }
}
package quantization;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Static helpers for loading raw sample files.
 */
public class Utils {
    /**
     * Reads the whole content of a file.
     *
     * @param path path of the file to read
     * @return the file content; an empty array if reading fails after the file was opened
     *         (the error is printed to standard error)
     * @throws FileNotFoundException if the file does not exist or cannot be opened
     */
    public static byte[] readFileBytes(final String path) throws FileNotFoundException {
        // Opened outside the try so that FileNotFoundException reaches the caller unchanged.
        final FileInputStream fileStream = new FileInputStream(path);
        // try-with-resources closes the stream on every path; the original never closed it.
        try (fileStream) {
            return fileStream.readAllBytes();
        } catch (IOException e) {
            e.printStackTrace();
            return new byte[0];
        }
    }

    /**
     * Interprets a byte array as a sequence of big-endian unsigned 16-bit values.
     *
     * @param bytes raw data; its length must be even
     * @return one {@code char} per byte pair, the first byte of a pair being the high byte
     * @throws IllegalArgumentException if {@code bytes} has an odd length
     */
    public static char[] convertBytesToU16(final byte[] bytes) {
        if ((bytes.length % 2) != 0) {
            // Checked explicitly: an assert is skipped when assertions are disabled, and the
            // loop would then read past the end of the array.
            throw new IllegalArgumentException(
                    "Byte count must be even to form 16-bit values, got " + bytes.length);
        }
        final char[] values = new char[bytes.length / 2];
        for (int i = 0; i < values.length; i++) {
            final int high = bytes[2 * i] & 0xFF;
            final int low = bytes[2 * i + 1] & 0xFF;
            values[i] = (char) ((high << 8) | low);
        }
        return values;
    }
}
Manifest-Version: 1.0
Main-Class: DataCompressor
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment