diff --git a/README.md b/README.md index 6f32a4af0e0c76d8b58e4d9a34096fd42f105cc3..fecf795e31ecedd5a4fe57f81cde2231c992f65a 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,11 @@ - **DEADLINE 18.03.2019** - [ ] Look at *Image difference* (*Negative values can be mapped to odd/even numbers. But the difference must be saved in more than one byte.*) - Things we have tested so far: short-short=int, mapping int to ushort, results are not better - - [ ] Try Z-Order on ushort mapped ints. + - [x] Try Z-Order on ushort mapped ints. - [ ] Try raw byte difference, find if it can be mapped to 1 or 2 bytes - [ ] [DeltaCompression](http://www.diva-portal.org/smash/get/diva2:817831/FULLTEXT01.pdf) - - bsdiff, bspatch, unix utils, generating patches + - bsdiff, bspatch + - Initial tests show quite poor results, with compression ratios below 1.0. We should maybe check a different library implementation of bsdiff. - [ ] Benchmark B3D cuda library - Problems with building, `cuda` files are probably compiled with g++ instead of nvcc? 
diff --git a/czi-format/benchmark_results/benchmark_data.sqlite b/czi-format/benchmark_results/benchmark_data.sqlite index 9cbc6aaad0dcaf822554fb6cb58fded6b50626c2..74b634b578a618ede5b8d90eba845442d40fc9b5 100644 Binary files a/czi-format/benchmark_results/benchmark_data.sqlite and b/czi-format/benchmark_results/benchmark_data.sqlite differ diff --git a/czi-format/czi-parser/CMakeLists.txt b/czi-format/czi-parser/CMakeLists.txt index c4d95d45e53530d5db979c1d5b459dd64060f6ec..10703476e5ab74e285b6079a830e344ce9777b9a 100644 --- a/czi-format/czi-parser/CMakeLists.txt +++ b/czi-format/czi-parser/CMakeLists.txt @@ -49,6 +49,8 @@ if (OPENMP_FOUND) message("ENABLED OpenMP") endif() +include_directories("utilities/bsdiff") + set(CPACK_PROJECT_NAME ${PROJECT_NAME}) set(CPACK_PROJECT_VERSION ${PROJECT_VERSION}) include(CPack) \ No newline at end of file diff --git a/czi-format/czi-parser/compression/benchmark.cpp b/czi-format/czi-parser/compression/benchmark.cpp index 98e21479e88ae6df0d81da84cbe474185c47457c..2df8ae3d2e153d5c1fe25ae13f21f833a0171d63 100644 --- a/czi-format/czi-parser/compression/benchmark.cpp +++ b/czi-format/czi-parser/compression/benchmark.cpp @@ -269,7 +269,7 @@ static void _diff_from_frame_gray16(const ByteArray &pfData, const ByteArray &cf cr = test_compression_method(ushortMappedDeltaBytes, cm, compLevel); } -void frame_difference_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, int level, CompressionMethod cm) +void value_diff_by_prev_frame_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, int level, CompressionMethod cm) { // NOTE: This benchmark works only for 16 bit pixels! 
printf("Compression method %s with compression level %i\n", compression_method_str(cm), level); @@ -334,6 +334,108 @@ void frame_difference_benchmark(CziFile &cziFile, const std::string &reportFile, record.compressionTime = 0; record.zOrderCompressionTime = 0; +#pragma omp critical + { + benchmarkRecords.push_back(record); + + printf("\rFinished %u/%u", ++iter, iterCount); + fflush(stdout); + } + } + } + + write_diff_report(benchmarkRecords, reportFile); + printf("\rFinished %u/%u\n", iterCount, iterCount); + printf("Report saved in %s\n", reportFile.c_str()); +} + +/** + * Benchmark bsdiff as an inter-frame encoder. Inside every channel each frame is diffed against + * its predecessor, once on get_image_data(id, false) and once on get_image_data(id, true) (used + * here as the Z-order variant), and both patch sizes are stored in the report. + * Only Gray16 frames are accepted (asserted); the timing fields are not measured and stay 0. + * + * @param cziFile Parsed CZI file whose frames are compared. + * @param reportFile Path passed to write_diff_report. + * @param verbose When true, print a line for every channel that is started. + */ +void bsdiff_by_prev_frame_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose) +{ + std::string fName = fs_wrapper::get_filename(cziFile.fileName); + printf("Difference from previuos frame by bsdiff unix tool.\n"); + auto framesByChannels = cziFile.get_subblocks_grouped_by_channels(); + + uint iter = 0; + uint iterCount = cziFile.subBlockDirectory.entryCount; + std::vector<DiffBenchmarkRecord> benchmarkRecords; + + for (const auto &channelGroup : framesByChannels) + { + printf_if(verbose, "Starting channel %u\n", channelGroup.first); + //#pragma omp parallel for + for (size_t i = 1; i < channelGroup.second.size(); i++) + { + DiffBenchmarkRecord record = {}; + + uint prevFrameId = channelGroup.second[i - 1]; + uint currFrameId = channelGroup.second[i]; + + DirectoryEntryDV prevEntry = cziFile.subBlockDirectory.entries[prevFrameId]; + DirectoryEntryDV currEntry = cziFile.subBlockDirectory.entries[currFrameId]; + always_assert(prevEntry.pixelType == PixelType_Gray16); + always_assert(currEntry.pixelType == PixelType_Gray16); + + record.set_metadata(fName.c_str(), currFrameId, prevFrameId, + cziFile.pixel_type_str(currEntry.pixelType), currEntry.width, currEntry.height, + "None", 0, "bsdiff_PrevFrameDiff", 16, channelGroup.first); + + DimensionEntryDV1 prevDim = prevEntry.get_dimension(Dimension_Z); + DimensionEntryDV1 currDim = 
currEntry.get_dimension(Dimension_Z); + always_assert(!prevDim.isEmpty && !currDim.isEmpty); + // We should always be going in direction of Z-stack. + always_assert(prevDim.start < currDim.start); + + // pf => previous frame, cf => current frame. + size_t originalSize = 0; + size_t patchSizeN = 0; + size_t patchSizeZ = 0; + { + auto pfData = cziFile.get_image_data(prevFrameId, false); + auto cfData = cziFile.get_image_data(currFrameId, false); + // The patch rebuilds the current frame, so its size is the uncompressed baseline. + originalSize = cfData.size(); + + size_t maxPatchSize = lib_bsdiff::bsdiff_patchsize_max(pfData.size(), cfData.size()); + ByteArray patchBuffer; + patchBuffer.resize(maxPatchSize); + + off_t actualBufferSize = lib_bsdiff::bsdiff(pfData.data(), pfData.size(), cfData.data(), cfData.size(), patchBuffer.data(), maxPatchSize); + always_assert(actualBufferSize != -1 && "bsdiff error."); + patchSizeN = actualBufferSize; + } + // Z order + { + auto pfDataZ = cziFile.get_image_data(prevFrameId, true); + auto cfDataZ = cziFile.get_image_data(currFrameId, true); + always_assert(cfDataZ.size() == originalSize); + + size_t maxPatchSize = lib_bsdiff::bsdiff_patchsize_max(pfDataZ.size(), cfDataZ.size()); + ByteArray patchBuffer; + patchBuffer.resize(maxPatchSize); + + off_t actualBufferSize = lib_bsdiff::bsdiff(pfDataZ.data(), pfDataZ.size(), cfDataZ.data(), cfDataZ.size(), patchBuffer.data(), maxPatchSize); + always_assert(actualBufferSize != -1 && "bsdiff error."); + patchSizeZ = actualBufferSize; + } + + record.originalSize = originalSize; + record.compressedSize = patchSizeN; + record.zOrderCompressedSize = patchSizeZ; + record.compressionRatio = compression_ratio((float)originalSize, (float)patchSizeN); + record.zOrderCompressionRatio = compression_ratio((float)originalSize, (float)patchSizeZ); + record.compressionTime = 0; + record.zOrderCompressionTime = 0; + #pragma omp critical { benchmarkRecords.push_back(record); diff --git a/czi-format/czi-parser/compression/benchmark.h b/czi-format/czi-parser/compression/benchmark.h index 
a1588a22def1aa825bfca3d73ae9f9fbdff3d9a8..c45fe338469926d09d58f44e87dff6ea0104a19a 100644 --- a/czi-format/czi-parser/compression/benchmark.h +++ b/czi-format/czi-parser/compression/benchmark.h @@ -5,6 +5,23 @@ #include <iomanip> #include <thread> +// The C headers used by the vendored bsdiff sources are included here, at global scope, so that +// their include guards turn the nested #include directives inside the namespace into no-ops. +// Otherwise their declarations would be parsed as members of namespace lib_bsdiff. +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +// Vendored minibsdiff encoder, compiled as part of this translation unit. +namespace lib_bsdiff +{ +#include "../utilities/bsdiff/bsdiff.h" +#include "../utilities/bsdiff/bsdiff.c" +}; // namespace lib_bsdiff + struct BaseBenchmarkRecord { const char *fileName; @@ -94,7 +111,8 @@ void benchmark_continuos_compression(CziFile &cziFile, const std::string &report void benchmark_compression(CziFile &cziFile, const std::string &reportFile, bool verbose, int level = -1); // Frame difference benchmark stuff -void frame_difference_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, int level, CompressionMethod cm); +void value_diff_by_prev_frame_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, int level, CompressionMethod cm); +void bsdiff_by_prev_frame_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose); static void write_diff_report(const std::vector<DiffBenchmarkRecord> &results, const std::string &reportFile); #include "benchmark.cpp" \ No newline at end of file diff --git a/czi-format/czi-parser/main.cpp b/czi-format/czi-parser/main.cpp index cab641c5d693176b3abf47723a29683d771bbc96..38278099c19b233a4b6587026b626149bdfbbc4a 100644 --- a/czi-format/czi-parser/main.cpp +++ b/czi-format/czi-parser/main.cpp @@ -38,7 +38,8 @@ int main(int argc, char **argv) args::Flag benchmarkMethod(mainMethodGroup, "Compression benchmark", "Start compression benchmark for selected CZI file. 
<report-file> [continuos] [verbose].", {"benchmark"}); args::Flag exportDataMethod(mainMethodGroup, "Export binary data", "Export raw image data <folder> [continuos]", {"dump-data"}); args::Flag exportImagesMethod(mainMethodGroup, "Export images", "Export images to ppm files <folder>", {"dump-images"}); - args::Flag frameDiffMethod(mainMethodGroup, "Frame difference", "TODO", {"frame-diff"}); + args::Flag frameDiffMethod(mainMethodGroup, "Frame difference", "Encoding difference to previous frame and mapping 32 bits back to 16 bits.", {"frame-diff"}); + args::Flag bsFrameDiffMethod(mainMethodGroup, "Frame difference by bsdiff", "Encoding difference to previous frame by bsdiff.", {"bs-diff"}); // Options args::Flag dontParseMetadataOption(optionsGroup, "no metadata", "Dont read metadata byte", {"no-metadata"}); @@ -121,7 +122,20 @@ int main(int argc, char **argv) CompressionMethod cm = get_chosen_compression_method(gzipCompressionOption, lzmaCompressionOption, bzip2CompressionOption); - frame_difference_benchmark(parsedFile, reportFileOption.Get(), verboseOption.Matched(), level, cm); + value_diff_by_prev_frame_benchmark(parsedFile, reportFileOption.Get(), verboseOption.Matched(), level, cm); + + return 0; + } + + if (bsFrameDiffMethod) + { + if (!reportFileOption.Matched()) + { + printf(RED "Report file wasn't specified!\n" RESET); + return 1; + } + + bsdiff_by_prev_frame_benchmark(parsedFile, reportFileOption.Get(), verboseOption.Matched()); return 0; } diff --git a/czi-format/czi-parser/utilities/bsdiff/bsdiff.c b/czi-format/czi-parser/utilities/bsdiff/bsdiff.c new file mode 100644 index 0000000000000000000000000000000000000000..b90ebbf36a8d4187f22884ba45e301f3c8871e18 --- /dev/null +++ b/czi-format/czi-parser/utilities/bsdiff/bsdiff.c @@ -0,0 +1,498 @@ +/*- + * Copyright 2012-2013 Austin Seipp + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted 
providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if 0 +__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $"); +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "bsdiff.h" + +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +/* Header is + 0 8 BSDIFF_CONFIG_MAGIC (see minibsdiff-config.h) + 8 8 length of bzip2ed ctrl block + 16 8 length of bzip2ed diff block + 24 8 length of new file */ +/* File is + 0 32 Header + 32 ?? ctrl block + ?? ?? diff block + ?? ?? 
extra block */ + +static void +split(off_t *I, off_t *V, off_t start, off_t len, off_t h) +{ + off_t i, j, k, x, tmp, jj, kk; + + if (len < 16) + { + for (k = start; k < start + len; k += j) + { + j = 1; + x = V[I[k] + h]; + for (i = 1; k + i < start + len; i++) + { + if (V[I[k + i] + h] < x) + { + x = V[I[k + i] + h]; + j = 0; + }; + if (V[I[k + i] + h] == x) + { + tmp = I[k + j]; + I[k + j] = I[k + i]; + I[k + i] = tmp; + j++; + }; + }; + for (i = 0; i < j; i++) + V[I[k + i]] = k + j - 1; + if (j == 1) + I[k] = -1; + }; + return; + }; + + x = V[I[start + len / 2] + h]; + jj = 0; + kk = 0; + for (i = start; i < start + len; i++) + { + if (V[I[i] + h] < x) + jj++; + if (V[I[i] + h] == x) + kk++; + }; + jj += start; + kk += jj; + + i = start; + j = 0; + k = 0; + while (i < jj) + { + if (V[I[i] + h] < x) + { + i++; + } + else if (V[I[i] + h] == x) + { + tmp = I[i]; + I[i] = I[jj + j]; + I[jj + j] = tmp; + j++; + } + else + { + tmp = I[i]; + I[i] = I[kk + k]; + I[kk + k] = tmp; + k++; + }; + }; + + while (jj + j < kk) + { + if (V[I[jj + j] + h] == x) + { + j++; + } + else + { + tmp = I[jj + j]; + I[jj + j] = I[kk + k]; + I[kk + k] = tmp; + k++; + }; + }; + + if (jj > start) + split(I, V, start, jj - start, h); + + for (i = 0; i < kk - jj; i++) + V[I[jj + i]] = kk - 1; + if (jj == kk - 1) + I[jj] = -1; + + if (start + len > kk) + split(I, V, kk, start + len - kk, h); +} + +static void +qsufsort(off_t *I, off_t *V, u_char *old, off_t oldsize) +{ + off_t buckets[256]; + off_t i, h, len; + + for (i = 0; i < 256; i++) + buckets[i] = 0; + for (i = 0; i < oldsize; i++) + buckets[old[i]]++; + for (i = 1; i < 256; i++) + buckets[i] += buckets[i - 1]; + for (i = 255; i > 0; i--) + buckets[i] = buckets[i - 1]; + buckets[0] = 0; + + for (i = 0; i < oldsize; i++) + I[++buckets[old[i]]] = i; + I[0] = oldsize; + for (i = 0; i < oldsize; i++) + V[i] = buckets[old[i]]; + V[oldsize] = 0; + for (i = 1; i < 256; i++) + if (buckets[i] == buckets[i - 1] + 1) + I[buckets[i]] = -1; + I[0] = 
-1; + + for (h = 1; I[0] != -(oldsize + 1); h += h) + { + len = 0; + for (i = 0; i < oldsize + 1;) + { + if (I[i] < 0) + { + len -= I[i]; + i -= I[i]; + } + else + { + if (len) + I[i - len] = -len; + len = V[I[i]] + 1 - i; + split(I, V, i, len, h); + i += len; + len = 0; + }; + }; + if (len) + I[i - len] = -len; + }; + + for (i = 0; i < oldsize + 1; i++) + I[V[i]] = i; +} + +static off_t +matchlen(u_char *oldp, off_t oldsize, u_char *newp, off_t newsize) +{ + off_t i; + + for (i = 0; (i < oldsize) && (i < newsize); i++) + if (oldp[i] != newp[i]) + break; + + return i; +} + +static off_t +search(off_t *I, u_char *oldp, off_t oldsize, + u_char *newp, off_t newsize, off_t st, off_t en, off_t *pos) +{ + off_t x, y; + + if (en - st < 2) + { + x = matchlen(oldp + I[st], oldsize - I[st], newp, newsize); + y = matchlen(oldp + I[en], oldsize - I[en], newp, newsize); + + if (x > y) + { + *pos = I[st]; + return x; + } + else + { + *pos = I[en]; + return y; + } + }; + + x = st + (en - st) / 2; + if (memcmp(oldp + I[x], newp, MIN(oldsize - I[x], newsize)) < 0) + { + return search(I, oldp, oldsize, newp, newsize, x, en, pos); + } + else + { + return search(I, oldp, oldsize, newp, newsize, st, x, pos); + }; +} + +static void +offtout(off_t x, u_char *buf) +{ + off_t y; + + if (x < 0) + y = -x; + else + y = x; + + buf[0] = y % 256; + y -= buf[0]; + y = y / 256; + buf[1] = y % 256; + y -= buf[1]; + y = y / 256; + buf[2] = y % 256; + y -= buf[2]; + y = y / 256; + buf[3] = y % 256; + y -= buf[3]; + y = y / 256; + buf[4] = y % 256; + y -= buf[4]; + y = y / 256; + buf[5] = y % 256; + y -= buf[5]; + y = y / 256; + buf[6] = y % 256; + y -= buf[6]; + y = y / 256; + buf[7] = y % 256; + + if (x < 0) + buf[7] |= 0x80; +} + +off_t bsdiff_patchsize_max(off_t newsize, off_t oldsize) +{ + return newsize + oldsize + BSDIFF_PATCH_SLOP_SIZE; +} + +int bsdiff(u_char *oldp, off_t oldsize, + u_char *newp, off_t newsize, + u_char *patch, off_t patchsz) +{ + off_t *I, *V; + off_t scan, pos, len; + off_t 
lastscan, lastpos, lastoffset; + off_t oldscore, scsc; + off_t s, Sf, lenf, Sb, lenb; + off_t overlap, Ss, lens; + off_t i; + off_t dblen, eblen; + u_char *db, *eb; + u_char buf[8]; + u_char header[32]; + u_char *fileblock; + + off_t ctrllen; + + /* Sanity checks */ + if (oldp == NULL || newp == NULL || patch == NULL) + return -1; + if (oldsize < 0 || newsize < 0 || patchsz < 0) + return -1; + if (bsdiff_patchsize_max(oldsize, newsize) > patchsz) + return -1; + + /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure + that we never try to malloc(0) and get a NULL pointer */ + if (((I = (off_t *)malloc((oldsize + 1) * sizeof(off_t))) == NULL) || + ((V = (off_t *)malloc((oldsize + 1) * sizeof(off_t))) == NULL)) + { + if (I) + free(I); + return -1; + } + + qsufsort(I, V, oldp, oldsize); + + free(V); + + /* Allocate newsize+1 bytes instead of newsize bytes to ensure + that we never try to malloc(0) and get a NULL pointer */ + if (((db = (u_char *)malloc(newsize + 1)) == NULL) || + ((eb = (u_char *)malloc(newsize + 1)) == NULL)) + { + if (db) + free(db); + free(I); + return -1; + } + dblen = 0; + eblen = 0; + + /* Write initial header */ + memcpy(header, BSDIFF_CONFIG_MAGIC, 8); + offtout(0, header + 8); + offtout(0, header + 16); + offtout(newsize, header + 24); + memcpy(patch, header, 32); + + /* Set up initial pointers */ + fileblock = patch + 32; + ctrllen = 0; + + /* Compute the differences, writing ctrl as we go */ + scan = 0; + len = 0; + pos = 0; + lastscan = 0; + lastpos = 0; + lastoffset = 0; + while (scan < newsize) + { + oldscore = 0; + + for (scsc = scan += len; scan < newsize; scan++) + { + len = search(I, oldp, oldsize, newp + scan, newsize - scan, + 0, oldsize, &pos); + + for (; scsc < scan + len; scsc++) + if ((scsc + lastoffset < oldsize) && + (oldp[scsc + lastoffset] == newp[scsc])) + oldscore++; + + if (((len == oldscore) && (len != 0)) || + (len > oldscore + 8)) + break; + + if ((scan + lastoffset < oldsize) && + (oldp[scan + lastoffset] == 
newp[scan])) + oldscore--; + }; + + if ((len != oldscore) || (scan == newsize)) + { + s = 0; + Sf = 0; + lenf = 0; + for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) + { + if (oldp[lastpos + i] == newp[lastscan + i]) + s++; + i++; + if (s * 2 - i > Sf * 2 - lenf) + { + Sf = s; + lenf = i; + }; + }; + + lenb = 0; + if (scan < newsize) + { + s = 0; + Sb = 0; + for (i = 1; (scan >= lastscan + i) && (pos >= i); i++) + { + if (oldp[pos - i] == newp[scan - i]) + s++; + if (s * 2 - i > Sb * 2 - lenb) + { + Sb = s; + lenb = i; + }; + }; + }; + + if (lastscan + lenf > scan - lenb) + { + overlap = (lastscan + lenf) - (scan - lenb); + s = 0; + Ss = 0; + lens = 0; + for (i = 0; i < overlap; i++) + { + if (newp[lastscan + lenf - overlap + i] == + oldp[lastpos + lenf - overlap + i]) + s++; + if (newp[scan - lenb + i] == + oldp[pos - lenb + i]) + s--; + if (s > Ss) + { + Ss = s; + lens = i + 1; + }; + }; + + lenf += lens - overlap; + lenb -= lens; + }; + + for (i = 0; i < lenf; i++) + db[dblen + i] = newp[lastscan + i] - oldp[lastpos + i]; + for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) + eb[eblen + i] = newp[lastscan + lenf + i]; + + dblen += lenf; + eblen += (scan - lenb) - (lastscan + lenf); + + offtout(lenf, buf); + memcpy(fileblock, buf, 8); + fileblock += 8; + ctrllen += 8; + + offtout((scan - lenb) - (lastscan + lenf), buf); + memcpy(fileblock, buf, 8); + fileblock += 8; + ctrllen += 8; + + offtout((pos - lenb) - (lastpos + lenf), buf); + memcpy(fileblock, buf, 8); + fileblock += 8; + ctrllen += 8; + + lastscan = scan - lenb; + lastpos = pos - lenb; + lastoffset = pos - scan; + }; + }; + + /* Write size of ctrl data */ + offtout(ctrllen, header + 8); + + /* Write diff data */ + memcpy(fileblock, db, dblen); + fileblock += dblen; + /* Write size of diff data */ + offtout(dblen, header + 16); + + /* Write extra data */ + memcpy(fileblock, eb, eblen); + + /* Write the final header */ + memcpy(patch, header, 32); + + /* Free the memory we used */ + 
free(db); + free(eb); + free(I); + + return (32 + ctrllen + dblen + eblen); +} diff --git a/czi-format/czi-parser/utilities/bsdiff/bsdiff.h b/czi-format/czi-parser/utilities/bsdiff/bsdiff.h new file mode 100644 index 0000000000000000000000000000000000000000..84ccbc00bfbbeea032be6e074824d1b9da30faa5 --- /dev/null +++ b/czi-format/czi-parser/utilities/bsdiff/bsdiff.h @@ -0,0 +1,71 @@ +/*- + * Copyright 2012-2013 Austin Seipp + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef _MINIBSDIFF_H_ +#define _MINIBSDIFF_H_ + +#include "minibsdiff-config.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* ------------------------------------------------------------------------- */ + /* -- Public API ----------------------------------------------------------- */ + + /*- + * Determine the maximum size of a patch between two files. This function + * should be used to allocate a buffer big enough for `bsdiff` to store + * its output in. + */ + off_t bsdiff_patchsize_max(off_t oldsize, off_t newsize); + + /*- + * Create a binary patch from the buffers pointed to by oldp and newp (with + * respective sizes,) and store the result in the buffer pointed to by 'patch'. + * + * The input pointer 'patch' must not be NULL, and the size of the buffer must + * be at least 'bsdiff_patchsize_max(old,new)' in length. + * + * Returns -1 if a pointer is NULL, a size is negative, the 'patch' buffer is + * not large enough, or if memory cannot be allocated. + * Otherwise, the return value is the size of the patch that was put in the + * 'patch' buffer. + * + * This function is memory-intensive, and requires max(17*n,9*n+m)+O(1) bytes + * of memory, where n is the size of the old file and m is the size of the new + * file. It runs in O((n+m) log n) time. 
+ */ + int bsdiff(u_char *oldp, off_t oldsize, + u_char *newp, off_t newsize, + u_char *patch, off_t patchsize); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _MINIBSDIFF_H_ */ diff --git a/czi-format/czi-parser/utilities/bsdiff/bspatch.c b/czi-format/czi-parser/utilities/bsdiff/bspatch.c new file mode 100644 index 0000000000000000000000000000000000000000..e51d85a7c48055561fd535c38ebf3045d800e6f0 --- /dev/null +++ b/czi-format/czi-parser/utilities/bsdiff/bspatch.c @@ -0,0 +1,181 @@ +/*- + * Copyright 2012-2013 Austin Seipp + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if 0 +__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bspatch/bspatch.c,v 1.1 2005/08/06 01:59:06 cperciva Exp $"); +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> + +#include "bspatch.h" + +/* + Patch file format: + 0 8 BSDIFF_CONFIG_MAGIC (see minibsdiff-config.h) + 8 8 X + 16 8 Y + 24 8 sizeof(newfile) + 32 X control block + 32+X Y diff block + 32+X+Y ??? extra block + with control block a set of triples (x,y,z) meaning "add x bytes + from oldfile to x bytes from the diff block; copy y bytes from the + extra block; seek forwards in oldfile by z bytes". +*/ + +static off_t +offtin(u_char *buf) +{ + off_t y; + + y=buf[7]&0x7F; + y=y*256;y+=buf[6]; + y=y*256;y+=buf[5]; + y=y*256;y+=buf[4]; + y=y*256;y+=buf[3]; + y=y*256;y+=buf[2]; + y=y*256;y+=buf[1]; + y=y*256;y+=buf[0]; + + if(buf[7]&0x80) y=-y; + + return y; +} + +bool +bspatch_valid_header(u_char* patch, ssize_t patchsz) +{ + ssize_t newsize, ctrllen, datalen; + + if (patchsz < 32) return false; + + /* Make sure magic and header fields are valid */ + if(memcmp(patch, BSDIFF_CONFIG_MAGIC, 8) != 0) return false; + + ctrllen=offtin(patch+8); + datalen=offtin(patch+16); + newsize=offtin(patch+24); + if((ctrllen<0) || (datalen<0) || (newsize<0)) + return false; + + return true; +} + +ssize_t +bspatch_newsize(u_char* patch, ssize_t patchsz) +{ + if (!bspatch_valid_header(patch, patchsz)) return -1; + return offtin(patch+24); +} + +/* + * Apply 'patch' to 'oldp' and write the result into 'newp'; the return codes are + * described in bspatch.h. Every length read from the patch is checked against the + * 'patch' and 'newp' buffers before it is used, so a corrupt patch is rejected with + * -3 instead of being read or written out of bounds. + */ +int +bspatch(u_char* oldp, ssize_t oldsz, + u_char* patch, ssize_t patchsz, + u_char* newp, ssize_t newsz) +{ + ssize_t newsize,ctrllen,datalen; + u_char *ctrlblock, *diffblock, *extrablock; + u_char *ctrlend, *diffend, *patchend; + off_t oldpos,newpos; + off_t ctrl[3]; + off_t i; + + /* Sanity checks */ + if (oldp == NULL || patch == NULL || newp == NULL) return -1; + if (oldsz < 0 || patchsz < 0 || newsz < 0) return -1; + if (!bspatch_valid_header(patch, patchsz)) return -2; + + /* Read lengths from patch header */ + ctrllen=offtin(patch+8); + datalen=offtin(patch+16); + 
newsize=offtin(patch+24); + if (newsize > newsz) return -1; + + /* The control and diff blocks must both fit inside the patch buffer */ + if (ctrllen > patchsz-32) return -3; + if (datalen > patchsz-32-ctrllen) return -3; + + /* Get pointers into the header metadata */ + ctrlblock = patch+32; + diffblock = patch+32+ctrllen; + extrablock = patch+32+ctrllen+datalen; + ctrlend = diffblock; + diffend = extrablock; + patchend = patch+patchsz; + + /* Apply patch */ + oldpos=0;newpos=0; + while(newpos<newsize) { + /* Read control block */ + if(ctrlend-ctrlblock<24) + return -3; /* Corrupt patch */ + ctrl[0] = offtin(ctrlblock); + ctrl[1] = offtin(ctrlblock+8); + ctrl[2] = offtin(ctrlblock+16); + ctrlblock += 24; + + /* Sanity check: a negative or oversized length must never reach memcpy */ + if((ctrl[0]<0) || (ctrl[0]>newsize-newpos) || (ctrl[0]>diffend-diffblock)) + return -3; /* Corrupt patch */ + + /* Read diff string */ + memcpy(newp + newpos, diffblock, ctrl[0]); + diffblock += ctrl[0]; + + /* Add old data to diff string */ + for(i=0;i<ctrl[0];i++) + if((oldpos+i>=0) && (oldpos+i<oldsz)) + newp[newpos+i]+=oldp[oldpos+i]; + + /* Adjust pointers */ + newpos+=ctrl[0]; + oldpos+=ctrl[0]; + + /* Sanity check: the extra block ends where the patch buffer ends */ + if((ctrl[1]<0) || (ctrl[1]>newsize-newpos) || (ctrl[1]>patchend-extrablock)) + return -3; /* Corrupt patch */ + + /* Read extra string */ + memcpy(newp + newpos, extrablock, ctrl[1]); + extrablock += ctrl[1]; + + /* Adjust pointers */ + newpos+=ctrl[1]; + oldpos+=ctrl[2]; + }; + + return 0; +} diff --git a/czi-format/czi-parser/utilities/bsdiff/bspatch.h b/czi-format/czi-parser/utilities/bsdiff/bspatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d55919e83a8ccfe99bccd8c9e66b4228bf0cd44c --- /dev/null +++ b/czi-format/czi-parser/utilities/bsdiff/bspatch.h @@ -0,0 +1,78 @@ +/*- + * Copyright 2012-2013 Austin Seipp + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MINIBSPATCH_H_ +#define _MINIBSPATCH_H_ + +#include "minibsdiff-config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------------------------------------------------------------- */ +/* -- Public API ----------------------------------------------------------- */ + +/*- + * Determine if the buffer pointed to by `patch` of size `patchsz` begins with + * a valid patch header (magic number and non-negative length fields). + */ +bool bspatch_valid_header(u_char* patch, ssize_t patchsz); + +/*- + * Determine the size of the new file that will result from applying + * a patch. Returns -1 if the patch header is invalid, otherwise returns + * the size of the new file. + */ +ssize_t bspatch_newsize(u_char* patch, ssize_t patchsize); + +/*- + * Apply the patch stored in 'patch' to the old data in 'oldp' and store the + * result in 'newp'. + * + * The input pointers must not be NULL. + * + * The size of 'newp', represented by 'newsz', must be at least + * 'bspatch_newsize(patch,patchsz)' bytes in length. + * + * Returns -1 if an input pointer is NULL, a size is negative, or 'newp' is too small. + * Returns -2 if the patch header is invalid. Returns -3 if the patch itself is + * corrupt. + * Otherwise, returns 0. 
+ * + * This function requires n+m+O(1) bytes of memory, where n is the size of the + * old file and m is the size of the new file. It does no allocations. + * It runs in O(n+m) time. + */ +int bspatch(u_char* oldp, ssize_t oldsz, + u_char* patch, ssize_t patchsz, + u_char* newp, ssize_t newsz); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* _MINIBSPATCH_H_ */ diff --git a/czi-format/czi-parser/utilities/bsdiff/minibsdiff-config.h b/czi-format/czi-parser/utilities/bsdiff/minibsdiff-config.h new file mode 100644 index 0000000000000000000000000000000000000000..3bf5607a4621b53ed5a50b19e2eb24ba689b72e9 --- /dev/null +++ b/czi-format/czi-parser/utilities/bsdiff/minibsdiff-config.h @@ -0,0 +1,64 @@ +/*- + * Copyright 2012-2013 Austin Seipp + * Copyright 2003-2005 Colin Percival + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _MINIBSDIFF_CONFIG_H_ +#define _MINIBSDIFF_CONFIG_H_ + +#ifdef _MSC_VER +#include <Windows.h> +#include "stdint-msvc.h" +#include "stdbool-msvc.h" +#else +#include <stdint.h> +#include <stdbool.h> +#endif /* _MSC_VER */ + +/* ------------------------------------------------------------------------- */ +/* -- Patch file magic number ---------------------------------------------- */ + +/** MUST be 8 bytes long! */ +/** TODO FIXME: we should static_assert this */ +#define BSDIFF_CONFIG_MAGIC "MBSDIF43" + +/* ------------------------------------------------------------------------- */ +/* -- Slop size for temporary patch buffer --------------------------------- */ + +#define BSDIFF_PATCH_SLOP_SIZE 102400 + +/* ------------------------------------------------------------------------- */ +/* -- Type definitions ----------------------------------------------------- */ + +/* Duplicated to keep code small. Keep in sync with bspatch.h! */ +#ifndef _MINIBSDIFF_U_CHAR_T_ +#define _MINIBSDIFF_U_CHAR_T_ +typedef uint8_t u_char; +#endif /* _MINIBSDIFF_U_CHAR_T_ */ + +#ifdef _MSC_VER +typedef SSIZE_T ssize_t; +#endif /* _MSC_VER */ + +#endif /* _MINIBSDIFF_CONFIG_H_ */