Newer
Older
/**
 * Writes a histogram to a text file, one `key;count` pair per line.
 *
 * @tparam T        Key type of the histogram; it is written with operator<<.
 * @param histogram Map from a value to its number of occurrences.
 * @param fileName  Path of the file to create. The file is opened with
 *                  std::ios::out, so existing content is replaced.
 *
 * If the file cannot be opened a message is printed through printf_if and
 * always_assert is then evaluated on the stream's open state.
 */
template <typename T>
void save_histogram(const std::map<T, size_t> &histogram, const char *fileName)
{
    std::ofstream oStream(fileName, std::ios::out);
    printf_if(!oStream.is_open(), "Unable to create file %s\n", fileName);
    always_assert(oStream.is_open());

    // `const auto &` binds directly to the map's std::pair<const T, size_t>.
    // Spelling the type as std::pair<T, size_t> forces a converting copy of
    // every entry on each iteration.
    for (const auto &entry : histogram)
    {
        // '\n' rather than std::endl: the stream is flushed once after the loop,
        // there is no need to flush after every single line.
        oStream << entry.first << ";" << entry.second << '\n';
    }

    oStream.flush();
    oStream.close();
}
/**
 * Writes compression benchmark records into a semicolon-separated CSV file.
 *
 * The file is opened with std::ios::out (existing content is replaced), a
 * header line is written first and then every record serialises itself
 * through BaseBenchmarkRecord::write_to_stream.
 *
 * @param results    Records to write, in the order they are stored.
 * @param reportFile Path of the CSV file to create.
 */
void write_compression_report(const std::vector<BaseBenchmarkRecord> &results, const std::string &reportFile)
{
    std::ofstream csvFile(reportFile, std::ios::out);
    always_assert(csvFile.is_open());

    // Floating point columns are written in fixed notation with 5 decimals.
    csvFile << std::fixed << std::setprecision(5);

    // CSV header.
    csvFile << "filename;subblock;pixel;width;height;compression;level;originalSize;compressedSize;compressedZ;compressionRatio;compressionRatioZ;compressionTime;compressionTimeZ" << std::endl;

    const char sep = ';';
    for (const BaseBenchmarkRecord &record : results)
    {
        record.write_to_stream(csvFile, sep);
    }
}
void write_diff_report(const std::vector<DiffBenchmarkRecord> &results, const std::string &reportFile)
{
std::ofstream csvFile = std::ofstream(reportFile, std::ios::out | std::ios::app);
always_assert(csvFile.is_open());
csvFile << std::fixed << std::setprecision(5);
// CSV header.
csvFile << "filename;channel;subblock;refSubblock;pixel;width;height;compression;level;diffType;bitsUsed;originalSize;compressedSize;compressedZ;compressionRatio;compressionRatioZ;compressionTime;compressionTimeZ" << std::endl;
const char sep = ';';
for (const DiffBenchmarkRecord &record : results)
{
record.write_to_stream(csvFile, sep);
/**
 * Worker routine used as a thread entry point: compresses `data` once and
 * stores the outcome in the caller-provided slot.
 *
 * @param data             Bytes to compress.
 * @param method           Compression method to test.
 * @param compressionLevel Level forwarded to test_compression_method.
 * @param result           Output slot that receives the compression result.
 * @param info             Label printed once the work is done.
 */
void compression_thread_work(const ByteArray &data, CompressionMethod method, int compressionLevel, CompressionResult &result, const char *info)
{
    // Write the outcome straight into the output slot, then announce completion.
    result = test_compression_method(data, method, compressionLevel);
    printf("Completed: %s\n", info);
}
std::vector<BaseBenchmarkRecord> benchmark_continuos_compression_one_level(const ByteArray &data, const ByteArray &zOrderedData, int compressionLevel)
{
// CompressionMethod_GZIP
CompressionResult gzipResult = {};
CompressionResult gzipZResult = {};
// CompressionMethod_LZMA
CompressionResult lzmaResult = {};
CompressionResult lzmaZResult = {};
// CompressionMethod_BZIP2
CompressionResult bzip2Result = {};
CompressionResult bzip2ZResult = {};
std::vector<std::thread> workers;
workers.resize(6);
// We know that lzma is slowest, let's run all three in threads.
workers[0] = std::thread(compression_thread_work, std::ref(data), CompressionMethod_GZIP, compressionLevel, std::ref(gzipResult), "Gzip normal order");
workers[1] = std::thread(compression_thread_work, std::ref(zOrderedData), CompressionMethod_GZIP, compressionLevel, std::ref(gzipZResult), "Gzip Z order");
workers[2] = std::thread(compression_thread_work, std::ref(data), CompressionMethod_LZMA, compressionLevel, std::ref(lzmaResult), "LZMA normal order");
workers[3] = std::thread(compression_thread_work, std::ref(zOrderedData), CompressionMethod_LZMA, compressionLevel, std::ref(lzmaZResult), "LZMA Z order");
workers[4] = std::thread(compression_thread_work, std::ref(data), CompressionMethod_BZIP2, compressionLevel, std::ref(bzip2Result), "Bzip2 normal order");
workers[5] = std::thread(compression_thread_work, std::ref(zOrderedData), CompressionMethod_BZIP2, compressionLevel, std::ref(bzip2ZResult), "Bzip2 Z order");
for (size_t i = 0; i < workers.size(); i++)
{
workers[i].join();
}
printf("All threads completed.\n");
BaseBenchmarkRecord gzipRecord(gzipResult, gzipZResult);
gzipRecord.compressionLevel = compressionLevel;
gzipRecord.compressionMethod = GZIP_NAME;
BaseBenchmarkRecord lzmaRecord(lzmaResult, lzmaZResult);
lzmaRecord.compressionLevel = compressionLevel;
lzmaRecord.compressionMethod = LZMA_NAME;
BaseBenchmarkRecord bzip2Record(bzip2Result, bzip2ZResult);
bzip2Record.compressionLevel = compressionLevel;
bzip2Record.compressionMethod = BZIP2_NAME;
std::vector<BaseBenchmarkRecord> results;
results.resize(3);
results[0] = gzipRecord;
results[1] = lzmaRecord;
results[2] = bzip2Record;
return results;
}
/**
 * Benchmarks compression of the whole image data of a CZI file taken as one
 * continuous buffer, in normal and in Z order, and writes a CSV report.
 *
 * @param cziFile    File to benchmark; it must contain at least one subblock
 *                   directory entry.
 * @param reportFile Path of the CSV report to create.
 * @param verbose    Enables additional progress output.
 * @param level      Compression level to test, or -1 to test every level from
 *                   1 to 9.
 */
void benchmark_continuos_compression(CziFile &cziFile, const std::string &reportFile, bool verbose, int level)
{
    always_assert(cziFile.subBlockDirectory.entries.size() > 0);

    const int minCompressionLevel = 1;
    const int maxCompressionLevel = 9;

    std::string fName = fs_wrapper::get_filename(cziFile.fileName);
    // The pixel type reported for the continuous buffer is taken from the first entry.
    auto entry = cziFile.subBlockDirectory.entries[0];

    ByteArray data = cziFile.get_continuous_image_data(false);
    ByteArray zOrderData = cziFile.get_continuous_image_data(true);

    std::vector<BaseBenchmarkRecord> results;

    if (level != -1)
    {
        // Single requested level.
        results = benchmark_continuos_compression_one_level(data, zOrderData, level);
    }
    else
    {
        // All levels; iterations run in parallel and merge their records under a critical section.
        int levelDone = 0;
#pragma omp parallel for
        for (int compressionLevel = minCompressionLevel; compressionLevel <= maxCompressionLevel; compressionLevel++)
        {
            auto levelResults = benchmark_continuos_compression_one_level(data, zOrderData, compressionLevel);
#pragma omp critical
            {
                results.insert(results.end(), levelResults.begin(), levelResults.end());
                printf("\rFinished compression level %i/%i of normal order.", ++levelDone, maxCompressionLevel);
                fflush(stdout);
            }
        }
        if (verbose)
            printf("\n");
    }

    // Metadata is filled in once, for both branches. The subblock id is the
    // fixed value 999 and width/height are 0 for the continuous buffer.
    for (auto &&result : results)
    {
        result.fileName = fName.c_str();
        result.subblockId = 999;
        result.pixelType = cziFile.pixel_type_str(entry.pixelType);
        result.width = 0;
        result.height = 0;
    }

    if (verbose)
        printf("\nWriting report file...\n");

    // The report is written exactly once, after all records carry their metadata.
    write_compression_report(results, reportFile);
    printf("\nFinished benchmark, results are written in: %s\n", reportFile.c_str());
}
/**
 * Benchmarks GZIP, LZMA and BZIP2 on every subblock of a CZI file, in normal
 * and in Z order, and writes all records into a CSV report.
 *
 * @param cziFile    File whose subblocks are benchmarked.
 * @param reportFile Path of the CSV report to create.
 * @param verbose    Enables progress output.
 * @param level      Compression level to test, or -1 to test every level from
 *                   1 to 9.
 */
void benchmark_compression(CziFile &cziFile, const std::string &reportFile, bool verbose, int level)
{
    // Methods to benchmark together with the label stored in the report.
    // The order matches the order in which records are produced per level.
    struct MethodEntry
    {
        CompressionMethod method;
        const char *label;
    };
    const MethodEntry methods[] = {
        {CompressionMethod_GZIP, "GZIP"},
        {CompressionMethod_LZMA, "LZMA2"},
        {CompressionMethod_BZIP2, "BZIP2"},
    };

    std::vector<BaseBenchmarkRecord> benchmarkResults;

    const int minCompressionLevel = 1;
    const int maxCompressionLevel = 9;

    std::string fName = fs_wrapper::get_filename(cziFile.fileName);
    int sbCount = static_cast<int>(cziFile.subBlockDirectory.entries.size());

    for (size_t subblockId = 0; subblockId < cziFile.subBlockDirectory.entries.size(); subblockId++)
    {
        DirectoryEntryDV subblock = cziFile.subBlockDirectory.entries[subblockId];
        const char *pt = cziFile.pixel_type_str(subblock.pixelType);

        ByteArray data = cziFile.get_image_data(subblockId, false);
        ByteArray dataInZOrder = cziFile.get_image_data(subblockId, true);

        if (verbose)
        {
            printf("\rProcessing Subblock %i/%i", static_cast<int>(subblockId) + 1, sbCount);
            fflush(stdout);
        }

        // level == -1 selects the whole range, otherwise a single level.
        int fromCL = (level == -1) ? minCompressionLevel : level;
        int toCL = (level == -1) ? maxCompressionLevel : level;

#pragma omp parallel for
        for (int compressionLevel = fromCL; compressionLevel <= toCL; compressionLevel++)
        {
            for (const MethodEntry &entry : methods)
            {
                CompressionResult normalResult = test_compression_method(data, entry.method, compressionLevel);
                CompressionResult zOrderedResult = test_compression_method(dataInZOrder, entry.method, compressionLevel);

                BaseBenchmarkRecord record = BaseBenchmarkRecord(normalResult, zOrderedResult);
                record.set_metadata(fName.c_str(), subblockId, pt, subblock.width, subblock.height, entry.label, compressionLevel);

                // The result vector is shared between the parallel iterations.
#pragma omp critical
                {
                    benchmarkResults.push_back(record);
                }
            }
        }
    }

    if (verbose)
        printf("\nWriting report file...\n");

    write_compression_report(benchmarkResults, reportFile);
    printf("\nFinished benchmark, results are written in: %s\n", reportFile.c_str());
}
void _diff_from_frame_gray16(const ByteArray &pfData, const ByteArray &cfData,
const CompressionMethod cm, const int compLevel, CompressionResult &cr)
ushort USHORT_MAX = std::numeric_limits<ushort>::max();
always_assert(pfData.size() == cfData.size());
std::vector<int> delta;
{
std::vector<ushort> pfUshortValues = bytes_to_ushort_array(pfData);
std::vector<ushort> cfUshortValues = bytes_to_ushort_array(cfData);
always_assert(pfUshortValues.size() == cfUshortValues.size());
delta = vecUtil::diff_vectors<ushort, int>(pfUshortValues, cfUshortValues);
}
auto minMax = vecUtil::find_min_max(delta);
long maxRequiredValue = (minMax.min < 0) ? (abs(minMax.min) + minMax.max) : (minMax.max);
bool canBeMappedToUShort = maxRequiredValue < USHORT_MAX;
always_assert(canBeMappedToUShort && "Value can not be mapped into ushort.");
//ByteArray deltaBytes = int_array_to_bytes(delta);
// Here we map integers into ushort.
ByteArray ushortMappedDeltaBytes;
{
TypeMapper<int, ushort, MappingType_NegativeAfterPositive> typeMapper;
std::vector<ushort> ushortMappedDelta = typeMapper.map(delta);
ushortMappedDeltaBytes = ushort_array_to_bytes(ushortMappedDelta);
}
cr = test_compression_method(ushortMappedDeltaBytes, cm, compLevel);
}
void _diff_from_frame_gray16_va_bit_count(const ByteArray &pfData, const ByteArray &cfData,
const CompressionMethod cm, const int compLevel, CompressionResult &cr, size_t &bitsUsed)
ushort USHORT_MAX = std::numeric_limits<ushort>::max();
always_assert(pfData.size() == cfData.size());
std::vector<int> delta;
{
std::vector<ushort> pfUshortValues = bytes_to_ushort_array(pfData);
std::vector<ushort> cfUshortValues = bytes_to_ushort_array(cfData);
always_assert(pfUshortValues.size() == cfUshortValues.size());
delta = vecUtil::diff_vectors<ushort, int>(pfUshortValues, cfUshortValues);
}
auto minMax = vecUtil::find_min_max(delta);
long maxRequiredValue = (minMax.min < 0) ? (abs(minMax.min) + minMax.max) : (minMax.max);
bool canBeMappedToUShort = maxRequiredValue < USHORT_MAX;
always_assert(canBeMappedToUShort && "Value can not be mapped into ushort.");
// Here we map integers into ushort.
ByteArray diffBytes;
TypeMapper<int, ushort, MappingType_NegativeAfterPositive> typeMapper;
std::vector<ushort> ushortMappedDelta = typeMapper.map(delta);
ushort ushortMappedDeltaMax = vecUtil::find_max(ushortMappedDelta);
bitsUsed = bits_required(ushortMappedDeltaMax);
OutMemoryBitStream outMemoryBitStream(bitsUsed);
size_t requiredBitCount = bitsUsed * ushortMappedDelta.size();
requiredSize = (requiredBitCount + 8 - 1) / 8;
outMemoryBitStream.resize_for_raw_write(requiredSize);
for (size_t i = 0; i < ushortMappedDelta.size(); i++)
{
outMemoryBitStream.write_value_no_alloc(ushortMappedDelta[i]);
}
diffBytes = outMemoryBitStream.get_flushed_buffer();
}
cr = test_compression_method(diffBytes, cm, compLevel);
always_assert(cr.compressedSize < requiredSize);
}
/**
 * Benchmarks compression of the difference between consecutive frames of
 * every channel and appends the records to a CSV report.
 *
 * For each channel group returned by get_subblocks_grouped_by_channels(), every
 * frame from the second one on is diffed against the frame before it, once
 * with the image data in normal order and once in Z order.
 *
 * @param cziFile          File to benchmark; all used frames must be Gray16.
 * @param reportFile       Path of the CSV report.
 * @param verbose          Enables additional progress output.
 * @param level            Compression level.
 * @param cm               Compression method.
 * @param variableBitCount When true the differences are bit-packed with
 *                         _diff_from_frame_gray16_va_bit_count, otherwise
 *                         _diff_from_frame_gray16 is used.
 */
void value_diff_by_prev_frame_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, int level, CompressionMethod cm, bool variableBitCount)
{
    // NOTE: This benchmark works only for 16 bit pixels!
    printf("Compression method %s with compression level %i\n", compression_method_str(cm), level);
    printf_if(variableBitCount, "Using variable bit count.\n");

    std::string fName = fs_wrapper::get_filename(cziFile.fileName);
    auto framesByChannels = cziFile.get_subblocks_grouped_by_channels();

    uint iter = 0;
    uint iterCount = cziFile.subBlockDirectory.entryCount;
    std::vector<DiffBenchmarkRecord> benchmarkRecords;

    // `const auto &` binds to the container's own element type, so no channel
    // group (including its frame list) is copied per iteration.
    for (const auto &channelGroup : framesByChannels)
    {
        printf_if(verbose, "Starting channel %u\n", channelGroup.first);

#pragma omp parallel for
        for (size_t i = 1; i < channelGroup.second.size(); i++)
        {
            DiffBenchmarkRecord record = {};
            uint prevFrameId = channelGroup.second[i - 1];
            uint currFrameId = channelGroup.second[i];

            DirectoryEntryDV prevEntry = cziFile.subBlockDirectory.entries[prevFrameId];
            DirectoryEntryDV currEntry = cziFile.subBlockDirectory.entries[currFrameId];
            always_assert(prevEntry.pixelType == PixelType_Gray16);
            always_assert(currEntry.pixelType == PixelType_Gray16);

            DimensionEntryDV1 prevDim = prevEntry.get_dimension(Dimension_Z);
            DimensionEntryDV1 currDim = currEntry.get_dimension(Dimension_Z);
            always_assert(!prevDim.isEmpty && !currDim.isEmpty);
            // We should always be going in direction of Z-stack.
            always_assert(prevDim.start < currDim.start);

            CompressionResult crN = {};
            CompressionResult crZ = {};

            // 16 is what is reported when the fixed-width path is used (one full
            // Gray16 sample); it is the same value bsdiff_benchmark reports.
            // The variable bit count path overwrites it.
            size_t bitsUsed = 16;

            // pf => previous frame, cf => current frame.
            size_t originalSize = 0;
            {
                auto pfData = cziFile.get_image_data(prevFrameId, false);
                auto cfData = cziFile.get_image_data(currFrameId, false);
                // Size of one uncompressed frame; used for the ratios and the Z-order check.
                originalSize = pfData.size();

                if (variableBitCount)
                    _diff_from_frame_gray16_va_bit_count(pfData, cfData, cm, level, crN, bitsUsed);
                else
                    _diff_from_frame_gray16(pfData, cfData, cm, level, crN);
            }

            // Z order
            {
                auto pfDataZ = cziFile.get_image_data(prevFrameId, true);
                auto cfDataZ = cziFile.get_image_data(currFrameId, true);

                if (variableBitCount)
                    _diff_from_frame_gray16_va_bit_count(pfDataZ, cfDataZ, cm, level, crZ, bitsUsed);
                else
                    _diff_from_frame_gray16(pfDataZ, cfDataZ, cm, level, crZ);

                always_assert(pfDataZ.size() == originalSize);
            }

            record.set_metadata(fName.c_str(), currFrameId, prevFrameId,
                                cziFile.pixel_type_str(currEntry.pixelType), currEntry.width, currEntry.height,
                                compression_method_str(cm), level, "PrevFrameDiff", bitsUsed, channelGroup.first);
            //printf_if((variableBitCount && verbose), "Bits used for encoding: %lu\n", bitsUsed);

            record.originalSize = originalSize;
            record.compressedSize = crN.compressedSize;
            record.zOrderCompressedSize = crZ.compressedSize;
            record.compressionRatio = compression_ratio((float)originalSize, (float)crN.compressedSize);
            record.zOrderCompressionRatio = compression_ratio((float)originalSize, (float)crZ.compressedSize);
            // Timing is not measured by this benchmark.
            record.compressionTime = 0;
            record.zOrderCompressionTime = 0;

            // The record vector and the progress counter are shared between iterations.
#pragma omp critical
            {
                benchmarkRecords.push_back(record);
                printf("\rFinished %u/%u", ++iter, iterCount);
                fflush(stdout);
            }
        }
    }

    write_diff_report(benchmarkRecords, reportFile);
    printf("\rFinished %u/%u\n", iterCount, iterCount);
    printf("Report saved in %s\n", reportFile.c_str());
}
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
/**
 * Saves one histogram of sample differences per pair of consecutive frames.
 *
 * For each channel group returned by get_subblocks_grouped_by_channels(), every
 * frame from the second one on is diffed against the frame before it and the
 * histogram of the differences is written to
 * `<folder>/<channel>_<prevFrameId>_<currFrameId>.histo`.
 *
 * @param cziFile File to analyse; all used frames must be Gray16.
 * @param folder  Folder the histogram files are written into.
 */
void value_diff_histogram(CziFile &cziFile, const azgra::SimpleString &folder)
{
    // NOTE: This benchmark works only for 16 bit pixels!
    printf("Saving histogram of differences into folder %s\n", folder.get_c_string());

    auto framesByChannels = cziFile.get_subblocks_grouped_by_channels();

    // `const auto &` binds to the container's own element type, so no channel
    // group (including its frame list) is copied per iteration.
    for (const auto &channelGroup : framesByChannels)
    {
        //#pragma omp parallel for
        for (size_t i = 1; i < channelGroup.second.size(); i++)
        {
            uint prevFrameId = channelGroup.second[i - 1];
            uint currFrameId = channelGroup.second[i];

            DirectoryEntryDV prevEntry = cziFile.subBlockDirectory.entries[prevFrameId];
            DirectoryEntryDV currEntry = cziFile.subBlockDirectory.entries[currFrameId];
            always_assert(prevEntry.pixelType == PixelType_Gray16);
            always_assert(currEntry.pixelType == PixelType_Gray16);

            DimensionEntryDV1 prevDim = prevEntry.get_dimension(Dimension_Z);
            DimensionEntryDV1 currDim = currEntry.get_dimension(Dimension_Z);
            always_assert(!prevDim.isEmpty && !currDim.isEmpty);
            // We should always be going in direction of Z-stack.
            always_assert(prevDim.start < currDim.start);

            // pf => previous frame, cf => current frame.
            {
                auto pfData = cziFile.get_image_data(prevFrameId, false);
                auto cfData = cziFile.get_image_data(currFrameId, false);
                always_assert(pfData.size() == cfData.size());

                std::vector<ushort> pfUshortValues = bytes_to_ushort_array(pfData);
                std::vector<ushort> cfUshortValues = bytes_to_ushort_array(cfData);
                always_assert(pfUshortValues.size() == cfUshortValues.size());

                std::vector<int> delta = vecUtil::diff_vectors<ushort, int>(pfUshortValues, cfUshortValues);
                auto deltaHistogram = azgra::vecUtil::create_histogram(delta);

                // <folder>/<channel>_<prevFrameId>_<currFrameId>.histo
                SimpleString histoFName = folder + "/";
                histoFName += std::to_string(channelGroup.first).c_str();
                histoFName += "_";
                histoFName += std::to_string(prevFrameId).c_str();
                histoFName += "_";
                histoFName += std::to_string(currFrameId).c_str();
                histoFName += ".histo";

                // save_histogram takes a C string; pass it explicitly instead of
                // relying on an implicit conversion of SimpleString.
                save_histogram(deltaHistogram, histoFName.get_c_string());
            }
        }
    }

    printf("Finished\n");
}
void bsdiff_benchmark(CziFile &cziFile, const std::string &reportFile, bool verbose, CompressionMethod cm, int level, bool firstFrameDiff)
const char *diffBy = firstFrameDiff ? "FirstFrameDiff_BSDIFF" : "PrevFrameDiff_BSDIFF";
std::string fName = fs_wrapper::get_filename(cziFile.fileName);
printf("Difference from %s by bsdiff unix tool. Compressing patch with %s, level %i\n",
diffBy,
compression_method_str(cm),
level);
auto framesByChannels = cziFile.get_subblocks_grouped_by_channels();
uint iter = 0;
uint iterCount = cziFile.subBlockDirectory.entryCount;
std::vector<DiffBenchmarkRecord> benchmarkRecords;
for (const std::pair<uint, std::vector<uint>> &channelGroup : framesByChannels)
{
printf_if(verbose, "Starting channel %u\n", channelGroup.first);
for (size_t i = 1; i < channelGroup.second.size(); i++)
{
DiffBenchmarkRecord record = {};
uint prevFrameId = firstFrameDiff ? (channelGroup.second.front()) : (channelGroup.second[i - 1]);
uint currFrameId = channelGroup.second[i];
DirectoryEntryDV prevEntry = cziFile.subBlockDirectory.entries[prevFrameId];
DirectoryEntryDV currEntry = cziFile.subBlockDirectory.entries[currFrameId];
always_assert(prevEntry.pixelType == PixelType_Gray16);
always_assert(currEntry.pixelType == PixelType_Gray16);
record.set_metadata(fName.c_str(), currFrameId, prevFrameId,
cziFile.pixel_type_str(currEntry.pixelType), currEntry.width, currEntry.height,
compression_method_str(cm), level, diffBy, 16, channelGroup.first);
DimensionEntryDV1 prevDim = prevEntry.get_dimension(Dimension_Z);
DimensionEntryDV1 currDim = currEntry.get_dimension(Dimension_Z);
always_assert(!prevDim.isEmpty && !currDim.isEmpty);
// We should always be going in direction of Z-stack.
always_assert(prevDim.start < currDim.start);
// pf => previous frame, cf => current frame.
CompressionResult crN;
CompressionResult crZ;
Stopwatch stopwatch;
size_t originalSize = 0;
{
auto pfData = cziFile.get_image_data(prevFrameId, false);
auto cfData = cziFile.get_image_data(currFrameId, false);
originalSize = pfData.size();
ByteArray patchBuffer;
int bsdiffResultN = bsdiff_create_patch(pfData, cfData, patchBuffer);
always_assert(bsdiffResultN == BSDIFF_API_SUCCESS);
crN = test_compression_method(patchBuffer, cm, level);
stopwatch.stop();
crN.compressionTimeMS = stopwatch.elapsed_milliseconds();
}
// Z order
{
auto pfDataZ = cziFile.get_image_data(prevFrameId, true);
auto cfDataZ = cziFile.get_image_data(currFrameId, true);
always_assert(pfDataZ.size() == originalSize);
stopwatch.start();
ByteArray patchBufferZ;
int bsdiffResultZ = bsdiff_create_patch(pfDataZ, cfDataZ, patchBufferZ);
always_assert(bsdiffResultZ == BSDIFF_API_SUCCESS);
crZ = test_compression_method(patchBufferZ, cm, level);
stopwatch.stop();
crZ.compressionTimeMS = stopwatch.elapsed_milliseconds();
}
record.originalSize = originalSize;
record.compressedSize = crN.compressedSize;
record.zOrderCompressedSize = crZ.compressedSize;
record.compressionRatio = compression_ratio((float)originalSize, (float)crN.compressedSize);
record.zOrderCompressionRatio = compression_ratio((float)originalSize, (float)crZ.compressedSize);
record.compressionTime = crN.compressionTimeMS;
record.zOrderCompressionTime = crZ.compressionTimeMS;
printf_if(verbose, "Patch sizes: %8lu %8lu\n", crN.compressedSize, crZ.compressedSize);
{
benchmarkRecords.push_back(record);
if (verbose)
printf("Finished %u/%u\n", ++iter, iterCount);
else
printf("\rFinished %u/%u", ++iter, iterCount);
write_diff_report(benchmarkRecords, reportFile);
printf("\rFinished %u/%u\n", iterCount, iterCount);
printf("Report saved in %s\n", reportFile.c_str());