Skip to content
Snippets Groups Projects
czi_parser.cpp 14 KiB
Newer Older
  • Learn to ignore specific revisions
  • theazgra's avatar
    theazgra committed
    #include "czi_parser.h"
    
    // Creates a parser. `parseMetadata` controls whether metadata payloads are
    // decoded into strings (true) or skipped over (false) by the parse_* methods.
    CziParser::CziParser(bool parseMetadata)
        : parseMetadata(parseMetadata)
    {
    }
    // The destructor performs no explicit work, so the defaulted definition suffices.
    CziParser::~CziParser() = default;
    
    // Parses the CZI file at path `file` and returns its in-memory description.
    //
    // Reads, in order: the file header, the metadata segment, the sub-block
    // directory with every sub-block it references, and the attachment directory
    // with every attachment segment it references.
    // Asserts (always_assert) when a sub-block lives in another file part, because
    // only the single-file scenario is handled here.
    CziFile CziParser::parse_czi_file(const std::string &file)
    {
        //TODO: Support multi-file scenario.
        CziFile parsedFile;
        BinaryFileStream cziStream(file);

        parsedFile.fileName = file;
        parsedFile.header = parse_file_header(cziStream);
        parsedFile.metadata = parse_metadata(cziStream, parsedFile.header.metadataPosition);

        // SubBlock directory and SubBlocks
        parsedFile.subBlockDirectory = parse_subblock_directory(cziStream, parsedFile.header.subBlockDirectoryPosition);

        for (size_t subBlockId = 0; subBlockId < parsedFile.subBlockDirectory.entryCount; subBlockId++)
        {
            auto &directoryEntry = parsedFile.subBlockDirectory.entries[subBlockId];

            always_assert(directoryEntry.filePart == 0 && "We are currently supporting only single-file scenario.");

            directoryEntry.subBlock = parse_subblock(cziStream, directoryEntry.filePosition, subBlockId, directoryEntry.entrySize);
        }

        // AttachmentDirectory and Attachments
        parsedFile.attachmentDirectory = parse_attachment_directory(cziStream, parsedFile.header.attachmentDirectoryPosition);

        for (size_t attachId = 0; attachId < parsedFile.attachmentDirectory.entryCount; attachId++)
        {
            auto &attachmentEntry = parsedFile.attachmentDirectory.entries[attachId];
            attachmentEntry.attachment = parse_attachment_segment(cziStream, attachmentEntry.filePosition, attachId);
        }

        return parsedFile;
    }
    
    // Reads a segment header from the current stream position: a 16 B segment id
    // followed by the 8 B allocated size and the 8 B used size.
    // `isDeleted` is set when the id equals DELETED_SEGMENT.
    SegmentHeader CziParser::parse_segment_header(BinaryFileStream &cziStream)
    {
        SegmentHeader header = {};

        // 16 B for segment id.
        header.sId = utf8bytes_to_string(cziStream.consume_bytes(16), 0, 16);
        // 8 B for allocated size.
        header.allocatedSize = cziStream.consume_long();
        // 8 B for used size.
        header.usedSize = cziStream.consume_long();

        header.isDeleted = (header.sId == DELETED_SEGMENT);

    #if VERBOSE_PARSER
        printf("Parsed segment header: %s.\n", header.sId.c_str());

        if (header.isDeleted)
            printf("This segment is marked as `deleted`\n");
    #endif // VERBOSE_PARSER

        return header;
    }
    
    
    // Reads the file header segment from the current stream position.
    // Asserts that the segment id equals FileHeaderSegmentSID, then reads the
    // version, GUIDs, file part and the positions of the sub-block directory,
    // metadata and attachment directory segments.
    FileHeaderSegment CziParser::parse_file_header(BinaryFileStream &cziStream)
    {
        FileHeaderSegment result;

        result.header = parse_segment_header(cziStream);
        always_assert(result.header.sId == FileHeaderSegmentSID);

        result.fileVersion = {};
        result.fileVersion.major = cziStream.consume_int();
        result.fileVersion.minor = cziStream.consume_int();

        // The next 8 bytes are reserved and not used for anything ATM.
        cziStream.move_by(8);

        result.masterFileGuid = cziStream.consume_bytes(16);
        result.fileGuid = cziStream.consume_bytes(16);
        result.filePart = cziStream.consume_int();
        result.subBlockDirectoryPosition = cziStream.consume_long();
        result.metadataPosition = cziStream.consume_long();
        result.updatePending = cziStream.consume_bool(4);
        result.attachmentDirectoryPosition = cziStream.consume_long();

    #if VERBOSE_PARSER
        printf("Parsed FileHeader segment\n");
    #endif // VERBOSE_PARSER

        return result;
    }
    
    
    // Reads the metadata segment located at `position` in the stream.
    // Asserts that `position` is positive and that the segment id equals
    // MetadataSegmentSID. The XML payload is decoded into `xmlString` only when
    // the parser was created with parseMetadata == true; otherwise the stream is
    // simply advanced past it.
    MetadataSegment CziParser::parse_metadata(BinaryFileStream &cziStream, const long position)
    {
        always_assert(position > 0);
        cziStream.move_to(position);

        MetadataSegment result = {};
        result.header = parse_segment_header(cziStream);
        always_assert(result.header.sId == MetadataSegmentSID);

        result.xmlSize = cziStream.consume_int();
        result.attachmentSize = cziStream.consume_int();

        // Skipping 248 spare bytes.
        cziStream.move_by(248);

        if (this->parseMetadata)
        {
            result.xmlString = utf8bytes_to_string(cziStream.consume_bytes(result.xmlSize));
            //printf("%s\n", result.xmlString.c_str());

    #if VERBOSE_PARSER
            // size() yields size_t, so %zu is the matching conversion.
            printf("Parsed Metadata, containing string of length: %zu.\n", result.xmlString.size());
    #endif // VERBOSE_PARSER
        }
        else
        {
            cziStream.move_by(result.xmlSize);
    #if VERBOSE_PARSER
            printf("Parsed Metadata, skipping reading metadata bytes; parseMetadata=false.\n");
    #endif // VERBOSE_PARSER
        }

        return result;
    }
    
    // Reads the sub-block directory segment located at `position`.
    // Asserts that `position` is positive and that the segment id equals
    // SubBlockDirectorySegmentSID. Parses `entryCount` directory entries and
    // stores the number of distinct non-negative channel values in `channelCount`.
    SubBlockDirectorySegment CziParser::parse_subblock_directory(BinaryFileStream &cziStream, const long position)
    {
        always_assert(position > 0);
        cziStream.move_to(position);

        SubBlockDirectorySegment result = {};
        result.header = parse_segment_header(cziStream);
        always_assert(result.header.sId == SubBlockDirectorySegmentSID);

        result.entryCount = cziStream.consume_int();
        // 124 B are reserved, skipping.
        cziStream.move_by(124);

        result.entries.reserve(result.entryCount);
        for (size_t entry = 0; entry < result.entryCount; entry++)
        {
            result.entries.push_back(parse_subblock_directory_entry(cziStream));
        }

        // Find number of channels: collect each distinct channel value once.
        {
            std::vector<int> channels;
            for (size_t entry = 0; entry < result.entryCount; entry++)
            {
                const int channel = result.entries[entry].channel;
                // Negative values are not counted as channels.
                if (channel > -1 && !vecUtil::contains(channels, channel))
                    channels.push_back(channel);
            }
            result.channelCount = channels.size();
        }

    #if VERBOSE_PARSER
        printf("Parsed SubBlockDirectory with %i entries.\n", result.entryCount);
    #endif // VERBOSE_PARSER

        return result;
    }
    
    // Reads one sub-block directory entry from the current stream position.
    // Asserts that the schema type is "DV" and that at least one dimension is
    // present. While reading the dimension entries it also caches the X size as
    // `width`, the Y size as `height` and the C start as `channel`.
    DirectoryEntryDV CziParser::parse_subblock_directory_entry(BinaryFileStream &cziStream)
    {
        DirectoryEntryDV result = {};

        result.schemaType = cziStream.consume_bytes(2);
        always_assert(result.schemaType.size() == 2 && result.schemaType[0] == 'D' && result.schemaType[1] == 'V');

        result.pixelType = to_pixel_type(cziStream.consume_int());
        result.filePosition = cziStream.consume_long();
        result.filePart = cziStream.consume_int();
        result.compression = to_compression_type(cziStream.consume_int());
        result.pyramidType = to_pyramid_type(cziStream.consume_byte());

        // 5 next bytes are spare, reserved, skipping them.
        cziStream.move_by(5);

        result.dimensionCount = cziStream.consume_int();
        always_assert(result.dimensionCount > 0);

        result.dimensions.reserve(result.dimensionCount);
        for (size_t dim = 0; dim < result.dimensionCount; dim++)
        {
            auto dimEntry = parse_dimension_entry(cziStream);
            result.dimensions.push_back(dimEntry);

            // Save the most common dimensions.
            if (dimEntry.dimension == Dimension_X)
                result.width = dimEntry.size;

            if (dimEntry.dimension == Dimension_Y)
                result.height = dimEntry.size;

            if (dimEntry.dimension == Dimension_C)
                result.channel = dimEntry.start;
        }

    #if VERBOSE_PARSER
        printf("Parsed SubBlockDirectoryEntry with %i dimensions.\n", result.dimensionCount);
    #endif // VERBOSE_PARSER

        // 32 B of fixed fields plus one DimensionEntryDV1 per dimension.
        result.entrySize = 32 + (result.dimensionCount * DimensionEntryDV1Size);

        return result;
    }
    
    // Reads the attachment directory segment located at `position`.
    // Asserts that `position` is positive and that the segment id equals
    // AttachmentDirectorySegmentSID, then parses `entryCount` attachment entries.
    AttachmentDirectorySegment CziParser::parse_attachment_directory(BinaryFileStream &cziStream, const long position)
    {
        always_assert(position > 0);
        cziStream.move_to(position);

        AttachmentDirectorySegment result = {};
        result.header = parse_segment_header(cziStream);
        always_assert(result.header.sId == AttachmentDirectorySegmentSID);

        result.entryCount = cziStream.consume_int();

        // Next 252 B are reserved.
        cziStream.move_by(252);

        result.entries.reserve(result.entryCount);
        for (size_t entry = 0; entry < result.entryCount; entry++)
        {
            result.entries.push_back(parse_attachment_entry(cziStream));
        }

    #if VERBOSE_PARSER
        printf("Parsed AttachmentDirectory with %i entries.\n", result.entryCount);
    #endif // VERBOSE_PARSER

        return result;
    }
    
    
    // Reads one attachment directory entry from the current stream position.
    // Asserts that the schema type is "A1". The content file type and the name
    // are kept both as raw bytes and as decoded strings.
    AttachmentEntryA1 CziParser::parse_attachment_entry(BinaryFileStream &cziStream)
    {
        AttachmentEntryA1 result = {};

        result.schemaType = cziStream.consume_bytes(2);
        always_assert(result.schemaType.size() == 2 && result.schemaType[0] == 'A' && result.schemaType[1] == '1');

        // Next 10 bytes are reserved.
        cziStream.move_by(10);

        result.filePosition = cziStream.consume_long();
        result.filePart = cziStream.consume_int();
        result.contentGUID = cziStream.consume_bytes(16);
        result.contentFileTypeBytes = cziStream.consume_bytes(8);
        result.contentFileTypeString = utf8bytes_to_string(result.contentFileTypeBytes);
        result.nameBytes = cziStream.consume_bytes(80);
        result.name = utf8bytes_to_string(result.nameBytes);

    #if VERBOSE_PARSER
        printf("Parsed AttachmentEntry: %s - %s.\n", result.contentFileTypeString.c_str(), result.name.c_str());
    #endif // VERBOSE_PARSER

        return result;
    }
    
    
    // Reads one dimension entry from the current stream position: 4 B dimension
    // identifier, start, size, start coordinate and stored size.
    // The returned entry is flagged as non-empty.
    DimensionEntryDV1 CziParser::parse_dimension_entry(BinaryFileStream &cziStream)
    {
        DimensionEntryDV1 result = {};

        result.dimensionBytes = cziStream.consume_bytes(4);
        result.dimension = to_dimension_type(result.dimensionBytes);
        result.start = cziStream.consume_int();
        result.size = cziStream.consume_int();
        result.startCoordinate = cziStream.consume_float();
        result.storedSize = cziStream.consume_int();

        result.isEmpty = false; // This dimension is initialized.

    #if VERBOSE_PARSER
        printf("Parsed DimensionEntry: %i.\n", result.dimension);
    #endif // VERBOSE_PARSER

        return result;
    }
    
    
    // Reads the sub-block segment located at `position`.
    //
    // `entryIndex` is stored as the index of the matching directory entry and
    // `entrySize` is the size of the directory entry copy embedded in the segment.
    // Asserts that `position` is positive and that the segment id equals
    // SubBlockSegmentSID. Pixel data is not read; only its location is recorded,
    // and the attachment location is derived as dataLocation + dataSize.
    SubBlockSegment CziParser::parse_subblock(BinaryFileStream &cziStream, const long position, const int entryIndex, const int entrySize)
    {
        always_assert(position > 0);
        cziStream.move_to(position);

        SubBlockSegment result = {};
        result.header = parse_segment_header(cziStream);
        always_assert(result.header.sId == SubBlockSegmentSID);

        result.metadataSize = cziStream.consume_int();
        result.attachmentSize = cziStream.consume_int();
        result.dataSize = cziStream.consume_long();

        result.directoryEntryIndex = entryIndex;

        // Metadata, Data, Attachments are offset by DirectoryEntrySize + Fill size.
        // The fill pads (entrySize + 16) up to 256 B; the 16 B presumably account
        // for the three size fields read above - TODO confirm against the format spec.
        const int fillOffset = entrySize + 16;
        const int remaining = 256 - fillOffset;
        const int fillSize = (remaining > 0) ? remaining : 0;
        cziStream.move_by(entrySize + fillSize);

        // Metadata bytes
        if (this->parseMetadata)
        {
            auto metadataBytes = cziStream.consume_bytes(result.metadataSize);
            result.metadataString = utf8bytes_to_string(metadataBytes);
            parse_subblock_metadata(metadataBytes);
        }
        else
        {
            cziStream.move_by(result.metadataSize);
    #if VERBOSE_PARSER
            printf("Skipping reading metadata bytes.\n");
    #endif // VERBOSE_PARSER
        }

        //printf("%s\n", result.metadataString.c_str());

        // For now we won't read image into memory, we will just save its location in file.
        result.dataLocation = cziStream.get_position();
        result.attachmentLocation = result.dataLocation + result.dataSize;

    #if VERBOSE_PARSER
        printf("Parsed SubBlock, Metadata size: %i; data size: %li; Attachment size: %i\n", result.metadataSize, result.dataSize, result.attachmentSize);
    #endif // VERBOSE_PARSER

        return result;
    }
    
    
    // Reads the attachment segment located at `position`; `entryIndex` is stored
    // as the index of the matching attachment directory entry.
    // Asserts that `position` is positive and that the segment is either marked
    // deleted or has the id AttachmentSegmentSID. For a deleted segment only the
    // header and entry index are filled in. Attachment data is not read; only
    // its size and location are recorded.
    AttachmentSegment CziParser::parse_attachment_segment(BinaryFileStream &cziStream, const long position, const int entryIndex)
    {
        always_assert(position > 0);
        cziStream.move_to(position);

        AttachmentSegment result = {};
        result.directoryEntryIndex = entryIndex;
        result.header = parse_segment_header(cziStream);

        always_assert(result.header.isDeleted || result.header.sId == AttachmentSegmentSID);

        if (result.header.isDeleted)
        {
            return result;
        }

        result.dataSize = cziStream.consume_int();
        // 12 B reserved next 128 B are for AttachmentEntry which is saved under 'entryIndex' in
        // AttachmentDirectorySegment. After that, there are 112 B reserved.
        const int dataOffset = 12 + 128 + 112;
        result.dataLocation = cziStream.get_position() + dataOffset;

    #if VERBOSE_PARSER
        printf("Parsed AttachmentSegment. Attachment data size: %i.\n", result.dataSize);
    #endif // VERBOSE_PARSER

        return result;
    }
    
    // Converts a raw integer into PixelType.
    // Asserts (always_assert) when the value is not one of the listed pixel types.
    PixelType CziParser::to_pixel_type(const int value)
    {
        const PixelType pixelType = static_cast<PixelType>(value);

        switch (pixelType)
        {
        case PixelType_Gray8:
        case PixelType_Gray16:
        case PixelType_Gray32Float:
        case PixelType_Bgr24:
        case PixelType_Bgr48:
        case PixelType_Bgr96Float:
        case PixelType_Bgra32:
        case PixelType_Gray64ComplexFloat:
        case PixelType_Bgr192ComplexFloat:
        case PixelType_Gray32:
        case PixelType_Gray64:
            // Correct pixel types.
            break;
        default:
            always_assert("Bad pixel type." && false);
            break;
        }

        return pixelType;
    }
    
    // Converts a raw byte into PyramidType.
    // Asserts (always_assert) when the value is not one of the listed pyramid types.
    PyramidType CziParser::to_pyramid_type(const byte value)
    {
        const PyramidType pyramidType = static_cast<PyramidType>(value);

        switch (pyramidType)
        {
        case PyramidType_None:
        case PyramidType_SingleSubBlock:
        case PyramidType_MultiSubBlock:
            // Correct pyramid types.
            break;
        default:
            always_assert("Bad pyramid type." && false);
            break;
        }

        return pyramidType;
    }
    
    // Converts a raw integer into CompressionType.
    // Asserts (always_assert) when the value is not one of the listed compression types.
    CompressionType CziParser::to_compression_type(const int value)
    {
        const CompressionType compressionType = static_cast<CompressionType>(value);

        switch (compressionType)
        {
        case CompressionType_Uncompressed:
        case CompressionType_LZW:
        case CompressionType_JpgFile:
        case CompressionType_JpegXrFile:
        case CompressionType_Camera:
        case CompressionType_System:
            // Correct compression types.
            break;
        default:
            always_assert("Bad compression type." && false);
            break;
        }

        return compressionType;
    }
    
    // Converts the 4 B dimension identifier into Dimension using its first byte.
    // Asserts (always_assert) when `bytes` is not exactly 4 bytes long or when the
    // first byte is not one of the listed dimensions.
    Dimension CziParser::to_dimension_type(const ByteArray &bytes)
    {
        always_assert(bytes.size() == 4);

        // Only the first byte selects the dimension.
        const Dimension dimension = static_cast<Dimension>(static_cast<char>(bytes[0]));

        switch (dimension)
        {
        case Dimension_X:
        case Dimension_Y:
        case Dimension_C:
        case Dimension_Z:
        case Dimension_T:
        case Dimension_R:
        case Dimension_S:
        case Dimension_I:
        case Dimension_B:
        case Dimension_M:
        case Dimension_H:
        case Dimension_V:
            // Correct dimension types.
            break;
        default:
            always_assert("Bad dimension type." && false);
            break;
        }

        return dimension;
    }