From 0a96edb3b294f79578d37f63fc2c2cb4f5c24e43 Mon Sep 17 00:00:00 2001
From: theazgra <theazgra@gmail.com>
Date: Thu, 24 Jan 2019 14:12:45 +0100
Subject: [PATCH] Basics in CZI parser.

---
 LICENSE                                       | 23 ++++++
 .../inspector/czi-parser/CMakeLists.txt       | 11 ++-
 .../inspector/czi-parser/binary_stream.cpp    | 71 +++++++++++++++++++
 .../inspector/czi-parser/binary_stream.h      | 25 +++++++
 .../inspector/czi-parser/bit_converter.cpp    | 10 +++
 .../inspector/czi-parser/custom_types.h       |  2 +
 czi-format/inspector/czi-parser/czi_file.cpp  |  9 +++
 czi-format/inspector/czi-parser/czi_file.h    | 14 ++++
 .../inspector/czi-parser/czi_parser.cpp       | 45 ++++++++++++
 czi-format/inspector/czi-parser/czi_parser.h  |  6 ++
 .../czi-parser/czi_segments/FileHeader.h      | 34 +++++++++
 .../czi-parser/czi_segments/SegmentHeader.h   |  1 +
 .../czi-parser/czi_segments/Version.h         |  5 ++
 czi-format/inspector/czi-parser/main.cpp      | 34 +--------
 14 files changed, 258 insertions(+), 32 deletions(-)
 create mode 100644 LICENSE
 create mode 100644 czi-format/inspector/czi-parser/binary_stream.cpp
 create mode 100644 czi-format/inspector/czi-parser/binary_stream.h
 create mode 100644 czi-format/inspector/czi-parser/czi_file.cpp
 create mode 100644 czi-format/inspector/czi-parser/czi_file.h
 create mode 100644 czi-format/inspector/czi-parser/czi_parser.cpp
 create mode 100644 czi-format/inspector/czi-parser/czi_parser.h
 create mode 100644 czi-format/inspector/czi-parser/czi_segments/FileHeader.h
 create mode 100644 czi-format/inspector/czi-parser/czi_segments/Version.h

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..36b7cd9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/czi-format/inspector/czi-parser/CMakeLists.txt b/czi-format/inspector/czi-parser/CMakeLists.txt
index 2fd7033..08c9dfb 100644
--- a/czi-format/inspector/czi-parser/CMakeLists.txt
+++ b/czi-format/inspector/czi-parser/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0.0)
+cmake_minimum_required(VERSION 3.8.0)
 project(czi-parser VERSION 0.1.0)
 
 include(CTest)
@@ -6,6 +6,15 @@ enable_testing()
 
 add_executable(czi-parser main.cpp)
 
+# set(Boost_USE_STATIC_LIBS OFF) 
+set(Boost_USE_MULTITHREADED ON)  
+set(Boost_USE_STATIC_RUNTIME OFF) 
+find_package(Boost REQUIRED COMPONENTS locale) # bit_converter.cpp includes <boost/locale.hpp>.
+if(Boost_FOUND) # NOTE(review): presumably always true here because of REQUIRED above - confirm.
+    include_directories(${Boost_INCLUDE_DIRS}) 
+    target_link_libraries(czi-parser ${Boost_LIBRARIES})
+endif()
+
 set(CPACK_PROJECT_NAME ${PROJECT_NAME})
 set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
 include(CPack)
diff --git a/czi-format/inspector/czi-parser/binary_stream.cpp b/czi-format/inspector/czi-parser/binary_stream.cpp
new file mode 100644
index 0000000..dd00e30
--- /dev/null
+++ b/czi-format/inspector/czi-parser/binary_stream.cpp
@@ -0,0 +1,71 @@
+#include "binary_stream.h"
+#include <iterator>
+
+BinaryStream::BinaryStream(const std::string &file)
+{ // Opens `file` for binary reading and records its size in bytes.
+    this->fileStream = std::ifstream(file, std::ios::binary);
+    this->fileStream.unsetf(std::ios::skipws);
+    // A file that cannot be opened is treated as a precondition violation (checked by assert only).
+    assert(this->fileStream.is_open());
+    // Measure the file. seekg needs (offset, direction): seekg(std::ios::end) alone is an absolute seek.
+    this->currentPosition = 0;
+    this->fileStream.seekg(0, std::ios::end);
+    this->fileSize = fileStream.tellg();
+    this->fileStream.seekg(0, std::ios::beg);
+}
+
+BinaryStream::~BinaryStream()
+{
+    this->fileStream.close(); // Explicitly close the stream that the constructor opened.
+}
+
+size_t BinaryStream::get_size() const
+{
+    return this->fileSize; // Value recorded once by the constructor; nothing in this file updates it later.
+}
+
+void BinaryStream::move_to(const long position)
+{ // Seeks the stream to the absolute offset `position` and mirrors it in currentPosition.
+    this->fileStream.seekg(position);
+    this->currentPosition = position; // NOTE(review): updated even when the seek fails - confirm this is intended.
+}
+
+void BinaryStream::move_to_beginning()
+{ // Rewinds to offset 0. The two-argument seekg is used: a lone std::ios::beg would be read as an absolute position.
+    this->fileStream.seekg(0, std::ios::beg);
+    this->currentPosition = 0;
+}
+
+void BinaryStream::move_to_end()
+{ // Seeks to end-of-file. The two-argument seekg is required: seekg(std::ios::end) seeks to the enum's numeric value.
+    this->fileStream.seekg(0, std::ios::end);
+    this->currentPosition = this->fileSize;
+}
+
+bool BinaryStream::can_read()
+{ // True while the tracked position lies before the recorded file size.
+    return (this->currentPosition < this->fileSize); // Mixed comparison: currentPosition is long, fileSize is size_t.
+}
+
+std::vector<byte> BinaryStream::consume_bytes(const long byteCount)
+{
+    // Reads up to byteCount bytes from the current stream position and
+    // advances currentPosition by the number of bytes actually read.
+    // A bulk read() is used instead of std::istream_iterator: the iterator
+    // pre-fetches one extra element on construction, which would leave the
+    // stream one byte ahead of currentPosition after every call.
+    assert(byteCount >= 0);
+
+    std::vector<byte> result(static_cast<size_t>(byteCount));
+    this->fileStream.read(reinterpret_cast<char *>(result.data()), byteCount);
+
+    // On a short read (end of file) keep only the bytes that were really read.
+    const std::streamsize bytesRead = this->fileStream.gcount();
+    result.resize(static_cast<size_t>(bytesRead));
+    this->currentPosition += static_cast<long>(bytesRead);
+    return result;
+}
+
+std::vector<byte> BinaryStream::consume_bytes_at(const long position, const long byteCount) {
+    this->move_to(position); // Reposition first so the read below starts at `position`.
+    return this->consume_bytes(byteCount); }
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/binary_stream.h b/czi-format/inspector/czi-parser/binary_stream.h
new file mode 100644
index 0000000..41205fb
--- /dev/null
+++ b/czi-format/inspector/czi-parser/binary_stream.h
@@ -0,0 +1,25 @@
+#pragma once
+#include <fstream>
+#include "custom_types.h"
+
+class BinaryStream // Reader over a binary file that keeps its own byte-position bookkeeping.
+{
+  private:
+    std::ifstream fileStream; // Underlying stream, opened in binary mode by the constructor.
+    size_t fileSize; // Value obtained from tellg() in the constructor.
+    long currentPosition; // Position bookkeeping maintained by the move_* and consume_* members.
+
+  public:
+    BinaryStream(const std::string &file); // Opens `file`; asserts that the stream is open.
+    ~BinaryStream(); // Closes the stream.
+
+    size_t get_size() const; // Returns fileSize.
+    void move_to(const long position); // Seeks to the absolute offset `position`.
+    void move_to_beginning(); // Rewinds to the start of the file.
+    void move_to_end(); // Intended to position the stream at the end of the file.
+    bool can_read(); // currentPosition < fileSize.
+    std::vector<byte> consume_bytes(const long byteCount); // Reads bytes at the current position and advances currentPosition.
+    std::vector<byte> consume_bytes_at(const long position, const long byteCount); // Intended to read byteCount bytes starting at `position`.
+};
+
+#include "binary_stream.cpp"
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/bit_converter.cpp b/czi-format/inspector/czi-parser/bit_converter.cpp
index 90ce777..30c36de 100644
--- a/czi-format/inspector/czi-parser/bit_converter.cpp
+++ b/czi-format/inspector/czi-parser/bit_converter.cpp
@@ -8,6 +8,8 @@
 #include <string>
 #include "custom_types.h"
 
+#include <boost/locale.hpp>
+
 short bytes_to_short(const std::vector<byte> &bytes, const uint fromIndex = 0)
 {
     assert(bytes.size() >= 2);
@@ -116,4 +118,12 @@ std::string bytes_to_raw_string(const std::vector<byte> &bytes, const uint fromI
     std::vector<byte> stringBytes(bytes.begin() + fromIndex, bytes.begin() + fromIndex + byteCount);
     std::string result(reinterpret_cast<const char *>(stringBytes.data()));
     return result;
+}
+
+std::string utf8bytes_to_string(const std::vector<byte> &bytes, const uint fromIndex, const uint byteCount)
+{ // Converts bytes[fromIndex, fromIndex + byteCount) via Boost.Locale; the result stops at the first NUL byte, if any.
+    assert(static_cast<size_t>(fromIndex) + byteCount <= bytes.size());
+    // std::string guarantees a terminating NUL, which the C-string overload of from_utf relies on.
+    const std::string raw(reinterpret_cast<const char *>(bytes.data()) + fromIndex, byteCount);
+    return boost::locale::conv::from_utf<char>(raw.c_str(), "UTF-8");
 }
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/custom_types.h b/czi-format/inspector/czi-parser/custom_types.h
index f96a3fc..ae3ccac 100644
--- a/czi-format/inspector/czi-parser/custom_types.h
+++ b/czi-format/inspector/czi-parser/custom_types.h
@@ -1,4 +1,6 @@
 #pragma once
+#include <vector>
+#include <assert.h>
 
 typedef unsigned char byte;
 typedef unsigned short ushort;
diff --git a/czi-format/inspector/czi-parser/czi_file.cpp b/czi-format/inspector/czi-parser/czi_file.cpp
new file mode 100644
index 0000000..219e61b
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_file.cpp
@@ -0,0 +1,9 @@
+#include "czi_file.h"
+
+CziFile::CziFile() : _fileHeader{} // Value-initialize so the header's int/long/bool fields start at zero, not indeterminate.
+{
+}
+
+CziFile::~CziFile() // Empty body: there is nothing to release beyond the members' own destructors.
+{
+}
diff --git a/czi-format/inspector/czi-parser/czi_file.h b/czi-format/inspector/czi-parser/czi_file.h
new file mode 100644
index 0000000..4cc71ae
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_file.h
@@ -0,0 +1,14 @@
+#pragma once
+#include "czi_segments/FileHeader.h"
+
+class CziFile // In-memory representation of a CZI file; currently holds only the file header.
+{
+  private:
+    FileHeader _fileHeader; // No code in this patch fills it with parsed data yet.
+
+  public:
+    CziFile();
+    ~CziFile();
+};
+
+#include "czi_file.cpp"
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/czi_parser.cpp b/czi-format/inspector/czi-parser/czi_parser.cpp
new file mode 100644
index 0000000..91d55e5
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_parser.cpp
@@ -0,0 +1,45 @@
+#include "czi_parser.h"
+#include <fstream>
+#include "bit_converter.cpp"
+
+CziFile parse_czi_file(std::string file)
+{
+    /* Stub: `file` is not read yet. The draft below is disabled and kept for reference only.
+    std::ifstream fileStream = std::ifstream("data/CZT-Stack-Anno.czi", std::ios::binary);
+    assert(fileStream.is_open());
+    // Clear skipws so the operator>> used by istream_iterator does not drop whitespace-valued bytes.
+    fileStream.unsetf(std::ios::skipws);
+
+    const int readCount = 100;
+    std::vector<byte> fileHeaderBytes;
+    fileHeaderBytes.resize(readCount);
+
+    auto fileIterator = std::istream_iterator<byte>(fileStream);
+    for (size_t i = 0; i < readCount; i++)
+    {
+        fileHeaderBytes[i] = *fileIterator++;
+    }
+    //fileStream.read(reinterpret_cast<char *>(&fileHeaderBytes), 32);
+
+    printf("Read %i bytes.\n", ((int)fileHeaderBytes.size()));
+
+    SegmentHeader fileSegmentHeader = {};
+
+    std::vector<byte> sidBytes(fileHeaderBytes.begin(), fileHeaderBytes.begin() + 16);
+
+    fileSegmentHeader.sId = utf8bytes_to_string(fileHeaderBytes, 0, 16); // bytes_to_raw_string(fileHeaderBytes, 0, 16);
+    fileSegmentHeader.allocatedSize = bytes_to_long(fileHeaderBytes, 16);
+    fileSegmentHeader.usedSize = bytes_to_long(fileHeaderBytes, 16 + 8);
+
+    //std::vector<byte> x_guid(fileHeaderBytes.begin() + SegmentHeaderSize + 16, fileHeaderBytes.begin() + SegmentHeaderSize + 16 + 16);
+
+    //std::string utf8text = "\x48\x65\x6C\x6C\x6F\x20\x57\x6F\x72\x6C\x64";
+    //std::string decoded = boost::locale::conv::from_utf<char>((char *)x_guid.data(), "utf-8");
+
+    std::string guid = utf8bytes_to_string(fileHeaderBytes, SegmentHeaderSize + 16, 16);
+    std::string guid2 = utf8bytes_to_string(fileHeaderBytes, SegmentHeaderSize + 32, 16);
+
+    assert(guid == guid2);
+    */
+    return CziFile(); // Always returns a default-constructed CziFile for now.
+}
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/czi_parser.h b/czi-format/inspector/czi-parser/czi_parser.h
new file mode 100644
index 0000000..6c76ff5
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_parser.h
@@ -0,0 +1,6 @@
+#pragma once
+#include "czi_file.h"
+
+CziFile parse_czi_file(std::string file);
+
+#include "czi_parser.cpp"
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/czi_segments/FileHeader.h b/czi-format/inspector/czi-parser/czi_segments/FileHeader.h
new file mode 100644
index 0000000..84f380e
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_segments/FileHeader.h
@@ -0,0 +1,34 @@
+#pragma once
+#include "Version.h"
+#include "SegmentHeader.h"
+#include <vector>
+
+struct FileHeader // In-memory fields of the file header segment.
+{
+    // Standard segment header.
+    SegmentHeader header;
+
+    // Version of the file.
+    Version fileVersion;
+
+    // GUID of the master file.
+    std::vector<byte> masterFileGuid;
+
+    // GUID of this file.
+    std::vector<byte> fileGuid;
+
+    // Part number in multi-file scenario. 0 if single file.
+    int filePart;
+
+    // File position of the SubBlockDirectory segment.
+    long subBlockDirectoryPosition;
+
+    // File position of the Metadata segment.
+    long metadataPosition;
+
+    // File position of the AttachmentDirectory segment.
+    long attachmentDirectoryPosition;
+
+    // This flag indicates a currently inconsistent situation.
+    bool updatePending;
+};
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/czi_segments/SegmentHeader.h b/czi-format/inspector/czi-parser/czi_segments/SegmentHeader.h
index 4e17d54..51d9c24 100644
--- a/czi-format/inspector/czi-parser/czi_segments/SegmentHeader.h
+++ b/czi-format/inspector/czi-parser/czi_segments/SegmentHeader.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <string>
+#include "../custom_types.h"
 
 const int SegmentHeaderSize = 32;
 
diff --git a/czi-format/inspector/czi-parser/czi_segments/Version.h b/czi-format/inspector/czi-parser/czi_segments/Version.h
new file mode 100644
index 0000000..21f4f03
--- /dev/null
+++ b/czi-format/inspector/czi-parser/czi_segments/Version.h
@@ -0,0 +1,5 @@
+struct Version // NOTE(review): this header has no #pragma once / include guard, unlike FileHeader.h and SegmentHeader.h.
+{
+    int major; // Major version number.
+    int minor; // Minor version number.
+};
\ No newline at end of file
diff --git a/czi-format/inspector/czi-parser/main.cpp b/czi-format/inspector/czi-parser/main.cpp
index 10d8814..da72d54 100644
--- a/czi-format/inspector/czi-parser/main.cpp
+++ b/czi-format/inspector/czi-parser/main.cpp
@@ -1,35 +1,7 @@
-#include <fstream>
-#include <iterator>
-#include "bit_converter.cpp"
-#include "czi_segments/SegmentHeader.h"
-
+#include "czi_parser.h"
+#include "binary_stream.h"
 int main(int argc, char **argv)
 {
-    std::ifstream fileStream = std::ifstream("data/CZT-Stack-Anno.czi", std::ios::binary);
-    assert(fileStream.is_open());
-    // Don't read newlines in binary mode??
-    fileStream.unsetf(std::ios::skipws);
-
-    const int readCount = 100;
-    std::vector<byte> fileHeaderBytes;
-    fileHeaderBytes.resize(readCount);
-
-    auto fileIterator = std::istream_iterator<byte>(fileStream);
-    for (size_t i = 0; i < readCount; i++)
-    {
-        fileHeaderBytes[i] = *fileIterator++;
-    }
-    //fileStream.read(reinterpret_cast<char *>(&fileHeaderBytes), 32);
-
-    printf("Read %i bytes.\n", ((int)fileHeaderBytes.size()));
-
-    SegmentHeader fileSegmentHeader = {};
-    fileSegmentHeader.sId = bytes_to_raw_string(fileHeaderBytes, 0, 16);
-    fileSegmentHeader.allocatedSize = bytes_to_long(fileHeaderBytes, 16);
-    fileSegmentHeader.usedSize = bytes_to_long(fileHeaderBytes, 16 + 8);
-
-    std::string guid = bytes_to_raw_string(fileHeaderBytes, SegmentHeaderSize + 16, 16);
-    std::string guid2 = bytes_to_raw_string(fileHeaderBytes, SegmentHeaderSize + 32, 16);
-
+    CziFile result = parse_czi_file("dede"); // NOTE(review): "dede" looks like a placeholder path; argc/argv are unused.
     return 0;
 }
-- 
GitLab