diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt
index 072d6c57bb621578960134f8795ce475d12d7f02..49bdb75ddb8b1d1b56d32bf3ed3f53b33e5c5e54 100644
--- a/client/CMakeLists.txt
+++ b/client/CMakeLists.txt
@@ -232,7 +232,7 @@ endif()
 
 # Subdirectories
 if (WITH_CLIENT_CUDA)
-	#add_subdirectory(cycles_cuda)
+	add_subdirectory(cycles_cuda)
 endif()
 
 if (WITH_CLIENT_RENDERENGINE)
@@ -245,6 +245,7 @@ if (WITH_CLIENT_CESNET)
     add_subdirectory(ultragrid)
 endif()
 
+add_subdirectory(blenlib)
 add_subdirectory(cycles)
 add_subdirectory(blender_client)
 
diff --git a/client/blenlib/CMakeLists.txt b/client/blenlib/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..020c7b27b3c33544179b9d56144422653e7432ff
--- /dev/null
+++ b/client/blenlib/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Static library built from the BLI math sources listed below.
+set(INC
+	.
+	../../extern/Eigen3
+	../../source/blender/blenlib
+	../../source/blender/makesdna
+)
+
+set(SRC
+	math_matrix.cpp
+	math_rotation.cpp
+	math_vector.cpp
+	math_vector_inline.cpp
+	math_base.cpp
+	math_base_inline.cpp
+)
+
+# No headers are listed for this target yet.
+set(SRC_HEADERS)
+
+if(WITH_OPENMP)
+	add_definitions(-DWITH_OPENMP)
+endif()
+
+# NOTE(review): forces the `#ifdef __SSE2__` code paths on for every compiler/target - confirm.
+add_definitions(-D__SSE2__)
+include_directories(${INC})
+add_library(blenlib STATIC ${SRC} ${SRC_HEADERS})
+
+install(TARGETS blenlib DESTINATION lib)
diff --git a/client/blenlib/math_base.cpp b/client/blenlib/math_base.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..30267661ae7f7cf94f46bed651f816597ea21ac5
--- /dev/null
+++ b/client/blenlib/math_base.cpp
@@ -0,0 +1,81 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ *
+ * */
+
+/** \file
+ * \ingroup bli
+ */
+
+#include "BLI_math.h"
+
+#include "BLI_strict_flags.h"
+
+/* Return `base` raised to the non-negative integer power `exp` (asserted). */
+int pow_i(int base, int exp)
+{
+  int result = 1;
+  BLI_assert(exp >= 0);
+  /* Square-and-multiply. `base` is squared only while higher exponent bits remain: the
+   * final squaring is never used and could overflow `int`, which is undefined behavior. */
+  for (; exp; exp >>= 1) {
+    result = (exp & 1) ? result * base : result;
+    base = (exp > 1) ? base * base : base;
+  }
+
+  return result;
+}
+
+/* Round `x` to `ndigits` decimal digits, resolving exact ties to the even neighbour.
+ * Adapted from python 3.1 floatobject.c, whose note says ndigits must be between 0 and 21;
+ * negative values take the branch that rounds to the left of the decimal point. */
+double double_round(double x, int ndigits)
+{
+  const bool scale_up = (ndigits >= 0);
+  double scale, shifted;
+
+  /* Move the rounding position to the units place. */
+  if (scale_up) {
+    scale = pow(10.0, (double)ndigits);
+    shifted = x * scale;
+    /* If the scaled value overflows, the rounded value is exactly `x`. */
+    if (!isfinite(shifted)) {
+      return x;
+    }
+  }
+  else {
+    scale = pow(10.0, (double)-ndigits);
+    shifted = x / scale;
+  }
+
+  double rounded = round(shifted);
+  if (fabs(shifted - rounded) == 0.5) {
+    /* Halfway between two integers: use round-half-even. */
+    rounded = 2.0 * round(shifted / 2.0);
+  }
+
+  /* Undo the scaling; the result is returned without any overflow check. */
+  if (scale_up) {
+    return rounded / scale;
+  }
+  else {
+    return rounded * scale;
+  }
+}
diff --git a/client/blenlib/math_base_inline.cpp b/client/blenlib/math_base_inline.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2abc603344b5e2c9ae887f875c95f2fa849cb5e1
--- /dev/null
+++ b/client/blenlib/math_base_inline.cpp
@@ -0,0 +1,650 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ *
+ * */
+
+/** \file
+ * \ingroup bli
+ */
+
+#ifndef __MATH_BASE_INLINE_C__
+#define __MATH_BASE_INLINE_C__
+
+#include <float.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#ifdef __SSE2__
+#  include <emmintrin.h>
+#endif
+
+#include "BLI_math_base.h"
+
+/* copied from BLI_utildefines.h */
+#ifdef __GNUC__
+#  define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#  define UNLIKELY(x) (x)
+#endif
+
+//#define BLI_assert
+
+/* Small integer powers by plain multiplication (powf is really slow for these). */
+MINLINE float pow2f(float x)
+{
+  return x * x;
+}
+MINLINE float pow3f(float x)
+{
+  return (x * x) * x;
+}
+MINLINE float pow4f(float x)
+{
+  return pow2f(x * x);
+}
+MINLINE float pow7f(float x)
+{
+  return pow2f((x * x) * x) * x;
+}
+
+MINLINE float sqrt3f(float f)
+{
+  /* Cube root computed as exp(log(|f|) / 3) with the sign put back afterwards;
+   * zero is returned directly. */
+  if (UNLIKELY(f == 0.0f)) {
+    return 0.0f;
+  }
+  if (UNLIKELY(f < 0.0f)) {
+    return -(float)(exp(log(-f) / 3.0));
+  }
+  return (float)(exp(log(f) / 3.0));
+}
+
+MINLINE double sqrt3d(double d)
+{
+  /* Double-precision cube root: exp(log(|d|) / 3) with the sign put back afterwards;
+   * zero is returned directly. */
+  if (UNLIKELY(d == 0.0)) {
+    return 0.0;
+  }
+  if (UNLIKELY(d < 0.0)) {
+    return -exp(log(-d) / 3.0);
+  }
+  return exp(log(d) / 3.0);
+}
+
+MINLINE float sqrtf_signed(float f)
+{
+  return (f >= 0.0f) ? sqrtf(f) : -sqrtf(-f); /* negative input gives -sqrt(-f) */
+}
+
+MINLINE float saacos(float fac)
+{
+  /* Arc cosine that saturates: inputs at or below -1 give pi, inputs at or
+   * above 1 give 0, everything else goes to acosf(). */
+  if (UNLIKELY(fac <= -1.0f)) {
+    return (float)M_PI;
+  }
+  if (UNLIKELY(fac >= 1.0f)) {
+    return 0.0f;
+  }
+  return acosf(fac);
+}
+
+MINLINE float saasin(float fac)
+{
+  /* Arc sine that saturates: inputs at or below -1 give -pi/2, inputs at or
+   * above 1 give pi/2, everything else goes to asinf(). */
+  if (UNLIKELY(fac <= -1.0f)) {
+    return (float)-M_PI / 2.0f;
+  }
+  if (UNLIKELY(fac >= 1.0f)) {
+    return (float)M_PI / 2.0f;
+  }
+  return asinf(fac);
+}
+
+MINLINE float sasqrt(float fac)
+{
+  /* Square root that returns zero for zero or negative input
+   * instead of calling sqrtf() on it. */
+  if (UNLIKELY(fac <= 0.0f)) {
+    return 0.0f;
+  }
+  return sqrtf(fac);
+}
+
+MINLINE float saacosf(float fac)
+{
+  /* Saturating arc cosine (same logic as saacos): pi at or below -1,
+   * zero at or above 1, acosf() in between. */
+  if (UNLIKELY(fac <= -1.0f)) {
+    return (float)M_PI;
+  }
+  if (UNLIKELY(fac >= 1.0f)) {
+    return 0.0f;
+  }
+  return acosf(fac);
+}
+
+MINLINE float saasinf(float fac)
+{
+  /* Saturating arc sine (same logic as saasin): -pi/2 at or below -1,
+   * pi/2 at or above 1, asinf() in between. */
+  if (UNLIKELY(fac <= -1.0f)) {
+    return (float)-M_PI / 2.0f;
+  }
+  if (UNLIKELY(fac >= 1.0f)) {
+    return (float)M_PI / 2.0f;
+  }
+  return asinf(fac);
+}
+
+MINLINE float sasqrtf(float fac)
+{
+  /* Same logic as sasqrt: zero for zero or negative input,
+   * sqrtf() otherwise. */
+  if (UNLIKELY(fac <= 0.0f)) {
+    return 0.0f;
+  }
+  return sqrtf(fac);
+}
+
+MINLINE float interpf(float target, float origin, float fac)
+{
+  return (fac * target) + (1.0f - fac) * origin; /* fac = 0 gives origin, fac = 1 gives target */
+}
+
+/* Used for zoom values: returns 2 raised to ceil(log2(val)), computed in double precision. */
+MINLINE float power_of_2(float val)
+{
+  return (float)pow(2.0, ceil(log((double)val) / M_LN2));
+}
+
+MINLINE int is_power_of_2_i(int n)
+{
+  return (n & (n - 1)) == 0; /* true when at most one bit is set, so 0 also passes */
+}
+
+MINLINE int power_of_2_max_i(int n)
+{
+  /* Values accepted by is_power_of_2_i() are returned as they are. */
+  if (is_power_of_2_i(n)) {
+    return n;
+  }
+  /* Strip the lowest set bit until the check passes, then double what is left. */
+  for (n &= n - 1; !is_power_of_2_i(n); n &= n - 1) {
+    /* pass */
+  }
+  return n * 2;
+}
+
+MINLINE int power_of_2_min_i(int n)
+{
+  /* Strip the lowest set bit until is_power_of_2_i() accepts the value. */
+  for (; !is_power_of_2_i(n); n &= n - 1) {
+    /* pass */
+  }
+  return n;
+}
+
+MINLINE unsigned int power_of_2_max_u(unsigned int x)
+{
+  /* Copy the top set bit of (x - 1) into every lower bit using shifts of
+   * 1, 2, 4, 8 and 16, then add one. */
+  x -= 1;
+  for (unsigned int shift = 1; shift <= 16; shift *= 2) {
+    x |= (x >> shift);
+  }
+  return x + 1;
+}
+
+MINLINE unsigned power_of_2_min_u(unsigned x)
+{
+  /* Copy the top set bit into every lower bit (shifts of 1, 2, 4, 8 and 16),
+   * then subtract the lower half so only the top bit remains. */
+  for (unsigned shift = 1; shift <= 16; shift *= 2) {
+    x |= (x >> shift);
+  }
+  return x - (x >> 1);
+}
+
+/* Rounding and clamping: function bodies shared by the float/double to integer converters. */
+/* Both macros compute floor(arg + 0.5); results <= min return min, results >= max return max. */
+#define _round_clamp_fl_impl(arg, ty, min, max) \
+  { \
+    float r = floorf(arg + 0.5f); \
+    if (UNLIKELY(r <= (float)min)) { \
+      return (ty)min; \
+    } \
+    else if (UNLIKELY(r >= (float)max)) { \
+      return (ty)max; \
+    } \
+    else { \
+      return (ty)r; \
+    } \
+  }
+
+#define _round_clamp_db_impl(arg, ty, min, max) \
+  { \
+    double r = floor(arg + 0.5); \
+    if (UNLIKELY(r <= (double)min)) { \
+      return (ty)min; \
+    } \
+    else if (UNLIKELY(r >= (double)max)) { \
+      return (ty)max; \
+    } \
+    else { \
+      return (ty)r; \
+    } \
+  }
+
+/* Body of the unclamped float converters: floorf(arg + 0.5f) cast to the target type. */
+#define _round_fl_impl(arg, ty) \
+  { \
+    return (ty)floorf(arg + 0.5f); \
+  }
+/* Body of the unclamped double converters: floor(arg + 0.5) cast to the target type. */
+#define _round_db_impl(arg, ty) \
+  { \
+    return (ty)floor(arg + 0.5); \
+  }
+
+/* float -> integer, no range check on the result. */
+MINLINE signed char round_fl_to_char(float a) { _round_fl_impl(a, signed char) }
+MINLINE unsigned char round_fl_to_uchar(float a) { _round_fl_impl(a, unsigned char) }
+MINLINE short round_fl_to_short(float a) { _round_fl_impl(a, short) }
+MINLINE unsigned short round_fl_to_ushort(float a) { _round_fl_impl(a, unsigned short) }
+MINLINE int round_fl_to_int(float a) { _round_fl_impl(a, int) }
+MINLINE unsigned int round_fl_to_uint(float a) { _round_fl_impl(a, unsigned int) }
+/* double -> integer, no range check on the result. */
+MINLINE signed char round_db_to_char(double a) { _round_db_impl(a, signed char) }
+MINLINE unsigned char round_db_to_uchar(double a) { _round_db_impl(a, unsigned char) }
+MINLINE short round_db_to_short(double a) { _round_db_impl(a, short) }
+MINLINE unsigned short round_db_to_ushort(double a) { _round_db_impl(a, unsigned short) }
+MINLINE int round_db_to_int(double a) { _round_db_impl(a, int) }
+MINLINE unsigned int round_db_to_uint(double a) { _round_db_impl(a, unsigned int) }
+
+#undef _round_fl_impl
+#undef _round_db_impl
+
+MINLINE signed char round_fl_to_char_clamp(float a)
+{ _round_clamp_fl_impl(a, signed char, SCHAR_MIN, SCHAR_MAX) }
+MINLINE unsigned char round_fl_to_uchar_clamp(float a)
+{ _round_clamp_fl_impl(a, unsigned char, 0, UCHAR_MAX) }
+MINLINE short round_fl_to_short_clamp(float a)
+{ _round_clamp_fl_impl(a, short, SHRT_MIN, SHRT_MAX) }
+MINLINE unsigned short round_fl_to_ushort_clamp(float a)
+{ _round_clamp_fl_impl(a, unsigned short, 0, USHRT_MAX) }
+MINLINE int round_fl_to_int_clamp(float a)
+{ _round_clamp_fl_impl(a, int, INT_MIN, INT_MAX) }
+MINLINE unsigned int round_fl_to_uint_clamp(float a)
+{ _round_clamp_fl_impl(a, unsigned int, 0, UINT_MAX) }
+
+MINLINE signed char round_db_to_char_clamp(double a)
+{ _round_clamp_db_impl(a, signed char, SCHAR_MIN, SCHAR_MAX) }
+MINLINE unsigned char round_db_to_uchar_clamp(double a)
+{ _round_clamp_db_impl(a, unsigned char, 0, UCHAR_MAX) }
+MINLINE short round_db_to_short_clamp(double a)
+{ _round_clamp_db_impl(a, short, SHRT_MIN, SHRT_MAX) }
+MINLINE unsigned short round_db_to_ushort_clamp(double a)
+{ _round_clamp_db_impl(a, unsigned short, 0, USHRT_MAX) }
+MINLINE int round_db_to_int_clamp(double a)
+{ _round_clamp_db_impl(a, int, INT_MIN, INT_MAX) }
+MINLINE unsigned int round_db_to_uint_clamp(double a)
+{ _round_clamp_db_impl(a, unsigned int, 0, UINT_MAX) }
+
+#undef _round_clamp_fl_impl
+#undef _round_clamp_db_impl
+
+/* Integer division that rounds 0.5 up, particularly useful for color blending
+ * with integers, to avoid gradual darkening when rounding down. */
+MINLINE int divide_round_i(int a, int b)
+{
+  return (2 * a + b) / (2 * b); /* (a + b / 2) / b, kept in integers by doubling both sides */
+}
+
+/**
+ * Integer division that rounds toward negative infinity, like Python's int division.
+ * \note The truncated C quotient is lowered by one when it is inexact and the signs differ.
+ */
+MINLINE int divide_floor_i(int a, int b)
+{
+  const int quot = a / b; /* Optimizes into a single division together with the remainder. */
+  const bool inexact_negative = (a % b != 0) && ((a < 0) != (b < 0));
+  return inexact_negative ? quot - 1 : quot;
+}
+
+/**
+ * Modulo that handles negative numbers, works the same as Python's.
+ */
+MINLINE int mod_i(int i, int n)
+{
+  return (i % n + n) % n; /* adding n lifts a negative C remainder before the final % */
+}
+
+MINLINE float min_ff(float a, float b)
+{
+  return (a < b) ? a : b; /* b is returned when the values are equal or either one is NaN */
+}
+MINLINE float max_ff(float a, float b)
+{
+  return (a > b) ? a : b; /* b is returned when the values are equal or either one is NaN */
+}
+
+MINLINE int min_ii(int a, int b)
+{
+  return (a < b) ? a : b; /* smaller of the two ints */
+}
+MINLINE int max_ii(int a, int b)
+{
+  return (b < a) ? a : b; /* larger of the two ints */
+}
+
+MINLINE float min_fff(float a, float b, float c)
+{
+  return min_ff(min_ff(a, b), c); /* pairwise: min(min(a, b), c) */
+}
+MINLINE float max_fff(float a, float b, float c)
+{
+  return max_ff(max_ff(a, b), c); /* pairwise: max(max(a, b), c) */
+}
+
+MINLINE int min_iii(int a, int b, int c)
+{
+  return min_ii(min_ii(a, b), c); /* pairwise: min(min(a, b), c) */
+}
+MINLINE int max_iii(int a, int b, int c)
+{
+  return max_ii(max_ii(a, b), c); /* pairwise: max(max(a, b), c) */
+}
+
+MINLINE float min_ffff(float a, float b, float c, float d)
+{
+  return min_ff(min_fff(a, b, c), d); /* three-way minimum, then compared with d */
+}
+MINLINE float max_ffff(float a, float b, float c, float d)
+{
+  return max_ff(max_fff(a, b, c), d); /* three-way maximum, then compared with d */
+}
+
+MINLINE int min_iiii(int a, int b, int c, int d)
+{
+  return min_ii(min_iii(a, b, c), d); /* three-way minimum, then compared with d */
+}
+MINLINE int max_iiii(int a, int b, int c, int d)
+{
+  return max_ii(max_iii(a, b, c), d); /* three-way maximum, then compared with d */
+}
+
+MINLINE size_t min_zz(size_t a, size_t b)
+{
+  return (a < b) ? a : b; /* smaller of the two sizes */
+}
+MINLINE size_t max_zz(size_t a, size_t b)
+{
+  return (b < a) ? a : b; /* larger of the two sizes */
+}
+
+MINLINE int clamp_i(int value, int min, int max)
+{
+  return min_ii(max_ii(value, min), max); /* raise to min first, then cap at max */
+}
+
+MINLINE float clamp_f(float value, float min, float max)
+{
+  /* Limit `value` to [min, max]. The upper bound is tested first, so `max`
+   * is returned whenever value > max, whatever `min` is. A NaN `value`
+   * fails both comparisons and is returned unchanged. */
+  if (value > max) {
+    return max;
+  }
+  return (value < min) ? min : value;
+}
+
+MINLINE size_t clamp_z(size_t value, size_t min, size_t max)
+{
+  return min_zz(max_zz(value, min), max); /* raise to min first, then cap at max */
+}
+
+/**
+ * Almost-equal test for IEEE floats, using the absolute difference method.
+ * \param max_diff: the maximum absolute difference still reported as equal.
+ */
+MINLINE int compare_ff(float a, float b, const float max_diff)
+{
+  const float abs_diff = fabsf(a - b);
+  return abs_diff <= max_diff;
+}
+
+/**
+ * Almost-equal for IEEE floats, using their integer representation
+ * (mixing ULP and absolute difference methods).
+ *
+ * \param max_diff: maximum absolute difference (takes care of the near-zero area,
+ * where relative difference methods cannot really work).
+ * \param max_ulps: the 'maximum number of floats + 1' allowed between \a a and \a b.
+ *
+ * \see https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+ */
+MINLINE int compare_ff_relative(float a, float b, const float max_diff, const int max_ulps)
+{
+  union {
+    float f;
+    int i;
+  } ua, ub;
+
+  BLI_assert(sizeof(float) == sizeof(int));
+  BLI_assert(max_ulps < (1 << 22));
+
+  if (fabsf(a - b) <= max_diff) {
+    return 1;
+  }
+
+  ua.f = a;
+  ub.f = b;
+
+  /* The sign is read from the integer view, since (-0.0f < 0) is false for floats.
+   * Values of opposite sign are never equal here; otherwise the ULP distance decides. */
+  const bool same_sign = (ua.i < 0) == (ub.i < 0);
+  return (same_sign && abs(ua.i - ub.i) <= max_ulps) ? 1 : 0;
+}
+
+MINLINE float signf(float f)
+{
+  return (f < 0.f) ? -1.f : 1.f; /* zero (of either sign) and NaN give 1 */
+}
+
+MINLINE int signum_i_ex(float a, float eps)
+{
+  /* Three-way sign with a tolerance: 1 above eps, -1 below -eps,
+   * and 0 for everything else (including NaN). */
+  if (a > eps) {
+    return 1;
+  }
+  if (a < -eps) {
+    return -1;
+  }
+  return 0;
+}
+
+MINLINE int signum_i(float a)
+{
+  /* Three-way sign: 1 for positive, -1 for negative,
+   * and 0 for everything else (zero of either sign, NaN). */
+  if (a > 0.0f) {
+    return 1;
+  }
+  if (a < 0.0f) {
+    return -1;
+  }
+  return 0;
+}
+
+/** Returns floor(log10(|f|)) + 1: the number of base-ten digits of the integer part of \a f.
+ * Zero gives 0; the result is zero or negative when |f| < 1 (0.01 returns -1 e.g.). */
+MINLINE int integer_digits_f(const float f)
+{
+  return (f == 0.0f) ? 0 : (int)floor(log10(fabs(f))) + 1;
+}
+
+/** Returns floor(log10(|d|)) + 1: the number of base-ten digits of the integer part of \a d.
+ * Zero gives 0; the result is zero or negative when |d| < 1 (0.01 returns -1 e.g.). */
+MINLINE int integer_digits_d(const double d)
+{
+  return (d == 0.0) ? 0 : (int)floor(log10(fabs(d))) + 1;
+}
+
+MINLINE int integer_digits_i(const int i)
+{
+  return (i == 0) ? 0 : (int)log10(fabs((double)i)) + 1; /* log10() needs a value > 0 */
+}
+
+/* Internal helpers for SSE2 implementation.
+ *
+ * NOTE: Are to be called ONLY from inside `#ifdef __SSE2__` !!!
+ */
+
+#ifdef __SSE2__
+
+/* Calculate an initial guess for arg^exp based on the float representation.
+ * This method gives a constant bias, which can be easily compensated by
+ * multiplying with bias_coeff.
+ * Gives better results for exponents near 1 (e.g. 4/5).
+ * exp = exponent, passed as the bit pattern of a float (an `int` here)
+ * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), passed the
+ * same way
+ *
+ * We hope that exp and e2coeff get properly inlined
+ */
+MALWAYS_INLINE __m128 _bli_math_fastpow(const int exp, const int e2coeff, const __m128 arg)
+{
+  __m128 ret;
+  ret = _mm_mul_ps(arg, _mm_castsi128_ps(_mm_set1_epi32(e2coeff)));
+  ret = _mm_cvtepi32_ps(_mm_castps_si128(ret));
+  ret = _mm_mul_ps(ret, _mm_castsi128_ps(_mm_set1_epi32(exp)));
+  ret = _mm_castsi128_ps(_mm_cvtps_epi32(ret));
+  return ret;
+}
+
+/* Improve x ^ (1.0f / 5.0f) solution with one Newton-Raphson step: (4 * old + x / old^4) / 5 */
+MALWAYS_INLINE __m128 _bli_math_improve_5throot_solution(const __m128 old_result, const __m128 x)
+{
+  __m128 approx2 = _mm_mul_ps(old_result, old_result);
+  __m128 approx4 = _mm_mul_ps(approx2, approx2);
+  __m128 t = _mm_div_ps(x, approx4);
+  __m128 summ = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(4.0f), old_result), t); /* fma */
+  return _mm_mul_ps(summ, _mm_set1_ps(1.0f / 5.0f));
+}
+
+/* Calculate powf(x, 2.4) for four floats at once. Working domain: 1e-10 < x < 1e+10 */
+MALWAYS_INLINE __m128 _bli_math_fastpow24(const __m128 arg)
+{
+  /* max, avg and |avg| errors were calculated in gcc without FMA instructions
+   * The final precision should be better than powf in glibc */
+
+  /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize
+   * avg error.
+   */
+  /* 0x3F4CCCCD = 4/5 */
+  /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
+  /* error max = 0.17, avg = 0.0018, |avg| = 0.05 */
+  __m128 x = _bli_math_fastpow(0x3F4CCCCD, 0x4F55A7FB, arg);
+  __m128 arg2 = _mm_mul_ps(arg, arg);
+  __m128 arg4 = _mm_mul_ps(arg2, arg2);
+  /* error max = 0.018        avg = 0.0031    |avg| = 0.0031  */
+  x = _bli_math_improve_5throot_solution(x, arg4);
+  /* error max = 0.00021    avg = 1.6e-05    |avg| = 1.6e-05 */
+  x = _bli_math_improve_5throot_solution(x, arg4);
+  /* error max = 6.1e-07    avg = 5.2e-08    |avg| = 1.1e-07 */
+  x = _bli_math_improve_5throot_solution(x, arg4);
+  return _mm_mul_ps(x, _mm_mul_ps(x, x)); /* cube of the refined arg^(4/5), i.e. arg^2.4 */
+}
+
+/* Calculate powf(x, 1.0f / 2.4) for four floats at once */
+MALWAYS_INLINE __m128 _bli_math_fastpow512(const __m128 arg)
+{
+  /* 5/12 is too small, so compute the 4th root of 20/12 instead.
+   * 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
+   * weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
+   */
+  __m128 xf = _bli_math_fastpow(0x3f2aaaab, 0x5eb504f3, arg);
+  __m128 xover = _mm_mul_ps(arg, xf);
+  __m128 xfm1 = _mm_rsqrt_ps(xf);
+  __m128 x2 = _mm_mul_ps(arg, arg);
+  __m128 xunder = _mm_mul_ps(x2, xfm1);
+  /* sqrt2 * over + 2 * sqrt2 * under */
+  __m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
+                           _mm_add_ps(xover, xunder));
+  xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); /* v * rsqrt(v): approximate square root */
+  xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg)); /* second pass gives the 4th root */
+  return xavg;
+}
+
+MALWAYS_INLINE __m128 _bli_math_blend_sse(const __m128 mask, const __m128 a, const __m128 b)
+{
+  return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b)); /* bits of a where mask is set, else b */
+}
+
+#endif /* __SSE2__ */
+
+/* Low level conversion functions. This one maps val to 0..255 as 255 * val + 0.5, clamped. */
+MINLINE unsigned char unit_float_to_uchar_clamp(float val)
+{
+  return (unsigned char)((
+      (val <= 0.0f) ? 0 : ((val > (1.0f - 0.5f / 255.0f)) ? 255 : ((255.0f * val) + 0.5f))));
+}
+#define unit_float_to_uchar_clamp(val) \
+  ((CHECK_TYPE_INLINE(val, float)), unit_float_to_uchar_clamp(val)) /* NOTE(review): CHECK_TYPE_INLINE is defined elsewhere; presumably a compile-time type check - confirm */
+
+MINLINE unsigned short unit_float_to_ushort_clamp(float val)
+{
+  return (unsigned short)((val >= 1.0f - 0.5f / 65535) ?
+                              65535 :
+                              (val <= 0.0f) ? 0 : (val * 65535.0f + 0.5f)); /* val * 65535 + 0.5, clamped to 0..65535 */
+}
+#define unit_float_to_ushort_clamp(val) \
+  ((CHECK_TYPE_INLINE(val, float)), unit_float_to_ushort_clamp(val)) /* NOTE(review): CHECK_TYPE_INLINE is defined elsewhere; presumably a compile-time type check - confirm */
+
+MINLINE unsigned char unit_ushort_to_uchar(unsigned short val)
+{
+  return (unsigned char)(((val) >= 65535 - 128) ? 255 : ((val) + 128) >> 8); /* (val + 128) >> 8, capped at 255 */
+}
+#define unit_ushort_to_uchar(val) \
+  ((CHECK_TYPE_INLINE(val, unsigned short)), unit_ushort_to_uchar(val)) /* NOTE(review): CHECK_TYPE_INLINE is defined elsewhere; presumably a compile-time type check - confirm */
+
+#define unit_float_to_uchar_clamp_v3(v1, v2) /* v1[0..2] = converted v2[0..2] */ \
+  { \
+    (v1)[0] = unit_float_to_uchar_clamp((v2[0])); \
+    (v1)[1] = unit_float_to_uchar_clamp((v2[1])); \
+    (v1)[2] = unit_float_to_uchar_clamp((v2[2])); \
+  } \
+  ((void)0)
+#define unit_float_to_uchar_clamp_v4(v1, v2) /* v1[0..3] = converted v2[0..3] */ \
+  { \
+    (v1)[0] = unit_float_to_uchar_clamp((v2[0])); \
+    (v1)[1] = unit_float_to_uchar_clamp((v2[1])); \
+    (v1)[2] = unit_float_to_uchar_clamp((v2[2])); \
+    (v1)[3] = unit_float_to_uchar_clamp((v2[3])); \
+  } \
+  ((void)0)
+
+#endif /* __MATH_BASE_INLINE_C__ */
diff --git a/client/blenlib/math_matrix.cpp b/client/blenlib/math_matrix.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cde3921c65a21ea5b298f598f9c89a49d6e347cd
--- /dev/null
+++ b/client/blenlib/math_matrix.cpp
@@ -0,0 +1,2839 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ */
+
+/** \file
+ * \ingroup bli
+ */
+
+#include <assert.h>
+
+//#include "eigen_capi.h"
+
+/////////////////////////////////////////////////////////////////////////////////
+#include <Eigen/Core>
+#include <Eigen/Dense>
+
+using Eigen::Map;
+using Eigen::Matrix4f;
+
+bool EIG_invert_m4_m4(float inverse[4][4], const float matrix[4][4])
+{
+  const Map<const Matrix4f> input(&matrix[0][0]); /* Read-only view of the 16 input floats. */
+  Matrix4f result;
+  bool invertible = true;
+  input.computeInverseWithCheck(result, invertible, 0.0f);
+  if (!invertible) {
+    result.setZero(); /* Not invertible: the output becomes all zeros. */
+  }
+  memcpy(inverse, result.data(), sizeof(float) * 4 * 4);
+  return invertible;
+}
+
+#include "BLI_math.h"
+
+#include "BLI_strict_flags.h"
+
+/********************************* Init **************************************/
+
+void zero_m2(float m[2][2])
+{
+  memset(&m[0][0], 0, sizeof(float) * 2 * 2); /* Clear all four entries. */
+}
+
+void zero_m3(float m[3][3])
+{
+  memset(&m[0][0], 0, sizeof(float) * 3 * 3); /* Clear all nine entries. */
+}
+
+void zero_m4(float m[4][4])
+{
+  memset(&m[0][0], 0, sizeof(float) * 4 * 4); /* Clear all sixteen entries. */
+}
+
+void unit_m2(float m[2][2])
+{
+  /* 2x2 identity: ones on the diagonal, zeros off it. */
+  m[0][1] = m[1][0] = 0.0f;
+  m[0][0] = m[1][1] = 1.0f;
+}
+
+void unit_m3(float m[3][3])
+{
+  /* 3x3 identity: clear the whole matrix (all-zero bytes read back as 0.0f),
+   * then set the main diagonal. */
+  memset(m, 0, sizeof(float[3][3]));
+  m[0][0] = m[1][1] = m[2][2] = 1.0f;
+}
+
+void unit_m4(float m[4][4])
+{
+  /* 4x4 identity: walk the 16 entries in storage order, writing one where the
+   * row index equals the column index and zero everywhere else. */
+  for (int k = 0; k < 16; k++) {
+    m[k / 4][k % 4] = (k / 4 == k % 4) ? 1.0f : 0.0f;
+  }
+}
+
+void copy_m2_m2(float m1[2][2], const float m2[2][2])
+{
+  memcpy(&m1[0][0], &m2[0][0], sizeof(float) * 2 * 2); /* m1 is the destination. */
+}
+
+void copy_m3_m3(float m1[3][3], const float m2[3][3])
+{
+  /* m1 is the destination and receives all nine floats of m2. */
+  memcpy(&m1[0][0], &m2[0][0], sizeof(float) * 3 * 3);
+}
+
+void copy_m4_m4(float m1[4][4], const float m2[4][4])
+{
+  memcpy(&m1[0][0], &m2[0][0], sizeof(float) * 4 * 4); /* m1 is the destination. */
+}
+
+/**
+ * Copy the upper-left 3x3 block of a 4x4 matrix.
+ *
+ * \param m1: destination 3x3 matrix.
+ * \param m2: source 4x4 matrix; its 4th row and 4th column are not read.
+ */
+void copy_m3_m4(float m1[3][3], const float m2[4][4])
+{
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][j];
+    }
+  }
+}
+
+/**
+ * Expand a 3x3 matrix into a 4x4 one.
+ *
+ * The 3x3 values fill the upper-left block, the remaining entries of the 4th row and
+ * column are set to zero and m1[3][3] to one, so every entry of m1 is written.
+ * No prior clearing of m1 is needed.
+ *
+ * \param m1: destination 4x4 matrix.
+ * \param m2: source 3x3 matrix.
+ */
+void copy_m4_m3(float m1[4][4], const float m2[3][3])
+{
+  for (int i = 0; i < 3; i++) {
+    /* Upper-left block. */
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][j];
+    }
+    /* Entry i of the 4th column and of the 4th row. */
+    m1[i][3] = 0.0f;
+    m1[3][i] = 0.0f;
+  }
+
+  m1[3][3] = 1.0f;
+}
+
+void copy_m3_m3d(float R[3][3], const double A[3][3])
+{
+  /* Narrow each double of A to float. Keep it stupid simple for better data flow in CPU. */
+  R[0][0] = (float)A[0][0];
+  R[0][1] = (float)A[0][1];
+  R[0][2] = (float)A[0][2];
+
+  R[1][0] = (float)A[1][0];
+  R[1][1] = (float)A[1][1];
+  R[1][2] = (float)A[1][2];
+
+  R[2][0] = (float)A[2][0];
+  R[2][1] = (float)A[2][1];
+  R[2][2] = (float)A[2][2];
+}
+
+/* Exchange the contents of two 3x3 matrices.
+ * Passing the same matrix twice leaves it unchanged. */
+void swap_m3m3(float m1[3][3], float m2[3][3])
+{
+  /* One entry at a time, through a single temporary. */
+  for (int row = 0; row < 3; row++) {
+    for (int col = 0; col < 3; col++) {
+      const float held = m1[row][col];
+      m1[row][col] = m2[row][col];
+      m2[row][col] = held;
+    }
+  }
+}
+
+/* Exchange the contents of two 4x4 matrices.
+ * Passing the same matrix twice leaves it unchanged. */
+void swap_m4m4(float m1[4][4], float m2[4][4])
+{
+  /* One entry at a time, through a single temporary. */
+  for (int row = 0; row < 4; row++) {
+    for (int col = 0; col < 4; col++) {
+      const float held = m1[row][col];
+      m1[row][col] = m2[row][col];
+      m2[row][col] = held;
+    }
+  }
+}
+
+/******************************** Arithmetic *********************************/
+
+void mul_m4_m4m4(float R[4][4], const float A[4][4], const float B[4][4])
+{
+  /* The kernel requires R to be distinct from both inputs, so any input that aliases R is
+   * read from a snapshot of R instead. This includes R = R * R (both inputs alias R), a
+   * case the mul_m4_m4_pre/post helpers reject by assertion. */
+  float R_prev[4][4];
+  if (A == R || B == R) {
+    copy_m4_m4(R_prev, R);
+  }
+  mul_m4_m4m4_uniq(
+      R, (A == R) ? (const float(*)[4])R_prev : A, (B == R) ? (const float(*)[4])R_prev : B);
+}
+
+void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
+{
+  BLI_assert(R != A && R != B);
+
+  /* Matrix product; in array-index terms: R[i][k] = sum over j of B[i][j] * A[j][k]. */
+#ifdef __SSE2__
+  __m128 A0 = _mm_loadu_ps(A[0]);
+  __m128 A1 = _mm_loadu_ps(A[1]);
+  __m128 A2 = _mm_loadu_ps(A[2]);
+  __m128 A3 = _mm_loadu_ps(A[3]);
+
+  for (int i = 0; i < 4; i++) {
+    __m128 B0 = _mm_set1_ps(B[i][0]);
+    __m128 B1 = _mm_set1_ps(B[i][1]);
+    __m128 B2 = _mm_set1_ps(B[i][2]);
+    __m128 B3 = _mm_set1_ps(B[i][3]);
+
+    __m128 sum = _mm_add_ps(_mm_add_ps(_mm_mul_ps(B0, A0), _mm_mul_ps(B1, A1)),
+                            _mm_add_ps(_mm_mul_ps(B2, A2), _mm_mul_ps(B3, A3)));
+
+    _mm_storeu_ps(R[i], sum); /* R[i] = B[i][0]*A[0] + B[i][1]*A[1] + B[i][2]*A[2] + B[i][3]*A[3] */
+  }
+#else /* Scalar version of the same sums, written out per entry. */
+  R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0];
+  R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1];
+  R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2] + B[0][3] * A[3][2];
+  R[0][3] = B[0][0] * A[0][3] + B[0][1] * A[1][3] + B[0][2] * A[2][3] + B[0][3] * A[3][3];
+
+  R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0] + B[1][3] * A[3][0];
+  R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1] + B[1][3] * A[3][1];
+  R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2] + B[1][3] * A[3][2];
+  R[1][3] = B[1][0] * A[0][3] + B[1][1] * A[1][3] + B[1][2] * A[2][3] + B[1][3] * A[3][3];
+
+  R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0] + B[2][3] * A[3][0];
+  R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1] + B[2][3] * A[3][1];
+  R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2] + B[2][3] * A[3][2];
+  R[2][3] = B[2][0] * A[0][3] + B[2][1] * A[1][3] + B[2][2] * A[2][3] + B[2][3] * A[3][3];
+
+  R[3][0] = B[3][0] * A[0][0] + B[3][1] * A[1][0] + B[3][2] * A[2][0] + B[3][3] * A[3][0];
+  R[3][1] = B[3][0] * A[0][1] + B[3][1] * A[1][1] + B[3][2] * A[2][1] + B[3][3] * A[3][1];
+  R[3][2] = B[3][0] * A[0][2] + B[3][1] * A[1][2] + B[3][2] * A[2][2] + B[3][3] * A[3][2];
+  R[3][3] = B[3][0] * A[0][3] + B[3][1] * A[1][3] + B[3][2] * A[2][3] + B[3][3] * A[3][3];
+#endif
+}
+
+void mul_m4_m4_pre(float R[4][4], const float A[4][4])
+{
+  BLI_assert(A != R);
+  float R_prev[4][4]; /* Snapshot of R, passed as the B operand while R is overwritten. */
+  copy_m4_m4(R_prev, R);
+  mul_m4_m4m4_uniq(R, A, R_prev);
+}
+
+void mul_m4_m4_post(float R[4][4], const float B[4][4])
+{
+  BLI_assert(B != R);
+  float R_prev[4][4]; /* Snapshot of R, passed as the A operand while R is overwritten. */
+  copy_m4_m4(R_prev, R);
+  mul_m4_m4m4_uniq(R, R_prev, B);
+}
+
+void mul_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3])
+{
+  /* The kernel requires R to be distinct from both inputs, so any input that aliases R is
+   * read from a snapshot of R instead. This includes R = R * R (both inputs alias R), a
+   * case the mul_m3_m3_pre/post helpers reject by assertion. */
+  float R_prev[3][3];
+  if (A == R || B == R) {
+    copy_m3_m3(R_prev, R);
+  }
+  mul_m3_m3m3_uniq(
+      R, (A == R) ? (const float(*)[3])R_prev : A, (B == R) ? (const float(*)[3])R_prev : B);
+}
+
+void mul_m3_m3_pre(float R[3][3], const float A[3][3])
+{
+  BLI_assert(A != R);
+  float R_prev[3][3]; /* Snapshot of R, passed as the B operand while R is overwritten. */
+  copy_m3_m3(R_prev, R);
+  mul_m3_m3m3_uniq(R, A, R_prev);
+}
+
+void mul_m3_m3_post(float R[3][3], const float B[3][3])
+{
+  BLI_assert(B != R);
+  float R_prev[3][3]; /* Snapshot of R, passed as the A operand while R is overwritten. */
+  copy_m3_m3(R_prev, R);
+  mul_m3_m3m3_uniq(R, R_prev, B);
+}
+
+/**
+ * 3x3 matrix product for the case where the result does not alias an input.
+ *
+ * In array-index terms: R[i][k] = B[i][0] * A[0][k] + B[i][1] * A[1][k] + B[i][2] * A[2][k].
+ * R must be a different matrix than A and B (asserted).
+ */
+void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3])
+{
+  BLI_assert(R != A && R != B);
+
+  for (int i = 0; i < 3; i++) {
+    for (int k = 0; k < 3; k++) {
+      R[i][k] = B[i][0] * A[0][k] + B[i][1] * A[1][k] + B[i][2] * A[2][k];
+    }
+  }
+}
+
+/* Product of the 3x3 blocks of m2_ (3x3) and m3_ (4x4), written to the upper-left
+ * 3x3 block of m1: m1[i][j] = sum over k = 0..2 of m2_[i][k] * m3_[k][j].
+ * The 4th row and column of m1 are not written. */
+void mul_m4_m4m3(float m1[4][4], const float m3_[4][4], const float m2_[3][3])
+{
+  float m2[3][3], m3[4][4];
+
+  /* Copy so it works when m1 is the same pointer as m2_ or m3_. */
+  /* TODO: avoid copying when matrices are different */
+  copy_m3_m3(m2, m2_);
+  copy_m4_m4(m3, m3_);
+
+  /* m1[i][j] = m2[i][k] * m3[k][j], summed over k = 0..2. */
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][0] * m3[0][j] + m2[i][1] * m3[1][j] + m2[i][2] * m3[2][j];
+    }
+  }
+}
+
+/**
+ * m1 = m2 * m3 (in the local naming), ignoring the elements on the 4th row/column of m2.
+ *
+ * \param m1: 3x3 result; both inputs are copied first, so it may be the same matrix as m3_.
+ * \param m3_: 3x3 operand.
+ * \param m2_: 4x4 operand, of which only the upper-left 3x3 block is used.
+ */
+void mul_m3_m3m4(float m1[3][3], const float m3_[3][3], const float m2_[4][4])
+{
+  float m2[4][4], m3[3][3];
+
+  /* Copy so it works when m1 is the same pointer as m2_ or m3_. */
+  /* TODO: avoid copying when matrices are different */
+  copy_m4_m4(m2, m2_);
+  copy_m3_m3(m3, m3_);
+
+  /* m1[i][j] = m2[i][k] * m3[k][j], summed over k = 0..2. */
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][0] * m3[0][j] + m2[i][1] * m3[1][j] + m2[i][2] * m3[2][j];
+    }
+  }
+}
+
+/**
+ * m1 = m2 * m3 (in the local naming), ignoring the elements on the 4th row/column of m3.
+ *
+ * \param m1: 3x3 result; both inputs are copied first, so it may be the same matrix as m2_.
+ * \param m3_: 4x4 operand, of which only the upper-left 3x3 block is used.
+ * \param m2_: 3x3 operand.
+ */
+void mul_m3_m4m3(float m1[3][3], const float m3_[4][4], const float m2_[3][3])
+{
+  float m2[3][3], m3[4][4];
+
+  /* Copy so it works when m1 is the same pointer as m2_ or m3_. */
+  /* TODO: avoid copying when matrices are different */
+  copy_m3_m3(m2, m2_);
+  copy_m4_m4(m3, m3_);
+
+  /* m1[i][j] = m2[i][k] * m3[k][j], summed over k = 0..2. */
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][0] * m3[0][j] + m2[i][1] * m3[1][j] + m2[i][2] * m3[2][j];
+    }
+  }
+}
+
+/**
+ * Writes the 3x3 product into the upper-left block of the 4x4 `m1`. For i, j in 0..2:
+ * `m1[i][j] = m2_[i][0] * m3_[0][j] + m2_[i][1] * m3_[1][j] + m2_[i][2] * m3_[2][j]`.
+ *
+ * The 4th row and 4th column of `m1` are not written by this function.
+ * Both inputs are copied to locals first, so `m1` may be the same matrix as `m2_`.
+ */
+void mul_m4_m3m4(float m1[4][4], const float m3_[3][3], const float m2_[4][4])
+{
+  float lhs[4][4], rhs[3][3];
+
+  /* TODO: avoid copying when matrices are different */
+  copy_m4_m4(lhs, m2_);
+  copy_m3_m3(rhs, m3_);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = lhs[i][0] * rhs[0][j] + lhs[i][1] * rhs[1][j] + lhs[i][2] * rhs[2][j];
+    }
+  }
+}
+
+/**
+ * 3x3 product of the upper-left blocks of two 4x4 matrices. For i, j in 0..2:
+ * `m1[i][j] = m2[i][0] * m3[0][j] + m2[i][1] * m3[1][j] + m2[i][2] * m3[2][j]`.
+ * The 4th row and column of both inputs are never read.
+ */
+void mul_m3_m4m4(float m1[3][3], const float m3[4][4], const float m2[4][4])
+{
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      m1[i][j] = m2[i][0] * m3[0][j] + m2[i][1] * m3[1][j] + m2[i][2] * m3[2][j];
+    }
+  }
+}
+
+/** \name Macro helpers for: mul_m3_series
+ *
+ * Every helper chains #mul_m3_m3m3 over its inputs from left to right, keeping the
+ * running product in `r`. Helpers for longer chains reuse the next shorter helper and
+ * then apply one more multiplication.
+ * \{ */
+void _va_mul_m3_series_3(float r[3][3], const float m1[3][3], const float m2[3][3])
+{
+  mul_m3_m3m3(r, m1, m2);
+}
+void _va_mul_m3_series_4(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3])
+{
+  _va_mul_m3_series_3(r, m1, m2);
+  mul_m3_m3m3(r, r, m3);
+}
+void _va_mul_m3_series_5(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3],
+                         const float m4[3][3])
+{
+  _va_mul_m3_series_4(r, m1, m2, m3);
+  mul_m3_m3m3(r, r, m4);
+}
+void _va_mul_m3_series_6(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3],
+                         const float m4[3][3],
+                         const float m5[3][3])
+{
+  _va_mul_m3_series_5(r, m1, m2, m3, m4);
+  mul_m3_m3m3(r, r, m5);
+}
+void _va_mul_m3_series_7(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3],
+                         const float m4[3][3],
+                         const float m5[3][3],
+                         const float m6[3][3])
+{
+  _va_mul_m3_series_6(r, m1, m2, m3, m4, m5);
+  mul_m3_m3m3(r, r, m6);
+}
+void _va_mul_m3_series_8(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3],
+                         const float m4[3][3],
+                         const float m5[3][3],
+                         const float m6[3][3],
+                         const float m7[3][3])
+{
+  _va_mul_m3_series_7(r, m1, m2, m3, m4, m5, m6);
+  mul_m3_m3m3(r, r, m7);
+}
+void _va_mul_m3_series_9(float r[3][3],
+                         const float m1[3][3],
+                         const float m2[3][3],
+                         const float m3[3][3],
+                         const float m4[3][3],
+                         const float m5[3][3],
+                         const float m6[3][3],
+                         const float m7[3][3],
+                         const float m8[3][3])
+{
+  _va_mul_m3_series_8(r, m1, m2, m3, m4, m5, m6, m7);
+  mul_m3_m3m3(r, r, m8);
+}
+/** \} */
+
+/** \name Macro helpers for: mul_m4_series
+ *
+ * Every helper chains #mul_m4_m4m4 over its inputs from left to right, keeping the
+ * running product in `r`. Helpers for longer chains reuse the next shorter helper and
+ * then apply one more multiplication.
+ * \{ */
+void _va_mul_m4_series_3(float r[4][4], const float m1[4][4], const float m2[4][4])
+{
+  mul_m4_m4m4(r, m1, m2);
+}
+void _va_mul_m4_series_4(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4])
+{
+  _va_mul_m4_series_3(r, m1, m2);
+  mul_m4_m4m4(r, r, m3);
+}
+void _va_mul_m4_series_5(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4],
+                         const float m4[4][4])
+{
+  _va_mul_m4_series_4(r, m1, m2, m3);
+  mul_m4_m4m4(r, r, m4);
+}
+void _va_mul_m4_series_6(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4],
+                         const float m4[4][4],
+                         const float m5[4][4])
+{
+  _va_mul_m4_series_5(r, m1, m2, m3, m4);
+  mul_m4_m4m4(r, r, m5);
+}
+void _va_mul_m4_series_7(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4],
+                         const float m4[4][4],
+                         const float m5[4][4],
+                         const float m6[4][4])
+{
+  _va_mul_m4_series_6(r, m1, m2, m3, m4, m5);
+  mul_m4_m4m4(r, r, m6);
+}
+void _va_mul_m4_series_8(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4],
+                         const float m4[4][4],
+                         const float m5[4][4],
+                         const float m6[4][4],
+                         const float m7[4][4])
+{
+  _va_mul_m4_series_7(r, m1, m2, m3, m4, m5, m6);
+  mul_m4_m4m4(r, r, m7);
+}
+void _va_mul_m4_series_9(float r[4][4],
+                         const float m1[4][4],
+                         const float m2[4][4],
+                         const float m3[4][4],
+                         const float m4[4][4],
+                         const float m5[4][4],
+                         const float m6[4][4],
+                         const float m7[4][4],
+                         const float m8[4][4])
+{
+  _va_mul_m4_series_8(r, m1, m2, m3, m4, m5, m6, m7);
+  mul_m4_m4m4(r, r, m8);
+}
+/** \} */
+
+/**
+ * Transforms the 2D point `v` by the 3x3 matrix `m`: the point is extended with a third
+ * component of 1.0f, multiplied through #mul_v3_m3v3(), and the first two results are
+ * divided by the third.
+ * `v` is copied before `r` is written, so `r` may be the same array as `v`.
+ */
+void mul_v2_m3v2(float r[2], const float m[3][3], const float v[2])
+{
+  float homog[3];
+  float result[3];
+
+  copy_v2_v2(homog, v);
+  homog[2] = 1.0f;
+  mul_v3_m3v3(result, m, homog);
+
+  for (int i = 0; i < 2; i++) {
+    r[i] = result[i] / result[2];
+  }
+}
+
+/** In-place variant of #mul_v2_m3v2(): `r` is both the input point and the result. */
+void mul_m3_v2(const float m[3][3], float r[2])
+{
+  mul_v2_m3v2(r, m, r);
+}
+
+/**
+ * Transforms `vec` in place: rows 0..2 of `mat` are weighted by x, y, z and row 3 is
+ * added unscaled. x and y are cached because they are overwritten before the last
+ * component has been computed.
+ */
+void mul_m4_v3(const float mat[4][4], float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int c = 0; c < 3; c++) {
+    vec[c] = vx * mat[0][c] + vy * mat[1][c] + mat[2][c] * vec[2] + mat[3][c];
+  }
+}
+
+/**
+ * `r[c] = vec[0] * mat[0][c] + vec[1] * mat[1][c] + vec[2] * mat[2][c] + mat[3][c]`.
+ * x and y are cached up front, so `r` may be the same array as `vec`.
+ */
+void mul_v3_m4v3(float r[3], const float mat[4][4], const float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int c = 0; c < 3; c++) {
+    r[c] = vx * mat[0][c] + vy * mat[1][c] + mat[2][c] * vec[2] + mat[3][c];
+  }
+}
+
+/**
+ * First two components of the transform done by #mul_v3_m4v3():
+ * `r[c] = vec[0] * mat[0][c] + vec[1] * mat[1][c] + vec[2] * mat[2][c] + mat[3][c]`, c in 0..1.
+ * x is cached up front, so `r` may be the same array as `vec`.
+ */
+void mul_v2_m4v3(float r[2], const float mat[4][4], const float vec[3])
+{
+  const float vx = vec[0];
+
+  for (int c = 0; c < 2; c++) {
+    r[c] = vx * mat[0][c] + vec[1] * mat[1][c] + mat[2][c] * vec[2] + mat[3][c];
+  }
+}
+
+/**
+ * `r[c] = mat[0][c] * vec[0] + mat[1][c] * vec[1]` for c in 0..1.
+ * x is cached up front, so `r` may be the same array as `vec`.
+ */
+void mul_v2_m2v2(float r[2], const float mat[2][2], const float vec[2])
+{
+  const float vx = vec[0];
+
+  for (int c = 0; c < 2; c++) {
+    r[c] = mat[0][c] * vx + mat[1][c] * vec[1];
+  }
+}
+
+/** In-place variant of #mul_v2_m2v2(): `vec` is both the input and the result. */
+void mul_m2v2(const float mat[2][2], float vec[2])
+{
+  mul_v2_m2v2(vec, mat, vec);
+}
+
+/**
+ * Same as #mul_m4_v3() but doesn't apply translation component:
+ * only rows 0..2 of `mat` contribute, row 3 is never read. Operates on `vec` in place.
+ */
+void mul_mat3_m4_v3(const float mat[4][4], float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int c = 0; c < 3; c++) {
+    vec[c] = vx * mat[0][c] + vy * mat[1][c] + mat[2][c] * vec[2];
+  }
+}
+
+/**
+ * `r[c] = vec[0] * mat[0][c] + vec[1] * mat[1][c] + vec[2] * mat[2][c]`; row 3 of `mat`
+ * is never read. x and y are cached up front, so `r` may be the same array as `vec`.
+ */
+void mul_v3_mat3_m4v3(float r[3], const float mat[4][4], const float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int c = 0; c < 3; c++) {
+    r[c] = vx * mat[0][c] + vy * mat[1][c] + mat[2][c] * vec[2];
+  }
+}
+
+/**
+ * Transforms `vec` in place with #mul_m4_v3() and then divides every component by the
+ * absolute value of the factor returned by #mul_project_m4_v3_zfac().
+ */
+void mul_project_m4_v3(const float mat[4][4], float vec[3])
+{
+  /* absolute value to not flip the frustum upside down behind the camera */
+  const float w = fabsf(mul_project_m4_v3_zfac(mat, vec));
+
+  /* The factor above is taken from the untransformed vector, so this must come second. */
+  mul_m4_v3(mat, vec);
+
+  for (int i = 0; i < 3; i++) {
+    vec[i] /= w;
+  }
+}
+
+/**
+ * Out-of-place projection: `r` receives #mul_v3_m4v3() of `vec`, with every component
+ * divided by the absolute value of the factor from #mul_project_m4_v3_zfac().
+ */
+void mul_v3_project_m4_v3(float r[3], const float mat[4][4], const float vec[3])
+{
+  /* Taken from `vec` before `r` is written. */
+  const float w = fabsf(mul_project_m4_v3_zfac(mat, vec));
+
+  mul_v3_m4v3(r, mat, vec);
+
+  for (int i = 0; i < 3; i++) {
+    r[i] /= w;
+  }
+}
+
+/**
+ * 2D projection: `r` receives #mul_v2_m4v3() of `vec`, with both components divided by
+ * the absolute value of the factor from #mul_project_m4_v3_zfac().
+ */
+void mul_v2_project_m4_v3(float r[2], const float mat[4][4], const float vec[3])
+{
+  /* Taken from `vec` before `r` is written. */
+  const float w = fabsf(mul_project_m4_v3_zfac(mat, vec));
+
+  mul_v2_m4v3(r, mat, vec);
+
+  for (int i = 0; i < 2; i++) {
+    r[i] /= w;
+  }
+}
+
+/**
+ * `r[c] = v[0] * mat[0][c] + v[1] * mat[1][c] + v[2] * mat[2][c] + v[3] * mat[3][c]`.
+ * x, y and z are cached up front and `v[3]` is only overwritten by the last
+ * assignment, so `r` may be the same array as `v`.
+ */
+void mul_v4_m4v4(float r[4], const float mat[4][4], const float v[4])
+{
+  const float vx = v[0], vy = v[1], vz = v[2];
+
+  for (int c = 0; c < 4; c++) {
+    r[c] = vx * mat[0][c] + vy * mat[1][c] + vz * mat[2][c] + mat[3][c] * v[3];
+  }
+}
+
+/** In-place variant of #mul_v4_m4v4(): `r` is both the input vector and the result. */
+void mul_m4_v4(const float mat[4][4], float r[4])
+{
+  mul_v4_m4v4(r, mat, r);
+}
+
+/**
+ * Double-precision counterpart of #mul_v4_m4v4(): each float entry of `mat` is widened
+ * to double before it is multiplied.
+ * x, y and z are cached up front and `v[3]` is only overwritten by the last
+ * assignment, so `r` may be the same array as `v`.
+ */
+void mul_v4d_m4v4d(double r[4], const float mat[4][4], const double v[4])
+{
+  const double vx = v[0], vy = v[1], vz = v[2];
+
+  for (int c = 0; c < 4; c++) {
+    const double m0 = (double)mat[0][c];
+    const double m1 = (double)mat[1][c];
+    const double m2 = (double)mat[2][c];
+    const double m3 = (double)mat[3][c];
+
+    r[c] = vx * m0 + vy * m1 + vz * m2 + m3 * v[3];
+  }
+}
+
+/** In-place variant of #mul_v4d_m4v4d(): `r` is both the input vector and the result. */
+void mul_m4_v4d(const float mat[4][4], double r[4])
+{
+  mul_v4d_m4v4d(r, mat, r);
+}
+
+/**
+ * 4D result of transforming the 3D vector `v`, which has implicit w = 1.0f:
+ * `r[c] = v[0] * M[0][c] + v[1] * M[1][c] + v[2] * M[2][c] + M[3][c]`.
+ * Nothing is cached: `v` is re-read after earlier components of `r` have been written.
+ */
+void mul_v4_m4v3(float r[4], const float M[4][4], const float v[3])
+{
+  for (int c = 0; c < 4; c++) {
+    r[c] = v[0] * M[0][c] + v[1] * M[1][c] + M[2][c] * v[2] + M[3][c];
+  }
+}
+
+/**
+ * `r[c] = M[0][c] * a[0] + M[1][c] * a[1] + M[2][c] * a[2]`.
+ * `a` is copied before `r` is written, so `r` may be the same array as `a`.
+ */
+void mul_v3_m3v3(float r[3], const float M[3][3], const float a[3])
+{
+  float src[3];
+  copy_v3_v3(src, a);
+
+  for (int c = 0; c < 3; c++) {
+    r[c] = M[0][c] * src[0] + M[1][c] * src[1] + M[2][c] * src[2];
+  }
+}
+
+/**
+ * Double-precision `r[c] = M[0][c] * a[0] + M[1][c] * a[1] + M[2][c] * a[2]`.
+ * `a` is copied before `r` is written, so `r` may be the same array as `a`.
+ */
+void mul_v3_m3v3_db(double r[3], const double M[3][3], const double a[3])
+{
+  double src[3];
+  copy_v3_v3_db(src, a);
+
+  for (int c = 0; c < 3; c++) {
+    r[c] = M[0][c] * src[0] + M[1][c] * src[1] + M[2][c] * src[2];
+  }
+}
+
+/**
+ * First two components of #mul_v3_m3v3():
+ * `r[c] = M[0][c] * a[0] + M[1][c] * a[1] + M[2][c] * a[2]` for c in 0..1.
+ * `a` is copied before `r` is written, so `r` may overlap `a`.
+ */
+void mul_v2_m3v3(float r[2], const float M[3][3], const float a[3])
+{
+  float src[3];
+  copy_v3_v3(src, a);
+
+  for (int c = 0; c < 2; c++) {
+    r[c] = M[0][c] * src[0] + M[1][c] * src[1] + M[2][c] * src[2];
+  }
+}
+
+/** In-place variant of #mul_v3_m3v3(): `r` is both the input vector and the result. */
+void mul_m3_v3(const float M[3][3], float r[3])
+{
+  /* Passing `r` as input and output is safe: mul_v3_m3v3() copies its input first. */
+	mul_v3_m3v3(r, M, r);
+}
+
+/** In-place variant of #mul_v3_m3v3_db(): `r` is both the input vector and the result. */
+void mul_m3_v3_db(const double M[3][3], double r[3])
+{
+  /* Passing `r` as input and output is safe: mul_v3_m3v3_db() copies its input first. */
+	mul_v3_m3v3_db(r, M, r);
+}
+
+/**
+ * In place: each output component `vec[r]` becomes
+ * `vec[0] * mat[r][0] + vec[1] * mat[r][1] + vec[2] * mat[r][2]` of the original vector,
+ * i.e. the matrix indices are swapped compared to #mul_m3_v3().
+ */
+void mul_transposed_m3_v3(const float mat[3][3], float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int r = 0; r < 3; r++) {
+    vec[r] = vx * mat[r][0] + vy * mat[r][1] + mat[r][2] * vec[2];
+  }
+}
+
+/**
+ * Same arithmetic as #mul_transposed_m3_v3(), reading only the upper-left 3x3 block of
+ * the 4x4 `mat`: `vec[r] = vec[0] * mat[r][0] + vec[1] * mat[r][1] + vec[2] * mat[r][2]`.
+ */
+void mul_transposed_mat3_m4_v3(const float mat[4][4], float vec[3])
+{
+  const float vx = vec[0], vy = vec[1];
+
+  for (int r = 0; r < 3; r++) {
+    vec[r] = vx * mat[r][0] + vy * mat[r][1] + mat[r][2] * vec[2];
+  }
+}
+
+/** Multiplies all nine entries of `m` by `f`, in place. */
+void mul_m3_fl(float m[3][3], float f)
+{
+  for (int row = 0; row < 3; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 3; col++) {
+      cells[col] *= f;
+    }
+  }
+}
+
+/** Multiplies all sixteen entries of `m` by `f`, in place. */
+void mul_m4_fl(float m[4][4], float f)
+{
+  for (int row = 0; row < 4; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 4; col++) {
+      cells[col] *= f;
+    }
+  }
+}
+
+/**
+ * Multiplies only the upper-left 3x3 block of the 4x4 `m` by `f`, in place.
+ * The 4th row and 4th column are left untouched.
+ */
+void mul_mat3_m4_fl(float m[4][4], float f)
+{
+  for (int row = 0; row < 3; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 3; col++) {
+      cells[col] *= f;
+    }
+  }
+}
+
+/** Flips the sign of all nine entries of `m` by multiplying each with -1.0f, in place. */
+void negate_m3(float m[3][3])
+{
+  for (int row = 0; row < 3; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 3; col++) {
+      cells[col] *= -1.0f;
+    }
+  }
+}
+
+/**
+ * Flips the sign of the upper-left 3x3 block of the 4x4 `m`, in place.
+ * The 4th row and 4th column are left untouched.
+ */
+void negate_mat3_m4(float m[4][4])
+{
+  for (int row = 0; row < 3; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 3; col++) {
+      cells[col] *= -1.0f;
+    }
+  }
+}
+
+/** Flips the sign of all sixteen entries of `m` by multiplying each with -1.0f, in place. */
+void negate_m4(float m[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    float *cells = m[row];
+    for (int col = 0; col < 4; col++) {
+      cells[col] *= -1.0f;
+    }
+  }
+}
+
+/**
+ * In place, in double precision:
+ * `vec[c] = vec[0] * mat[0][c] + vec[1] * mat[1][c] + vec[2] * mat[2][c]`,
+ * with each float entry of `mat` widened to double before it is multiplied.
+ */
+void mul_m3_v3_double(const float mat[3][3], double vec[3])
+{
+  const double vx = vec[0], vy = vec[1];
+
+  for (int c = 0; c < 3; c++) {
+    vec[c] = vx * (double)mat[0][c] + vy * (double)mat[1][c] + (double)mat[2][c] * vec[2];
+  }
+}
+
+/** Element-wise sum of two 3x3 matrices: `m1[i][j] = m2[i][j] + m3[i][j]`. */
+void add_m3_m3m3(float m1[3][3], const float m2[3][3], const float m3[3][3])
+{
+  for (int row = 0; row < 3; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 3; col++) {
+      out[col] = a[col] + b[col];
+    }
+  }
+}
+
+/** Element-wise sum of two 4x4 matrices: `m1[i][j] = m2[i][j] + m3[i][j]`. */
+void add_m4_m4m4(float m1[4][4], const float m2[4][4], const float m3[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 4; col++) {
+      out[col] = a[col] + b[col];
+    }
+  }
+}
+
+/** Element-wise multiply-add of 3x3 matrices: `m1[i][j] = m2[i][j] + m3[i][j] * f`. */
+void madd_m3_m3m3fl(float m1[3][3], const float m2[3][3], const float m3[3][3], const float f)
+{
+  for (int row = 0; row < 3; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 3; col++) {
+      out[col] = a[col] + b[col] * f;
+    }
+  }
+}
+
+/** Element-wise multiply-add of 4x4 matrices: `m1[i][j] = m2[i][j] + m3[i][j] * f`. */
+void madd_m4_m4m4fl(float m1[4][4], const float m2[4][4], const float m3[4][4], const float f)
+{
+  for (int row = 0; row < 4; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 4; col++) {
+      out[col] = a[col] + b[col] * f;
+    }
+  }
+}
+
+/** Element-wise difference of two 3x3 matrices: `m1[i][j] = m2[i][j] - m3[i][j]`. */
+void sub_m3_m3m3(float m1[3][3], const float m2[3][3], const float m3[3][3])
+{
+  for (int row = 0; row < 3; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 3; col++) {
+      out[col] = a[col] - b[col];
+    }
+  }
+}
+
+/** Element-wise difference of two 4x4 matrices: `m1[i][j] = m2[i][j] - m3[i][j]`. */
+void sub_m4_m4m4(float m1[4][4], const float m2[4][4], const float m3[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    const float *a = m2[row];
+    const float *b = m3[row];
+    float *out = m1[row];
+    for (int col = 0; col < 4; col++) {
+      out[col] = a[col] - b[col];
+    }
+  }
+}
+
+/**
+ * Determinant of a 3x3 matrix, expanded along `m[0][0]`, `m[1][0]`, `m[2][0]` with
+ * alternating signs.
+ */
+float determinant_m3_array(const float m[3][3])
+{
+  /* 2x2 minors paired with m[0][0], m[1][0] and m[2][0] respectively. */
+  const float minor0 = m[1][1] * m[2][2] - m[1][2] * m[2][1];
+  const float minor1 = m[0][1] * m[2][2] - m[0][2] * m[2][1];
+  const float minor2 = m[0][1] * m[1][2] - m[0][2] * m[1][1];
+
+  return (m[0][0] * minor0 - m[1][0] * minor1 + m[2][0] * minor2);
+}
+
+/**
+ * Determinant of the upper-left 3x3 block of a 4x4 matrix, expanded along `m[0][0]`,
+ * `m[1][0]`, `m[2][0]` with alternating signs. The 4th row and column are never read.
+ */
+float determinant_m4_mat3_array(const float m[4][4])
+{
+  /* 2x2 minors paired with m[0][0], m[1][0] and m[2][0] respectively. */
+  const float minor0 = m[1][1] * m[2][2] - m[1][2] * m[2][1];
+  const float minor1 = m[0][1] * m[2][2] - m[0][2] * m[2][1];
+  const float minor2 = m[0][1] * m[1][2] - m[0][2] * m[1][1];
+
+  return (m[0][0] * minor0 - m[1][0] * minor1 + m[2][0] * minor2);
+}
+
+/**
+ * In-place variant of #invert_m3_m3_ex().
+ * `m` is overwritten with the computed matrix whether or not success is reported.
+ * \return the success flag of #invert_m3_m3_ex().
+ */
+bool invert_m3_ex(float m[3][3], const float epsilon)
+{
+  /* Scratch output: the out-of-place routine reads its source while writing the result. */
+  float tmp[3][3];
+  const bool success = invert_m3_m3_ex(tmp, m, epsilon);
+
+  copy_m3_m3(m, tmp);
+  return success;
+}
+
+/**
+ * Inverts `m2` into `m1` as adjoint divided by determinant.
+ *
+ * \param epsilon: Threshold for the success test; asserted to be non-negative.
+ * \return true when the absolute determinant of `m2` is greater than `epsilon`.
+ *
+ * \note The adjoint is divided by the determinant whenever the determinant is non-zero,
+ * even if the success test failed. For a determinant of exactly zero, `m1` is left
+ * holding the unscaled adjoint.
+ */
+bool invert_m3_m3_ex(float m1[3][3], const float m2[3][3], const float epsilon)
+{
+  BLI_assert(epsilon >= 0.0f);
+
+  /* Adjoint goes straight into the result matrix. */
+  adjoint_m3_m3(m1, m2);
+
+  /* The determinant is taken from the source matrix, not from the adjoint. */
+  const float det = determinant_m3_array(m2);
+  const bool success = (fabsf(det) > epsilon);
+
+  if (LIKELY(det != 0.0f)) {
+    const float inv_det = 1.0f / det;
+    for (int row = 0; row < 3; row++) {
+      for (int col = 0; col < 3; col++) {
+        m1[row][col] *= inv_det;
+      }
+    }
+  }
+  return success;
+}
+
+/**
+ * In-place variant of #invert_m3_m3().
+ * `m` is overwritten with the computed matrix whether or not success is reported.
+ * \return the success flag of #invert_m3_m3().
+ */
+bool invert_m3(float m[3][3])
+{
+  /* Scratch output: the out-of-place routine reads its source while writing the result. */
+  float tmp[3][3];
+  const bool success = invert_m3_m3(tmp, m);
+
+  copy_m3_m3(m, tmp);
+  return success;
+}
+
+/**
+ * Inverts `m2` into `m1` as adjoint divided by determinant.
+ *
+ * \return false when the determinant of `m2` is exactly zero; `m1` then holds the
+ * unscaled adjoint of `m2`.
+ */
+bool invert_m3_m3(float m1[3][3], const float m2[3][3])
+{
+  /* Adjoint goes straight into the result matrix. */
+  adjoint_m3_m3(m1, m2);
+
+  /* The determinant is taken from the source matrix, not from the adjoint. */
+  const float det = determinant_m3_array(m2);
+  const bool success = (det != 0.0f);
+
+  if (LIKELY(det != 0.0f)) {
+    const float inv_det = 1.0f / det;
+    for (int row = 0; row < 3; row++) {
+      for (int col = 0; col < 3; col++) {
+        m1[row][col] *= inv_det;
+      }
+    }
+  }
+
+  return success;
+}
+
+/**
+ * In-place variant of #invert_m4_m4().
+ * `m` is overwritten with the contents of the scratch matrix whether or not success
+ * is reported.
+ * \return the success flag of #invert_m4_m4().
+ */
+bool invert_m4(float m[4][4])
+{
+  float tmp[4][4];
+  const bool success = invert_m4_m4(tmp, m);
+
+  copy_m4_m4(m, tmp);
+  return success;
+}
+
+/**
+ * Computes the inverse of mat and puts it in inverse.
+ *
+ * Tries #EIG_invert_m4_m4 (Eigen) first and returns true if that succeeds. Otherwise
+ * falls back to Gaussian Elimination with partial (maximal column) pivoting.
+ * \return true on success (i.e. can always find a pivot) and false on failure.
+ * Mark Segal - 1992.
+ *
+ * \note the fallback is less performant than #EIG_invert_m4_m4 (Eigen), but e.g.
+ * for non-invertible scale matrices, finding a partial solution can
+ * be useful to have a valid local transform center, see T57767.
+ */
+bool invert_m4_m4_fallback(float inverse[4][4], const float mat[4][4])
+{
+  if (EIG_invert_m4_m4(inverse, mat)) {
+    return true;
+  }
+
+  int i, j, k;
+  double temp;
+  float tempmat[4][4];
+  float max;
+  int maxj;
+
+  /* The elimination below writes `inverse` while `mat` is still the reference input. */
+  BLI_assert(inverse != mat);
+
+  /* Set inverse to identity */
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      inverse[i][j] = 0;
+    }
+  }
+  for (i = 0; i < 4; i++) {
+    inverse[i][i] = 1;
+  }
+
+  /* Copy original matrix so we don't mess it up */
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      tempmat[i][j] = mat[i][j];
+    }
+  }
+
+  for (i = 0; i < 4; i++) {
+    /* Look for row with max pivot */
+    max = fabsf(tempmat[i][i]);
+    maxj = i;
+    for (j = i + 1; j < 4; j++) {
+      if (fabsf(tempmat[j][i]) > max) {
+        max = fabsf(tempmat[j][i]);
+        maxj = j;
+      }
+    }
+    /* Swap rows if necessary */
+    if (maxj != i) {
+      for (k = 0; k < 4; k++) {
+        SWAP(float, tempmat[i][k], tempmat[maxj][k]);
+        SWAP(float, inverse[i][k], inverse[maxj][k]);
+      }
+    }
+
+    /* `inverse` keeps whatever the elimination has produced so far when this bails out. */
+    if (UNLIKELY(tempmat[i][i] == 0.0f)) {
+      return false; /* No non-zero pivot */
+    }
+    /* Scale the pivot row so the pivot becomes 1; the division is done in double. */
+    temp = (double)tempmat[i][i];
+    for (k = 0; k < 4; k++) {
+      tempmat[i][k] = (float)((double)tempmat[i][k] / temp);
+      inverse[i][k] = (float)((double)inverse[i][k] / temp);
+    }
+    /* Subtract multiples of the pivot row from every other row to clear column `i`. */
+    for (j = 0; j < 4; j++) {
+      if (j != i) {
+        temp = tempmat[j][i];
+        for (k = 0; k < 4; k++) {
+          tempmat[j][k] -= (float)((double)tempmat[i][k] * temp);
+          inverse[j][k] -= (float)((double)inverse[i][k] * temp);
+        }
+      }
+    }
+  }
+  return true;
+}
+
+/**
+ * Inverts `mat` into `inverse` by forwarding to #EIG_invert_m4_m4.
+ * \return the value returned by #EIG_invert_m4_m4.
+ */
+bool invert_m4_m4(float inverse[4][4], const float mat[4][4])
+{
+  /* Use optimized matrix inverse from Eigen, since performance
+   * impact of this function is significant in complex rigs. */
+  return EIG_invert_m4_m4(inverse, mat);
+}
+
+/****************************** Linear Algebra *******************************/
+
+/**
+ * In-place transpose of a 3x3 matrix: every element above the diagonal is swapped with
+ * its mirror image below the diagonal.
+ */
+void transpose_m3(float mat[3][3])
+{
+  for (int i = 0; i < 3; i++) {
+    for (int j = i + 1; j < 3; j++) {
+      const float upper = mat[i][j];
+      mat[i][j] = mat[j][i];
+      mat[j][i] = upper;
+    }
+  }
+}
+
+/**
+ * Writes the transpose of `mat` into `rmat`: `rmat[i][j] = mat[j][i]`.
+ * The two arguments are asserted to be different matrices.
+ */
+void transpose_m3_m3(float rmat[3][3], const float mat[3][3])
+{
+  BLI_assert(rmat != mat);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      rmat[i][j] = mat[j][i];
+    }
+  }
+}
+
+/**
+ * Writes the transpose of the upper-left 3x3 block of the 4x4 `mat` into `rmat`:
+ * `rmat[i][j] = mat[j][i]` for i, j in 0..2.
+ * Seems obscure but is in fact a common operation.
+ */
+void transpose_m3_m4(float rmat[3][3], const float mat[4][4])
+{
+  BLI_assert(&rmat[0][0] != &mat[0][0]);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 3; j++) {
+      rmat[i][j] = mat[j][i];
+    }
+  }
+}
+
+/**
+ * In-place transpose of a 4x4 matrix: each of the six elements above the diagonal is
+ * swapped with its mirror image below the diagonal.
+ */
+void transpose_m4(float mat[4][4])
+{
+  for (int i = 0; i < 4; i++) {
+    for (int j = i + 1; j < 4; j++) {
+      const float upper = mat[i][j];
+      mat[i][j] = mat[j][i];
+      mat[j][i] = upper;
+    }
+  }
+}
+
+/**
+ * Writes the transpose of `mat` into `rmat`: `rmat[i][j] = mat[j][i]`.
+ * The two arguments are asserted to be different matrices.
+ */
+void transpose_m4_m4(float rmat[4][4], const float mat[4][4])
+{
+  BLI_assert(rmat != mat);
+
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      rmat[i][j] = mat[j][i];
+    }
+  }
+}
+
+/**
+ * Compares two 4x4 matrices row by row with #compare_v4v4(), stopping at the first row
+ * that does not match.
+ *
+ * \return 1 when all four rows compare as equal within `limit`, otherwise 0.
+ */
+/* TODO: return bool */
+int compare_m4m4(const float mat1[4][4], const float mat2[4][4], float limit)
+{
+  for (int row = 0; row < 4; row++) {
+    if (!compare_v4v4(mat1[row], mat2[row], limit)) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/**
+ * Make an orthogonal matrix around the selected axis of the given matrix.
+ *
+ * The selected row keeps its direction. The other two rows are rebuilt from cross
+ * products so that all three end up mutually perpendicular. Finally each row is
+ * multiplied by the per-axis size that #mat3_to_size() reported on entry, so the
+ * result is not normalized unless those sizes are 1.
+ *
+ * \param axis: Axis to build the orthonormal basis around (0, 1 or 2; other values assert).
+ */
+void orthogonalize_m3(float mat[3][3], int axis)
+{
+  float size[3];
+  mat3_to_size(size, mat);
+  normalize_v3(mat[axis]);
+  /* Every case follows the same plan: use the first other row if its dot product with
+   * the kept axis is below 1, else the second other row under the same test, else a
+   * vector made by rotating the kept axis' own components. */
+  switch (axis) {
+    case 0:
+      if (dot_v3v3(mat[0], mat[1]) < 1) {
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      else if (dot_v3v3(mat[0], mat[2]) < 1) {
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+        normalize_v3(mat[1]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[0][1];
+        vec[1] = mat[0][2];
+        vec[2] = mat[0][0];
+
+        cross_v3_v3v3(mat[2], mat[0], vec);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      break;
+    case 1:
+      if (dot_v3v3(mat[1], mat[0]) < 1) {
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+      }
+      /* Test the kept axis (row 1) against row 2: these are the two rows crossed below,
+       * matching how the axis 0 and axis 2 cases pick their second candidate. */
+      else if (dot_v3v3(mat[1], mat[2]) < 1) {
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[1][1];
+        vec[1] = mat[1][2];
+        vec[2] = mat[1][0];
+
+        cross_v3_v3v3(mat[0], mat[1], vec);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      break;
+    case 2:
+      if (dot_v3v3(mat[2], mat[0]) < 1) {
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+        normalize_v3(mat[1]);
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+      }
+      else if (dot_v3v3(mat[2], mat[1]) < 1) {
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[2][1];
+        vec[1] = mat[2][2];
+        vec[2] = mat[2][0];
+
+        cross_v3_v3v3(mat[0], vec, mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      break;
+    default:
+      BLI_assert(0);
+      break;
+  }
+  /* Re-apply the sizes captured before any row was modified. */
+  mul_v3_fl(mat[0], size[0]);
+  mul_v3_fl(mat[1], size[1]);
+  mul_v3_fl(mat[2], size[2]);
+}
+
+/**
+ * Make an orthogonal matrix around the selected axis of the given matrix.
+ *
+ * Only the first three components of rows 0..2 are processed by the vector helpers used
+ * here. The selected row keeps its direction. The other two rows are rebuilt from cross
+ * products so that all three end up mutually perpendicular. Finally each row is
+ * multiplied by the per-axis size that #mat4_to_size() reported on entry, so the
+ * result is not normalized unless those sizes are 1.
+ *
+ * \param axis: Axis to build the orthonormal basis around (0, 1 or 2; other values assert).
+ */
+void orthogonalize_m4(float mat[4][4], int axis)
+{
+  float size[3];
+  mat4_to_size(size, mat);
+  normalize_v3(mat[axis]);
+  /* Every case follows the same plan: use the first other row if its dot product with
+   * the kept axis is below 1, else the second other row under the same test, else a
+   * vector made by rotating the kept axis' own components. */
+  switch (axis) {
+    case 0:
+      if (dot_v3v3(mat[0], mat[1]) < 1) {
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      else if (dot_v3v3(mat[0], mat[2]) < 1) {
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+        normalize_v3(mat[1]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[0][1];
+        vec[1] = mat[0][2];
+        vec[2] = mat[0][0];
+
+        cross_v3_v3v3(mat[2], mat[0], vec);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      break;
+    case 1:
+      if (dot_v3v3(mat[1], mat[0]) < 1) {
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+        normalize_v3(mat[2]);
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+      }
+      /* Test the kept axis (row 1) against row 2: these are the two rows crossed below,
+       * matching how the axis 0 and axis 2 cases pick their second candidate. */
+      else if (dot_v3v3(mat[1], mat[2]) < 1) {
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[1][1];
+        vec[1] = mat[1][2];
+        vec[2] = mat[1][0];
+
+        cross_v3_v3v3(mat[0], mat[1], vec);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[2], mat[0], mat[1]);
+      }
+      break;
+    case 2:
+      if (dot_v3v3(mat[2], mat[0]) < 1) {
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+        normalize_v3(mat[1]);
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+      }
+      else if (dot_v3v3(mat[2], mat[1]) < 1) {
+        cross_v3_v3v3(mat[0], mat[1], mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      else {
+        float vec[3];
+
+        vec[0] = mat[2][1];
+        vec[1] = mat[2][2];
+        vec[2] = mat[2][0];
+
+        cross_v3_v3v3(mat[0], vec, mat[2]);
+        normalize_v3(mat[0]);
+        cross_v3_v3v3(mat[1], mat[2], mat[0]);
+      }
+      break;
+    default:
+      BLI_assert(0);
+      break;
+  }
+  /* Re-apply the sizes captured before any row was modified. */
+  mul_v3_fl(mat[0], size[0]);
+  mul_v3_fl(mat[1], size[1]);
+  mul_v3_fl(mat[2], size[2]);
+}
+
+/**
+ * \return true when every pair of distinct rows of `m` has a dot product whose absolute
+ * value does not exceed 1e-5f.
+ */
+bool is_orthogonal_m3(const float m[3][3])
+{
+  /* Row 0 has no earlier row to be paired with, so start at row 1. */
+  for (int i = 1; i < 3; i++) {
+    for (int j = 0; j < i; j++) {
+      const float d = dot_v3v3(m[i], m[j]);
+      if (fabsf(d) > 1e-5f) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/**
+ * \return true when every pair of distinct rows of `m` has a 4D dot product whose
+ * absolute value does not exceed 1e-5f.
+ */
+bool is_orthogonal_m4(const float m[4][4])
+{
+  /* Row 0 has no earlier row to be paired with, so start at row 1. */
+  for (int i = 1; i < 4; i++) {
+    for (int j = 0; j < i; j++) {
+      const float d = dot_v4v4(m[i], m[j]);
+      if (fabsf(d) > 1e-5f) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/**
+ * \return true when `m` passes #is_orthogonal_m3() and the dot product of every row
+ * with itself is within 1e-5f of 1.
+ */
+bool is_orthonormal_m3(const float m[3][3])
+{
+  if (!is_orthogonal_m3(m)) {
+    return false;
+  }
+
+  for (int i = 0; i < 3; i++) {
+    const float len_sq = dot_v3v3(m[i], m[i]);
+    if (fabsf(len_sq - 1) > 1e-5f) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * \return true when `m` passes #is_orthogonal_m4() and the 4D dot product of every row
+ * with itself is within 1e-5f of 1.
+ */
+bool is_orthonormal_m4(const float m[4][4])
+{
+  if (!is_orthogonal_m4(m)) {
+    return false;
+  }
+
+  for (int i = 0; i < 4; i++) {
+    const float len_sq = dot_v4v4(m[i], m[i]);
+    if (fabsf(len_sq - 1) > 1e-5f) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * \return true when the squared lengths of all three rows of `m` and of all three rows
+ * of its transpose each differ from the squared length of `m[0]` by at most 1e-7f.
+ */
+bool is_uniform_scaled_m3(const float m[3][3])
+{
+  const float eps = 1e-7f;
+  float t[3][3];
+  /* Entries 0..2: rows of `m`; entries 3..5: rows of the transpose. */
+  float len_sq[6];
+
+  transpose_m3_m3(t, m);
+
+  for (int i = 0; i < 3; i++) {
+    len_sq[i] = len_squared_v3(m[i]);
+  }
+  for (int i = 0; i < 3; i++) {
+    len_sq[3 + i] = len_squared_v3(t[i]);
+  }
+
+  /* Entry 0 is the reference every other length is compared against. */
+  for (int k = 1; k < 6; k++) {
+    if (!(fabsf(len_sq[k] - len_sq[0]) <= eps)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Runs #is_uniform_scaled_m3() on the 3x3 matrix that #copy_m3_m4() extracts from `m`.
+ * \return the result of #is_uniform_scaled_m3().
+ */
+bool is_uniform_scaled_m4(const float m[4][4])
+{
+  float t[3][3];
+  copy_m3_m4(t, m);
+  return is_uniform_scaled_m3(t);
+}
+
+/**
+ * Calls #normalize_v3() on each of the three rows of `mat` in place and stores each
+ * call's return value in the matching entry of `r_scale`.
+ */
+void normalize_m3_ex(float mat[3][3], float r_scale[3])
+{
+  r_scale[0] = normalize_v3(mat[0]);
+  r_scale[1] = normalize_v3(mat[1]);
+  r_scale[2] = normalize_v3(mat[2]);
+}
+/** Calls #normalize_v3() on each of the three rows of `mat`, in place. */
+void normalize_m3(float mat[3][3])
+{
+  normalize_v3(mat[0]);
+  normalize_v3(mat[1]);
+  normalize_v3(mat[2]);
+}
+
+/**
+ * Calls #normalize_v3_v3() to write the normalized form of each row of `mat` into the
+ * matching row of `rmat`, and stores each call's return value in `r_scale`.
+ */
+void normalize_m3_m3_ex(float rmat[3][3], const float mat[3][3], float r_scale[3])
+{
+  r_scale[0] = normalize_v3_v3(rmat[0], mat[0]);
+  r_scale[1] = normalize_v3_v3(rmat[1], mat[1]);
+  r_scale[2] = normalize_v3_v3(rmat[2], mat[2]);
+}
+/**
+ * Calls #normalize_v3_v3() to write the normalized form of each row of `mat` into the
+ * matching row of `rmat`.
+ */
+void normalize_m3_m3(float rmat[3][3], const float mat[3][3])
+{
+  normalize_v3_v3(rmat[0], mat[0]);
+  normalize_v3_v3(rmat[1], mat[1]);
+  normalize_v3_v3(rmat[2], mat[2]);
+}
+
+/**
+ * Normalizes the first three components of rows 0..2 of `mat` in place with
+ * #normalize_v3() and stores each call's return value in `r_scale`.
+ * The 4th component of each of those rows is divided by the same value, unless that
+ * value is zero. Row 3 is left untouched.
+ */
+void normalize_m4_ex(float mat[4][4], float r_scale[3])
+{
+  for (int i = 0; i < 3; i++) {
+    float *row = mat[i];
+
+    r_scale[i] = normalize_v3(row);
+    if (r_scale[i] == 0.0f) {
+      continue;
+    }
+    row[3] /= r_scale[i];
+  }
+}
+/**
+ * Normalizes the first three components of rows 0..2 of `mat` in place with
+ * #normalize_v3(). The 4th component of each of those rows is divided by the value
+ * that call returned, unless it is zero. Row 3 is left untouched.
+ */
+void normalize_m4(float mat[4][4])
+{
+  for (int i = 0; i < 3; i++) {
+    float *row = mat[i];
+    const float row_len = normalize_v3(row);
+
+    if (row_len == 0.0f) {
+      continue;
+    }
+    row[3] /= row_len;
+  }
+}
+
+/**
+ * Out-of-place form of #normalize_m4_ex(). For rows 0..2, #normalize_v3_v3() writes the
+ * normalized first three components into `rmat` and its return value goes to `r_scale`.
+ * The 4th component is the source value divided by that return value, or copied
+ * unchanged when the return value is zero. Row 3 is copied as-is.
+ */
+void normalize_m4_m4_ex(float rmat[4][4], const float mat[4][4], float r_scale[3])
+{
+  for (int i = 0; i < 3; i++) {
+    r_scale[i] = normalize_v3_v3(rmat[i], mat[i]);
+
+    if (r_scale[i] != 0.0f) {
+      rmat[i][3] = mat[i][3] / r_scale[i];
+    }
+    else {
+      rmat[i][3] = mat[i][3];
+    }
+  }
+  copy_v4_v4(rmat[3], mat[3]);
+}
+/**
+ * Out-of-place form of #normalize_m4(). For rows 0..2, #normalize_v3_v3() writes the
+ * normalized first three components into `rmat`. The 4th component is the source value
+ * divided by that call's return value, or copied unchanged when the return value is
+ * zero. Row 3 is copied as-is.
+ */
+void normalize_m4_m4(float rmat[4][4], const float mat[4][4])
+{
+  for (int i = 0; i < 3; i++) {
+    const float row_len = normalize_v3_v3(rmat[i], mat[i]);
+
+    if (row_len != 0.0f) {
+      rmat[i][3] = mat[i][3] / row_len;
+    }
+    else {
+      rmat[i][3] = mat[i][3];
+    }
+  }
+  copy_v4_v4(rmat[3], mat[3]);
+}
+
+/**
+ * Write the 2x2 adjoint of \a m into \a m1: the diagonal entries are swapped
+ * and the off-diagonal entries negated.
+ * \a m1 and \a m must be different matrices (asserted).
+ */
+void adjoint_m2_m2(float m1[2][2], const float m[2][2])
+{
+  const float *src_row0 = m[0];
+  const float *src_row1 = m[1];
+
+  BLI_assert(m1 != m);
+
+  m1[0][0] = src_row1[1];
+  m1[0][1] = -src_row0[1];
+  m1[1][0] = -src_row1[0];
+  m1[1][1] = src_row0[0];
+}
+
+/**
+ * Write the 3x3 adjoint of \a m into \a m1, every entry being a signed
+ * 2x2 minor of \a m.
+ * \a m1 and \a m must be different matrices (asserted).
+ */
+void adjoint_m3_m3(float m1[3][3], const float m[3][3])
+{
+  const float *r0 = m[0];
+  const float *r1 = m[1];
+  const float *r2 = m[2];
+
+  BLI_assert(m1 != m);
+
+  /* First output row. */
+  m1[0][0] = r1[1] * r2[2] - r1[2] * r2[1];
+  m1[0][1] = -r0[1] * r2[2] + r0[2] * r2[1];
+  m1[0][2] = r0[1] * r1[2] - r0[2] * r1[1];
+
+  /* Second output row. */
+  m1[1][0] = -r1[0] * r2[2] + r1[2] * r2[0];
+  m1[1][1] = r0[0] * r2[2] - r0[2] * r2[0];
+  m1[1][2] = -r0[0] * r1[2] + r0[2] * r1[0];
+
+  /* Third output row. */
+  m1[2][0] = r1[0] * r2[1] - r1[1] * r2[0];
+  m1[2][1] = -r0[0] * r2[1] + r0[1] * r2[0];
+  m1[2][2] = r0[0] * r1[1] - r0[1] * r1[0];
+}
+
+/**
+ * out = ADJ(in)
+ *
+ * Every output entry is a signed 3x3 determinant (see #determinant_m3) built
+ * from the entries of \a in. All 16 inputs are read into locals before the
+ * first write to \a out.
+ */
+void adjoint_m4_m4(float out[4][4], const float in[4][4])
+{
+  /* Letter = second index, digit = first index + 1. */
+  const float a1 = in[0][0], b1 = in[0][1], c1 = in[0][2], d1 = in[0][3];
+  const float a2 = in[1][0], b2 = in[1][1], c2 = in[1][2], d2 = in[1][3];
+  const float a3 = in[2][0], b3 = in[2][1], c3 = in[2][2], d3 = in[2][3];
+  const float a4 = in[3][0], b4 = in[3][1], c4 = in[3][2], d4 = in[3][3];
+
+  /* Entries built without the '1' values. */
+  out[0][0] = determinant_m3(b2, b3, b4, c2, c3, c4, d2, d3, d4);
+  out[1][0] = -determinant_m3(a2, a3, a4, c2, c3, c4, d2, d3, d4);
+  out[2][0] = determinant_m3(a2, a3, a4, b2, b3, b4, d2, d3, d4);
+  out[3][0] = -determinant_m3(a2, a3, a4, b2, b3, b4, c2, c3, c4);
+
+  /* Entries built without the '2' values. */
+  out[0][1] = -determinant_m3(b1, b3, b4, c1, c3, c4, d1, d3, d4);
+  out[1][1] = determinant_m3(a1, a3, a4, c1, c3, c4, d1, d3, d4);
+  out[2][1] = -determinant_m3(a1, a3, a4, b1, b3, b4, d1, d3, d4);
+  out[3][1] = determinant_m3(a1, a3, a4, b1, b3, b4, c1, c3, c4);
+
+  /* Entries built without the '3' values. */
+  out[0][2] = determinant_m3(b1, b2, b4, c1, c2, c4, d1, d2, d4);
+  out[1][2] = -determinant_m3(a1, a2, a4, c1, c2, c4, d1, d2, d4);
+  out[2][2] = determinant_m3(a1, a2, a4, b1, b2, b4, d1, d2, d4);
+  out[3][2] = -determinant_m3(a1, a2, a4, b1, b2, b4, c1, c2, c4);
+
+  /* Entries built without the '4' values. */
+  out[0][3] = -determinant_m3(b1, b2, b3, c1, c2, c3, d1, d2, d3);
+  out[1][3] = determinant_m3(a1, a2, a3, c1, c2, c3, d1, d2, d3);
+  out[2][3] = -determinant_m3(a1, a2, a3, b1, b2, b3, d1, d2, d3);
+  out[3][3] = determinant_m3(a1, a2, a3, b1, b2, b3, c1, c2, c3);
+}
+
+/** Determinant of the 2x2 matrix `[[a, b], [c, d]]`, i.e. `a * d - b * c`. */
+float determinant_m2(float a, float b, float c, float d)
+{
+  const float det = a * d - b * c;
+  return det;
+}
+
+/**
+ * Determinant of a 3x3 matrix given as nine scalars, expanded along
+ * (a1, b1, c1) using 2x2 minors from #determinant_m2.
+ */
+float determinant_m3(
+    float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
+{
+  const float minor_a = determinant_m2(b2, b3, c2, c3);
+  const float minor_b = determinant_m2(a2, a3, c2, c3);
+  const float minor_c = determinant_m2(a2, a3, b2, b3);
+
+  return (a1 * minor_a - b1 * minor_b + c1 * minor_c);
+}
+
+/**
+ * Determinant of a 4x4 matrix, expanded along `m[0]` with alternating signs
+ * using 3x3 minors from #determinant_m3.
+ */
+float determinant_m4(const float m[4][4])
+{
+  /* Letter = second index, digit = first index + 1. */
+  const float a1 = m[0][0], b1 = m[0][1], c1 = m[0][2], d1 = m[0][3];
+  const float a2 = m[1][0], b2 = m[1][1], c2 = m[1][2], d2 = m[1][3];
+  const float a3 = m[2][0], b3 = m[2][1], c3 = m[2][2], d3 = m[2][3];
+  const float a4 = m[3][0], b4 = m[3][1], c4 = m[3][2], d4 = m[3][3];
+
+  const float minor_a = determinant_m3(b2, b3, b4, c2, c3, c4, d2, d3, d4);
+  const float minor_b = determinant_m3(a2, a3, a4, c2, c3, c4, d2, d3, d4);
+  const float minor_c = determinant_m3(a2, a3, a4, b2, b3, b4, d2, d3, d4);
+  const float minor_d = determinant_m3(a2, a3, a4, b2, b3, b4, c2, c3, c4);
+
+  return (a1 * minor_a - b1 * minor_b + c1 * minor_c - d1 * minor_d);
+}
+
+/****************************** Transformations ******************************/
+
+/** Fill \a mat with a diagonal matrix whose diagonal is \a size, zero elsewhere. */
+void size_to_mat3(float mat[3][3], const float size[3])
+{
+  for (int row = 0; row < 3; row++) {
+    for (int col = 0; col < 3; col++) {
+      mat[row][col] = (row == col) ? size[row] : 0.0f;
+    }
+  }
+}
+
+/**
+ * Fill \a mat with a diagonal 4x4 matrix: the first three diagonal entries
+ * come from \a size, the last one is 1, everything else is zero.
+ */
+void size_to_mat4(float mat[4][4], const float size[3])
+{
+  for (int row = 0; row < 4; row++) {
+    for (int col = 0; col < 4; col++) {
+      if (row != col) {
+        mat[row][col] = 0.0f;
+      }
+      else if (row < 3) {
+        mat[row][col] = size[row];
+      }
+      else {
+        mat[row][col] = 1.0f;
+      }
+    }
+  }
+}
+
+/** Store #len_v3 of `mat[0]`, `mat[1]` and `mat[2]` in \a size. */
+void mat3_to_size(float size[3], const float mat[3][3])
+{
+  for (int axis = 0; axis < 3; axis++) {
+    size[axis] = len_v3(mat[axis]);
+  }
+}
+
+/** Store #len_v3 of `mat[0]`, `mat[1]` and `mat[2]` in \a size (`mat[3]` is ignored). */
+void mat4_to_size(float size[3], const float mat[4][4])
+{
+  for (int axis = 0; axis < 3; axis++) {
+    size[axis] = len_v3(mat[axis]);
+  }
+}
+
+/**
+ * Overall volume scale factor of a transformation matrix, taken from
+ * #determinant_m3_array.
+ * For an orthogonal matrix this is the product of all three scale values.
+ * The result is negative if the transform is flipped by negative scale.
+ */
+float mat3_to_volume_scale(const float mat[3][3])
+{
+  const float volume_scale = determinant_m3_array(mat);
+  return volume_scale;
+}
+
+/** 4x4 counterpart of #mat3_to_volume_scale, taken from #determinant_m4_mat3_array. */
+float mat4_to_volume_scale(const float mat[4][4])
+{
+  const float volume_scale = determinant_m4_mat3_array(mat);
+  return volume_scale;
+}
+
+/**
+ * Average scale of a matrix. Only use this when you are scaling data that has
+ * no idea of scale axis, examples are bone-envelope-radius and curve radius.
+ *
+ * A vector with all components set to `M_SQRT1_3` is passed through
+ * #mul_m3_v3 and its resulting length is returned.
+ */
+float mat3_to_scale(const float mat[3][3])
+{
+  float probe[3];
+
+  /* Unit length vector. */
+  copy_v3_fl(probe, (float)M_SQRT1_3);
+
+  mul_m3_v3(mat, probe);
+
+  const float scale = len_v3(probe);
+  return scale;
+}
+
+/**
+ * 4x4 counterpart of #mat3_to_scale: a vector with all components set to
+ * `M_SQRT1_3` is passed through #mul_mat3_m4_v3 and its resulting length is
+ * returned.
+ */
+float mat4_to_scale(const float mat[4][4])
+{
+  float probe[3];
+
+  /* Unit length vector. */
+  copy_v3_fl(probe, (float)M_SQRT1_3);
+
+  mul_mat3_m4_v3(mat, probe);
+
+  const float scale = len_v3(probe);
+  return scale;
+}
+
+/**
+ * Return 2D scale (in XY plane) of given mat4: the vector
+ * `(M_SQRT1_2, M_SQRT1_2, 0)` is passed through #mul_mat3_m4_v3 and its
+ * resulting length is returned.
+ */
+float mat4_to_xy_scale(const float M[4][4])
+{
+  float probe[3];
+
+  /* Unit length vector in the XY plane. */
+  probe[0] = (float)M_SQRT1_2;
+  probe[1] = (float)M_SQRT1_2;
+  probe[2] = 0.0f;
+
+  mul_mat3_m4_v3(M, probe);
+
+  const float scale = len_v3(probe);
+  return scale;
+}
+
+/**
+ * Split \a mat3 into a normalized matrix \a rot and per-axis values \a size
+ * (the return values of #normalize_v3_v3).
+ * When #is_negative_m3 reports \a rot as negative, both \a rot and \a size
+ * are negated.
+ *
+ * \a rot is kept as a 3x3 matrix, the caller can convert it into a quat or euler.
+ */
+void mat3_to_rot_size(float rot[3][3], float size[3], const float mat3[3][3])
+{
+  for (int axis = 0; axis < 3; axis++) {
+    size[axis] = normalize_v3_v3(rot[axis], mat3[axis]);
+  }
+
+  if (UNLIKELY(is_negative_m3(rot))) {
+    negate_m3(rot);
+    negate_v3(size);
+  }
+}
+
+/**
+ * Decompose \a wmat: \a rot and \a size come from #mat3_to_rot_size applied
+ * to the 3x3 copy of \a wmat, \a loc is copied from `wmat[3]`.
+ */
+void mat4_to_loc_rot_size(float loc[3], float rot[3][3], float size[3], const float wmat[4][4])
+{
+  float rot_scale[3][3];
+
+  /* Rotation and size, from the 3x3 copy of the matrix. */
+  copy_m3_m4(rot_scale, wmat);
+  mat3_to_rot_size(rot, size, rot_scale);
+
+  /* Location. */
+  copy_v3_v3(loc, wmat[3]);
+}
+
+/**
+ * Extract location and rotation (as a quaternion) from \a wmat.
+ *
+ * The 3x3 copy of the matrix is normalized first so scale doesn't interfere
+ * with rotation [#24291]. The normalized matrix is negated when the
+ * un-normalized one is negative.
+ */
+void mat4_to_loc_quat(float loc[3], float quat[4], const float wmat[4][4])
+{
+  float rot_scale[3][3];
+  float rot[3][3]; /* normalized rot_scale */
+
+  copy_m3_m4(rot_scale, wmat);
+  normalize_m3_m3(rot, rot_scale);
+
+  /* note: this is a workaround for negative matrix not working for rotation conversion, FIXME */
+  const bool is_flipped = is_negative_m3(rot_scale);
+  if (is_flipped) {
+    negate_m3(rot);
+  }
+
+  mat3_normalized_to_quat(quat, rot);
+  copy_v3_v3(loc, wmat[3]);
+}
+
+/**
+ * Decompose \a wmat into location, quaternion and size: #mat4_to_loc_rot_size
+ * provides the rotation matrix, which #mat3_normalized_to_quat then converts.
+ */
+void mat4_decompose(float loc[3], float quat[4], float size[3], const float wmat[4][4])
+{
+  float rot_mat[3][3];
+
+  mat4_to_loc_rot_size(loc, rot_mat, size, wmat);
+
+  mat3_normalized_to_quat(quat, rot_mat);
+}
+
+/**
+ * Right polar decomposition:
+ *     M = UP
+ *
+ * U is the 'rotation'-like component, the closest orthogonal matrix to M.
+ * P is the 'scaling'-like component, defined in U space.
+ *
+ * Built from the SVD decomposition of M (M = WSV*), where:
+ *     U = WV*
+ *     P = VSV*
+ *
+ * See https://en.wikipedia.org/wiki/Polar_decomposition for more.
+ */
+#ifndef MATH_STANDALONE
+void mat3_polar_decompose(const float mat3[3][3], float r_U[3][3], float r_P[3][3])
+{
+  float svd_w[3][3], svd_v[3][3];
+  float sval[3];
+  float sval_mat[3][3], svd_vt[3][3];
+
+  BLI_svd_m3(mat3, svd_w, sval, svd_v);
+
+  /* S as a diagonal matrix, and V*. */
+  size_to_mat3(sval_mat, sval);
+  transpose_m3_m3(svd_vt, svd_v);
+
+  /* U = WV* */
+  mul_m3_m3m3(r_U, svd_w, svd_vt);
+  /* P = VSV* */
+  mul_m3_series(r_P, svd_v, sval_mat, svd_vt);
+}
+#endif
+
+/** Fill \a m with a uniform scale matrix: \a scale on the diagonal, zero elsewhere. */
+void scale_m3_fl(float m[3][3], float scale)
+{
+  for (int row = 0; row < 3; row++) {
+    for (int col = 0; col < 3; col++) {
+      m[row][col] = (row == col) ? scale : 0.0f;
+    }
+  }
+}
+
+/**
+ * Fill \a m with a uniform scale matrix: \a scale on the first three
+ * diagonal entries, 1 on the last one, zero elsewhere.
+ */
+void scale_m4_fl(float m[4][4], float scale)
+{
+  for (int row = 0; row < 4; row++) {
+    for (int col = 0; col < 4; col++) {
+      m[row][col] = 0.0f;
+    }
+  }
+
+  m[0][0] = scale;
+  m[1][1] = scale;
+  m[2][2] = scale;
+  m[3][3] = 1.0f;
+}
+
+/**
+ * Add to the first three components of `mat[3]` the offset (Tx, Ty, Tz)
+ * expressed along `mat[0]`, `mat[1]` and `mat[2]`.
+ */
+void translate_m4(float mat[4][4], float Tx, float Ty, float Tz)
+{
+  for (int col = 0; col < 3; col++) {
+    mat[3][col] += (Tx * mat[0][col] + Ty * mat[1][col] + Tz * mat[2][col]);
+  }
+}
+
+/* TODO: enum for axis? */
+/**
+ * Rotate a matrix in-place.
+ *
+ * \param axis: One of 'X', 'Y' or 'Z' (asserted).
+ * \param angle: Passed to `cosf` / `sinf`.
+ *
+ * \note To create a new rotation matrix see:
+ * #axis_angle_to_mat4_single, #axis_angle_to_mat3_single, #angle_to_mat2
+ * (axis & angle args are compatible).
+ */
+void rotate_m4(float mat[4][4], const char axis, const float angle)
+{
+  const float cos_a = cosf(angle);
+  const float sin_a = sinf(angle);
+  int col;
+
+  assert(axis >= 'X' && axis <= 'Z');
+
+  if (axis == 'X') {
+    /* Mixes mat[1] and mat[2]. */
+    for (col = 0; col < 4; col++) {
+      const float y = mat[1][col];
+      const float z = mat[2][col];
+      mat[2][col] = -sin_a * y + cos_a * z;
+      mat[1][col] = cos_a * y + sin_a * z;
+    }
+  }
+  else if (axis == 'Y') {
+    /* Mixes mat[0] and mat[2]. */
+    for (col = 0; col < 4; col++) {
+      const float x = mat[0][col];
+      const float z = mat[2][col];
+      mat[2][col] = sin_a * x + cos_a * z;
+      mat[0][col] = cos_a * x - sin_a * z;
+    }
+  }
+  else if (axis == 'Z') {
+    /* Mixes mat[0] and mat[1]. */
+    for (col = 0; col < 4; col++) {
+      const float x = mat[0][col];
+      const float y = mat[1][col];
+      mat[1][col] = -sin_a * x + cos_a * y;
+      mat[0][col] = cos_a * x + sin_a * y;
+    }
+  }
+  else {
+    BLI_assert(0);
+  }
+}
+
+/**
+ * Scale or rotate around a pivot point,
+ * a convenience function to avoid having to do inline.
+ *
+ * Since it is common to make a scale/rotation matrix that pivots around an arbitrary point.
+ *
+ * \a mat is multiplied on one side by a translation to \a pivot and on the
+ * other side by the translation to the negated \a pivot.
+ *
+ * Typical use case is to make 3x3 matrix, copy to 4x4, then pass to this function.
+ */
+void transform_pivot_set_m4(float mat[4][4], const float pivot[3])
+{
+  float pivot_mat[4][4];
+
+  /* Translation to the pivot. */
+  unit_m4(pivot_mat);
+  copy_v3_v3(pivot_mat[3], pivot);
+
+  mul_m4_m4m4(mat, pivot_mat, mat);
+
+  /* Invert the translation by negating its location. */
+  negate_v3(pivot_mat[3]);
+
+  mul_m4_m4m4(mat, mat, pivot_mat);
+}
+
+/**
+ * Blend two 3x3 matrices: each is split with #mat3_to_rot_size, rotations are
+ * interpolated as quaternions (#interp_qt_qtqt), sizes with #interp_v3_v3v3,
+ * and \a out is rebuilt as rotation * size.
+ *
+ * \param srcweight: Interpolation factor passed to the interpolation helpers,
+ * with \a dst as first and \a src as second operand.
+ */
+void blend_m3_m3m3(float out[3][3],
+                   const float dst[3][3],
+                   const float src[3][3],
+                   const float srcweight)
+{
+  float dst_rot[3][3], src_rot[3][3];
+  float dst_size[3], src_size[3], blend_size[3];
+  float dst_quat[4], src_quat[4], blend_quat[4];
+  float blend_rot_mat[3][3], blend_size_mat[3][3];
+
+  /* Decompose both inputs. */
+  mat3_to_rot_size(dst_rot, dst_size, dst);
+  mat3_to_rot_size(src_rot, src_size, src);
+
+  mat3_normalized_to_quat(dst_quat, dst_rot);
+  mat3_normalized_to_quat(src_quat, src_rot);
+
+  /* Blend rotation and size separately. */
+  interp_qt_qtqt(blend_quat, dst_quat, src_quat, srcweight);
+  interp_v3_v3v3(blend_size, dst_size, src_size, srcweight);
+
+  /* Compose the result. */
+  quat_to_mat3(blend_rot_mat, blend_quat);
+  size_to_mat3(blend_size_mat, blend_size);
+  mul_m3_m3m3(out, blend_rot_mat, blend_size_mat);
+}
+
+/**
+ * Blend two 4x4 matrices: each is split with #mat4_to_loc_rot_size, locations
+ * and sizes are interpolated with #interp_v3_v3v3, rotations as quaternions
+ * (#interp_qt_qtqt), and \a out is rebuilt with #loc_quat_size_to_mat4.
+ *
+ * \param srcweight: Interpolation factor passed to the interpolation helpers,
+ * with \a dst as first and \a src as second operand.
+ */
+void blend_m4_m4m4(float out[4][4],
+                   const float dst[4][4],
+                   const float src[4][4],
+                   const float srcweight)
+{
+  float dst_loc[3], src_loc[3], blend_loc[3];
+  float dst_rot[3][3], src_rot[3][3];
+  float dst_size[3], src_size[3], blend_size[3];
+  float dst_quat[4], src_quat[4], blend_quat[4];
+
+  /* Decompose both inputs. */
+  mat4_to_loc_rot_size(dst_loc, dst_rot, dst_size, dst);
+  mat4_to_loc_rot_size(src_loc, src_rot, src_size, src);
+
+  mat3_normalized_to_quat(dst_quat, dst_rot);
+  mat3_normalized_to_quat(src_quat, src_rot);
+
+  /* Blend every component separately. */
+  interp_v3_v3v3(blend_loc, dst_loc, src_loc, srcweight);
+  interp_qt_qtqt(blend_quat, dst_quat, src_quat, srcweight);
+  interp_v3_v3v3(blend_size, dst_size, src_size, srcweight);
+
+  /* Compose the result. */
+  loc_quat_size_to_mat4(out, blend_loc, blend_quat, blend_size);
+}
+
+/* for builds without Eigen */
+#ifndef MATH_STANDALONE
+/**
+ * A polar-decomposition-based interpolation between matrix A and matrix B.
+ *
+ * Both inputs are split with #mat3_polar_decompose. The 'rotation' parts (U,
+ * the closest orthogonal matrix to the rot/scale matrix) are interpolated as
+ * quaternions, the 'scaling' parts (P, scaling in U-defined space) are
+ * linearly interpolated, and the result is rebuilt as U * P.
+ *
+ * \note This code is about five times slower than the 'naive' interpolation done by
+ * #blend_m3_m3m3 (it typically remains below 2 usec on an average i74700,
+ * while #blend_m3_m3m3 remains below 0.4 usec).
+ * However, it gives expected results even with non-uniformly scaled matrices,
+ * see T46418 for an example.
+ *
+ * Based on "Matrix Animation and Polar Decomposition", by Ken Shoemake & Tom Duff
+ *
+ * \param R: Resulting interpolated matrix.
+ * \param A: Input matrix which is totally effective with `t = 0.0`.
+ * \param B: Input matrix which is totally effective with `t = 1.0`.
+ * \param t: Interpolation factor.
+ */
+void interp_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3], const float t)
+{
+  float rot_a[3][3], rot_b[3][3], rot[3][3];
+  float scale_a[3][3], scale_b[3][3], scale[3][3];
+  float quat_a[4], quat_b[4], quat_blend[4];
+
+  mat3_polar_decompose(A, rot_a, scale_a);
+  mat3_polar_decompose(B, rot_b, scale_b);
+
+  /* 'Rotation' component, interpolated through quaternions. */
+  mat3_to_quat(quat_a, rot_a);
+  mat3_to_quat(quat_b, rot_b);
+  interp_qt_qtqt(quat_blend, quat_a, quat_b, t);
+  quat_to_mat3(rot, quat_blend);
+
+  /* 'Scaling' component, linearly interpolated. */
+  interp_v3_v3v3(scale[0], scale_a[0], scale_b[0], t);
+  interp_v3_v3v3(scale[1], scale_a[1], scale_b[1], t);
+  interp_v3_v3v3(scale[2], scale_a[2], scale_b[2], t);
+
+  /* Rebuild the rot/scale matrix from the interpolated polar components. */
+  mul_m3_m3m3(R, rot, scale);
+}
+
+/**
+ * Complete transform matrix interpolation,
+ * based on polar-decomposition-based interpolation from #interp_m3_m3m3.
+ *
+ * The 3x3 parts are interpolated with #interp_m3_m3m3, the locations
+ * (`A[3]` and `B[3]`) linearly with #interp_v3_v3v3.
+ *
+ * \param R: Resulting interpolated matrix.
+ * \param A: Input matrix which is totally effective with `t = 0.0`.
+ * \param B: Input matrix which is totally effective with `t = 1.0`.
+ * \param t: Interpolation factor.
+ */
+void interp_m4_m4m4(float R[4][4], const float A[4][4], const float B[4][4], const float t)
+{
+  float rot_scale_a[3][3], rot_scale_b[3][3], rot_scale[3][3];
+  float loc_a[3], loc_b[3], loc_blend[3];
+
+  /* Rotation/scale component, everything is computed before R is written. */
+  copy_m3_m4(rot_scale_a, A);
+  copy_m3_m4(rot_scale_b, B);
+  interp_m3_m3m3(rot_scale, rot_scale_a, rot_scale_b, t);
+
+  /* Location component, linearly interpolated. */
+  copy_v3_v3(loc_a, A[3]);
+  copy_v3_v3(loc_b, B[3]);
+  interp_v3_v3v3(loc_blend, loc_a, loc_b, t);
+
+  /* Assemble the result. */
+  copy_m4_m3(R, rot_scale);
+  copy_v3_v3(R[3], loc_blend);
+}
+#endif /* MATH_STANDALONE */
+
+/**
+ * True when the dot product of `mat[2]` with the cross product of `mat[0]`
+ * and `mat[1]` is below zero.
+ */
+bool is_negative_m3(const float mat[3][3])
+{
+  float cross_xy[3];
+
+  cross_v3_v3v3(cross_xy, mat[0], mat[1]);
+
+  const float triple = dot_v3v3(cross_xy, mat[2]);
+  return (triple < 0.0f);
+}
+
+/**
+ * True when the dot product of `mat[2]` with the cross product of `mat[0]`
+ * and `mat[1]` is below zero (`mat[3]` is not used).
+ */
+bool is_negative_m4(const float mat[4][4])
+{
+  float cross_xy[3];
+
+  cross_v3_v3v3(cross_xy, mat[0], mat[1]);
+
+  const float triple = dot_v3v3(cross_xy, mat[2]);
+  return (triple < 0.0f);
+}
+
+/** True when #is_zero_v3 holds for `mat[0]`, `mat[1]` and `mat[2]`. */
+bool is_zero_m3(const float mat[3][3])
+{
+  for (int row = 0; row < 3; row++) {
+    if (!is_zero_v3(mat[row])) {
+      return false;
+    }
+  }
+  return true;
+}
+/** True when #is_zero_v4 holds for all four `mat[i]` vectors. */
+bool is_zero_m4(const float mat[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    if (!is_zero_v4(mat[row])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/** True when #equals_v3v3 holds for every pair `mat1[i]`, `mat2[i]`. */
+bool equals_m3m3(const float mat1[3][3], const float mat2[3][3])
+{
+  for (int row = 0; row < 3; row++) {
+    if (!equals_v3v3(mat1[row], mat2[row])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/** True when #equals_v4v4 holds for every pair `mat1[i]`, `mat2[i]`. */
+bool equals_m4m4(const float mat1[4][4], const float mat2[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    if (!equals_v4v4(mat1[row], mat2[row])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Make a 4x4 matrix out of 3 transform components.
+ * Matrices are made in the order: `scale * rot * loc`
+ *
+ * The rotation matrix comes from #eul_to_mat3.
+ *
+ * TODO: need to have a version that allows for rotation order...
+ */
+void loc_eul_size_to_mat4(float mat[4][4],
+                          const float loc[3],
+                          const float eul[3],
+                          const float size[3])
+{
+  float rot_mat[3][3], size_mat[3][3], rot_size_mat[3][3];
+
+  /* Start from a unit matrix. */
+  unit_m4(mat);
+
+  /* Rotation + scaling part. */
+  eul_to_mat3(rot_mat, eul);
+  size_to_mat3(size_mat, size);
+  mul_m3_m3m3(rot_size_mat, rot_mat, size_mat);
+  copy_m4_m3(mat, rot_size_mat);
+
+  /* Location part. */
+  for (int i = 0; i < 3; i++) {
+    mat[3][i] = loc[i];
+  }
+}
+
+/**
+ * Make a 4x4 matrix out of 3 transform components.
+ * Matrices are made in the order: `scale * rot * loc`
+ *
+ * The rotation matrix comes from #eulO_to_mat3 using \a rotOrder.
+ */
+void loc_eulO_size_to_mat4(float mat[4][4],
+                           const float loc[3],
+                           const float eul[3],
+                           const float size[3],
+                           const short rotOrder)
+{
+  float rot_mat[3][3], size_mat[3][3], rot_size_mat[3][3];
+
+  /* Start from a unit matrix. */
+  unit_m4(mat);
+
+  /* Rotation + scaling part. */
+  eulO_to_mat3(rot_mat, eul, rotOrder);
+  size_to_mat3(size_mat, size);
+  mul_m3_m3m3(rot_size_mat, rot_mat, size_mat);
+  copy_m4_m3(mat, rot_size_mat);
+
+  /* Location part. */
+  for (int i = 0; i < 3; i++) {
+    mat[3][i] = loc[i];
+  }
+}
+
+/**
+ * Make a 4x4 matrix out of 3 transform components.
+ * Matrices are made in the order: `scale * rot * loc`
+ *
+ * The rotation matrix comes from #quat_to_mat3.
+ */
+void loc_quat_size_to_mat4(float mat[4][4],
+                           const float loc[3],
+                           const float quat[4],
+                           const float size[3])
+{
+  float rot_mat[3][3], size_mat[3][3], rot_size_mat[3][3];
+
+  /* Start from a unit matrix. */
+  unit_m4(mat);
+
+  /* Rotation + scaling part. */
+  quat_to_mat3(rot_mat, quat);
+  size_to_mat3(size_mat, size);
+  mul_m3_m3m3(rot_size_mat, rot_mat, size_mat);
+  copy_m4_m3(mat, rot_size_mat);
+
+  /* Location part. */
+  for (int i = 0; i < 3; i++) {
+    mat[3][i] = loc[i];
+  }
+}
+
+/**
+ * Make a 4x4 matrix out of location, axis-angle rotation and size: the
+ * axis-angle is converted with #axis_angle_to_quat and the rest is done by
+ * #loc_quat_size_to_mat4.
+ */
+void loc_axisangle_size_to_mat4(float mat[4][4],
+                                const float loc[3],
+                                const float axis[3],
+                                const float angle,
+                                const float size[3])
+{
+  float quat[4];
+
+  axis_angle_to_quat(quat, axis, angle);
+
+  loc_quat_size_to_mat4(mat, loc, quat, size);
+}
+
+/*********************************** Other ***********************************/
+
+/**
+ * Print \a str on its own line, then the matrix: output line `i` shows
+ * `m[0][i] m[1][i] m[2][i]`. A blank line follows.
+ */
+void print_m3(const char *str, const float m[3][3])
+{
+  printf("%s\n", str);
+  for (int i = 0; i < 3; i++) {
+    printf("%f %f %f\n", m[0][i], m[1][i], m[2][i]);
+  }
+  printf("\n");
+}
+
+/**
+ * Print \a str on its own line, then the matrix: output line `i` shows
+ * `m[0][i] m[1][i] m[2][i] m[3][i]`. A blank line follows.
+ */
+void print_m4(const char *str, const float m[4][4])
+{
+  printf("%s\n", str);
+  for (int i = 0; i < 4; i++) {
+    printf("%f %f %f %f\n", m[0][i], m[1][i], m[2][i], m[3][i]);
+  }
+  printf("\n");
+}
+
+/*********************************** SVD ************************************
+ * from TNT matrix library
+ *
+ * Compute the Singular Value Decomposition of an arbitrary matrix A.
+ * That is, compute the 3 matrices U,W,V with U column orthogonal (m,n),
+ * W a diagonal matrix and V an orthogonal square matrix s.t.
+ * A = U.W.Vt. From this decomposition it is trivial to compute the
+ * (pseudo-inverse) of A as Ainv = V.Winv.transpose(U).
+ *
+ * In this function m = n = 4 and the diagonal of W is returned as the
+ * vector `s`.
+ *
+ * U:  Output, cleared with zero_m4 before use.
+ * s:  Output, cleared with zero_v4 before use. On convergence the values are
+ *     made non-negative and swapped so larger values come first.
+ * V:  Output.
+ * A_: Input, copied into a local working matrix with copy_m4_m4; all
+ *     in-place updates below are done on that local copy.
+ *
+ * The main loop runs at most `maxiter` (200) passes, whatever the state of
+ * convergence.
+ */
+
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A_[4][4])
+{
+  float A[4][4];
+  float work1[4], work2[4];
+  int m = 4;
+  int n = 4;
+  int maxiter = 200;
+  int nu = min_ii(m, n);
+
+  float *work = work1;
+  float *e = work2;
+  float eps;
+
+  int i = 0, j = 0, k = 0, p, pp, iter;
+
+  /* Reduce A to bidiagonal form, storing the diagonal elements
+   * in s and the super-diagonal elements in e. */
+
+  int nct = min_ii(m - 1, n);
+  int nrt = max_ii(0, min_ii(n - 2, m));
+
+  copy_m4_m4(A, A_);
+  zero_m4(U);
+  zero_v4(s);
+
+  for (k = 0; k < max_ii(nct, nrt); k++) {
+    if (k < nct) {
+
+      /* Compute the transformation for the k-th column and
+       * place the k-th diagonal in s[k].
+       * Compute 2-norm of k-th column without under/overflow. */
+      s[k] = 0;
+      for (i = k; i < m; i++) {
+        s[k] = hypotf(s[k], A[i][k]);
+      }
+      if (s[k] != 0.0f) {
+        float invsk;
+        if (A[k][k] < 0.0f) {
+          s[k] = -s[k];
+        }
+        invsk = 1.0f / s[k];
+        for (i = k; i < m; i++) {
+          A[i][k] *= invsk;
+        }
+        A[k][k] += 1.0f;
+      }
+      s[k] = -s[k];
+    }
+    for (j = k + 1; j < n; j++) {
+      if ((k < nct) && (s[k] != 0.0f)) {
+
+        /* Apply the transformation. */
+
+        float t = 0;
+        for (i = k; i < m; i++) {
+          t += A[i][k] * A[i][j];
+        }
+        t = -t / A[k][k];
+        for (i = k; i < m; i++) {
+          A[i][j] += t * A[i][k];
+        }
+      }
+
+      /* Place the k-th row of A into e for the */
+      /* subsequent calculation of the row transformation. */
+
+      e[j] = A[k][j];
+    }
+    if (k < nct) {
+
+      /* Place the transformation in U for subsequent back
+       * multiplication. */
+
+      for (i = k; i < m; i++) {
+        U[i][k] = A[i][k];
+      }
+    }
+    if (k < nrt) {
+
+      /* Compute the k-th row transformation and place the
+       * k-th super-diagonal in e[k].
+       * Compute 2-norm without under/overflow. */
+      e[k] = 0;
+      for (i = k + 1; i < n; i++) {
+        e[k] = hypotf(e[k], e[i]);
+      }
+      if (e[k] != 0.0f) {
+        float invek;
+        if (e[k + 1] < 0.0f) {
+          e[k] = -e[k];
+        }
+        invek = 1.0f / e[k];
+        for (i = k + 1; i < n; i++) {
+          e[i] *= invek;
+        }
+        e[k + 1] += 1.0f;
+      }
+      e[k] = -e[k];
+      if ((k + 1 < m) & (e[k] != 0.0f)) {
+        float invek1;
+
+        /* Apply the transformation. */
+
+        for (i = k + 1; i < m; i++) {
+          work[i] = 0.0f;
+        }
+        for (j = k + 1; j < n; j++) {
+          for (i = k + 1; i < m; i++) {
+            work[i] += e[j] * A[i][j];
+          }
+        }
+        invek1 = 1.0f / e[k + 1];
+        for (j = k + 1; j < n; j++) {
+          float t = -e[j] * invek1;
+          for (i = k + 1; i < m; i++) {
+            A[i][j] += t * work[i];
+          }
+        }
+      }
+
+      /* Place the transformation in V for subsequent
+       * back multiplication. */
+
+      for (i = k + 1; i < n; i++) {
+        V[i][k] = e[i];
+      }
+    }
+  }
+
+  /* Set up the final bidiagonal matrix of order p. */
+
+  p = min_ii(n, m + 1);
+  if (nct < n) {
+    s[nct] = A[nct][nct];
+  }
+  if (m < p) {
+    s[p - 1] = 0.0f;
+  }
+  if (nrt + 1 < p) {
+    e[nrt] = A[nrt][p - 1];
+  }
+  e[p - 1] = 0.0f;
+
+  /* If required, generate U. */
+
+  for (j = nct; j < nu; j++) {
+    for (i = 0; i < m; i++) {
+      U[i][j] = 0.0f;
+    }
+    U[j][j] = 1.0f;
+  }
+  for (k = nct - 1; k >= 0; k--) {
+    if (s[k] != 0.0f) {
+      for (j = k + 1; j < nu; j++) {
+        float t = 0;
+        for (i = k; i < m; i++) {
+          t += U[i][k] * U[i][j];
+        }
+        t = -t / U[k][k];
+        for (i = k; i < m; i++) {
+          U[i][j] += t * U[i][k];
+        }
+      }
+      for (i = k; i < m; i++) {
+        U[i][k] = -U[i][k];
+      }
+      U[k][k] = 1.0f + U[k][k];
+      /* NOTE(review): the bound is `k - 1`, so U[k - 1][k] is not written by
+       * this loop - confirm against the reference implementation. */
+      for (i = 0; i < k - 1; i++) {
+        U[i][k] = 0.0f;
+      }
+    }
+    else {
+      for (i = 0; i < m; i++) {
+        U[i][k] = 0.0f;
+      }
+      U[k][k] = 1.0f;
+    }
+  }
+
+  /* If required, generate V. */
+
+  for (k = n - 1; k >= 0; k--) {
+    if ((k < nrt) & (e[k] != 0.0f)) {
+      for (j = k + 1; j < nu; j++) {
+        float t = 0;
+        for (i = k + 1; i < n; i++) {
+          t += V[i][k] * V[i][j];
+        }
+        t = -t / V[k + 1][k];
+        for (i = k + 1; i < n; i++) {
+          V[i][j] += t * V[i][k];
+        }
+      }
+    }
+    for (i = 0; i < n; i++) {
+      V[i][k] = 0.0f;
+    }
+    V[k][k] = 1.0f;
+  }
+
+  /* Main iteration loop for the singular values. */
+
+  pp = p - 1;
+  /* `iter` is only ever written in this function, its value is never read. */
+  iter = 0;
+  /* NOTE(review): 2^-52 is the machine epsilon of double precision while all
+   * values here are single precision floats - confirm this tolerance is
+   * intended. The `maxiter` counter bounds the loop in any case. */
+  eps = powf(2.0f, -52.0f);
+  while (p > 0) {
+    int kase = 0;
+
+    /* Test for maximum iterations to avoid infinite loop */
+    if (maxiter == 0) {
+      break;
+    }
+    maxiter--;
+
+    /* This section of the program inspects for
+     * negligible elements in the s and e arrays.  On
+     * completion the variables kase and k are set as follows.
+     *
+     * kase = 1: if s(p) and e[k - 1] are negligible and k<p
+     * kase = 2: if s(k) is negligible and k<p
+     * kase = 3: if e[k - 1] is negligible, k<p, and
+     *              s(k), ..., s(p) are not negligible (qr step).
+     * kase = 4: if e(p - 1) is negligible (convergence). */
+
+    for (k = p - 2; k >= -1; k--) {
+      if (k == -1) {
+        break;
+      }
+      if (fabsf(e[k]) <= eps * (fabsf(s[k]) + fabsf(s[k + 1]))) {
+        e[k] = 0.0f;
+        break;
+      }
+    }
+    if (k == p - 2) {
+      kase = 4;
+    }
+    else {
+      int ks;
+      for (ks = p - 1; ks >= k; ks--) {
+        float t;
+        if (ks == k) {
+          break;
+        }
+        t = (ks != p ? fabsf(e[ks]) : 0.f) + (ks != k + 1 ? fabsf(e[ks - 1]) : 0.0f);
+        if (fabsf(s[ks]) <= eps * t) {
+          s[ks] = 0.0f;
+          break;
+        }
+      }
+      if (ks == k) {
+        kase = 3;
+      }
+      else if (ks == p - 1) {
+        kase = 1;
+      }
+      else {
+        kase = 2;
+        k = ks;
+      }
+    }
+    k++;
+
+    /* Perform the task indicated by kase. */
+
+    switch (kase) {
+
+        /* Deflate negligible s(p). */
+
+      case 1: {
+        float f = e[p - 2];
+        e[p - 2] = 0.0f;
+        for (j = p - 2; j >= k; j--) {
+          float t = hypotf(s[j], f);
+          float invt = 1.0f / t;
+          float cs = s[j] * invt;
+          float sn = f * invt;
+          s[j] = t;
+          if (j != k) {
+            f = -sn * e[j - 1];
+            e[j - 1] = cs * e[j - 1];
+          }
+
+          for (i = 0; i < n; i++) {
+            t = cs * V[i][j] + sn * V[i][p - 1];
+            V[i][p - 1] = -sn * V[i][j] + cs * V[i][p - 1];
+            V[i][j] = t;
+          }
+        }
+        break;
+      }
+
+        /* Split at negligible s(k). */
+
+      case 2: {
+        float f = e[k - 1];
+        e[k - 1] = 0.0f;
+        for (j = k; j < p; j++) {
+          float t = hypotf(s[j], f);
+          float invt = 1.0f / t;
+          float cs = s[j] * invt;
+          float sn = f * invt;
+          s[j] = t;
+          f = -sn * e[j];
+          e[j] = cs * e[j];
+
+          for (i = 0; i < m; i++) {
+            t = cs * U[i][j] + sn * U[i][k - 1];
+            U[i][k - 1] = -sn * U[i][j] + cs * U[i][k - 1];
+            U[i][j] = t;
+          }
+        }
+        break;
+      }
+
+        /* Perform one qr step. */
+
+      case 3: {
+
+        /* Calculate the shift. */
+
+        float scale = max_ff(
+            max_ff(max_ff(max_ff(fabsf(s[p - 1]), fabsf(s[p - 2])), fabsf(e[p - 2])), fabsf(s[k])),
+            fabsf(e[k]));
+        float invscale = 1.0f / scale;
+        float sp = s[p - 1] * invscale;
+        float spm1 = s[p - 2] * invscale;
+        float epm1 = e[p - 2] * invscale;
+        float sk = s[k] * invscale;
+        float ek = e[k] * invscale;
+        float b = ((spm1 + sp) * (spm1 - sp) + epm1 * epm1) * 0.5f;
+        float c = (sp * epm1) * (sp * epm1);
+        float shift = 0.0f;
+        float f, g;
+        if ((b != 0.0f) || (c != 0.0f)) {
+          shift = sqrtf(b * b + c);
+          if (b < 0.0f) {
+            shift = -shift;
+          }
+          shift = c / (b + shift);
+        }
+        f = (sk + sp) * (sk - sp) + shift;
+        g = sk * ek;
+
+        /* Chase zeros. */
+
+        for (j = k; j < p - 1; j++) {
+          float t = hypotf(f, g);
+          /* division by zero checks added to avoid NaN (brecht) */
+          float cs = (t == 0.0f) ? 0.0f : f / t;
+          float sn = (t == 0.0f) ? 0.0f : g / t;
+          if (j != k) {
+            e[j - 1] = t;
+          }
+          f = cs * s[j] + sn * e[j];
+          e[j] = cs * e[j] - sn * s[j];
+          g = sn * s[j + 1];
+          s[j + 1] = cs * s[j + 1];
+
+          for (i = 0; i < n; i++) {
+            t = cs * V[i][j] + sn * V[i][j + 1];
+            V[i][j + 1] = -sn * V[i][j] + cs * V[i][j + 1];
+            V[i][j] = t;
+          }
+
+          t = hypotf(f, g);
+          /* division by zero checks added to avoid NaN (brecht) */
+          cs = (t == 0.0f) ? 0.0f : f / t;
+          sn = (t == 0.0f) ? 0.0f : g / t;
+          s[j] = t;
+          f = cs * e[j] + sn * s[j + 1];
+          s[j + 1] = -sn * e[j] + cs * s[j + 1];
+          g = sn * e[j + 1];
+          e[j + 1] = cs * e[j + 1];
+          if (j < m - 1) {
+            for (i = 0; i < m; i++) {
+              t = cs * U[i][j] + sn * U[i][j + 1];
+              U[i][j + 1] = -sn * U[i][j] + cs * U[i][j + 1];
+              U[i][j] = t;
+            }
+          }
+        }
+        e[p - 2] = f;
+        iter = iter + 1;
+        break;
+      }
+        /* Convergence. */
+
+      case 4: {
+
+        /* Make the singular values positive. */
+
+        if (s[k] <= 0.0f) {
+          s[k] = (s[k] < 0.0f ? -s[k] : 0.0f);
+
+          for (i = 0; i <= pp; i++) {
+            V[i][k] = -V[i][k];
+          }
+        }
+
+        /* Order the singular values (larger values are moved towards lower
+         * indices, swapping the matching entries of V and U along). */
+
+        while (k < pp) {
+          float t;
+          if (s[k] >= s[k + 1]) {
+            break;
+          }
+          t = s[k];
+          s[k] = s[k + 1];
+          s[k + 1] = t;
+          if (k < n - 1) {
+            for (i = 0; i < n; i++) {
+              t = V[i][k + 1];
+              V[i][k + 1] = V[i][k];
+              V[i][k] = t;
+            }
+          }
+          if (k < m - 1) {
+            for (i = 0; i < m; i++) {
+              t = U[i][k + 1];
+              U[i][k + 1] = U[i][k];
+              U[i][k] = t;
+            }
+          }
+          k++;
+        }
+        iter = 0;
+        p--;
+        break;
+      }
+    }
+  }
+}
+
+/**
+ * Compute the Moore-Penrose pseudo inverse of a matrix. Singular values
+ * below \a epsilon are ignored for stability (truncated SVD).
+ *
+ * \param Ainv: Resulting pseudo inverse.
+ * \param A_: Input matrix, transposed into a local copy before #svd_m4 runs.
+ * \param epsilon: Singular values below this are replaced by zero instead of
+ * being inverted.
+ */
+void pseudoinverse_m4_m4(float Ainv[4][4], const float A_[4][4], float epsilon)
+{
+  float A[4][4], V[4][4], W[4], Wm[4][4], U[4][4];
+  int i;
+
+  transpose_m4_m4(A, A_);
+  /* The decomposition runs on the transposed input, note that `V` is passed
+   * in the `U` slot of #svd_m4 and `U` in the `V` slot. */
+  svd_m4(V, W, U, A);
+  transpose_m4(U);
+  /* `V` is used exactly as returned by #svd_m4. The previous code transposed
+   * it in place twice with no use in between, which is a no-op. */
+
+  /* Diagonal matrix of the inverted (or truncated) singular values. */
+  zero_m4(Wm);
+  for (i = 0; i < 4; i++) {
+    Wm[i][i] = (W[i] < epsilon) ? 0.0f : 1.0f / W[i];
+  }
+
+  mul_m4_series(Ainv, U, Wm, V);
+}
+
+/**
+ * Pseudo inverse of a 3x3 matrix: the regular inverse (#invert_m3_m3) is
+ * tried first, and only when it reports failure the matrix is copied into a
+ * 4x4 one and the slow SVD based #pseudoinverse_m4_m4 is used.
+ *
+ * \param epsilon: Passed on to #pseudoinverse_m4_m4.
+ */
+void pseudoinverse_m3_m3(float Ainv[3][3], const float A[3][3], float epsilon)
+{
+  if (invert_m3_m3(Ainv, A)) {
+    /* Regular inverse was possible. */
+    return;
+  }
+
+  float mat4[4][4], mat4_inv[4][4];
+
+  copy_m4_m3(mat4, A);
+  pseudoinverse_m4_m4(mat4_inv, mat4, epsilon);
+  copy_m3_m4(Ainv, mat4_inv);
+}
+
+/**
+ * True when #len_squared_v3 of `matrix[0]`, `matrix[1]` or `matrix[2]` is
+ * below `FLT_EPSILON`.
+ */
+bool has_zero_axis_m4(const float matrix[4][4])
+{
+  for (int axis = 0; axis < 3; axis++) {
+    if (len_squared_v3(matrix[axis]) < FLT_EPSILON) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Invert \a A into \a Ainv, with fallbacks when #invert_m4_m4 reports
+ * failure: first a copy of \a A with `1e-8f` added to its first three
+ * diagonal entries is tried, and if that fails too \a Ainv is set to the
+ * unit matrix.
+ */
+void invert_m4_m4_safe(float Ainv[4][4], const float A[4][4])
+{
+  if (invert_m4_m4(Ainv, A)) {
+    return;
+  }
+
+  /* Matrix is degenerate (e.g. 0 scale on some axis), ideally we should
+   * never be in this situation, but try to invert it anyway with tweak.
+   */
+  float tweaked[4][4];
+
+  copy_m4_m4(tweaked, A);
+  for (int i = 0; i < 3; i++) {
+    tweaked[i][i] += 1e-8f;
+  }
+
+  if (invert_m4_m4(Ainv, tweaked)) {
+    return;
+  }
+
+  unit_m4(Ainv);
+}
+
+/**
+ * #SpaceTransform struct encapsulates all needed data to convert between two coordinate spaces
+ * (where conversion can be represented by a matrix multiplication).
+ *
+ * A SpaceTransform is initialized using:
+ * - #BLI_SPACE_TRANSFORM_SETUP(&data,  ob1, ob2)
+ *
+ * After that the following calls can be used:
+ * - Converts a coordinate in ob1 space to the corresponding ob2 space:
+ *   #BLI_space_transform_apply(&data, co);
+ * - Converts a coordinate in ob2 space to the corresponding ob1 space:
+ *   #BLI_space_transform_invert(&data, co);
+ *
+ * Same concept as #BLI_space_transform_apply and #BLI_space_transform_invert,
+ * but the normal `no` is normalized after conversion (and not translated at all!):
+ * - #BLI_space_transform_apply_normal(&data, no);
+ * - #BLI_space_transform_invert_normal(&data, no);
+ */
+
+/**
+ * Global-invariant transform.
+ *
+ * This defines a matrix transforming a point in local space to a point in target space
+ * such that its global coordinates remain unchanged.
+ *
+ * In other words, if we have a global point P with local coordinates (x, y, z)
+ * and global coordinates (X, Y, Z),
+ * this defines a transform matrix TM such that (x', y', z') = TM * (x, y, z)
+ * where (x', y', z') are the coordinates of P' in target space
+ * such that it keeps (X, Y, Z) coordinates in global space.
+ *
+ * `data->local2target` is set to `inverse(target) * local`,
+ * `data->target2local` to the inverse of that.
+ */
+void BLI_space_transform_from_matrices(SpaceTransform *data,
+                                       const float local[4][4],
+                                       const float target[4][4])
+{
+  float target_inv[4][4];
+
+  invert_m4_m4(target_inv, target);
+
+  /* Forward transform, then its inverse. */
+  mul_m4_m4m4(data->local2target, target_inv, local);
+  invert_m4_m4(data->target2local, data->local2target);
+}
+
+/**
+ * Local-invariant transform.
+ *
+ * This defines a matrix transforming a point in global space
+ * such that its local coordinates (from local space to target space) remain unchanged.
+ *
+ * In other words, if we have a local point p with local coordinates (x, y, z)
+ * and global coordinates (X, Y, Z),
+ * this defines a transform matrix TM such that (X', Y', Z') = TM * (X, Y, Z)
+ * where (X', Y', Z') are the coordinates of p' in global space
+ * such that it keeps (x, y, z) coordinates in target space.
+ *
+ * `data->local2target` is set to `target * inverse(local)`,
+ * `data->target2local` to the inverse of that.
+ */
+void BLI_space_transform_global_from_matrices(SpaceTransform *data,
+                                              const float local[4][4],
+                                              const float target[4][4])
+{
+  float local_inv[4][4];
+
+  invert_m4_m4(local_inv, local);
+
+  /* Forward transform, then its inverse. */
+  mul_m4_m4m4(data->local2target, target, local_inv);
+  invert_m4_m4(data->target2local, data->local2target);
+}
+
+/** Transform \a co in place by `data->local2target` using #mul_v3_m4v3. */
+void BLI_space_transform_apply(const SpaceTransform *data, float co[3])
+{
+  SpaceTransform *xform = (SpaceTransform *)data;
+
+  mul_v3_m4v3(co, xform->local2target, co);
+}
+
+/** Transform \a co in place by `data->target2local` using #mul_v3_m4v3. */
+void BLI_space_transform_invert(const SpaceTransform *data, float co[3])
+{
+  SpaceTransform *xform = (SpaceTransform *)data;
+
+  mul_v3_m4v3(co, xform->target2local, co);
+}
+
+/**
+ * Transform the normal \a no in place by `data->local2target` using
+ * #mul_mat3_m4_v3, then normalize it.
+ */
+void BLI_space_transform_apply_normal(const SpaceTransform *data, float no[3])
+{
+  SpaceTransform *xform = (SpaceTransform *)data;
+
+  mul_mat3_m4_v3(xform->local2target, no);
+
+  normalize_v3(no);
+}
+
+/**
+ * Transform the normal \a no in place by `data->target2local` using
+ * #mul_mat3_m4_v3, then normalize it.
+ */
+void BLI_space_transform_invert_normal(const SpaceTransform *data, float no[3])
+{
+  SpaceTransform *xform = (SpaceTransform *)data;
+
+  mul_mat3_m4_v3(xform->target2local, no);
+
+  normalize_v3(no);
+}
diff --git a/client/blenlib/math_rotation.cpp b/client/blenlib/math_rotation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..09d923e643562ad44e5e0299e1b6310007d6abbf
--- /dev/null
+++ b/client/blenlib/math_rotation.cpp
@@ -0,0 +1,2356 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ */
+
+/** \file
+ * \ingroup bli
+ */
+
+#include <assert.h>
+#include "BLI_math.h"
+
+#include "BLI_strict_flags.h"
+
+/******************************** Quaternions ********************************/
+
+/* Tolerance used to test whether a quat is not normalized (only used for debug prints). */
+#ifdef DEBUG
+#  define QUAT_EPSILON 0.0001
+#endif
+
+/* Convenience: reset an axis-angle pair to the Y axis with a zero angle,
+ * so callers don't have to set the Y axis by hand everywhere. */
+void unit_axis_angle(float axis[3], float *angle)
+{
+  axis[0] = axis[2] = 0.0f;
+  axis[1] = 1.0f;
+
+  *angle = 0.0f;
+}
+
+/* Set \a q to the identity quaternion (1, 0, 0, 0). */
+void unit_qt(float q[4])
+{
+  for (int i = 1; i < 4; i++) {
+    q[i] = 0.0f;
+  }
+  q[0] = 1.0f;
+}
+
+/* Copy the four components of \a q2 into \a q1. */
+void copy_qt_qt(float q1[4], const float q2[4])
+{
+  for (int i = 0; i < 4; i++) {
+    q1[i] = q2[i];
+  }
+}
+
+/* Return true only when all four components of \a q compare equal to zero. */
+bool is_zero_qt(const float q[4])
+{
+  for (int i = 0; i < 4; i++) {
+    if (q[i] != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * Quaternion product: \a q = \a q1 * \a q2.
+ *
+ * All four components are computed before any is stored,
+ * so \a q may be the same array as \a q1 or \a q2.
+ */
+void mul_qt_qtqt(float q[4], const float q1[4], const float q2[4])
+{
+  const float w = q1[0] * q2[0] - q1[1] * q2[1] - q1[2] * q2[2] - q1[3] * q2[3];
+  const float x = q1[0] * q2[1] + q1[1] * q2[0] + q1[2] * q2[3] - q1[3] * q2[2];
+  const float y = q1[0] * q2[2] + q1[2] * q2[0] + q1[3] * q2[1] - q1[1] * q2[3];
+  const float z = q1[0] * q2[3] + q1[3] * q2[0] + q1[1] * q2[2] - q1[2] * q2[1];
+
+  q[0] = w;
+  q[1] = x;
+  q[2] = y;
+  q[3] = z;
+}
+
+/**
+ * Rotate the vector \a v in place by the quaternion \a q.
+ *
+ * \note \a q does not have to be a unit quaternion, but you may want to use one:
+ *
+ * This is a shortcut for 'q v q*' where \a v is treated as a quaternion.
+ * It removes the need for converting a vector to a quaternion,
+ * calculating q's conjugate and converting back to a vector.
+ * It also happens to be faster (17+,24* vs * 24+,32*).
+ * If \a q is not a unit quaternion, then \a v will be both rotated by
+ * the same amount as if q was a unit quaternion, and scaled by the square of
+ * the length of q.
+ *
+ * For people used to python mathutils, it is like:
+ * def mul_qt_v3(q, v): (q * Quaternion((0.0, v[0], v[1], v[2])) * q.conjugated())[1:]
+ *
+ * \note multiplying by a 3x3 matrix is ~25% faster.
+ */
+void mul_qt_v3(const float q[4], float v[3])
+{
+  /* first product: q * (0, v) */
+  const float w = -q[1] * v[0] - q[2] * v[1] - q[3] * v[2];
+  const float x = q[0] * v[0] + q[2] * v[2] - q[3] * v[1];
+  const float y = q[0] * v[1] + q[3] * v[0] - q[1] * v[2];
+  const float z = q[0] * v[2] + q[1] * v[1] - q[2] * v[0];
+
+  /* second product: (w, x, y, z) * conjugate(q), keeping only the vector part */
+  v[0] = w * -q[1] + x * q[0] - y * q[3] + z * q[2];
+  v[1] = w * -q[2] + y * q[0] - z * q[1] + x * q[3];
+  v[2] = w * -q[3] + z * q[0] - x * q[2] + y * q[1];
+}
+
+/* Write the conjugate of \a q2 into \a q1: same scalar part, negated vector part. */
+void conjugate_qt_qt(float q1[4], const float q2[4])
+{
+  q1[0] = q2[0];
+  for (int i = 1; i < 4; i++) {
+    q1[i] = -q2[i];
+  }
+}
+
+/* Conjugate \a q in place: the vector part (q[1..3]) is negated, q[0] is kept. */
+void conjugate_qt(float q[4])
+{
+  for (int i = 1; i < 4; i++) {
+    q[i] = -q[i];
+  }
+}
+
+/* Four-component dot product of \a q1 and \a q2. */
+float dot_qtqt(const float q1[4], const float q2[4])
+{
+  const float dot = q1[0] * q2[0] + q1[1] * q2[1] + q1[2] * q2[2] + q1[3] * q2[3];
+
+  return dot;
+}
+
+/**
+ * Invert \a q in place: conjugate divided by the squared length.
+ * A quaternion whose squared length is exactly zero is left unchanged.
+ */
+void invert_qt(float q[4])
+{
+  const float len_sq = dot_qtqt(q, q);
+
+  if (len_sq != 0.0f) {
+    conjugate_qt(q);
+    mul_qt_fl(q, 1.0f / len_sq);
+  }
+}
+
+/* Store the inverse of \a q2 in \a q1 (copy first, then invert the copy in place). */
+void invert_qt_qt(float q1[4], const float q2[4])
+{
+  for (int i = 0; i < 4; i++) {
+    q1[i] = q2[i];
+  }
+  invert_qt(q1);
+}
+
+/**
+ * Inverse for the case where \a q is known to be unit-length, which is just its conjugate.
+ * #conjugate_qt could be called directly, but this function shows the intent
+ * and asserts if \a q ever becomes non-unit-length.
+ */
+void invert_qt_normalized(float q[4])
+{
+  BLI_ASSERT_UNIT_QUAT(q);
+
+  /* conjugate: negate the vector part */
+  q[1] = -q[1];
+  q[2] = -q[2];
+  q[3] = -q[3];
+}
+
+/* Store in \a q1 the inverse of \a q2, which is expected to be unit-length (asserted). */
+void invert_qt_qt_normalized(float q1[4], const float q2[4])
+{
+  for (int i = 0; i < 4; i++) {
+    q1[i] = q2[i];
+  }
+  invert_qt_normalized(q1);
+}
+
+/* Simple multiply: scale all four components of \a q by \a f. */
+void mul_qt_fl(float q[4], const float f)
+{
+  for (int i = 0; i < 4; i++) {
+    q[i] *= f;
+  }
+}
+
+/**
+ * Multiply \a q1 by a copy of \a q2 whose scalar component (index 0) is negated
+ * and store the product in \a q.
+ *
+ * NOTE(review): only the scalar part is negated, not the vector part - confirm this is intended.
+ */
+void sub_qt_qtqt(float q[4], const float q1[4], const float q2[4])
+{
+  const float q2_neg_w[4] = {-q2[0], q2[1], q2[2], q2[3]};
+
+  mul_qt_qtqt(q, q1, q2_neg_w);
+}
+
+/* Raise a unit quaternion to the power \a fac (in place). */
+void pow_qt_fl_normalized(float q[4], const float fac)
+{
+  BLI_ASSERT_UNIT_QUAT(q);
+
+  /* quat[0] = cos(0.5 * angle), but here the 0.5 and 2.0 cancel out. */
+  const float scaled_half_angle = fac * saacos(q[0]);
+  const float vec_len = sinf(scaled_half_angle);
+
+  q[0] = cosf(scaled_half_angle);
+  /* Give the vector part the length sin(scaled half angle). */
+  normalize_v3_length(q + 1, vec_len);
+}
+
+/**
+ * Apply the rotation of \a a to \a q, keeping the values compatible with \a old.
+ * Avoids axis flipping, e.g. for animated f-curves.
+ */
+void quat_to_compatible_quat(float q[4], const float a[4], const float old[4])
+{
+  const float eps = 1e-4f;
+  float old_unit[4];
+  float delta[4];
+
+  BLI_ASSERT_UNIT_QUAT(a);
+
+  /* With a (near) zero-length `old` there is nothing to stay compatible with, use `a` as is.
+   * Skips `!finite_v4(old)` case too. */
+  if (!(normalize_qt_qt(old_unit, old) > eps)) {
+    copy_qt_qt(q, a);
+    return;
+  }
+
+  rotation_between_quats_to_quat(delta, old_unit, a);
+  mul_qt_qtqt(q, old, delta);
+
+  /* Keep the sign of the scalar component in line with `old`. */
+  if ((old[0] < 0.0f) != (q[0] < 0.0f)) {
+    negate_v4(q);
+  }
+}
+
+/* Quaternion to 3x3 matrix without the normalization check,
+ * currently only needed by mat3_to_quat_is_ok. */
+static void quat_to_mat3_no_error(float m[3][3], const float q[4])
+{
+  /* Components pre-scaled by sqrt(2), so every product below carries the factor 2. */
+  const double sw = M_SQRT2 * (double)q[0];
+  const double sx = M_SQRT2 * (double)q[1];
+  const double sy = M_SQRT2 * (double)q[2];
+  const double sz = M_SQRT2 * (double)q[3];
+
+  const double wx = sw * sx;
+  const double wy = sw * sy;
+  const double wz = sw * sz;
+  const double xx = sx * sx;
+  const double xy = sx * sy;
+  const double xz = sx * sz;
+  const double yy = sy * sy;
+  const double yz = sy * sz;
+  const double zz = sz * sz;
+
+  m[0][0] = (float)(1.0 - yy - zz);
+  m[0][1] = (float)(wz + xy);
+  m[0][2] = (float)(-wy + xz);
+
+  m[1][0] = (float)(-wz + xy);
+  m[1][1] = (float)(1.0 - xx - zz);
+  m[1][2] = (float)(wx + yz);
+
+  m[2][0] = (float)(wy + xz);
+  m[2][1] = (float)(-wx + yz);
+  m[2][2] = (float)(1.0 - xx - yy);
+}
+
+/**
+ * Convert the quaternion \a q to the 3x3 matrix \a m.
+ * In DEBUG builds a warning is printed to stderr when \a q is neither zero nor
+ * (within QUAT_EPSILON) unit-length; the conversion is done regardless.
+ */
+void quat_to_mat3(float m[3][3], const float q[4])
+{
+#ifdef DEBUG
+  const float len_sq = dot_qtqt(q, q);
+  if ((len_sq != 0.0f) && !(fabsf(len_sq - 1.0f) < (float)QUAT_EPSILON)) {
+    fprintf(stderr,
+            "Warning! quat_to_mat3() called with non-normalized: size %.8f *** report a bug ***\n",
+            len_sq);
+  }
+#endif
+
+  quat_to_mat3_no_error(m, q);
+}
+
+/**
+ * Convert the quaternion \a q to the 4x4 matrix \a m.
+ * The upper-left 3x3 part holds the rotation, the rest is filled in as identity.
+ * In DEBUG builds a warning is printed to stderr when \a q is neither zero nor
+ * (within QUAT_EPSILON) unit-length.
+ */
+void quat_to_mat4(float m[4][4], const float q[4])
+{
+#ifdef DEBUG
+  {
+    const double len_sq = dot_qtqt(q, q);
+    if ((len_sq != 0.0) && !(fabs(len_sq - 1.0) < QUAT_EPSILON)) {
+      fprintf(
+          stderr,
+          "Warning! quat_to_mat4() called with non-normalized: size %.8f *** report a bug ***\n",
+          (float)len_sq);
+    }
+  }
+#endif
+
+  /* Components pre-scaled by sqrt(2), so every product below carries the factor 2. */
+  const double sw = M_SQRT2 * (double)q[0];
+  const double sx = M_SQRT2 * (double)q[1];
+  const double sy = M_SQRT2 * (double)q[2];
+  const double sz = M_SQRT2 * (double)q[3];
+
+  const double wx = sw * sx;
+  const double wy = sw * sy;
+  const double wz = sw * sz;
+  const double xx = sx * sx;
+  const double xy = sx * sy;
+  const double xz = sx * sz;
+  const double yy = sy * sy;
+  const double yz = sy * sz;
+  const double zz = sz * sz;
+
+  m[0][0] = (float)(1.0 - yy - zz);
+  m[0][1] = (float)(wz + xy);
+  m[0][2] = (float)(-wy + xz);
+  m[0][3] = 0.0f;
+
+  m[1][0] = (float)(-wz + xy);
+  m[1][1] = (float)(1.0 - xx - zz);
+  m[1][2] = (float)(wx + yz);
+  m[1][3] = 0.0f;
+
+  m[2][0] = (float)(wy + xz);
+  m[2][1] = (float)(-wx + yz);
+  m[2][2] = (float)(1.0 - xx - yy);
+  m[2][3] = 0.0f;
+
+  /* last row: no translation */
+  m[3][0] = m[3][1] = m[3][2] = 0.0f;
+  m[3][3] = 1.0f;
+}
+
+/**
+ * Convert a 3x3 matrix to a quaternion.
+ *
+ * \param q: resulting quaternion, normalized before returning.
+ * \param mat: input matrix, asserted to be unit-length (see BLI_ASSERT_UNIT_M3).
+ */
+void mat3_normalized_to_quat(float q[4], const float mat[3][3])
+{
+  double tr, s;
+
+  BLI_ASSERT_UNIT_M3(mat);
+
+  /* A quarter of (1 + trace), its square root becomes q[0]. */
+  tr = 0.25 * (double)(1.0f + mat[0][0] + mat[1][1] + mat[2][2]);
+
+  if (tr > (double)1e-4f) {
+    /* q[0] is large enough to divide by. */
+    s = sqrt(tr);
+    q[0] = (float)s;
+    s = 1.0 / (4.0 * s);
+    q[1] = (float)((double)(mat[1][2] - mat[2][1]) * s);
+    q[2] = (float)((double)(mat[2][0] - mat[0][2]) * s);
+    q[3] = (float)((double)(mat[0][1] - mat[1][0]) * s);
+  }
+  else {
+    /* q[0] is close to zero: start from the component that belongs
+     * to the largest diagonal element instead. */
+    if (mat[0][0] > mat[1][1] && mat[0][0] > mat[2][2]) {
+      s = 2.0f * sqrtf(1.0f + mat[0][0] - mat[1][1] - mat[2][2]);
+      q[1] = (float)(0.25 * s);
+
+      s = 1.0 / s;
+      q[0] = (float)((double)(mat[1][2] - mat[2][1]) * s);
+      q[2] = (float)((double)(mat[1][0] + mat[0][1]) * s);
+      q[3] = (float)((double)(mat[2][0] + mat[0][2]) * s);
+    }
+    else if (mat[1][1] > mat[2][2]) {
+      s = 2.0f * sqrtf(1.0f + mat[1][1] - mat[0][0] - mat[2][2]);
+      q[2] = (float)(0.25 * s);
+
+      s = 1.0 / s;
+      q[0] = (float)((double)(mat[2][0] - mat[0][2]) * s);
+      q[1] = (float)((double)(mat[1][0] + mat[0][1]) * s);
+      q[3] = (float)((double)(mat[2][1] + mat[1][2]) * s);
+    }
+    else {
+      s = 2.0f * sqrtf(1.0f + mat[2][2] - mat[0][0] - mat[1][1]);
+      q[3] = (float)(0.25 * s);
+
+      s = 1.0 / s;
+      q[0] = (float)((double)(mat[0][1] - mat[1][0]) * s);
+      q[1] = (float)((double)(mat[2][0] + mat[0][2]) * s);
+      q[2] = (float)((double)(mat[2][1] + mat[1][2]) * s);
+    }
+  }
+
+  /* All branches end with an explicit normalization of the result. */
+  normalize_qt(q);
+}
+/* Convert a 3x3 matrix that may not be normalized to a quaternion. */
+void mat3_to_quat(float q[4], const float m[3][3])
+{
+  float m_unit[3][3];
+
+  /* Work on a normalized copy.
+   * This is needed AND a 'normalize_qt' in the end (done by the callee). */
+  normalize_m3_m3(m_unit, m);
+
+  mat3_normalized_to_quat(q, m_unit);
+}
+
+/* Quaternion from the 3x3 part of the 4x4 matrix \a m (expected to be normalized). */
+void mat4_normalized_to_quat(float q[4], const float m[4][4])
+{
+  float rot[3][3];
+
+  copy_m3_m4(rot, m);
+
+  mat3_normalized_to_quat(q, rot);
+}
+
+/* Quaternion from the 3x3 part of the 4x4 matrix \a m (normalized by #mat3_to_quat). */
+void mat4_to_quat(float q[4], const float m[4][4])
+{
+  float rot[3][3];
+
+  copy_m3_m4(rot, m);
+
+  mat3_to_quat(q, rot);
+}
+
+/**
+ * Alternative matrix to quaternion conversion, composed of two rotations:
+ * one that brings the matrix z-axis onto the z-axis (q1),
+ * followed by one around z that aligns the x-axes (q2).
+ *
+ * \param q: resulting quaternion (q1 * q2), not explicitly normalized here.
+ * \param wmat: input matrix, a normalized copy of it is used.
+ */
+void mat3_to_quat_is_ok(float q[4], const float wmat[3][3])
+{
+  float mat[3][3], matr[3][3], matn[3][3], q1[4], q2[4], angle, si, co, nor[3];
+
+  /* work on a copy */
+  copy_m3_m3(mat, wmat);
+  normalize_m3(mat);
+
+  /* rotate z-axis of matrix to z-axis */
+
+  nor[0] = mat[2][1]; /* cross product with (0,0,1) */
+  nor[1] = -mat[2][0];
+  nor[2] = 0.0;
+  normalize_v3(nor);
+
+  /* half of the angle between the matrix z-axis and (0,0,1) */
+  co = mat[2][2];
+  angle = 0.5f * saacos(co);
+
+  co = cosf(angle);
+  si = sinf(angle);
+  q1[0] = co;
+  q1[1] = -nor[0] * si; /* negative here, but why? */
+  q1[2] = -nor[1] * si;
+  q1[3] = -nor[2] * si;
+
+  /* rotate back x-axis from mat, using inverse q1 */
+  quat_to_mat3_no_error(matr, q1);
+  invert_m3_m3(matn, matr);
+  mul_m3_v3(matn, mat[0]); /* modifies the local copy in place */
+
+  /* and align x-axes */
+  angle = 0.5f * atan2f(mat[0][1], mat[0][0]);
+
+  /* q2 is a rotation around the z-axis only */
+  co = cosf(angle);
+  si = sinf(angle);
+  q2[0] = co;
+  q2[1] = 0.0f;
+  q2[2] = 0.0f;
+  q2[3] = si;
+
+  mul_qt_qtqt(q, q1, q2);
+}
+
+/**
+ * Normalize \a q in place.
+ * A zero-length quaternion is replaced by (0, 1, 0, 0).
+ *
+ * \return the length of \a q before normalization.
+ */
+float normalize_qt(float q[4])
+{
+  const float len = sqrtf(dot_qtqt(q, q));
+
+  if (len == 0.0f) {
+    q[0] = q[2] = q[3] = 0.0f;
+    q[1] = 1.0f;
+    return len;
+  }
+
+  mul_qt_fl(q, 1.0f / len);
+  return len;
+}
+
+/* Write the normalized form of \a q into \a r and return the length \a q had. */
+float normalize_qt_qt(float r[4], const float q[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = q[i];
+  }
+  return normalize_qt(r);
+}
+
+/**
+ * Calculate the rotation matrix between 2 normalized vectors.
+ *
+ * The sine and cosine are taken from the cross and dot products,
+ * which avoids calculating the angle itself.
+ */
+void rotation_between_vecs_to_mat3(float m[3][3], const float v1[3], const float v2[3])
+{
+  float axis[3];
+  float angle_sin, angle_cos;
+
+  BLI_ASSERT_UNIT_V3(v1);
+  BLI_ASSERT_UNIT_V3(v2);
+
+  cross_v3_v3v3(axis, v1, v2);
+
+  angle_sin = normalize_v3(axis);
+  angle_cos = dot_v3v3(v1, v2);
+
+  if (!(angle_sin > FLT_EPSILON)) {
+    /* The vectors are colinear, the cross product gives no usable axis. */
+    if (angle_cos > 0.0f) {
+      /* Same vectors, zero rotation... */
+      unit_m3(m);
+      return;
+    }
+
+    /* Colinear but opposed vectors, 180 rotation... */
+    ortho_v3_v3(axis, v1);
+    normalize_v3(axis);
+    angle_sin = 0.0f;  /* sin(M_PI) */
+    angle_cos = -1.0f; /* cos(M_PI) */
+  }
+
+  BLI_ASSERT_UNIT_V3(axis);
+  axis_angle_normalized_to_mat3_ex(m, axis, angle_sin, angle_cos);
+  BLI_ASSERT_UNIT_M3(m);
+}
+
+/* Quaternion rotating \a v1 onto \a v2.
+ * note: expects vectors to be normalized. */
+void rotation_between_vecs_to_quat(float q[4], const float v1[3], const float v2[3])
+{
+  float axis[3];
+
+  cross_v3_v3v3(axis, v1, v2);
+
+  if (normalize_v3(axis) > FLT_EPSILON) {
+    axis_angle_normalized_to_quat(q, axis, angle_normalized_v3v3(v1, v2));
+    return;
+  }
+
+  /* degenerate case: the vectors are colinear */
+  if (dot_v3v3(v1, v2) > 0.0f) {
+    /* Same vectors, zero rotation... */
+    unit_qt(q);
+    return;
+  }
+
+  /* Colinear but opposed vectors, 180 rotation... */
+  ortho_v3_v3(axis, v1);
+  axis_angle_to_quat(q, axis, (float)M_PI);
+}
+
+/**
+ * Rotation from \a q1 to \a q2: the inverse of \a q1 multiplied by \a q2.
+ * \a q1 does not need to be unit-length, the conjugate is divided by its squared length.
+ */
+void rotation_between_quats_to_quat(float q[4], const float q1[4], const float q2[4])
+{
+  float q1_inv[4];
+
+  conjugate_qt_qt(q1_inv, q1);
+  mul_qt_fl(q1_inv, 1.0f / dot_qtqt(q1_inv, q1_inv));
+
+  mul_qt_qtqt(q, q1_inv, q2);
+}
+
+/* -------------------------------------------------------------------- */
+/** \name Quaternion Angle
+ *
+ * Unlike the angle between vectors, this does NOT return the shortest angle.
+ * See signed functions below for this.
+ *
+ * \{ */
+
+/* Rotation angle of the unit quaternion \a q: twice the arc-cosine of q[0]. */
+float angle_normalized_qt(const float q[4])
+{
+  BLI_ASSERT_UNIT_QUAT(q);
+
+  const float half_angle = saacos(q[0]);
+  return 2.0f * half_angle;
+}
+
+/* Rotation angle of \a q, which is normalized first. */
+float angle_qt(const float q[4])
+{
+  float q_unit[4];
+
+  normalize_qt_qt(q_unit, q);
+  return angle_normalized_qt(q_unit);
+}
+
+/* Angle of the rotation that leads from the unit quaternion \a q1 to the unit quaternion \a q2. */
+float angle_normalized_qtqt(const float q1[4], const float q2[4])
+{
+  float q_between[4];
+
+  BLI_ASSERT_UNIT_QUAT(q1);
+  BLI_ASSERT_UNIT_QUAT(q2);
+
+  rotation_between_quats_to_quat(q_between, q1, q2);
+  return angle_normalized_qt(q_between);
+}
+
+/* Same as #angle_normalized_qtqt, but both inputs are normalized first. */
+float angle_qtqt(const float q1[4], const float q2[4])
+{
+  float q1_unit[4], q2_unit[4];
+
+  normalize_qt_qt(q1_unit, q1);
+  normalize_qt_qt(q2_unit, q2);
+
+  return angle_normalized_qtqt(q1_unit, q2_unit);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Quaternion Angle (Signed)
+ *
+ * Angles with quaternion calculation can exceed 180d,
+ * Having signed versions of these functions allows 'fabsf(angle_signed_qtqt(...))'
+ * to give us the shortest angle between quaternions.
+ * With higher precision than subtracting pi afterwards.
+ *
+ * \{ */
+
+/* Signed angle of the unit quaternion \a q: negative when q[0] is negative. */
+float angle_signed_normalized_qt(const float q[4])
+{
+  BLI_ASSERT_UNIT_QUAT(q);
+
+  return (q[0] >= 0.0f) ? (2.0f * saacos(q[0])) : (-2.0f * saacos(-q[0]));
+}
+
+/* Signed angle between two unit quaternions:
+ * when their dot product is negative, \a q2 is negated and the angle returned negative. */
+float angle_signed_normalized_qtqt(const float q1[4], const float q2[4])
+{
+  if (dot_qtqt(q1, q2) >= 0.0f) {
+    return angle_normalized_qtqt(q1, q2);
+  }
+
+  float q2_neg[4];
+  negate_v4_v4(q2_neg, q2);
+  return -angle_normalized_qtqt(q1, q2_neg);
+}
+
+/* Signed angle of \a q, which is normalized first. */
+float angle_signed_qt(const float q[4])
+{
+  float q_unit[4];
+
+  normalize_qt_qt(q_unit, q);
+  return angle_signed_normalized_qt(q_unit);
+}
+
+/* Signed angle between two quaternions (normalized by #angle_qtqt):
+ * when their dot product is negative, \a q2 is negated and the angle returned negative. */
+float angle_signed_qtqt(const float q1[4], const float q2[4])
+{
+  if (dot_qtqt(q1, q2) >= 0.0f) {
+    return angle_qtqt(q1, q2);
+  }
+
+  float q2_neg[4];
+  negate_v4_v4(q2_neg, q2);
+  return -angle_qtqt(q1, q2_neg);
+}
+
+/** \} */
+
+/**
+ * Build a quaternion from a direction vector, a tracking axis and an up axis.
+ *
+ * \param q: resulting quaternion, left as the unit quaternion when \a vec has zero length.
+ * \param vec: direction vector, does not need to be normalized (its length is divided out).
+ * \param axis: asserted to be in [0, 5]; values above 2 use \a vec as is and map to
+ * axis - 3, values up to 2 use the negated \a vec.
+ * \param upflag: asserted to be in [0, 2]; the second rotation is skipped when it equals
+ * the (reduced) axis.
+ */
+void vec_to_quat(float q[4], const float vec[3], short axis, const short upflag)
+{
+  const float eps = 1e-4f;
+  float nor[3], tvec[3];
+  float angle, si, co, len;
+
+  assert(axis >= 0 && axis <= 5);
+  assert(upflag >= 0 && upflag <= 2);
+
+  /* first set the quat to unit */
+  unit_qt(q);
+
+  len = len_v3(vec);
+
+  if (UNLIKELY(len == 0.0f)) {
+    return;
+  }
+
+  /* rotate to axis */
+  if (axis > 2) {
+    copy_v3_v3(tvec, vec);
+    axis = (short)(axis - 3);
+  }
+  else {
+    negate_v3_v3(tvec, vec);
+  }
+
+  /* nasty! I need a good routine for this...
+   * problem is a rotation of an Y axis to the negative Y-axis for example.
+   */
+
+  /* For the chosen axis: `nor` is the rotation axis and `co` the component of `tvec`
+   * along that axis. When the other two components are (nearly) zero a fixed axis is used. */
+  if (axis == 0) { /* x-axis */
+    nor[0] = 0.0;
+    nor[1] = -tvec[2];
+    nor[2] = tvec[1];
+
+    if (fabsf(tvec[1]) + fabsf(tvec[2]) < eps) {
+      nor[1] = 1.0f;
+    }
+
+    co = tvec[0];
+  }
+  else if (axis == 1) { /* y-axis */
+    nor[0] = tvec[2];
+    nor[1] = 0.0;
+    nor[2] = -tvec[0];
+
+    if (fabsf(tvec[0]) + fabsf(tvec[2]) < eps) {
+      nor[2] = 1.0f;
+    }
+
+    co = tvec[1];
+  }
+  else { /* z-axis */
+    nor[0] = -tvec[1];
+    nor[1] = tvec[0];
+    nor[2] = 0.0;
+
+    if (fabsf(tvec[0]) + fabsf(tvec[1]) < eps) {
+      nor[0] = 1.0f;
+    }
+
+    co = tvec[2];
+  }
+  /* `tvec` is not normalized, dividing by its length gives the cosine */
+  co /= len;
+
+  normalize_v3(nor);
+
+  axis_angle_normalized_to_quat(q, nor, saacos(co));
+
+  /* Second rotation, around `tvec`, its angle is taken from the third row
+   * of the matrix of the first rotation. */
+  if (axis != upflag) {
+    float mat[3][3];
+    float q2[4];
+    const float *fp = mat[2];
+    quat_to_mat3(mat, q);
+
+    if (axis == 0) {
+      if (upflag == 1) {
+        angle = 0.5f * atan2f(fp[2], fp[1]);
+      }
+      else {
+        angle = -0.5f * atan2f(fp[1], fp[2]);
+      }
+    }
+    else if (axis == 1) {
+      if (upflag == 0) {
+        angle = -0.5f * atan2f(fp[2], fp[0]);
+      }
+      else {
+        angle = 0.5f * atan2f(fp[0], fp[2]);
+      }
+    }
+    else {
+      if (upflag == 0) {
+        angle = 0.5f * atan2f(-fp[1], -fp[0]);
+      }
+      else {
+        angle = -0.5f * atan2f(-fp[0], -fp[1]);
+      }
+    }
+
+    /* `angle` already is the half angle; dividing the sine by `len` normalizes `tvec` */
+    co = cosf(angle);
+    si = sinf(angle) / len;
+    q2[0] = co;
+    q2[1] = tvec[0] * si;
+    q2[2] = tvec[1] * si;
+    q2[3] = tvec[2] * si;
+
+    mul_qt_qtqt(q, q2, q);
+  }
+}
+
+#if 0
+
+/* Disabled (compiled out by the surrounding `#if 0`).
+ * Source: A & M Watt, Advanced Animation and Rendering Techniques, 1992, ACM Press. */
+void QuatInterpolW(float *result, float quat1[4], float quat2[4], float t)
+{
+  float omega, cosom, sinom, sc1, sc2;
+
+  cosom = quat1[0] * quat2[0] + quat1[1] * quat2[1] + quat1[2] * quat2[2] + quat1[3] * quat2[3];
+
+  /* rotate around shortest angle */
+  if ((1.0f + cosom) > 0.0001f) {
+
+    if ((1.0f - cosom) > 0.0001f) {
+      omega = (float)acos(cosom);
+      sinom = sinf(omega);
+      sc1 = sinf((1.0 - t) * omega) / sinom;
+      sc2 = sinf(t * omega) / sinom;
+    }
+    else {
+      sc1 = 1.0f - t;
+      sc2 = t;
+    }
+    result[0] = sc1 * quat1[0] + sc2 * quat2[0];
+    result[1] = sc1 * quat1[1] + sc2 * quat2[1];
+    result[2] = sc1 * quat1[2] + sc2 * quat2[2];
+    result[3] = sc1 * quat1[3] + sc2 * quat2[3];
+  }
+  else {
+    result[0] = quat2[3];
+    result[1] = -quat2[2];
+    result[2] = quat2[1];
+    result[3] = -quat2[0];
+
+    sc1 = sinf((1.0 - t) * M_PI_2);
+    sc2 = sinf(t * M_PI_2);
+
+    result[0] = sc1 * quat1[0] + sc2 * result[0];
+    result[1] = sc1 * quat1[1] + sc2 * result[1];
+    result[2] = sc1 * quat1[2] + sc2 * result[2];
+    result[3] = sc1 * quat1[3] + sc2 * result[3];
+  }
+}
+#endif
+
+/**
+ * Generic function for implementing slerp
+ * (quaternions and spherical vector coords).
+ *
+ * \param t: factor in [0..1]
+ * \param cosom: dot product from normalized vectors/quats.
+ * \param r_w: calculated weights (for the first and the second input).
+ */
+void interp_dot_slerp(const float t, const float cosom, float r_w[2])
+{
+  const float eps = 1e-4f;
+
+  BLI_assert(IN_RANGE_INCL(cosom, -1.0001f, 1.0001f));
+
+  /* (Nearly) aligned axis: the sine below would be close to zero, fallback to lerp. */
+  if (UNLIKELY(!(fabsf(cosom) < (1.0f - eps)))) {
+    r_w[0] = 1.0f - t;
+    r_w[1] = t;
+    return;
+  }
+
+  /* within [-1..1] range */
+  const float omega = acosf(cosom);
+  const float sinom = sinf(omega);
+
+  r_w[0] = sinf((1.0f - t) * omega) / sinom;
+  r_w[1] = sinf(t * omega) / sinom;
+}
+
+/**
+ * Spherical interpolation between the unit quaternions \a quat1 (t = 0) and \a quat2 (t = 1),
+ * rotating around the shortest angle.
+ */
+void interp_qt_qtqt(float result[4], const float quat1[4], const float quat2[4], const float t)
+{
+  float q_start[4], weights[2];
+  float cosom;
+
+  BLI_ASSERT_UNIT_QUAT(quat1);
+  BLI_ASSERT_UNIT_QUAT(quat2);
+
+  cosom = dot_qtqt(quat1, quat2);
+
+  /* rotate around shortest angle: flip the first quaternion if needed */
+  if (cosom < 0.0f) {
+    cosom = -cosom;
+    negate_v4_v4(q_start, quat1);
+  }
+  else {
+    copy_qt_qt(q_start, quat1);
+  }
+
+  interp_dot_slerp(t, cosom, weights);
+
+  for (int i = 0; i < 4; i++) {
+    result[i] = weights[0] * q_start[i] + weights[1] * quat2[i];
+  }
+}
+
+/* Component-wise: \a result = \a quat1 + \a t * \a quat2. */
+void add_qt_qtqt(float result[4], const float quat1[4], const float quat2[4], const float t)
+{
+  for (int i = 0; i < 4; i++) {
+    result[i] = quat1[i] + t * quat2[i];
+  }
+}
+
+/**
+ * Same as tri_to_quat() but takes a pre-computed normal from the triangle,
+ * used for ngons when we know their normal.
+ *
+ * \param quat: resulting quaternion (product of the two rotations computed below).
+ * \param v1, v2: the edge v1-v2 is used to find the rotation around the normal.
+ * \param v3: unused here (only needed by the disabled normal calculation).
+ * \param no_orig: the normal, used as is.
+ */
+void tri_to_quat_ex(
+    float quat[4], const float v1[3], const float v2[3], const float v3[3], const float no_orig[3])
+{
+  /* imaginary x-axis, y-axis triangle is being rotated */
+  float vec[3], q1[4], q2[4], n[3], si, co, angle, mat[3][3], imat[3][3];
+
+  /* move z-axis to face-normal */
+#if 0
+  normal_tri_v3(vec, v1, v2, v3);
+#else
+  copy_v3_v3(vec, no_orig);
+  (void)v3;
+#endif
+
+  /* rotation axis in the XY plane, perpendicular to the normal */
+  n[0] = vec[1];
+  n[1] = -vec[0];
+  n[2] = 0.0f;
+  normalize_v3(n);
+
+  /* fallback axis when the normal has no X/Y component */
+  if (n[0] == 0.0f && n[1] == 0.0f) {
+    n[0] = 1.0f;
+  }
+
+  angle = -0.5f * saacos(vec[2]);
+  co = cosf(angle);
+  si = sinf(angle);
+  q1[0] = co;
+  q1[1] = n[0] * si;
+  q1[2] = n[1] * si;
+  q1[3] = 0.0f;
+
+  /* rotate back line v1-v2 */
+  quat_to_mat3(mat, q1);
+  invert_m3_m3(imat, mat);
+  sub_v3_v3v3(vec, v2, v1);
+  mul_m3_v3(imat, vec);
+
+  /* what angle has this line with x-axis? */
+  vec[2] = 0.0f;
+  normalize_v3(vec);
+
+  /* q2 is a rotation around the z-axis only */
+  angle = 0.5f * atan2f(vec[1], vec[0]);
+  co = cosf(angle);
+  si = sinf(angle);
+  q2[0] = co;
+  q2[1] = 0.0f;
+  q2[2] = 0.0f;
+  q2[3] = si;
+
+  mul_qt_qtqt(quat, q1, q2);
+}
+
+/**
+ * NOTE: tri_to_quat() below is commented out.
+ * \return the length of the normal, use to test for degenerate triangles.
+ */
+//float tri_to_quat(float quat[4], const float v1[3], const float v2[3], const float v3[3])
+//{
+//  float vec[3];
+//  const float len = normal_tri_v3(vec, v1, v2, v3);
+//
+//  tri_to_quat_ex(quat, v1, v2, v3, vec);
+//  return len;
+//}
+
+/* Print the label \a str followed by the four components of \a q (3 decimals) to stdout. */
+void print_qt(const char *str, const float q[4])
+{
+  printf("%s: %.3f %.3f %.3f %.3f\n", str, (double)q[0], (double)q[1], (double)q[2], (double)q[3]);
+}
+
+/******************************** Axis Angle *********************************/
+
+/* Axis-angle to quaternion for an \a axis that is already unit-length (asserted). */
+void axis_angle_normalized_to_quat(float q[4], const float axis[3], const float angle)
+{
+  const float half_angle = 0.5f * angle;
+  const float half_sin = sinf(half_angle);
+  const float half_cos = cosf(half_angle);
+
+  BLI_ASSERT_UNIT_V3(axis);
+
+  /* (cos(angle / 2), axis * sin(angle / 2)) */
+  q[0] = half_cos;
+  mul_v3_v3fl(q + 1, axis, half_sin);
+}
+
+/* Axis-angle to quaternion, the axis is normalized first.
+ * A zero-length axis results in the unit quaternion. */
+void axis_angle_to_quat(float q[4], const float axis[3], const float angle)
+{
+  float axis_unit[3];
+
+  if (UNLIKELY(normalize_v3_v3(axis_unit, axis) == 0.0f)) {
+    unit_qt(q);
+    return;
+  }
+
+  axis_angle_normalized_to_quat(q, axis_unit, angle);
+}
+
+/**
+ * Quaternion to axis-angle.
+ *
+ * \param axis: resulting rotation axis, falls back to the Y axis when the
+ * vector part of \a q is zero.
+ * \param angle: resulting rotation angle.
+ * \param q: input quaternion, expected to be unit-length
+ * (DEBUG builds print a warning to stderr otherwise).
+ */
+void quat_to_axis_angle(float axis[3], float *angle, const float q[4])
+{
+  float ha, si;
+
+#ifdef DEBUG
+  if (!((ha = dot_qtqt(q, q)) == 0.0f || (fabsf(ha - 1.0f) < (float)QUAT_EPSILON))) {
+    fprintf(stderr,
+            "Warning! quat_to_axis_angle() called with non-normalized: size %.8f *** report a bug "
+            "***\n",
+            ha);
+  }
+#endif
+
+  /* calculate angle/2, and sin(angle/2)
+   *
+   * Use saacos() like the other angle functions in this file: a quaternion that is
+   * unit-length only within tolerance can have q[0] slightly outside [-1..1],
+   * for which a plain acosf() returns NaN. */
+  ha = saacos(q[0]);
+  si = sinf(ha);
+
+  /* from half-angle to angle */
+  *angle = ha * 2;
+
+  /* prevent division by zero for axis conversion */
+  if (fabsf(si) < 0.0005f) {
+    si = 1.0f;
+  }
+
+  axis[0] = q[1] / si;
+  axis[1] = q[2] / si;
+  axis[2] = q[3] / si;
+  /* no usable axis, default to the Y axis */
+  if (is_zero_v3(axis)) {
+    axis[1] = 1.0f;
+  }
+}
+
+/* Axis Angle to Euler Rotation (with the given rotation order) */
+void axis_angle_to_eulO(float eul[3], const short order, const float axis[3], const float angle)
+{
+  float quat[4];
+
+  /* a quaternion serves as the intermediate representation for now... */
+  axis_angle_to_quat(quat, axis, angle);
+
+  quat_to_eulO(eul, order, quat);
+}
+
+/* Euler Rotation (with the given rotation order) to Axis Angle */
+void eulO_to_axis_angle(float axis[3], float *angle, const float eul[3], const short order)
+{
+  float quat[4];
+
+  /* a quaternion serves as the intermediate representation for now... */
+  eulO_to_quat(quat, eul, order);
+
+  quat_to_axis_angle(axis, angle, quat);
+}
+
+/**
+ * Axis angle to 3x3 matrix.
+ *
+ * Takes the sine and cosine of the angle instead of the angle itself,
+ * so callers that already have them can avoid calculating the angle.
+ *
+ * \param axis: rotation axis (must be normalized).
+ * \param angle_sin: sin(angle)
+ * \param angle_cos: cos(angle)
+ */
+void axis_angle_normalized_to_mat3_ex(float mat[3][3],
+                                      const float axis[3],
+                                      const float angle_sin,
+                                      const float angle_cos)
+{
+  BLI_ASSERT_UNIT_V3(axis);
+
+  const float one_minus_cos = (1.0f - angle_cos);
+
+  /* axis scaled by the sine */
+  const float sx = axis[0] * angle_sin;
+  const float sy = axis[1] * angle_sin;
+  const float sz = axis[2] * angle_sin;
+
+  /* products of axis components scaled by (1 - cos) */
+  const float xx = (axis[0] * axis[0]) * one_minus_cos;
+  const float xy = (axis[0] * axis[1]) * one_minus_cos;
+  const float yy = (axis[1] * axis[1]) * one_minus_cos;
+  const float xz = (axis[0] * axis[2]) * one_minus_cos;
+  const float yz = (axis[1] * axis[2]) * one_minus_cos;
+  const float zz = (axis[2] * axis[2]) * one_minus_cos;
+
+  mat[0][0] = xx + angle_cos;
+  mat[0][1] = xy + sz;
+  mat[0][2] = xz - sy;
+
+  mat[1][0] = xy - sz;
+  mat[1][1] = yy + angle_cos;
+  mat[1][2] = yz + sx;
+
+  mat[2][0] = xz + sy;
+  mat[2][1] = yz - sx;
+  mat[2][2] = zz + angle_cos;
+}
+
+/* Axis angle to 3x3 matrix for an \a axis that is already normalized. */
+void axis_angle_normalized_to_mat3(float mat[3][3], const float axis[3], const float angle)
+{
+  const float angle_sin = sinf(angle);
+  const float angle_cos = cosf(angle);
+
+  axis_angle_normalized_to_mat3_ex(mat, axis, angle_sin, angle_cos);
+}
+
+/* Axis angle to 3x3 matrix - safer version (the axis is normalized here).
+ * A zero-length axis results in the unit matrix. */
+void axis_angle_to_mat3(float mat[3][3], const float axis[3], const float angle)
+{
+  float axis_unit[3];
+
+  /* normalizing removes unwanted scaling from the axis */
+  if (normalize_v3_v3(axis_unit, axis) != 0.0f) {
+    axis_angle_normalized_to_mat3(mat, axis_unit, angle);
+  }
+  else {
+    unit_m3(mat);
+  }
+}
+
+/* Axis angle to 4x4 matrix - safer version (the axis is normalized by #axis_angle_to_mat3). */
+void axis_angle_to_mat4(float mat[4][4], const float axis[3], const float angle)
+{
+  float rot[3][3];
+
+  axis_angle_to_mat3(rot, axis, angle);
+
+  /* start from identity, then copy in the rotation */
+  unit_m4(mat);
+  copy_m4_m3(mat, rot);
+}
+
+/* 3x3 matrix (already normalized) to axis angle */
+void mat3_normalized_to_axis_angle(float axis[3], float *angle, const float mat[3][3])
+{
+  float quat[4];
+
+  /* Goes through a quaternion as intermediate representation.
+   * TODO: it would be nicer to go straight there... */
+  mat3_normalized_to_quat(quat, mat);
+
+  quat_to_axis_angle(axis, angle, quat);
+}
+/* 3x3 matrix to axis angle (the matrix is normalized by #mat3_to_quat) */
+void mat3_to_axis_angle(float axis[3], float *angle, const float mat[3][3])
+{
+  float quat[4];
+
+  /* Goes through a quaternion as intermediate representation.
+   * TODO: it would be nicer to go straight there... */
+  mat3_to_quat(quat, mat);
+
+  quat_to_axis_angle(axis, angle, quat);
+}
+
+/* 4x4 matrix (already normalized) to axis angle */
+void mat4_normalized_to_axis_angle(float axis[3], float *angle, const float mat[4][4])
+{
+  float quat[4];
+
+  /* Goes through a quaternion as intermediate representation.
+   * TODO: it would be nicer to go straight there... */
+  mat4_normalized_to_quat(quat, mat);
+
+  quat_to_axis_angle(axis, angle, quat);
+}
+
+/* 4x4 matrix to axis angle (the matrix is normalized on the way, see #mat4_to_quat) */
+void mat4_to_axis_angle(float axis[3], float *angle, const float mat[4][4])
+{
+  float quat[4];
+
+  /* Goes through a quaternion as intermediate representation.
+   * TODO: it would be nicer to go straight there... */
+  mat4_to_quat(quat, mat);
+
+  quat_to_axis_angle(axis, angle, quat);
+}
+
+/* 4x4 rotation matrix from a single axis ('X', 'Y' or 'Z'), see #axis_angle_to_mat3_single. */
+void axis_angle_to_mat4_single(float mat[4][4], const char axis, const float angle)
+{
+  float rot[3][3];
+
+  axis_angle_to_mat3_single(rot, axis, angle);
+
+  copy_m4_m3(mat, rot);
+}
+
+/**
+ * Rotation matrix from a single axis.
+ *
+ * \param mat: resulting 3x3 rotation matrix.
+ * \param axis: 'X', 'Y' or 'Z'. Any other value asserts; when asserts are
+ * compiled out \a mat is set to the unit matrix instead of being left uninitialized.
+ * \param angle: rotation angle, passed to cosf/sinf.
+ */
+void axis_angle_to_mat3_single(float mat[3][3], const char axis, const float angle)
+{
+  const float angle_cos = cosf(angle);
+  const float angle_sin = sinf(angle);
+
+  switch (axis) {
+    case 'X': /* rotation around X */
+      mat[0][0] = 1.0f;
+      mat[0][1] = 0.0f;
+      mat[0][2] = 0.0f;
+      mat[1][0] = 0.0f;
+      mat[1][1] = angle_cos;
+      mat[1][2] = angle_sin;
+      mat[2][0] = 0.0f;
+      mat[2][1] = -angle_sin;
+      mat[2][2] = angle_cos;
+      break;
+    case 'Y': /* rotation around Y */
+      mat[0][0] = angle_cos;
+      mat[0][1] = 0.0f;
+      mat[0][2] = -angle_sin;
+      mat[1][0] = 0.0f;
+      mat[1][1] = 1.0f;
+      mat[1][2] = 0.0f;
+      mat[2][0] = angle_sin;
+      mat[2][1] = 0.0f;
+      mat[2][2] = angle_cos;
+      break;
+    case 'Z': /* rotation around Z */
+      mat[0][0] = angle_cos;
+      mat[0][1] = angle_sin;
+      mat[0][2] = 0.0f;
+      mat[1][0] = -angle_sin;
+      mat[1][1] = angle_cos;
+      mat[1][2] = 0.0f;
+      mat[2][0] = 0.0f;
+      mat[2][1] = 0.0f;
+      mat[2][2] = 1.0f;
+      break;
+    default:
+      BLI_assert(0);
+      /* Don't hand back uninitialized values when asserts are compiled out. */
+      unit_m3(mat);
+      break;
+  }
+}
+
+/* Fill \a mat with the 2D rotation matrix for \a angle. */
+void angle_to_mat2(float mat[2][2], const float angle)
+{
+  const float co = cosf(angle);
+  const float si = sinf(angle);
+
+  /* diagonal */
+  mat[0][0] = co;
+  mat[1][1] = co;
+
+  /* off-diagonal */
+  mat[0][1] = si;
+  mat[1][0] = -si;
+}
+
+/**
+ * Quaternion for a rotation of \a angle around a single axis.
+ *
+ * \param q: resulting quaternion.
+ * \param axis: 'X', 'Y' or 'Z'. Any other value asserts; when asserts are
+ * compiled out \a q is set to the identity quaternion instead of writing
+ * outside of the array.
+ * \param angle: rotation angle, half of it is passed to cosf/sinf.
+ */
+void axis_angle_to_quat_single(float q[4], const char axis, const float angle)
+{
+  const float angle_half = angle * 0.5f;
+
+  assert(axis >= 'X' && axis <= 'Z');
+
+  q[0] = cosf(angle_half);
+  q[1] = q[2] = q[3] = 0.0f;
+
+  if (axis >= 'X' && axis <= 'Z') {
+    /* 'X', 'Y', 'Z' map to the indices 1, 2, 3 */
+    q[(axis - 'X') + 1] = sinf(angle_half);
+  }
+  else {
+    /* invalid axis: no rotation, and no out-of-bounds write */
+    q[0] = 1.0f;
+  }
+}
+
+/****************************** Exponential Map ******************************/
+
+/* Unit quaternion to exponential map: the rotation axis scaled by the rotation angle. */
+void quat_normalized_to_expmap(float expmap[3], const float q[4])
+{
+  float rot_angle;
+
+  BLI_ASSERT_UNIT_QUAT(q);
+
+  /* Axis/angle representation first, the axis goes straight into the result. */
+  quat_to_axis_angle(expmap, &rot_angle, q);
+
+  /* Scaling the axis by the angle gives the exponential map. */
+  mul_v3_fl(expmap, rot_angle);
+}
+
+/* Quaternion to exponential map, the quaternion is normalized first. */
+void quat_to_expmap(float expmap[3], const float q[4])
+{
+  float q_unit[4];
+
+  normalize_qt_qt(q_unit, q);
+
+  quat_normalized_to_expmap(expmap, q_unit);
+}
+
+//void expmap_to_quat(float r[4], const float expmap[3])
+//{
+//  float axis[3];
+//  float angle;
+//
+//  /* Obtain axis/angle representation. */
+//  if (LIKELY((angle = normalize_v3_v3(axis, expmap)) != 0.0f)) {
+//    axis_angle_normalized_to_quat(r, axis, angle_wrap_rad(angle));
+//  }
+//  else {
+//    unit_qt(r);
+//  }
+//}
+
+/******************************** XYZ Eulers *********************************/
+
+/* Euler angles to 3x3 matrix, XYZ order */
+void eul_to_mat3(float mat[3][3], const float eul[3])
+{
+  /* cosine and sine per axis */
+  const double cx = cos(eul[0]);
+  const double cy = cos(eul[1]);
+  const double cz = cos(eul[2]);
+  const double sx = sin(eul[0]);
+  const double sy = sin(eul[1]);
+  const double sz = sin(eul[2]);
+
+  /* X/Z products shared by several elements */
+  const double cx_cz = cx * cz;
+  const double cx_sz = cx * sz;
+  const double sx_cz = sx * cz;
+  const double sx_sz = sx * sz;
+
+  mat[0][0] = (float)(cy * cz);
+  mat[0][1] = (float)(cy * sz);
+  mat[0][2] = (float)-sy;
+
+  mat[1][0] = (float)(sy * sx_cz - cx_sz);
+  mat[1][1] = (float)(sy * sx_sz + cx_cz);
+  mat[1][2] = (float)(cy * sx);
+
+  mat[2][0] = (float)(sy * cx_cz + sx_sz);
+  mat[2][1] = (float)(sy * cx_sz - sx_cz);
+  mat[2][2] = (float)(cy * cx);
+}
+
+/* Euler angles to 4x4 matrix, XYZ order.
+ * The part outside of the 3x3 rotation is filled in as identity. */
+void eul_to_mat4(float mat[4][4], const float eul[3])
+{
+  /* cosine and sine per axis */
+  const double cx = cos(eul[0]);
+  const double cy = cos(eul[1]);
+  const double cz = cos(eul[2]);
+  const double sx = sin(eul[0]);
+  const double sy = sin(eul[1]);
+  const double sz = sin(eul[2]);
+
+  /* X/Z products shared by several elements */
+  const double cx_cz = cx * cz;
+  const double cx_sz = cx * sz;
+  const double sx_cz = sx * cz;
+  const double sx_sz = sx * sz;
+
+  mat[0][0] = (float)(cy * cz);
+  mat[0][1] = (float)(cy * sz);
+  mat[0][2] = (float)-sy;
+
+  mat[1][0] = (float)(sy * sx_cz - cx_sz);
+  mat[1][1] = (float)(sy * sx_sz + cx_cz);
+  mat[1][2] = (float)(cy * sx);
+
+  mat[2][0] = (float)(sy * cx_cz + sx_sz);
+  mat[2][1] = (float)(sy * cx_sz - sx_cz);
+  mat[2][2] = (float)(cy * cx);
+
+  /* last row and column */
+  mat[3][0] = mat[3][1] = mat[3][2] = mat[0][3] = mat[1][3] = mat[2][3] = 0.0f;
+  mat[3][3] = 1.0f;
+}
+
+/* returns two euler calculation methods, so we can pick the best */
+
+/* XYZ order: fills `eul1` and `eul2` with two euler candidates computed from the same matrix. */
+static void mat3_normalized_to_eul2(const float mat[3][3], float eul1[3], float eul2[3])
+{
+  const float cos_y = hypotf(mat[0][0], mat[0][1]);
+  const bool is_regular = cos_y > 16.0f * FLT_EPSILON;
+
+  BLI_ASSERT_UNIT_M3(mat);
+
+  if (!is_regular) {
+    /* `cos_y` is too small: a single solution with the last angle set to zero is used twice. */
+    eul1[0] = atan2f(-mat[2][1], mat[1][1]);
+    eul1[1] = atan2f(-mat[0][2], cos_y);
+    eul1[2] = 0.0f;
+    copy_v3_v3(eul2, eul1);
+    return;
+  }
+
+  eul1[0] = atan2f(mat[1][2], mat[2][2]);
+  eul1[1] = atan2f(-mat[0][2], cos_y);
+  eul1[2] = atan2f(mat[0][1], mat[0][0]);
+
+  eul2[0] = atan2f(-mat[1][2], -mat[2][2]);
+  eul2[1] = atan2f(-mat[0][2], -cos_y);
+  eul2[2] = atan2f(-mat[0][1], -mat[0][0]);
+}
+
+/* XYZ order: convert a normalized 3x3 matrix to euler angles, choosing between the two
+ * candidate solutions. */
+void mat3_normalized_to_eul(float eul[3], const float mat[3][3])
+{
+  float cand_a[3], cand_b[3];
+  float mag_a, mag_b;
+
+  mat3_normalized_to_eul2(mat, cand_a, cand_b);
+
+  /* Sum of the absolute angles of each candidate. */
+  mag_a = fabsf(cand_a[0]) + fabsf(cand_a[1]) + fabsf(cand_a[2]);
+  mag_b = fabsf(cand_b[0]) + fabsf(cand_b[1]) + fabsf(cand_b[2]);
+
+  /* Return the candidate with the lowest values in it; the first one wins ties. */
+  copy_v3_v3(eul, (mag_a > mag_b) ? cand_b : cand_a);
+}
+void mat3_to_eul(float eul[3], const float mat[3][3])
+{
+  float normalized[3][3]; /* filled by normalize_m3_m3() from `mat` */
+  normalize_m3_m3(normalized, mat);
+  mat3_normalized_to_eul(eul, normalized); /* XYZ-order conversion of the normalized copy */
+}
+
+/* XYZ order: euler angles from the 3x3 that copy_m3_m4() takes out of `m`. */
+void mat4_normalized_to_eul(float eul[3], const float m[4][4])
+{
+  float rot3[3][3];
+  copy_m3_m4(rot3, m);
+  mat3_normalized_to_eul(eul, rot3); /* no normalization is applied on this path */
+}
+void mat4_to_eul(float eul[3], const float m[4][4])
+{
+  float rot3[3][3]; /* 3x3 taken from `m` by copy_m3_m4() */
+  copy_m3_m4(rot3, m);
+  mat3_to_eul(eul, rot3); /* mat3_to_eul() normalizes before converting */
+}
+
+/* XYZ order: convert a quaternion to euler angles by way of a 3x3 matrix. */
+void quat_to_eul(float eul[3], const float quat[4])
+{
+  float rot3[3][3];
+  quat_to_mat3(rot3, quat);
+  mat3_normalized_to_eul(eul, rot3);
+}
+
+/* XYZ order: build a quaternion from euler angles. */
+void eul_to_quat(float quat[4], const float eul[3])
+{
+  /* Half angles. */
+  const float hx = eul[0] * 0.5f;
+  const float hy = eul[1] * 0.5f;
+  const float hz = eul[2] * 0.5f;
+
+  const float cx = cosf(hx), sx = sinf(hx);
+  const float cy = cosf(hy), sy = sinf(hy);
+  const float cz = cosf(hz), sz = sinf(hz);
+
+  /* Products of the first and last half-angle terms. */
+  const float cx_cz = cx * cz;
+  const float cx_sz = cx * sz;
+  const float sx_cz = sx * cz;
+  const float sx_sz = sx * sz;
+
+  /* Combine them with the middle half-angle terms. */
+  quat[0] = cy * cx_cz + sy * sx_sz;
+  quat[1] = cy * sx_cz - sy * cx_sz;
+  quat[2] = cy * sx_sz + sy * cx_cz;
+  quat[3] = cy * cx_sz - sy * sx_cz;
+}
+
+/* XYZ order: rotate the euler `beul` in place by `ang` around one axis ('X', 'Y' or 'Z'). */
+void rotate_eul(float beul[3], const char axis, const float ang)
+{
+  float delta_eul[3] = {0.0f, 0.0f, 0.0f};
+  float delta_mat[3][3], base_mat[3][3], combined[3][3];
+
+  assert(axis >= 'X' && axis <= 'Z');
+  /* Put the angle on the requested axis; anything other than 'X' or 'Y' selects Z. */
+  switch (axis) {
+    case 'X':
+      delta_eul[0] = ang;
+      break;
+    case 'Y':
+      delta_eul[1] = ang;
+      break;
+    default:
+      delta_eul[2] = ang;
+      break;
+  }
+
+  eul_to_mat3(delta_mat, delta_eul);
+  eul_to_mat3(base_mat, beul);
+  mul_m3_m3m3(combined, base_mat, delta_mat);
+  mat3_to_eul(beul, combined);
+}
+
+/* Order independent: shift `eul` by whole turns so that it stays close to `oldrot`. */
+void compatible_eul(float eul[3], const float oldrot[3])
+{
+  /* M_PI would be the exact threshold, but 5.1 gives better results
+   * (original note: checked with baking actions to fcurves - campbell). */
+  const float wrap_threshold = (5.1f);
+  const float full_turn = (2.0f * (float)M_PI);
+
+  /* Per-axis difference between the new and the previous rotation. */
+  float delta[3];
+  unsigned int axis;
+
+  /* Pass 1: correct differences of about 360 degrees. */
+  for (axis = 0; axis < 3; axis++) {
+    delta[axis] = eul[axis] - oldrot[axis];
+
+    if (delta[axis] > wrap_threshold) {
+      eul[axis] -= floorf((delta[axis] / full_turn) + 0.5f) * full_turn;
+    }
+    else if (delta[axis] < -wrap_threshold) {
+      eul[axis] += floorf((-delta[axis] / full_turn) + 0.5f) * full_turn;
+    }
+    else {
+      /* Nothing was changed, so `delta` is already up to date. */
+      continue;
+    }
+
+    /* `eul` moved: refresh the difference that the second pass looks at. */
+    delta[axis] = eul[axis] - oldrot[axis];
+  }
+
+  /* Pass 2: is one axis rotation larger than 180 degrees (> 3.2) while the other
+   * two are small (< 1.6)? Then move that axis one full turn towards `oldrot`.
+   * Each axis is tested against the differences left by pass 1, never against
+   * values changed inside this loop, so the checks do not influence each other. */
+  for (axis = 0; axis < 3; axis++) {
+    const unsigned int other_a = (axis + 1) % 3;
+    const unsigned int other_b = (axis + 2) % 3;
+
+    if (fabsf(delta[axis]) > 3.2f && fabsf(delta[other_a]) < 1.6f &&
+        fabsf(delta[other_b]) < 1.6f) {
+      if (delta[axis] > 0.0f) {
+        eul[axis] -= full_turn;
+      }
+      else {
+        eul[axis] += full_turn;
+      }
+    }
+  }
+}
+
+/* uses 2 methods to retrieve eulers, and picks the closest */
+
+/* XYZ order: of the two candidate solutions, return the one closest to `oldrot`. */
+void mat3_normalized_to_compatible_eul(float eul[3], const float oldrot[3], float mat[3][3])
+{
+  float cand_a[3], cand_b[3];
+  float dist_a, dist_b;
+
+  mat3_normalized_to_eul2(mat, cand_a, cand_b);
+
+  /* Make both candidates compatible with the previous rotation before comparing. */
+  compatible_eul(cand_a, oldrot);
+  compatible_eul(cand_b, oldrot);
+
+  dist_a = fabsf(cand_a[0] - oldrot[0]) + fabsf(cand_a[1] - oldrot[1]) + fabsf(cand_a[2] - oldrot[2]);
+  dist_b = fabsf(cand_b[0] - oldrot[0]) + fabsf(cand_b[1] - oldrot[1]) + fabsf(cand_b[2] - oldrot[2]);
+
+  /* Lowest summed difference wins; the first candidate is kept on a tie. */
+  const float *closest = cand_a;
+  if (dist_a > dist_b) {
+    closest = cand_b;
+  }
+  copy_v3_v3(eul, closest);
+}
+void mat3_to_compatible_eul(float eul[3], const float oldrot[3], float mat[3][3])
+{
+  float normalized[3][3]; /* filled by normalize_m3_m3() from `mat` */
+  normalize_m3_m3(normalized, mat);
+  mat3_normalized_to_compatible_eul(eul, oldrot, normalized); /* XYZ order, near `oldrot` */
+}
+
+void quat_to_compatible_eul(float eul[3], const float oldrot[3], const float quat[4])
+{
+  float rot3[3][3]; /* matrix form of `quat`, produced by quat_to_mat3() */
+  quat_to_mat3(rot3, quat);
+  mat3_normalized_to_compatible_eul(eul, oldrot, rot3); /* XYZ order, near `oldrot` */
+}
+
+/************************** Arbitrary Order Eulers ***************************/
+
+/* Euler Rotation Order Code:
+ * was adapted from
+ *      ANSI C code from the article
+ *      "Euler Angle Conversion"
+ *      by Ken Shoemake, shoemake@graphics.cis.upenn.edu
+ *      in "Graphics Gems IV", Academic Press, 1994
+ * for use in Blender
+ */
+
+/* Type for rotation order info - see wiki for derivation details */
+typedef struct RotOrderInfo {
+  short axis[3]; /* axis indices (0 = X, 1 = Y, 2 = Z) in rotation order, e.g. {0, 2, 1} for XZY */
+  short parity; /* parity of axis permutation (even=0, odd=1) - 'n' in original code */
+} RotOrderInfo;
+
+/* Array of info for Rotation Order calculations, one entry per rotation order.
+ * WARNING: must be kept in same order as eEulerRotationOrders
+ */
+static const RotOrderInfo rotOrders[] = {
+    /* {i, j, k} axis indices, n = parity */
+    {{0, 1, 2}, 0}, /* XYZ */
+    {{0, 2, 1}, 1}, /* XZY */
+    {{1, 0, 2}, 1}, /* YXZ */
+    {{1, 2, 0}, 0}, /* YZX */
+    {{2, 0, 1}, 0}, /* ZXY */
+    {{2, 1, 0}, 1}  /* ZYX */
+};
+
+/* Look up the `rotOrders` entry for a rotation order value.
+ * NOTE: order values start at 1 while the array is indexed from 0, hence the -1.
+ *       Values below 1 map to the first entry, values of 6 or more to the last one. */
+static const RotOrderInfo *get_rotation_order_info(const short order)
+{
+  int index = order - 1;
+
+  assert(order >= 0 && order <= 6);
+
+  if (index < 0) {
+    index = 0;
+  }
+  else if (index > 4) {
+    index = 5;
+  }
+  return &rotOrders[index];
+}
+
+/* Construct quaternion from Euler angles (in radians), for the given rotation order. */
+void eulO_to_quat(float q[4], const float e[3], const short order)
+{
+  const RotOrderInfo *info = get_rotation_order_info(order);
+  const short ax0 = info->axis[0], ax1 = info->axis[1], ax2 = info->axis[2];
+  const bool odd_parity = (info->parity != 0);
+
+  /* Half angles (multiplied as float); the middle one is negated for odd-parity orders. */
+  const double half0 = e[ax0] * 0.5f;
+  const double half1 = e[ax1] * (odd_parity ? -0.5f : 0.5f);
+  const double half2 = e[ax2] * 0.5f;
+
+  const double c0 = cos(half0), s0 = sin(half0);
+  const double c1 = cos(half1), s1 = sin(half1);
+  const double c2 = cos(half2), s2 = sin(half2);
+
+  const double c0_c2 = c0 * c2;
+  const double c0_s2 = c0 * s2;
+  const double s0_c2 = s0 * c2;
+  const double s0_s2 = s0 * s2;
+
+  /* Components q[1..3], first written to the slots named by the rotation order. */
+  double vec[3];
+  vec[ax0] = c1 * s0_c2 - s1 * c0_s2;
+  vec[ax1] = c1 * s0_s2 + s1 * c0_c2;
+  vec[ax2] = c1 * c0_s2 - s1 * s0_c2;
+
+  q[0] = (float)(c1 * c0_c2 + s1 * s0_s2);
+  q[1] = (float)(vec[0]);
+  q[2] = (float)(vec[1]);
+  q[3] = (float)(vec[2]);
+
+  /* Odd-parity orders flip the sign of the middle-axis component. */
+  if (odd_parity) {
+    q[ax1 + 1] = -q[ax1 + 1];
+  }
+}
+
+/* Convert quaternion to Euler angles (in radians) for the given rotation order. */
+void quat_to_eulO(float e[3], short const order, const float q[4])
+{
+  float rot3[3][3];
+
+  quat_to_mat3(rot3, q);
+  mat3_normalized_to_eulO(e, order, rot3);
+}
+
+/* Construct 3x3 matrix from Euler angles (in radians), for the given rotation order.
+ * The entries match the fixed XYZ version, indexed through the order's axis triple. */
+void eulO_to_mat3(float M[3][3], const float e[3], const short order)
+{
+  const RotOrderInfo *info = get_rotation_order_info(order);
+  const short ax0 = info->axis[0];
+  const short ax1 = info->axis[1];
+  const short ax2 = info->axis[2];
+  const bool odd_parity = (info->parity != 0);
+
+  /* Odd-parity orders use all three angles negated. */
+  const double ang0 = odd_parity ? -e[ax0] : e[ax0];
+  const double ang1 = odd_parity ? -e[ax1] : e[ax1];
+  const double ang2 = odd_parity ? -e[ax2] : e[ax2];
+
+  const double c0 = cos(ang0);
+  const double c1 = cos(ang1);
+  const double c2 = cos(ang2);
+  const double s0 = sin(ang0);
+  const double s1 = sin(ang1);
+  const double s2 = sin(ang2);
+
+  const double c0_c2 = c0 * c2;
+  const double c0_s2 = c0 * s2;
+  const double s0_c2 = s0 * c2;
+  const double s0_s2 = s0 * s2;
+
+  /* Filled one row at a time. */
+  M[ax0][ax0] = (float)(c1 * c2);
+  M[ax0][ax1] = (float)(c1 * s2);
+  M[ax0][ax2] = (float)(-s1);
+
+  M[ax1][ax0] = (float)(s1 * s0_c2 - c0_s2);
+  M[ax1][ax1] = (float)(s1 * s0_s2 + c0_c2);
+  M[ax1][ax2] = (float)(c1 * s0);
+
+  M[ax2][ax0] = (float)(s1 * c0_c2 + s0_s2);
+  M[ax2][ax1] = (float)(s1 * c0_s2 - s0_c2);
+  M[ax2][ax2] = (float)(c1 * c0);
+}
+
+/* Fills `eul1` and `eul2` with two euler candidates for `mat`, so the caller can pick the best. */
+static void mat3_normalized_to_eulo2(const float mat[3][3],
+                                     float eul1[3],
+                                     float eul2[3],
+                                     const short order)
+{
+  const RotOrderInfo *info = get_rotation_order_info(order);
+  const short ax0 = info->axis[0], ax1 = info->axis[1], ax2 = info->axis[2];
+
+  BLI_ASSERT_UNIT_M3(mat);
+
+  const float cos_mid = hypotf(mat[ax0][ax0], mat[ax0][ax1]);
+
+  if (!(cos_mid > 16.0f * FLT_EPSILON)) {
+    /* `cos_mid` is too small: a single solution with the last angle set to zero is used twice. */
+    eul1[ax0] = atan2f(-mat[ax2][ax1], mat[ax1][ax1]);
+    eul1[ax1] = atan2f(-mat[ax0][ax2], cos_mid);
+    eul1[ax2] = 0.0f;
+    copy_v3_v3(eul2, eul1);
+  }
+  else {
+    eul1[ax0] = atan2f(mat[ax1][ax2], mat[ax2][ax2]);
+    eul1[ax1] = atan2f(-mat[ax0][ax2], cos_mid);
+    eul1[ax2] = atan2f(mat[ax0][ax1], mat[ax0][ax0]);
+
+    eul2[ax0] = atan2f(-mat[ax1][ax2], -mat[ax2][ax2]);
+    eul2[ax1] = atan2f(-mat[ax0][ax2], -cos_mid);
+    eul2[ax2] = atan2f(-mat[ax0][ax1], -mat[ax0][ax0]);
+  }
+
+  /* Odd-parity orders negate both results. */
+  if (info->parity) {
+    negate_v3(eul1);
+    negate_v3(eul2);
+  }
+}
+
+/* Construct 4x4 matrix from Euler angles (in radians) for the given rotation order. */
+void eulO_to_mat4(float mat[4][4], const float e[3], const short order)
+{
+  float rot3[3][3];
+
+  /* Build the 3x3 rotation first, then copy it over (the simple, slower route). */
+  eulO_to_mat3(rot3, e, order);
+  copy_m4_m3(mat, rot3);
+}
+
+/* Convert 3x3 matrix to Euler angles (in radians) for the given rotation order. */
+void mat3_normalized_to_eulO(float eul[3], const short order, const float m[3][3])
+{
+  float cand_a[3], cand_b[3];
+  float mag_a, mag_b;
+
+  mat3_normalized_to_eulo2(m, cand_a, cand_b, order);
+
+  /* Sum of the absolute angles of each candidate. */
+  mag_a = fabsf(cand_a[0]) + fabsf(cand_a[1]) + fabsf(cand_a[2]);
+  mag_b = fabsf(cand_b[0]) + fabsf(cand_b[1]) + fabsf(cand_b[2]);
+
+  /* Return the candidate with the lowest values in it; the first one wins ties. */
+  const float *best = cand_a;
+  if (mag_a > mag_b) {
+    best = cand_b;
+  }
+  copy_v3_v3(eul, best);
+}
+void mat3_to_eulO(float eul[3], const short order, const float m[3][3])
+{
+  float normalized[3][3]; /* filled by normalize_m3_m3() from `m` */
+  normalize_m3_m3(normalized, m);
+  mat3_normalized_to_eulO(eul, order, normalized); /* conversion for rotation order `order` */
+}
+
+/* Convert 4x4 matrix to Euler angles (in radians) for the given rotation order. */
+void mat4_normalized_to_eulO(float eul[3], const short order, const float m[4][4])
+{
+  float rot3[3][3];
+
+  /* Copy into a 3x3 first and reuse the 3x3 conversion (the simple, slower route). */
+  copy_m3_m4(rot3, m);
+  mat3_normalized_to_eulO(eul, order, rot3);
+}
+
+void mat4_to_eulO(float eul[3], const short order, const float m[4][4])
+{
+  float rot3[3][3]; /* 3x3 taken from `m` by copy_m3_m4() */
+  copy_m3_m4(rot3, m);
+  normalize_m3(rot3); /* normalize the local copy before converting */
+  mat3_normalized_to_eulO(eul, order, rot3);
+}
+
+/* Computes two euler candidates for rotation order `order` and returns the one closest to `oldrot`. */
+void mat3_normalized_to_compatible_eulO(float eul[3],
+                                        const float oldrot[3],
+                                        const short order,
+                                        const float mat[3][3])
+{
+  float cand_a[3], cand_b[3];
+  float dist_a, dist_b;
+
+  mat3_normalized_to_eulo2(mat, cand_a, cand_b, order);
+
+  /* Make both candidates compatible with the previous rotation before comparing. */
+  compatible_eul(cand_a, oldrot);
+  compatible_eul(cand_b, oldrot);
+
+  dist_a = fabsf(cand_a[0] - oldrot[0]) + fabsf(cand_a[1] - oldrot[1]) + fabsf(cand_a[2] - oldrot[2]);
+  dist_b = fabsf(cand_b[0] - oldrot[0]) + fabsf(cand_b[1] - oldrot[1]) + fabsf(cand_b[2] - oldrot[2]);
+
+  /* Lowest summed difference wins; the first candidate is kept on a tie. */
+  const float *closest = cand_a;
+  if (dist_a > dist_b) {
+    closest = cand_b;
+  }
+  copy_v3_v3(eul, closest);
+}
+void mat3_to_compatible_eulO(float eul[3],
+                             const float oldrot[3],
+                             const short order,
+                             const float mat[3][3])
+{
+  float normalized[3][3]; /* filled by normalize_m3_m3() from `mat` */
+
+  normalize_m3_m3(normalized, mat);
+  mat3_normalized_to_compatible_eulO(eul, oldrot, order, normalized);
+}
+
+void mat4_normalized_to_compatible_eulO(float eul[3],
+                                        const float oldrot[3],
+                                        const short order,
+                                        const float m[4][4])
+{
+  float rot3[3][3];
+
+  /* Copy into a 3x3 first and reuse the 3x3 conversion (the simple, slower route). */
+  copy_m3_m4(rot3, m);
+  mat3_normalized_to_compatible_eulO(eul, oldrot, order, rot3);
+}
+void mat4_to_compatible_eulO(float eul[3],
+                             const float oldrot[3],
+                             const short order,
+                             const float m[4][4])
+{
+  float rot3[3][3];
+
+  /* Copy into a 3x3, normalize that copy, then reuse the 3x3 conversion. */
+  copy_m3_m4(rot3, m);
+  normalize_m3(rot3);
+  mat3_normalized_to_compatible_eulO(eul, oldrot, order, rot3);
+}
+
+void quat_to_compatible_eulO(float eul[3],
+                             const float oldrot[3],
+                             const short order,
+                             const float quat[4])
+{
+  float rot3[3][3]; /* matrix form of `quat`, produced by quat_to_mat3() */
+
+  quat_to_mat3(rot3, quat);
+  mat3_normalized_to_compatible_eulO(eul, oldrot, order, rot3);
+}
+
+/* Rotate the euler `beul` (in rotation order `order`) in place by `ang` around `axis`. */
+/* NOTE: is this safe to do with different axis orders? */
+void rotate_eulO(float beul[3], const short order, char axis, float ang)
+{
+  float delta_eul[3];
+  float delta_mat[3][3], base_mat[3][3], combined[3][3];
+
+  assert(axis >= 'X' && axis <= 'Z');
+
+  /* Put the angle on the requested axis; anything other than 'X' or 'Y' selects Z. */
+  zero_v3(delta_eul);
+  switch (axis) {
+    case 'X':
+      delta_eul[0] = ang;
+      break;
+    case 'Y':
+      delta_eul[1] = ang;
+      break;
+    default:
+      delta_eul[2] = ang;
+      break;
+  }
+
+  eulO_to_mat3(delta_mat, delta_eul, order);
+  eulO_to_mat3(base_mat, beul, order);
+  mul_m3_m3m3(combined, base_mat, delta_mat);
+  mat3_to_eulO(beul, order, combined);
+}
+
+/* Writes three axis vectors into `gmat`, one row per axis of the rotation order. */
+void eulO_to_gimbal_axis(float gmat[3][3], const float eul[3], const short order)
+{
+  const RotOrderInfo *info = get_rotation_order_info(order);
+  const short first = info->axis[0], second = info->axis[1], last = info->axis[2];
+  float rot[3][3];
+  float partial_eul[3];
+
+  /* First axis is local: taken from the matrix of the complete rotation. */
+  eulO_to_mat3(rot, eul, order);
+  copy_v3_v3(gmat[first], rot[first]);
+
+  /* Second axis is local minus the first rotation: same, with the first angle zeroed. */
+  copy_v3_v3(partial_eul, eul);
+  partial_eul[first] = 0;
+  eulO_to_mat3(rot, partial_eul, order);
+  copy_v3_v3(gmat[second], rot[second]);
+
+  /* Last axis is global: the plain unit vector of that axis. */
+  zero_v3(gmat[last]);
+  gmat[last][last] = 1;
+}
+
+/******************************* Dual Quaternions ****************************/
+
+/**
+ * Conversion routines between (regular quaternion, translation) and
+ * dual quaternion.
+ *
+ * Version 1.0.0, February 7th, 2007
+ *
+ * Copyright (C) 2006-2007 University of Dublin, Trinity College, All Rights
+ * Reserved
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the author(s) be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ * Changes for Blender:
+ * - renaming, style changes and optimization's
+ * - added support for scaling
+ */
+//
+//void mat4_to_dquat(DualQuat *dq, const float basemat[4][4], const float mat[4][4])
+//{
+//  float *t, *q, dscale[3], scale[3], basequat[4], mat3[3][3];
+//  float baseRS[4][4], baseinv[4][4], baseR[4][4], baseRinv[4][4];
+//  float R[4][4], S[4][4];
+//
+//  /* split scaling and rotation, there is probably a faster way to do
+//   * this, it's done like this now to correctly get negative scaling */
+//  mul_m4_m4m4(baseRS, mat, basemat);
+//  mat4_to_size(scale, baseRS);
+//
+//  dscale[0] = scale[0] - 1.0f;
+//  dscale[1] = scale[1] - 1.0f;
+//  dscale[2] = scale[2] - 1.0f;
+//
+//  copy_m3_m4(mat3, mat);
+//
+//  if (!is_orthonormal_m3(mat3) || (determinant_m4(mat) < 0.0f) ||
+//      len_squared_v3(dscale) > SQUARE(1e-4f)) {
+//    /* extract R and S  */
+//    float tmp[4][4];
+//
+//    /* extra orthogonalize, to avoid flipping with stretched bones */
+//    copy_m4_m4(tmp, baseRS);
+//    orthogonalize_m4(tmp, 1);
+//    mat4_to_quat(basequat, tmp);
+//
+//    quat_to_mat4(baseR, basequat);
+//    copy_v3_v3(baseR[3], baseRS[3]);
+//
+//    invert_m4_m4(baseinv, basemat);
+//    mul_m4_m4m4(R, baseR, baseinv);
+//
+//    invert_m4_m4(baseRinv, baseR);
+//    mul_m4_m4m4(S, baseRinv, baseRS);
+//
+//    /* set scaling part */
+//    mul_m4_series(dq->scale, basemat, S, baseinv);
+//    dq->scale_weight = 1.0f;
+//  }
+//  else {
+//    /* matrix does not contain scaling */
+//    copy_m4_m4(R, mat);
+//    dq->scale_weight = 0.0f;
+//  }
+//
+//  /* non-dual part */
+//  mat4_to_quat(dq->quat, R);
+//
+//  /* dual part */
+//  t = R[3];
+//  q = dq->quat;
+//  dq->trans[0] = -0.5f * (t[0] * q[1] + t[1] * q[2] + t[2] * q[3]);
+//  dq->trans[1] = 0.5f * (t[0] * q[0] + t[1] * q[3] - t[2] * q[2]);
+//  dq->trans[2] = 0.5f * (-t[0] * q[3] + t[1] * q[0] + t[2] * q[1]);
+//  dq->trans[3] = 0.5f * (t[0] * q[2] - t[1] * q[1] + t[2] * q[0]);
+//}
+//
+//void dquat_to_mat4(float mat[4][4], const DualQuat *dq)
+//{
+//  float len, q0[4];
+//  const float *t;
+//
+//  /* regular quaternion */
+//  copy_qt_qt(q0, dq->quat);
+//
+//  /* normalize */
+//  len = sqrtf(dot_qtqt(q0, q0));
+//  if (len != 0.0f) {
+//    len = 1.0f / len;
+//  }
+//  mul_qt_fl(q0, len);
+//
+//  /* rotation */
+//  quat_to_mat4(mat, q0);
+//
+//  /* translation */
+//  t = dq->trans;
+//  mat[3][0] = 2.0f * (-t[0] * q0[1] + t[1] * q0[0] - t[2] * q0[3] + t[3] * q0[2]) * len;
+//  mat[3][1] = 2.0f * (-t[0] * q0[2] + t[1] * q0[3] + t[2] * q0[0] - t[3] * q0[1]) * len;
+//  mat[3][2] = 2.0f * (-t[0] * q0[3] - t[1] * q0[2] + t[2] * q0[1] + t[3] * q0[0]) * len;
+//
+//  /* scaling */
+//  if (dq->scale_weight) {
+//    mul_m4_m4m4(mat, mat, dq->scale);
+//  }
+//}
+//
+//void add_weighted_dq_dq(DualQuat *dqsum, const DualQuat *dq, float weight)
+//{
+//  bool flipped = false;
+//
+//  /* make sure we interpolate quats in the right direction */
+//  if (dot_qtqt(dq->quat, dqsum->quat) < 0) {
+//    flipped = true;
+//    weight = -weight;
+//  }
+//
+//  /* interpolate rotation and translation */
+//  dqsum->quat[0] += weight * dq->quat[0];
+//  dqsum->quat[1] += weight * dq->quat[1];
+//  dqsum->quat[2] += weight * dq->quat[2];
+//  dqsum->quat[3] += weight * dq->quat[3];
+//
+//  dqsum->trans[0] += weight * dq->trans[0];
+//  dqsum->trans[1] += weight * dq->trans[1];
+//  dqsum->trans[2] += weight * dq->trans[2];
+//  dqsum->trans[3] += weight * dq->trans[3];
+//
+//  /* Interpolate scale - but only if there is scale present. If any dual
+//   * quaternions without scale are added, they will be compensated for in
+//   * normalize_dq. */
+//  if (dq->scale_weight) {
+//    float wmat[4][4];
+//
+//    if (flipped) {
+//      /* we don't want negative weights for scaling */
+//      weight = -weight;
+//    }
+//
+//    copy_m4_m4(wmat, (float(*)[4])dq->scale);
+//    mul_m4_fl(wmat, weight);
+//    add_m4_m4m4(dqsum->scale, dqsum->scale, wmat);
+//    dqsum->scale_weight += weight;
+//  }
+//}
+//
+//void normalize_dq(DualQuat *dq, float totweight)
+//{
+//  const float scale = 1.0f / totweight;
+//
+//  mul_qt_fl(dq->quat, scale);
+//  mul_qt_fl(dq->trans, scale);
+//
+//  /* Handle scale if needed. */
+//  if (dq->scale_weight) {
+//    /* Compensate for any dual quaternions added without scale. This is an
+//     * optimization so that we can skip the scale part when not needed. */
+//    float addweight = totweight - dq->scale_weight;
+//
+//    if (addweight) {
+//      dq->scale[0][0] += addweight;
+//      dq->scale[1][1] += addweight;
+//      dq->scale[2][2] += addweight;
+//      dq->scale[3][3] += addweight;
+//    }
+//
+//    mul_m4_fl(dq->scale, scale);
+//    dq->scale_weight = 1.0f;
+//  }
+//}
+//
+//void mul_v3m3_dq(float co[3], float mat[3][3], DualQuat *dq)
+//{
+//  float M[3][3], t[3], scalemat[3][3], len2;
+//  float w = dq->quat[0], x = dq->quat[1], y = dq->quat[2], z = dq->quat[3];
+//  float t0 = dq->trans[0], t1 = dq->trans[1], t2 = dq->trans[2], t3 = dq->trans[3];
+//
+//  /* rotation matrix */
+//  M[0][0] = w * w + x * x - y * y - z * z;
+//  M[1][0] = 2 * (x * y - w * z);
+//  M[2][0] = 2 * (x * z + w * y);
+//
+//  M[0][1] = 2 * (x * y + w * z);
+//  M[1][1] = w * w + y * y - x * x - z * z;
+//  M[2][1] = 2 * (y * z - w * x);
+//
+//  M[0][2] = 2 * (x * z - w * y);
+//  M[1][2] = 2 * (y * z + w * x);
+//  M[2][2] = w * w + z * z - x * x - y * y;
+//
+//  len2 = dot_qtqt(dq->quat, dq->quat);
+//  if (len2 > 0.0f) {
+//    len2 = 1.0f / len2;
+//  }
+//
+//  /* translation */
+//  t[0] = 2 * (-t0 * x + w * t1 - t2 * z + y * t3);
+//  t[1] = 2 * (-t0 * y + t1 * z - x * t3 + w * t2);
+//  t[2] = 2 * (-t0 * z + x * t2 + w * t3 - t1 * y);
+//
+//  /* apply scaling */
+//  if (dq->scale_weight) {
+//    mul_m4_v3(dq->scale, co);
+//  }
+//
+//  /* apply rotation and translation */
+//  mul_m3_v3(M, co);
+//  co[0] = (co[0] + t[0]) * len2;
+//  co[1] = (co[1] + t[1]) * len2;
+//  co[2] = (co[2] + t[2]) * len2;
+//
+//  /* compute crazyspace correction mat */
+//  if (mat) {
+//    if (dq->scale_weight) {
+//      copy_m3_m4(scalemat, dq->scale);
+//      mul_m3_m3m3(mat, M, scalemat);
+//    }
+//    else {
+//      copy_m3_m3(mat, M);
+//    }
+//    mul_m3_fl(mat, len2);
+//  }
+//}
+//
+//void copy_dq_dq(DualQuat *dq1, const DualQuat *dq2)
+//{
+//  memcpy(dq1, dq2, sizeof(DualQuat));
+//}
+//
+///* axis matches eTrackToAxis_Modes */
+//void quat_apply_track(float quat[4], short axis, short upflag)
+//{
+//  /* rotations are hard coded to match vec_to_quat */
+//  const float sqrt_1_2 = (float)M_SQRT1_2;
+//  const float quat_track[][4] = {
+//      /* pos-y90 */
+//      {sqrt_1_2, 0.0, -sqrt_1_2, 0.0},
+//      /* Quaternion((1,0,0), radians(90)) * Quaternion((0,1,0), radians(90)) */
+//      {0.5, 0.5, 0.5, 0.5},
+//      /* pos-z90 */
+//      {sqrt_1_2, 0.0, 0.0, sqrt_1_2},
+//      /* neg-y90 */
+//      {sqrt_1_2, 0.0, sqrt_1_2, 0.0},
+//      /* Quaternion((1,0,0), radians(-90)) * Quaternion((0,1,0), radians(-90)) */
+//      {0.5, -0.5, -0.5, 0.5},
+//      /* no rotation */
+//      {0.0, sqrt_1_2, sqrt_1_2, 0.0},
+//  };
+//
+//  assert(axis >= 0 && axis <= 5);
+//  assert(upflag >= 0 && upflag <= 2);
+//
+//  mul_qt_qtqt(quat, quat, quat_track[axis]);
+//
+//  if (axis > 2) {
+//    axis = (short)(axis - 3);
+//  }
+//
+//  /* there are 2 possible up-axis for each axis used, the 'quat_track' applies so the first
+//   * up axis is used X->Y, Y->X, Z->X, if this first up axis isn't used then rotate 90d
+//   * the strange bit shift below just find the low axis {X:Y, Y:X, Z:X} */
+//  if (upflag != (2 - axis) >> 1) {
+//    float q[4] = {sqrt_1_2, 0.0, 0.0, 0.0};             /* assign 90d rotation axis */
+//    q[axis + 1] = ((axis == 1)) ? sqrt_1_2 : -sqrt_1_2; /* flip non Y axis */
+//    mul_qt_qtqt(quat, quat, q);
+//  }
+//}
+//
+//void vec_apply_track(float vec[3], short axis)
+//{
+//  float tvec[3];
+//
+//  assert(axis >= 0 && axis <= 5);
+//
+//  copy_v3_v3(tvec, vec);
+//
+//  switch (axis) {
+//    case 0: /* pos-x */
+//      /* vec[0] =  0.0; */
+//      vec[1] = tvec[2];
+//      vec[2] = -tvec[1];
+//      break;
+//    case 1: /* pos-y */
+//      /* vec[0] = tvec[0]; */
+//      /* vec[1] =  0.0; */
+//      /* vec[2] = tvec[2]; */
+//      break;
+//    case 2: /* pos-z */
+//      /* vec[0] = tvec[0]; */
+//      /* vec[1] = tvec[1]; */
+//      /* vec[2] =  0.0; */
+//      break;
+//    case 3: /* neg-x */
+//      /* vec[0] =  0.0; */
+//      vec[1] = tvec[2];
+//      vec[2] = -tvec[1];
+//      break;
+//    case 4: /* neg-y */
+//      vec[0] = -tvec[2];
+//      /* vec[1] =  0.0; */
+//      vec[2] = tvec[0];
+//      break;
+//    case 5: /* neg-z */
+//      vec[0] = -tvec[0];
+//      vec[1] = -tvec[1];
+//      /* vec[2] =  0.0; */
+//      break;
+//  }
+//}
+//
+///* lens/angle conversion (radians) */
+//float focallength_to_fov(float focal_length, float sensor)
+//{
+//  return 2.0f * atanf((sensor / 2.0f) / focal_length);
+//}
+//
+//float fov_to_focallength(float hfov, float sensor)
+//{
+//  return (sensor / 2.0f) / tanf(hfov * 0.5f);
+//}
+//
+///* 'mod_inline(-3, 4)= 1', 'fmod(-3, 4)= -3' */
+//static float mod_inline(float a, float b)
+//{
+//  return a - (b * floorf(a / b));
+//}
+//
+//float angle_wrap_rad(float angle)
+//{
+//  return mod_inline(angle + (float)M_PI, (float)M_PI * 2.0f) - (float)M_PI;
+//}
+//
+//float angle_wrap_deg(float angle)
+//{
+//  return mod_inline(angle + 180.0f, 360.0f) - 180.0f;
+//}
+//
+///* returns an angle compatible with angle_compat */
+//float angle_compat_rad(float angle, float angle_compat)
+//{
+//  return angle_compat + angle_wrap_rad(angle - angle_compat);
+//}
+//
+///* axis conversion */
+//static float _axis_convert_matrix[23][3][3] = {
+//    {{-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, 1.0}},
+//    {{-1.0, 0.0, 0.0}, {0.0, 0.0, -1.0}, {0.0, -1.0, 0.0}},
+//    {{-1.0, 0.0, 0.0}, {0.0, 0.0, 1.0}, {0.0, 1.0, 0.0}},
+//    {{-1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, -1.0}},
+//    {{0.0, -1.0, 0.0}, {-1.0, 0.0, 0.0}, {0.0, 0.0, -1.0}},
+//    {{0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}},
+//    {{0.0, 0.0, -1.0}, {-1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}},
+//    {{0.0, 1.0, 0.0}, {-1.0, 0.0, 0.0}, {0.0, 0.0, 1.0}},
+//    {{0.0, -1.0, 0.0}, {0.0, 0.0, 1.0}, {-1.0, 0.0, 0.0}},
+//    {{0.0, 0.0, -1.0}, {0.0, -1.0, 0.0}, {-1.0, 0.0, 0.0}},
+//    {{0.0, 0.0, 1.0}, {0.0, 1.0, 0.0}, {-1.0, 0.0, 0.0}},
+//    {{0.0, 1.0, 0.0}, {0.0, 0.0, -1.0}, {-1.0, 0.0, 0.0}},
+//    {{0.0, -1.0, 0.0}, {0.0, 0.0, -1.0}, {1.0, 0.0, 0.0}},
+//    {{0.0, 0.0, 1.0}, {0.0, -1.0, 0.0}, {1.0, 0.0, 0.0}},
+//    {{0.0, 0.0, -1.0}, {0.0, 1.0, 0.0}, {1.0, 0.0, 0.0}},
+//    {{0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}, {1.0, 0.0, 0.0}},
+//    {{0.0, -1.0, 0.0}, {1.0, 0.0, 0.0}, {0.0, 0.0, 1.0}},
+//    {{0.0, 0.0, -1.0}, {1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}},
+//    {{0.0, 0.0, 1.0}, {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}},
+//    {{0.0, 1.0, 0.0}, {1.0, 0.0, 0.0}, {0.0, 0.0, -1.0}},
+//    {{1.0, 0.0, 0.0}, {0.0, -1.0, 0.0}, {0.0, 0.0, -1.0}},
+//    {{1.0, 0.0, 0.0}, {0.0, 0.0, 1.0}, {0.0, -1.0, 0.0}},
+//    {{1.0, 0.0, 0.0}, {0.0, 0.0, -1.0}, {0.0, 1.0, 0.0}},
+//};
+//
+//static int _axis_convert_lut[23][24] = {
+//    {0x8C8, 0x4D0, 0x2E0, 0xAE8, 0x701, 0x511, 0x119, 0xB29, 0x682, 0x88A, 0x09A, 0x2A2,
+//     0x80B, 0x413, 0x223, 0xA2B, 0x644, 0x454, 0x05C, 0xA6C, 0x745, 0x94D, 0x15D, 0x365},
+//    {0xAC8, 0x8D0, 0x4E0, 0x2E8, 0x741, 0x951, 0x159, 0x369, 0x702, 0xB0A, 0x11A, 0x522,
+//     0xA0B, 0x813, 0x423, 0x22B, 0x684, 0x894, 0x09C, 0x2AC, 0x645, 0xA4D, 0x05D, 0x465},
+//    {0x4C8, 0x2D0, 0xAE0, 0x8E8, 0x681, 0x291, 0x099, 0x8A9, 0x642, 0x44A, 0x05A, 0xA62,
+//     0x40B, 0x213, 0xA23, 0x82B, 0x744, 0x354, 0x15C, 0x96C, 0x705, 0x50D, 0x11D, 0xB25},
+//    {0x2C8, 0xAD0, 0x8E0, 0x4E8, 0x641, 0xA51, 0x059, 0x469, 0x742, 0x34A, 0x15A, 0x962,
+//     0x20B, 0xA13, 0x823, 0x42B, 0x704, 0xB14, 0x11C, 0x52C, 0x685, 0x28D, 0x09D, 0x8A5},
+//    {0x708, 0xB10, 0x120, 0x528, 0x8C1, 0xAD1, 0x2D9, 0x4E9, 0x942, 0x74A, 0x35A, 0x162,
+//     0x64B, 0xA53, 0x063, 0x46B, 0x804, 0xA14, 0x21C, 0x42C, 0x885, 0x68D, 0x29D, 0x0A5},
+//    {0xB08, 0x110, 0x520, 0x728, 0x941, 0x151, 0x359, 0x769, 0x802, 0xA0A, 0x21A, 0x422,
+//     0xA4B, 0x053, 0x463, 0x66B, 0x884, 0x094, 0x29C, 0x6AC, 0x8C5, 0xACD, 0x2DD, 0x4E5},
+//    {0x508, 0x710, 0xB20, 0x128, 0x881, 0x691, 0x299, 0x0A9, 0x8C2, 0x4CA, 0x2DA, 0xAE2,
+//     0x44B, 0x653, 0xA63, 0x06B, 0x944, 0x754, 0x35C, 0x16C, 0x805, 0x40D, 0x21D, 0xA25},
+//    {0x108, 0x510, 0x720, 0xB28, 0x801, 0x411, 0x219, 0xA29, 0x882, 0x08A, 0x29A, 0x6A2,
+//     0x04B, 0x453, 0x663, 0xA6B, 0x8C4, 0x4D4, 0x2DC, 0xAEC, 0x945, 0x14D, 0x35D, 0x765},
+//    {0x748, 0x350, 0x160, 0x968, 0xAC1, 0x2D1, 0x4D9, 0x8E9, 0xA42, 0x64A, 0x45A, 0x062,
+//     0x68B, 0x293, 0x0A3, 0x8AB, 0xA04, 0x214, 0x41C, 0x82C, 0xB05, 0x70D, 0x51D, 0x125},
+//    {0x948, 0x750, 0x360, 0x168, 0xB01, 0x711, 0x519, 0x129, 0xAC2, 0x8CA, 0x4DA, 0x2E2,
+//     0x88B, 0x693, 0x2A3, 0x0AB, 0xA44, 0x654, 0x45C, 0x06C, 0xA05, 0x80D, 0x41D, 0x225},
+//    {0x348, 0x150, 0x960, 0x768, 0xA41, 0x051, 0x459, 0x669, 0xA02, 0x20A, 0x41A, 0x822,
+//     0x28B, 0x093, 0x8A3, 0x6AB, 0xB04, 0x114, 0x51C, 0x72C, 0xAC5, 0x2CD, 0x4DD, 0x8E5},
+//    {0x148, 0x950, 0x760, 0x368, 0xA01, 0x811, 0x419, 0x229, 0xB02, 0x10A, 0x51A, 0x722,
+//     0x08B, 0x893, 0x6A3, 0x2AB, 0xAC4, 0x8D4, 0x4DC, 0x2EC, 0xA45, 0x04D, 0x45D, 0x665},
+//    {0x688, 0x890, 0x0A0, 0x2A8, 0x4C1, 0x8D1, 0xAD9, 0x2E9, 0x502, 0x70A, 0xB1A, 0x122,
+//     0x74B, 0x953, 0x163, 0x36B, 0x404, 0x814, 0xA1C, 0x22C, 0x445, 0x64D, 0xA5D, 0x065},
+//    {0x888, 0x090, 0x2A0, 0x6A8, 0x501, 0x111, 0xB19, 0x729, 0x402, 0x80A, 0xA1A, 0x222,
+//     0x94B, 0x153, 0x363, 0x76B, 0x444, 0x054, 0xA5C, 0x66C, 0x4C5, 0x8CD, 0xADD, 0x2E5},
+//    {0x288, 0x690, 0x8A0, 0x0A8, 0x441, 0x651, 0xA59, 0x069, 0x4C2, 0x2CA, 0xADA, 0x8E2,
+//     0x34B, 0x753, 0x963, 0x16B, 0x504, 0x714, 0xB1C, 0x12C, 0x405, 0x20D, 0xA1D, 0x825},
+//    {0x088, 0x290, 0x6A0, 0x8A8, 0x401, 0x211, 0xA19, 0x829, 0x442, 0x04A, 0xA5A, 0x662,
+//     0x14B, 0x353, 0x763, 0x96B, 0x4C4, 0x2D4, 0xADC, 0x8EC, 0x505, 0x10D, 0xB1D, 0x725},
+//    {0x648, 0x450, 0x060, 0xA68, 0x2C1, 0x4D1, 0x8D9, 0xAE9, 0x282, 0x68A, 0x89A, 0x0A2,
+//     0x70B, 0x513, 0x123, 0xB2B, 0x204, 0x414, 0x81C, 0xA2C, 0x345, 0x74D, 0x95D, 0x165},
+//    {0xA48, 0x650, 0x460, 0x068, 0x341, 0x751, 0x959, 0x169, 0x2C2, 0xACA, 0x8DA, 0x4E2,
+//     0xB0B, 0x713, 0x523, 0x12B, 0x284, 0x694, 0x89C, 0x0AC, 0x205, 0xA0D, 0x81D, 0x425},
+//    {0x448, 0x050, 0xA60, 0x668, 0x281, 0x091, 0x899, 0x6A9, 0x202, 0x40A, 0x81A, 0xA22,
+//     0x50B, 0x113, 0xB23, 0x72B, 0x344, 0x154, 0x95C, 0x76C, 0x2C5, 0x4CD, 0x8DD, 0xAE5},
+//    {0x048, 0xA50, 0x660, 0x468, 0x201, 0xA11, 0x819, 0x429, 0x342, 0x14A, 0x95A, 0x762,
+//     0x10B, 0xB13, 0x723, 0x52B, 0x2C4, 0xAD4, 0x8DC, 0x4EC, 0x285, 0x08D, 0x89D, 0x6A5},
+//    {0x808, 0xA10, 0x220, 0x428, 0x101, 0xB11, 0x719, 0x529, 0x142, 0x94A, 0x75A, 0x362,
+//     0x8CB, 0xAD3, 0x2E3, 0x4EB, 0x044, 0xA54, 0x65C, 0x46C, 0x085, 0x88D, 0x69D, 0x2A5},
+//    {0xA08, 0x210, 0x420, 0x828, 0x141, 0x351, 0x759, 0x969, 0x042, 0xA4A, 0x65A, 0x462,
+//     0xACB, 0x2D3, 0x4E3, 0x8EB, 0x084, 0x294, 0x69C, 0x8AC, 0x105, 0xB0D, 0x71D, 0x525},
+//    {0x408, 0x810, 0xA20, 0x228, 0x081, 0x891, 0x699, 0x2A9, 0x102, 0x50A, 0x71A, 0xB22,
+//     0x4CB, 0x8D3, 0xAE3, 0x2EB, 0x144, 0x954, 0x75C, 0x36C, 0x045, 0x44D, 0x65D, 0xA65},
+//};
+//
+//// _axis_convert_num = {'X': 0, 'Y': 1, 'Z': 2, '-X': 3, '-Y': 4, '-Z': 5}
+//
+//BLI_INLINE int _axis_signed(const int axis)
+//{
+//  return (axis < 3) ? axis : axis - 3;
+//}
+//
+///**
+// * Each argument is an axis in ['X', 'Y', 'Z', '-X', '-Y', '-Z']
+// * where the first 2 are a source and the second 2 are the target.
+// */
+//bool mat3_from_axis_conversion(
+//    int src_forward, int src_up, int dst_forward, int dst_up, float r_mat[3][3])
+//{
+//  // from functools import reduce
+//  int value;
+//
+//  if (src_forward == dst_forward && src_up == dst_up) {
+//    unit_m3(r_mat);
+//    return false;
+//  }
+//
+//  if ((_axis_signed(src_forward) == _axis_signed(src_up)) ||
+//      (_axis_signed(dst_forward) == _axis_signed(dst_up))) {
+//    /* we could assert here! */
+//    unit_m3(r_mat);
+//    return false;
+//  }
+//
+//  value = ((src_forward << (0 * 3)) | (src_up << (1 * 3)) | (dst_forward << (2 * 3)) |
+//           (dst_up << (3 * 3)));
+//
+//  for (uint i = 0; i < (sizeof(_axis_convert_matrix) / sizeof(*_axis_convert_matrix)); i++) {
+//    for (uint j = 0; j < (sizeof(*_axis_convert_lut) / sizeof(*_axis_convert_lut[0])); j++) {
+//      if (_axis_convert_lut[i][j] == value) {
+//        copy_m3_m3(r_mat, _axis_convert_matrix[i]);
+//        return true;
+//      }
+//    }
+//  }
+//  //  BLI_assert(0);
+//  return false;
+//}
+//
+///**
+// * Use when the second axis can be guessed.
+// */
+//bool mat3_from_axis_conversion_single(int src_axis, int dst_axis, float r_mat[3][3])
+//{
+//  if (src_axis == dst_axis) {
+//    unit_m3(r_mat);
+//    return false;
+//  }
+//
+//  /* Pick predictable next axis. */
+//  int src_axis_next = (src_axis + 1) % 3;
+//  int dst_axis_next = (dst_axis + 1) % 3;
+//
+//  if ((src_axis < 3) != (dst_axis < 3)) {
+//    /* Flip both axis so matrix sign remains positive. */
+//    dst_axis_next += 3;
+//  }
+//
+//  return mat3_from_axis_conversion(src_axis, src_axis_next, dst_axis, dst_axis_next, r_mat);
+//}
diff --git a/client/blenlib/math_vector.cpp b/client/blenlib/math_vector.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a65460541d989ffdcf64c11fb14c7cb4636658c9
--- /dev/null
+++ b/client/blenlib/math_vector.cpp
@@ -0,0 +1,1404 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ *
+ * */
+
+/** \file
+ * \ingroup bli
+ */
+
+#include "BLI_math.h"
+
+#include "BLI_strict_flags.h"
+
+//******************************* Interpolation *******************************/
+
+void interp_v2_v2v2(float target[2], const float a[2], const float b[2], const float t)
+{
+  const float weight_a = 1.0f - t; /* `a` is weighted by (1 - t), `b` by t */
+  for (int i = 0; i < 2; i++) {
+    target[i] = weight_a * a[i] + t * b[i];
+  }
+}
+
+/* Weighted sum of three 2D vectors; `w` is 3 scalar weights (not a vector), used as given. */
+void interp_v2_v2v2v2(
+    float p[2], const float v1[2], const float v2[2], const float v3[2], const float w[3])
+{
+  for (int i = 0; i < 2; i++) {
+    p[i] = v1[i] * w[0] + v2[i] * w[1] + v3[i] * w[2];
+  }
+}
+
+void interp_v3_v3v3(float target[3], const float a[3], const float b[3], const float t)
+{
+  /* Per-component linear blend: `a` is weighted by (1 - t) and `b` by t. */
+  const float weight_a = 1.0f - t;
+  for (int i = 0; i < 3; i++) {
+    target[i] = weight_a * a[i] + t * b[i];
+  }
+}
+
+void interp_v4_v4v4(float target[4], const float a[4], const float b[4], const float t)
+{
+  /* Per-component linear blend of two 4D vectors:
+   * `a` is weighted by (1 - t) and `b` by t. */
+  const float weight_a = 1.0f - t;
+  for (int i = 0; i < 4; i++) {
+    target[i] = weight_a * a[i] + t * b[i];
+  }
+}
+
+/**
+ * Spherical linear interpolation between two unit-length 3D directions.
+ * \see #interp_qt_qtqt
+ *
+ * \return false (leaving \a target untouched) when \a a and \a b point in opposite
+ * directions, true otherwise.
+ */
+bool interp_v3_v3v3_slerp(float target[3], const float a[3], const float b[3], const float t)
+{
+  BLI_ASSERT_UNIT_V3(a);
+  BLI_ASSERT_UNIT_V3(b);
+
+  /* Cosine of the angle between the inputs (both are unit length). */
+  const float cos_angle = dot_v3v3(a, b);
+
+  /* Direct opposites: give up and let the caller decide what to do. */
+  if (UNLIKELY(cos_angle < (-1.0f + FLT_EPSILON))) {
+    return false;
+  }
+
+  float weights[2];
+  interp_dot_slerp(t, cos_angle, weights);
+  for (int i = 0; i < 3; i++) {
+    target[i] = weights[0] * a[i] + weights[1] * b[i];
+  }
+
+  return true;
+}
+/* 2D version of #interp_v3_v3v3_slerp: slerp between two unit-length vectors.
+ * \return false (leaving \a target untouched) only when \a a and \a b are direct opposites. */
+bool interp_v2_v2v2_slerp(float target[2], const float a[2], const float b[2], const float t)
+{
+  BLI_ASSERT_UNIT_V2(a);
+  BLI_ASSERT_UNIT_V2(b);
+
+  const float cosom = dot_v2v2(a, b);
+
+  /* Direct opposites (cosom close to -1). The threshold has to be negative: the dot product
+   * of unit vectors never exceeds 1, so testing against +1 would reject every input. */
+  if (UNLIKELY(cosom < (-1.0f + FLT_EPSILON))) {
+    return false;
+  }
+
+  float w[2];
+  interp_dot_slerp(t, cosom, w);
+  target[0] = w[0] * a[0] + w[1] * b[0];
+  target[1] = w[0] * a[1] + w[1] * b[1];
+  return true;
+}
+
+/**
+ * Same as #interp_v3_v3v3_slerp, but when that fails (opposite inputs) the result is
+ * interpolated by way of a fallback vector orthogonal to \a a instead.
+ */
+void interp_v3_v3v3_slerp_safe(float target[3], const float a[3], const float b[3], const float t)
+{
+  if (UNLIKELY(!interp_v3_v3v3_slerp(target, a, b, t))) {
+    /* Any vector orthogonal to `a` is an acceptable way-point: the first half of `t`
+     * covers a -> ortho, the second half covers ortho -> b. */
+    float ab_ortho[3];
+    ortho_v3_v3(ab_ortho, a);
+    normalize_v3(ab_ortho);
+
+    const bool first_half = (t < 0.5f);
+    const float *from = first_half ? a : ab_ortho;
+    const float *to = first_half ? ab_ortho : b;
+    const float t_half = first_half ? (t * 2.0f) : ((t - 0.5f) * 2.0f);
+    /* Not expected to fail (hence the assert); fall back to the nearer end point. */
+    if (UNLIKELY(!interp_v3_v3v3_slerp(target, from, to, t_half))) {
+      BLI_assert(0);
+      copy_v3_v3(target, first_half ? a : b);
+    }
+  }
+}
+/* 2D version of #interp_v3_v3v3_slerp_safe: when #interp_v2_v2v2_slerp fails, interpolate
+ * by way of the vector perpendicular to \a a instead. */
+void interp_v2_v2v2_slerp_safe(float target[2], const float a[2], const float b[2], const float t)
+{
+  if (UNLIKELY(!interp_v2_v2v2_slerp(target, a, b, t))) {
+    /* The first half of `t` covers a -> ortho, the second half covers ortho -> b.
+     * No normalization: ortho_v2_v2 swaps/negates components, keeping the length of `a`. */
+    float ab_ortho[2];
+    ortho_v2_v2(ab_ortho, a);
+
+    const bool first_half = (t < 0.5f);
+    const float *from = first_half ? a : ab_ortho;
+    const float *to = first_half ? ab_ortho : b;
+    const float t_half = first_half ? (t * 2.0f) : ((t - 0.5f) * 2.0f);
+    /* Not expected to fail (hence the assert); fall back to the nearer end point. */
+    if (UNLIKELY(!interp_v2_v2v2_slerp(target, from, to, t_half))) {
+      BLI_assert(0);
+      copy_v2_v2(target, first_half ? a : b);
+    }
+  }
+}
+
+/** \name Cubic curve interpolation (bezier spline).
+ * \{ */
+
+void interp_v2_v2v2v2v2_cubic(float p[2],
+                              const float v1[2],
+                              const float v2[2],
+                              const float v3[2],
+                              const float v4[2],
+                              const float u)
+{
+  /* De Casteljau evaluation: keep blending neighboring points by `u` until one is left. */
+  float level1[3][2], level2[2][2];
+
+  interp_v2_v2v2(level1[0], v1, v2, u);
+  interp_v2_v2v2(level1[1], v2, v3, u);
+  interp_v2_v2v2(level1[2], v3, v4, u);
+
+  interp_v2_v2v2(level2[0], level1[0], level1[1], u);
+  interp_v2_v2v2(level2[1], level1[1], level1[2], u);
+  interp_v2_v2v2(p, level2[0], level2[1], u);
+}
+
+/** \} */
+
+/* Weighted sum of three 3D vectors.
+ * `w` is 3 scalar weights (not a vector); they are used as given, without normalizing. */
+void interp_v3_v3v3v3(
+    float p[3], const float v1[3], const float v2[3], const float v3[3], const float w[3])
+{
+  for (int i = 0; i < 3; i++) {
+    p[i] = v1[i] * w[0] + v2[i] * w[1] + v3[i] * w[2];
+  }
+}
+
+/* Weighted sum of four 3D vectors.
+ * `w` is 4 scalar weights (not a vector); they are used as given, without normalizing. */
+void interp_v3_v3v3v3v3(float p[3],
+                        const float v1[3],
+                        const float v2[3],
+                        const float v3[3],
+                        const float v4[3],
+                        const float w[4])
+{
+  for (int i = 0; i < 3; i++) {
+    p[i] = v1[i] * w[0] + v2[i] * w[1] + v3[i] * w[2] + v4[i] * w[3];
+  }
+}
+
+/* Weighted sum of three 4D vectors; `w` is 3 scalar weights (not a vector), used as given. */
+void interp_v4_v4v4v4(
+    float p[4], const float v1[4], const float v2[4], const float v3[4], const float w[3])
+{
+  for (int i = 0; i < 4; i++) {
+    p[i] = v1[i] * w[0] + v2[i] * w[1] + v3[i] * w[2];
+  }
+}
+
+/* Weighted sum of four 4D vectors; `w` is 4 scalar weights (not a vector), used as given. */
+void interp_v4_v4v4v4v4(float p[4],
+                        const float v1[4],
+                        const float v2[4],
+                        const float v3[4],
+                        const float v4[4],
+                        const float w[4])
+{
+  for (int i = 0; i < 4; i++) {
+    p[i] = v1[i] * w[0] + v2[i] * w[1] + v3[i] * w[2] + v4[i] * w[3];
+  }
+}
+
+void interp_v3_v3v3v3_uv(
+    float p[3], const float v1[3], const float v2[3], const float v3[3], const float uv[2])
+{
+  for (int i = 0; i < 3; i++) { /* v1 + (v2 - v1) * u + (v3 - v1) * v */
+    p[i] = v1[i] + ((v2[i] - v1[i]) * uv[0]) + ((v3[i] - v1[i]) * uv[1]);
+  }
+}
+
+void interp_v3_v3v3_uchar(char unsigned target[3],
+                          const unsigned char a[3],
+                          const unsigned char b[3],
+                          const float t)
+{
+  /* Blend in float, then round down (floorf) before storing each byte. */
+  const float weight_a = 1.0f - t;
+  for (int i = 0; i < 3; i++) {
+    target[i] = (unsigned char)floorf(weight_a * a[i] + t * b[i]);
+  }
+}
+void interp_v3_v3v3_char(char target[3], const char a[3], const char b[3], const float t)
+{
+  unsigned char *target_u = (unsigned char *)target; /* bytes are blended as unsigned */
+  interp_v3_v3v3_uchar(target_u, (const unsigned char *)a, (const unsigned char *)b, t);
+}
+
+void interp_v4_v4v4_uchar(char unsigned target[4],
+                          const unsigned char a[4],
+                          const unsigned char b[4],
+                          const float t)
+{
+  /* Blend in float (`a` weighted by 1 - t, `b` by t),
+   * then round down (floorf) before storing each byte. */
+  const float weight_a = 1.0f - t;
+  for (int i = 0; i < 4; i++) {
+    target[i] = (unsigned char)floorf(weight_a * a[i] + t * b[i]);
+  }
+}
+void interp_v4_v4v4_char(char target[4], const char a[4], const char b[4], const float t)
+{
+  unsigned char *target_u = (unsigned char *)target; /* bytes are blended as unsigned */
+  interp_v4_v4v4_uchar(target_u, (const unsigned char *)a, (const unsigned char *)b, t);
+}
+
+void mid_v3_v3v3(float v[3], const float v1[3], const float v2[3])
+{
+  for (int i = 0; i < 3; i++) { /* component-wise midpoint of the two inputs */
+    v[i] = 0.5f * (v1[i] + v2[i]);
+  }
+}
+
+void mid_v2_v2v2(float v[2], const float v1[2], const float v2[2])
+{
+  v[0] = (v1[0] + v2[0]) * 0.5f; /* component-wise midpoint of the two inputs */
+  v[1] = (v1[1] + v2[1]) * 0.5f;
+}
+
+void mid_v3_v3v3v3(float v[3], const float v1[3], const float v2[3], const float v3[3])
+{
+  for (int i = 0; i < 3; i++) { /* component-wise mean of the three inputs */
+    v[i] = (v1[i] + v2[i] + v3[i]) / 3.0f;
+  }
+}
+
+void mid_v3_v3v3v3v3(
+    float v[3], const float v1[3], const float v2[3], const float v3[3], const float v4[3])
+{
+  for (int i = 0; i < 3; i++) { /* component-wise mean of the four inputs */
+    v[i] = (v1[i] + v2[i] + v3[i] + v4[i]) / 4.0f;
+  }
+}
+
+void mid_v3_v3_array(float r[3], const float (*vec_arr)[3], const unsigned int nbr)
+{
+  /* Accumulate every vector pre-scaled by 1/nbr, so `r` ends up holding their mean. */
+  const float inv_count = 1.0f / (float)nbr;
+  zero_v3(r);
+  for (unsigned int index = 0; index != nbr; index++) {
+    madd_v3_v3fl(r, vec_arr[index], inv_count);
+  }
+}
+
+/**
+ * Specialized function for calculating normals.
+ * Fast-path for:
+ *
+ * \code{.c}
+ * add_v3_v3v3(r, a, b);
+ * normalize_v3(r)
+ * mul_v3_fl(r, angle_normalized_v3v3(a, b) / M_PI_2);
+ * \endcode
+ *
+ * The length of (a + b) is enough to recover the angle, which avoids computing it twice.
+ * \a a and \a b must be unit length.
+ */
+void mid_v3_v3v3_angle_weighted(float r[3], const float a[3], const float b[3])
+{
+  /* Both inputs must already be normalized. */
+  BLI_ASSERT_UNIT_V3(a);
+  BLI_ASSERT_UNIT_V3(b);
+
+  add_v3_v3v3(r, a, b);
+
+  /* Half the length that normalize_v3() reports for (a + b) is fed to acosf(),
+   * which recovers half of the angle between the two inputs. */
+  const float half_angle = acosf(normalize_v3(r) / 2.0f);
+
+  /* Normally the half angle would only be multiplied by 2,
+   * but instead of an angle the result is scaled by 1 / (pi / 2) to be a 0-1 factor. */
+  const float to_factor = ((float)(1.0 / (M_PI / 2.0)) * 2.0f);
+  mul_v3_fl(r, to_factor * half_angle);
+}
+/**
+ * Same as #mid_v3_v3v3_angle_weighted,
+ * but \a r is assumed to be accumulated normals, divided by their total.
+ * \a r is modified in place.
+ */
+void mid_v3_angle_weighted(float r[3])
+{
+  /* The averaged normal is expected to be no longer than unit length. */
+  BLI_assert(len_squared_v3(r) <= 1.0f + FLT_EPSILON);
+
+  /* The length that normalize_v3() reports for the averaged normal is fed to acosf(),
+   * which recovers half of the angle between the normals that were averaged. */
+  const float half_angle = acosf(normalize_v3(r));
+
+  /* Normally the half angle would only be multiplied by 2,
+   * but instead of an angle the result is scaled by 1 / (pi / 2) to be a 0-1 factor. */
+  const float to_factor = ((float)(1.0 / (M_PI / 2.0)) * 2.0f);
+
+  mul_v3_fl(r, to_factor * half_angle);
+}
+
+/**
+ * Equivalent to:
+ * interp_v3_v3v3(v, v1, v2, -1.0f);
+ */
+
+void flip_v4_v4v4(float v[4], const float v1[4], const float v2[4])
+{
+  /* Mirror `v2` through `v1`: step from `v1` by the same offset, away from `v2`. */
+  for (int i = 0; i < 4; i++) {
+    v[i] = v1[i] + (v1[i] - v2[i]);
+  }
+}
+
+void flip_v3_v3v3(float v[3], const float v1[3], const float v2[3])
+{
+  for (int i = 0; i < 3; i++) { /* mirror `v2` through `v1` */
+    v[i] = v1[i] + (v1[i] - v2[i]);
+  }
+}
+
+void flip_v2_v2v2(float v[2], const float v1[2], const float v2[2])
+{
+  v[0] = (v1[0] - v2[0]) + v1[0]; /* mirror `v2` through `v1` */
+  v[1] = (v1[1] - v2[1]) + v1[1];
+}
+
+/********************************* Comparison ********************************/
+
+bool is_finite_v2(const float v[2])
+{
+  return (isfinite(v[0]) && isfinite(v[1])); /* true only if both components pass isfinite */
+}
+
+bool is_finite_v3(const float v[3])
+{
+  return (isfinite(v[0]) && isfinite(v[1]) && isfinite(v[2])); /* all 3 must pass isfinite */
+}
+
+bool is_finite_v4(const float v[4])
+{ /* True only when all four components pass isfinite(). */
+  return (isfinite(v[0]) && isfinite(v[1]) && isfinite(v[2]) && isfinite(v[3]));
+}
+
+/********************************** Angles ***********************************/
+
+/* Return the angle in radians at \a v2, between the directions v1->v2 and v3->v2.
+ * If v1 is a shoulder, v2 is the elbow and v3 is the hand,
+ * this returns the angle at the elbow.
+ *
+ * Note that when v1/v2/v3 are 3 points along a straight line (in that order)
+ * the angle returned is pi (180deg), rather than 0.0.
+ */
+float angle_v3v3v3(const float v1[3], const float v2[3], const float v3[3])
+{
+  float dir_a[3], dir_b[3];
+
+  sub_v3_v3v3(dir_a, v2, v1);
+  normalize_v3(dir_a);
+
+  sub_v3_v3v3(dir_b, v2, v3);
+  normalize_v3(dir_b);
+  return angle_normalized_v3v3(dir_a, dir_b);
+}
+
+/* Cosine of the angle at \a p2 (dot product of unit directions): quicker than the angle. */
+float cos_v3v3v3(const float p1[3], const float p2[3], const float p3[3])
+{
+  float dir_a[3], dir_b[3];
+
+  sub_v3_v3v3(dir_a, p2, p1);
+  normalize_v3(dir_a);
+
+  sub_v3_v3v3(dir_b, p2, p3);
+  normalize_v3(dir_b);
+  return dot_v3v3(dir_a, dir_b);
+}
+
+/* Return the shortest angle in radians between the 2 vectors (normalized copies are used). */
+float angle_v3v3(const float v1[3], const float v2[3])
+{
+  float unit_a[3];
+  float unit_b[3];
+
+  normalize_v3_v3(unit_a, v1);
+  normalize_v3_v3(unit_b, v2);
+  return angle_normalized_v3v3(unit_a, unit_b);
+}
+
+/* 2D version of #angle_v3v3v3:
+ * the angle in radians at \a v2, between the directions v1->v2 and v3->v2. */
+float angle_v2v2v2(const float v1[2], const float v2[2], const float v3[2])
+{
+  /* Direction from v1 towards the corner v2. */
+  float dir_a[2] = {v2[0] - v1[0], v2[1] - v1[1]};
+  normalize_v2(dir_a);
+
+  /* Direction from v3 towards the corner v2. */
+  float dir_b[2] = {v2[0] - v3[0], v2[1] - v3[1]};
+  normalize_v2(dir_b);
+
+  /* Both directions are unit length now, as angle_normalized_v2v2 requires. */
+  return angle_normalized_v2v2(dir_a, dir_b);
+}
+
+/* Cosine of the angle at \a p2 (dot product of unit directions): quicker than the angle. */
+float cos_v2v2v2(const float p1[2], const float p2[2], const float p3[2])
+{
+  float dir_a[2], dir_b[2];
+
+  sub_v2_v2v2(dir_a, p2, p1);
+  normalize_v2(dir_a);
+
+  sub_v2_v2v2(dir_b, p2, p3);
+  normalize_v2(dir_b);
+  return dot_v2v2(dir_a, dir_b);
+}
+
+/* Return the shortest angle in radians between the 2 vectors.
+ * The inputs need not be unit length: normalized copies are made first,
+ * leaving \a v1 and \a v2 untouched. */
+float angle_v2v2(const float v1[2], const float v2[2])
+{
+  /* Normalized copy of v1. */
+  float unit_a[2] = {v1[0], v1[1]};
+  normalize_v2(unit_a);
+
+  /* Normalized copy of v2. */
+  float unit_b[2] = {v2[0], v2[1]};
+  normalize_v2(unit_b);
+
+  /* Both copies are unit length now, as angle_normalized_v2v2 requires. */
+  return angle_normalized_v2v2(unit_a, unit_b);
+}
+
+float angle_signed_v2v2(const float v1[2], const float v2[2])
+{
+  const float sin_term = (v1[1] * v2[0]) - (v1[0] * v2[1]); /* perpendicular dot product */
+  return atan2f(sin_term, dot_v2v2(v1, v2));
+}
+
+float angle_normalized_v3v3(const float v1[3], const float v2[3])
+{
+  /* Both inputs must be unit length. */
+  BLI_ASSERT_UNIT_V3(v1);
+  BLI_ASSERT_UNIT_V3(v2);
+
+  /* Same as acos(dot_v3v3(v1, v2)) but more accurate: uses the distance between them. */
+  if (dot_v3v3(v1, v2) >= 0.0f) {
+    return 2.0f * saasin(len_v3v3(v1, v2) / 2.0f);
+  }
+
+  /* Negative dot product: measure against the negated vector and subtract from pi. */
+  float v2_flipped[3];
+  negate_v3_v3(v2_flipped, v2);
+  return (float)M_PI - 2.0f * saasin(len_v3v3(v1, v2_flipped) / 2.0f);
+}
+
+float angle_normalized_v2v2(const float v1[2], const float v2[2])
+{
+  /* Both inputs must be unit length. */
+  BLI_ASSERT_UNIT_V2(v1);
+  BLI_ASSERT_UNIT_V2(v2);
+
+  /* Same as acos(dot_v2v2(v1, v2)) but more accurate: uses the distance between them. */
+  if (dot_v2v2(v1, v2) >= 0.0f) {
+    return 2.0f * saasin(len_v2v2(v1, v2) / 2.0f);
+  }
+
+  /* Negative dot product: measure against the negated vector and subtract from pi. */
+  float v2_flipped[2];
+  negate_v2_v2(v2_flipped, v2);
+  return (float)M_PI - 2.0f * saasin(len_v2v2(v1, v2_flipped) / 2.0f);
+}
+
+/**
+ * Angle between 2 vectors, measured about \a axis (the axis acts as a plane normal).
+ */
+float angle_on_axis_v3v3_v3(const float v1[3], const float v2[3], const float axis[3])
+{
+  float flat_a[3];
+  float flat_b[3];
+
+  /* Flatten both vectors into the plane that has the (unit length) axis as its normal. */
+  project_plane_normalized_v3_v3v3(flat_a, v1, axis);
+  project_plane_normalized_v3_v3v3(flat_b, v2, axis);
+  return angle_v3v3(flat_a, flat_b);
+}
+
+float angle_signed_on_axis_v3v3_v3(const float v1[3], const float v2[3], const float axis[3])
+{
+  float flat_a[3], flat_b[3], winding[3];
+
+  /* Flatten both vectors into the plane that has the (unit length) axis as its normal. */
+  project_plane_normalized_v3_v3v3(flat_a, v1, axis);
+  project_plane_normalized_v3_v3v3(flat_b, v2, axis);
+
+  const float unsigned_angle = angle_v3v3(flat_a, flat_b);
+
+  /* The cross product tells on which side of the plane the rotation winds;
+   * when it opposes the axis, 2*pi minus the angle is returned instead. */
+  cross_v3_v3v3(winding, flat_b, flat_a);
+  if (dot_v3v3(winding, axis) < 0.0f) {
+    return ((float)(M_PI * 2.0)) - unsigned_angle;
+  }
+
+  return unsigned_angle;
+}
+
+/**
+ * Angle at \a v2 between the points \a v1 and \a v3, measured about \a axis.
+ */
+float angle_on_axis_v3v3v3_v3(const float v1[3],
+                              const float v2[3],
+                              const float v3[3],
+                              const float axis[3])
+{
+  /* Offsets from the corner `v2` to each of the other two points. */
+  float to_v1[3], to_v3[3];
+  sub_v3_v3v3(to_v1, v1, v2);
+  sub_v3_v3v3(to_v3, v3, v2);
+
+  return angle_on_axis_v3v3_v3(to_v1, to_v3, axis);
+}
+
+float angle_signed_on_axis_v3v3v3_v3(const float v1[3],
+                                     const float v2[3],
+                                     const float v3[3],
+                                     const float axis[3])
+{
+  /* Offsets from the corner `v2` to each of the other two points. */
+  float to_v1[3], to_v3[3];
+  sub_v3_v3v3(to_v1, v1, v2);
+  sub_v3_v3v3(to_v3, v3, v2);
+
+  return angle_signed_on_axis_v3v3_v3(to_v1, to_v3, axis);
+}
+
+void angle_tri_v3(float angles[3], const float v1[3], const float v2[3], const float v3[3])
+{
+  float edge_31[3], edge_12[3], edge_23[3];
+
+  /* Unit length edge directions, each running from the second vertex to the first. */
+  sub_v3_v3v3(edge_31, v3, v1);
+  normalize_v3(edge_31);
+  sub_v3_v3v3(edge_12, v1, v2);
+  normalize_v3(edge_12);
+  sub_v3_v3v3(edge_23, v2, v3);
+  normalize_v3(edge_23);
+
+  angles[0] = (float)M_PI - angle_normalized_v3v3(edge_31, edge_12);
+  angles[1] = (float)M_PI - angle_normalized_v3v3(edge_12, edge_23);
+  /* The interior angles of a triangle sum to pi, so the third follows from the other two. */
+  angles[2] = (float)M_PI - (angles[0] + angles[1]);
+}
+
+void angle_quad_v3(
+    float angles[4], const float v1[3], const float v2[3], const float v3[3], const float v4[3])
+{
+  /* Edge directions around the quad; edge[i] and edge[i + 1] meet at corner `i`. */
+  float edge[4][3];
+
+  sub_v3_v3v3(edge[0], v4, v1);
+  sub_v3_v3v3(edge[1], v1, v2);
+  sub_v3_v3v3(edge[2], v2, v3);
+  sub_v3_v3v3(edge[3], v3, v4);
+
+  for (int i = 0; i < 4; i++) {
+    normalize_v3(edge[i]);
+  }
+
+  /* Each corner angle is pi minus the angle between its two edge directions. */
+  for (int i = 0; i < 4; i++) {
+    angles[i] = (float)M_PI - angle_normalized_v3v3(edge[i], edge[(i + 1) % 4]);
+  }
+}
+
+void angle_poly_v3(float *angles, const float *verts[3], int len)
+{
+  /* Ring buffer of unit edge directions; slot 2 is seeded with the closing edge. */
+  float edge_dir[3][3];
+
+  sub_v3_v3v3(edge_dir[2], verts[len - 1], verts[0]);
+  normalize_v3(edge_dir[2]);
+  for (int i = 0; i < len; i++) {
+    sub_v3_v3v3(edge_dir[i % 3], verts[i % len], verts[(i + 1) % len]);
+    normalize_v3(edge_dir[i % 3]);
+    angles[i] = (float)M_PI - angle_normalized_v3v3(edge_dir[(i + 2) % 3], edge_dir[i % 3]);
+  }
+}
+
+/********************************* Geometry **********************************/
+
+/**
+ * Project \a p onto \a v_proj (which need not be unit length).
+ */
+void project_v2_v2v2(float out[2], const float p[2], const float v_proj[2])
+{
+  /* Length of the projection, expressed as a multiple of `v_proj`. */
+  const float scale = dot_v2v2(p, v_proj) / dot_v2v2(v_proj, v_proj);
+  out[0] = scale * v_proj[0];
+  out[1] = scale * v_proj[1];
+}
+
+/**
+ * Project \a p onto \a v_proj (which need not be unit length).
+ */
+void project_v3_v3v3(float out[3], const float p[3], const float v_proj[3])
+{
+  /* Length of the projection, expressed as a multiple of `v_proj`. */
+  const float scale = dot_v3v3(p, v_proj) / dot_v3v3(v_proj, v_proj);
+  for (int i = 0; i < 3; i++) {
+    out[i] = scale * v_proj[i];
+  }
+}
+
+/**
+ * Project \a p onto a unit length \a v_proj
+ */
+void project_v2_v2v2_normalized(float out[2], const float p[2], const float v_proj[2])
+{
+  BLI_ASSERT_UNIT_V2(v_proj);
+  /* With a unit length target no division by its squared length is needed. */
+  const float scale = dot_v2v2(p, v_proj);
+  out[0] = scale * v_proj[0];
+  out[1] = scale * v_proj[1];
+}
+
+/**
+ * Project \a p onto a unit length \a v_proj
+ */
+void project_v3_v3v3_normalized(float out[3], const float p[3], const float v_proj[3])
+{
+  BLI_ASSERT_UNIT_V3(v_proj);
+  /* With a unit length target no division by its squared length is needed. */
+  const float scale = dot_v3v3(p, v_proj);
+  for (int i = 0; i < 3; i++) {
+    out[i] = scale * v_proj[i];
+  }
+}
+
+/**
+ * In this case plane is a 3D vector only (no 4th component).
+ *
+ * Projecting makes \a out a copy of \a p that is orthogonal to \a v_plane.
+ *
+ * \note If \a p is exactly perpendicular to \a v_plane, \a out is just a copy of \a p.
+ *
+ * \note This function is a convenience to call:
+ * \code{.c}
+ * project_v3_v3v3(out, p, v_plane);
+ * sub_v3_v3v3(out, p, out);
+ * \endcode
+ */
+void project_plane_v3_v3v3(float out[3], const float p[3], const float v_plane[3])
+{
+  /* Amount of `v_plane` contained in `p`; it is removed from every component. */
+  const float scale = dot_v3v3(p, v_plane) / dot_v3v3(v_plane, v_plane);
+  for (int i = 0; i < 3; i++) {
+    out[i] = p[i] - (scale * v_plane[i]);
+  }
+}
+
+void project_plane_v2_v2v2(float out[2], const float p[2], const float v_plane[2])
+{
+  /* 2D version of #project_plane_v3_v3v3: remove the part of `p` along `v_plane`. */
+  const float scale = dot_v2v2(p, v_plane) / dot_v2v2(v_plane, v_plane);
+  out[0] = p[0] - (scale * v_plane[0]);
+  out[1] = p[1] - (scale * v_plane[1]);
+}
+
+void project_plane_normalized_v3_v3v3(float out[3], const float p[3], const float v_plane[3])
+{
+  BLI_ASSERT_UNIT_V3(v_plane);
+  /* Remove the part of `p` along the unit length `v_plane` (no division needed). */
+  const float scale = dot_v3v3(p, v_plane);
+  for (int i = 0; i < 3; i++) {
+    out[i] = p[i] - (scale * v_plane[i]);
+  }
+}
+
+void project_plane_normalized_v2_v2v2(float out[2], const float p[2], const float v_plane[2])
+{
+  BLI_ASSERT_UNIT_V2(v_plane);
+  /* Remove the part of `p` along the unit length `v_plane` (no division needed). */
+  const float scale = dot_v2v2(p, v_plane);
+  out[0] = p[0] - (scale * v_plane[0]);
+  out[1] = p[1] - (scale * v_plane[1]);
+}
+
+/* Project the point \a out (in place) onto the plane that passes through \a plane_co
+ * and has the normal \a plane_no (which need not be unit length). */
+void project_v3_plane(float out[3], const float plane_no[3], const float plane_co[3])
+{
+  float offset[3];
+  /* How far the point sits from the plane, as a multiple of the normal. */
+  sub_v3_v3v3(offset, out, plane_co);
+  const float scale = dot_v3v3(offset, plane_no) / len_squared_v3(plane_no);
+
+  /* Reuse `offset` for the along-normal part and take it away from the point. */
+  mul_v3_v3fl(offset, plane_no, scale);
+  sub_v3_v3(out, offset);
+}
+
+/* Bisector at v2: the normalized sum of the unit directions v1->v2 and v2->v3. */
+void bisect_v3_v3v3v3(float out[3], const float v1[3], const float v2[3], const float v3[3])
+{
+  float dir_in[3], dir_out[3];
+  sub_v3_v3v3(dir_in, v2, v1);
+  normalize_v3(dir_in);
+  sub_v3_v3v3(dir_out, v3, v2);
+  normalize_v3(dir_out);
+  add_v3_v3v3(out, dir_in, dir_out);
+  normalize_v3(out);
+}
+
+/**
+ * Reflect the vector \a v about a unit length \a normal:
+ * out = v - ((2 * dot(v, normal)) * normal).
+ *
+ * <pre>
+ * v
+ * +  ^
+ * \ |
+ *   \|
+ *    + normal: axis of reflection
+ *   /
+ *  /
+ * +
+ * out: result (negate for a 'bounce').
+ * </pre>
+ */
+void reflect_v3_v3v3(float out[3], const float v[3], const float normal[3])
+{
+  BLI_ASSERT_UNIT_V3(normal);
+
+  /* Twice the part of `v` that lies along the normal is taken away from `v`. */
+  const float twice_dot = 2.0f * dot_v3v3(v, normal);
+  for (int i = 0; i < 3; i++) {
+    out[i] = v[i] - (twice_dot * normal[i]);
+  }
+}
+
+/**
+ * Takes a vector and computes 2 orthogonal directions.
+ *
+ * \note if \a n is unit length, computed values will be too.
+ */
+void ortho_basis_v3v3_v3(float r_n1[3], float r_n2[3], const float n[3])
+{
+  /* Squared length of `n` as measured by the 2D helper, i.e. of its X/Y components. */
+  const float len_sq_xy = len_squared_v2(n);
+
+  if (!(len_sq_xy > FLT_EPSILON)) {
+    /* Degenerate case: `n` has (almost) no X/Y part, use fixed axis aligned vectors. */
+    r_n1[0] = (n[2] < 0.0f) ? -1.0f : 1.0f;
+    r_n1[1] = r_n1[2] = r_n2[0] = r_n2[2] = 0.0f;
+    r_n2[1] = 1.0f;
+    return;
+  }
+
+  const float inv_len_xy = 1.0f / sqrtf(len_sq_xy);
+  BLI_assert(isfinite(inv_len_xy));
+  /* r_n1: the X/Y part of `n` turned by a quarter turn and scaled to unit length. */
+  r_n1[0] = n[1] * inv_len_xy;
+  r_n1[1] = -n[0] * inv_len_xy;
+  r_n1[2] = 0.0f;
+  /* r_n2: cross product of `n` and `r_n1` (written out, using r_n1[2] == 0). */
+  r_n2[0] = -n[2] * r_n1[1];
+  r_n2[1] = n[2] * r_n1[0];
+  r_n2[2] = n[0] * r_n1[1] - n[1] * r_n1[0];
+}
+
+/**
+ * Calculates \a out - a perpendicular vector to \a v
+ *
+ * \note return vector won't maintain same length.
+ */
+
+//int axis_dominant_v3_single(const float vec[3])
+//{
+//	const float x = fabsf(vec[0]);
+//	const float y = fabsf(vec[1]);
+//	const float z = fabsf(vec[2]);
+//	return ((x > y) ? ((x > z) ? 0 : 2) : ((y > z) ? 1 : 2));
+//}
+
+void ortho_v3_v3(float out[3], const float v[3])
+{
+  /* Build the result around the axis picked by axis_dominant_v3_single (presumably the
+   * largest-magnitude component); in every case dot(out, v) works out to zero. */
+  const int dominant = axis_dominant_v3_single(v);
+
+  BLI_assert(out != v);
+
+  if (dominant == 0) {
+    out[0] = -v[1] - v[2];
+    out[1] = v[0];
+    out[2] = v[0];
+  }
+  else if (dominant == 1) {
+    out[0] = v[1];
+    out[1] = -v[0] - v[2];
+    out[2] = v[1];
+  }
+  else if (dominant == 2) {
+    out[0] = v[2];
+    out[1] = v[2];
+    out[2] = -v[0] - v[1];
+  }
+}
+
+/**
+ * 2D counterpart of #ortho_v3_v3 (trivial by comparison, kept for consistency).
+ */
+void ortho_v2_v2(float out[2], const float v[2])
+{
+  BLI_assert(out != v);
+  /* (x, y) -> (-y, x); must not run in place, out[0] is written before v[0] is read. */
+  out[0] = -v[1];
+  out[1] = v[0];
+}
+
+/**
+ * Rotate a point \a p by \a angle (radians) around the origin (0, 0); not usable in place.
+ */
+void rotate_v2_v2fl(float r[2], const float p[2], const float angle)
+{
+  BLI_assert(r != p);
+
+  const float cos_a = cosf(angle);
+  const float sin_a = sinf(angle);
+
+  r[0] = cos_a * p[0] - sin_a * p[1];
+  r[1] = sin_a * p[0] + cos_a * p[1];
+}
+
+/**
+ * Rotate a point \a p by \a angle (radians) around an arbitrary unit length \a axis.
+ * http://local.wasp.uwa.edu.au/~pbourke/geometry/
+ */
+void rotate_normalized_v3_v3v3fl(float out[3],
+                                 const float p[3],
+                                 const float axis[3],
+                                 const float angle)
+{
+  const float cos_a = cosf(angle);
+  const float sin_a = sinf(angle);
+  const float omc = 1.0f - cos_a; /* "one minus cosine", shared by every matrix entry */
+
+  BLI_ASSERT_UNIT_V3(axis);
+  /* Each output component is one row of the axis-angle rotation matrix applied to `p`. */
+  out[0] = ((cos_a + omc * axis[0] * axis[0]) * p[0]) +
+           ((omc * axis[0] * axis[1] - axis[2] * sin_a) * p[1]) +
+           ((omc * axis[0] * axis[2] + axis[1] * sin_a) * p[2]);
+
+  out[1] = ((omc * axis[0] * axis[1] + axis[2] * sin_a) * p[0]) +
+           ((cos_a + omc * axis[1] * axis[1]) * p[1]) +
+           ((omc * axis[1] * axis[2] - axis[0] * sin_a) * p[2]);
+
+  out[2] = ((omc * axis[0] * axis[2] - axis[1] * sin_a) * p[0]) +
+           ((omc * axis[1] * axis[2] + axis[0] * sin_a) * p[1]) +
+           ((cos_a + omc * axis[2] * axis[2]) * p[2]);
+}
+
+void rotate_v3_v3v3fl(float r[3], const float p[3], const float axis[3], const float angle)
+{
+  BLI_assert(r != p);
+
+  /* The worker below needs a unit length axis, so rotate about a normalized copy. */
+  float unit_axis[3];
+  normalize_v3_v3(unit_axis, axis);
+
+  rotate_normalized_v3_v3v3fl(r, p, unit_axis, angle);
+}
+
+/*********************************** Other ***********************************/
+
+void print_v2(const char *str, const float v[2])
+{
+  printf("%s: %.8f %.8f\n", str, v[0], v[1]); /* label, then both components, on stdout */
+}
+
+void print_v3(const char *str, const float v[3])
+{
+  printf("%s: %.8f %.8f %.8f\n", str, v[0], v[1], v[2]); /* label, then the 3 components */
+}
+
+void print_v4(const char *str, const float v[4])
+{ /* Print the label, then the 4 components with 8 decimals each, on one stdout line. */
+  printf("%s: %.8f %.8f %.8f %.8f\n", str, v[0], v[1], v[2], v[3]);
+}
+
+void print_vn(const char *str, const float v[], const int n)
+{
+  /* One stdout line: "label[n]:" followed by every element with 8 decimals. */
+  printf("%s[%d]:", str, n);
+  for (int i = 0; i < n; i++) {
+    printf(" %.8f", v[i]);
+  }
+  printf("\n");
+}
+
+/**
+ * Expand the bounds \a min / \a max (in place) so that they contain \a vec.
+ *
+ * The bounds are compared before being written, so the caller must have
+ * initialized them before the first call.
+ */
+void minmax_v3v3_v3(float min[3], float max[3], const float vec[3])
+{
+  /* Lower bound: pulled down wherever `vec` is smaller. */
+  for (int i = 0; i < 3; i++) {
+    if (min[i] > vec[i]) {
+      min[i] = vec[i];
+    }
+  }
+
+  /* Upper bound: pushed up wherever `vec` is larger. */
+  for (int i = 0; i < 3; i++) {
+    if (max[i] < vec[i]) {
+      max[i] = vec[i];
+    }
+  }
+}
+
+void minmax_v2v2_v2(float min[2], float max[2], const float vec[2])
+{
+  /* Lower bound: pulled down wherever `vec` is smaller (bounds come from the caller). */
+  for (int i = 0; i < 2; i++) {
+    if (min[i] > vec[i]) {
+      min[i] = vec[i];
+    }
+  }
+
+  /* Upper bound: pushed up wherever `vec` is larger. */
+  for (int i = 0; i < 2; i++) {
+    if (max[i] < vec[i]) {
+      max[i] = vec[i];
+    }
+  }
+}
+
+void minmax_v3v3_v3_array(float r_min[3], float r_max[3], const float (*vec_arr)[3], int nbr)
+{
+  for (; nbr != 0; nbr--, vec_arr++) { /* fold every vector of the array into the bounds */
+    minmax_v3v3_v3(r_min, r_max, *vec_arr);
+  }
+}
+
+/** Ensure \a v1 is \a dist from \a v2 (nothing is done when the two points are equal). */
+void dist_ensure_v3_v3fl(float v1[3], const float v2[3], const float dist)
+{
+  if (equals_v3v3(v2, v1)) {
+    return;
+  }
+  float dir[3];
+  sub_v3_v3v3(dir, v1, v2);
+  normalize_v3(dir);
+  madd_v3_v3v3fl(v1, v2, dir, dist);
+}
+
+void dist_ensure_v2_v2fl(float v1[2], const float v2[2], const float dist)
+{
+  if (equals_v2v2(v2, v1)) {
+    return; /* equal points: there is no direction to move `v1` along */
+  }
+  float dir[2];
+  sub_v2_v2v2(dir, v1, v2);
+  normalize_v2(dir);
+  madd_v2_v2v2fl(v1, v2, dir, dist);
+}
+
+void axis_sort_v3(const float axis_values[3], int r_axis_order[3])
+{
+  float v[3];
+  copy_v3_v3(v, axis_values);
+  /* `r_axis_order` is only permuted here (never initialized): the caller provides its content. */
+#define SWAP_AXIS(a, b) \
+  { \
+    SWAP(float, v[a], v[b]); \
+    SWAP(int, r_axis_order[a], r_axis_order[b]); \
+  } \
+  (void)0
+  /* Sort `v` into ascending order, applying every swap to `r_axis_order` as well. */
+  if (v[0] < v[1]) {
+    if (v[2] < v[0]) {
+      SWAP_AXIS(0, 2);
+    }
+  }
+  else {
+    if (v[1] < v[2]) {
+      SWAP_AXIS(0, 1);
+    }
+    else {
+      SWAP_AXIS(0, 2);
+    }
+  }
+  if (v[2] < v[1]) {
+    SWAP_AXIS(1, 2);
+  }
+
+#undef SWAP_AXIS
+}
+
+/***************************** Array Functions *******************************/
+
+MINLINE double sqr_db(double f)
+{
+  return f * f; /* square of `f`, computed in double precision */
+}
+
+double dot_vn_vn(const float *array_src_a, const float *array_src_b, const int size)
+{
+  /* Each product is formed in float and only then widened; the sum is kept in double,
+   * walking from the last element back to the first. */
+  double sum = 0.0f;
+  for (int i = size - 1; i >= 0; i--) {
+    sum += (double)(array_src_a[i] * array_src_b[i]);
+  }
+
+  return sum;
+}
+
+double len_squared_vn(const float *array, const int size)
+{
+  /* Sum of squares kept in double, walking from the last element back to the first. */
+  double sum = 0.0f;
+  for (int i = size - 1; i >= 0; i--) {
+    sum += sqr_db((double)array[i]);
+  }
+
+  return sum;
+}
+
+float normalize_vn_vn(float *array_tar, const float *array_src, const int size)
+{
+  const double len_sq = len_squared_vn(array_src, size);
+  /* Too short to normalize: output all zeros and report a length of zero. */
+  if (!(len_sq > 1.0e-35)) {
+    copy_vn_fl(array_tar, size, 0.0f);
+    return 0.0f;
+  }
+
+  /* Scale by the reciprocal length; the length before scaling is the return value. */
+  const float len = (float)sqrt(len_sq);
+  mul_vn_vn_fl(array_tar, array_src, size, 1.0f / len);
+  return len;
+}
+
+float normalize_vn(float *array_tar, const int size)
+{
+  return normalize_vn_vn(array_tar, array_tar, size); /* in place: source is the target */
+}
+
+void range_vn_i(int *array_tar, const int size, const int start)
+{
+  /* Fill with consecutive integers so that array_tar[k] == start + k.
+   * The array is written from the last element back to the first. */
+  int value = start + (size - 1);
+  for (int k = size - 1; k >= 0; k--) {
+    array_tar[k] = value--;
+  }
+}
+
+void range_vn_u(unsigned int *array_tar, const int size, const unsigned int start)
+{
+  /* Fill with consecutive unsigned integers so that array_tar[k] == start + k.
+   * The array is written from the last element back to the first. */
+  unsigned int value = start + (unsigned int)(size - 1);
+  for (int k = size - 1; k >= 0; k--) {
+    array_tar[k] = value--;
+  }
+}
+
+void range_vn_fl(float *array_tar, const int size, const float start, const float step)
+{
+  /* Fill with evenly spaced values: array_tar[k] = start + step * k,
+   * written from the last element back to the first. */
+  for (int k = size - 1; k >= 0; k--) {
+    array_tar[k] = start + step * (float)(k);
+  }
+}
+
+void negate_vn(float *array_tar, const int size)
+{
+  /* Flip the sign of every element in place (multiplying by -1),
+   * visiting the array from the last element back to the first. */
+  for (int i = size - 1; i >= 0; i--) {
+    array_tar[i] *= -1.0f;
+  }
+}
+
+void negate_vn_vn(float *array_tar, const float *array_src, const int size)
+{
+  /* array_tar[i] = -array_src[i] for every element.
+   * Elements are visited from the last to the first; the order can only matter
+   * if the two arrays overlap. */
+  for (int i = size - 1; i >= 0; i--) {
+    array_tar[i] = -array_src[i];
+  }
+}
+
+void mul_vn_vn(float *array_tar, const float *array_src, const int size)
+{
+  /* array_tar[i] *= array_src[i] for every element (in place on the target).
+   * Elements are visited from the last to the first; the order can only matter
+   * if the two arrays overlap. */
+  for (int i = size - 1; i >= 0; i--) {
+    array_tar[i] *= array_src[i];
+  }
+}
+
+/* Element-wise product of two arrays, stored in a third:
+ * array_tar[i] = array_src_a[i] * array_src_b[i]. */
+void mul_vn_vnvn(float *array_tar,
+                 const float *array_src_a,
+                 const float *array_src_b,
+                 const int size)
+{
+  /* Elements are visited from the last to the first; the order can only matter
+   * if the target overlaps one of the sources. */
+  for (int i = size - 1; i >= 0; i--) {
+    array_tar[i] = array_src_a[i] * array_src_b[i];
+  }
+}
+
+void mul_vn_fl(float *array_tar, const int size, const float f)
+{
+  /* Scale every element in place by `f`,
+   * visiting the array from the last element back to the first. */
+  for (int i = size - 1; i >= 0; i--) {
+    array_tar[i] *= f;
+  }
+}
+
+/* Scaled copy: array_tar[k] = array_src[k] * f for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void mul_vn_vn_fl(float *array_tar, const float *array_src, const int size, const float f)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src[idx] * f;
+  }
+}
+
+/* Element-wise in-place sum: array_tar[k] += array_src[k] for each of the
+ * `size` elements, processed from the last index down to index 0. */
+void add_vn_vn(float *array_tar, const float *array_src, const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] += array_src[idx];
+  }
+}
+
+/* Element-wise sum of two arrays:
+ * array_tar[k] = array_src_a[k] + array_src_b[k] for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void add_vn_vnvn(float *array_tar,
+                 const float *array_src_a,
+                 const float *array_src_b,
+                 const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src_a[idx] + array_src_b[idx];
+  }
+}
+
+/* In-place multiply-add: array_tar[k] += array_src[k] * f for each of the
+ * `size` elements, processed from the last index down to index 0. */
+void madd_vn_vn(float *array_tar, const float *array_src, const float f, const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] += array_src[idx] * f;
+  }
+}
+
+/* Multiply-add into a separate target:
+ * array_tar[k] = array_src_a[k] + array_src_b[k] * f for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void madd_vn_vnvn(float *array_tar,
+                  const float *array_src_a,
+                  const float *array_src_b,
+                  const float f,
+                  const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src_a[idx] + (array_src_b[idx] * f);
+  }
+}
+
+/* Element-wise in-place difference: array_tar[k] -= array_src[k] for each of
+ * the `size` elements, processed from the last index down to index 0. */
+void sub_vn_vn(float *array_tar, const float *array_src, const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] -= array_src[idx];
+  }
+}
+
+/* Element-wise difference of two arrays:
+ * array_tar[k] = array_src_a[k] - array_src_b[k] for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void sub_vn_vnvn(float *array_tar,
+                 const float *array_src_a,
+                 const float *array_src_b,
+                 const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src_a[idx] - array_src_b[idx];
+  }
+}
+
+/* In-place multiply-subtract: array_tar[k] -= array_src[k] * f for each of the
+ * `size` elements, processed from the last index down to index 0. */
+void msub_vn_vn(float *array_tar, const float *array_src, const float f, const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] -= array_src[idx] * f;
+  }
+}
+
+/* Multiply-subtract into a separate target:
+ * array_tar[k] = array_src_a[k] - array_src_b[k] * f for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void msub_vn_vnvn(float *array_tar,
+                  const float *array_src_a,
+                  const float *array_src_b,
+                  const float f,
+                  const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src_a[idx] - (array_src_b[idx] * f);
+  }
+}
+
+/* Blend array_tar towards array_src in place, element by element:
+ *   array_tar[k] = (1 - t) * array_tar[k] + t * array_src[k]
+ * for each of the `size` elements. Elements are visited from the last index
+ * down to index 0. */
+void interp_vn_vn(float *array_tar, const float *array_src, const float t, const int size)
+{
+  const float keep = 1.0f - t;
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = (keep * array_tar[idx]) + (t * array_src[idx]);
+  }
+}
+
+/* Set each of the `size` ints in array_tar to val (last index first). */
+void copy_vn_i(int *array_tar, const int size, const int val)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = val;
+  }
+}
+
+/* Set each of the `size` shorts in array_tar to val (last index first). */
+void copy_vn_short(short *array_tar, const int size, const short val)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = val;
+  }
+}
+
+/* Set each of the `size` unsigned shorts in array_tar to val (last index first). */
+void copy_vn_ushort(unsigned short *array_tar, const int size, const unsigned short val)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = val;
+  }
+}
+
+/* Set each of the `size` bytes in array_tar to val (last index first). */
+void copy_vn_uchar(unsigned char *array_tar, const int size, const unsigned char val)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = val;
+  }
+}
+
+/* Set each of the `size` floats in array_tar to val (last index first). */
+void copy_vn_fl(float *array_tar, const int size, const float val)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = val;
+  }
+}
+
+/** \name Double precision versions 'db'.
+ * \{ */
+
+/* Double-precision in-place sum: array_tar[k] += array_src[k] for each of the
+ * `size` elements, processed from the last index down to index 0. */
+void add_vn_vn_d(double *array_tar, const double *array_src, const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] += array_src[idx];
+  }
+}
+
+/* Double-precision element-wise sum of two arrays:
+ * array_tar[k] = array_src_a[k] + array_src_b[k] for each of the `size`
+ * elements, processed from the last index down to index 0. */
+void add_vn_vnvn_d(double *array_tar,
+                   const double *array_src_a,
+                   const double *array_src_b,
+                   const int size)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] = array_src_a[idx] + array_src_b[idx];
+  }
+}
+
+/* Multiply each of the `size` doubles in array_tar by f, in place. */
+void mul_vn_db(double *array_tar, const int size, const double f)
+{
+  int idx = size;
+  while (idx-- != 0) {
+    array_tar[idx] *= f;
+  }
+}
+
+/** \} */
diff --git a/client/blenlib/math_vector_inline.cpp b/client/blenlib/math_vector_inline.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..865c2f5dc254af2e359f67906170e0319b49f2b3
--- /dev/null
+++ b/client/blenlib/math_vector_inline.cpp
@@ -0,0 +1,1228 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2001-2002 by NaN Holding BV.
+ * All rights reserved.
+ *
+ * The Original Code is: some of this file.
+ *
+ * */
+
+/** \file
+ * \ingroup bli
+ */
+
+#ifndef __MATH_VECTOR_INLINE_C__
+#define __MATH_VECTOR_INLINE_C__
+
+#include "BLI_math.h"
+
+/********************************** Init *************************************/
+
+/* Set both components of r to 0.0f. */
+MINLINE void zero_v2(float r[2])
+{
+  r[0] = r[1] = 0.0f;
+}
+
+MINLINE void zero_v3(float r[3])
+{ /* Set all three components of r to 0.0f. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = 0.0f;
+  }
+}
+
+/* Set all four components of r to 0.0f. */
+MINLINE void zero_v4(float r[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = 0.0f;
+  }
+}
+
+/* Copy both components of a into r. */
+MINLINE void copy_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3(float r[3], const float a[3])
+{ /* Copy the three components of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+MINLINE void copy_v3fl_v3s(float r[3], const short a[3])
+{ /* Convert the three shorts of a to float and store them in r. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = (float)a[i];
+  }
+}
+
+/* Copy the four components of a into r, lowest index first. */
+MINLINE void copy_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Set both components of r to f. */
+MINLINE void copy_v2_fl(float r[2], float f)
+{
+  r[0] = r[1] = f;
+}
+
+MINLINE void copy_v3_fl(float r[3], float f)
+{ /* Set all three components of r to f. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = f;
+  }
+}
+
+/* Set all four components of r to f. */
+MINLINE void copy_v4_fl(float r[4], float f)
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = f;
+  }
+}
+
+/* unsigned char */
+/* Copy both bytes of a into r. */
+MINLINE void copy_v2_v2_uchar(unsigned char r[2], const unsigned char a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3_uchar(unsigned char r[3], const unsigned char a[3])
+{ /* Copy the three bytes of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Copy the four bytes of a into r, lowest index first. */
+MINLINE void copy_v4_v4_uchar(unsigned char r[4], const unsigned char a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* char */
+/* Copy both chars of a into r. */
+MINLINE void copy_v2_v2_char(char r[2], const char a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3_char(char r[3], const char a[3])
+{ /* Copy the three chars of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Copy the four chars of a into r, lowest index first. */
+MINLINE void copy_v4_v4_char(char r[4], const char a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* short */
+
+/* Copy both shorts of a into r. */
+MINLINE void copy_v2_v2_short(short r[2], const short a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3_short(short r[3], const short a[3])
+{ /* Copy the three shorts of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Copy the four shorts of a into r, lowest index first. */
+MINLINE void copy_v4_v4_short(short r[4], const short a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* int */
+MINLINE void zero_v3_int(int r[3])
+{ /* Set all three ints of r to 0. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = 0;
+  }
+}
+
+/* Copy both ints of a into r. */
+MINLINE void copy_v2_v2_int(int r[2], const int a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3_int(int r[3], const int a[3])
+{ /* Copy the three ints of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Copy the four ints of a into r, lowest index first. */
+MINLINE void copy_v4_v4_int(int r[4], const int a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* double */
+MINLINE void zero_v3_db(double r[3])
+{ /* Set all three doubles of r to 0.0. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = 0.0;
+  }
+}
+
+/* Copy both doubles of a into r. */
+MINLINE void copy_v2_v2_db(double r[2], const double a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i]; }
+}
+
+MINLINE void copy_v3_v3_db(double r[3], const double a[3])
+{ /* Copy the three doubles of a into r, lowest index first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* Copy the four doubles of a into r, lowest index first. */
+MINLINE void copy_v4_v4_db(double r[4], const double a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i];
+  }
+}
+
+/* int <-> float */
+/* roundf() each component of a, cast the result to int and store it in r. */
+MINLINE void round_v2i_v2fl(int r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = (int)roundf(a[i]); }
+}
+
+/* Convert both ints of a to float and store them in r. */
+MINLINE void copy_v2fl_v2i(float r[2], const int a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = (float)a[i]; }
+}
+
+/* double -> float */
+/* Narrow both doubles of a to float and store them in r. */
+MINLINE void copy_v2fl_v2db(float r[2], const double a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = (float)a[i]; }
+}
+
+MINLINE void copy_v3fl_v3db(float r[3], const double a[3])
+{ /* Narrow the three doubles of a to float and store them in r. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = (float)a[i];
+  }
+}
+
+/* Narrow the four doubles of a to float and store them in r. */
+MINLINE void copy_v4fl_v4db(float r[4], const double a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = (float)a[i];
+  }
+}
+
+/* float -> double */
+/* Widen both floats of a to double and store them in r. */
+MINLINE void copy_v2db_v2fl(double r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = (double)a[i]; }
+}
+
+MINLINE void copy_v3db_v3fl(double r[3], const float a[3])
+{ /* Widen the three floats of a to double and store them in r. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = (double)a[i];
+  }
+}
+
+/* Widen the four floats of a to double and store them in r. */
+MINLINE void copy_v4db_v4fl(double r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = (double)a[i];
+  }
+}
+
+/* Exchange the contents of a and b, one component at a time. */
+MINLINE void swap_v2_v2(float a[2], float b[2])
+{
+  for (int i = 0; i < 2; i++) { SWAP(float, a[i], b[i]); }
+}
+
+MINLINE void swap_v3_v3(float a[3], float b[3])
+{ /* Exchange the contents of a and b, one component at a time. */
+  for (int i = 0; i < 3; i++) {
+    SWAP(float, a[i], b[i]);
+  }
+}
+
+/* Exchange the contents of a and b, one component at a time. */
+MINLINE void swap_v4_v4(float a[4], float b[4])
+{
+  for (int i = 0; i < 4; i++) {
+    SWAP(float, a[i], b[i]);
+  }
+}
+
+/* float args -> vec */
+MINLINE void copy_v2_fl2(float v[2], float x, float y)
+{ /* Store the scalars (x, y) into v[0], v[1]. */
+  const float xy[2] = {x, y};
+  copy_v2_v2(v, xy);
+}
+
+/* Store the scalars (x, y, z) into v[0], v[1], v[2]. */
+MINLINE void copy_v3_fl3(float v[3], float x, float y, float z)
+{
+  const float xyz[3] = {x, y, z};
+  copy_v3_v3(v, xyz);
+}
+
+/* Store the scalars (x, y, z, w) into v[0]..v[3], going through a local
+ * 4-float array that is then copied with copy_v4_v4(). */
+MINLINE void copy_v4_fl4(float v[4], float x, float y, float z, float w)
+{
+  const float xyzw[4] = {x, y, z, w};
+  copy_v4_v4(v, xyzw);
+}
+
+/********************************* Arithmetic ********************************/
+
+/* Add the scalar f to both components of r. */
+MINLINE void add_v2_fl(float r[2], float f)
+{
+  for (int i = 0; i < 2; i++) { r[i] += f; }
+}
+
+MINLINE void add_v3_fl(float r[3], float f)
+{ /* Add the scalar f to each of the three components of r. */
+  for (int i = 0; i < 3; i++) {
+    r[i] += f;
+  }
+}
+
+/* Add the scalar f to each of the four components of r. */
+MINLINE void add_v4_fl(float r[4], float f)
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] += f;
+  }
+}
+
+/* r += a, component-wise. */
+MINLINE void add_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] += a[i]; }
+}
+
+/* r = a + b, component-wise. */
+MINLINE void add_v2_v2v2(float r[2], const float a[2], const float b[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] + b[i]; }
+}
+
+/* r = a + b, component-wise, on ints. */
+MINLINE void add_v2_v2v2_int(int r[2], const int a[2], const int b[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] + b[i]; }
+}
+
+MINLINE void add_v3_v3(float r[3], const float a[3])
+{ /* r += a, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] += a[i];
+  }
+}
+
+MINLINE void add_v3_v3v3(float r[3], const float a[3], const float b[3])
+{ /* r = a + b, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + b[i];
+  }
+}
+
+MINLINE void add_v3fl_v3fl_v3i(float r[3], const float a[3], const int b[3])
+{ /* r = a + b, with each int of b converted to float first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + (float)b[i];
+  }
+}
+
+MINLINE void add_v3fl_v3fl_v3s(float r[3], const float a[3], const short b[3])
+{ /* r = a + b, with each short of b converted to float first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + (float)b[i];
+  }
+}
+
+/* r += a, component-wise. */
+MINLINE void add_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] += a[i];
+  }
+}
+
+/* r = a + b, component-wise. */
+MINLINE void add_v4_v4v4(float r[4], const float a[4], const float b[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i] + b[i];
+  }
+}
+
+/* r -= a, component-wise. */
+MINLINE void sub_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] -= a[i]; }
+}
+
+/* r = a - b, component-wise. */
+MINLINE void sub_v2_v2v2(float r[2], const float a[2], const float b[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] - b[i]; }
+}
+
+/* r = a - b, component-wise, on ints. */
+MINLINE void sub_v2_v2v2_int(int r[2], const int a[2], const int b[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] - b[i]; }
+}
+
+MINLINE void sub_v3_v3(float r[3], const float a[3])
+{ /* r -= a, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] -= a[i];
+  }
+}
+
+MINLINE void sub_v3_v3v3(float r[3], const float a[3], const float b[3])
+{ /* r = a - b, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] - b[i];
+  }
+}
+
+MINLINE void sub_v3_v3v3_int(int r[3], const int a[3], const int b[3])
+{ /* r = a - b, component-wise, on ints. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] - b[i];
+  }
+}
+
+MINLINE void sub_v3db_v3fl_v3fl(double r[3], const float a[3], const float b[3])
+{ /* r = a - b; both operands are widened to double before subtracting. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = (double)a[i] - (double)b[i];
+  }
+}
+
+/* r -= a, component-wise. */
+MINLINE void sub_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] -= a[i];
+  }
+}
+
+/* r = a - b, component-wise. */
+MINLINE void sub_v4_v4v4(float r[4], const float a[4], const float b[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i] - b[i];
+  }
+}
+
+/* Scale both components of r by f, in place. */
+MINLINE void mul_v2_fl(float r[2], float f)
+{
+  for (int i = 0; i < 2; i++) { r[i] *= f; }
+}
+
+/* r = a * f, component-wise. */
+MINLINE void mul_v2_v2fl(float r[2], const float a[2], float f)
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] * f; }
+}
+
+MINLINE void mul_v3_fl(float r[3], float f)
+{ /* Scale the three components of r by f, in place. */
+  for (int i = 0; i < 3; i++) {
+    r[i] *= f;
+  }
+}
+
+MINLINE void mul_v3_v3fl(float r[3], const float a[3], float f)
+{ /* r = a * f, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] * f;
+  }
+}
+
+/* r *= a, component-wise. */
+MINLINE void mul_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] *= a[i]; }
+}
+
+MINLINE void mul_v3_v3(float r[3], const float a[3])
+{ /* r *= a, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] *= a[i];
+  }
+}
+
+/* Scale the four components of r by f, in place. */
+MINLINE void mul_v4_fl(float r[4], float f)
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] *= f;
+  }
+}
+
+/* r *= a, component-wise. */
+MINLINE void mul_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] *= a[i];
+  }
+}
+
+/* r = a * f, component-wise. */
+MINLINE void mul_v4_v4fl(float r[4], const float a[4], float f)
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = a[i] * f;
+  }
+}
+
+/**
+ * Avoid doing:
+ *
+ * angle = atan2f(dvec[0], dvec[1]);
+ * angle_to_mat2(mat, angle);
+ *
+ * instead use a vector as a matrix.
+ */
+
+MINLINE void mul_v2_v2_cw(float r[2], const float mat[2], const float vec[2])
+{
+  BLI_assert(r != vec);
+  /* r[0] is stored before r[1] reads vec[0] and mat[0]: r must not alias vec (nor mat). */
+  r[0] = mat[0] * vec[0] + (+mat[1]) * vec[1];
+  r[1] = mat[1] * vec[0] + (-mat[0]) * vec[1];
+}
+
+MINLINE void mul_v2_v2_ccw(float r[2], const float mat[2], const float vec[2])
+{
+  BLI_assert(r != vec);
+  /* r[0] is stored before r[1] reads vec[0] and mat[0]: r must not alias vec (nor mat). */
+  r[0] = mat[0] * vec[0] + (-mat[1]) * vec[1];
+  r[1] = mat[1] * vec[0] + (+mat[0]) * vec[1];
+}
+
+/**
+ * Convenience function to get the projected depth of a position.
+ * This avoids creating a temporary 4D vector and multiplying it - only for the 4th component.
+ *
+ * Matches logic for:
+ *
+ * \code{.c}
+ * float co_4d[4] = {co[0], co[1], co[2], 1.0};
+ * mul_m4_v4(mat, co_4d);
+ * return co_4d[3];
+ * \endcode
+ */
+MINLINE float mul_project_m4_v3_zfac(const float mat[4][4], const float co[3])
+{ /* Sum of mat[i][3] * co[i] for i = 0..2, plus mat[3][3] (the implicit w = 1 term). */
+  return (mat[0][3] * co[0]) + (mat[1][3] * co[1]) + (mat[2][3] * co[2]) + mat[3][3];
+}
+
+/**
+ * Has the effect of #mul_m3_v3(), on a single axis.
+ */
+MINLINE float dot_m3_v3_row_x(const float M[3][3], const float a[3])
+{ /* Sum of M[i][0] * a[i] for i = 0..2. */
+  return M[0][0] * a[0] + M[1][0] * a[1] + M[2][0] * a[2];
+}
+MINLINE float dot_m3_v3_row_y(const float M[3][3], const float a[3])
+{ /* Sum of M[i][1] * a[i] for i = 0..2. */
+  return M[0][1] * a[0] + M[1][1] * a[1] + M[2][1] * a[2];
+}
+MINLINE float dot_m3_v3_row_z(const float M[3][3], const float a[3])
+{ /* Sum of M[i][2] * a[i] for i = 0..2. */
+  return M[0][2] * a[0] + M[1][2] * a[1] + M[2][2] * a[2];
+}
+
+/**
+ * Has the effect of #mul_mat3_m4_v3(), on a single axis.
+ * (no adding translation)
+ */
+MINLINE float dot_m4_v3_row_x(const float M[4][4], const float a[3])
+{ /* Sum of M[i][0] * a[i] for i = 0..2; M[3][*] is not read. */
+  return M[0][0] * a[0] + M[1][0] * a[1] + M[2][0] * a[2];
+}
+MINLINE float dot_m4_v3_row_y(const float M[4][4], const float a[3])
+{ /* Sum of M[i][1] * a[i] for i = 0..2; M[3][*] is not read. */
+  return M[0][1] * a[0] + M[1][1] * a[1] + M[2][1] * a[2];
+}
+MINLINE float dot_m4_v3_row_z(const float M[4][4], const float a[3])
+{ /* Sum of M[i][2] * a[i] for i = 0..2; M[3][*] is not read. */
+  return M[0][2] * a[0] + M[1][2] * a[1] + M[2][2] * a[2];
+}
+
+/* r += a * f, component-wise. */
+MINLINE void madd_v2_v2fl(float r[2], const float a[2], float f)
+{
+  for (int i = 0; i < 2; i++) { r[i] += a[i] * f; }
+}
+
+MINLINE void madd_v3_v3fl(float r[3], const float a[3], float f)
+{ /* r += a * f, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] += a[i] * f;
+  }
+}
+
+MINLINE void madd_v3_v3v3(float r[3], const float a[3], const float b[3])
+{ /* r += a * b, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] += a[i] * b[i];
+  }
+}
+
+/* r = a + b * f, component-wise. */
+MINLINE void madd_v2_v2v2fl(float r[2], const float a[2], const float b[2], float f)
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] + b[i] * f; }
+}
+
+MINLINE void madd_v3_v3v3fl(float r[3], const float a[3], const float b[3], float f)
+{ /* r = a + b * f, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + b[i] * f;
+  }
+}
+
+MINLINE void madd_v3_v3v3v3(float r[3], const float a[3], const float b[3], const float c[3])
+{ /* r = a + b * c, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + b[i] * c[i];
+  }
+}
+
+MINLINE void madd_v3fl_v3fl_v3fl_v3i(float r[3],
+                                     const float a[3],
+                                     const float b[3],
+                                     const int c[3])
+{ /* r = a + b * c, with each int of c converted to float first. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[i] + b[i] * (float)c[i];
+  }
+}
+
+/* r += a * f, component-wise. */
+MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] += a[i] * f;
+  }
+}
+
+/* r += a * b, component-wise. */
+MINLINE void madd_v4_v4v4(float r[4], const float a[4], const float b[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] += a[i] * b[i];
+  }
+}
+
+MINLINE void mul_v3_v3v3(float r[3], const float v1[3], const float v2[3])
+{ /* r = v1 * v2, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = v1[i] * v2[i];
+  }
+}
+
+/* r = a * b, component-wise. */
+MINLINE void mul_v2_v2v2(float r[2], const float a[2], const float b[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = a[i] * b[i]; }
+}
+
+/* Negate both components of r, in place. */
+MINLINE void negate_v2(float r[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = -r[i]; }
+}
+
+/* r = -a, component-wise. */
+MINLINE void negate_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = -a[i]; }
+}
+
+MINLINE void negate_v3(float r[3])
+{ /* Negate the three components of r, in place. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = -r[i];
+  }
+}
+
+MINLINE void negate_v3_v3(float r[3], const float a[3])
+{ /* r = -a, component-wise. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = -a[i];
+  }
+}
+
+/* Negate the four components of r, in place. */
+MINLINE void negate_v4(float r[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = -r[i];
+  }
+}
+
+/* r = -a, component-wise. */
+MINLINE void negate_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = -a[i];
+  }
+}
+
+/* could add more... */
+MINLINE void negate_v3_short(short r[3])
+{ /* Negate the three shorts of r in place; the result is cast back to short. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = (short)-r[i];
+  }
+}
+
+MINLINE void negate_v3_db(double r[3])
+{ /* Negate the three doubles of r, in place. */
+  for (int i = 0; i < 3; i++) {
+    r[i] = -r[i];
+  }
+}
+
+/* Replace each component of r by its reciprocal; no component may be zero. */
+MINLINE void invert_v2(float r[2])
+{
+  BLI_assert(!ELEM(0.0f, r[0], r[1]));
+  for (int i = 0; i < 2; i++) { r[i] = 1.0f / r[i]; }
+}
+
+MINLINE void invert_v3(float r[3])
+{ /* Replace each component of r by its reciprocal; no component may be zero. */
+  BLI_assert(!ELEM(0.0f, r[0], r[1], r[2]));
+  for (int i = 0; i < 3; i++) {
+    r[i] = 1.0f / r[i];
+  }
+}
+
+/* Replace both components of r by their absolute value (fabsf). */
+MINLINE void abs_v2(float r[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = fabsf(r[i]); }
+}
+
+/* r = |a|, component-wise (fabsf). */
+MINLINE void abs_v2_v2(float r[2], const float a[2])
+{
+  for (int i = 0; i < 2; i++) { r[i] = fabsf(a[i]); }
+}
+
+MINLINE void abs_v3(float r[3])
+{ /* Replace the three components of r by their absolute value (fabsf). */
+  for (int i = 0; i < 3; i++) {
+    r[i] = fabsf(r[i]);
+  }
+}
+
+MINLINE void abs_v3_v3(float r[3], const float a[3])
+{ /* r = |a|, component-wise (fabsf). */
+  for (int i = 0; i < 3; i++) {
+    r[i] = fabsf(a[i]);
+  }
+}
+
+/* Replace the four components of r by their absolute value (fabsf). */
+MINLINE void abs_v4(float r[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = fabsf(r[i]);
+  }
+}
+
+/* r = |a|, component-wise (fabsf). */
+MINLINE void abs_v4_v4(float r[4], const float a[4])
+{
+  for (int i = 0; i < 4; i++) {
+    r[i] = fabsf(a[i]);
+  }
+}
+
+MINLINE float dot_v2v2(const float a[2], const float b[2])
+{ /* 2D dot product: sum of the component-wise products of a and b. */
+  return a[0] * b[0] + a[1] * b[1];
+}
+
+MINLINE float dot_v3v3(const float a[3], const float b[3])
+{ /* 3D dot product: sum of the component-wise products of a and b. */
+  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+/* Dot product of (a - p) and (b - p); yields 0.0f when either difference
+ * vector is exactly zero. */
+MINLINE float dot_v3v3v3(const float p[3], const float a[3], const float b[3])
+{
+  float to_a[3], to_b[3];
+  sub_v3_v3v3(to_a, a, p);
+  sub_v3_v3v3(to_b, b, p);
+
+  const bool degenerate = is_zero_v3(to_a) || is_zero_v3(to_b);
+  return degenerate ? 0.0f : dot_v3v3(to_a, to_b);
+}
+
+MINLINE float dot_v4v4(const float a[4], const float b[4])
+{ /* 4D dot product: sum of the component-wise products of a and b. */
+  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+MINLINE double dot_v3db_v3fl(const double a[3], const float b[3])
+{ /* 3D dot product in double; each float of b is widened before multiplying. */
+  return a[0] * (double)b[0] + a[1] * (double)b[1] + a[2] * (double)b[2];
+}
+
+MINLINE float cross_v2v2(const float a[2], const float b[2])
+{ /* Scalar 2D cross product: a.x * b.y - a.y * b.x. */
+  return a[0] * b[1] - a[1] * b[0];
+}
+
+MINLINE void cross_v3_v3v3(float r[3], const float a[3], const float b[3])
+{ /* r = a x b. r is written one component at a time, so it must not alias a or b. */
+  BLI_assert(r != a && r != b);
+  for (int i = 0; i < 3; i++) {
+    r[i] = a[(i + 1) % 3] * b[(i + 2) % 3] - a[(i + 2) % 3] * b[(i + 1) % 3];
+  }
+}
+
+/* cross product suffers from severe precision loss when vectors are
+ * nearly parallel or opposite; doing the computation in double helps a lot */
+MINLINE void cross_v3_v3v3_hi_prec(float r[3], const float a[3], const float b[3])
+{ /* r = a x b; products and differences are evaluated in double, then narrowed to float. */
+  BLI_assert(r != a && r != b);
+  r[0] = (float)((double)a[1] * (double)b[2] - (double)a[2] * (double)b[1]);
+  r[1] = (float)((double)a[2] * (double)b[0] - (double)a[0] * (double)b[2]);
+  r[2] = (float)((double)a[0] * (double)b[1] - (double)a[1] * (double)b[0]);
+}
+
+/* Newell's Method */
+/* excuse this fairly specific function,
+ * it's used for polygon normals all over the place
+ * could use a better name */
+MINLINE void add_newell_cross_v3_v3v3(float n[3], const float v_prev[3], const float v_curr[3])
+{ /* Accumulates into n (+=): one (difference * sum) term per axis for the edge v_prev -> v_curr. */
+  n[0] += (v_prev[1] - v_curr[1]) * (v_prev[2] + v_curr[2]);
+  n[1] += (v_prev[2] - v_curr[2]) * (v_prev[0] + v_curr[0]);
+  n[2] += (v_prev[0] - v_curr[0]) * (v_prev[1] + v_curr[1]);
+}
+
+MINLINE void star_m3_v3(float rmat[3][3], float a[3])
+{ /* Fills rmat with a skew-symmetric matrix: zero diagonal, +/- components of a elsewhere. */
+  rmat[0][0] = rmat[1][1] = rmat[2][2] = 0.0;
+  rmat[0][1] = -a[2];
+  rmat[0][2] = a[1];
+  rmat[1][0] = a[2];
+  rmat[1][2] = -a[0];
+  rmat[2][0] = -a[1];
+  rmat[2][1] = a[0];
+}
+
+/*********************************** Length **********************************/
+
+MINLINE float len_squared_v2(const float v[2])
+{ /* Squared length of v, i.e. v dotted with itself. */
+  return dot_v2v2(v, v);
+}
+
+MINLINE float len_squared_v3(const float v[3])
+{ /* Squared length of v, i.e. v dotted with itself. */
+  return dot_v3v3(v, v);
+}
+
+MINLINE float len_manhattan_v2(const float v[2])
+{ /* Sum of the absolute values of both components. */
+  return fabsf(v[0]) + fabsf(v[1]);
+}
+
+MINLINE int len_manhattan_v2_int(const int v[2])
+{ /* Sum of the absolute values of both int components. */
+  return abs(v[0]) + abs(v[1]);
+}
+
+MINLINE float len_manhattan_v3(const float v[3])
+{ /* Sum of the absolute values of the three components. */
+  return fabsf(v[0]) + fabsf(v[1]) + fabsf(v[2]);
+}
+
+MINLINE float len_v2(const float v[2])
+{ /* Euclidean length of v: square root of v dotted with itself. */
+  return sqrtf(dot_v2v2(v, v));
+}
+
+/* Euclidean distance between the 2D points v1 and v2. */
+MINLINE float len_v2v2(const float v1[2], const float v2[2])
+{
+  const float dx = v1[0] - v2[0];
+  const float dy = v1[1] - v2[1];
+
+  return sqrtf(dx * dx + dy * dy);
+}
+
+/* Euclidean distance between two int points; deltas are taken in int, then cast to float. */
+MINLINE float len_v2v2_int(const int v1[2], const int v2[2])
+{
+  const float dx = (float)(v1[0] - v2[0]);
+  const float dy = (float)(v1[1] - v2[1]);
+
+  return sqrtf(dx * dx + dy * dy);
+}
+
+MINLINE float len_v3(const float a[3])
+{ /* Euclidean length of a: square root of its squared length. */
+  return sqrtf(len_squared_v3(a));
+}
+
+/* Squared distance between a and b (squared length of b - a). */
+MINLINE float len_squared_v2v2(const float a[2], const float b[2])
+{
+  const float dx = b[0] - a[0];
+  const float dy = b[1] - a[1];
+  return dx * dx + dy * dy;
+}
+
+/* Squared distance between a and b (squared length of b - a). */
+MINLINE float len_squared_v3v3(const float a[3], const float b[3])
+{
+  const float d[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]};
+
+  return dot_v3v3(d, d);
+}
+
+/* Squared distance between a and b (squared length of b - a). */
+MINLINE float len_squared_v4v4(const float a[4], const float b[4])
+{
+  const float d[4] = {b[0] - a[0], b[1] - a[1], b[2] - a[2], b[3] - a[3]};
+
+  return dot_v4v4(d, d);
+}
+
+/* Manhattan distance between a and b: len_manhattan_v2() of b - a. */
+MINLINE float len_manhattan_v2v2(const float a[2], const float b[2])
+{
+  const float d[2] = {b[0] - a[0], b[1] - a[1]};
+
+  return len_manhattan_v2(d);
+}
+
+/* Manhattan distance between int points: len_manhattan_v2_int() of b - a. */
+MINLINE int len_manhattan_v2v2_int(const int a[2], const int b[2])
+{
+  const int d[2] = {b[0] - a[0], b[1] - a[1]};
+
+  return len_manhattan_v2_int(d);
+}
+
+/* Manhattan distance between a and b: len_manhattan_v3() of b - a. */
+MINLINE float len_manhattan_v3v3(const float a[3], const float b[3])
+{
+  const float d[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]};
+
+  return len_manhattan_v3(d);
+}
+
+/* Euclidean distance between a and b: len_v3() of b - a. */
+MINLINE float len_v3v3(const float a[3], const float b[3])
+{
+  const float d[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]};
+
+  return len_v3(d);
+}
+
+/* Scale a to length unit_length, store it in r and return the length a had.
+ * When the squared length of a is not above 1.0e-35f, r is zeroed and 0.0f is
+ * returned instead. */
+MINLINE float normalize_v2_v2_length(float r[2], const float a[2], const float unit_length)
+{
+  const float len_sq = dot_v2v2(a, a);
+  if (!(len_sq > 1.0e-35f)) {
+    zero_v2(r);
+    return 0.0f;
+  }
+
+  const float len = sqrtf(len_sq);
+  mul_v2_v2fl(r, a, unit_length / len);
+  return len;
+}
+MINLINE float normalize_v2_v2(float r[2], const float a[2])
+{ /* normalize_v2_v2_length() with a target length of 1.0f. */
+  return normalize_v2_v2_length(r, a, 1.0f);
+}
+
+MINLINE float normalize_v2(float n[2])
+{ /* In-place form: n is both output and input of normalize_v2_v2(). */
+  return normalize_v2_v2(n, n);
+}
+
+MINLINE float normalize_v2_length(float n[2], const float unit_length)
+{ /* In-place form: n is both output and input of normalize_v2_v2_length(). */
+  return normalize_v2_v2_length(n, n, unit_length);
+}
+
+/* Scale a to length unit_length, store it in r and return the length a had.
+ * When the squared length of a is not above the threshold, r is zeroed and
+ * 0.0f is returned instead. */
+MINLINE float normalize_v3_v3_length(float r[3], const float a[3], const float unit_length)
+{
+  const float len_sq = dot_v3v3(a, a);
+
+  /* The threshold is kept tiny on purpose: a larger value causes normalize
+   * errors in scaled down models with the camera extremely close. */
+  if (!(len_sq > 1.0e-35f)) {
+    zero_v3(r);
+    return 0.0f;
+  }
+  const float len = sqrtf(len_sq);
+  mul_v3_v3fl(r, a, unit_length / len);
+  return len;
+}
+MINLINE float normalize_v3_v3(float r[3], const float a[3])
+{ /* normalize_v3_v3_length() with a target length of 1.0f. */
+  return normalize_v3_v3_length(r, a, 1.0f);
+}
+
+/* Double-precision, in-place: rescale n to length unit_length and return the
+ * length n had on entry. If the squared length is not above 1.0e-35, n is
+ * zeroed and 0.0 is returned. */
+MINLINE double normalize_v3_length_d(double n[3], const double unit_length)
+{
+  const double len_sq = n[0] * n[0] + n[1] * n[1] + n[2] * n[2];
+
+  /* The threshold is kept tiny on purpose: a larger value causes normalize
+   * errors in scaled down models with the camera extremely close. */
+  if (!(len_sq > 1.0e-35)) {
+    for (int i = 0; i < 3; i++) {
+      n[i] = 0;
+    }
+    return 0.0;
+  }
+
+  const double len = sqrt(len_sq);
+  const double scale = unit_length / len;
+  for (int i = 0; i < 3; i++) {
+    n[i] *= scale;
+  }
+  return len;
+}
+MINLINE double normalize_v3_d(double n[3])
+{ /* normalize_v3_length_d() with a target length of 1.0. */
+  return normalize_v3_length_d(n, 1.0);
+}
+
+MINLINE float normalize_v3_length(float n[3], const float unit_length)
+{ /* In-place form: n is both output and input of normalize_v3_v3_length(). */
+  return normalize_v3_v3_length(n, n, unit_length);
+}
+
+MINLINE float normalize_v3(float n[3])
+{ /* In-place form: n is both output and input of normalize_v3_v3(). */
+  return normalize_v3_v3(n, n);
+}
+
+/* out = (short)(in * 32767.0f) per component; no range clamping is done here. */
+MINLINE void normal_float_to_short_v2(short out[2], const float in[2])
+{
+  for (int i = 0; i < 2; i++) { out[i] = (short)(in[i] * 32767.0f); }
+}
+
+MINLINE void normal_short_to_float_v3(float out[3], const short in[3])
+{ /* out = in * (1 / 32767) per component. */
+  for (int i = 0; i < 3; i++) {
+    out[i] = in[i] * (1.0f / 32767.0f);
+  }
+}
+
+MINLINE void normal_float_to_short_v3(short out[3], const float in[3])
+{ /* out = (short)(in * 32767.0f) per component; no range clamping is done here. */
+  for (int i = 0; i < 3; i++) {
+    out[i] = (short)(in[i] * 32767.0f);
+  }
+}
+
+/* out = (short)(in * 32767.0f) per component; no range clamping is done here. */
+MINLINE void normal_float_to_short_v4(short out[4], const float in[4])
+{
+  for (int i = 0; i < 4; i++) {
+    out[i] = (short)(in[i] * 32767.0f);
+  }
+}
+
+/********************************* Comparison ********************************/
+
+MINLINE bool is_zero_v2(const float v[2])
+{ /* True when both components compare equal to 0.0f. */
+  return !(v[0] != 0.0f || v[1] != 0.0f);
+}
+
+MINLINE bool is_zero_v3(const float v[3])
+{ /* True when all three components compare equal to 0.0f. */
+  return !(v[0] != 0.0f || v[1] != 0.0f || v[2] != 0.0f);
+}
+
+MINLINE bool is_zero_v4(const float v[4])
+{ /* True when all four components compare equal to 0.0f. */
+  return !(v[0] != 0.0f || v[1] != 0.0f || v[2] != 0.0f || v[3] != 0.0f);
+}
+
+MINLINE bool is_one_v3(const float v[3])
+{ /* True when all three components compare equal to 1.0f. */
+  return !(v[0] != 1.0f || v[1] != 1.0f || v[2] != 1.0f);
+}
+
+/** \name Vector Comparison
+ *
+ * \note use ``value <= limit``, so a limit of zero doesn't fail on an exact match.
+ * \{ */
+
+MINLINE bool equals_v2v2(const float v1[2], const float v2[2])
+{ /* Exact (==) comparison of both component pairs. */
+  return !((v1[0] != v2[0]) || (v1[1] != v2[1]));
+}
+
+MINLINE bool equals_v3v3(const float v1[3], const float v2[3])
+{ /* Exact (==) comparison of all three component pairs. */
+  return !((v1[0] != v2[0]) || (v1[1] != v2[1]) || (v1[2] != v2[2]));
+}
+
+MINLINE bool equals_v4v4(const float v1[4], const float v2[4])
+{ /* Exact (==) comparison of all four component pairs. */
+  return !((v1[0] != v2[0]) || (v1[1] != v2[1]) || (v1[2] != v2[2]) || (v1[3] != v2[3]));
+}
+
+MINLINE bool compare_v2v2(const float v1[2], const float v2[2], const float limit)
+{ /* True only if compare_ff() accepts both component pairs for the given limit. */
+  return (compare_ff(v1[0], v2[0], limit) && compare_ff(v1[1], v2[1], limit));
+}
+
+MINLINE bool compare_v3v3(const float v1[3], const float v2[3], const float limit)
+{ /* True only if compare_ff() accepts all three component pairs; stops at the first rejection. */
+  return (compare_ff(v1[0], v2[0], limit) && compare_ff(v1[1], v2[1], limit) &&
+          compare_ff(v1[2], v2[2], limit));
+}
+
+MINLINE bool compare_v4v4(const float v1[4], const float v2[4], const float limit)
+{ /* True only if compare_ff() accepts all four component pairs; stops at the first rejection. */
+  return (compare_ff(v1[0], v2[0], limit) && compare_ff(v1[1], v2[1], limit) &&
+          compare_ff(v1[2], v2[2], limit) && compare_ff(v1[3], v2[3], limit));
+}
+
+MINLINE bool compare_v2v2_relative(const float v1[2],
+                                   const float v2[2],
+                                   const float limit,
+                                   const int max_ulps)
+{ /* True only if compare_ff_relative() accepts both component pairs (same limit/max_ulps). */
+  return (compare_ff_relative(v1[0], v2[0], limit, max_ulps) &&
+          compare_ff_relative(v1[1], v2[1], limit, max_ulps));
+}
+
+MINLINE bool compare_v3v3_relative(const float v1[3],
+                                   const float v2[3],
+                                   const float limit,
+                                   const int max_ulps)
+{ /* True only if compare_ff_relative() accepts all three component pairs. */
+  return (compare_ff_relative(v1[0], v2[0], limit, max_ulps) &&
+          compare_ff_relative(v1[1], v2[1], limit, max_ulps) &&
+          compare_ff_relative(v1[2], v2[2], limit, max_ulps));
+}
+
+MINLINE bool compare_v4v4_relative(const float v1[4],
+                                   const float v2[4],
+                                   const float limit,
+                                   const int max_ulps)
+{ /* True only if compare_ff_relative() accepts all four component pairs. */
+  return (compare_ff_relative(v1[0], v2[0], limit, max_ulps) &&
+          compare_ff_relative(v1[1], v2[1], limit, max_ulps) &&
+          compare_ff_relative(v1[2], v2[2], limit, max_ulps) &&
+          compare_ff_relative(v1[3], v2[3], limit, max_ulps));
+}
+
+/* True when the squared distance between v1 and v2 is at most limit squared. */
+MINLINE bool compare_len_v3v3(const float v1[3], const float v2[3], const float limit)
+{
+  const float d[3] = {v1[0] - v2[0], v1[1] - v2[1], v1[2] - v2[2]};
+  return (dot_v3v3(d, d) <= (limit * limit));
+}
+
+/**
+ * <pre>
+ *        + l1
+ *        |
+ * neg <- | -> pos
+ *        |
+ *        + l2
+ * </pre>
+ *
+ * \return Positive value when 'pt' is left-of-line
+ * (looking from 'l1' -> 'l2').
+ */
+MINLINE float line_point_side_v2(const float l1[2], const float l2[2], const float pt[2])
+{ /* 2D cross product of (l1 - pt) and (l2 - pt); its sign tells the side. */
+  return (((l1[0] - pt[0]) * (l2[1] - pt[1])) - ((l2[0] - pt[0]) * (l1[1] - pt[1])));
+}
+
+/** \} */
+
+#endif /* __MATH_VECTOR_INLINE_C__ */
diff --git a/client/ultragrid/CMakeLists.txt b/client/ultragrid/CMakeLists.txt
index b42d3e3dc21381ac842d3a6ec0c4ec9cec28d1d1..f784ddd8211e8972aec2b8b4fd7d802fc0561b5a 100644
--- a/client/ultragrid/CMakeLists.txt
+++ b/client/ultragrid/CMakeLists.txt
@@ -3,6 +3,9 @@ cmake_minimum_required(VERSION 3.10)
 set(INC
 	.
   ../renderengine/src
+  ../../source/blender/blenlib
+  ../../source/blender/makesdna
+  ${CUDA_INCLUDE_DIRS}
 )
 
 set(SRC
@@ -28,7 +31,9 @@ endif()
 
 if(WITH_CLIENT_CUDA)
 	add_definitions(-DWITH_CLIENT_CUDA)
-	enable_language(CUDA)
+	if(NOT WIN32) # NOTE(review): CUDA language is not enabled on Windows builds - confirm why
+		enable_language(CUDA)
+	endif()
 endif()
 
 if(WITH_CLIENT_CESNET)
diff --git a/client/ultragrid/platform_ipc.c b/client/ultragrid/platform_ipc.c
index c9502b78f5deef99edaf13cbb894ecee1e9c44c3..fb1b694168d914909e1fddf350b598c94b8bdc0f 100644
--- a/client/ultragrid/platform_ipc.c
+++ b/client/ultragrid/platform_ipc.c
@@ -48,7 +48,6 @@
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-#include <stdio.h>
 #include <sys/sem.h>
 #include <sys/shm.h>
 #include <sys/stat.h>
@@ -58,6 +57,7 @@
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <string.h>
 
 #include "platform_ipc.h"
 
@@ -259,14 +259,15 @@ static platform_ipc_sem_t platform_ipc_sem_open_common(const char *id, int index
 platform_ipc_sem_t platform_ipc_sem_create(const char *id, int index)
 {
 #ifdef _WIN32
-        char name[strlen(id) + 21 + 1];
-        sprintf(name, "%s-%d", id, index);
+        /* No VLA here: fixed-size buffer, written with a bounded snprintf so a long id cannot overflow it. */
+        char name[1024];
+        snprintf(name, sizeof(name), "%s-%d", id, index);
 
         HANDLE ghSemaphore = CreateSemaphore(
                         NULL,           // default security attributes
                         0,              // initial count
                         (1<<15) - 1,    // maximum count
-                        name);          // unnamed semaphore
+                        name);          // named semaphore ("<id>-<index>")
 
         if (ghSemaphore == NULL) {
                 printf("CreateSemaphore error: %d\n", GetLastError());
@@ -283,7 +284,8 @@ platform_ipc_sem_t platform_ipc_sem_create(const char *id, int index)
 platform_ipc_sem_t platform_ipc_sem_open(const char *id, int index)
 {
 #ifdef _WIN32
-        char name[strlen(id) + 21 + 1];
-        sprintf(name, "%s-%d", id, index);
+        /* No VLA here: fixed-size buffer, written with a bounded snprintf so a long id cannot overflow it. */
+        char name[1024];
+        snprintf(name, sizeof(name), "%s-%d", id, index);
 
         HANDLE ghSemaphore = OpenSemaphore(
diff --git a/client/ultragrid/ultragrid.cpp b/client/ultragrid/ultragrid.cpp
index 2713283dd0fec4e75219084edbb708befbb5a153..e262911074d73ecdf44e7429ce186bf8da1a2416 100644
--- a/client/ultragrid/ultragrid.cpp
+++ b/client/ultragrid/ultragrid.cpp
@@ -6,12 +6,15 @@
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
+
 #ifdef WITH_CLIENT_CUDA
 # include <cuda_runtime.h>
 #endif
 
 #include "platform_ipc.h"
 #include "vrgstream.h"
+#include "BLI_math_vector.h"
+#include "BLI_math_rotation.h"
 
 constexpr const int BUFFERS = 2;
 #define KEY "UltraGrid-SHM"
@@ -160,6 +163,13 @@ struct cesnet_shm {
   struct shm_frame frames[BUFFERS];
 };
 
+void cesnet_set_inverse_matrix(float* transform_inverse_view_matrix, float* position, float *orientation)
+{
+    float view_mat[4][4]; // local 4x4 matrix assembled from 'orientation' and 'position'
+    quat_to_mat4(view_mat, orientation); // fills view_mat from 'orientation' (presumably a quaternion - confirm component order with the caller)
+    copy_v3_v3(view_mat[3], position); // stores 'position' in view_mat[3]
+} // NOTE(review): transform_inverse_view_matrix is never written and view_mat is discarded on return - looks unfinished, confirm intent
+
 // camera view matrix
 // vector is on 3,7,11 position
 void cesnet_set_camera_data(cyclesphi::cyclesphi_data *cdata)
diff --git a/client/ultragrid/ultragrid.h b/client/ultragrid/ultragrid.h
index 8624355c19fa92789aef947b50e0f2d8d714f6d9..c4889a4ee2d170f6fe217953d7c3bc67834ce5d8 100644
--- a/client/ultragrid/ultragrid.h
+++ b/client/ultragrid/ultragrid.h
@@ -5,6 +5,7 @@
 
 bool cesnet_is_required_exit();
 void cesnet_set_camera_data(cyclesphi::cyclesphi_data *cdata);
+void cesnet_set_inverse_matrix(float* transform_inverse_view_matrix, float* position, float* orientation);
 
 #ifdef WITH_RGBA_FORMAT
 void cesnet_set_render_buffer_rgba(unsigned char *rgba, int width, int height);