Update to GLM v0.9.9.7

- removed GLM_LICENSE from MANIFEST.in ~ Bumped version
Zuzu-Typ · Jan 8, 2020 · 66cc02f
1 parent fbb202e
commit 66cc02f
Show file tree

Hide file tree

Showing 24 changed files with 620 additions and 166 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,7 +1,6 @@
 include LICENSE
 include COPYING
 include README.md
-include GLM_LICENSE
 include setup.py
 include setup.cfg
 recursive-include glm *
diff --git a/PyGLM.cpp b/PyGLM.cpp
@@ -1,4 +1,4 @@
-#define PyGLM_VERSION "1.1.4"
+#define PyGLM_VERSION "1.1.5"
 
 #define PyGLM_LICENSE "PyGLM license information:\n"\
 "\n"\

diff --git a/glm/glm/CMakeLists.txt b/glm/glm/CMakeLists.txt
@@ -42,7 +42,8 @@ source_group("SIMD Files" FILES ${SIMD_SOURCE})
 source_group("SIMD Files" FILES ${SIMD_INLINE})
 source_group("SIMD Files" FILES ${SIMD_HEADER})
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..)
+add_library(glm INTERFACE)
+target_include_directories(glm INTERFACE ../)
 
 if(BUILD_STATIC_LIBS)
 add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
@@ -52,6 +53,8 @@ add_library(glm_static STATIC ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
 	${GTC_SOURCE}     ${GTC_INLINE}     ${GTC_HEADER}
 	${GTX_SOURCE}     ${GTX_INLINE}     ${GTX_HEADER}
 	${SIMD_SOURCE}    ${SIMD_INLINE}    ${SIMD_HEADER})
+	target_link_libraries(glm_static PUBLIC glm)
+	add_library(glm::glm_static ALIAS glm_static)
 endif()
 
 if(BUILD_SHARED_LIBS)
@@ -62,5 +65,6 @@ add_library(glm_shared SHARED ${ROOT_TEXT} ${ROOT_MD} ${ROOT_NAT}
 	${GTC_SOURCE}     ${GTC_INLINE}     ${GTC_HEADER}
 	${GTX_SOURCE}     ${GTX_INLINE}     ${GTX_HEADER}
 	${SIMD_SOURCE}    ${SIMD_INLINE}    ${SIMD_HEADER})
+	target_link_libraries(glm_shared PUBLIC glm)
+	add_library(glm::glm_shared ALIAS glm_shared)
 endif()
-
diff --git a/glm/glm/detail/func_common.inl b/glm/glm/detail/func_common.inl
@@ -287,7 +287,8 @@ namespace detail
 			std::numeric_limits<genFIType>::is_iec559 || (std::numeric_limits<genFIType>::is_signed && std::numeric_limits<genFIType>::is_integer),
 			"'sign' only accept signed inputs");
 
-		return detail::compute_sign<1, genFIType, defaultp, std::numeric_limits<genFIType>::is_iec559, highp>::call(vec<1, genFIType>(x)).x;
+		return detail::compute_sign<1, genFIType, defaultp,
+                                    std::numeric_limits<genFIType>::is_iec559, detail::is_aligned<highp>::value>::call(vec<1, genFIType>(x)).x;
 	}
 
 	template<length_t L, typename T, qualifier Q>
@@ -737,11 +738,15 @@ namespace detail
 		return reinterpret_cast<vec<L, float, Q>&>(const_cast<vec<L, uint, Q>&>(v));
 	}
 
-	template<typename genType>
-	GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c)
-	{
-		return a * b + c;
-	}
+#	if GLM_HAS_CXX11_STL // when this flag is set, fma resolves to the standard library function
+		using std::fma; // NOTE(review): std::fma takes arithmetic types only; confirm vector arguments are handled elsewhere
+#	else // otherwise provide a generic fallback
+		template<typename genType>
+		GLM_FUNC_QUALIFIER genType fma(genType const& a, genType const& b, genType const& c) // fallback: returns a * b + c
+		{
+			return a * b + c; // written as an ordinary multiply followed by an add
+		}
+#	endif
 
 	template<typename genType>
 	GLM_FUNC_QUALIFIER genType frexp(genType x, int& exp)

diff --git a/glm/glm/detail/func_geometric_simd.inl b/glm/glm/detail/func_geometric_simd.inl
@@ -96,4 +96,70 @@ namespace detail
 }//namespace detail
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+namespace glm{
+namespace detail
+{
+	template<qualifier Q>
+	struct compute_length<4, float, Q, true> // NEON specialization: Euclidean length of a 4-float vector
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& v) // returns |v|
+		{
+			return sqrt(compute_dot<vec<4, float, Q>, float, true>::call(v, v)); // dot(v, v) is the squared length, so take its square root
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_distance<4, float, Q, true> // NEON specialization for 4-float vectors (last flag presumably selects the aligned path; confirm against the primary template)
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& p0, vec<4, float, Q> const& p1) // p0, p1: the two points to compare
+		{
+			return compute_length<4, float, Q, true>::call(p1 - p0); // forwards the difference vector to the matching compute_length specialization
+		}
+	};
+
+
+	template<qualifier Q>
+	struct compute_dot<vec<4, float, Q>, float, true> // NEON specialization: dot product of two 4-float vectors
+	{
+		GLM_FUNC_QUALIFIER static float call(vec<4, float, Q> const& x, vec<4, float, Q> const& y) // returns the sum of x[i] * y[i] over the four lanes
+		{
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			float32x4_t v = vmulq_f32(x.data, y.data); // lane-wise products
+			v = vpaddq_f32(v, v); // pairwise add: lanes become (p0+p1, p2+p3, p0+p1, p2+p3)
+			v = vpaddq_f32(v, v); // second pairwise add: every lane now holds the full sum
+			return vgetq_lane_f32(v, 0);
+#else  // Armv7a with Neon
+			float32x4_t p = vmulq_f32(x.data, y.data); // lane-wise products
+			float32x2_t v = vpadd_f32(vget_low_f32(p), vget_high_f32(p)); // (p0+p1, p2+p3) from the two 64-bit halves
+			v = vpadd_f32(v, v); // both lanes now hold the full sum
+			return vget_lane_f32(v, 0);
+#endif
+		}
+	};
+
+	template<qualifier Q>
+	struct compute_normalize<4, float, Q, true> // NEON specialization: scales v by an estimated 1/sqrt(dot(v, v))
+	{
+		GLM_FUNC_QUALIFIER static vec<4, float, Q> call(vec<4, float, Q> const& v) // returns v multiplied lane-wise by the reciprocal-sqrt estimate
+		{
+			float32x4_t p = vmulq_f32(v.data, v.data); // lane-wise squares
+#if GLM_ARCH & GLM_ARCH_ARMV8_BIT
+			p = vpaddq_f32(p, p);
+			p = vpaddq_f32(p, p); // after two pairwise adds every lane holds the squared length
+#else
+			float32x2_t t = vpadd_f32(vget_low_f32(p), vget_high_f32(p));
+			t = vpadd_f32(t, t);
+			p = vcombine_f32(t, t); // copy the squared length into all four lanes
+#endif
+
+			float32x4_t vd = vrsqrteq_f32(p); // NOTE(review): vrsqrteq_f32 is a reciprocal-sqrt estimate and no refinement step follows, so the result is approximate
+			vec<4, float, Q> Result;
+			Result.data = vmulq_f32(v.data, vd);
+			return Result;
+		}
+	};
+}//namespace detail
+}//namespace glm
+
 #endif//GLM_ARCH & GLM_ARCH_SSE2_BIT
diff --git a/glm/glm/detail/func_matrix_simd.inl b/glm/glm/detail/func_matrix_simd.inl
@@ -91,4 +91,159 @@ namespace detail
 #	endif
 }//namespace glm
 
+#elif GLM_ARCH & GLM_ARCH_NEON_BIT
+
+namespace glm {
+#if GLM_LANG & GLM_LANG_CXX11_FLAG
+	template <qualifier Q>
+	GLM_FUNC_QUALIFIER
+	typename std::enable_if<detail::is_aligned<Q>::value, mat<4, 4, float, Q>>::type // overload only participates when is_aligned<Q> is true
+	operator*(mat<4, 4, float, Q> const & m1, mat<4, 4, float, Q> const & m2) // returns the 4x4 product m1 * m2
+	{
+		auto MulRow = [&](int l) { // computes Result[l] from m2[l] and the four vectors of m1
+			float32x4_t const SrcA = m2[l].data;
+
+			float32x4_t r = neon::mul_lane(m1[0].data, SrcA, 0); // presumably m1[0] scaled by lane 0 of SrcA; helper not in view
+			r = neon::madd_lane(r, m1[1].data, SrcA, 1); // presumably r + m1[k] * SrcA[k] for each following lane
+			r = neon::madd_lane(r, m1[2].data, SrcA, 2);
+			r = neon::madd_lane(r, m1[3].data, SrcA, 3);
+
+			return r;
+		};
+
+		mat<4, 4, float, Q> Result; // same qualifier as the return type, so no qualifier conversion happens on return
+		Result[0].data = MulRow(0);
+		Result[1].data = MulRow(1);
+		Result[2].data = MulRow(2);
+		Result[3].data = MulRow(3);
+
+		return Result;
+	}
+#endif // CXX11
+
+	template<qualifier Q>
+	struct detail::compute_inverse<4, 4, float, Q, true> // NEON specialization: inverse of a 4x4 float matrix
+	{
+		GLM_FUNC_QUALIFIER static mat<4, 4, float, Q> call(mat<4, 4, float, Q> const& m) // returns the signed cofactor vectors divided by the determinant
+		{
+			float32x4_t const& m0 = m[0].data;
+			float32x4_t const& m1 = m[1].data;
+			float32x4_t const& m2 = m[2].data;
+			float32x4_t const& m3 = m[3].data;
+
+			// m[2][2] * m[3][3] - m[3][2] * m[2][3];
+			// m[2][2] * m[3][3] - m[3][2] * m[2][3];
+			// m[1][2] * m[3][3] - m[3][2] * m[1][3];
+			// m[1][2] * m[2][3] - m[2][2] * m[1][3];
+
+			float32x4_t Fac0; // each Fac vector packs the four 2x2 determinants listed in the comment above it, one per lane
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac0 = w0 * w1 -  w2 * w3;
+			}
+
+			// m[2][1] * m[3][3] - m[3][1] * m[2][3];
+			// m[2][1] * m[3][3] - m[3][1] * m[2][3];
+			// m[1][1] * m[3][3] - m[3][1] * m[1][3];
+			// m[1][1] * m[2][3] - m[2][1] * m[1][3];
+
+			float32x4_t Fac1;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac1 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][1] * m[3][2] - m[3][1] * m[2][2];
+			// m[2][1] * m[3][2] - m[3][1] * m[2][2];
+			// m[1][1] * m[3][2] - m[3][1] * m[1][2];
+			// m[1][1] * m[2][2] - m[2][1] * m[1][2];
+
+			float32x4_t Fac2;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				Fac2 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][3] - m[3][0] * m[2][3];
+			// m[2][0] * m[3][3] - m[3][0] * m[2][3];
+			// m[1][0] * m[3][3] - m[3][0] * m[1][3];
+			// m[1][0] * m[2][3] - m[2][0] * m[1][3];
+
+			float32x4_t Fac3;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 3), 3, m2, 3);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 3), neon::dup_lane(m1, 3));
+				Fac3 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][2] - m[3][0] * m[2][2];
+			// m[2][0] * m[3][2] - m[3][0] * m[2][2];
+			// m[1][0] * m[3][2] - m[3][0] * m[1][2];
+			// m[1][0] * m[2][2] - m[2][0] * m[1][2];
+
+			float32x4_t Fac4;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 2), 3, m2, 2);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 2), neon::dup_lane(m1, 2));
+				Fac4 = w0 * w1 - w2 * w3;
+			}
+
+			// m[2][0] * m[3][1] - m[3][0] * m[2][1];
+			// m[2][0] * m[3][1] - m[3][0] * m[2][1];
+			// m[1][0] * m[3][1] - m[3][0] * m[1][1];
+			// m[1][0] * m[2][1] - m[2][0] * m[1][1];
+
+			float32x4_t Fac5;
+			{
+				float32x4_t w0 = vcombine_f32(neon::dup_lane(m2, 0), neon::dup_lane(m1, 0));
+				float32x4_t w1 = neon::copy_lane(neon::dupq_lane(m3, 1), 3, m2, 1);
+				float32x4_t w2 = neon::copy_lane(neon::dupq_lane(m3, 0), 3, m2, 0);
+				float32x4_t w3 = vcombine_f32(neon::dup_lane(m2, 1), neon::dup_lane(m1, 1));
+				Fac5 = w0 * w1 - w2 * w3;
+			}
+
+			float32x4_t Vec0 = neon::copy_lane(neon::dupq_lane(m0, 0), 0, m1, 0); // (m[1][0], m[0][0], m[0][0], m[0][0]);
+			float32x4_t Vec1 = neon::copy_lane(neon::dupq_lane(m0, 1), 0, m1, 1); // (m[1][1], m[0][1], m[0][1], m[0][1]);
+			float32x4_t Vec2 = neon::copy_lane(neon::dupq_lane(m0, 2), 0, m1, 2); // (m[1][2], m[0][2], m[0][2], m[0][2]);
+			float32x4_t Vec3 = neon::copy_lane(neon::dupq_lane(m0, 3), 0, m1, 3); // (m[1][3], m[0][3], m[0][3], m[0][3]);
+
+			float32x4_t Inv0 = Vec1 * Fac0 - Vec2 * Fac1 + Vec3 * Fac2; // unsigned cofactor combinations, one vector per output index
+			float32x4_t Inv1 = Vec0 * Fac0 - Vec2 * Fac3 + Vec3 * Fac4;
+			float32x4_t Inv2 = Vec0 * Fac1 - Vec1 * Fac3 + Vec3 * Fac5;
+			float32x4_t Inv3 = Vec0 * Fac2 - Vec1 * Fac4 + Vec2 * Fac5;
+
+			float32x4_t r0 = float32x4_t{-1, +1, -1, +1} * Inv0; // alternating sign masks applied lane-wise; the same masks feed det below, so a common sign cancels in r * rdet
+			float32x4_t r1 = float32x4_t{+1, -1, +1, -1} * Inv1;
+			float32x4_t r2 = float32x4_t{-1, +1, -1, +1} * Inv2;
+			float32x4_t r3 = float32x4_t{+1, -1, +1, -1} * Inv3;
+
+			float32x4_t det = neon::mul_lane(r0, m0, 0); // accumulates r0*m0[0] + r1*m0[1] + r2*m0[2] + r3*m0[3] (assuming mul_lane/madd_lane scale by the indexed lane; helpers not in view)
+			det = neon::madd_lane(det, r1, m0, 1);
+			det = neon::madd_lane(det, r2, m0, 2);
+			det = neon::madd_lane(det, r3, m0, 3);
+
+			float32x4_t rdet = vdupq_n_f32(1 / vgetq_lane_f32(det, 0)); // only lane 0 of det is read; no check for a zero determinant
+
+			mat<4, 4, float, Q> r;
+			r[0].data = vmulq_f32(r0, rdet);
+			r[1].data = vmulq_f32(r1, rdet);
+			r[2].data = vmulq_f32(r2, rdet);
+			r[3].data = vmulq_f32(r3, rdet);
+			return r;
+		}
+	};
+}//namespace glm
 #endif
diff --git a/glm/glm/detail/setup.hpp b/glm/glm/detail/setup.hpp
@@ -6,9 +6,9 @@
 #define GLM_VERSION_MAJOR			0
 #define GLM_VERSION_MINOR			9
 #define GLM_VERSION_PATCH			9
-#define GLM_VERSION_REVISION		6
-#define GLM_VERSION					996
-#define GLM_VERSION_MESSAGE			"GLM: version 0.9.9.6"
+#define GLM_VERSION_REVISION		7
+#define GLM_VERSION					997
+#define GLM_VERSION_MESSAGE			"GLM: version 0.9.9.7"
 
 #define GLM_SETUP_INCLUDED			GLM_VERSION
 
@@ -35,9 +35,9 @@
 ///////////////////////////////////////////////////////////////////////////////////
 // Build model
 
-#if defined(__arch64__) || defined(__LP64__) || defined(_M_X64) || defined(__ppc64__) || defined(__x86_64__)
+#if defined(_M_ARM64) || defined(__LP64__) || defined(_M_X64) || defined(__ppc64__) || defined(__x86_64__)
 #	define GLM_MODEL	GLM_MODEL_64
-#elif defined(__i386__) || defined(__ppc__)
+#elif defined(__i386__) || defined(__ppc__) || defined(__ILP32__) || defined(_M_ARM)
 #	define GLM_MODEL	GLM_MODEL_32
 #else
 #	define GLM_MODEL	GLM_MODEL_32