From 7e4631ba6143b2495d6e66ba1703c7c72b419452 Mon Sep 17 00:00:00 2001 From: Elabajaba Date: Sat, 14 Dec 2024 01:25:47 -0500 Subject: [PATCH] WIP port to wide from std-simd --- Cargo.toml | 7 +- rust-toolchain | 2 +- src/animation.rs | 41 +++++----- src/blending_job.rs | 19 ++--- src/ik_aim_job.rs | 13 ++-- src/ik_two_bone_job.rs | 21 +++--- src/lib.rs | 2 - src/math.rs | 166 ++++++++++++++++++++--------------------- src/sampling_job.rs | 13 ++-- src/skeleton.rs | 42 +++++------ 10 files changed, 163 insertions(+), 163 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 659d7a4..95be788 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,17 +19,22 @@ serde = ["dep:serde", "glam/serde", "bimap/serde" ] rkyv = ["dep:rkyv", "dep:bytecheck", "glam/rkyv", "glam/bytecheck"] wasm = [] nodejs = ["wasm", "dep:js-sys", "dep:wasm-bindgen"] +archive_le = [] +archive_be = [] [dependencies] bimap = { version = "0.6" } bytecheck = { version = "0.6", optional = true, default-features = false } -glam = { version = "0.27", features = [ "core-simd", "libm" ] } +glam = { version = "0.27", features = [ "libm" ] } +# glam = { version = "0.27", features = [ "core-simd", "libm" ] } js-sys = { version = "0.3", optional = true } rkyv = { version = "0.7", optional = true, features = [ "validation" ] } serde = { version= "1.0", optional = true, features = [ "serde_derive" ] } static_assertions = "1.1" thiserror = "1.0" wasm-bindgen = { version = "0.2", optional = true } +# wide = "0.7.30" +wide = { git = "https://github.com/Lokathor/wide.git", rev = "0f15f92bff68e36bf60a7305d9201873dd26d6b9" } [dev-dependencies] getrandom = { version = "0.2", features = ["js"] } diff --git a/rust-toolchain b/rust-toolchain index 07ade69..5e49e6b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -nightly \ No newline at end of file +1.83 \ No newline at end of file diff --git a/src/animation.rs b/src/animation.rs index 26b00dd..16a539e 100644 --- a/src/animation.rs +++ b/src/animation.rs @@ -5,9 +5,8 @@ use glam::{Quat, Vec3, Vec4}; use std::alloc::{self, Layout}; use std::io::Read; -use std::simd::prelude::*; -use std::simd::*; use std::{mem, slice}; +use wide::{f32x4, i32x4}; use crate::archive::{Archive, ArchiveRead}; use crate::base::{align_ptr, align_usize, OzzError}; @@ -102,15 +101,15 @@ impl QuaternionKey { k3: &QuaternionKey, soa: &mut SoaQuat, ) { - const MASK_F000:i32x4 = i32x4::from_array([-1i32, 0, 0, 0]); - const MASK_0F00:i32x4 = i32x4::from_array([0, -1i32, 0, 0]); - const MASK_00F0:i32x4 = i32x4::from_array([0, 0, -1i32, 0]); - const MASK_000F:i32x4 = i32x4::from_array([0, 0, 0, -1i32]); + const MASK_F000:i32x4 = i32x4::new([-1i32, 0, 0, 0]); + const MASK_0F00:i32x4 = i32x4::new([0, -1i32, 0, 0]); + const MASK_00F0:i32x4 = i32x4::new([0, 0, -1i32, 0]); + const MASK_000F:i32x4 = i32x4::new([0, 0, 0, -1i32]); const MAPPING: [[usize; 4]; 4] = [[0, 0, 1, 2], [0, 0, 1, 2], [0, 1, 0, 2], [0, 1, 2, 0]]; - const SCALE: f32x4 = f32x4::from_array([core::f32::consts::SQRT_2 / 32767.0; 4]); - const OFFSET: f32x4 = f32x4::from_array([-core::f32::consts::SQRT_2 / 2.0; 4]); + const SCALE: f32x4 = f32x4::new([core::f32::consts::SQRT_2 / 32767.0; 4]); + const OFFSET: f32x4 = f32x4::new([-core::f32::consts::SQRT_2 / 2.0; 4]); let (largest0, sign0, value0) = k0.unpack(); let (largest1, sign1, value1) = k1.unpack(); @@ -123,10 +122,10 @@ impl QuaternionKey { let m3 = &MAPPING[largest3 as usize]; let cmp_keys: [f32x4; 4] = [ - f32x4::from_array([ value0[m0[0]] as f32, value1[m1[0]] as f32, value2[m2[0]] as f32, value3[m3[0]] as f32 ]), - f32x4::from_array([ value0[m0[1]] as f32, value1[m1[1]] as f32, value2[m2[1]] as f32, value3[m3[1]] as f32 ]), - f32x4::from_array([ value0[m0[2]] as f32, value1[m1[2]] as f32, value2[m2[2]] as f32, value3[m3[2]] as f32 ]), - f32x4::from_array([ value0[m0[3]] as f32, value1[m1[3]] as f32, value2[m2[3]] as f32, value3[m3[3]] as f32 ]), + f32x4::new([ value0[m0[0]] as f32, value1[m1[0]] as f32, value2[m2[0]] as f32, value3[m3[0]] as f32 ]), + f32x4::new([ value0[m0[1]] as f32, value1[m1[1]] as f32, value2[m2[1]] as f32, value3[m3[1]] as f32 ]), + f32x4::new([ value0[m0[2]] as f32, value1[m1[2]] as f32, value2[m2[2]] as f32, value3[m3[2]] as f32 ]), + f32x4::new([ value0[m0[3]] as f32, value1[m1[3]] as f32, value2[m2[3]] as f32, value3[m3[3]] as f32 ]), ]; // TODO: simd int to float let mut cpnt = [ @@ -141,9 +140,9 @@ impl QuaternionKey { cpnt[largest3 as usize] = fx4(ix4(cpnt[largest3 as usize]) & !MASK_000F); let dot = cpnt[0] * cpnt[0] + cpnt[1] * cpnt[1] + cpnt[2] * cpnt[2] + cpnt[3] * cpnt[3]; - let ww0 = f32x4::simd_max(ZERO, ONE - dot); // prevent NaN, different from C++ code + let ww0 = f32x4::fast_max(ZERO, ONE - dot); // prevent NaN, different from C++ code let w0 = ww0.sqrt(); - let sign = i32x4::from_array([sign0 as i32, sign1 as i32, sign2 as i32, sign3 as i32]) << 31; + let sign = i32x4::new([sign0 as i32, sign1 as i32, sign2 as i32, sign3 as i32]) << 31; let restored = ix4(w0) | sign; cpnt[largest0 as usize] = fx4(ix4(cpnt[largest0 as usize]) | (restored & MASK_F000)); @@ -1116,14 +1115,14 @@ mod tests { assert_eq!( soa, SoaVec3 { - x: f32x4::from_array([0.0711059570, 0.0251312255859375, 0.0711059570, 0.0251312255859375]), - y: f32x4::from_array([ + x: f32x4::new([0.0711059570, 0.0251312255859375, 0.0711059570, 0.0251312255859375]), + y: f32x4::new([ -8.77380371e-05, 5.960464477539063e-8, -8.77380371e-05, 5.960464477539063e-8 ]), - z: f32x4::from_array([1.84774399e-06, 0.0, 1.84774399e-06, 0.0]), + z: f32x4::new([1.84774399e-06, 0.0, 1.84774399e-06, 0.0]), } ); } @@ -1172,10 +1171,10 @@ mod tests { assert_eq!( soa, SoaQuat { - x: f32x4::from_array([-0.491480947, -0.498861253, -0.00912827253, 0.00852406025]), - y: f32x4::from_array([-0.508615375, -0.501123607, 0.0251405239, 0.00882613659]), - z: f32x4::from_array([-0.538519204, -0.498861253, -0.0326502919, 0.00610709190]), - w: f32x4::from_array([0.457989037, 0.501148760, 0.999108911, 0.999906063]), + x: f32x4::new([-0.491480947, -0.498861253, -0.00912827253, 0.00852406025]), + y: f32x4::new([-0.508615375, -0.501123607, 0.0251405239, 0.00882613659]), + z: f32x4::new([-0.538519204, -0.498861253, -0.0326502919, 0.00610709190]), + w: f32x4::new([0.457989037, 0.501148760, 0.999108911, 0.999906063]), } ); } diff --git a/src/blending_job.rs b/src/blending_job.rs index a4b22cb..19d74b0 100644 --- a/src/blending_job.rs +++ b/src/blending_job.rs @@ -3,18 +3,15 @@ //! use glam::Vec4; +use wide::f32x4; use std::cell::RefCell; use std::rc::Rc; -use std::simd::prelude::*; use std::sync::{Arc, RwLock}; use crate::base::{OzzBuf, OzzError, OzzMutBuf, OzzObj}; -use crate::math::{fx4_from_vec4, fx4_sign, SoaQuat, SoaTransform, SoaVec3}; +use crate::math::{fx4_from_vec4, fx4_sign, SoaQuat, SoaTransform, SoaVec3, ONE, ZERO}; use crate::skeleton::Skeleton; -const ZERO: f32x4 = f32x4::from_array([0.0; 4]); -const ONE: f32x4 = f32x4::from_array([1.0; 4]); - /// Defines a layer of blending input data (local space transforms) and parameters (weights). #[derive(Debug, Clone)] pub struct BlendingLayer> { @@ -352,13 +349,13 @@ where if ctx.num_passes == 0 { for idx in 0..num_soa_joints { - let weight = layer_weight * layer.joint_weight(idx).simd_max(ZERO); + let weight = layer_weight * layer.joint_weight(idx).fast_max(ZERO); ctx.accumulated_weights[idx] = weight; Self::blend_1st_pass(&transform[idx], weight, &mut output[idx]); } } else { for idx in 0..num_soa_joints { - let weight = layer_weight * layer.joint_weight(idx).simd_max(ZERO); + let weight = layer_weight * layer.joint_weight(idx).fast_max(ZERO); ctx.accumulated_weights[idx] += weight; Self::blend_n_pass(&transform[idx], weight, &mut output[idx]); } @@ -402,8 +399,8 @@ where } else { let simd_threshold = f32x4::splat(threshold); for idx in 0..joint_rest_poses.len() { - let bp_weight = (simd_threshold - ctx.accumulated_weights[idx]).simd_max(ZERO); - ctx.accumulated_weights[idx] = simd_threshold.simd_max(ctx.accumulated_weights[idx]); + let bp_weight = (simd_threshold - ctx.accumulated_weights[idx]).fast_max(ZERO); + ctx.accumulated_weights[idx] = simd_threshold.fast_max(ctx.accumulated_weights[idx]); Self::blend_n_pass(&joint_rest_poses[idx], bp_weight, &mut output[idx]); } } @@ -450,7 +447,7 @@ where if !layer.joint_weights.is_empty() { for idx in 0..joint_rest_poses.len() { - let weight = layer_weight * layer.joint_weight(idx).simd_max(ZERO); + let weight = layer_weight * layer.joint_weight(idx).fast_max(ZERO); let one_minus_weight = ONE - weight; Self::blend_add_pass(&transform[idx], weight, one_minus_weight, &mut output[idx]); } @@ -465,7 +462,7 @@ where if !layer.joint_weights.is_empty() { for idx in 0..joint_rest_poses.len() { - let weight = layer_weight * layer.joint_weight(idx).simd_max(ZERO); + let weight = layer_weight * layer.joint_weight(idx).fast_max(ZERO); let one_minus_weight = ONE - weight; Self::blend_sub_pass(&transform[idx], weight, one_minus_weight, &mut output[idx]); } diff --git a/src/ik_aim_job.rs b/src/ik_aim_job.rs index a17b0c0..aeb1f79 100644 --- a/src/ik_aim_job.rs +++ b/src/ik_aim_job.rs @@ -3,8 +3,7 @@ //! use glam::{Mat4, Quat, Vec3A}; -use std::simd::prelude::*; -use std::simd::StdFloat; +use wide::{f32x4, CmpEq, CmpGt, CmpNe}; use crate::base::OzzError; use crate::math::*; @@ -234,7 +233,7 @@ impl IKAimJob { let offsetted_forward = Self::compute_offsetted_forward(self.forward, self.offset, joint_to_target_js); self.reached = offsetted_forward.is_some(); - if !self.reached || (joint_to_target_js_len2.simd_eq(ZERO).to_bitmask() & 0x1 == 0x1) { + if !self.reached || (joint_to_target_js_len2.cmp_eq(ZERO).to_bitmask() & 0x1 == 0x1) { self.joint_correction = QUAT_UNIT; return Ok(()); } @@ -256,7 +255,7 @@ impl IKAimJob { let rotate_plane_axis_js; let rotate_plane_js; - if denoms.simd_ne(ZERO).to_bitmask() & 0x7 == 0x7 { + if denoms.cmp_ne(ZERO).to_bitmask() & 0x7 == 0x7 { let rsqrts = denoms.sqrt().recip(); rotate_plane_axis_js = joint_to_target_js * fx4_splat_x(rsqrts); @@ -268,7 +267,7 @@ impl IKAimJob { let rotate_plane_axis_flipped_js = fx4_xor(rotate_plane_axis_js, axis_flip); rotate_plane_js = quat_from_cos_angle( rotate_plane_axis_flipped_js, - rotate_plane_cos_angle.simd_clamp(NEG_ONE, ONE), + rotate_plane_cos_angle.fast_max(NEG_ONE).fast_min(ONE), // clamp elements between -1.0 and 1.0 ); } else { rotate_plane_axis_js = joint_to_target_js * fx4_splat_x(denoms.sqrt().recip()); @@ -284,7 +283,7 @@ impl IKAimJob { let twisted_fu = quat_positive_w(twisted); if self.weight < 1.0 { - let simd_weight = f32x4::splat(self.weight).simd_max(ZERO); + let simd_weight = f32x4::splat(self.weight).fast_max(ZERO); self.joint_correction = quat_normalize(fx4_lerp(QUAT_UNIT, twisted_fu, simd_weight)); } else { self.joint_correction = twisted_fu; @@ -296,7 +295,7 @@ impl IKAimJob { let ao_l = vec3_dot_s(forward, offset); let ac_l2 = vec3_length2_s(offset) - ao_l * ao_l; let r2 = vec3_length2_s(target); - if ac_l2.simd_gt(r2).to_bitmask() & 0x1 == 0x1 { + if ac_l2.cmp_gt(r2).to_bitmask() & 0x1 == 0x1 { return None; } let ai_l = (r2 - ac_l2).sqrt(); diff --git a/src/ik_two_bone_job.rs b/src/ik_two_bone_job.rs index ebf3065..64aa4dc 100644 --- a/src/ik_two_bone_job.rs +++ b/src/ik_two_bone_job.rs @@ -3,8 +3,9 @@ //! use glam::{Mat4, Quat, Vec3A}; -use std::simd::prelude::*; -use std::simd::StdFloat; +// use std::simd::prelude::*; +// use std::simd::StdFloat; +use wide::{f32x4, CmpGt}; use crate::base::OzzError; use crate::math::*; @@ -339,12 +340,12 @@ impl IKTwoBoneJob { let start_target_original_ss_len = fx4_splat_z(lengths); // [x y z w] let bone_len_diff_abs = (start_mid_ss_len - mid_end_ss_len).abs(); // [x] let bones_chain_len = start_mid_ss_len + mid_end_ss_len; // [x] - let da = bones_chain_len * fx4_clamp_or_min(f32x4::from_array([self.soften, 0.0, 0.0, 0.0]), ZERO, ONE); // [x 0 0 0] da.yzw needs to be 0 + let da = bones_chain_len * fx4_clamp_or_min(f32x4::new([self.soften, 0.0, 0.0, 0.0]), ZERO, ONE); // [x 0 0 0] da.yzw needs to be 0 let ds = bones_chain_len - da; // [x] let left = fx4_set_w(start_target_original_ss_len, ds); // [x y z w] let right = fx4_set_z(da, bone_len_diff_abs); // [x y z w] - let comp_mask = left.simd_gt(right).to_bitmask(); + let comp_mask = left.cmp_gt(right).to_bitmask(); let start_target_ss; let start_target_ss_len2; @@ -410,7 +411,7 @@ impl IKTwoBoneJob { let mut start_rot_ss = end_to_target_rot_ss; - if start_target_ss_len2.simd_gt(ZERO).to_bitmask() & 0x1 == 0x1 { + if start_target_ss_len2.cmp_gt(ZERO).to_bitmask() & 0x1 == 0x1 { // [x] let ref_plane_normal_ss = vec3_cross(start_target_ss, pole_ss); // [x y z] let ref_plane_normal_ss_len2 = vec3_length2_s(ref_plane_normal_ss); // [x] @@ -439,7 +440,7 @@ impl IKTwoBoneJob { let rotate_plane_ss = quat_from_cos_angle( rotate_plane_axis_flipped_ss, - rotate_plane_cos_angle.simd_clamp(NEG_ONE, ONE), + rotate_plane_cos_angle.fast_max(NEG_ONE).fast_min(ONE), // clamp elements between -1.0 and 1.0 ); if self.twist_angle != 0.0 { @@ -457,14 +458,14 @@ impl IKTwoBoneJob { let mid_rot_fu = quat_positive_w(mid_rot); if self.weight < 1.0 { - let simd_weight = f32x4::splat(self.weight).simd_max(ZERO); + let simd_weight = f32x4::splat(self.weight).fast_max(ZERO); let start_lerp = fx4_lerp(QUAT_UNIT, start_rot_fu, simd_weight); let mid_lerp = fx4_lerp(QUAT_UNIT, mid_rot_fu, simd_weight); - let rsqrts = f32x4::from_array([ - (start_lerp * start_lerp).reduce_sum(), - (mid_lerp * mid_lerp).reduce_sum(), + let rsqrts = f32x4::new([ + (start_lerp * start_lerp).reduce_add(), + (mid_lerp * mid_lerp).reduce_add(), 0.0, 0.0, ]) diff --git a/src/lib.rs b/src/lib.rs index b41b6cc..0d6b708 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,8 +45,6 @@ //! ``` //! -#![feature(portable_simd)] - pub mod animation; pub mod archive; pub mod base; diff --git a/src/math.rs b/src/math.rs index fda6f54..a0d0fe6 100644 --- a/src/math.rs +++ b/src/math.rs @@ -11,30 +11,30 @@ use static_assertions::const_assert_eq; use std::fmt::Debug; use std::io::Read; use std::mem; -use std::simd::prelude::*; -use std::simd::*; +use wide::{f32x4, i32x4}; +// use wide::f32x4 use crate::archive::{Archive, ArchiveRead}; use crate::base::OzzError; use crate::math; -pub(crate) const ZERO: f32x4 = f32x4::from_array([0.0; 4]); -pub(crate) const ONE: f32x4 = f32x4::from_array([1.0; 4]); -pub(crate) const TWO: f32x4 = f32x4::from_array([2.0; 4]); -pub(crate) const THREE: f32x4 = f32x4::from_array([3.0; 4]); -pub(crate) const NEG_ONE: f32x4 = f32x4::from_array([-1.0; 4]); -pub(crate) const FRAC_1_2: f32x4 = f32x4::from_array([0.5; 4]); -pub(crate) const FRAC_2_PI: f32x4 = f32x4::from_array([core::f32::consts::FRAC_2_PI; 4]); -pub(crate) const FRAC_PI_2: f32x4 = f32x4::from_array([core::f32::consts::FRAC_PI_2; 4]); +pub(crate) const ZERO: f32x4 = f32x4::ZERO; +pub(crate) const ONE: f32x4 = f32x4::ONE; +pub(crate) const TWO: f32x4 = f32x4::new([2.0; 4]); +pub(crate) const THREE: f32x4 = f32x4::new([3.0; 4]); +pub(crate) const NEG_ONE: f32x4 = f32x4::new([-1.0; 4]); +pub(crate) const FRAC_1_2: f32x4 = f32x4::HALF; +pub(crate) const FRAC_2_PI: f32x4 = f32x4::FRAC_2_PI; +pub(crate) const FRAC_PI_2: f32x4 = f32x4::FRAC_PI_2; -pub(crate) const X_AXIS: f32x4 = f32x4::from_array([1.0, 0.0, 0.0, 0.0]); -pub(crate) const Y_AXIS: f32x4 = f32x4::from_array([0.0, 1.0, 0.0, 0.0]); -pub(crate) const Z_AXIS: f32x4 = f32x4::from_array([0.0, 0.0, 1.0, 0.0]); +pub(crate) const X_AXIS: f32x4 = f32x4::new([1.0, 0.0, 0.0, 0.0]); +pub(crate) const Y_AXIS: f32x4 = f32x4::new([0.0, 1.0, 0.0, 0.0]); +pub(crate) const Z_AXIS: f32x4 = f32x4::new([0.0, 0.0, 1.0, 0.0]); -pub(crate) const QUAT_UNIT: f32x4 = f32x4::from_array([0.0, 0.0, 0.0, 1.0]); +pub(crate) const QUAT_UNIT: f32x4 = f32x4::new([0.0, 0.0, 0.0, 1.0]); -const SIGN: i32x4 = i32x4::from_array([i32::MIN; 4]); -const SIGN_W: i32x4 = i32x4::from_array([0, 0, 0, i32::MIN]); +pub(super) const SIGN: i32x4 = i32x4::new([i32::MIN; 4]); +pub(super) const SIGN_W: i32x4 = i32x4::new([0, 0, 0, i32::MIN]); // // SoaVec3 @@ -53,9 +53,9 @@ impl SoaVec3 { #[inline] pub const fn new(x: [f32; 4], y: [f32; 4], z: [f32; 4]) -> SoaVec3 { SoaVec3 { - x: f32x4::from_array(x), - y: f32x4::from_array(y), - z: f32x4::from_array(z), + x: f32x4::new(x), + y: f32x4::new(y), + z: f32x4::new(z), } } @@ -67,22 +67,22 @@ impl SoaVec3 { #[inline] pub const fn splat_col(v: [f32; 3]) -> SoaVec3 { SoaVec3 { - x: f32x4::from_array([v[0]; 4]), - y: f32x4::from_array([v[1]; 4]), - z: f32x4::from_array([v[2]; 4]), + x: f32x4::new([v[0]; 4]), + y: f32x4::new([v[1]; 4]), + z: f32x4::new([v[2]; 4]), } } #[inline] pub fn col(&self, idx: usize) -> Vec3 { - Vec3::new(self.x[idx], self.y[idx], self.z[idx]) + Vec3::new(self.x.as_array_ref()[idx], self.y.as_array_ref()[idx], self.z.as_array_ref()[idx]) } #[inline] pub fn set_col(&mut self, idx: usize, v: Vec3) { - self.x[idx] = v.x; - self.y[idx] = v.y; - self.z[idx] = v.z; + self.x.as_array_mut()[idx] = v.x; + self.y.as_array_mut()[idx] = v.y; + self.z.as_array_mut()[idx] = v.z; } #[inline] @@ -209,9 +209,9 @@ const _: () = { impl Serialize for SoaVec3 { fn serialize(&self, serializer: S) -> Result { let mut seq = serializer.serialize_seq(Some(3))?; - seq.serialize_element(&self.x.as_array())?; - seq.serialize_element(&self.y.as_array())?; - seq.serialize_element(&self.z.as_array())?; + seq.serialize_element(&self.x.as_array_ref())?; + seq.serialize_element(&self.y.as_array_ref())?; + seq.serialize_element(&self.z.as_array_ref())?; seq.end() } } @@ -242,10 +242,10 @@ impl SoaQuat { #[inline] pub const fn new(x: [f32; 4], y: [f32; 4], z: [f32; 4], w: [f32; 4]) -> SoaQuat { SoaQuat { - x: f32x4::from_array(x), - y: f32x4::from_array(y), - z: f32x4::from_array(z), - w: f32x4::from_array(w), + x: f32x4::new(x), + y: f32x4::new(y), + z: f32x4::new(z), + w: f32x4::new(w), } } @@ -257,10 +257,10 @@ impl SoaQuat { #[inline] pub const fn splat_col(v: [f32; 4]) -> SoaQuat { SoaQuat { - x: f32x4::from_array([v[0]; 4]), - y: f32x4::from_array([v[1]; 4]), - z: f32x4::from_array([v[2]; 4]), - w: f32x4::from_array([v[3]; 4]), + x: f32x4::new([v[0]; 4]), + y: f32x4::new([v[1]; 4]), + z: f32x4::new([v[2]; 4]), + w: f32x4::new([v[3]; 4]), } } @@ -574,10 +574,10 @@ impl AosMat4 { ) -> AosMat4 { AosMat4 { cols: [ - f32x4::from_array([n00, n01, n02, n03]), - f32x4::from_array([n10, n11, n12, n13]), - f32x4::from_array([n20, n21, n22, n23]), - f32x4::from_array([n30, n31, n32, n33]), + f32x4::new([n00, n01, n02, n03]), + f32x4::new([n10, n11, n12, n13]), + f32x4::new([n20, n21, n22, n23]), + f32x4::new([n30, n31, n32, n33]), ], } } @@ -586,10 +586,10 @@ impl AosMat4 { pub(crate) fn new_translation(t: Vec3) -> AosMat4 { AosMat4 { cols: [ - f32x4::from_array([1.0, 0.0, 0.0, 0.0]), - f32x4::from_array([0.0, 1.0, 0.0, 0.0]), - f32x4::from_array([0.0, 0.0, 1.0, 0.0]), - f32x4::from_array([t.x, t.y, t.z, 1.0]), + f32x4::new([1.0, 0.0, 0.0, 0.0]), + f32x4::new([0.0, 1.0, 0.0, 0.0]), + f32x4::new([0.0, 0.0, 1.0, 0.0]), + f32x4::new([t.x, t.y, t.z, 1.0]), ], } } @@ -598,10 +598,10 @@ impl AosMat4 { pub(crate) fn new_scaling(s: Vec3) -> AosMat4 { AosMat4 { cols: [ - f32x4::from_array([s.x, 0.0, 0.0, 0.0]), - f32x4::from_array([0.0, s.y, 0.0, 0.0]), - f32x4::from_array([0.0, 0.0, s.z, 0.0]), - f32x4::from_array([0.0, 0.0, 0.0, 1.0]), + f32x4::new([s.x, 0.0, 0.0, 0.0]), + f32x4::new([0.0, s.y, 0.0, 0.0]), + f32x4::new([0.0, 0.0, s.z, 0.0]), + f32x4::new([0.0, 0.0, 0.0, 1.0]), ], } } @@ -610,10 +610,10 @@ impl AosMat4 { pub(crate) fn identity() -> AosMat4 { AosMat4 { cols: [ - f32x4::from_array([1.0, 0.0, 0.0, 0.0]), - f32x4::from_array([0.0, 1.0, 0.0, 0.0]), - f32x4::from_array([0.0, 0.0, 1.0, 0.0]), - f32x4::from_array([0.0, 0.0, 0.0, 1.0]), + f32x4::new([1.0, 0.0, 0.0, 0.0]), + f32x4::new([0.0, 1.0, 0.0, 0.0]), + f32x4::new([0.0, 0.0, 1.0, 0.0]), + f32x4::new([0.0, 0.0, 0.0, 1.0]), ], } } @@ -865,10 +865,10 @@ pub(crate) fn f16_to_f32(n: u16) -> f32 { #[inline] pub(crate) fn simd_f16_to_f32(half4: [u16; 4]) -> f32x4 { - const MASK_NO_SIGN: i32x4 = i32x4::from_array([0x7FFF; 4]); - const MAGIC: f32x4 = fx4(i32x4::from_array([(254 - 15) << 23; 4])); - const WAS_INFNAN: i32x4 = i32x4::from_array([0x7BFF; 4]); - const EXP_INFNAN: i32x4 = i32x4::from_array([255 << 23; 4]); + const MASK_NO_SIGN: i32x4 = i32x4::new([0x7FFF; 4]); + const MAGIC: f32x4 = fx4(i32x4::new([(254 - 15) << 23; 4])); + const WAS_INFNAN: i32x4 = i32x4::new([0x7BFF; 4]); + const EXP_INFNAN: i32x4 = i32x4::new([255 << 23; 4]); let int4 = i32x4::from([half4[0] as i32, half4[1] as i32, half4[2] as i32, half4[3] as i32]); let expmant = MASK_NO_SIGN & int4; @@ -1035,17 +1035,17 @@ pub(crate) fn fx4_sin_cos(v: f32x4) -> (f32x4, f32x4) { // Implementation based on Vec4.inl from the JoltPhysics // https://github.com/jrouwe/JoltPhysics/blob/master/Jolt/Math/Vec4.inl - const N1: f32x4 = f32x4::from_array([1.5703125; 4]); - const N2: f32x4 = f32x4::from_array([0.0004837512969970703125; 4]); - const N3: f32x4 = f32x4::from_array([7.549789948768648e-8; 4]); + const N1: f32x4 = f32x4::new([1.5703125; 4]); + const N2: f32x4 = f32x4::new([0.0004837512969970703125; 4]); + const N3: f32x4 = f32x4::new([7.549789948768648e-8; 4]); - const C1: f32x4 = f32x4::from_array([2.443315711809948e-5; 4]); - const C2: f32x4 = f32x4::from_array([1.388731625493765e-3; 4]); - const C3: f32x4 = f32x4::from_array([4.166664568298827e-2; 4]); + const C1: f32x4 = f32x4::new([2.443315711809948e-5; 4]); + const C2: f32x4 = f32x4::new([1.388731625493765e-3; 4]); + const C3: f32x4 = f32x4::new([4.166664568298827e-2; 4]); - const S1: f32x4 = f32x4::from_array([-1.9515295891e-4; 4]); - const S2: f32x4 = f32x4::from_array([8.3321608736e-3; 4]); - const S3: f32x4 = f32x4::from_array([1.6666654611e-1; 4]); + const S1: f32x4 = f32x4::new([-1.9515295891e-4; 4]); + const S2: f32x4 = f32x4::new([8.3321608736e-3; 4]); + const S3: f32x4 = f32x4::new([1.6666654611e-1; 4]); // Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit) let mut sin_sign = fx4_sign(v); @@ -1129,11 +1129,11 @@ pub(crate) fn fx4_asin(v: f32x4) -> f32x4 { // Implementation based on Vec4.inl from the JoltPhysics // https://github.com/jrouwe/JoltPhysics/blob/master/Jolt/Math/Vec4.inl - const N1: f32x4 = f32x4::from_array([4.2163199048e-2; 4]); - const N2: f32x4 = f32x4::from_array([2.4181311049e-2; 4]); - const N3: f32x4 = f32x4::from_array([4.5470025998e-2; 4]); - const N4: f32x4 = f32x4::from_array([7.4953002686e-2; 4]); - const N5: f32x4 = f32x4::from_array([1.6666752422e-1; 4]); + const N1: f32x4 = f32x4::new([4.2163199048e-2; 4]); + const N2: f32x4 = f32x4::new([2.4181311049e-2; 4]); + const N3: f32x4 = f32x4::new([4.5470025998e-2; 4]); + const N4: f32x4 = f32x4::new([7.4953002686e-2; 4]); + const N5: f32x4 = f32x4::new([1.6666752422e-1; 4]); // Make argument positive let asin_sign = fx4_sign(v); @@ -1167,7 +1167,7 @@ pub(crate) fn fx4_asin(v: f32x4) -> f32x4 { #[inline] pub(crate) fn fx4_acos(v: f32x4) -> f32x4 { - const FRAC_PI_2: f32x4 = f32x4::from_array([core::f32::consts::FRAC_PI_2; 4]); + const FRAC_PI_2: f32x4 = f32x4::new([core::f32::consts::FRAC_PI_2; 4]); FRAC_PI_2 - fx4_asin(v) } @@ -1240,9 +1240,9 @@ pub(crate) fn quat_from_vectors(from: f32x4, to: f32x4) -> f32x4 { let quat; if real_part_x < 1.0e-6 * norm_from_norm_to_x { if from[0].abs() > from[2].abs() { - quat = f32x4::from_array([-from[1], from[0], 0.0, 0.0]) + quat = f32x4::new([-from[1], from[0], 0.0, 0.0]) } else { - quat = f32x4::from_array([0.0, -from[2], from[1], 0.0]) + quat = f32x4::new([0.0, -from[2], from[1], 0.0]) } } else { quat = fx4_set_w(vec3_cross(from, to), real_part) @@ -1318,13 +1318,13 @@ mod tests { 0b01111100_00000000, ]; let float4 = simd_f16_to_f32(half4); - assert_eq!(float4, f32x4::from_array([1.0f32, -1.0f32, 3.5f32, f32::INFINITY])); + assert_eq!(float4, f32x4::new([1.0f32, -1.0f32, 3.5f32, f32::INFINITY])); let half4 = [0b11111100_00000000, 0, 0x8000, 32791]; let float4 = simd_f16_to_f32(half4); assert_eq!( float4, - f32x4::from_array([f32::NEG_INFINITY, 0.0f32, 0.0f32, -1.37090683e-06]) + f32x4::new([f32::NEG_INFINITY, 0.0f32, 0.0f32, -1.37090683e-06]) ); let half4 = [0xFFFF, 0, 0, 0]; @@ -1382,19 +1382,19 @@ mod tests { #[test] #[wasm_bindgen_test] fn test_sin_cos() { - const EPSILON: f32x4 = f32x4::from_array([2.0e-7; 4]); + const EPSILON: f32x4 = f32x4::new([2.0e-7; 4]); - let (sin, cos) = fx4_sin_cos(f32x4::from_array([ + let (sin, cos) = fx4_sin_cos(f32x4::new([ 0.0, core::f32::consts::FRAC_PI_2, core::f32::consts::PI, -core::f32::consts::FRAC_PI_2, ])); - assert!((sin - f32x4::from_array([0.0, 1.0, 0.0, -1.0])) + assert!((sin - f32x4::new([0.0, 1.0, 0.0, -1.0])) .abs() .simd_lt(EPSILON) .all()); - assert!((cos - f32x4::from_array([1.0, 0.0, -1.0, 0.0])) + assert!((cos - f32x4::new([1.0, 0.0, -1.0, 0.0])) .abs() .simd_lt(EPSILON) .all()); @@ -1404,7 +1404,7 @@ mod tests { let mut i = -100.0 * core::f32::consts::PI; while i < 100.0 * core::f32::consts::PI { - let iv = f32x4::splat(i) + f32x4::from_array([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]); + let iv = f32x4::splat(i) + f32x4::new([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]); let (sin, cos) = fx4_sin_cos(iv); for i in 0..4 { @@ -1437,7 +1437,7 @@ mod tests { let mut i = -1.0; while i < 1.0 { - let iv = f32x4::splat(i) + f32x4::from_array([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]).simd_min(f32x4::splat(1.0)); + let iv = f32x4::splat(i) + f32x4::new([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]).simd_min(f32x4::splat(1.0)); let asin = fx4_asin(iv); for i in 0..4 { @@ -1465,7 +1465,7 @@ mod tests { let mut i = -1.0; while i < 1.0 { - let iv = f32x4::splat(i) + f32x4::from_array([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]).simd_min(f32x4::splat(1.0)); + let iv = f32x4::splat(i) + f32x4::new([0.0e-4, 2.5e-4, 5.0e-4, 7.5e-4]).simd_min(f32x4::splat(1.0)); let acos = fx4_acos(iv); for i in 0..4 { diff --git a/src/sampling_job.rs b/src/sampling_job.rs index ac882fa..6119325 100644 --- a/src/sampling_job.rs +++ b/src/sampling_job.rs @@ -6,10 +6,11 @@ use std::alloc::{self, Layout}; use std::cell::RefCell; use std::fmt::{Debug, Formatter}; use std::rc::Rc; -use std::simd::prelude::*; use std::sync::{Arc, RwLock}; use std::{mem, ptr, slice}; +use wide::f32x4; + use crate::animation::{Animation, Float3Key, KeyframesCtrl, QuaternionKey}; use crate::base::{align_ptr, align_usize, OzzError, OzzMutBuf, OzzObj}; use crate::math::{f32_clamp_or_max, SoaQuat, SoaTransform, SoaVec3}; @@ -124,18 +125,18 @@ const _: () = { mod serde_interp { use serde::ser::SerializeSeq; use serde::{Deserialize, Deserializer, Serializer}; - use std::simd::prelude::*; + use wide::f32x4; pub(crate) fn serialize(value: &[f32x4; 2], serializer: S) -> Result { let mut seq = serializer.serialize_seq(Some(2))?; - seq.serialize_element(value[0].as_array())?; - seq.serialize_element(value[1].as_array())?; + seq.serialize_element(value[0].as_array_ref())?; + seq.serialize_element(value[1].as_array_ref())?; seq.end() } pub(crate) fn deserialize<'de, D: Deserializer<'de>>(deserializer: D) -> Result<[f32x4; 2], D::Error> { let tmp: [[f32; 4]; 2] = Deserialize::deserialize(deserializer)?; - Ok([f32x4::from_array(tmp[0]), f32x4::from_array(tmp[1])]) + Ok([f32x4::new(tmp[0]), f32x4::new(tmp[1])]) } } @@ -1283,7 +1284,7 @@ where #[inline(always)] fn key_ratio_simd(ctrl: &KeyframesCtrl<'_>, timepoints: &[f32], ats: &[u32]) -> f32x4 { - f32x4::from_array([ + f32x4::new([ timepoints[ctrl.ratios[ats[0] as usize] as usize], timepoints[ctrl.ratios[ats[1] as usize] as usize], timepoints[ctrl.ratios[ats[2] as usize] as usize], diff --git a/src/skeleton.rs b/src/skeleton.rs index 3fc947e..13a0664 100644 --- a/src/skeleton.rs +++ b/src/skeleton.rs @@ -409,8 +409,8 @@ const _: () = { #[cfg(test)] mod tests { - use std::simd::prelude::*; use wasm_bindgen_test::*; + use wide::f32x4; use super::*; use crate::math::{SoaQuat, SoaVec3}; @@ -425,53 +425,53 @@ mod tests { assert_eq!( skeleton.joint_rest_poses()[0].translation, SoaVec3 { - x: f32x4::from_array([-4.01047945e-10, 0.00000000, 0.0710870326, 0.110522307]), - y: f32x4::from_array([1.04666960, 0.00000000, -8.79573781e-05, -7.82728166e-05]), - z: f32x4::from_array([-0.0151103791, 0.00000000, 9.85883801e-08, -2.17094467e-10]), + x: f32x4::new([-4.01047945e-10, 0.00000000, 0.0710870326, 0.110522307]), + y: f32x4::new([1.04666960, 0.00000000, -8.79573781e-05, -7.82728166e-05]), + z: f32x4::new([-0.0151103791, 0.00000000, 9.85883801e-08, -2.17094467e-10]), }, ); assert_eq!( skeleton.joint_rest_poses()[16].translation, SoaVec3 { - x: f32x4::from_array([0.458143145, 0.117970668, 0.0849116519, 0.00000000]), - y: f32x4::from_array([2.64545919e-09, 0.148304969, 0.00000000, 0.00000000]), - z: f32x4::from_array([-4.97557555e-14, -7.47846236e-15, -1.77635680e-17, 0.00000000]), + x: f32x4::new([0.458143145, 0.117970668, 0.0849116519, 0.00000000]), + y: f32x4::new([2.64545919e-09, 0.148304969, 0.00000000, 0.00000000]), + z: f32x4::new([-4.97557555e-14, -7.47846236e-15, -1.77635680e-17, 0.00000000]), } ); assert_eq!( skeleton.joint_rest_poses()[0].rotation, SoaQuat { - x: f32x4::from_array([-0.500000000, -0.499999702, -1.41468570e-06, -3.05311332e-14]), - y: f32x4::from_array([-0.500000000, -0.500000358, -6.93941161e-07, 1.70812796e-22]), - z: f32x4::from_array([-0.500000000, -0.499999702, 0.000398159056, 1.08420217e-19]), - w: f32x4::from_array([0.500000000, 0.500000358, 1.00000000, 1.00000000]), + x: f32x4::new([-0.500000000, -0.499999702, -1.41468570e-06, -3.05311332e-14]), + y: f32x4::new([-0.500000000, -0.500000358, -6.93941161e-07, 1.70812796e-22]), + z: f32x4::new([-0.500000000, -0.499999702, 0.000398159056, 1.08420217e-19]), + w: f32x4::new([0.500000000, 0.500000358, 1.00000000, 1.00000000]), }, ); assert_eq!( skeleton.joint_rest_poses()[16].rotation, SoaQuat { - x: f32x4::from_array([-2.20410801e-09, 4.11812209e-07, -6.55128745e-32, 0.00000000]), - y: f32x4::from_array([4.60687737e-08, -4.11812152e-07, -1.30968591e-21, 0.00000000]), - z: f32x4::from_array([0.0498105064, 0.707106829, -2.46519033e-32, 0.00000000]), - w: f32x4::from_array([0.998758733, 0.707106769, 1.00000000, 1.00000000]), + x: f32x4::new([-2.20410801e-09, 4.11812209e-07, -6.55128745e-32, 0.00000000]), + y: f32x4::new([4.60687737e-08, -4.11812152e-07, -1.30968591e-21, 0.00000000]), + z: f32x4::new([0.0498105064, 0.707106829, -2.46519033e-32, 0.00000000]), + w: f32x4::new([0.998758733, 0.707106769, 1.00000000, 1.00000000]), } ); assert_eq!( skeleton.joint_rest_poses()[0].scale, SoaVec3 { - x: f32x4::from_array([1.0, 1.0, 1.0, 1.0]), - y: f32x4::from_array([1.0, 1.0, 1.0, 1.0]), - z: f32x4::from_array([1.0, 1.0, 1.0, 1.0]), + x: f32x4::new([1.0, 1.0, 1.0, 1.0]), + y: f32x4::new([1.0, 1.0, 1.0, 1.0]), + z: f32x4::new([1.0, 1.0, 1.0, 1.0]), }, ); assert_eq!( skeleton.joint_rest_poses()[16].scale, SoaVec3 { - x: f32x4::from_array([0.999999940, 1.0, 1.0, 1.0]), - y: f32x4::from_array([0.999999940, 1.0, 1.0, 1.0]), - z: f32x4::from_array([1.0, 1.0, 1.0, 1.0]), + x: f32x4::new([0.999999940, 1.0, 1.0, 1.0]), + y: f32x4::new([0.999999940, 1.0, 1.0, 1.0]), + z: f32x4::new([1.0, 1.0, 1.0, 1.0]), } );