Skip to content

Commit

Permalink
Add safe wrappers for BMI2 functions
Browse files Browse the repository at this point in the history
  • Loading branch information
kauppie committed Mar 14, 2024
1 parent a56b278 commit 5d27ef4
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 51 deletions.
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,24 @@ assert_eq!(coord, [1u16, 1u16]);
This should be faster but requires x86 specific instruction set support.

```rust
use zorder::bmi2::{coord_of, index_of};
use zorder::bmi2::{coord_of, coord_of_unchecked, HardwareSupportToken, index_of, index_of_unchecked};

// Unsafe interface with hardware support check.
if zorder::bmi2::has_hardware_support() {
let idx = unsafe { index_of([1u16, 1u16]) };
let idx = unsafe { index_of_unchecked([1u16, 1u16]) };
assert_eq!(idx, 3u32);

let coord = unsafe { coord_of(idx) };
let coord = unsafe { coord_of_unchecked(idx) };
assert_eq!(coord, [1u16, 1u16]);
}

// Safe interface with hardware support token.
let support_token = HardwareSupportToken::new();
if let Some(support_token) = support_token {
let idx = index_of([1u16, 1u16], support_token);
assert_eq!(idx, 3u32);

let coord = coord_of(idx, support_token);
assert_eq!(coord, [1u16, 1u16]);
}
```
Expand Down
24 changes: 12 additions & 12 deletions benches/bmi2_zorder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,47 @@ use zorder::bmi2;

#[cfg(target_arch = "x86_64")]
fn bench_bmi2(c: &mut Criterion) {
use core::panic;
let support_token = bmi2::HardwareSupportToken::new();

if is_x86_feature_detected!("bmi2") {
if let Some(support_token) = support_token {
c.bench_function("bmi2::array_index_of_u32", |b| {
b.iter(|| unsafe { bmi2::index_of(black_box([2312347u32, 76971888u32])) })
b.iter(|| bmi2::index_of(black_box([2312347u32, 76971888u32]), support_token))
});

c.bench_function("bmi2::array_coord_of_u32", |b| {
b.iter(|| unsafe { bmi2::coord_of::<_, 2>(black_box(231234776971888u64)) })
b.iter(|| bmi2::coord_of::<_, 2>(black_box(231234776971888u64), support_token))
});

c.bench_function("bmi2::array_index_of_u16", |b| {
b.iter(|| unsafe { bmi2::index_of(black_box([2374u16, 8761u16])) })
b.iter(|| bmi2::index_of(black_box([2374u16, 8761u16]), support_token))
});

c.bench_function("bmi2::array_coord_of_u16", |b| {
b.iter(|| unsafe { bmi2::coord_of::<_, 2>(black_box(23748761u32)) })
b.iter(|| bmi2::coord_of::<_, 2>(black_box(23748761u32), support_token))
});

c.bench_function("bmi2::array_index_of_u16_dim3", |b| {
b.iter(|| unsafe { bmi2::index_of(black_box([23123u16, 1888u16, 11237u16])) })
b.iter(|| bmi2::index_of(black_box([23123u16, 1888u16, 11237u16]), support_token))
});

c.bench_function("bmi2::array_coord_of_u16_dim3", |b| {
b.iter(|| unsafe { bmi2::coord_of::<_, 3>(black_box(23123188811237u64)) })
b.iter(|| bmi2::coord_of::<_, 3>(black_box(23123188811237u64), support_token))
});

c.bench_function("bmi2::array_index_of_u8", |b| {
b.iter(|| unsafe { bmi2::index_of(black_box([237u8, 76u8])) })
b.iter(|| bmi2::index_of(black_box([237u8, 76u8]), support_token))
});

c.bench_function("bmi2::array_coord_of_u8", |b| {
b.iter(|| unsafe { bmi2::coord_of::<_, 2>(black_box(23776u16)) })
b.iter(|| bmi2::coord_of::<_, 2>(black_box(23776u16), support_token))
});

c.bench_function("bmi2::array_index_of_u8_dim3", |b| {
b.iter(|| unsafe { bmi2::index_of(black_box([23u8, 18u8, 112u8])) })
b.iter(|| bmi2::index_of(black_box([23u8, 18u8, 112u8]), support_token))
});

c.bench_function("bmi2::array_coord_of_u8_dim3", |b| {
b.iter(|| unsafe { bmi2::coord_of::<_, 3>(black_box(2318112u32)) })
b.iter(|| bmi2::coord_of::<_, 3>(black_box(2318112u32), support_token))
});
} else {
panic!("failed to benchmark: bmi2 feature is not detected");
Expand Down
4 changes: 2 additions & 2 deletions examples/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ fn main() {

println!("BMI2 implementation:");
for coord in list {
let idx = unsafe { zorder::bmi2::index_of(coord) };
let new_coord: [u8; 3] = unsafe { zorder::bmi2::coord_of(idx) };
let idx = unsafe { zorder::bmi2::index_of_unchecked(coord) };
let new_coord: [u8; 3] = unsafe { zorder::bmi2::coord_of_unchecked(idx) };

println!("{:?} => {:032b} => {:?}", coord, idx, new_coord);
}
Expand Down
112 changes: 78 additions & 34 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
//! Basic usage with software implementation:
//!
//! ```
//! use zorder::{index_of, coord_of};
//! use zorder::{coord_of, index_of};
//!
//! let idx = index_of([1u16, 1u16]);
//! assert_eq!(idx, 3u32);
Expand All @@ -27,13 +27,13 @@
//! Basic usage with bmi2 implementation:
//!
//! ```
//! use zorder::bmi2::{coord_of, index_of};
//! use zorder::bmi2::{coord_of_unchecked, index_of_unchecked};
//!
//! if zorder::bmi2::has_hardware_support() {
//! let idx = unsafe { index_of([1u16, 1u16]) };
//! let idx = unsafe { index_of_unchecked([1u16, 1u16]) };
//! assert_eq!(idx, 3u32);
//!
//! let coord = unsafe { coord_of(idx) };
//! let coord = unsafe { coord_of_unchecked(idx) };
//! assert_eq!(coord, [1u16, 1u16]);
//! }
//! ```
Expand Down Expand Up @@ -97,8 +97,11 @@ pub mod bmi2 {

/// Returns true if the CPU supports the bmi2 instruction set.
///
/// You can use this function to validate that [`zorder::bmi2::index_of`] and
/// [`zorder::bmi2::coord_of`] can be safely called.
/// You can use this function to validate that [`index_of_unchecked`] and
/// [`coord_of_unchecked`] can be safely called.
/// Alternatively, you can acquire a [`HardwareSupportToken`], which proves
/// at runtime that the CPU supports the bmi2 instruction set, and then call
/// [`index_of`] and [`coord_of`] without unsafe.
pub fn has_hardware_support() -> bool {
#[cfg(all(target_arch = "x86_64", feature = "std"))]
{
Expand All @@ -110,6 +113,40 @@ pub mod bmi2 {
}
}

/// A token that guarantees that the CPU supports the bmi2 instruction set.
///
/// The token is a zero-sized witness: it carries no data and exists only so
/// that the safe [`index_of`] and [`coord_of`] wrappers can demand proof of
/// hardware support through their signatures.
///
/// You can freely copy and move this token, but you cannot create an instance
/// directly because its only field is private. Instead,
/// [`HardwareSupportToken::new`] returns an instance if the CPU supports the
/// bmi2 instruction set, so the check can be done once and the token reused.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HardwareSupportToken {
// Private unit field: holds nothing, and only prevents construction with a
// struct literal from outside this module.
_private: (),
}

impl HardwareSupportToken {
    /// Tries to obtain a token proving that bmi2 is available.
    ///
    /// Returns `Some(token)` when [`has_hardware_support`] reports `true`,
    /// and `None` otherwise.
    pub fn new() -> Option<Self> {
        if has_hardware_support() {
            Some(Self { _private: () })
        } else {
            None
        }
    }
}

/// Calculates the Z-order curve index for the given coordinates using the
/// bmi2 implementation, without requiring `unsafe` at the call site.
///
/// This is a safe wrapper around [`index_of_unchecked`] that requires a
/// [`HardwareSupportToken`] to guarantee that the bmi2 instruction set is
/// supported by the CPU. The token is only a witness; its value is never
/// read.
///
/// Like [`index_of_unchecked`], this function is only compiled for
/// `x86_64` targets, so the wrapper never refers to a function that has been
/// compiled out.
#[inline]
#[cfg(target_arch = "x86_64")]
pub fn index_of<I, const N: usize>(
    array: [I; N],
    _support_token: HardwareSupportToken,
) -> <I as Interleave<N>>::Output
where
    I: InterleaveBMI2<N>,
{
    // SAFETY: Having an instance of `HardwareSupportToken` guarantees that
    // the `bmi2` instruction set is supported by the CPU and that it is safe
    // to call `index_of_unchecked`.
    unsafe { index_of_unchecked(array) }
}

/// Calculates Z-order curve index for given sequence of coordinates.
///
/// Output type will be the smallest unsigned integer type that can hold all
Expand All @@ -124,36 +161,47 @@ pub mod bmi2 {
/// supported by the CPU. This can be checked at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
/// if is_x86_feature_detected!("bmi2") {
/// // ...
/// }
/// if zorder::bmi2::has_hardware_support() {
/// // ...
/// }
/// ```
///
/// # Examples
///
/// ```
/// # use zorder::bmi2::index_of;
/// #[cfg(target_arch = "x86_64")]
/// {
/// if is_x86_feature_detected!("bmi2") {
/// let idx = index_of([3u32, 7u32]);
/// assert_eq!(idx, 0b101_111u64);
/// }
/// if zorder::bmi2::has_hardware_support() {
/// let idx = unsafe { zorder::bmi2::index_of_unchecked([3u32, 7u32]) };
/// assert_eq!(idx, 0b101_111u64);
/// }
/// ```
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg(target_arch = "x86_64")]
pub unsafe fn index_of<I, const N: usize>(array: [I; N]) -> <I as Interleave<N>>::Output
pub unsafe fn index_of_unchecked<I, const N: usize>(
array: [I; N],
) -> <I as Interleave<N>>::Output
where
I: InterleaveBMI2<N>,
{
util::generic_index_of(array, |idx| idx.interleave_bmi2())
}

/// Returns the coordinates of the given Z-order curve index using the bmi2
/// implementation, without requiring `unsafe` at the call site.
///
/// This is a safe wrapper around [`coord_of_unchecked`] that requires a
/// [`HardwareSupportToken`] to guarantee that the bmi2 instruction set is
/// supported by the CPU. The token is only a witness; its value is never
/// read.
///
/// Like [`coord_of_unchecked`], this function is only compiled for
/// `x86_64` targets, so the wrapper never refers to a function that has been
/// compiled out.
#[inline]
#[cfg(target_arch = "x86_64")]
pub fn coord_of<I, const N: usize>(
    index: I,
    _support_token: HardwareSupportToken,
) -> [<I as Deinterleave<N>>::Output; N]
where
    I: DeinterleaveBMI2<N> + Copy,
{
    // SAFETY: Having an instance of `HardwareSupportToken` guarantees that
    // the `bmi2` instruction set is supported by the CPU and that it is safe
    // to call `coord_of_unchecked`.
    unsafe { coord_of_unchecked(index) }
}

/// Returns the 2D coordinates of the given Z-order curve index.
///
/// Since many different 2D coordinates can be mapped to the same type `I`,
Expand All @@ -168,30 +216,26 @@ pub mod bmi2 {
/// supported by the CPU. This can be checked at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
/// if is_x86_feature_detected!("bmi2") {
/// // ...
/// }
/// # use zorder::bmi2;
/// if bmi2::has_hardware_support() {
/// // ...
/// }
/// ```
///
/// # Examples
///
/// ```
/// # use zorder::bmi2::coord_of;
/// #[cfg(target_arch = "x86_64")]
/// {
/// if is_x86_feature_detected!("bmi2") {
/// let coord = coord_of(0b101_111u64);
/// assert_eq!(coord, [3u32, 7u32]);
/// }
/// # use zorder::bmi2;
/// if bmi2::has_hardware_support() {
/// let coord = unsafe { zorder::bmi2::coord_of_unchecked(0b101_111u64) };
/// assert_eq!(coord, [3u32, 7u32]);
/// }
/// ```
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg(target_arch = "x86_64")]
pub unsafe fn coord_of<I, const N: usize>(index: I) -> [<I as Deinterleave<N>>::Output; N]
pub unsafe fn coord_of_unchecked<I, const N: usize>(
index: I,
) -> [<I as Deinterleave<N>>::Output; N]
where
I: DeinterleaveBMI2<N> + Copy,
{
Expand Down

0 comments on commit 5d27ef4

Please sign in to comment.