Skip to content

Commit

Permalink
Cast benchmark (#817)
Browse files Browse the repository at this point in the history
- optimized ray and shape cast : 2x faster
- update mass options instead of automatic mass
- fixes for 32-bit Windows build
- b2TreeStats for measuring query performance
- reduced tree node size from 48-bytes to 40-bytes
- fixes for 32-bit Neon
- MSVC warning level 4 and fixes
#814, #815, #813, #809
  • Loading branch information
erincatto authored Oct 6, 2024
1 parent b864f53 commit a7123be
Show file tree
Hide file tree
Showing 25 changed files with 965 additions and 251 deletions.
19 changes: 18 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ jobs:
arch: x64

- name: Configure CMake
# enkiTS is failing ASAN on windows
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF

Expand All @@ -85,4 +84,22 @@ jobs:
- name: Test
working-directory: ${{github.workspace}}/build
run: ./bin/${{env.BUILD_TYPE}}/test

samples-windows:
name: windows
runs-on: windows-latest
steps:

- uses: actions/checkout@v4

- name: Setup MSVC dev command prompt
uses: TheMrMilchmann/setup-msvc-dev@v3
with:
arch: x64

- name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=Release -DBOX2D_SAMPLES=ON -DBUILD_SHARED_LIBS=OFF -DBOX2D_UNIT_TESTS=OFF

- name: Build
run: cmake --build ${{github.workspace}}/build --config Release

50 changes: 30 additions & 20 deletions include/box2d/box2d.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,48 +51,53 @@ B2_API b2SensorEvents b2World_GetSensorEvents( b2WorldId worldId );
B2_API b2ContactEvents b2World_GetContactEvents( b2WorldId worldId );

/// Overlap test for all shapes that *potentially* overlap the provided AABB
B2_API void b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );
B2_API b2TreeStats b2World_OverlapAABB( b2WorldId worldId, b2AABB aabb, b2QueryFilter filter, b2OverlapResultFcn* fcn,
void* context );

/// Overlap test for all shapes that overlap the provided circle
B2_API void b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform, b2QueryFilter filter,
b2OverlapResultFcn* fcn, void* context );
B2_API b2TreeStats b2World_OverlapCircle( b2WorldId worldId, const b2Circle* circle, b2Transform transform,
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );

/// Overlap test for all shapes that overlap the provided capsule
B2_API void b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform, b2QueryFilter filter,
b2OverlapResultFcn* fcn, void* context );
B2_API b2TreeStats b2World_OverlapCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform transform,
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );

/// Overlap test for all shapes that overlap the provided polygon
B2_API void b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform, b2QueryFilter filter,
b2OverlapResultFcn* fcn, void* context );
B2_API b2TreeStats b2World_OverlapPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform transform,
b2QueryFilter filter, b2OverlapResultFcn* fcn, void* context );

/// Cast a ray into the world to collect shapes in the path of the ray.
/// Your callback function controls whether you get the closest point, any point, or n-points.
/// The ray-cast ignores shapes that contain the starting point.
/// @note The callback function may receive shapes in any order
/// @param worldId The world to cast the ray against
/// @param origin The start point of the ray
/// @param translation The translation of the ray from the start point to the end point
/// @param filter Contains bit flags to filter unwanted shapes from the results
/// @param fcn A user implemented callback function
/// @param context A user context that is passed along to the callback function
/// @note The callback function may receive shapes in any order
B2_API void b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn,
void* context );
/// @return traversal performance counters
B2_API b2TreeStats b2World_CastRay( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter,
b2CastResultFcn* fcn, void* context );

/// Cast a ray into the world to collect the closest hit. This is a convenience function.
/// This is less general than b2World_CastRay() and does not allow for custom filtering.
B2_API b2RayResult b2World_CastRayClosest( b2WorldId worldId, b2Vec2 origin, b2Vec2 translation, b2QueryFilter filter );

/// Cast a circle through the world. Similar to a cast ray except that a circle is cast instead of a point.
B2_API void b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform, b2Vec2 translation,
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
/// @see b2World_CastRay
B2_API b2TreeStats b2World_CastCircle( b2WorldId worldId, const b2Circle* circle, b2Transform originTransform,
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );

/// Cast a capsule through the world. Similar to a cast ray except that a capsule is cast instead of a point.
B2_API void b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform, b2Vec2 translation,
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
/// @see b2World_CastRay
B2_API b2TreeStats b2World_CastCapsule( b2WorldId worldId, const b2Capsule* capsule, b2Transform originTransform,
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );

/// Cast a polygon through the world. Similar to a cast ray except that a polygon is cast instead of a point.
B2_API void b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform, b2Vec2 translation,
b2QueryFilter filter, b2CastResultFcn* fcn, void* context );
/// @see b2World_CastRay
B2_API b2TreeStats b2World_CastPolygon( b2WorldId worldId, const b2Polygon* polygon, b2Transform originTransform,
b2Vec2 translation, b2QueryFilter filter, b2CastResultFcn* fcn, void* context );

/// Enable/disable sleep. If your application does not need sleeping, you can gain some performance
/// by disabling sleep completely at the world level.
Expand Down Expand Up @@ -183,6 +188,9 @@ B2_API b2Counters b2World_GetCounters( b2WorldId worldId );
/// Dump memory stats to box2d_memory.txt
B2_API void b2World_DumpMemoryStats( b2WorldId worldId );

/// todo testing
B2_API void b2World_RebuildStaticTree( b2WorldId worldId );

/** @} */

/**
Expand Down Expand Up @@ -466,8 +474,10 @@ B2_API b2ShapeId b2CreateCapsuleShape( b2BodyId bodyId, const b2ShapeDef* def, c
/// @return the shape id for accessing the shape
B2_API b2ShapeId b2CreatePolygonShape( b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon );

/// Destroy a shape
B2_API void b2DestroyShape( b2ShapeId shapeId );
/// Destroy a shape. You may defer the body mass update which can improve performance if several shapes on a
/// body are destroyed at once.
/// @see b2Body_ApplyMassFromShapes
B2_API void b2DestroyShape( b2ShapeId shapeId, bool updateBodyMass );

/// Shape identifier validation. Provides validation for up to 64K allocations.
B2_API bool b2Shape_IsValid( b2ShapeId id );
Expand All @@ -492,9 +502,9 @@ B2_API void b2Shape_SetUserData( b2ShapeId shapeId, void* userData );
B2_API void* b2Shape_GetUserData( b2ShapeId shapeId );

/// Set the mass density of a shape, typically in kg/m^2.
/// This will not update the mass properties on the parent body.
/// This will optionally update the mass properties on the parent body.
/// @see b2ShapeDef::density, b2Body_ApplyMassFromShapes
B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density );
B2_API void b2Shape_SetDensity( b2ShapeId shapeId, float density, bool updateBodyMass );

/// Get the density of a shape, typically in kg/m^2
B2_API float b2Shape_GetDensity( b2ShapeId shapeId );
Expand Down
94 changes: 51 additions & 43 deletions include/box2d/collision.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ typedef struct b2Hull b2Hull;
/// don't use more vertices.
#define b2_maxPolygonVertices 8

/// Low level ray-cast input data
/// Low level ray cast input data
typedef struct b2RayCastInput
{
/// Start point of the ray cast
Expand Down Expand Up @@ -63,7 +63,7 @@ typedef struct b2ShapeCastInput
float maxFraction;
} b2ShapeCastInput;

/// Low level ray-cast or shape-cast output data
/// Low level ray cast or shape-cast output data
typedef struct b2CastOutput
{
/// The surface normal at the hit point
Expand Down Expand Up @@ -566,16 +566,16 @@ B2_API b2Manifold b2CollideSegmentAndPolygon( const b2Segment* segmentA, b2Trans
b2Transform xfB );

/// Compute the contact manifold between a chain segment and a circle
B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA,
const b2Circle* circleB, b2Transform xfB );
B2_API b2Manifold b2CollideChainSegmentAndCircle( const b2ChainSegment* segmentA, b2Transform xfA, const b2Circle* circleB,
b2Transform xfB );

/// Compute the contact manifold between a chain segment and a capsule
B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA,
const b2Capsule* capsuleB, b2Transform xfB, b2DistanceCache* cache );
B2_API b2Manifold b2CollideChainSegmentAndCapsule( const b2ChainSegment* segmentA, b2Transform xfA, const b2Capsule* capsuleB,
b2Transform xfB, b2DistanceCache* cache );

/// Compute the contact manifold between a chain segment and a rounded polygon
B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA,
const b2Polygon* polygonB, b2Transform xfB, b2DistanceCache* cache );
B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segmentA, b2Transform xfA, const b2Polygon* polygonB,
b2Transform xfB, b2DistanceCache* cache );

/**@}*/

Expand All @@ -602,8 +602,7 @@ B2_API b2Manifold b2CollideChainSegmentAndPolygon( const b2ChainSegment* segment
/// The default category bit for a tree proxy. Used for collision filtering.
#define b2_defaultCategoryBits ( 1 )

/// Convenience mask bits to use when you don't need collision filtering and just want
/// all results.
/// Convenience mask bits to use when you don't need collision filtering and just want all results.
#define b2_defaultMaskBits ( UINT64_MAX )

/// A node in the dynamic tree. This is private data placed here for performance reasons.
Expand All @@ -617,31 +616,27 @@ typedef struct b2TreeNode

union
{
/// The node parent index
/// The node parent index (allocated node)
int32_t parent;

/// The node freelist next index
/// The node freelist next index (free node)
int32_t next;
}; // 4

/// Child 1 index
/// Child 1 index (internal node)
int32_t child1; // 4

/// Child 2 index
int32_t child2; // 4

/// User data
// todo could be union with child index
int32_t userData; // 4

/// Leaf = 0, free node = -1
int16_t height; // 2
union
{
/// Child 2 index (internal node)
int32_t child2;

/// Has the AABB been enlarged?
bool enlarged; // 1
/// User data (leaf node)
int32_t userData;
}; // 4

/// Padding for clarity
char pad[5];
uint16_t height; // 2
uint16_t flags; // 2
} b2TreeNode;

/// The dynamic tree structure. This should be considered private data.
Expand Down Expand Up @@ -682,6 +677,16 @@ typedef struct b2DynamicTree
int32_t rebuildCapacity;
} b2DynamicTree;

/// Performance counters returned by dynamic tree queries (b2DynamicTree_Query,
/// b2DynamicTree_RayCast, and b2DynamicTree_ShapeCast).
typedef struct b2TreeStats
{
/// Number of internal nodes visited during the query
int32_t nodeVisits;

/// Number of leaf nodes visited during the query
int32_t leafVisits;
} b2TreeStats;

/// Constructing the tree initializes the node pool.
B2_API b2DynamicTree b2DynamicTree_Create( void );

Expand All @@ -705,49 +710,53 @@ B2_API void b2DynamicTree_EnlargeProxy( b2DynamicTree* tree, int32_t proxyId, b2
typedef bool b2TreeQueryCallbackFcn( int32_t proxyId, int32_t userData, void* context );

/// Query an AABB for overlapping proxies. The callback class is called for each proxy that overlaps the supplied AABB.
B2_API void b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits, b2TreeQueryCallbackFcn* callback,
void* context );
/// @return performance data
B2_API b2TreeStats b2DynamicTree_Query( const b2DynamicTree* tree, b2AABB aabb, uint64_t maskBits,
b2TreeQueryCallbackFcn* callback, void* context );

/// This function receives clipped raycast input for a proxy. The function
/// This function receives clipped ray cast input for a proxy. The function
/// returns the new ray fraction.
/// - return a value of 0 to terminate the ray cast
/// - return a value less than input->maxFraction to clip the ray
/// - return a value of input->maxFraction to continue the ray cast without clipping
typedef float b2TreeRayCastCallbackFcn( const b2RayCastInput* input, int32_t proxyId, int32_t userData, void* context );

/// Ray-cast against the proxies in the tree. This relies on the callback
/// to perform a exact ray-cast in the case were the proxy contains a shape.
/// Ray cast against the proxies in the tree. This relies on the callback
/// to perform an exact ray cast in the case where the proxy contains a shape.
/// The callback also performs any collision filtering. This has performance
/// roughly equal to k * log(n), where k is the number of collisions and n is the
/// number of proxies in the tree.
/// Bit-wise filtering using mask bits can greatly improve performance in some scenarios.
/// However, this filtering may be approximate, so the user should still apply filtering to results.
/// @param tree the dynamic tree to ray cast
/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1)
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;`
/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1)
/// @param maskBits mask bit hint: `bool accept = (maskBits & node->categoryBits) != 0;`
/// @param callback a callback class that is called for each proxy that is hit by the ray
/// @param context user context that is passed to the callback
B2_API void b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
b2TreeRayCastCallbackFcn* callback, void* context );
/// @return performance data
B2_API b2TreeStats b2DynamicTree_RayCast( const b2DynamicTree* tree, const b2RayCastInput* input, uint64_t maskBits,
b2TreeRayCastCallbackFcn* callback, void* context );

/// This function receives clipped ray-cast input for a proxy. The function
/// This function receives clipped ray cast input for a proxy. The function
/// returns the new ray fraction.
/// - return a value of 0 to terminate the ray-cast
/// - return a value of 0 to terminate the ray cast
/// - return a value less than input->maxFraction to clip the ray
/// - return a value of input->maxFraction to continue the ray cast without clipping
typedef float b2TreeShapeCastCallbackFcn( const b2ShapeCastInput* input, int32_t proxyId, int32_t userData, void* context );

/// Ray-cast against the proxies in the tree. This relies on the callback
/// to perform a exact ray-cast in the case were the proxy contains a shape.
/// Ray cast against the proxies in the tree. This relies on the callback
/// to perform an exact ray cast in the case where the proxy contains a shape.
/// The callback also performs any collision filtering. This has performance
/// roughly equal to k * log(n), where k is the number of collisions and n is the
/// number of proxies in the tree.
/// @param tree the dynamic tree to ray cast
/// @param input the ray-cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1).
/// @param input the ray cast input data. The ray extends from p1 to p1 + maxFraction * (p2 - p1).
/// @param maskBits filter bits: `bool accept = (maskBits & node->categoryBits) != 0;`
/// @param callback a callback class that is called for each proxy that is hit by the shape
/// @param context user context that is passed to the callback
B2_API void b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
b2TreeShapeCastCallbackFcn* callback, void* context );
/// @return performance data
B2_API b2TreeStats b2DynamicTree_ShapeCast( const b2DynamicTree* tree, const b2ShapeCastInput* input, uint64_t maskBits,
b2TreeShapeCastCallbackFcn* callback, void* context );

/// Validate this tree. For testing.
B2_API void b2DynamicTree_Validate( const b2DynamicTree* tree );
Expand Down Expand Up @@ -781,7 +790,6 @@ B2_API void b2DynamicTree_ShiftOrigin( b2DynamicTree* tree, b2Vec2 newOrigin );
B2_API int b2DynamicTree_GetByteCount( const b2DynamicTree* tree );

/// Get proxy user data
/// @return the proxy user data or 0 if the id is invalid
B2_INLINE int32_t b2DynamicTree_GetUserData( const b2DynamicTree* tree, int32_t proxyId )
{
return tree->nodes[proxyId].userData;
Expand Down
9 changes: 5 additions & 4 deletions include/box2d/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ typedef struct b2RayResult
b2Vec2 point;
b2Vec2 normal;
float fraction;
int nodeVisits;
int leafVisits;
bool hit;
} b2RayResult;

Expand Down Expand Up @@ -220,10 +222,6 @@ typedef struct b2BodyDef
/// Used to disable a body. A disabled body does not move or collide.
bool isEnabled;

/// Automatically compute mass and related properties on this body from shapes.
/// Triggers whenever a shape is add/removed/changed. Default is true.
bool automaticMass;

/// This allows this body to bypass rotational speed limits. Should only be used
/// for circular objects, like wheels.
bool allowFastRotation;
Expand Down Expand Up @@ -367,6 +365,9 @@ typedef struct b2ShapeDef
/// This is implicitly always true for sensors.
bool forceContactCreation;

/// Should the body update the mass properties when this shape is created. Default is true.
bool updateBodyMass;

/// Used internally to detect a valid definition. DO NOT SET.
int32_t internalValue;
} b2ShapeDef;
Expand Down
4 changes: 2 additions & 2 deletions samples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void* AllocFcn( uint32_t size, int32_t alignment )
size_t sizeAligned = ( ( size - 1 ) | ( alignment - 1 ) ) + 1;
assert( ( sizeAligned & ( alignment - 1 ) ) == 0 );

#if defined( _WIN64 )
#if defined( _WIN64 ) || defined( _WIN32 )
void* ptr = _aligned_malloc( sizeAligned, alignment );
#else
void* ptr = aligned_alloc( alignment, sizeAligned );
Expand All @@ -79,7 +79,7 @@ void* AllocFcn( uint32_t size, int32_t alignment )

void FreeFcn( void* mem )
{
#if defined( _WIN64 )
#if defined( _WIN64 ) || defined( _WIN32 )
_aligned_free( mem );
#else
free( mem );
Expand Down
Loading

0 comments on commit a7123be

Please sign in to comment.