From 4f2301f88854cc7cc6ed1272546d59b61c1e2196 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 6 Aug 2023 23:15:23 -0700 Subject: [PATCH 01/51] wip --- include/box2d/callbacks.h | 2 +- include/box2d/manifold.h | 2 +- samples/collection/benchmark_pyramid.cpp | 4 +- samples/sample.cpp | 18 ++- samples/sample.h | 3 +- src/CMakeLists.txt | 2 + src/bitset.h | 21 +++ src/body.c | 5 + src/contact.c | 10 +- src/contact.h | 3 + src/graph.c | 160 +++++++++++++++++++++++ src/graph.h | 35 +++++ src/world.c | 2 + src/world.h | 2 + 14 files changed, 259 insertions(+), 10 deletions(-) create mode 100644 src/graph.c create mode 100644 src/graph.h diff --git a/include/box2d/callbacks.h b/include/box2d/callbacks.h index d1e4b653..05ce7038 100644 --- a/include/box2d/callbacks.h +++ b/include/box2d/callbacks.h @@ -48,7 +48,7 @@ typedef void b2EndContactFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, void* conte /// get an EndContact callback. However, you may get a BeginContact callback /// the next step. /// - the supplied manifold has impulse values from the previous frame -typedef bool b2PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, void* context); +typedef bool b2PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color, void* context); BOX2D_API void b2World_SetPreSolveCallback(b2WorldId worldId, b2PreSolveFcn* fcn, void* context); /// This lets you inspect a contact after the solver is finished. This is useful diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index 71c4149a..740ba1c4 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -41,7 +41,7 @@ typedef struct b2ManifoldPoint bool persisted; } b2ManifoldPoint; -/// Conact manifold convex shapes. +/// Contact manifold convex shapes. typedef struct b2Manifold { b2ManifoldPoint points[b2_maxManifoldPoints]; diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index b6c4c04a..ee7532b1 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -23,7 +23,7 @@ class BenchmarkPyramid : public Sample m_round = 0.0f; m_baseCount = 10; m_rowCount = g_sampleDebug ? 1 : 16; - m_columnCount = g_sampleDebug ? 4 : 16; + m_columnCount = g_sampleDebug ? 1 : 16; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; @@ -63,7 +63,7 @@ class BenchmarkPyramid : public Sample for (int32_t j = i; j < m_baseCount; ++j) { - float x = (i + 1.0f) * m_extent + 2.0f * (j - i) * m_extent + centerX; + float x = (i + 1.0f) * m_extent + 2.25f * (j - i) * m_extent + centerX - 0.5f; bodyDef.position = {x, y}; assert(m_bodyIndex < m_bodyCount); diff --git a/samples/sample.cpp b/samples/sample.cpp index 5472f8d4..2d9725be 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -16,10 +16,10 @@ #include #include -bool PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, void* context) +bool PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color, void* context) { Sample* sample = static_cast(context); - return sample->PreSolve(shapeIdA, shapeIdB, manifold); + return sample->PreSolve(shapeIdA, shapeIdB, manifold, color); } static void* EnqueueTask(b2TaskCallback* task, int32_t itemCount, int32_t minRange, void* taskContext, void* userContext) @@ -336,11 +336,20 @@ void Sample::Step(Settings& settings) b2Color addColor = {0.3f, 0.95f, 0.3f, 1.0f}; b2Color persistColor = {0.3f, 0.3f, 0.95f, 1.0f}; + b2HexColor colors[8] = {b2_colorAquamarine, b2_colorBisque, b2_colorBlue, b2_colorBrown, + b2_colorBurlywood, b2_colorCadetBlue, b2_colorChartreuse, b2_colorChocolate}; + for (int32_t i = 0; i < m_pointCount; ++i) { ContactPoint* point = m_points + i; - if (point->separation > b2_linearSlop) + if (0 <= point->color && point->color < 8) + { + // graph color + g_draw.DrawPoint(point->position, 5.0f, b2MakeColor(colors[point->color], 1.0f)); + g_draw.DrawString(point->position, "%d", point->color); + } + else if (point->separation > b2_linearSlop) { // Speculative g_draw.DrawPoint(point->position, 5.0f, speculativeColor); @@ -388,7 +397,7 @@ void Sample::ShiftOrigin(b2Vec2 newOrigin) } // Thread-safe callback -bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold) +bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color) { long startCount = m_pointCount.fetch_add(manifold->pointCount); if (startCount >= k_maxContactPoints) @@ -411,6 +420,7 @@ bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifo cp->normalImpulse = manifold->points[j].normalImpulse; cp->tangentImpulse = manifold->points[j].tangentImpulse; cp->persisted = manifold->points[j].persisted; + cp->color = color; ++j; } diff --git a/samples/sample.h b/samples/sample.h index 9f90ca8f..59d8542a 100644 --- a/samples/sample.h +++ b/samples/sample.h @@ -70,6 +70,7 @@ struct ContactPoint float normalImpulse; float tangentImpulse; float separation; + int32_t color; }; class SampleTask : public enki::ITaskSet @@ -112,7 +113,7 @@ class Sample void ResetProfile(); void ShiftOrigin(b2Vec2 newOrigin); - bool PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold); + bool PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color); friend class DestructionListener; friend class BoundaryListener; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4f553c88..4affce80 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,8 @@ set(BOX2D_SOURCE_FILES distance.c dynamic_tree.c geometry.c + graph.c + graph.h hull.c island.c island.h diff --git a/src/bitset.h b/src/bitset.h index 3d240eeb..5298e2de 100644 --- a/src/bitset.h +++ b/src/bitset.h @@ -24,10 +24,31 @@ void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB); static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex) { uint32_t wordIndex = bitIndex / 64; + // TODO_ERIN support growing B2_ASSERT(wordIndex < bitSet->wordCount); bitSet->bits[wordIndex] |= ((uint64_t)1) << (bitIndex % 64); } +static inline void b2ClearBit(b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + return; + } + bitSet->bits[wordIndex] &= ~(((uint64_t)1) << (bitIndex % 64)); +} + +static inline bool b2GetBit(const b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + return false; + } + return (bitSet->bits[wordIndex] & ((uint64_t)1) << (bitIndex % 64)) != 0; +} + #if defined(_MSC_VER) && !defined(__clang__) #include diff --git a/src/body.c b/src/body.c index 25b0d10b..8bd1e9be 100644 --- a/src/body.c +++ b/src/body.c @@ -9,6 +9,7 @@ #include "body.h" #include "contact.h" #include "core.h" +#include "graph.h" #include "island.h" #include "joint.h" #include "world.h" @@ -127,6 +128,10 @@ void b2World_DestroyBody(b2BodyId bodyId) int32_t twinIndex = twinKey & 1; b2Contact* contact = world->contacts + contactIndex; + + // TODO_ERIN could pass bodies + b2RemoveContactFromGraph(world, &world->graph, contact); + b2ContactEdge* twin = contact->edges + twinIndex; // Remove contact from other body's doubly linked list diff --git a/src/contact.c b/src/contact.c index 1413d179..4fe2e3fd 100644 --- a/src/contact.c +++ b/src/contact.c @@ -197,6 +197,8 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) contact->islandIndex = B2_NULL_INDEX; contact->islandPrev = B2_NULL_INDEX; contact->islandNext = B2_NULL_INDEX; + contact->colorContactIndex = B2_NULL_INDEX; + contact->colorIndex = B2_NULL_INDEX; b2Body* bodyA = world->bodies + shapeA->bodyIndex; b2Body* bodyB = world->bodies + shapeB->bodyIndex; @@ -254,6 +256,9 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) // Add to pair set for fast lookup uint64_t pairKey = B2_SHAPE_PAIR_KEY(contact->shapeIndexA, contact->shapeIndexB); b2AddKey(&world->broadPhase.pairSet, pairKey); + + // TODO_ERIN could pass bodies + b2AddContactToGraph(world, &world->graph, contact); } void b2DestroyContact(b2World* world, b2Contact* contact) @@ -268,6 +273,9 @@ void b2DestroyContact(b2World* world, b2Contact* contact) b2Body* bodyA = world->bodies + edgeA->bodyIndex; b2Body* bodyB = world->bodies + edgeB->bodyIndex; + // TODO_ERIN pass bodies + b2RemoveContactFromGraph(world, &world->graph, contact); + // if (contactListener && contact->IsTouching()) //{ // contactListener->EndContact(contact); @@ -439,7 +447,7 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body if (touching && world->preSolveFcn) { // TODO_ERIN this call assumes thread safety - bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, world->preSolveContext); + bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, contact->colorIndex, world->preSolveContext); if (collide == false) { // disable contact diff --git a/src/contact.h b/src/contact.h index 0b2018d4..f59e4987 100644 --- a/src/contact.h +++ b/src/contact.h @@ -66,6 +66,9 @@ typedef struct b2Contact // This is too hot and has been moved to a separate array //int32_t awakeIndex; + int32_t colorIndex; + int32_t colorContactIndex; + b2ContactEdge edges[2]; int32_t shapeIndexA; diff --git a/src/graph.c b/src/graph.c new file mode 100644 index 00000000..97800721 --- /dev/null +++ b/src/graph.c @@ -0,0 +1,160 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "graph.h" + +#include "allocate.h" +#include "array.h" +#include "body.h" +#include "contact.h" +#include "core.h" +#include "shape.h" +#include "stack_allocator.h" +#include "world.h" + +#include + +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) +{ + bodyCapacity = B2_MAX(bodyCapacity, 8); + contactCapacity = B2_MAX(contactCapacity, 8); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + color->bodySet = b2CreateBitSet(bodyCapacity); + b2SetBitCountAndClear(&color->bodySet, bodyCapacity); + + color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + } +} + +void b2DestroyGraph(b2Graph* graph) +{ + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + b2DestroyBitSet(&color->bodySet); + b2DestroyArray(color->contactArray, sizeof(int32_t)); + } +} + +void b2AddContactToGraph(b2World* world, b2Graph* graph, b2Contact* contact) +{ + B2_ASSERT(contact->colorContactIndex == B2_NULL_INDEX); + B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + + int32_t bodyIndexA = contact->edges[0].bodyIndex; + int32_t bodyIndexB = contact->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA) || b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBit(&color->bodySet, bodyIndexA); + b2SetBit(&color->bodySet, bodyIndexB); + + contact->colorContactIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + else if (typeA == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA)) + { + continue; + } + + b2SetBit(&color->bodySet, bodyIndexA); + + contact->colorContactIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + else if (typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBit(&color->bodySet, bodyIndexB); + + contact->colorContactIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } +} + +void b2RemoveContactFromGraph(b2World* world, b2Graph* graph, b2Contact* contact) +{ + if (contact->colorIndex == B2_NULL_INDEX) + { + return; + } + + B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); + int32_t bodyIndexA = contact->edges[0].bodyIndex; + int32_t bodyIndexB = contact->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + b2GraphColor* color = graph->colors + contact->colorIndex; + + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + b2ClearBit(&color->bodySet, bodyIndexA); + b2ClearBit(&color->bodySet, bodyIndexB); + } + else if (typeA == b2_dynamicBody) + { + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + + b2ClearBit(&color->bodySet, bodyIndexA); + } + else if (typeB == b2_dynamicBody) + { + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + + b2ClearBit(&color->bodySet, bodyIndexB); + } +} + +void b2SolveGraph(b2World* world, b2Graph* graph) +{ + B2_MAYBE_UNUSED(world); + B2_MAYBE_UNUSED(graph); +} diff --git a/src/graph.h b/src/graph.h new file mode 100644 index 00000000..5c9b7386 --- /dev/null +++ b/src/graph.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#pragma once + +#include "array.h" +#include "bitset.h" +#include "table.h" + +#include "box2d/dynamic_tree.h" + +typedef struct b2Contact b2Contact; +typedef struct b2World b2World; + +#define b2_graphColorCount 8 + +typedef struct b2GraphColor +{ + b2BitSet bodySet; + int32_t* contactArray; +} b2GraphColor; + +typedef struct b2Graph +{ + b2GraphColor colors[b2_graphColorCount]; + +} b2Graph; + +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); +void b2DestroyGraph(b2Graph* graph); + +void b2AddContactToGraph(b2World* world, b2Graph* graph, b2Contact* contact); +void b2RemoveContactFromGraph(b2World* world, b2Graph* graph, b2Contact* contact); + +void b2SolveGraph(b2World* world, b2Graph* graph); diff --git a/src/world.c b/src/world.c index d21451e4..a2f81c8a 100644 --- a/src/world.c +++ b/src/world.c @@ -96,6 +96,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->stackAllocator = b2CreateStackAllocator(def->stackAllocatorCapacity); b2CreateBroadPhase(&world->broadPhase); + b2CreateGraph(&world->graph, def->bodyCapacity, def->contactCapacity); // pools world->bodyPool = b2CreatePool(sizeof(b2Body), B2_MAX(def->bodyCapacity, 1)); @@ -200,6 +201,7 @@ void b2DestroyWorld(b2WorldId id) b2DestroyPool(&world->shapePool); b2DestroyPool(&world->bodyPool); + b2DestroyGraph(&world->graph); b2DestroyBroadPhase(&world->broadPhase); b2DestroyBlockAllocator(world->blockAllocator); diff --git a/src/world.h b/src/world.h index 09e79151..05ddaf35 100644 --- a/src/world.h +++ b/src/world.h @@ -6,6 +6,7 @@ #include "bitset.h" #include "broad_phase.h" #include "island.h" +#include "graph.h" #include "pool.h" #include "box2d/callbacks.h" @@ -39,6 +40,7 @@ typedef struct b2World struct b2StackAllocator* stackAllocator; b2BroadPhase broadPhase; + b2Graph graph; b2Pool bodyPool; b2Pool contactPool; From 5136235f40519273bba711cc14ac9004c736dfbd Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 11 Aug 2023 22:45:00 -0700 Subject: [PATCH 02/51] wip --- include/box2d/debug_draw.h | 3 + samples/collection/benchmark_pyramid.cpp | 3 + samples/draw.cpp | 22 +--- src/bitset.h | 6 +- src/body.c | 6 +- src/contact.c | 9 +- src/graph.c | 144 +++++++++++++++++++++-- src/graph.h | 7 +- src/world.c | 9 ++ test/CMakeLists.txt | 1 + test/main.c | 2 + test/test_bitset.c | 39 ++++++ test/test_determinism.c | 2 + 13 files changed, 216 insertions(+), 37 deletions(-) create mode 100644 test/test_bitset.c diff --git a/include/box2d/debug_draw.h b/include/box2d/debug_draw.h index 2c54aa95..2a86be0e 100644 --- a/include/box2d/debug_draw.h +++ b/include/box2d/debug_draw.h @@ -39,6 +39,9 @@ typedef struct b2DebugDraw /// Draw a point. void (*DrawPoint)(b2Vec2 p, float size, b2Color color, void* context); + /// Draw a string. + void (*DrawString)(b2Vec2 p, const char* s, void* context); + bool drawShapes; bool drawJoints; bool drawAABBs; diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index ee7532b1..a48bfa19 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -63,7 +63,9 @@ class BenchmarkPyramid : public Sample for (int32_t j = i; j < m_baseCount; ++j) { + //float x = (1.5f * i + 1.0f) * m_extent + 3.0f * (j - i) * m_extent + centerX - 0.5f; float x = (i + 1.0f) * m_extent + 2.25f * (j - i) * m_extent + centerX - 0.5f; + bodyDef.position = {x, y}; assert(m_bodyIndex < m_bodyCount); @@ -105,6 +107,7 @@ class BenchmarkPyramid : public Sample for (int32_t i = 0; i < m_rowCount; ++i) { b2Segment segment = {{-0.5f * groundWidth, groundY}, {0.5f * groundWidth, groundY}}; + //b2Segment segment = {{-0.5f * 2.0f * groundWidth, groundY}, {0.5f * 2.0f * groundWidth, groundY}}; b2Body_CreateSegment(m_groundId, &shapeDef, &segment); groundY += groundDeltaY; } diff --git a/samples/draw.cpp b/samples/draw.cpp index 42bce19a..1dfbe0c6 100644 --- a/samples/draw.cpp +++ b/samples/draw.cpp @@ -860,7 +860,11 @@ void DrawPointFcn(b2Vec2 p, float size, b2Color color, void* context) static_cast(context)->DrawPoint(p, size, color); } -// +void DrawStringFcn(b2Vec2 p, const char* s, void* context) +{ + static_cast(context)->DrawString(p, s); +} + Draw::Draw() { m_showUI = true; @@ -871,7 +875,6 @@ Draw::Draw() m_debugDraw = {}; } -// Draw::~Draw() { assert(m_points == nullptr); @@ -879,7 +882,6 @@ Draw::~Draw() assert(m_triangles == nullptr); } -// void Draw::Create() { m_points = static_cast(malloc(sizeof(GLRenderPoints))); @@ -901,6 +903,7 @@ void Draw::Create() DrawSegmentFcn, DrawTransformFcn, DrawPointFcn, + DrawStringFcn, true, true, false, @@ -908,7 +911,6 @@ void Draw::Create() this}; } -// void Draw::Destroy() { m_points->Destroy(); @@ -928,7 +930,6 @@ void Draw::Destroy() m_roundedTriangles = nullptr; } -// void Draw::DrawPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color color) { b2Vec2 p1 = vertices[vertexCount - 1]; @@ -941,7 +942,6 @@ void Draw::DrawPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color colo } } -// void Draw::DrawSolidPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color color) { b2Color fillColor = {0.5f * color.r, 0.5f * color.g, 0.5f * color.b, 0.5f}; @@ -1038,7 +1038,6 @@ void Draw::DrawRoundedPolygon(const b2Vec2* vertices, int32_t count, float radiu } } -// void Draw::DrawCircle(b2Vec2 center, float radius, b2Color color) { const float k_segments = 32.0f; @@ -1061,7 +1060,6 @@ void Draw::DrawCircle(b2Vec2 center, float radius, b2Color color) } } -// void Draw::DrawSolidCircle(b2Vec2 center, float radius, b2Vec2 axis, b2Color color) { b2Color fillColor = {0.5f * color.r, 0.5f * color.g, 0.5f * color.b, 0.5f}; @@ -1166,7 +1164,6 @@ void Draw::DrawCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) m_lines->Vertex(p2, color); } -// void Draw::DrawSolidCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) { float length; @@ -1267,14 +1264,12 @@ void Draw::DrawSolidCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) m_lines->Vertex(p2, color); } -// void Draw::DrawSegment(b2Vec2 p1, b2Vec2 p2, b2Color color) { m_lines->Vertex(p1, color); m_lines->Vertex(p2, color); } -// void Draw::DrawTransform(b2Transform xf) { const float k_axisScale = 0.4f; @@ -1291,13 +1286,11 @@ void Draw::DrawTransform(b2Transform xf) m_lines->Vertex(p2, green); } -// void Draw::DrawPoint(b2Vec2 p, float size, b2Color color) { m_points->Vertex(p, color, size); } -// void Draw::DrawString(int x, int y, const char* string, ...) { // if (m_showUI == false) @@ -1316,7 +1309,6 @@ void Draw::DrawString(int x, int y, const char* string, ...) va_end(arg); } -// void Draw::DrawString(b2Vec2 pw, const char* string, ...) { b2Vec2 ps = g_camera.ConvertWorldToScreen(pw); @@ -1332,7 +1324,6 @@ void Draw::DrawString(b2Vec2 pw, const char* string, ...) va_end(arg); } -// void Draw::DrawAABB(b2AABB aabb, b2Color c) { b2Vec2 p1 = aabb.lowerBound; @@ -1353,7 +1344,6 @@ void Draw::DrawAABB(b2AABB aabb, b2Color c) m_lines->Vertex(p1, c); } -// void Draw::Flush() { m_roundedTriangles->Flush(); diff --git a/src/bitset.h b/src/bitset.h index 5298e2de..f4fefacf 100644 --- a/src/bitset.h +++ b/src/bitset.h @@ -26,7 +26,7 @@ static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex) uint32_t wordIndex = bitIndex / 64; // TODO_ERIN support growing B2_ASSERT(wordIndex < bitSet->wordCount); - bitSet->bits[wordIndex] |= ((uint64_t)1) << (bitIndex % 64); + bitSet->bits[wordIndex] |= ((uint64_t)1 << bitIndex % 64); } static inline void b2ClearBit(b2BitSet* bitSet, uint32_t bitIndex) @@ -36,7 +36,7 @@ static inline void b2ClearBit(b2BitSet* bitSet, uint32_t bitIndex) { return; } - bitSet->bits[wordIndex] &= ~(((uint64_t)1) << (bitIndex % 64)); + bitSet->bits[wordIndex] &= ~((uint64_t)1 << bitIndex % 64); } static inline bool b2GetBit(const b2BitSet* bitSet, uint32_t bitIndex) @@ -46,7 +46,7 @@ static inline bool b2GetBit(const b2BitSet* bitSet, uint32_t bitIndex) { return false; } - return (bitSet->bits[wordIndex] & ((uint64_t)1) << (bitIndex % 64)) != 0; + return (bitSet->bits[wordIndex] & ((uint64_t)1 << bitIndex % 64)) != 0; } #if defined(_MSC_VER) && !defined(__clang__) diff --git a/src/body.c b/src/body.c index 8bd1e9be..ec36272c 100644 --- a/src/body.c +++ b/src/body.c @@ -129,8 +129,10 @@ void b2World_DestroyBody(b2BodyId bodyId) b2Contact* contact = world->contacts + contactIndex; - // TODO_ERIN could pass bodies - b2RemoveContactFromGraph(world, &world->graph, contact); + if (contact->colorIndex != B2_NULL_INDEX) + { + b2RemoveContactFromGraph(world, contact); + } b2ContactEdge* twin = contact->edges + twinIndex; diff --git a/src/contact.c b/src/contact.c index 4fe2e3fd..9db06d84 100644 --- a/src/contact.c +++ b/src/contact.c @@ -256,9 +256,6 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) // Add to pair set for fast lookup uint64_t pairKey = B2_SHAPE_PAIR_KEY(contact->shapeIndexA, contact->shapeIndexB); b2AddKey(&world->broadPhase.pairSet, pairKey); - - // TODO_ERIN could pass bodies - b2AddContactToGraph(world, &world->graph, contact); } void b2DestroyContact(b2World* world, b2Contact* contact) @@ -273,8 +270,10 @@ void b2DestroyContact(b2World* world, b2Contact* contact) b2Body* bodyA = world->bodies + edgeA->bodyIndex; b2Body* bodyB = world->bodies + edgeB->bodyIndex; - // TODO_ERIN pass bodies - b2RemoveContactFromGraph(world, &world->graph, contact); + if (contact->colorIndex != B2_NULL_INDEX) + { + b2RemoveContactFromGraph(world, contact); + } // if (contactListener && contact->IsTouching()) //{ diff --git a/src/graph.c b/src/graph.c index 97800721..75110a5c 100644 --- a/src/graph.c +++ b/src/graph.c @@ -39,11 +39,13 @@ void b2DestroyGraph(b2Graph* graph) } } -void b2AddContactToGraph(b2World* world, b2Graph* graph, b2Contact* contact) +void b2AddContactToGraph(b2World* world, b2Contact* contact) { B2_ASSERT(contact->colorContactIndex == B2_NULL_INDEX); B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + b2Graph* graph = &world->graph; + int32_t bodyIndexA = contact->edges[0].bodyIndex; int32_t bodyIndexB = contact->edges[1].bodyIndex; @@ -107,12 +109,12 @@ void b2AddContactToGraph(b2World* world, b2Graph* graph, b2Contact* contact) } } -void b2RemoveContactFromGraph(b2World* world, b2Graph* graph, b2Contact* contact) +void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) { - if (contact->colorIndex == B2_NULL_INDEX) - { - return; - } + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorContactIndex != B2_NULL_INDEX); + + b2Graph* graph = &world->graph; B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); int32_t bodyIndexA = contact->edges[0].bodyIndex; @@ -151,10 +153,136 @@ void b2RemoveContactFromGraph(b2World* world, b2Graph* graph, b2Contact* contact b2ClearBit(&color->bodySet, bodyIndexB); } + + contact->colorIndex = B2_NULL_INDEX; + contact->colorContactIndex = B2_NULL_INDEX; } -void b2SolveGraph(b2World* world, b2Graph* graph) +typedef struct b2ConstraintPoint +{ + b2Vec2 rA; + b2Vec2 rB; + float normalImpulse; + float tangentImpulse; + float normalMass; + float tangentMass; + float velocityBias; +} b2ConstraintPoint; + +typedef struct b2Constraint { + b2Contact* contact; + b2ConstraintPoint points[2]; + b2Vec2 normal; + float friction; + int32_t pointCount; +} b2Constraint; + +static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b2StepContext* stepContext) +{ + int32_t constraintCount = b2Array(color->contactArray).count; + int32_t* contactIndices = color->contactArray; + b2Contact* contacts = world->contacts; + b2Body* bodies = world->bodies; + float inv_dt = stepContext->inv_dt; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = contact->edges[0].bodyIndex; + int32_t indexB = contact->edges[1].bodyIndex; + b2Body* bodyA = bodies + indexA; + b2Body* bodyB = bodies + indexB; + + b2Constraint* constraint = color->contraints + i; + constraint->contact = contact; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Rot qA = bodyA->transform.q; + b2Vec2 cA = bodyA->position; + b2Rot qB = bodyB->transform.q; + b2Vec2 cB = bodyB->position; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* cp = manifold->points + j; + b2ConstraintPoint* constraintPoint = constraint->points + j; + + constraintPoint->normalImpulse = cp->normalImpulse; + constraintPoint->tangentImpulse = cp->tangentImpulse; + + constraintPoint->rA = b2Sub(cp->point, cA); + constraintPoint->rB = b2Sub(cp->point, cB); + + float rnA = b2Cross(constraintPoint->rA, constraint->normal); + float rnB = b2Cross(constraintPoint->rB, constraint->normal); + + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + constraintPoint->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(constraintPoint->rA, tangent); + float rtB = b2Cross(constraintPoint->rB, tangent); + + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + constraintPoint->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Velocity bias for speculative collision + constraintPoint->velocityBias = -B2_MAX(0.0f, cp->separation * inv_dt); + } + + constraintCount += 1; + } +} + +void b2SolveGraph(b2World* world, const b2StepContext* stepContext) +{ + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + int32_t constraintCount = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + constraintCount += b2Array(colors[i].contactArray).count; + } + + b2Constraint* constraints = b2AllocateStackItem(&world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + int32_t base = 0; + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + colors[i].contraints = constraints + base; + base += b2Array(colors[i].contactArray).count; + } + + B2_ASSERT(base == constraintCount); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + colors[i].contraints = constraints + base; + base += b2Array(colors[i].contactArray).count; + } + B2_MAYBE_UNUSED(world); - B2_MAYBE_UNUSED(graph); } diff --git a/src/graph.h b/src/graph.h index 5c9b7386..93cd6dc7 100644 --- a/src/graph.h +++ b/src/graph.h @@ -18,6 +18,7 @@ typedef struct b2GraphColor { b2BitSet bodySet; int32_t* contactArray; + struct b2Contraint* contraints; } b2GraphColor; typedef struct b2Graph @@ -29,7 +30,7 @@ typedef struct b2Graph void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); void b2DestroyGraph(b2Graph* graph); -void b2AddContactToGraph(b2World* world, b2Graph* graph, b2Contact* contact); -void b2RemoveContactFromGraph(b2World* world, b2Graph* graph, b2Contact* contact); +void b2AddContactToGraph(b2World* world, b2Contact* contact); +void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); -void b2SolveGraph(b2World* world, b2Graph* graph); +void b2SolveGraph(b2World* world); diff --git a/src/world.c b/src/world.c index a2f81c8a..33f3f13e 100644 --- a/src/world.c +++ b/src/world.c @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT +#define _CRT_SECURE_NO_WARNINGS + #include "world.h" #include "allocate.h" @@ -25,6 +27,7 @@ #include "box2d/distance.h" #include "box2d/timer.h" +#include #include b2World b2_worlds[b2_maxWorlds]; @@ -377,6 +380,7 @@ static void b2Collide(b2World* world) { B2_ASSERT(contact->islandIndex == B2_NULL_INDEX); b2LinkContact(world, contact); + b2AddContactToGraph(world, contact); contact->flags &= ~b2_contactStartedTouching; } else @@ -384,6 +388,7 @@ static void b2Collide(b2World* world) B2_ASSERT(contact->flags & b2_contactStoppedTouching); b2UnlinkContact(world, contact); + b2RemoveContactFromGraph(world, contact); contact->flags &= ~b2_contactStoppedTouching; } @@ -1168,6 +1173,10 @@ void b2World_Draw(b2WorldId worldId, b2DebugDraw* draw) continue; } + char buffer[32]; + sprintf(buffer, "%d", b->object.index); + draw->DrawString(b->position, buffer, draw->context); + int32_t shapeIndex = b->shapeList; while (shapeIndex != B2_NULL_INDEX) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5f073571..948339b1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ set(BOX2D_TESTS main.c + test_bitset.c test_collision.c test_determinism.c test_distance.c diff --git a/test/main.c b/test/main.c index 29b29cd2..48044774 100644 --- a/test/main.c +++ b/test/main.c @@ -18,6 +18,7 @@ //} #endif +extern int BitSetTest(); extern int MathTest(); extern int CollisionTest(); extern int DeterminismTest(); @@ -47,6 +48,7 @@ int main(void) RUN_TEST(HelloWorld); RUN_TEST(ShapeTest); RUN_TEST(TableTest); + RUN_TEST(BitSetTest); printf("======================================\n"); printf("All Box2D tests passed!\n"); diff --git a/test/test_bitset.c b/test/test_bitset.c new file mode 100644 index 00000000..fdec9a0a --- /dev/null +++ b/test/test_bitset.c @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "test_macros.h" +#include "bitset.h" +#include "box2d/timer.h" + +#define COUNT 169 + +int BitSetTest() +{ + b2BitSet bitSet = b2CreateBitSet(COUNT); + + b2SetBitCountAndClear(&bitSet, COUNT); + bool values[COUNT] = {false}; + + int32_t i1 = 0, i2 = 1; + b2SetBit(&bitSet, i1); + values[i1] = true; + + while (i2 < COUNT) + { + b2SetBit(&bitSet, i2); + values[i2] = true; + int32_t next = i1 + i2; + i1 = i2; + i2 = next; + } + + for (int32_t i = 0; i < COUNT; ++i) + { + bool value = b2GetBit(&bitSet, i); + ENSURE(value == values[i]); + } + + b2DestroyBitSet(&bitSet); + + return 0; +} diff --git a/test/test_determinism.c b/test/test_determinism.c index 80c787e5..2eb26787 100644 --- a/test/test_determinism.c +++ b/test/test_determinism.c @@ -116,6 +116,8 @@ void TiltedStacks(int testIndex, int workerCount) worldDef.finishAllTasks = FinishAllTasks; worldDef.workerCount = workerCount; worldDef.enableSleep = false; + worldDef.bodyCapacity = 1024; + worldDef.contactCapacity = 4 * 1024; b2WorldId worldId = b2CreateWorld(&worldDef); From 39291dc9649512e1b15aa8b1a223c73e92f5560a Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 12 Aug 2023 22:28:39 -0700 Subject: [PATCH 03/51] wip --- include/box2d/box2d.h | 1 + samples/collection/benchmark_pyramid.cpp | 4 +- samples/sample.cpp | 2 +- src/graph.c | 329 +++++++++++++++++++++-- src/graph.h | 5 +- src/world.c | 223 ++++++++++++++- 6 files changed, 535 insertions(+), 29 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index 353c2e3b..8db00f0f 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -27,6 +27,7 @@ BOX2D_API void b2DestroyWorld(b2WorldId worldId); /// @param velocityIterations for the velocity constraint solver. /// @param positionIterations for the position constraint solver. BOX2D_API void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations); +BOX2D_API void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations); /// Call this to draw shapes and other debug draw data. This is intentionally non-const. BOX2D_API void b2World_Draw(b2WorldId worldId, b2DebugDraw* debugDraw); diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index a48bfa19..91229f29 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,7 +21,7 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 10; + m_baseCount = 4; m_rowCount = g_sampleDebug ? 1 : 16; m_columnCount = g_sampleDebug ? 1 : 16; m_groundId = b2_nullBodyId; @@ -59,7 +59,7 @@ class BenchmarkPyramid : public Sample for (int32_t i = 0; i < m_baseCount; ++i) { - float y = (2.0f * i + 1.0f) * m_extent + baseY; + float y = (2.0f * i + 1.0f) * m_extent + baseY + 2.0f; for (int32_t j = i; j < m_baseCount; ++j) { diff --git a/samples/sample.cpp b/samples/sample.cpp index 2d9725be..8a3ecd7b 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -237,7 +237,7 @@ void Sample::Step(Settings& settings) for (int32_t i = 0; i < 1; ++i) { - b2World_Step(m_worldId, timeStep, settings.m_velocityIterations, settings.m_positionIterations); + b2World_Step2(m_worldId, timeStep, settings.m_velocityIterations, settings.m_positionIterations); } b2World_Draw(m_worldId, &g_draw.m_debugDraw); diff --git a/src/graph.c b/src/graph.c index 75110a5c..666bd877 100644 --- a/src/graph.c +++ b/src/graph.c @@ -9,9 +9,12 @@ #include "contact.h" #include "core.h" #include "shape.h" +#include "solver_data.h" #include "stack_allocator.h" #include "world.h" +#include "box2d/aabb.h" + #include void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) @@ -107,6 +110,8 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) break; } } + + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorContactIndex != B2_NULL_INDEX); } void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) @@ -158,6 +163,52 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->colorContactIndex = B2_NULL_INDEX; } +static void b2IntegrateVelocities(b2World* world, const b2StepContext* context) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + float timeStep = context->dt; + b2Vec2 gravity = world->gravity; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type != b2_dynamicBody) + { + continue; + } + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(timeStep * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + timeStep * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + timeStep * body->linearDamping), v); + w *= 1.0f / (1.0f + timeStep * body->angularDamping); + + body->linearVelocity = v; + body->angularVelocity = w; + } +} + typedef struct b2ConstraintPoint { b2Vec2 rA; @@ -180,7 +231,7 @@ typedef struct b2Constraint static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b2StepContext* stepContext) { - int32_t constraintCount = b2Array(color->contactArray).count; + const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; b2Body* bodies = world->bodies; @@ -200,7 +251,7 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b b2Body* bodyA = bodies + indexA; b2Body* bodyB = bodies + indexB; - b2Constraint* constraint = color->contraints + i; + b2Constraint* constraint = color->constraints + i; constraint->contact = contact; constraint->normal = manifold->normal; constraint->friction = contact->friction; @@ -211,9 +262,7 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b float mB = bodyB->invMass; float iB = bodyB->invI; - b2Rot qA = bodyA->transform.q; b2Vec2 cA = bodyA->position; - b2Rot qB = bodyB->transform.q; b2Vec2 cB = bodyB->position; b2Vec2 vA = bodyA->linearVelocity; @@ -221,38 +270,255 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(constraint->normal); for (int32_t j = 0; j < pointCount; ++j) { - const b2ManifoldPoint* cp = manifold->points + j; - b2ConstraintPoint* constraintPoint = constraint->points + j; + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; - constraintPoint->normalImpulse = cp->normalImpulse; - constraintPoint->tangentImpulse = cp->tangentImpulse; + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; - constraintPoint->rA = b2Sub(cp->point, cA); - constraintPoint->rB = b2Sub(cp->point, cB); + cp->rA = b2Sub(mp->point, cA); + cp->rB = b2Sub(mp->point, cB); - float rnA = b2Cross(constraintPoint->rA, constraint->normal); - float rnB = b2Cross(constraintPoint->rB, constraint->normal); + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - constraintPoint->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - float rtA = b2Cross(constraintPoint->rA, tangent); - float rtB = b2Cross(constraintPoint->rB, tangent); + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - constraintPoint->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; // Velocity bias for speculative collision - constraintPoint->velocityBias = -B2_MAX(0.0f, cp->separation * inv_dt); + cp->velocityBias = -B2_MAX(0.0f, mp->separation * inv_dt); + + // Warm start + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + } +} + +static void b2SolveConstraints(b2World* world, b2GraphColor* color) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + const b2Contact* contact = constraint->contact; + + int32_t indexA = contact->edges[0].bodyIndex; + int32_t indexB = contact->edges[1].bodyIndex; + b2Body* bodyA = bodies + indexA; + b2Body* bodyB = bodies + indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + + // Solve tangent constraints first because non-penetration is more important + // than friction. + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float lambda = -cp->normalMass * (vn - cp->velocityBias); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + lambda, 0.0f); + lambda = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } - constraintCount += 1; + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2StoreImpulses(b2GraphColor* color) +{ + int32_t constraintCount = b2Array(color->contactArray).count; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + b2Contact* contact = constraint->contact; + + b2Manifold* manifold = &contact->manifold; + + for (int32_t j = 0; j < constraint->pointCount; ++j) + { + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; + } + } +} + +static void b2IntegratePositions(b2World* world, const b2StepContext* context) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + float timeStep = context->dt; + b2Contact* contacts = world->contacts; + + b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[0].shapeBitSet; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type == b2_staticBody) + { + continue; + } + + b2Vec2 c = body->position; + float a = body->angle; + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Clamp large velocities + b2Vec2 translation = b2MulSV(timeStep, v); + if (b2Dot(translation, translation) > b2_maxTranslationSquared) + { + float ratio = b2_maxTranslation / b2Length(translation); + v = b2MulSV(ratio, v); + } + + float rotation = timeStep * w; + if (rotation * rotation > b2_maxRotationSquared) + { + float ratio = b2_maxRotation / B2_ABS(rotation); + w *= ratio; + } + + // Integrate + c = b2MulAdd(c, timeStep, v); + a += timeStep * w; + + body->position = c; + body->angle = a; + body->linearVelocity = v; + body->angularVelocity = w; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + + body->force = b2Vec2_zero; + body->torque = 0.0f; + + // Update shapes AABBs + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) + { + b2Shape* shape = world->shapes + shapeIndex; + + B2_ASSERT(shape->isFast == false); + + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + + shapeIndex = shape->nextShapeIndex; + } + + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; + } } } @@ -267,22 +533,39 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) constraintCount += b2Array(colors[i].contactArray).count; } - b2Constraint* constraints = b2AllocateStackItem(&world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); int32_t base = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { - colors[i].contraints = constraints + base; + colors[i].constraints = constraints + base; base += b2Array(colors[i].contactArray).count; } B2_ASSERT(base == constraintCount); + b2IntegrateVelocities(world, stepContext); + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - colors[i].contraints = constraints + base; - base += b2Array(colors[i].contactArray).count; + b2InitializeConstraints(world, colors + i, stepContext); + } + + int32_t iterationCount = stepContext->velocityIterations; + for (int32_t iter = 0; iter < iterationCount; ++iter) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveConstraints(world, colors + i); + } + } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2StoreImpulses(colors + i); } - B2_MAYBE_UNUSED(world); + b2FreeStackItem(world->stackAllocator, constraints); + + b2IntegratePositions(world, stepContext); } diff --git a/src/graph.h b/src/graph.h index 93cd6dc7..76b149b2 100644 --- a/src/graph.h +++ b/src/graph.h @@ -10,6 +10,7 @@ #include "box2d/dynamic_tree.h" typedef struct b2Contact b2Contact; +typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; #define b2_graphColorCount 8 @@ -18,7 +19,7 @@ typedef struct b2GraphColor { b2BitSet bodySet; int32_t* contactArray; - struct b2Contraint* contraints; + struct b2Constraint* constraints; } b2GraphColor; typedef struct b2Graph @@ -33,4 +34,4 @@ void b2DestroyGraph(b2Graph* graph); void b2AddContactToGraph(b2World* world, b2Contact* contact); void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); -void b2SolveGraph(b2World* world); +void b2SolveGraph(b2World* world, const b2StepContext* stepContext); diff --git a/src/world.c b/src/world.c index 33f3f13e..5e29234b 100644 --- a/src/world.c +++ b/src/world.c @@ -13,6 +13,7 @@ #include "broad_phase.h" #include "contact.h" #include "core.h" +#include "graph.h" #include "island.h" #include "joint.h" #include "pool.h" @@ -626,7 +627,6 @@ static void b2ContinuousParallelForTask(int32_t startIndex, int32_t endIndex, ui b2TracyCZoneEnd(continuous_task); } -// Solve with union-find islands static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); @@ -915,6 +915,140 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(solve); } +// Graph coloring experiment +static void b2Solve2(b2World* world, b2StepContext* context) +{ + b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); + + b2Timer timer = b2CreateTimer(); + + world->stepId += 1; + + // Prepare contact and shape bit-sets + int32_t contactCapacity = world->contactPool.capacity; + int32_t shapeCapacity = world->shapePool.capacity; + for (uint32_t i = 0; i < world->workerCount; ++i) + { + b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); + } + + world->profile.buildIslands = 0.0f; + + b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); + + b2SolveGraph(world, context); + + b2ValidateNoEnlarged(&world->broadPhase); + + b2TracyCZoneEnd(island_solver); + + world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); + + b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); + + b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); + + // Enlarge broad-phase proxies and build move array + { + b2BroadPhase* broadPhase = &world->broadPhase; + + // Gather bits for all shapes that have enlarged AABBs + b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) + { + b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); + } + + // Apply shape AABB changes to broadphase. This also create the move array which must be + // ordered to ensure determinism. + b2Shape* shapes = world->shapes; + uint64_t word; + uint32_t wordCount = bitSet->wordCount; + uint64_t* bits = bitSet->bits; + for (uint32_t k = 0; k < wordCount; ++k) + { + word = bits[k]; + while (word != 0) + { + uint32_t ctz = b2CTZ(word); + uint32_t shapeIndex = 64 * k + ctz; + + b2Shape* shape = shapes + shapeIndex; + B2_ASSERT(b2ObjectValid(&shape->object)); + if (shape->isFast == false) + { + b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); + } + else + { + // Shape is fast. It's aabb will be enlarged in continuous collision. + b2BufferMove(broadPhase, shape->proxyKey); + } + + // Clear the smallest set bit + word = word & (word - 1); + } + } + } + + b2TracyCZoneEnd(enlarge_proxies); + + b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); + + // Build awake contact array + { + b2BitSet* bitSet = &world->taskContextArray[0].awakeContactBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) + { + b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeContactBitSet); + } + + b2Array_Clear(world->awakeContactArray); + + int32_t* contactAwakeIndexArray = world->contactAwakeIndexArray; + + // Iterate the bit set + // The order of the awake contact array doesn't matter, but I don't want duplicates. It is possible + // that body A or body B or both bodies wake the contact. + uint64_t word; + uint32_t wordCount = bitSet->wordCount; + uint64_t* bits = bitSet->bits; + for (uint32_t k = 0; k < wordCount; ++k) + { + word = bits[k]; + while (word != 0) + { + uint32_t ctz = b2CTZ(word); + uint32_t contactIndex = 64 * k + ctz; + + B2_ASSERT(contactAwakeIndexArray[contactIndex] == B2_NULL_INDEX); + + // This cache miss is brutal but is necessary to make contact destruction reasonably quick. + contactAwakeIndexArray[contactIndex] = b2Array(world->awakeContactArray).count; + + // This is fast + b2Array_Push(world->awakeContactArray, contactIndex); + + // Clear the smallest set bit + word = word & (word - 1); + } + } + } + + b2TracyCZoneEnd(awake_contacts); + + b2ValidateBroadphase(&world->broadPhase); + + world->profile.broadphase = b2GetMilliseconds(&timer); + + b2TracyCZoneEnd(broad_phase); + + world->profile.continuous = 0.0f; + + b2TracyCZoneEnd(solve); +} + void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations) { if (timeStep == 0.0f) @@ -1002,6 +1136,93 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, b2TracyCZoneEnd(world_step); } +void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations) +{ + if (timeStep == 0.0f) + { + // TODO_ERIN would be useful to still process collision while paused + return; + } + + b2TracyCZoneNC(world_step, "Step", b2_colorChartreuse, true); + + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->profile = b2_emptyProfile; + + b2Timer stepTimer = b2CreateTimer(); + + // Update collision pairs and create contacts + { + b2Timer timer = b2CreateTimer(); + b2UpdateBroadPhasePairs(world); + world->profile.pairs = b2GetMilliseconds(&timer); + } + + // TODO_ERIN atomic + world->locked = true; + + b2StepContext context = {0}; + context.dt = timeStep; + context.velocityIterations = velocityIterations; + context.positionIterations = positionIterations; + if (timeStep > 0.0f) + { + context.inv_dt = 1.0f / timeStep; + } + else + { + context.inv_dt = 0.0f; + } + + context.dtRatio = world->inv_dt0 * timeStep; + context.restitutionThreshold = world->restitutionThreshold; + context.warmStarting = world->warmStarting; + context.bodies = world->bodies; + context.bodyCapacity = world->bodyPool.capacity; + + // Update contacts + { + b2Timer timer = b2CreateTimer(); + b2Collide(world); + world->profile.collide = b2GetMilliseconds(&timer); + } + + // Integrate velocities, solve velocity constraints, and integrate positions. + if (context.dt > 0.0f) + { + b2Timer timer = b2CreateTimer(); + b2Solve2(world, &context); + world->profile.solve = b2GetMilliseconds(&timer); + } + + if (context.dt > 0.0f) + { + world->inv_dt0 = context.inv_dt; + } + + world->locked = false; + + world->profile.step = b2GetMilliseconds(&stepTimer); + + B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); + + // Ensure stack is large enough + b2GrowStack(world->stackAllocator); + + if (b2_parallel) + { + world->finishAllTasksFcn(world->userTaskContext); + } + + b2TracyCZoneEnd(world_step); +} + static void b2DrawShape(b2DebugDraw* draw, b2Shape* shape, b2Transform xf, b2Color color) { switch (shape->type) From 9bd284330c9c93496ea75a8a5c43eaf4f8e953af Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 13 Aug 2023 23:00:44 -0700 Subject: [PATCH 04/51] TGS wip --- samples/collection/benchmark_pyramid.cpp | 8 +- samples/sample.cpp | 5 +- src/graph.c | 460 +++++++++++++++++++++-- src/graph.h | 3 +- src/world.c | 2 +- 5 files changed, 436 insertions(+), 42 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 91229f29..87faae0e 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,7 +21,7 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 4; + m_baseCount = 2; m_rowCount = g_sampleDebug ? 1 : 16; m_columnCount = g_sampleDebug ? 1 : 16; m_groundId = b2_nullBodyId; @@ -59,7 +59,7 @@ class BenchmarkPyramid : public Sample for (int32_t i = 0; i < m_baseCount; ++i) { - float y = (2.0f * i + 1.0f) * m_extent + baseY + 2.0f; + float y = (2.0f * i + 1.0f) * m_extent + baseY; for (int32_t j = i; j < m_baseCount; ++j) { @@ -106,8 +106,8 @@ class BenchmarkPyramid : public Sample for (int32_t i = 0; i < m_rowCount; ++i) { - b2Segment segment = {{-0.5f * groundWidth, groundY}, {0.5f * groundWidth, groundY}}; - //b2Segment segment = {{-0.5f * 2.0f * groundWidth, groundY}, {0.5f * 2.0f * groundWidth, groundY}}; + //b2Segment segment = {{-0.5f * groundWidth, groundY}, {0.5f * groundWidth, groundY}}; + b2Segment segment = {{-0.5f * 2.0f * groundWidth, groundY}, {0.5f * 2.0f * groundWidth, groundY}}; b2Body_CreateSegment(m_groundId, &shapeDef, &segment); groundY += groundDeltaY; } diff --git a/samples/sample.cpp b/samples/sample.cpp index 8a3ecd7b..b7a33f21 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -233,7 +233,10 @@ void Sample::Step(Settings& settings) // m_world->SetWarmStarting(settings.m_enableWarmStarting); // m_world->SetContinuousPhysics(settings.m_enableContinuous); - m_pointCount = 0; + if (timeStep > 0.0f) + { + m_pointCount = 0; + } for (int32_t i = 0; i < 1; ++i) { diff --git a/src/graph.c b/src/graph.c index 666bd877..cdbbfb89 100644 --- a/src/graph.c +++ b/src/graph.c @@ -163,11 +163,10 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->colorContactIndex = B2_NULL_INDEX; } -static void b2IntegrateVelocities(b2World* world, const b2StepContext* context) +static void b2IntegrateVelocities(b2World* world, float h) { b2Body* bodies = world->bodies; int32_t bodyCapacity = world->bodyPool.capacity; - float timeStep = context->dt; b2Vec2 gravity = world->gravity; // Integrate velocities and apply damping. Initialize the body state. @@ -191,8 +190,8 @@ static void b2IntegrateVelocities(b2World* world, const b2StepContext* context) float w = body->angularVelocity; // Integrate velocities - v = b2Add(v, b2MulSV(timeStep * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + timeStep * invI * body->torque; + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; // Apply damping. // ODE: dv/dt + c * v = 0 @@ -201,8 +200,8 @@ static void b2IntegrateVelocities(b2World* world, const b2StepContext* context) // v2 = exp(-c * dt) * v1 // Pade approximation: // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + timeStep * body->linearDamping), v); - w *= 1.0f / (1.0f + timeStep * body->angularDamping); + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); body->linearVelocity = v; body->angularVelocity = w; @@ -211,31 +210,35 @@ static void b2IntegrateVelocities(b2World* world, const b2StepContext* context) typedef struct b2ConstraintPoint { - b2Vec2 rA; - b2Vec2 rB; + b2Vec2 rA, rB; + b2Vec2 localAnchorA, localAnchorB; + float separation; + float baseSeparation; float normalImpulse; float tangentImpulse; float normalMass; float tangentMass; - float velocityBias; + float bias; + float gamma; } b2ConstraintPoint; typedef struct b2Constraint { b2Contact* contact; + int32_t indexA; + int32_t indexB; b2ConstraintPoint points[2]; b2Vec2 normal; float friction; int32_t pointCount; } b2Constraint; -static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b2StepContext* stepContext) +static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* color, float h) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; b2Body* bodies = world->bodies; - float inv_dt = stepContext->inv_dt; for (int32_t i = 0; i < constraintCount; ++i) { @@ -253,6 +256,8 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b b2Constraint* constraint = color->constraints + i; constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; constraint->normal = manifold->normal; constraint->friction = contact->friction; constraint->pointCount = pointCount; @@ -264,6 +269,8 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b b2Vec2 cA = bodyA->position; b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; @@ -283,23 +290,40 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b cp->rA = b2Sub(mp->point, cA); cp->rB = b2Sub(mp->point, cB); + cp->localAnchorA = b2InvRotateVector(qA, cp->rA); + cp->localAnchorB = b2InvRotateVector(qB, cp->rB); float rnA = b2Cross(cp->rA, normal); float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - float rtA = b2Cross(cp->rA, tangent); float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - // Velocity bias for speculative collision - cp->velocityBias = -B2_MAX(0.0f, mp->separation * inv_dt); + // Soft contact with speculation + const float hertz = 10.0f; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + //float d = 2.0f * zeta * omega / kNormal; + //float k = omega * omega / kNormal; + + //cp->gamma = 1.0f / (h * (d + h * k)); + //cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); + cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + + //cp->bias = h * k * cp->gamma * mp->separation; + //cp->bias = k / (d + h * k) * mp->separation; + //cp->bias = + // (omega * omega / kNormal) / (2.0f * dampingRatio * omega / kNormal + h * omega * omega / kNormal) * mp->separation; + cp->bias = (omega / (2.0f * zeta + h * omega)) * mp->separation; + //cp->gamma = 0.0f; + //cp->bias = (0.2f / h) * mp->separation; + + // TODO_ERIN this can be expanded + cp->normalMass = 1.0f / (kNormal + cp->gamma); // Warm start b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); @@ -307,26 +331,216 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, const b vA = b2MulAdd(vA, -mA, P); wB += iB * b2Cross(cp->rB, P); vB = b2MulAdd(vB, mB, P); + + cp->baseSeparation = mp->separation; + cp->separation = mp->separation; } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } } -static void b2SolveConstraints(b2World* world, b2GraphColor* color) +static void b2InitializeConstraints(b2World* world, b2GraphColor* color, float h) { const int32_t constraintCount = b2Array(color->contactArray).count; + int32_t* contactIndices = color->contactArray; + b2Contact* contacts = world->contacts; b2Body* bodies = world->bodies; for (int32_t i = 0; i < constraintCount; ++i) { - b2Constraint* constraint = color->constraints + i; + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; - const b2Contact* contact = constraint->contact; + B2_ASSERT(0 < pointCount && pointCount <= 2); int32_t indexA = contact->edges[0].bodyIndex; int32_t indexB = contact->edges[1].bodyIndex; b2Body* bodyA = bodies + indexA; b2Body* bodyB = bodies + indexB; + b2Constraint* constraint = color->constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + + cp->rA = b2Sub(mp->point, cA); + cp->rB = b2Sub(mp->point, cB); + cp->localAnchorA = b2InvRotateVector(qA, cp->rA); + cp->localAnchorB = b2InvRotateVector(qB, cp->rB); + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Soft contact with speculation + const float hertz = 10.0f; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + // float d = 2.0f * zeta * omega / kNormal; + // float k = omega * omega / kNormal; + + // cp->gamma = 1.0f / (h * (d + h * k)); + // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); + cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + + // cp->bias = h * k * cp->gamma * mp->separation; + // cp->bias = k / (d + h * k) * mp->separation; + // cp->bias = + // (omega * omega / kNormal) / (2.0f * dampingRatio * omega / kNormal + h * omega * omega / kNormal) * mp->separation; + cp->bias = (omega / (2.0f * zeta + h * omega)) * mp->separation; + // cp->gamma = 0.0f; + // cp->bias = (0.2f / h) * mp->separation; + + // TODO_ERIN this can be expanded + cp->normalMass = 1.0f / (kNormal + cp->gamma); + + cp->baseSeparation = mp->separation; + cp->separation = mp->separation; + } + } +} + +static void b2WarmStart(b2World* world, b2GraphColor* color) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + int32_t pointCount = constraint->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +// separation = dot(normal, pB - pA) + separation0 +static void b2UpdateSeparation(b2World* world, b2GraphColor* color, float h) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + int32_t pointCount = constraint->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + + b2Vec2 normal = constraint->normal; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); + b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); + + // Current separation + b2Vec2 d = b2Add(b2Sub(cB, cA), b2Sub(rB, rA)); + + // TODO_ERIN really only need to update bias below + cp->separation = b2Dot(d, normal) + cp->baseSeparation; + + // Soft contact with speculation + const float hertz = 10.0f; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + // float d = 2.0f * zeta * omega / kNormal; + // float k = omega * omega / kNormal; + cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; + } + } +} + +static void b2SolveConstraints(b2World* world, b2GraphColor* color) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + float mA = bodyA->invMass; float iA = bodyA->invI; float mB = bodyB->invMass; @@ -384,15 +598,15 @@ static void b2SolveConstraints(b2World* world, b2GraphColor* color) // Compute normal impulse float vn = b2Dot(dv, normal); - float lambda = -cp->normalMass * (vn - cp->velocityBias); + float impulse = -cp->normalMass * (vn + cp->bias + cp->gamma * cp->normalImpulse); // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + lambda, 0.0f); - lambda = newImpulse - cp->normalImpulse; + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; cp->normalImpulse = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, normal); + b2Vec2 P = b2MulSV(impulse, normal); vA = b2MulSub(vA, mA, P); wA -= iA * b2Cross(cp->rA, P); @@ -426,11 +640,60 @@ static void b2StoreImpulses(b2GraphColor* color) } } -static void b2IntegratePositions(b2World* world, const b2StepContext* context) +static void b2IntegratePositions(b2World* world, float h) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type == b2_staticBody) + { + continue; + } + + b2Vec2 c = body->position; + float a = body->angle; + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Clamp large velocities + b2Vec2 translation = b2MulSV(h, v); + if (b2Dot(translation, translation) > b2_maxTranslationSquared) + { + float ratio = b2_maxTranslation / b2Length(translation); + v = b2MulSV(ratio, v); + } + + float rotation = h * w; + if (rotation * rotation > b2_maxRotationSquared) + { + float ratio = b2_maxRotation / B2_ABS(rotation); + w *= ratio; + } + + // Integrate + c = b2MulAdd(c, h, v); + a += h * w; + + body->position = c; + body->angle = a; + body->linearVelocity = v; + body->angularVelocity = w; + } +} + +static void b2FinalizePositions(b2World* world, float h) { b2Body* bodies = world->bodies; int32_t bodyCapacity = world->bodyPool.capacity; - float timeStep = context->dt; b2Contact* contacts = world->contacts; b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; @@ -457,14 +720,14 @@ static void b2IntegratePositions(b2World* world, const b2StepContext* context) float w = body->angularVelocity; // Clamp large velocities - b2Vec2 translation = b2MulSV(timeStep, v); + b2Vec2 translation = b2MulSV(h, v); if (b2Dot(translation, translation) > b2_maxTranslationSquared) { float ratio = b2_maxTranslation / b2Length(translation); v = b2MulSV(ratio, v); } - float rotation = timeStep * w; + float rotation = h * w; if (rotation * rotation > b2_maxRotationSquared) { float ratio = b2_maxRotation / B2_ABS(rotation); @@ -472,8 +735,8 @@ static void b2IntegratePositions(b2World* world, const b2StepContext* context) } // Integrate - c = b2MulAdd(c, timeStep, v); - a += timeStep * w; + c = b2MulAdd(c, h, v); + a += h * w; body->position = c; body->angle = a; @@ -522,7 +785,75 @@ static void b2IntegratePositions(b2World* world, const b2StepContext* context) } } -void b2SolveGraph(b2World* world, const b2StepContext* stepContext) +// Update body transform, mark broadphase AABB, build awake contact bits +static void b2FinalizeSolve(b2World* world) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + b2Contact* contacts = world->contacts; + + b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[0].shapeBitSet; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type == b2_staticBody) + { + continue; + } + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + + body->force = b2Vec2_zero; + body->torque = 0.0f; + + // Update shapes AABBs + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) + { + b2Shape* shape = world->shapes + shapeIndex; + + B2_ASSERT(shape->isFast == false); + + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + + shapeIndex = shape->nextShapeIndex; + } + + // TODO_ERIN legacy + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; + } + } +} + +void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; @@ -544,14 +875,16 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) B2_ASSERT(base == constraintCount); - b2IntegrateVelocities(world, stepContext); + int32_t iterationCount = stepContext->velocityIterations; + float h = stepContext->dt; + + b2IntegrateVelocities(world, h); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i, stepContext); + b2InitializeConstraintsAndWarmStart(world, colors + i, h); } - int32_t iterationCount = stepContext->velocityIterations; for (int32_t iter = 0; iter < iterationCount; ++iter) { for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -567,5 +900,62 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); - b2IntegratePositions(world, stepContext); + b2FinalizePositions(world, h); +} + +void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) +{ + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + int32_t constraintCount = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + constraintCount += b2Array(colors[i].contactArray).count; + } + + b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + int32_t base = 0; + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + colors[i].constraints = constraints + base; + base += b2Array(colors[i].contactArray).count; + } + + B2_ASSERT(base == constraintCount); + + int32_t substepCount = stepContext->velocityIterations; + float h = stepContext->dt / substepCount; + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2InitializeConstraints(world, colors + i, h); + } + + for (int32_t substep = 0; substep < substepCount; ++substep) + { + b2IntegrateVelocities(world, h); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2WarmStart(world, colors + i); + if (substep > 0) + { + b2UpdateSeparation(world, colors + i, h); + } + b2SolveConstraints(world, colors + i); + } + + b2IntegratePositions(world, h); + } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2StoreImpulses(colors + i); + } + + b2FinalizeSolve(world); + + b2FreeStackItem(world->stackAllocator, constraints); } diff --git a/src/graph.h b/src/graph.h index 76b149b2..e4efe542 100644 --- a/src/graph.h +++ b/src/graph.h @@ -34,4 +34,5 @@ void b2DestroyGraph(b2Graph* graph); void b2AddContactToGraph(b2World* world, b2Contact* contact); void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); -void b2SolveGraph(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext); diff --git a/src/world.c b/src/world.c index 5e29234b..508702f7 100644 --- a/src/world.c +++ b/src/world.c @@ -937,7 +937,7 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - b2SolveGraph(world, context); + b2SolveGraphTGS(world, context); b2ValidateNoEnlarged(&world->broadPhase); From 4e3a721e79c11063daf86882e7073ad8f52ef795 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 15 Aug 2023 21:40:50 -0700 Subject: [PATCH 05/51] fixes for tgs --- .clang-format | 2 +- include/box2d/api.h | 6 +- samples/collection/benchmark_pyramid.cpp | 2 +- samples/main.cpp | 2 +- src/allocate.c | 10 +- src/allocate.h | 4 +- src/bitset.c | 19 ++- src/bitset.h | 11 ++ src/graph.c | 182 +++++++++-------------- 9 files changed, 110 insertions(+), 128 deletions(-) diff --git a/.clang-format b/.clang-format index c2247d17..ed8cfc26 100644 --- a/.clang-format +++ b/.clang-format @@ -18,7 +18,7 @@ UseTab: Always BreakConstructorInitializers: BeforeComma # when VS updates clang-format to v16 -# InsertNewlineAtEOF: true +InsertNewlineAtEOF: true IncludeBlocks: Regroup diff --git a/include/box2d/api.h b/include/box2d/api.h index 60ad5ea1..49cb3d4b 100644 --- a/include/box2d/api.h +++ b/include/box2d/api.h @@ -3,6 +3,8 @@ #pragma once +#include + #ifdef __cplusplus #define BOX2D_CPP extern "C" #else @@ -22,7 +24,7 @@ #define BOX2D_API BOX2D_CPP #endif -typedef void* b2AllocFcn(int size); +typedef void* b2AllocFcn(uint32_t size); typedef void b2FreeFcn(void* mem); // Return 0 to @@ -37,7 +39,7 @@ extern "C" void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn); /// Total bytes allocated by Box2D -int b2GetByteCount(void); +uint32_t b2GetByteCount(void); extern b2AssertFcn* Box2DAssertCallback; diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 87faae0e..0e90b19a 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -41,7 +41,7 @@ class BenchmarkPyramid : public Sample CreateScene(); } - ~BenchmarkPyramid() + ~BenchmarkPyramid() override { free(m_bodyIds); } diff --git a/samples/main.cpp b/samples/main.cpp index 819867ad..578c9b60 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -53,7 +53,7 @@ static b2Vec2 s_clickPointWS = b2Vec2_zero; static float s_windowScale = 1.0f; static float s_framebufferScale = 1.0f; -void* AllocFcn(int32_t size) +void* AllocFcn(uint32_t size) { size_t size16 = ((size - 1) | 0xF) + 1; assert((size16 & 0xF) == 0); diff --git a/src/allocate.c b/src/allocate.c index 0dc50cf8..66ba872f 100644 --- a/src/allocate.c +++ b/src/allocate.c @@ -34,7 +34,7 @@ static b2AllocFcn* b2_allocFcn = NULL; static b2FreeFcn* b2_freeFcn = NULL; -static _Atomic int32_t b2_byteCount; +static _Atomic uint32_t b2_byteCount; void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn) { @@ -42,7 +42,7 @@ void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn) b2_freeFcn = freeFcn; } -void* b2Alloc(int32_t size) +void* b2Alloc(uint32_t size) { atomic_fetch_add_explicit(&b2_byteCount, size, memory_order_relaxed); @@ -53,7 +53,7 @@ void* b2Alloc(int32_t size) return ptr; } - size_t size16 = ((size - 1) | 0xF) + 1; + uint32_t size16 = ((size - 1) | 0xF) + 1; #ifdef B2_PLATFORM_WINDOWS void* ptr = _aligned_malloc(size16, 16); #else @@ -64,7 +64,7 @@ void* b2Alloc(int32_t size) return ptr; } -void b2Free(void* mem, int32_t size) +void b2Free(void* mem, uint32_t size) { if (mem == NULL) { @@ -89,7 +89,7 @@ void b2Free(void* mem, int32_t size) atomic_fetch_sub_explicit(&b2_byteCount, size, memory_order_relaxed); } -int32_t b2GetByteCount(void) +uint32_t b2GetByteCount(void) { return atomic_load_explicit(&b2_byteCount, memory_order_relaxed); } diff --git a/src/allocate.h b/src/allocate.h index a9b381e7..5133251c 100644 --- a/src/allocate.h +++ b/src/allocate.h @@ -4,5 +4,5 @@ #pragma once #include -void* b2Alloc(int32_t size); -void b2Free(void* mem, int32_t size); +void* b2Alloc(uint32_t size); +void b2Free(void* mem, uint32_t size); diff --git a/src/bitset.c b/src/bitset.c index 38ed0251..fbecc26d 100644 --- a/src/bitset.c +++ b/src/bitset.c @@ -14,7 +14,7 @@ b2BitSet b2CreateBitSet(uint32_t bitCapacity) bitSet.wordCapacity = (bitCapacity + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); bitSet.wordCount = 0; bitSet.bits = b2Alloc(bitSet.wordCapacity * sizeof(uint64_t)); - + memset(bitSet.bits, 0, bitSet.wordCapacity * sizeof(uint64_t)); return bitSet; } @@ -40,6 +40,23 @@ void b2SetBitCountAndClear(b2BitSet* bitSet, uint32_t bitCount) memset(bitSet->bits, 0, bitSet->wordCount * sizeof(uint64_t)); } +void b2GrowBitSet(b2BitSet* bitSet, uint32_t wordCount) +{ + B2_ASSERT(wordCount > bitSet->wordCount); + if (wordCount > bitSet->wordCapacity) + { + uint32_t oldCapacity = bitSet->wordCapacity; + bitSet->wordCapacity = wordCount + wordCount / 2; + uint64_t* newBits = b2Alloc(bitSet->wordCapacity * sizeof(uint64_t)); + memset(newBits, 0, bitSet->wordCapacity * sizeof(uint64_t)); + memcpy(newBits, bitSet->bits, bitSet->wordCount * sizeof(uint64_t)); + b2Free(bitSet->bits, oldCapacity * sizeof(uint64_t)); + bitSet->bits = newBits; + } + + bitSet->wordCount = wordCount; +} + void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB) { B2_ASSERT(setA->wordCount == setB->wordCount); diff --git a/src/bitset.h b/src/bitset.h index f4fefacf..36e83991 100644 --- a/src/bitset.h +++ b/src/bitset.h @@ -20,6 +20,7 @@ b2BitSet b2CreateBitSet(uint32_t bitCapacity); void b2DestroyBitSet(b2BitSet* bitSet); void b2SetBitCountAndClear(b2BitSet* bitset, uint32_t bitCount); void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB); +void b2GrowBitSet(b2BitSet* set, uint32_t wordCount); static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex) { @@ -29,6 +30,16 @@ static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex) bitSet->bits[wordIndex] |= ((uint64_t)1 << bitIndex % 64); } +static inline void b2SetBitGrow(b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + b2GrowBitSet(bitSet, wordIndex + 1); + } + bitSet->bits[wordIndex] |= ((uint64_t)1 << bitIndex % 64); +} + static inline void b2ClearBit(b2BitSet* bitSet, uint32_t bitIndex) { uint32_t wordIndex = bitIndex / 64; diff --git a/src/graph.c b/src/graph.c index cdbbfb89..0a01ace8 100644 --- a/src/graph.c +++ b/src/graph.c @@ -65,8 +65,8 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) continue; } - b2SetBit(&color->bodySet, bodyIndexA); - b2SetBit(&color->bodySet, bodyIndexB); + b2SetBitGrow(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexB); contact->colorContactIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); @@ -84,7 +84,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) continue; } - b2SetBit(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexA); contact->colorContactIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); @@ -102,7 +102,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) continue; } - b2SetBit(&color->bodySet, bodyIndexB); + b2SetBitGrow(&color->bodySet, bodyIndexB); contact->colorContactIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); @@ -220,6 +220,7 @@ typedef struct b2ConstraintPoint float tangentMass; float bias; float gamma; + float gammaScale; } b2ConstraintPoint; typedef struct b2Constraint @@ -307,20 +308,20 @@ static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* co const float hertz = 10.0f; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; - //float d = 2.0f * zeta * omega / kNormal; - //float k = omega * omega / kNormal; + // float d = 2.0f * zeta * omega / kNormal; + // float k = omega * omega / kNormal; - //cp->gamma = 1.0f / (h * (d + h * k)); - //cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); + // cp->gamma = 1.0f / (h * (d + h * k)); + // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - //cp->bias = h * k * cp->gamma * mp->separation; - //cp->bias = k / (d + h * k) * mp->separation; - //cp->bias = + // cp->bias = h * k * cp->gamma * mp->separation; + // cp->bias = k / (d + h * k) * mp->separation; + // cp->bias = // (omega * omega / kNormal) / (2.0f * dampingRatio * omega / kNormal + h * omega * omega / kNormal) * mp->separation; cp->bias = (omega / (2.0f * zeta + h * omega)) * mp->separation; - //cp->gamma = 0.0f; - //cp->bias = (0.2f / h) * mp->separation; + // cp->gamma = 0.0f; + // cp->bias = (0.2f / h) * mp->separation; // TODO_ERIN this can be expanded cp->normalMass = 1.0f / (kNormal + cp->gamma); @@ -343,7 +344,7 @@ static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* co } } -static void b2InitializeConstraints(b2World* world, b2GraphColor* color, float h) +static void b2InitializeConstraints(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -372,19 +373,11 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, float h constraint->friction = contact->friction; constraint->pointCount = pointCount; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - b2Vec2 cA = bodyA->position; b2Vec2 cB = bodyB->position; b2Rot qA = b2MakeRot(bodyA->angle); b2Rot qB = b2MakeRot(bodyB->angle); - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); - for (int32_t j = 0; j < pointCount; ++j) { const b2ManifoldPoint* mp = manifold->points + j; @@ -393,50 +386,14 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, float h cp->normalImpulse = mp->normalImpulse; cp->tangentImpulse = mp->tangentImpulse; - cp->rA = b2Sub(mp->point, cA); - cp->rB = b2Sub(mp->point, cB); - cp->localAnchorA = b2InvRotateVector(qA, cp->rA); - cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - // Soft contact with speculation - const float hertz = 10.0f; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - // float d = 2.0f * zeta * omega / kNormal; - // float k = omega * omega / kNormal; - - // cp->gamma = 1.0f / (h * (d + h * k)); - // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); - cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - - // cp->bias = h * k * cp->gamma * mp->separation; - // cp->bias = k / (d + h * k) * mp->separation; - // cp->bias = - // (omega * omega / kNormal) / (2.0f * dampingRatio * omega / kNormal + h * omega * omega / kNormal) * mp->separation; - cp->bias = (omega / (2.0f * zeta + h * omega)) * mp->separation; - // cp->gamma = 0.0f; - // cp->bias = (0.2f / h) * mp->separation; - - // TODO_ERIN this can be expanded - cp->normalMass = 1.0f / (kNormal + cp->gamma); - + cp->localAnchorA = b2InvRotateVector(qA, b2Sub(mp->point, cA)); + cp->localAnchorB = b2InvRotateVector(qB, b2Sub(mp->point, cB)); cp->baseSeparation = mp->separation; - cp->separation = mp->separation; } } } -static void b2WarmStart(b2World* world, b2GraphColor* color) +static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t stepIndex) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -456,18 +413,58 @@ static void b2WarmStart(b2World* world, b2GraphColor* color) float mB = bodyB->invMass; float iB = bodyB->invI; + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); + b2Vec2 tangent = b2RightPerp(normal); for (int32_t j = 0; j < pointCount; ++j) { b2ConstraintPoint* cp = constraint->points + j; + cp->rA = b2RotateVector(qA, cp->localAnchorA); + cp->rB = b2RotateVector(qB, cp->localAnchorB); + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + const float hertz = 30.0f; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + cp->normalMass = 1.0f / (kNormal + cp->gamma); + + if (stepIndex == 0) + { + cp->separation = cp->baseSeparation; + } + else + { + // Current separation + b2Vec2 d = b2Add(b2Sub(cB, cA), b2Sub(cp->rB, cp->rA)); + + // TODO_ERIN really only need to update bias below + cp->separation = b2Dot(d, normal) + cp->baseSeparation; + } + + //cp->bias = B2_MAX(B2_MAX(cp->separation / h, (omega / (2.0f * zeta + h * omega)) * cp->separation), -1.0f); + cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); wA -= iA * b2Cross(cp->rA, P); vA = b2MulAdd(vA, -mA, P); @@ -482,53 +479,6 @@ static void b2WarmStart(b2World* world, b2GraphColor* color) } } -// separation = dot(normal, pB - pA) + separation0 -static void b2UpdateSeparation(b2World* world, b2GraphColor* color, float h) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - int32_t pointCount = constraint->pointCount; - B2_ASSERT(0 < pointCount && pointCount <= 2); - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); - - b2Vec2 normal = constraint->normal; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); - b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); - - // Current separation - b2Vec2 d = b2Add(b2Sub(cB, cA), b2Sub(rB, rA)); - - // TODO_ERIN really only need to update bias below - cp->separation = b2Dot(d, normal) + cp->baseSeparation; - - // Soft contact with speculation - const float hertz = 10.0f; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - // float d = 2.0f * zeta * omega / kNormal; - // float k = omega * omega / kNormal; - cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; - } - } -} - static void b2SolveConstraints(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; @@ -930,20 +880,22 @@ void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i, h); + b2InitializeConstraints(world, colors + i); } for (int32_t substep = 0; substep < substepCount; ++substep) { b2IntegrateVelocities(world, h); + // Have to fully complete warm starting before solving constraints + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2WarmStart(world, colors + i, h, i); + } + + // One constraint iteration for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2WarmStart(world, colors + i); - if (substep > 0) - { - b2UpdateSeparation(world, colors + i, h); - } b2SolveConstraints(world, colors + i); } From 6392ddbb243ef89bebb6f54d543b3a8720eb5cdd Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 15 Aug 2023 22:56:03 -0700 Subject: [PATCH 06/51] fixes --- samples/collection/benchmark_pyramid.cpp | 3 +-- samples/main.cpp | 2 +- src/graph.h | 2 +- src/island.c | 20 +++++++++++++++++++- src/world.h | 2 ++ 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 0e90b19a..51cfb782 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,7 +21,7 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 2; + m_baseCount = 60; m_rowCount = g_sampleDebug ? 1 : 16; m_columnCount = g_sampleDebug ? 1 : 16; m_groundId = b2_nullBodyId; @@ -63,7 +63,6 @@ class BenchmarkPyramid : public Sample for (int32_t j = i; j < m_baseCount; ++j) { - //float x = (1.5f * i + 1.0f) * m_extent + 3.0f * (j - i) * m_extent + centerX - 0.5f; float x = (i + 1.0f) * m_extent + 2.25f * (j - i) * m_extent + centerX - 0.5f; bodyDef.position = {x, y}; diff --git a/samples/main.cpp b/samples/main.cpp index 578c9b60..71148022 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -565,7 +565,7 @@ int main(int, char**) // MSAA glfwWindowHint(GLFW_SAMPLES, 4); - sprintf(buffer, "Box2D Version %d.%d.%d c", b2_version.major, b2_version.minor, b2_version.revision); + sprintf(buffer, "Box2D Version %d.%d.%d Graph Color", b2_version.major, b2_version.minor, b2_version.revision); if (GLFWmonitor* primaryMonitor = glfwGetPrimaryMonitor()) { diff --git a/src/graph.h b/src/graph.h index e4efe542..dd8d3a51 100644 --- a/src/graph.h +++ b/src/graph.h @@ -13,7 +13,7 @@ typedef struct b2Contact b2Contact; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; -#define b2_graphColorCount 8 +#define b2_graphColorCount 16 typedef struct b2GraphColor { diff --git a/src/island.c b/src/island.c index cad6e0cd..d095301e 100644 --- a/src/island.c +++ b/src/island.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Position Correction Notes @@ -189,6 +188,23 @@ void b2WakeIsland(b2Island* island) b2Array_Push(world->awakeIslandArray, island->object.index); } +#if B2_GRAPH_COLOR == 1 + +void b2LinkContact(b2World* world, b2Contact* contact) +{ + B2_MAYBE_UNUSED(world); + B2_MAYBE_UNUSED(contact); +} + +void b2UnlinkContact(b2World* world, b2Contact* contact) +{ + B2_MAYBE_UNUSED(world); + B2_MAYBE_UNUSED(contact); + +} + +#else + // https://en.wikipedia.org/wiki/Disjoint-set_data_structure void b2LinkContact(b2World* world, b2Contact* contact) { @@ -313,6 +329,8 @@ void b2UnlinkContact(b2World* world, b2Contact* contact) contact->islandNext = B2_NULL_INDEX; } +#endif + static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) { B2_ASSERT(joint->islandIndex == B2_NULL_INDEX); diff --git a/src/world.h b/src/world.h index 05ddaf35..00ed2e77 100644 --- a/src/world.h +++ b/src/world.h @@ -12,6 +12,8 @@ #include "box2d/callbacks.h" #include "box2d/timer.h" +#define B2_GRAPH_COLOR 1 + typedef struct b2Contact b2Contact; // Per thread task storage From 73ea6a7cf5caaaf0935b5b76dc88a7ee65ec17ff Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 16 Aug 2023 23:48:50 -0700 Subject: [PATCH 07/51] high mass ratio test --- samples/CMakeLists.txt | 2 +- samples/collection/behavior.cpp | 64 +++++++++++++++++++++++++++++++++ src/graph.c | 6 ++-- 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 samples/collection/behavior.cpp diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 109e4335..8ae488ec 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -69,7 +69,7 @@ set(BOX2D_SAMPLES collection/benchmark_many_tumblers.cpp collection/benchmark_pyramid.cpp collection/benchmark_tumbler.cpp - + collection/behavior.cpp collection/sample_continuous1.cpp collection/sample_distance.cpp collection/sample_dynamic_tree.cpp diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp new file mode 100644 index 00000000..ca152e77 --- /dev/null +++ b/samples/collection/behavior.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: 2022 Erin Catto +// SPDX-License-Identifier: MIT + +#include "sample.h" + +#include "box2d/box2d.h" +#include "box2d/geometry.h" + +#include +#include + +class HighMassRatio : public Sample +{ + public: + HighMassRatio(const Settings& settings) + : Sample(settings) + { + float extent = 1.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 66.0f * extent; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.friction = 0.5f; + + b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + bodyDef.type = b2_dynamicBody; + + b2Polygon box = b2MakeBox(extent, extent); + + for (int j = 0; j < 1; ++j) + { + int count = 2; + float offset = 2.0f * (count + 1.0f) * extent * j; + float y = extent; + while (count > 0) + { + for (int i = 0; i < count; ++i) + { + float coeff = i - 0.5f * count; + + bodyDef.position = {2.0f * coeff * extent + offset, y}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.density = count == 1 ? (j + 1.0f) * 300.0f : 1.0f; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } + + --count; + y += 2.0f * extent; + } + } + } + + static Sample* Create(const Settings& settings) + { + return new HighMassRatio(settings); + } +}; + +static int sampleIndex = RegisterSample("Behavior", "HighMassRatio", HighMassRatio::Create); diff --git a/src/graph.c b/src/graph.c index 0a01ace8..9a3e6a30 100644 --- a/src/graph.c +++ b/src/graph.c @@ -443,7 +443,7 @@ static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t st cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - const float hertz = 30.0f; + const float hertz = 60.0f; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); @@ -462,8 +462,8 @@ static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t st cp->separation = b2Dot(d, normal) + cp->baseSeparation; } - //cp->bias = B2_MAX(B2_MAX(cp->separation / h, (omega / (2.0f * zeta + h * omega)) * cp->separation), -1.0f); - cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; + cp->bias = B2_MAX(B2_MAX(cp->separation / h, (omega / (2.0f * zeta + h * omega)) * cp->separation), -1.0f); + //cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); wA -= iA * b2Cross(cp->rA, P); From 3d29e55c40b0a31a4f13a767ee2d2835795dece1 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 22 Aug 2023 23:03:55 -0700 Subject: [PATCH 08/51] wip --- include/box2d/manifold.h | 8 + samples/collection/behavior.cpp | 5 +- src/body.c | 2 + src/body.h | 3 + src/graph.c | 560 +++++++++++++++++++------------- src/graph.h | 1 + src/world.c | 4 +- 7 files changed, 361 insertions(+), 222 deletions(-) diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index 740ba1c4..56be72d9 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -45,6 +45,14 @@ typedef struct b2ManifoldPoint typedef struct b2Manifold { b2ManifoldPoint points[b2_maxManifoldPoints]; + + b2Vec2 localFrictionAnchorA; + b2Vec2 localFrictionNormalA; + b2Vec2 localFrictionAnchorB; + b2Vec2 localFrictionNormalB; + float tangentImpulse; + bool frictionValid; + b2Vec2 normal; int32_t pointCount; } b2Manifold; diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index ca152e77..53b8dddb 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -42,10 +42,11 @@ class HighMassRatio : public Sample { float coeff = i - 0.5f * count; - bodyDef.position = {2.0f * coeff * extent + offset, y}; + float yy = count == 1 ? y + 0.0f : y; + bodyDef.position = {2.0f * coeff * extent + offset, yy}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); - shapeDef.density = count == 1 ? (j + 1.0f) * 300.0f : 1.0f; + shapeDef.density = count == 1 ? (j + 1.0f) * 100.0f : 1.0f; b2Body_CreatePolygon(bodyId, &shapeDef, &box); } diff --git a/src/body.c b/src/body.c index ec36272c..e0fef239 100644 --- a/src/body.c +++ b/src/body.c @@ -47,6 +47,8 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->localCenter = b2Vec2_zero; b->linearVelocity = def->linearVelocity; b->angularVelocity = def->angularVelocity; + b->deltaPosition = b2Vec2_zero; + b->deltaAngle = 0.0f; b->force = b2Vec2_zero; b->torque = 0.0f; b->shapeList = B2_NULL_INDEX; diff --git a/src/body.h b/src/body.h index 15449cb8..931619cb 100644 --- a/src/body.h +++ b/src/body.h @@ -36,6 +36,9 @@ typedef struct b2Body b2Vec2 linearVelocity; float angularVelocity; + b2Vec2 deltaPosition; + float deltaAngle; + b2Vec2 force; float torque; diff --git a/src/graph.c b/src/graph.c index 9a3e6a30..14cdb78c 100644 --- a/src/graph.c +++ b/src/graph.c @@ -205,20 +205,66 @@ static void b2IntegrateVelocities(b2World* world, float h) body->linearVelocity = v; body->angularVelocity = w; + + body->deltaPosition = b2Vec2_zero; + body->deltaAngle = 0.0f; + } +} + +static void b2Integrate(b2World* world, float h) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type == b2_staticBody) + { + continue; + } + + body->deltaAngle += h * body->angularVelocity; + body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + } +} + +static void b2UpdatePositions(b2World* world) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type == b2_staticBody) + { + continue; + } + + body->position = b2Add(body->position, body->deltaPosition); + body->angle += body->deltaAngle; } } typedef struct b2ConstraintPoint { b2Vec2 rA, rB; - b2Vec2 localAnchorA, localAnchorB; - float separation; float baseSeparation; float normalImpulse; float tangentImpulse; float normalMass; float tangentMass; - float bias; float gamma; float gammaScale; } b2ConstraintPoint; @@ -229,12 +275,19 @@ typedef struct b2Constraint int32_t indexA; int32_t indexB; b2ConstraintPoint points[2]; + + b2Vec2 frictionAnchorA; + b2Vec2 frictionAnchorB; + float frictionError; + float frictionImpulse; + bool frictionValid; + b2Vec2 normal; float friction; int32_t pointCount; } b2Constraint; -static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* color, float h) +static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool warmStart) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -262,7 +315,7 @@ static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* co constraint->normal = manifold->normal; constraint->friction = contact->friction; constraint->pointCount = pointCount; - + float mA = bodyA->invMass; float iA = bodyA->invI; float mB = bodyB->invMass; @@ -273,127 +326,86 @@ static void b2InitializeConstraintsAndWarmStart(b2World* world, b2GraphColor* co b2Rot qA = b2MakeRot(bodyA->angle); b2Rot qB = b2MakeRot(bodyB->angle); - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); + b2Vec2 tangent = b2RightPerp(normal); for (int32_t j = 0; j < pointCount; ++j) { const b2ManifoldPoint* mp = manifold->points + j; b2ConstraintPoint* cp = constraint->points + j; - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; + if (warmStart) + { + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + } + else + { + cp->normalImpulse = 0.0f; + cp->tangentImpulse = 0.0f; + } cp->rA = b2Sub(mp->point, cA); cp->rB = b2Sub(mp->point, cB); cp->localAnchorA = b2InvRotateVector(qA, cp->rA); cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + cp->baseSeparation = mp->separation; float rtA = b2Cross(cp->rA, tangent); float rtB = b2Cross(cp->rB, tangent); float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - // Soft contact with speculation - const float hertz = 10.0f; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - // float d = 2.0f * zeta * omega / kNormal; - // float k = omega * omega / kNormal; - - // cp->gamma = 1.0f / (h * (d + h * k)); - // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); - cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - - // cp->bias = h * k * cp->gamma * mp->separation; - // cp->bias = k / (d + h * k) * mp->separation; - // cp->bias = - // (omega * omega / kNormal) / (2.0f * dampingRatio * omega / kNormal + h * omega * omega / kNormal) * mp->separation; - cp->bias = (omega / (2.0f * zeta + h * omega)) * mp->separation; - // cp->gamma = 0.0f; - // cp->bias = (0.2f / h) * mp->separation; - - // TODO_ERIN this can be expanded - cp->normalMass = 1.0f / (kNormal + cp->gamma); - - // Warm start - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - - cp->baseSeparation = mp->separation; - cp->separation = mp->separation; + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; } - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2InitializeConstraints(b2World* world, b2GraphColor* color) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - int32_t* contactIndices = color->contactArray; - b2Contact* contacts = world->contacts; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; + bool frictionPreserved = false; + while (manifold->frictionValid) + { + b2Vec2 normalA = b2RotateVector(qA, manifold->localFrictionNormalA); + b2Vec2 normalB = b2RotateVector(qB, manifold->localFrictionNormalB); - B2_ASSERT(0 < pointCount && pointCount <= 2); + float nn = b2Dot(normalA, normalB); + if (nn < 0.98f) + { + break; + } - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + b2Vec2 anchorA = b2RotateVector(qA, manifold->localFrictionAnchorA); + b2Vec2 anchorB = b2RotateVector(qB, manifold->localFrictionAnchorB); + b2Vec2 offset = b2Add(b2Sub(cB, cA), b2Sub(anchorB - anchorA)); + float frictionSeparation = b2Dot(offset, normalA); + if (B2_ABS(frictionSeparation) > 2.0f * b2_linearSlop) + { + break; + } - b2Constraint* constraint = color->constraints + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - constraint->pointCount = pointCount; + constraint->frictionAnchorA = anchorA; + constraint->frictionAnchorB = anchorB; + constraint->frictionError = b2Dot(offset, b2RightPerp(normal)); - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); + frictionPreserved = true; + break; + } - for (int32_t j = 0; j < pointCount; ++j) + if (frictionPreserved == false) { - const b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; + // TODO_ERIN compute new local anchors and normals - cp->localAnchorA = b2InvRotateVector(qA, b2Sub(mp->point, cA)); - cp->localAnchorB = b2InvRotateVector(qB, b2Sub(mp->point, cB)); - cp->baseSeparation = mp->separation; + constraint->frictionAnchorA = anchorA; + constraint->frictionAnchorB = anchorB; + constraint->frictionError = 0.0f; } + + constraint->frictionImpulse = frictionPreserved; + manifold->frictionValid = true; } } -static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t stepIndex) +static void b2WarmStart(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -413,11 +425,6 @@ static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t st float mB = bodyB->invMass; float iB = bodyB->invI; - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); - b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; b2Vec2 vB = bodyB->linearVelocity; @@ -430,41 +437,6 @@ static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t st { b2ConstraintPoint* cp = constraint->points + j; - cp->rA = b2RotateVector(qA, cp->localAnchorA); - cp->rB = b2RotateVector(qB, cp->localAnchorB); - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - const float hertz = 60.0f; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - cp->normalMass = 1.0f / (kNormal + cp->gamma); - - if (stepIndex == 0) - { - cp->separation = cp->baseSeparation; - } - else - { - // Current separation - b2Vec2 d = b2Add(b2Sub(cB, cA), b2Sub(cp->rB, cp->rA)); - - // TODO_ERIN really only need to update bias below - cp->separation = b2Dot(d, normal) + cp->baseSeparation; - } - - cp->bias = B2_MAX(B2_MAX(cp->separation / h, (omega / (2.0f * zeta + h * omega)) * cp->separation), -1.0f); - //cp->bias = (omega / (2.0f * zeta + h * omega)) * cp->separation; - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); wA -= iA * b2Cross(cp->rA, P); vA = b2MulAdd(vA, -mA, P); @@ -479,7 +451,7 @@ static void b2WarmStart(b2World* world, b2GraphColor* color, float h, int32_t st } } -static void b2SolveConstraints(b2World* world, b2GraphColor* color) +static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -548,7 +520,82 @@ static void b2SolveConstraints(b2World* world, b2GraphColor* color) // Compute normal impulse float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * (vn + cp->bias + cp->gamma * cp->normalImpulse); + float impulse = -cp->normalMass * vn; + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2SolveVelocityConstraints2(b2World* world, b2GraphColor* color, float minSeparation, float inv_dt) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + const float maxBaumgarteVelocity = 2.0f; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + const b2Vec2 dpA = bodyA->deltaPosition; + const float daA = bodyA->deltaAngle; + const b2Vec2 dpB = bodyB->deltaPosition; + const float daB = bodyB->deltaAngle; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + + float s = B2_MAX(minSeparation, cp->baseSeparation + ds); + float bias = B2_MIN(maxBaumgarteVelocity, -0.8f * s * inv_dt); + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = cp->normalMass * (bias - vn); // Clamp the accumulated impulse float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); @@ -564,6 +611,35 @@ static void b2SolveConstraints(b2World* world, b2GraphColor* color) wB += iB * b2Cross(cp->rB, P); } + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; @@ -640,98 +716,66 @@ static void b2IntegratePositions(b2World* world, float h) } } -static void b2FinalizePositions(b2World* world, float h) +static void b2SolvePositionConstraints(b2World* world, b2GraphColor* color) { + const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; - b2Contact* contacts = world->contacts; - - b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[0].shapeBitSet; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCapacity; ++i) + for (int32_t i = 0; i < constraintCount; ++i) { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } - - if (body->type == b2_staticBody) - { - continue; - } + b2Constraint* constraint = color->constraints + i; - b2Vec2 c = body->position; - float a = body->angle; - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; - // Clamp large velocities - b2Vec2 translation = b2MulSV(h, v); - if (b2Dot(translation, translation) > b2_maxTranslationSquared) - { - float ratio = b2_maxTranslation / b2Length(translation); - v = b2MulSV(ratio, v); - } + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; - float rotation = h * w; - if (rotation * rotation > b2_maxRotationSquared) - { - float ratio = b2_maxRotation / B2_ABS(rotation); - w *= ratio; - } + b2Vec2 cA = bodyA->position; + float aA = bodyA->angle; + b2Vec2 cB = bodyB->position; + float aB = bodyB->angle; - // Integrate - c = b2MulAdd(c, h, v); - a += h * w; + b2Vec2 normal = constraint->normal; + float slop = b2_linearSlop; - body->position = c; - body->angle = a; - body->linearVelocity = v; - body->angularVelocity = w; + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); - body->force = b2Vec2_zero; - body->torque = 0.0f; + b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); + b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; + // Current separation + b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); + float separation = b2Dot(d, normal) + cp->baseSeparation; - B2_ASSERT(shape->isFast == false); + // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. + // This improves stacking stability significantly. + float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + // Compute normal impulse + float impulse = -cp->normalMass * C; - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + b2Vec2 P = b2MulSV(impulse, normal); - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } + cA = b2MulSub(cA, mA, P); + aA -= iA * b2Cross(cp->rA, P); - shapeIndex = shape->nextShapeIndex; + cB = b2MulAdd(cB, mB, P); + aB += iB * b2Cross(cp->rB, P); } - int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; - - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } + bodyA->position = cA; + bodyA->angle = aA; + bodyB->position = cB; + bodyB->angle = aB; } } @@ -825,21 +869,23 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) B2_ASSERT(base == constraintCount); - int32_t iterationCount = stepContext->velocityIterations; + int32_t velocityIterations = stepContext->velocityIterations; + int32_t positionIterations = stepContext->positionIterations; float h = stepContext->dt; b2IntegrateVelocities(world, h); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraintsAndWarmStart(world, colors + i, h); + b2InitializeConstraints(world, colors + i, true); + b2WarmStart(world, colors + i); } - for (int32_t iter = 0; iter < iterationCount; ++iter) + for (int32_t iter = 0; iter < velocityIterations; ++iter) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveConstraints(world, colors + i); + b2SolveVelocityConstraints(world, colors + i); } } @@ -848,9 +894,19 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) b2StoreImpulses(colors + i); } - b2FreeStackItem(world->stackAllocator, constraints); + b2IntegratePositions(world, h); + + for (int32_t iter = 0; iter < positionIterations; ++iter) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolvePositionConstraints(world, colors + i); + } + } - b2FinalizePositions(world, h); + b2FinalizeSolve(world); + + b2FreeStackItem(world->stackAllocator, constraints); } void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) @@ -880,7 +936,7 @@ void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i); + b2InitializeConstraints(world, colors + i, true); } for (int32_t substep = 0; substep < substepCount; ++substep) @@ -890,16 +946,21 @@ void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) // Have to fully complete warm starting before solving constraints for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2WarmStart(world, colors + i, h, i); + b2WarmStart(world, colors + i); } // One constraint iteration for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveConstraints(world, colors + i); + b2SolveVelocityConstraints(world, colors + i); } b2IntegratePositions(world, h); + + //for (int32_t i = 0; i < b2_graphColorCount; ++i) + //{ + // b2SolvePositionConstraints(world, colors + i); + //} } for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -911,3 +972,64 @@ void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); } + +void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) +{ + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + int32_t constraintCount = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + constraintCount += b2Array(colors[i].contactArray).count; + } + + b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + int32_t base = 0; + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + colors[i].constraints = constraints + base; + base += b2Array(colors[i].contactArray).count; + } + + B2_ASSERT(base == constraintCount); + + b2IntegrateVelocities(world, stepContext->dt); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2InitializeConstraints(world, colors + i, false); + } + + int32_t substepCount = stepContext->velocityIterations; + float h = stepContext->dt / substepCount; + + for (int32_t substep = 0; substep < substepCount; ++substep) + { + // One constraint iteration + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveVelocityConstraints2(world, colors + i, -b2_huge, stepContext->inv_dt); + } + + b2Integrate(world, h); + + // for (int32_t i = 0; i < b2_graphColorCount; ++i) + //{ + // b2SolvePositionConstraints(world, colors + i); + // } + } + + // One iteration with no baumgarte and no affect on position + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveVelocityConstraints2(world, colors + i, 0.0f, stepContext->inv_dt); + } + + b2UpdatePositions(world); + + b2FinalizeSolve(world); + + b2FreeStackItem(world->stackAllocator, constraints); +} diff --git a/src/graph.h b/src/graph.h index dd8d3a51..aa107ed2 100644 --- a/src/graph.h +++ b/src/graph.h @@ -36,3 +36,4 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext); diff --git a/src/world.c b/src/world.c index 508702f7..c73ca269 100644 --- a/src/world.c +++ b/src/world.c @@ -937,7 +937,9 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - b2SolveGraphTGS(world, context); + //b2SolveGraphPGS(world, context); + //b2SolveGraphTGS(world, context); + b2SolveGraphTGS2(world, context); b2ValidateNoEnlarged(&world->broadPhase); From 9c00dd0c4fd6f883586305c1a9543f17d5a52c2f Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 26 Aug 2023 12:07:10 -0700 Subject: [PATCH 09/51] sticky friction static graph colors --- include/box2d/manifold.h | 18 +- samples/collection/behavior.cpp | 12 + samples/collection/sample_vertical_stack.cpp | 2 +- src/contact.c | 17 ++ src/graph.c | 239 ++++++++++++++----- src/graph.h | 3 + 6 files changed, 215 insertions(+), 76 deletions(-) diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index 56be72d9..d3dd7508 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -6,7 +6,6 @@ #include "box2d/types.h" #define b2_nullFeature UCHAR_MAX -#define b2_maxManifoldPoints 2 typedef struct b2Circle b2Circle; typedef struct b2Capsule b2Capsule; @@ -25,6 +24,12 @@ typedef struct b2ManifoldPoint /// world coordinates of contact point b2Vec2 point; + // Friction anchors + b2Vec2 localAnchorA; + b2Vec2 localAnchorB; + b2Vec2 localNormalA; + b2Vec2 localNormalB; + /// the separation of the contact point, negative if penetrating float separation; @@ -44,17 +49,10 @@ typedef struct b2ManifoldPoint /// Contact manifold convex shapes. typedef struct b2Manifold { - b2ManifoldPoint points[b2_maxManifoldPoints]; - - b2Vec2 localFrictionAnchorA; - b2Vec2 localFrictionNormalA; - b2Vec2 localFrictionAnchorB; - b2Vec2 localFrictionNormalB; - float tangentImpulse; - bool frictionValid; - + b2ManifoldPoint points[2]; b2Vec2 normal; int32_t pointCount; + bool frictionPersisted; } b2Manifold; static const b2Manifold b2_emptyManifold = {0}; diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index 53b8dddb..07effd4d 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -31,6 +31,17 @@ class HighMassRatio : public Sample b2Polygon box = b2MakeBox(extent, extent); +#if 1 + int count = 2; + for (int i = 0; i < count; ++i) + { + bodyDef.position = {0.0f, (2.0f * i + 1.0f) * 1.0f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.density = i == count - 1 ? 100.0f : 1.0f; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } +#else for (int j = 0; j < 1; ++j) { int count = 2; @@ -54,6 +65,7 @@ class HighMassRatio : public Sample y += 2.0f * extent; } } +#endif } static Sample* Create(const Settings& settings) diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index d339dde3..db78c525 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -56,7 +56,7 @@ class VerticalStack : public Sample } m_shapeType = e_boxShape; - m_rowCount = g_sampleDebug ? 2 : 50; + m_rowCount = g_sampleDebug ? 3 : 50; m_columnCount = g_sampleDebug ? 1 : 200; m_bulletCount = 1; m_bulletType = e_circleShape; diff --git a/src/contact.c b/src/contact.c index 9db06d84..c28cbc31 100644 --- a/src/contact.c +++ b/src/contact.c @@ -413,6 +413,13 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body touching = contact->manifold.pointCount > 0; + contact->manifold.frictionPersisted = true; + + if (contact->manifold.pointCount != oldManifold.pointCount) + { + contact->manifold.frictionPersisted = false; + } + // Match old contact ids to new contact ids and copy the // stored impulses to warm start the solver. for (int32_t i = 0; i < contact->manifold.pointCount; ++i) @@ -429,6 +436,11 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body if (mp1->id == id2) { + mp2->localNormalA = mp1->localNormalA; + mp2->localNormalB = mp1->localNormalB; + mp2->localAnchorA = mp1->localAnchorA; + mp2->localAnchorB = mp1->localAnchorB; + mp2->normalImpulse = mp1->normalImpulse; mp2->tangentImpulse = mp1->tangentImpulse; mp2->persisted = true; @@ -436,6 +448,11 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body } } + if (mp2->persisted == false) + { + contact->manifold.frictionPersisted = false; + } + // For debugging ids // if (mp2->persisted == false && contact->manifold.pointCount == oldManifold.pointCount) //{ diff --git a/src/graph.c b/src/graph.c index 14cdb78c..7de9c7d2 100644 --- a/src/graph.c +++ b/src/graph.c @@ -30,6 +30,15 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->staticColors + i; + color->bodySet = b2CreateBitSet(bodyCapacity); + b2SetBitCountAndClear(&color->bodySet, bodyCapacity); + + color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + } } void b2DestroyGraph(b2Graph* graph) @@ -40,8 +49,16 @@ void b2DestroyGraph(b2Graph* graph) b2DestroyBitSet(&color->bodySet); b2DestroyArray(color->contactArray, sizeof(int32_t)); } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->staticColors + i; + b2DestroyBitSet(&color->bodySet); + b2DestroyArray(color->contactArray, sizeof(int32_t)); + } } +// TODO_ERIN use a specific color for static constraints so they go last in the solver void b2AddContactToGraph(b2World* world, b2Contact* contact) { B2_ASSERT(contact->colorContactIndex == B2_NULL_INDEX); @@ -55,7 +72,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) b2BodyType typeA = world->bodies[bodyIndexA].type; b2BodyType typeB = world->bodies[bodyIndexB].type; - if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + if (typeA != b2_staticBody && typeB != b2_staticBody) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -78,7 +95,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2GraphColor* color = graph->colors + i; + b2GraphColor* color = graph->staticColors + i; if (b2GetBit(&color->bodySet, bodyIndexA)) { continue; @@ -96,7 +113,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2GraphColor* color = graph->colors + i; + b2GraphColor* color = graph->staticColors + i; if (b2GetBit(&color->bodySet, bodyIndexB)) { continue; @@ -128,34 +145,53 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2BodyType typeA = world->bodies[bodyIndexA].type; b2BodyType typeB = world->bodies[bodyIndexB].type; - b2GraphColor* color = graph->colors + contact->colorIndex; - - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) - { - // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; - } - - if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + if (typeA != b2_staticBody && typeB != b2_staticBody) { + b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } b2ClearBit(&color->bodySet, bodyIndexA); b2ClearBit(&color->bodySet, bodyIndexB); } else if (typeA == b2_dynamicBody) { + b2GraphColor* color = graph->staticColors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + b2ClearBit(&color->bodySet, bodyIndexA); } else if (typeB == b2_dynamicBody) { + b2GraphColor* color = graph->staticColors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + b2ClearBit(&color->bodySet, bodyIndexB); } @@ -260,6 +296,9 @@ static void b2UpdatePositions(b2World* world) typedef struct b2ConstraintPoint { b2Vec2 rA, rB; + b2Vec2 rAf, rBf; + b2Vec2 localAnchorA, localAnchorB; + float tangentSeparation; float baseSeparation; float normalImpulse; float tangentImpulse; @@ -267,6 +306,7 @@ typedef struct b2ConstraintPoint float tangentMass; float gamma; float gammaScale; + bool frictionValid; } b2ConstraintPoint; typedef struct b2Constraint @@ -275,13 +315,6 @@ typedef struct b2Constraint int32_t indexA; int32_t indexB; b2ConstraintPoint points[2]; - - b2Vec2 frictionAnchorA; - b2Vec2 frictionAnchorB; - float frictionError; - float frictionImpulse; - bool frictionValid; - b2Vec2 normal; float friction; int32_t pointCount; @@ -297,8 +330,8 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool wa for (int32_t i = 0; i < constraintCount; ++i) { b2Contact* contact = contacts + contactIndices[i]; + b2Manifold* manifold = &contact->manifold; - const b2Manifold* manifold = &contact->manifold; int32_t pointCount = manifold->pointCount; B2_ASSERT(0 < pointCount && pointCount <= 2); @@ -362,46 +395,77 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool wa cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; } - bool frictionPreserved = false; - while (manifold->frictionValid) + bool frictionConfirmed = false; + if (manifold->frictionPersisted) { - b2Vec2 normalA = b2RotateVector(qA, manifold->localFrictionNormalA); - b2Vec2 normalB = b2RotateVector(qB, manifold->localFrictionNormalB); - - float nn = b2Dot(normalA, normalB); - if (nn < 0.98f) + int32_t confirmCount = 0; + for (int32_t j = 0; j < pointCount; ++j) { - break; + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + b2Vec2 normalA = b2RotateVector(qA, mp->localNormalA); + b2Vec2 normalB = b2RotateVector(qB, mp->localNormalB); + + float nn = b2Dot(normalA, normalB); + if (nn < 0.98f) + { + // Relative rotation has invalidated cached friction anchors + break; + } + + b2Vec2 anchorA = b2RotateVector(qA, mp->localAnchorA); + b2Vec2 anchorB = b2RotateVector(qB, mp->localAnchorB); + b2Vec2 offset = b2Add(b2Sub(cB, cA), b2Sub(anchorB, anchorA)); + float normalSeparation = b2Dot(offset, normalA); + if (B2_ABS(normalSeparation) > 2.0f * b2_linearSlop) + { + // Normal separation has invalidated cached friction anchors + break; + } + + cp->rAf = anchorA; + cp->rBf = anchorB; + cp->tangentSeparation = b2Dot(offset, tangent); + + float rtA = b2Cross(anchorA, tangent); + float rtB = b2Cross(anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + confirmCount += 1; } - b2Vec2 anchorA = b2RotateVector(qA, manifold->localFrictionAnchorA); - b2Vec2 anchorB = b2RotateVector(qB, manifold->localFrictionAnchorB); - b2Vec2 offset = b2Add(b2Sub(cB, cA), b2Sub(anchorB - anchorA)); - float frictionSeparation = b2Dot(offset, normalA); - if (B2_ABS(frictionSeparation) > 2.0f * b2_linearSlop) + if (confirmCount == pointCount) { - break; + frictionConfirmed = true; } - - constraint->frictionAnchorA = anchorA; - constraint->frictionAnchorB = anchorB; - constraint->frictionError = b2Dot(offset, b2RightPerp(normal)); - - frictionPreserved = true; - break; } - if (frictionPreserved == false) + if (frictionConfirmed == false) { - // TODO_ERIN compute new local anchors and normals - - constraint->frictionAnchorA = anchorA; - constraint->frictionAnchorB = anchorB; - constraint->frictionError = 0.0f; + for (int32_t j = 0; j < pointCount; ++j) + { + b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + mp->localNormalA = b2InvRotateVector(qA, normal); + mp->localNormalB = b2InvRotateVector(qB, normal); + mp->localAnchorA = b2InvRotateVector(qA, cp->rA); + mp->localAnchorB = b2InvRotateVector(qB, cp->rB); + + cp->rAf = cp->rA; + cp->rBf = cp->rB; + cp->tangentSeparation = 0.0f; + + float rtA = b2Cross(cp->rAf, tangent); + float rtB = b2Cross(cp->rBf, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + } } - constraint->frictionImpulse = frictionPreserved; - manifold->frictionValid = true; + manifold->frictionPersisted = true; } } @@ -543,11 +607,11 @@ static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) } } -static void b2SolveVelocityConstraints2(b2World* world, b2GraphColor* color, float minSeparation, float inv_dt) +static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color, float minSeparation, float invh) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; - const float maxBaumgarteVelocity = 2.0f; + const float maxBaumgarteVelocity = 3.0f; for (int32_t i = 0; i < constraintCount; ++i) { @@ -576,6 +640,7 @@ static void b2SolveVelocityConstraints2(b2World* world, b2GraphColor* color, flo b2Vec2 tangent = b2RightPerp(normal); float friction = constraint->friction; + // Non-penetration constraints for (int32_t j = 0; j < pointCount; ++j) { b2ConstraintPoint* cp = constraint->points + j; @@ -591,7 +656,7 @@ static void b2SolveVelocityConstraints2(b2World* world, b2GraphColor* color, flo float ds = b2Dot(b2Sub(prB, prA), normal); float s = B2_MAX(minSeparation, cp->baseSeparation + ds); - float bias = B2_MIN(maxBaumgarteVelocity, -0.8f * s * inv_dt); + float bias = B2_MIN(maxBaumgarteVelocity, -0.8f * s * invh); // Compute normal impulse float vn = b2Dot(dv, normal); @@ -611,27 +676,47 @@ static void b2SolveVelocityConstraints2(b2World* world, b2GraphColor* color, flo wB += iB * b2Cross(cp->rB, P); } + // Sticky friction constraints for (int32_t j = 0; j < pointCount; ++j) { b2ConstraintPoint* cp = constraint->points + j; // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rBf)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rAf)); b2Vec2 dv = b2Sub(vrB, vrA); - // Compute tangent force + // Compute change in separation + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rBf)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rAf)); + float ds = b2Dot(b2Sub(prB, prA), tangent); + + float s = cp->tangentSeparation + ds; + float bias = B2_CLAMP(-0.8f * s * invh, -maxBaumgarteVelocity, maxBaumgarteVelocity); + + // Compute tangent impulse float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); + float impulse = cp->tangentMass * (bias - vt); - // Clamp the accumulated force + // Clamp the accumulated impulse float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; + float newImpulse = cp->tangentImpulse + impulse; + if (newImpulse < -maxFriction) + { + newImpulse = -maxFriction; + constraint->contact->manifold.frictionPersisted = false; + } + else if (newImpulse > maxFriction) + { + newImpulse = maxFriction; + constraint->contact->manifold.frictionPersisted = false; + } + + impulse = newImpulse - cp->tangentImpulse; cp->tangentImpulse = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); + b2Vec2 P = b2MulSV(impulse, tangent); vA = b2MulSub(vA, mA, P); wA -= iA * b2Cross(cp->rA, P); @@ -977,11 +1062,13 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; + b2GraphColor* staticColors = graph->staticColors; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { constraintCount += b2Array(colors[i].contactArray).count; + constraintCount += b2Array(staticColors[i].contactArray).count; } b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); @@ -993,6 +1080,12 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) base += b2Array(colors[i].contactArray).count; } + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + staticColors[i].constraints = constraints + base; + base += b2Array(staticColors[i].contactArray).count; + } + B2_ASSERT(base == constraintCount); b2IntegrateVelocities(world, stepContext->dt); @@ -1002,15 +1095,26 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) b2InitializeConstraints(world, colors + i, false); } + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2InitializeConstraints(world, staticColors + i, false); + } + int32_t substepCount = stepContext->velocityIterations; float h = stepContext->dt / substepCount; + float invh = substepCount / stepContext->dt; for (int32_t substep = 0; substep < substepCount; ++substep) { // One constraint iteration for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraints2(world, colors + i, -b2_huge, stepContext->inv_dt); + b2SolveVelocityConstraintsSticky(world, colors + i, -b2_huge, invh); + } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveVelocityConstraintsSticky(world, staticColors + i, -b2_huge, invh); } b2Integrate(world, h); @@ -1024,7 +1128,12 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) // One iteration with no baumgarte and no affect on position for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraints2(world, colors + i, 0.0f, stepContext->inv_dt); + b2SolveVelocityConstraintsSticky(world, colors + i, 0.0f, 0.0f); + } + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveVelocityConstraintsSticky(world, staticColors + i, 0.0f, 0.0f); } b2UpdatePositions(world); diff --git a/src/graph.h b/src/graph.h index aa107ed2..51efe9bb 100644 --- a/src/graph.h +++ b/src/graph.h @@ -25,7 +25,10 @@ typedef struct b2GraphColor typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; + int32_t colorCount; + b2GraphColor staticColors[b2_graphColorCount]; + int32_t staticColorCount; } b2Graph; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); From bbc3031d4a13bfd622ee668508d051419611161c Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Thu, 31 Aug 2023 23:44:48 -0700 Subject: [PATCH 10/51] wip --- include/box2d/aabb.h | 7 + include/box2d/constants.h | 1 - include/box2d/manifold.h | 1 + samples/collection/behavior.cpp | 79 +- samples/collection/benchmark_barrel.cpp | 5 + samples/collection/benchmark_pyramid.cpp | 6 +- samples/collection/sample_vertical_stack.cpp | 8 +- samples/sample.cpp | 14 +- samples/sample.h | 1 + src/contact.c | 10 +- src/contact.h | 6 +- src/graph.c | 875 ++++++++++++++++--- src/graph.h | 5 +- src/world.c | 7 +- 14 files changed, 898 insertions(+), 127 deletions(-) diff --git a/include/box2d/aabb.h b/include/box2d/aabb.h index efa99e8f..456f60f5 100644 --- a/include/box2d/aabb.h +++ b/include/box2d/aabb.h @@ -107,6 +107,13 @@ static inline bool b2AABB_Contains(b2AABB a, b2AABB b) return s; } +static inline bool b2AABB_ContainsWithMargin(b2AABB a, b2AABB b, float margin) +{ + bool s = (a.lowerBound.x <= b.lowerBound.x - margin) & (a.lowerBound.y <= b.lowerBound.y - margin) & + (b.upperBound.x + margin <= a.upperBound.x) & (b.upperBound.y + margin <= a.upperBound.y); + return s; +} + /// Do a and b overlap static inline bool b2AABB_Overlaps(b2AABB a, b2AABB b) { diff --git a/include/box2d/constants.h b/include/box2d/constants.h index 0f3d2b1f..b06e5b28 100644 --- a/include/box2d/constants.h +++ b/include/box2d/constants.h @@ -72,7 +72,6 @@ extern float b2_lengthUnitsPerMeter; #define b2_maxRotation (0.5f * b2_pi) #define b2_maxRotationSquared (b2_maxRotation * b2_maxRotation) -/// TODO_ERIN make dynamic based on speed? /// @warning modifying this can have a significant impact on stability #define b2_speculativeDistance (4.0f * b2_linearSlop) diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index d3dd7508..d5242e48 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -52,6 +52,7 @@ typedef struct b2Manifold b2ManifoldPoint points[2]; b2Vec2 normal; int32_t pointCount; + int32_t constraintIndex; bool frictionPersisted; } b2Manifold; diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index 07effd4d..f46d0d56 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -31,21 +31,23 @@ class HighMassRatio : public Sample b2Polygon box = b2MakeBox(extent, extent); -#if 1 +#if 0 + //b2Circle circle = {{0.0f, 0.0f}, extent}; int count = 2; for (int i = 0; i < count; ++i) { bodyDef.position = {0.0f, (2.0f * i + 1.0f) * 1.0f * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); - shapeDef.density = i == count - 1 ? 100.0f : 1.0f; + shapeDef.density = i == count - 1 ? 300.0f : 1.0f; + //b2Body_CreateCircle(bodyId, &shapeDef, &circle); b2Body_CreatePolygon(bodyId, &shapeDef, &box); } #else - for (int j = 0; j < 1; ++j) + for (int j = 0; j < 3; ++j) { - int count = 2; - float offset = 2.0f * (count + 1.0f) * extent * j; + int count = 10; + float offset = -20.0f * extent + 2.0f * (count + 1.0f) * extent * j; float y = extent; while (count > 0) { @@ -53,7 +55,7 @@ class HighMassRatio : public Sample { float coeff = i - 0.5f * count; - float yy = count == 1 ? y + 0.0f : y; + float yy = count == 1 ? y + 2.0f : y; bodyDef.position = {2.0f * coeff * extent + offset, yy}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); @@ -74,4 +76,67 @@ class HighMassRatio : public Sample } }; -static int sampleIndex = RegisterSample("Behavior", "HighMassRatio", HighMassRatio::Create); +static int sampleIndex1 = RegisterSample("Behavior", "HighMassRatio", HighMassRatio::Create); + + +class Friction : public Sample +{ + public: + Friction(const Settings& settings) + : Sample(settings) + { + + { + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.friction = 0.2f; + + b2Segment segment = {{-40.0f, 0.0f}, {40.0f, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + b2Polygon box = b2MakeOffsetBox(13.0f, 0.25f, {-4.0f, 22.0f}, -0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(0.25f, 1.0f, {10.5f, 19.0f}, 0.0f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(13.0f, 0.25f, {4.0f, 14.0f}, 0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(0.25f, 1.0f, {-10.5f, 11.0f}, 0.0f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(13.0f, 0.25f, {-4.0f, 6.0f}, -0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + } + + { + b2Polygon box = b2MakeBox(0.5f, 0.5f); + + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 25.0f; + + float friction[5] = {0.75f, 0.5f, 0.35f, 0.1f, 0.0f}; + + for (int i = 0; i < 5; ++i) + { + b2BodyDef bodyDef = b2DefaultBodyDef(); + bodyDef.type = b2_dynamicBody; + bodyDef.position = {-15.0f + 4.0f * i, 28.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.friction = friction[i]; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } + } + } + + static Sample* Create(const Settings& settings) + { + return new Friction(settings); + } +}; + +static int sampleIndex2 = RegisterSample("Behavior", "Friction", Friction::Create); diff --git a/samples/collection/benchmark_barrel.cpp b/samples/collection/benchmark_barrel.cpp index ddfd58b5..2fd7f7f4 100644 --- a/samples/collection/benchmark_barrel.cpp +++ b/samples/collection/benchmark_barrel.cpp @@ -70,6 +70,9 @@ class BenchmarkBarrel : public Sample } m_columnCount = g_sampleDebug ? 4 : e_maxColumns; + + m_columnCount = e_maxColumns; + float rad = 0.5f; float shift = rad * 2.0f; @@ -90,6 +93,8 @@ class BenchmarkBarrel : public Sample m_rowCount = g_sampleDebug ? 8 : e_maxRows; + m_rowCount = e_maxRows; + int32_t index = 0; for (int32_t i = 0; i < m_columnCount; ++i) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 51cfb782..83c03de3 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -57,13 +57,15 @@ class BenchmarkPyramid : public Sample float h = m_extent - m_round; b2Polygon cuboid = b2MakeRoundedBox(h, h, m_round); + float shift = 1.0f * h; + for (int32_t i = 0; i < m_baseCount; ++i) { - float y = (2.0f * i + 1.0f) * m_extent + baseY; + float y = (2.0f * i + 1.0f) * shift + baseY; for (int32_t j = i; j < m_baseCount; ++j) { - float x = (i + 1.0f) * m_extent + 2.25f * (j - i) * m_extent + centerX - 0.5f; + float x = (i + 1.0f) * shift + 2.0f * (j - i) * shift + centerX - 0.5f; bodyDef.position = {x, y}; diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index db78c525..11ffe27e 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -56,7 +56,7 @@ class VerticalStack : public Sample } m_shapeType = e_boxShape; - m_rowCount = g_sampleDebug ? 3 : 50; + m_rowCount = g_sampleDebug ? 2 : 50; m_columnCount = g_sampleDebug ? 1 : 200; m_bulletCount = 1; m_bulletType = e_circleShape; @@ -78,8 +78,8 @@ class VerticalStack : public Sample b2Circle circle = {0}; circle.radius = 0.5f; - //b2Polygon box = b2MakeBox(0.5f, 0.5f); - b2Polygon box = b2MakeRoundedBox(0.45f, 0.45f, 0.05f); + b2Polygon box = b2MakeBox(0.5f, 0.5f); + //b2Polygon box = b2MakeRoundedBox(0.45f, 0.45f, 0.05f); b2ShapeDef sd = b2DefaultShapeDef(); sd.density = 1.0f; @@ -112,7 +112,7 @@ class VerticalStack : public Sample float shift = (i % 2 == 0 ? -offset : offset); //bd.position = {x + shift, 0.505f + 1.01f * i}; - bd.position = {x + shift, 2.0f + 1.51f * i}; + bd.position = {x + shift, 4.0f + 1.51f * i}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); m_bodies[n] = bodyId; diff --git a/samples/sample.cpp b/samples/sample.cpp index b7a33f21..c048e4f4 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -339,14 +339,21 @@ void Sample::Step(Settings& settings) b2Color addColor = {0.3f, 0.95f, 0.3f, 1.0f}; b2Color persistColor = {0.3f, 0.3f, 0.95f, 1.0f}; - b2HexColor colors[8] = {b2_colorAquamarine, b2_colorBisque, b2_colorBlue, b2_colorBrown, - b2_colorBurlywood, b2_colorCadetBlue, b2_colorChartreuse, b2_colorChocolate}; + b2HexColor colors[12] = {b2_colorAquamarine, b2_colorBisque, b2_colorBlue, b2_colorBrown, + b2_colorBurlywood, b2_colorCadetBlue, b2_colorChartreuse, b2_colorChocolate, + b2_colorDarkGoldenrod, b2_colorCoral, b2_colorAqua, b2_colorHoneydew}; for (int32_t i = 0; i < m_pointCount; ++i) { ContactPoint* point = m_points + i; - if (0 <= point->color && point->color < 8) + //if (point->constraintIndex >= 0 && point->constraintIndex < 5000) + //{ + // b2Vec2 p = point->position; + // p.y += 0.1f; + // g_draw.DrawString(p, "%d", point->constraintIndex); + //} + if (0 <= point->color && point->color < 12) { // graph color g_draw.DrawPoint(point->position, 5.0f, b2MakeColor(colors[point->color], 1.0f)); @@ -423,6 +430,7 @@ bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifo cp->normalImpulse = manifold->points[j].normalImpulse; cp->tangentImpulse = manifold->points[j].tangentImpulse; cp->persisted = manifold->points[j].persisted; + cp->constraintIndex = manifold->constraintIndex; cp->color = color; ++j; } diff --git a/samples/sample.h b/samples/sample.h index 59d8542a..84beeb27 100644 --- a/samples/sample.h +++ b/samples/sample.h @@ -70,6 +70,7 @@ struct ContactPoint float normalImpulse; float tangentImpulse; float separation; + int32_t constraintIndex; int32_t color; }; diff --git a/src/contact.c b/src/contact.c index c28cbc31..f3e38fa2 100644 --- a/src/contact.c +++ b/src/contact.c @@ -420,6 +420,9 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body contact->manifold.frictionPersisted = false; } + // TODO_ERIN testing + contact->manifold.constraintIndex = oldManifold.constraintIndex; + // Match old contact ids to new contact ids and copy the // stored impulses to warm start the solver. for (int32_t i = 0; i < contact->manifold.pointCount; ++i) @@ -463,7 +466,12 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body if (touching && world->preSolveFcn) { // TODO_ERIN this call assumes thread safety - bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, contact->colorIndex, world->preSolveContext); + int32_t colorIndex = contact->colorIndex; + if (contact->flags & b2_contactStatic) + { + colorIndex += 8; + } + bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, colorIndex, world->preSolveContext); if (collide == false) { // disable contact diff --git a/src/contact.h b/src/contact.h index f59e4987..e9d88a44 100644 --- a/src/contact.h +++ b/src/contact.h @@ -51,7 +51,9 @@ enum b2ContactFlags // This contact stopped touching b2_contactStoppedTouching = 0x00000080, - b2_contactIslandFlag = 0x0100 + b2_contactIslandFlag = 0x00000100, + + b2_contactStatic = 0x00000200 }; /// The class manages contact between two shapes. A contact exists for each overlapping @@ -67,6 +69,8 @@ typedef struct b2Contact //int32_t awakeIndex; int32_t colorIndex; + + // For fast removal from graph color int32_t colorContactIndex; b2ContactEdge edges[2]; diff --git a/src/graph.c b/src/graph.c index 7de9c7d2..84f10569 100644 --- a/src/graph.c +++ b/src/graph.c @@ -16,6 +16,9 @@ #include "box2d/aabb.h" #include +#include + +#define maxBaumgarteVelocity 2.0f void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { @@ -58,6 +61,8 @@ void b2DestroyGraph(b2Graph* graph) } } +#define B2_STATIC_CONTACTS 0 + // TODO_ERIN use a specific color for static constraints so they go last in the solver void b2AddContactToGraph(b2World* world, b2Contact* contact) { @@ -72,7 +77,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) b2BodyType typeA = world->bodies[bodyIndexA].type; b2BodyType typeB = world->bodies[bodyIndexB].type; - if (typeA != b2_staticBody && typeB != b2_staticBody) + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -88,6 +93,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) contact->colorContactIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); contact->colorIndex = i; + contact->flags &= ~b2_contactStatic; break; } } @@ -95,7 +101,12 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { +#if B2_STATIC_CONTACTS == 0 + b2GraphColor* color = graph->colors + i; +#else b2GraphColor* color = graph->staticColors + i; + contact->flags |= b2_contactStatic; +#endif if (b2GetBit(&color->bodySet, bodyIndexA)) { continue; @@ -113,7 +124,12 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { +#if B2_STATIC_CONTACTS == 0 + b2GraphColor* color = graph->colors + i; +#else b2GraphColor* color = graph->staticColors + i; + contact->flags |= b2_contactStatic; +#endif if (b2GetBit(&color->bodySet, bodyIndexB)) { continue; @@ -145,7 +161,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2BodyType typeA = world->bodies[bodyIndexA].type; b2BodyType typeB = world->bodies[bodyIndexB].type; - if (typeA != b2_staticBody && typeB != b2_staticBody) + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) { b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); @@ -164,7 +180,11 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) } else if (typeA == b2_dynamicBody) { +#if B2_STATIC_CONTACTS == 0 + b2GraphColor* color = graph->colors + contact->colorIndex; +#else b2GraphColor* color = graph->staticColors + contact->colorIndex; +#endif B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); int32_t colorContactIndex = contact->colorContactIndex; @@ -180,7 +200,11 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) } else if (typeB == b2_dynamicBody) { +#if B2_STATIC_CONTACTS == 0 + b2GraphColor* color = graph->colors + contact->colorIndex; +#else b2GraphColor* color = graph->staticColors + contact->colorIndex; +#endif B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); int32_t colorContactIndex = contact->colorContactIndex; @@ -197,6 +221,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->colorIndex = B2_NULL_INDEX; contact->colorContactIndex = B2_NULL_INDEX; + contact->flags &= ~b2_contactStatic; } static void b2IntegrateVelocities(b2World* world, float h) @@ -242,12 +267,62 @@ static void b2IntegrateVelocities(b2World* world, float h) body->linearVelocity = v; body->angularVelocity = w; + body->deltaAngle = 0.0f; body->deltaPosition = b2Vec2_zero; + } +} + +#if 0 // no need? +static void b2IntegrateVelocitiesSoft(b2World* world, float h) +{ + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + b2Vec2 gravity = world->gravity; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + if (body->type != b2_dynamicBody) + { + continue; + } + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + body->linearVelocity = v; + body->angularVelocity = w; + body->deltaAngle = 0.0f; + body->deltaPosition = b2Vec2_zero; } } +#endif -static void b2Integrate(b2World* world, float h) +static void b2IntegrateDeltaTransform(b2World* world, float h) { b2Body* bodies = world->bodies; int32_t bodyCapacity = world->bodyPool.capacity; @@ -267,6 +342,7 @@ static void b2Integrate(b2World* world, float h) body->deltaAngle += h * body->angularVelocity; body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + i += 0; } } @@ -299,13 +375,16 @@ typedef struct b2ConstraintPoint b2Vec2 rAf, rBf; b2Vec2 localAnchorA, localAnchorB; float tangentSeparation; - float baseSeparation; + float separation; float normalImpulse; float tangentImpulse; float normalMass; float tangentMass; float gamma; - float gammaScale; + float massCoefficient; + float biasCoefficient; + float impulseCoefficient; + float baumgarte; bool frictionValid; } b2ConstraintPoint; @@ -320,7 +399,7 @@ typedef struct b2Constraint int32_t pointCount; } b2Constraint; -static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool warmStart) +static void b2InitializeSoftConstraints(b2World* world, b2GraphColor* color, float h, bool warmStart) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -330,8 +409,8 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool wa for (int32_t i = 0; i < constraintCount; ++i) { b2Contact* contact = contacts + contactIndices[i]; - b2Manifold* manifold = &contact->manifold; + const b2Manifold* manifold = &contact->manifold; int32_t pointCount = manifold->pointCount; B2_ASSERT(0 < pointCount && pointCount <= 2); @@ -348,7 +427,7 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool wa constraint->normal = manifold->normal; constraint->friction = contact->friction; constraint->pointCount = pointCount; - + float mA = bodyA->invMass; float iA = bodyA->invI; float mB = bodyB->invMass; @@ -359,30 +438,217 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color, bool wa b2Rot qA = b2MakeRot(bodyA->angle); b2Rot qB = b2MakeRot(bodyB->angle); + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); + b2Vec2 tangent = b2RightPerp(constraint->normal); for (int32_t j = 0; j < pointCount; ++j) { const b2ManifoldPoint* mp = manifold->points + j; b2ConstraintPoint* cp = constraint->points + j; + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + + cp->rA = b2Sub(mp->point, cA); + cp->rB = b2Sub(mp->point, cB); + cp->localAnchorA = b2InvRotateVector(qA, cp->rA); + cp->localAnchorB = b2InvRotateVector(qB, cp->rB); + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Soft contact with speculation + const float hertz = 30.0f; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + // float d = 2.0f * zeta * omega / kNormal; + // float k = omega * omega / kNormal; + + // cp->gamma = 1.0f / (h * (d + h * k)); + // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); + cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + + cp->separation = mp->separation; + + // cp->bias = h * k * cp->gamma * mp->separation; + // cp->bias = k / (d + h * k) * mp->separation; + // cp->bias = + // (omega * omega / kNormal) / (2 * zeta * omega / kNormal + h * omega * omega / kNormal) * mp->separation; + cp->biasCoefficient = omega / (2.0f * zeta + h * omega); + // cp->gamma = 0.0f; + // cp->bias = (0.2f / h) * mp->separation; + + // TODO_ERIN this can be expanded + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + //cp->normalMass = 1.0f / (kNormal + cp->gamma); + + float c = h * omega * (2.0f * zeta + h * omega); + cp->impulseCoefficient = 1.0f / (1.0f + c); + cp->massCoefficient = c * cp->impulseCoefficient; + + // meff = 1.0f / kNormal * 1.0f / (1.0f + 1.0f / (h * omega * (2 * zeta + h * omega))) + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + // = -meff * mscale * (vn + bias) - imp_scale * impulse + + // Warm start if (warmStart) { - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; - } - else - { - cp->normalImpulse = 0.0f; - cp->tangentImpulse = 0.0f; + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); } + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2InitializeConstraints(b2World* world, b2GraphColor* color) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + int32_t* contactIndices = color->contactArray; + b2Contact* contacts = world->contacts; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + b2Manifold* manifold = &contact->manifold; + + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = contact->edges[0].bodyIndex; + int32_t indexB = contact->edges[1].bodyIndex; + b2Body* bodyA = bodies + indexA; + b2Body* bodyB = bodies + indexB; + + b2Constraint* constraint = color->constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; cp->rA = b2Sub(mp->point, cA); cp->rB = b2Sub(mp->point, cB); cp->localAnchorA = b2InvRotateVector(qA, cp->rA); cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - cp->baseSeparation = mp->separation; + cp->separation = mp->separation; + + cp->baumgarte = 0.0f; + cp->biasCoefficient = mp->separation > 0.0f ? 1.0f : 0.8f; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + } + } +} + +static void b2InitializeStickyConstraints(b2World* world, b2GraphColor* color) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + int32_t* contactIndices = color->contactArray; + b2Contact* contacts = world->contacts; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + b2Manifold* manifold = &contact->manifold; + + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = contact->edges[0].bodyIndex; + int32_t indexB = contact->edges[1].bodyIndex; + b2Body* bodyA = bodies + indexA; + b2Body* bodyB = bodies + indexB; + + b2Constraint* constraint = color->constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = 0.0f; + cp->tangentImpulse = 0.0f; + + cp->rA = b2Sub(mp->point, cA); + cp->rB = b2Sub(mp->point, cB); + cp->localAnchorA = b2InvRotateVector(qA, cp->rA); + cp->localAnchorB = b2InvRotateVector(qB, cp->rB); + cp->separation = mp->separation; + + cp->baumgarte = mp->separation > 0.0f ? 1.0f : 0.8f; float rtA = b2Cross(cp->rA, tangent); float rtB = b2Cross(cp->rB, tangent); @@ -494,18 +760,242 @@ static void b2WarmStart(b2World* world, b2GraphColor* color) b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2WarmStartAll(b2World* world, b2Constraint* constraints, int32_t constraintCount) +{ + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = constraints + i; + + int32_t pointCount = constraint->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color, float inv_dt) +{ + const int32_t constraintCount = b2Array(color->contactArray).count; + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = color->constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2CrossVS(normal, 1.0f); + float friction = constraint->friction; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * (vn + cp->biasCoefficient * cp->separation * inv_dt); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } +} + +static void b2SolveVelocityConstraintsSorted(b2World* world, b2Constraint* constraints, int32_t constraintCount, float inv_dt) +{ + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2CrossVS(normal, 1.0f); + float friction = constraint->friction; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * (vn + cp->biasCoefficient * cp->separation * inv_dt); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } for (int32_t j = 0; j < pointCount; ++j) { b2ConstraintPoint* cp = constraint->points + j; - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } bodyA->linearVelocity = vA; @@ -515,7 +1005,7 @@ static void b2WarmStart(b2World* world, b2GraphColor* color) } } -static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) +static void b2SolveVelocityConstraintsSoft(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -538,12 +1028,15 @@ static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + const b2Vec2 dpA = bodyA->deltaPosition; + const float daA = bodyA->deltaAngle; + const b2Vec2 dpB = bodyB->deltaPosition; + const float daB = bodyB->deltaAngle; + b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); float friction = constraint->friction; - // Solve tangent constraints first because non-penetration is more important - // than friction. for (int32_t j = 0; j < pointCount; ++j) { b2ConstraintPoint* cp = constraint->points + j; @@ -553,19 +1046,39 @@ static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); b2Vec2 dv = b2Sub(vrB, vrA); - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); + // Compute change in separation + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // Speculative + bias = s * inv_dt; + } + else if (removeOverlap) + { + bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); + //bias = cp->biasCoefficient * s; + massScale = cp->massCoefficient; + impulseScale = cp->impulseCoefficient; + } + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + //float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - + b2Vec2 P = b2MulSV(impulse, normal); vA = b2MulSub(vA, mA, P); wA -= iA * b2Cross(cp->rA, P); @@ -582,17 +1095,19 @@ static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color) b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); b2Vec2 dv = b2Sub(vrB, vrA); - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * vn; + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); + b2Vec2 P = b2MulSV(lambda, tangent); + vA = b2MulSub(vA, mA, P); wA -= iA * b2Cross(cp->rA, P); @@ -611,7 +1126,6 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; - const float maxBaumgarteVelocity = 3.0f; for (int32_t i = 0; i < constraintCount; ++i) { @@ -655,8 +1169,8 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); float ds = b2Dot(b2Sub(prB, prA), normal); - float s = B2_MAX(minSeparation, cp->baseSeparation + ds); - float bias = B2_MIN(maxBaumgarteVelocity, -0.8f * s * invh); + float s = B2_MAX(minSeparation, cp->separation + ds); + float bias = B2_MIN(maxBaumgarteVelocity, -cp->baumgarte * s * invh); // Compute normal impulse float vn = b2Dot(dv, normal); @@ -692,7 +1206,7 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color float ds = b2Dot(b2Sub(prB, prA), tangent); float s = cp->tangentSeparation + ds; - float bias = B2_CLAMP(-0.8f * s * invh, -maxBaumgarteVelocity, maxBaumgarteVelocity); + float bias = -0.2f * s * invh; // Compute tangent impulse float vt = b2Dot(dv, tangent); @@ -732,13 +1246,11 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color } } -static void b2StoreImpulses(b2GraphColor* color) +static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) { - int32_t constraintCount = b2Array(color->contactArray).count; - for (int32_t i = 0; i < constraintCount; ++i) { - b2Constraint* constraint = color->constraints + i; + b2Constraint* constraint = constraints + i; b2Contact* contact = constraint->contact; b2Manifold* manifold = &contact->manifold; @@ -805,6 +1317,7 @@ static void b2SolvePositionConstraints(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; + float slop = b2_linearSlop; for (int32_t i = 0; i < constraintCount; ++i) { @@ -824,6 +1337,72 @@ static void b2SolvePositionConstraints(b2World* world, b2GraphColor* color) b2Vec2 cB = bodyB->position; float aB = bodyB->angle; + b2Vec2 normal = constraint->normal; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + + b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); + b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); + + // Current separation + b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); + float separation = b2Dot(d, normal) + cp->separation; + + // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. + // This improves stacking stability significantly. + float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); + + // Compute the effective mass. + float rnA = b2Cross(rA, normal); + float rnB = b2Cross(rB, normal); + float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + // Compute normal impulse + float impulse = K > 0.0f ? -C / K : 0.0f; + + b2Vec2 P = b2MulSV(impulse, normal); + + cA = b2MulSub(cA, mA, P); + aA -= iA * b2Cross(cp->rA, P); + + cB = b2MulAdd(cB, mB, P); + aB += iB * b2Cross(cp->rB, P); + } + + bodyA->position = cA; + bodyA->angle = aA; + bodyB->position = cB; + bodyB->angle = aB; + } +} + +static void b2SolvePositionConstraintsSorted(b2World* world, b2Constraint* constraints, int32_t constraintCount) +{ + b2Body* bodies = world->bodies; + + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 cA = bodyA->position; + float aA = bodyA->angle; + b2Vec2 cB = bodyB->position; + float aB = bodyB->angle; + b2Vec2 normal = constraint->normal; float slop = b2_linearSlop; @@ -839,14 +1418,19 @@ static void b2SolvePositionConstraints(b2World* world, b2GraphColor* color) // Current separation b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - float separation = b2Dot(d, normal) + cp->baseSeparation; + float separation = b2Dot(d, normal) + cp->separation; // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. // This improves stacking stability significantly. float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); + // Compute the effective mass. + float rnA = b2Cross(rA, normal); + float rnB = b2Cross(rB, normal); + float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + // Compute normal impulse - float impulse = -cp->normalMass * C; + float impulse = K > 0.0f ? -C / K : 0.0f; b2Vec2 P = b2MulSV(impulse, normal); @@ -932,6 +1516,44 @@ static void b2FinalizeSolve(b2World* world) } } +int b2CompareConstraints(const void* ptr1, const void* ptr2) +{ + const b2Constraint* c1 = ptr1; + const b2Constraint* c2 = ptr2; + + b2Vec2 point1 = c1->contact->manifold.points[0].point; + b2Vec2 point2 = c2->contact->manifold.points[0].point; + + if (B2_ABS(point1.y - point2.y) > 5.0f * b2_linearSlop) + { + if (point1.y < point2.y) + { + return 1; + } + + return -1; + } + else if (point1.x < point2.x) + { + return -1; + } + + return 1; +} + +int b2RandomizeConstraints(const void* ptr1, const void* ptr2) +{ + B2_MAYBE_UNUSED(ptr1); + B2_MAYBE_UNUSED(ptr2); + + if (rand() & 1) + { + return -1; + } + + return 1; +} + void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; @@ -957,27 +1579,31 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) int32_t velocityIterations = stepContext->velocityIterations; int32_t positionIterations = stepContext->positionIterations; float h = stepContext->dt; + float inv_h = stepContext->inv_dt; b2IntegrateVelocities(world, h); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i, true); - b2WarmStart(world, colors + i); + b2InitializeConstraints(world, colors + i); + } + + b2WarmStartAll(world, constraints, constraintCount); + + for (int32_t i = 0; i < constraintCount; ++i) + { + constraints[i].contact->manifold.constraintIndex = i; } for (int32_t iter = 0; iter < velocityIterations; ++iter) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraints(world, colors + i); + b2SolveVelocityConstraints(world, colors + i, inv_h); } } - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2StoreImpulses(colors + i); - } + b2StoreImpulses(constraints, constraintCount); b2IntegratePositions(world, h); @@ -994,7 +1620,7 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); } -void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) +void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; @@ -1016,59 +1642,123 @@ void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext) B2_ASSERT(base == constraintCount); - int32_t substepCount = stepContext->velocityIterations; - float h = stepContext->dt / substepCount; + int32_t velocityIterations = stepContext->velocityIterations; + int32_t positionIterations = stepContext->positionIterations; + float h = stepContext->dt; + + b2IntegrateVelocities(world, h); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i, true); + b2InitializeSoftConstraints(world, colors + i, h, true); } - for (int32_t substep = 0; substep < substepCount; ++substep) + for (int32_t iter = 0; iter < velocityIterations; ++iter) { - b2IntegrateVelocities(world, h); - - // Have to fully complete warm starting before solving constraints for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2WarmStart(world, colors + i); + b2SolveVelocityConstraintsSoft(world, colors + i, stepContext->inv_dt, true); } + } + + b2IntegratePositions(world, h); - // One constraint iteration + for (int32_t iter = 0; iter < positionIterations; ++iter) + { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraints(world, colors + i); + b2SolveVelocityConstraintsSoft(world, colors + i, stepContext->inv_dt, false); } + } + + b2StoreImpulses(constraints, constraintCount); + + b2FinalizeSolve(world); + + b2FreeStackItem(world->stackAllocator, constraints); +} + +void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) +{ + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + int32_t constraintCount = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + constraintCount += b2Array(colors[i].contactArray).count; + } + + b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + int32_t base = 0; + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + colors[i].constraints = constraints + base; + base += b2Array(colors[i].contactArray).count; + } - b2IntegratePositions(world, h); + B2_ASSERT(base == constraintCount); + + // Full step apply gravity + b2IntegrateVelocities(world, stepContext->dt); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + bool warmStart = true; + b2InitializeSoftConstraints(world, colors + i, stepContext->dt, warmStart); + } + + int32_t substepCount = stepContext->velocityIterations; + float h = stepContext->dt / substepCount; + float inv_h = 1.0f / h; + for (int32_t substep = 0; substep < substepCount; ++substep) + { //for (int32_t i = 0; i < b2_graphColorCount; ++i) //{ - // b2SolvePositionConstraints(world, colors + i); + // b2WarmStart(world, colors + i); //} + + // One constraint iteration + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + bool removeOverlap = true; + b2SolveVelocityConstraintsSoft(world, colors + i, inv_h, removeOverlap); + } + + b2IntegrateDeltaTransform(world, h); } - for (int32_t i = 0; i < b2_graphColorCount; ++i) + b2UpdatePositions(world); + + int32_t positionIterations = stepContext->positionIterations; + for (int32_t iter = 0; iter < positionIterations; ++iter) { - b2StoreImpulses(colors + i); + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + bool removeOverlap = false; + b2SolveVelocityConstraintsSoft(world, colors + i, 0.0f, removeOverlap); + } } + b2StoreImpulses(constraints, constraintCount); + b2FinalizeSolve(world); b2FreeStackItem(world->stackAllocator, constraints); } -void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) +// Sticky +void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; - b2GraphColor* staticColors = graph->staticColors; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { constraintCount += b2Array(colors[i].contactArray).count; - constraintCount += b2Array(staticColors[i].contactArray).count; } b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); @@ -1080,24 +1770,13 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) base += b2Array(colors[i].contactArray).count; } - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - staticColors[i].constraints = constraints + base; - base += b2Array(staticColors[i].contactArray).count; - } - B2_ASSERT(base == constraintCount); b2IntegrateVelocities(world, stepContext->dt); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i, false); - } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2InitializeConstraints(world, staticColors + i, false); + b2InitializeStickyConstraints(world, colors + i); } int32_t substepCount = stepContext->velocityIterations; @@ -1112,12 +1791,7 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) b2SolveVelocityConstraintsSticky(world, colors + i, -b2_huge, invh); } - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveVelocityConstraintsSticky(world, staticColors + i, -b2_huge, invh); - } - - b2Integrate(world, h); + b2IntegrateDeltaTransform(world, h); // for (int32_t i = 0; i < b2_graphColorCount; ++i) //{ @@ -1125,19 +1799,14 @@ void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext) // } } + b2UpdatePositions(world); + // One iteration with no baumgarte and no affect on position for (int32_t i = 0; i < b2_graphColorCount; ++i) { b2SolveVelocityConstraintsSticky(world, colors + i, 0.0f, 0.0f); } - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveVelocityConstraintsSticky(world, staticColors + i, 0.0f, 0.0f); - } - - b2UpdatePositions(world); - b2FinalizeSolve(world); b2FreeStackItem(world->stackAllocator, constraints); diff --git a/src/graph.h b/src/graph.h index 51efe9bb..286274db 100644 --- a/src/graph.h +++ b/src/graph.h @@ -38,5 +38,6 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact); void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphTGS(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphTGS2(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext); diff --git a/src/world.c b/src/world.c index c73ca269..2092fafe 100644 --- a/src/world.c +++ b/src/world.c @@ -937,9 +937,10 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - //b2SolveGraphPGS(world, context); - //b2SolveGraphTGS(world, context); - b2SolveGraphTGS2(world, context); + //b2SolveGraphSoftPGS(world, context); + b2SolveGraphPGS(world, context); + //b2SolveGraphSoftTGS(world, context); + //b2SolveGraphStickyTGS(world, context); b2ValidateNoEnlarged(&world->broadPhase); From edbcf14bd78b45e988369f5fadee823fa7706dad Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 1 Sep 2023 15:05:17 -0700 Subject: [PATCH 11/51] fixes --- samples/collection/sample_vertical_stack.cpp | 8 +- src/graph.c | 98 +++++++------------- src/graph.h | 3 - src/world.c | 14 +-- 4 files changed, 46 insertions(+), 77 deletions(-) diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index 11ffe27e..05582131 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -14,8 +14,8 @@ class VerticalStack : public Sample enum { - e_maxColumns = 500, - e_maxRows = 100, + e_maxColumns = 50, + e_maxRows = 30, e_maxBullets = 20 }; @@ -111,8 +111,8 @@ class VerticalStack : public Sample int32_t n = j * m_rowCount + i; float shift = (i % 2 == 0 ? -offset : offset); - //bd.position = {x + shift, 0.505f + 1.01f * i}; - bd.position = {x + shift, 4.0f + 1.51f * i}; + bd.position = {x + shift, 0.5f + 1.0f * i}; + //bd.position = {x + shift, 1.0f + 1.51f * i}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); m_bodies[n] = bodyId; diff --git a/src/graph.c b/src/graph.c index 84f10569..7efa932d 100644 --- a/src/graph.c +++ b/src/graph.c @@ -18,7 +18,7 @@ #include #include -#define maxBaumgarteVelocity 2.0f +#define maxBaumgarteVelocity 3.0f void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { @@ -33,15 +33,6 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2GraphColor* color = graph->staticColors + i; - color->bodySet = b2CreateBitSet(bodyCapacity); - b2SetBitCountAndClear(&color->bodySet, bodyCapacity); - - color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); - } } void b2DestroyGraph(b2Graph* graph) @@ -52,18 +43,8 @@ void b2DestroyGraph(b2Graph* graph) b2DestroyBitSet(&color->bodySet); b2DestroyArray(color->contactArray, sizeof(int32_t)); } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2GraphColor* color = graph->staticColors + i; - b2DestroyBitSet(&color->bodySet); - b2DestroyArray(color->contactArray, sizeof(int32_t)); - } } -#define B2_STATIC_CONTACTS 0 - -// TODO_ERIN use a specific color for static constraints so they go last in the solver void b2AddContactToGraph(b2World* world, b2Contact* contact) { B2_ASSERT(contact->colorContactIndex == B2_NULL_INDEX); @@ -101,12 +82,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { -#if B2_STATIC_CONTACTS == 0 b2GraphColor* color = graph->colors + i; -#else - b2GraphColor* color = graph->staticColors + i; - contact->flags |= b2_contactStatic; -#endif if (b2GetBit(&color->bodySet, bodyIndexA)) { continue; @@ -124,12 +100,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { -#if B2_STATIC_CONTACTS == 0 b2GraphColor* color = graph->colors + i; -#else - b2GraphColor* color = graph->staticColors + i; - contact->flags |= b2_contactStatic; -#endif if (b2GetBit(&color->bodySet, bodyIndexB)) { continue; @@ -180,11 +151,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) } else if (typeA == b2_dynamicBody) { -#if B2_STATIC_CONTACTS == 0 b2GraphColor* color = graph->colors + contact->colorIndex; -#else - b2GraphColor* color = graph->staticColors + contact->colorIndex; -#endif B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); int32_t colorContactIndex = contact->colorContactIndex; @@ -200,11 +167,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) } else if (typeB == b2_dynamicBody) { -#if B2_STATIC_CONTACTS == 0 b2GraphColor* color = graph->colors + contact->colorIndex; -#else - b2GraphColor* color = graph->staticColors + contact->colorIndex; -#endif B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); int32_t colorContactIndex = contact->colorContactIndex; @@ -520,7 +483,7 @@ static void b2InitializeSoftConstraints(b2World* world, b2GraphColor* color, flo } } -static void b2InitializeConstraints(b2World* world, b2GraphColor* color) +static void b2InitializePGSConstraints(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -577,7 +540,7 @@ static void b2InitializeConstraints(b2World* world, b2GraphColor* color) cp->separation = mp->separation; cp->baumgarte = 0.0f; - cp->biasCoefficient = mp->separation > 0.0f ? 1.0f : 0.8f; + cp->biasCoefficient = mp->separation > 0.0f ? 1.0f : 0.0f; float rtA = b2Cross(cp->rA, tangent); float rtB = b2Cross(cp->rB, tangent); @@ -648,7 +611,7 @@ static void b2InitializeStickyConstraints(b2World* world, b2GraphColor* color) cp->localAnchorB = b2InvRotateVector(qB, cp->rB); cp->separation = mp->separation; - cp->baumgarte = mp->separation > 0.0f ? 1.0f : 0.8f; + cp->baumgarte = 0.8f; float rtA = b2Cross(cp->rA, tangent); float rtB = b2Cross(cp->rB, tangent); @@ -1152,7 +1115,9 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; + float friction = 0.3f; //constraint->friction; + + float totalNormalImpulse = 0.0f; // Non-penetration constraints for (int32_t j = 0; j < pointCount; ++j) @@ -1168,19 +1133,31 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; - float s = B2_MAX(minSeparation, cp->separation + ds); - float bias = B2_MIN(maxBaumgarteVelocity, -cp->baumgarte * s * invh); + float bias = 0.0f; + if (s > 0.0f) + { + // Speculative + bias = s * invh; + + } + else if (minSeparation < 0.0f) + { + bias = B2_MAX(-maxBaumgarteVelocity, cp->baumgarte * s * invh); + } // Compute normal impulse float vn = b2Dot(dv, normal); - float impulse = cp->normalMass * (bias - vn); + float impulse = -cp->normalMass * (vn + bias); // Clamp the accumulated impulse float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); impulse = newImpulse - cp->normalImpulse; cp->normalImpulse = newImpulse; + totalNormalImpulse += cp->normalImpulse; + // Apply contact impulse b2Vec2 P = b2MulSV(impulse, normal); vA = b2MulSub(vA, mA, P); @@ -1204,16 +1181,17 @@ static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rBf)); b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rAf)); float ds = b2Dot(b2Sub(prB, prA), tangent); - float s = cp->tangentSeparation + ds; - float bias = -0.2f * s * invh; + float bias = 0.5f * s * invh; // Compute tangent impulse float vt = b2Dot(dv, tangent); - float impulse = cp->tangentMass * (bias - vt); + float impulse = -cp->tangentMass * (vt + bias); + + // max friction uses an average of the total normal impulse because persistent friction anchors don't line up with normal anchors + float maxFriction = 0.5f * friction * totalNormalImpulse; // Clamp the accumulated impulse - float maxFriction = friction * cp->normalImpulse; float newImpulse = cp->tangentImpulse + impulse; if (newImpulse < -maxFriction) { @@ -1585,7 +1563,7 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeConstraints(world, colors + i); + b2InitializePGSConstraints(world, colors + i); } b2WarmStartAll(world, constraints, constraintCount); @@ -1715,11 +1693,6 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t substep = 0; substep < substepCount; ++substep) { - //for (int32_t i = 0; i < b2_graphColorCount; ++i) - //{ - // b2WarmStart(world, colors + i); - //} - // One constraint iteration for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -1792,19 +1765,18 @@ void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext) } b2IntegrateDeltaTransform(world, h); - - // for (int32_t i = 0; i < b2_graphColorCount; ++i) - //{ - // b2SolvePositionConstraints(world, colors + i); - // } } b2UpdatePositions(world); - // One iteration with no baumgarte and no affect on position - for (int32_t i = 0; i < b2_graphColorCount; ++i) + int32_t positionIterations = stepContext->positionIterations; + for (int32_t iter = 0; iter < positionIterations; ++iter) { - b2SolveVelocityConstraintsSticky(world, colors + i, 0.0f, 0.0f); + // Solve with no baumgarte and no affect on position + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2SolveVelocityConstraintsSticky(world, colors + i, 0.0f, 0.0f); + } } b2FinalizeSolve(world); diff --git a/src/graph.h b/src/graph.h index 286274db..a498e5e3 100644 --- a/src/graph.h +++ b/src/graph.h @@ -26,9 +26,6 @@ typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; int32_t colorCount; - - b2GraphColor staticColors[b2_graphColorCount]; - int32_t staticColorCount; } b2Graph; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); diff --git a/src/world.c b/src/world.c index 2092fafe..59057536 100644 --- a/src/world.c +++ b/src/world.c @@ -938,9 +938,9 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); //b2SolveGraphSoftPGS(world, context); - b2SolveGraphPGS(world, context); + //b2SolveGraphPGS(world, context); //b2SolveGraphSoftTGS(world, context); - //b2SolveGraphStickyTGS(world, context); + b2SolveGraphStickyTGS(world, context); b2ValidateNoEnlarged(&world->broadPhase); @@ -1196,6 +1196,11 @@ void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations world->profile.collide = b2GetMilliseconds(&timer); } + if (b2_parallel) + { + world->finishAllTasksFcn(world->userTaskContext); + } + // Integrate velocities, solve velocity constraints, and integrate positions. if (context.dt > 0.0f) { @@ -1218,11 +1223,6 @@ void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations // Ensure stack is large enough b2GrowStack(world->stackAllocator); - if (b2_parallel) - { - world->finishAllTasksFcn(world->userTaskContext); - } - b2TracyCZoneEnd(world_step); } From 593c3d2c285f7f008311e7d378ae92876724baa0 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 1 Sep 2023 23:14:47 -0700 Subject: [PATCH 12/51] testing --- samples/collection/behavior.cpp | 115 ++++++++++++++++++++++++++++++-- src/broad_phase.c | 12 ++-- src/graph.c | 5 +- 3 files changed, 120 insertions(+), 12 deletions(-) diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index f46d0d56..5cc07fe2 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -5,14 +5,16 @@ #include "box2d/box2d.h" #include "box2d/geometry.h" +#include "box2d/hull.h" #include #include -class HighMassRatio : public Sample +// Pyramid with heavy box on top +class HighMassRatio1 : public Sample { public: - HighMassRatio(const Settings& settings) + HighMassRatio1(const Settings& settings) : Sample(settings) { float extent = 1.0f; @@ -72,12 +74,65 @@ class HighMassRatio : public Sample static Sample* Create(const Settings& settings) { - return new HighMassRatio(settings); + return new HighMassRatio1(settings); } }; -static int sampleIndex1 = RegisterSample("Behavior", "HighMassRatio", HighMassRatio::Create); +static int sampleIndex1 = RegisterSample("Behavior", "HighMassRatio1", HighMassRatio1::Create); +// Big box on small boxes +class HighMassRatio2 : public Sample +{ + public: + HighMassRatio2(const Settings& settings) + : Sample(settings) + { + float extent = 1.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 66.0f * extent; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + bodyDef.type = b2_dynamicBody; + + b2Vec2 points[3] = {{-0.5f * extent, 0.0f}, {0.5f * extent, 0.0f}, {0.0f, 1.0f * extent}}; + b2Hull hull = b2ComputeHull(points, 3); + b2Polygon smallTriangle = b2MakePolygon(&hull, 0.0f); + b2Polygon smallBox = b2MakeBox(0.5f * extent, 0.5f * extent); + b2Polygon bigBox = b2MakeBox(10.0f * extent, 10.0f * extent); + + { + bodyDef.position = {-9.5f * extent, 0.5f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); + } + + { + bodyDef.position = {9.5f * extent, 0.5f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); + } + + { + bodyDef.position = {0.0f, (10.0f + 1.0f) * extent }; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &bigBox); + } + } + + static Sample* Create(const Settings& settings) + { + return new HighMassRatio2(settings); + } +}; + +static int sampleIndex2 = RegisterSample("Behavior", "HighMassRatio2", HighMassRatio2::Create); class Friction : public Sample { @@ -139,4 +194,54 @@ class Friction : public Sample } }; -static int sampleIndex2 = RegisterSample("Behavior", "Friction", Friction::Create); +static int sampleIndex3 = RegisterSample("Behavior", "Friction", Friction::Create); + +class OverlapRecovery : public Sample +{ + public: + OverlapRecovery(const Settings& settings) + : Sample(settings) + { + float extent = 1.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 10.0f * extent; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + bodyDef.type = b2_dynamicBody; + + b2Polygon box = b2MakeBox(extent, extent); + + int count = 3; + float offset = -count * extent; + float y = extent; + while (count > 0) + { + for (int i = 0; i < count; ++i) + { + float coeff = i - 0.5f * count; + + bodyDef.position = {2.0f * coeff * extent + offset, y}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } + + --count; + y += 2.0f * extent; + } + } + + static Sample* Create(const Settings& settings) + { + return new OverlapRecovery(settings); + } +}; + +static int sampleIndex4 = RegisterSample("Behavior", "Overlap Recovery", OverlapRecovery::Create); diff --git a/src/broad_phase.c b/src/broad_phase.c index fbea52f9..3d755101 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -178,7 +178,7 @@ static bool b2PairQueryCallback(int32_t proxyId, int32_t shapeIndex, void* conte } bool moved = b2ContainsKey(&bp->moveSet, proxyKey); - if (moved && proxyKey > queryContext->queryProxyKey) + if (moved && proxyKey < queryContext->queryProxyKey) { // Both proxies are moving. Avoid duplicate pairs. return true; @@ -282,7 +282,7 @@ void b2FindPairsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, continue; } - int32_t proxyType = B2_PROXY_TYPE(proxyKey); + b2BodyType proxyType = B2_PROXY_TYPE(proxyKey); int32_t proxyId = B2_PROXY_ID(proxyKey); queryContext.queryProxyKey = proxyKey; @@ -296,12 +296,12 @@ void b2FindPairsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, // Query trees if (proxyType == b2_dynamicBody) { - queryContext.queryTreeType = b2_dynamicBody; - b2DynamicTree_Query(bp->trees + b2_dynamicBody, fatAABB, b2PairQueryCallback, &queryContext); - queryContext.queryTreeType = b2_kinematicBody; - b2DynamicTree_Query(bp->trees + b2_kinematicBody, fatAABB, b2PairQueryCallback, &queryContext); queryContext.queryTreeType = b2_staticBody; b2DynamicTree_Query(bp->trees + b2_staticBody, fatAABB, b2PairQueryCallback, &queryContext); + queryContext.queryTreeType = b2_kinematicBody; + b2DynamicTree_Query(bp->trees + b2_kinematicBody, fatAABB, b2PairQueryCallback, &queryContext); + queryContext.queryTreeType = b2_dynamicBody; + b2DynamicTree_Query(bp->trees + b2_dynamicBody, fatAABB, b2PairQueryCallback, &queryContext); } else if (proxyType == b2_kinematicBody) { diff --git a/src/graph.c b/src/graph.c index 7efa932d..7b3f9955 100644 --- a/src/graph.c +++ b/src/graph.c @@ -433,7 +433,8 @@ static void b2InitializeSoftConstraints(b2World* world, b2GraphColor* color, flo cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; // Soft contact with speculation - const float hertz = 30.0f; + const float hertz = mA == 0.0f ? 60.0f : 30.0f; + //const float hertz = 30.0f; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; // float d = 2.0f * zeta * omega / kNormal; @@ -1656,6 +1657,8 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); } +// Soft constraints with substepping. Allows for stiffer contacts with a small performance hit. Includes a +// bias removal stage to help remove excess warm starting energy. void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; From 2325a9fe244812efacb5327ee5f14292ad6cbf30 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 2 Sep 2023 23:04:29 -0700 Subject: [PATCH 13/51] static contacts kinda last fix move set soft revolute joint --- include/box2d/box2d.h | 2 + samples/CMakeLists.txt | 2 +- samples/collection/behavior.cpp | 36 ++-- samples/collection/benchmark_joint_grid.cpp | 88 --------- samples/collection/sample_joints.cpp | 194 +++++++++++++++++++ samples/collection/sample_vertical_stack.cpp | 4 +- samples/sample.cpp | 4 +- src/broad_phase.c | 4 +- src/broad_phase.h | 3 +- src/graph.c | 72 +++++-- src/island.c | 6 +- src/joint.c | 48 +++-- src/joint.h | 11 +- src/mouse_joint.c | 2 +- src/revolute_joint.c | 138 ++++++++++++- src/table.c | 7 + src/table.h | 2 +- src/world.c | 17 +- 18 files changed, 484 insertions(+), 156 deletions(-) delete mode 100644 samples/collection/benchmark_joint_grid.cpp create mode 100644 samples/collection/sample_joints.cpp diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index 8db00f0f..c3defb11 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -35,6 +35,8 @@ BOX2D_API void b2World_Draw(b2WorldId worldId, b2DebugDraw* debugDraw); /// Enable/disable sleep. BOX2D_API void b2World_EnableSleeping(b2WorldId worldId, bool flag); +BOX2D_API void b2World_EnableWarmStarting(b2WorldId worldId, bool flag); + /// Enable/disable continuous collision. BOX2D_API void b2World_EnableContinuous(b2WorldId worldId, bool flag); diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 8ae488ec..324c1f2c 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -65,7 +65,7 @@ set(BOX2D_SAMPLES collection/benchmark_barrel.cpp collection/benchmark_create_destroy.cpp - collection/benchmark_joint_grid.cpp + collection/sample_joints.cpp collection/benchmark_many_tumblers.cpp collection/benchmark_pyramid.cpp collection/benchmark_tumbler.cpp diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index 5cc07fe2..a53a1ef6 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -106,7 +106,7 @@ class HighMassRatio2 : public Sample b2Polygon smallTriangle = b2MakePolygon(&hull, 0.0f); b2Polygon smallBox = b2MakeBox(0.5f * extent, 0.5f * extent); b2Polygon bigBox = b2MakeBox(10.0f * extent, 10.0f * extent); - + { bodyDef.position = {-9.5f * extent, 0.5f * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); @@ -120,7 +120,7 @@ class HighMassRatio2 : public Sample } { - bodyDef.position = {0.0f, (10.0f + 1.0f) * extent }; + bodyDef.position = {0.0f, (10.0f + 1.0f) * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); b2Body_CreatePolygon(bodyId, &shapeDef, &bigBox); } @@ -209,33 +209,33 @@ class OverlapRecovery : public Sample float groundWidth = 10.0f * extent; b2ShapeDef shapeDef = b2DefaultShapeDef(); - shapeDef.density = 1.0f; + shapeDef.density = 1.0f; - b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Segment segment = {{-groundWidth, 0.0f}, {groundWidth, 0.0f}}; b2Body_CreateSegment(groundId, &shapeDef, &segment); bodyDef.type = b2_dynamicBody; b2Polygon box = b2MakeBox(extent, extent); - int count = 3; - float offset = -count * extent; - float y = extent; - while (count > 0) + int count = 4; + float fraction = 0.75f; + float y = fraction * extent; + while (count > 0) + { + for (int i = 0; i < count; ++i) { - for (int i = 0; i < count; ++i) - { - float coeff = i - 0.5f * count; + float coeff = i - 0.5f * count; - bodyDef.position = {2.0f * coeff * extent + offset, y}; - b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); - - b2Body_CreatePolygon(bodyId, &shapeDef, &box); - } + bodyDef.position = {2.0f * fraction * coeff * extent, y}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); - --count; - y += 2.0f * extent; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); } + + --count; + y += 2.0f * fraction * extent; + } } static Sample* Create(const Settings& settings) diff --git a/samples/collection/benchmark_joint_grid.cpp b/samples/collection/benchmark_joint_grid.cpp deleted file mode 100644 index 331db2f7..00000000 --- a/samples/collection/benchmark_joint_grid.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2022 Erin Catto -// SPDX-License-Identifier: MIT - -#include "box2d/box2d.h" -#include "box2d/geometry.h" -#include "sample.h" - -// TODO_ERIN test more joint types -// TODO_ERIN try to stabilize revolute -class BenchmarkJointGrid : public Sample -{ -public: - BenchmarkJointGrid(const Settings& settings) - : Sample(settings) - { - constexpr float rad = 0.4f; - constexpr int32_t numi = g_sampleDebug ? 10 : 100; - constexpr int32_t numk = g_sampleDebug ? 10 : 100; - constexpr float shift = 1.0f; - - // Allocate to avoid huge stack usage - b2BodyId* bodies = static_cast(malloc(numi * numk * sizeof(b2BodyId))); - int32_t index = 0; - - b2ShapeDef sd = b2DefaultShapeDef(); - sd.density = 1.0f; - sd.filter.maskBits = 0; - - b2Circle circle = {0}; - circle.radius = rad; - - b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); - - for (int32_t k = 0; k < numk; ++k) - { - for (int32_t i = 0; i < numi; ++i) - { - float fk = (float)k; - float fi = (float)i; - - b2BodyDef bd = b2DefaultBodyDef(); - if (k >= numk / 2 - 3 && k <= numk / 2 + 3 && i == 0) - { - bd.type = b2_staticBody; - } - else - { - bd.type = b2_dynamicBody; - } - - bd.position = {fk * shift, -fi * shift}; - - b2BodyId body = b2World_CreateBody(m_worldId, &bd); - - b2Body_CreateCircle(body, &sd, &circle); - - if (i > 0) - { - jd.bodyIdA = bodies[index - 1]; - jd.bodyIdB = body; - jd.localAnchorA = {0.0f, -0.5f * shift}; - jd.localAnchorB = {0.0f, 0.5f * shift}; - b2World_CreateRevoluteJoint(m_worldId, &jd); - } - - if (k > 0) - { - jd.bodyIdA = bodies[index - numi]; - jd.bodyIdB = body; - jd.localAnchorA = {0.5f * shift, 0.0f}; - jd.localAnchorB = {-0.5f * shift, 0.0f}; - b2World_CreateRevoluteJoint(m_worldId, &jd); - } - - bodies[index++] = body; - } - } - - free(bodies); - } - - static Sample* Create(const Settings& settings) - { - return new BenchmarkJointGrid(settings); - } -}; - -static int sampleIndex = RegisterSample("Benchmark", "Joint Grid", BenchmarkJointGrid::Create); diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp new file mode 100644 index 00000000..a154ee5c --- /dev/null +++ b/samples/collection/sample_joints.cpp @@ -0,0 +1,194 @@ +// SPDX-FileCopyrightText: 2022 Erin Catto +// SPDX-License-Identifier: MIT + +#include "box2d/box2d.h" +#include "box2d/geometry.h" +#include "sample.h" + +// TODO_ERIN test more joint types +// TODO_ERIN try to stabilize revolute +class BenchmarkJointGrid : public Sample +{ +public: + BenchmarkJointGrid(const Settings& settings) + : Sample(settings) + { + constexpr float rad = 0.4f; + constexpr int32_t numi = g_sampleDebug ? 100 : 100; + constexpr int32_t numk = g_sampleDebug ? 100 : 100; + constexpr float shift = 1.0f; + + // Allocate to avoid huge stack usage + b2BodyId* bodies = static_cast(malloc(numi * numk * sizeof(b2BodyId))); + int32_t index = 0; + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 1.0f; + sd.filter.maskBits = 0; + + b2Circle circle = {0}; + circle.radius = rad; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + for (int32_t k = 0; k < numk; ++k) + { + for (int32_t i = 0; i < numi; ++i) + { + float fk = (float)k; + float fi = (float)i; + + b2BodyDef bd = b2DefaultBodyDef(); + if (k >= numk / 2 - 3 && k <= numk / 2 + 3 && i == 0) + { + bd.type = b2_staticBody; + } + else + { + bd.type = b2_dynamicBody; + } + + bd.position = {fk * shift, -fi * shift}; + + b2BodyId body = b2World_CreateBody(m_worldId, &bd); + + b2Body_CreateCircle(body, &sd, &circle); + + if (i > 0) + { + jd.bodyIdA = bodies[index - 1]; + jd.bodyIdB = body; + jd.localAnchorA = {0.0f, -0.5f * shift}; + jd.localAnchorB = {0.0f, 0.5f * shift}; + b2World_CreateRevoluteJoint(m_worldId, &jd); + } + + if (k > 0) + { + jd.bodyIdA = bodies[index - numi]; + jd.bodyIdB = body; + jd.localAnchorA = {0.5f * shift, 0.0f}; + jd.localAnchorB = {-0.5f * shift, 0.0f}; + b2World_CreateRevoluteJoint(m_worldId, &jd); + } + + bodies[index++] = body; + } + } + + free(bodies); + } + + static Sample* Create(const Settings& settings) + { + return new BenchmarkJointGrid(settings); + } +}; + +static int sampleJointGridIndex = RegisterSample("Joints", "Joint Grid", BenchmarkJointGrid::Create); + +// A suspension bridge +class Bridge : public Sample +{ + public: + enum + { + e_count = 200 + }; + + Bridge(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.position = {0.0f, -1.0f}; + groundId = b2World_CreateBody(m_worldId, &bd); + + //b2Segment segment = {{-80.0f, 0.0f}, {80.0f, 0.0f}}; + //b2ShapeDef sd = b2DefaultShapeDef(); + //b2Body_CreateSegment(groundId, &sd, &segment); + } + + { + b2Polygon box = b2MakeBox(0.5f, 0.125f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {-34.5f + 1.0f * i, 20.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {-35.0f + 1.0f * i, 20.0f}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + b2World_CreateRevoluteJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + b2Vec2 pivot = {-35.0f + 1.0f * e_count, 20.0f}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = groundId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + b2World_CreateRevoluteJoint(m_worldId, &jd); + } + +#if 0 + for (int32 i = 0; i < 2; ++i) + { + b2Vec2 vertices[3]; + vertices[0].Set(-0.5f, 0.0f); + vertices[1].Set(0.5f, 0.0f); + vertices[2].Set(0.0f, 1.5f); + + b2PolygonShape shape; + shape.Set(vertices, 3); + + b2FixtureDef fd; + fd.shape = &shape; + fd.density = 1.0f; + + b2BodyDef bd; + bd.type = b2_dynamicBody; + bd.position.Set(-8.0f + 8.0f * i, 12.0f); + b2Body* body = m_world->CreateBody(&bd); + body->CreateFixture(&fd); + } + + for (int32 i = 0; i < 3; ++i) + { + b2CircleShape shape; + shape.m_radius = 0.5f; + + b2FixtureDef fd; + fd.shape = &shape; + fd.density = 1.0f; + + b2BodyDef bd; + bd.type = b2_dynamicBody; + bd.position.Set(-6.0f + 6.0f * i, 10.0f); + b2Body* body = m_world->CreateBody(&bd); + body->CreateFixture(&fd); + } +#endif + } + + static Sample* Create(const Settings& settings) + { + return new Bridge(settings); + } +}; + +static int sampleBridgeIndex = RegisterSample("Joints", "Bridge", Bridge::Create); diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index 05582131..9f00a5d3 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -56,7 +56,7 @@ class VerticalStack : public Sample } m_shapeType = e_boxShape; - m_rowCount = g_sampleDebug ? 2 : 50; + m_rowCount = g_sampleDebug ? 14 : 50; m_columnCount = g_sampleDebug ? 1 : 200; m_bulletCount = 1; m_bulletType = e_circleShape; @@ -93,7 +93,7 @@ class VerticalStack : public Sample } else { - offset = 0.0f; // 0.01f; + offset = 0.01f; } float dx = 3.0f; diff --git a/samples/sample.cpp b/samples/sample.cpp index c048e4f4..c8d4cd3d 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -229,9 +229,7 @@ void Sample::Step(Settings& settings) g_draw.m_debugDraw.drawCOMs = settings.m_drawCOMs; b2World_EnableSleeping(m_worldId, settings.m_enableSleep); - - // m_world->SetWarmStarting(settings.m_enableWarmStarting); - // m_world->SetContinuousPhysics(settings.m_enableContinuous); + b2World_EnableWarmStarting(m_worldId, settings.m_enableWarmStarting); if (timeStep > 0.0f) { diff --git a/src/broad_phase.c b/src/broad_phase.c index 3d755101..d6dd280f 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -75,7 +75,7 @@ void b2DestroyBroadPhase(b2BroadPhase* bp) static inline void b2UnBufferMove(b2BroadPhase* bp, int32_t proxyKey) { - bool found = b2RemoveKey(&bp->moveSet, proxyKey); + bool found = b2RemoveKey(&bp->moveSet, proxyKey + 1); if (found) { @@ -177,7 +177,7 @@ static bool b2PairQueryCallback(int32_t proxyId, int32_t shapeIndex, void* conte return true; } - bool moved = b2ContainsKey(&bp->moveSet, proxyKey); + bool moved = b2ContainsKey(&bp->moveSet, proxyKey + 1); if (moved && proxyKey < queryContext->queryProxyKey) { // Both proxies are moving. Avoid duplicate pairs. diff --git a/src/broad_phase.h b/src/broad_phase.h index e000eb58..c9c0a760 100644 --- a/src/broad_phase.h +++ b/src/broad_phase.h @@ -65,7 +65,8 @@ void b2ValidateNoEnlarged(const b2BroadPhase* bp); // Warning: this must be called in deterministic order static inline void b2BufferMove(b2BroadPhase* bp, int32_t proxyKey) { - bool alreadyAdded = b2AddKey(&bp->moveSet, proxyKey); + // Adding 1 because 0 is the sentinel + bool alreadyAdded = b2AddKey(&bp->moveSet, proxyKey + 1); if (alreadyAdded == false) { b2Array_Push(bp->moveArray, proxyKey); diff --git a/src/graph.c b/src/graph.c index 7b3f9955..5211e39c 100644 --- a/src/graph.c +++ b/src/graph.c @@ -8,6 +8,7 @@ #include "body.h" #include "contact.h" #include "core.h" +#include "joint.h" #include "shape.h" #include "solver_data.h" #include "stack_allocator.h" @@ -80,7 +81,8 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) } else if (typeA == b2_dynamicBody) { - for (int32_t i = 0; i < b2_graphColorCount; ++i) + // Static contacts never in color 0 + for (int32_t i = 1; i < b2_graphColorCount; ++i) { b2GraphColor* color = graph->colors + i; if (b2GetBit(&color->bodySet, bodyIndexA)) @@ -98,7 +100,8 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) } else if (typeB == b2_dynamicBody) { - for (int32_t i = 0; i < b2_graphColorCount; ++i) + // Static contacts never in color 0 + for (int32_t i = 1; i < b2_graphColorCount; ++i) { b2GraphColor* color = graph->colors + i; if (b2GetBit(&color->bodySet, bodyIndexB)) @@ -362,7 +365,7 @@ typedef struct b2Constraint int32_t pointCount; } b2Constraint; -static void b2InitializeSoftConstraints(b2World* world, b2GraphColor* color, float h, bool warmStart) +static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, bool warmStart) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -433,8 +436,8 @@ static void b2InitializeSoftConstraints(b2World* world, b2GraphColor* color, flo cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; // Soft contact with speculation - const float hertz = mA == 0.0f ? 60.0f : 30.0f; - //const float hertz = 30.0f; + //const float hertz = mA == 0.0f ? 60.0f : 30.0f; + const float hertz = 30.0f; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; // float d = 2.0f * zeta * omega / kNormal; @@ -969,7 +972,7 @@ static void b2SolveVelocityConstraintsSorted(b2World* world, b2Constraint* const } } -static void b2SolveVelocityConstraintsSoft(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) +static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -1292,7 +1295,7 @@ static void b2IntegratePositions(b2World* world, float h) } } -static void b2SolvePositionConstraints(b2World* world, b2GraphColor* color) +static void b2SolveContactPosition(b2World* world, b2GraphColor* color) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -1590,7 +1593,7 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolvePositionConstraints(world, colors + i); + b2SolveContactPosition(world, colors + i); } } @@ -1629,14 +1632,14 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2InitializeSoftConstraints(world, colors + i, h, true); + b2PrepareSoftContact(world, colors + i, h, true); } for (int32_t iter = 0; iter < velocityIterations; ++iter) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraintsSoft(world, colors + i, stepContext->inv_dt, true); + b2SolveSoftContact(world, colors + i, stepContext->inv_dt, true); } } @@ -1646,7 +1649,7 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveVelocityConstraintsSoft(world, colors + i, stepContext->inv_dt, false); + b2SolveSoftContact(world, colors + i, stepContext->inv_dt, false); } } @@ -1663,6 +1666,7 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; + b2Joint* joints = world->joints; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -1686,10 +1690,24 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { + // Soft constraints initialized with full time step bool warmStart = true; - b2InitializeSoftConstraints(world, colors + i, stepContext->dt, warmStart); + b2PrepareSoftContact(world, colors + i, stepContext->dt, warmStart); } - + + int32_t jointCapacity = world->jointPool.capacity; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2PrepareJoint(joint, stepContext); + } + int32_t substepCount = stepContext->velocityIterations; float h = stepContext->dt / substepCount; float inv_h = 1.0f / h; @@ -1697,10 +1715,22 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t substep = 0; substep < substepCount; ++substep) { // One constraint iteration + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + bool removeOverlap = true; + b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + } + for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = true; - b2SolveVelocityConstraintsSoft(world, colors + i, inv_h, removeOverlap); + b2SolveSoftContact(world, colors + i, inv_h, removeOverlap); } b2IntegrateDeltaTransform(world, h); @@ -1711,10 +1741,22 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) int32_t positionIterations = stepContext->positionIterations; for (int32_t iter = 0; iter < positionIterations; ++iter) { + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + bool removeOverlap = false; + b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + } + for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = false; - b2SolveVelocityConstraintsSoft(world, colors + i, 0.0f, removeOverlap); + b2SolveSoftContact(world, colors + i, 0.0f, removeOverlap); } } diff --git a/src/island.c b/src/island.c index d095301e..82bb9b3c 100644 --- a/src/island.c +++ b/src/island.c @@ -1056,7 +1056,7 @@ void b2SolveIsland(b2Island* island, uint32_t threadIndex) while (jointIndex != B2_NULL_INDEX) { b2Joint* joint = joints + jointIndex; - b2InitVelocityConstraints(joint, context); + b2PrepareJoint(joint, context); jointIndex = joint->islandNext; } @@ -1068,7 +1068,7 @@ void b2SolveIsland(b2Island* island, uint32_t threadIndex) while (jointIndex != B2_NULL_INDEX) { b2Joint* joint = joints + jointIndex; - b2SolveVelocityConstraints(joint, context); + b2SolveJointVelocity(joint, context); jointIndex = joint->islandNext; } @@ -1152,7 +1152,7 @@ void b2SolveIsland(b2Island* island, uint32_t threadIndex) { b2Joint* joint = joints + jointIndex; - bool jointOkay = b2SolvePositionConstraints(joint, context); + bool jointOkay = b2SolveJointPosition(joint, context); jointsOkay = jointsOkay && jointOkay; jointIndex = joint->islandNext; diff --git a/src/joint.c b/src/joint.c index 22afbd54..8f2e6597 100644 --- a/src/joint.c +++ b/src/joint.c @@ -323,19 +323,19 @@ void b2World_DestroyJoint(b2JointId jointId) b2FreeObject(&world->jointPool, &joint->object); } -extern void b2InitializeMouse(b2Joint* base, b2StepContext* data); -extern void b2InitializeRevolute(b2Joint* base, b2StepContext* data); +extern void b2PrepareMouse(b2Joint* base, const b2StepContext* context); +extern void b2PrepareRevolute(b2Joint* base, const b2StepContext* context); -void b2InitVelocityConstraints(b2Joint* joint, b2StepContext* data) +void b2PrepareJoint(b2Joint* joint, const b2StepContext* context) { switch (joint->type) { case b2_mouseJoint: - b2InitializeMouse(joint, data); + b2PrepareMouse(joint, context); break; case b2_revoluteJoint: - b2InitializeRevolute(joint, data); + b2PrepareRevolute(joint, context); break; default: @@ -343,19 +343,19 @@ void b2InitVelocityConstraints(b2Joint* joint, b2StepContext* data) } } -extern void b2SolveMouseVelocity(b2Joint* base, b2StepContext* data); -extern void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* data); +extern void b2SolveMouseVelocity(b2Joint* base, const b2StepContext* context); +extern void b2SolveRevoluteVelocity(b2Joint* base, const b2StepContext* context); -void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data) +void b2SolveJointVelocity(b2Joint* joint, const b2StepContext* context) { switch (joint->type) { case b2_mouseJoint: - b2SolveMouseVelocity(joint, data); + b2SolveMouseVelocity(joint, context); break; case b2_revoluteJoint: - b2SolveRevoluteVelocity(joint, data); + b2SolveRevoluteVelocity(joint, context); break; default: @@ -363,15 +363,37 @@ void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data) } } -extern bool b2SolveRevolutePosition(b2Joint* base, b2StepContext* data); +extern void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap); + +void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool removeOverlap) +{ + switch (joint->type) + { + case b2_mouseJoint: + if (removeOverlap) + { + b2SolveMouseVelocity(joint, context); + } + break; + + case b2_revoluteJoint: + b2SolveRevoluteVelocitySoft(joint, context, removeOverlap); + break; + + default: + B2_ASSERT(false); + } +} + +extern bool b2SolveRevolutePosition(b2Joint* base, const b2StepContext* context); // This returns true if the position errors are within tolerance. -bool b2SolvePositionConstraints(b2Joint* joint, b2StepContext* data) +bool b2SolveJointPosition(b2Joint* joint, const b2StepContext* context) { switch (joint->type) { case b2_revoluteJoint: - return b2SolveRevolutePosition(joint, data); + return b2SolveRevolutePosition(joint, context); default: return true; diff --git a/src/joint.h b/src/joint.h index d9025c86..047ae305 100644 --- a/src/joint.h +++ b/src/joint.h @@ -85,6 +85,10 @@ typedef struct b2RevoluteJoint float invIA; float invIB; b2Mat22 K; + b2Vec2 separation; + float biasCoefficient; + float massCoefficient; + float impulseCoefficient; float angle; float axialMass; } b2RevoluteJoint; @@ -116,10 +120,11 @@ typedef struct b2Joint bool collideConnected; } b2Joint; -void b2InitVelocityConstraints(b2Joint* joint, b2StepContext* data); -void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data); +void b2PrepareJoint(b2Joint* joint, const b2StepContext* context); +void b2SolveJointVelocity(b2Joint* joint, const b2StepContext* context); +void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool removeOverlap); // This returns true if the position errors are within tolerance. -bool b2SolvePositionConstraints(b2Joint* joint, b2StepContext* data); +bool b2SolveJointPosition(b2Joint* joint, const b2StepContext* context); void b2DrawJoint(b2DebugDraw* draw, b2World* world, b2Joint* joint); diff --git a/src/mouse_joint.c b/src/mouse_joint.c index 31d7c2ab..3a5addee 100644 --- a/src/mouse_joint.c +++ b/src/mouse_joint.c @@ -33,7 +33,7 @@ void b2MouseJoint_SetTarget(b2JointId jointId, b2Vec2 target) base->mouseJoint.targetA = target; } -void b2InitializeMouse(b2Joint* base, b2StepContext* context) +void b2PrepareMouse(b2Joint* base, b2StepContext* context) { B2_ASSERT(base->type == b2_mouseJoint); diff --git a/src/revolute_joint.c b/src/revolute_joint.c index f5c2a4dc..79b39ada 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -22,7 +22,7 @@ // J = [0 0 -1 0 0 1] // K = invI1 + invI2 -void b2InitializeRevolute(b2Joint* base, b2StepContext* context) +void b2PrepareRevolute(b2Joint* base, b2StepContext* context) { B2_ASSERT(base->type == b2_revoluteJoint); @@ -86,6 +86,22 @@ void b2InitializeRevolute(b2Joint* base, b2StepContext* context) fixedRotation = true; } + // TODO_ERIN softness experiment + const float hertz = 120.0f; + const float zeta = 4.0f; + float omega = 2.0f * b2_pi * hertz; + float h = context->dt; + + joint->separation = b2Add(b2Sub(joint->rB, joint->rA), b2Sub(bodyB->position, bodyA->position)); + joint->biasCoefficient = omega / (2.0f * zeta + h * omega); + float c = h * omega * (2.0f * zeta + h * omega); + joint->impulseCoefficient = 1.0f / (1.0f + c); + joint->massCoefficient = c * joint->impulseCoefficient; + + //joint->biasCoefficient = 0.5f; + //joint->impulseCoefficient = 0.0f; + //joint->massCoefficient = 1.0f; + joint->angle = aB - aA - joint->referenceAngle; if (joint->enableLimit == false || fixedRotation) { @@ -131,7 +147,7 @@ void b2InitializeRevolute(b2Joint* base, b2StepContext* context) bodyB->angularVelocity = wB; } -void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) +void b2SolveRevoluteVelocity(b2Joint* base, const b2StepContext* context) { B2_ASSERT(base->type == b2_revoluteJoint); @@ -216,6 +232,124 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) bodyB->angularVelocity = wB; } +void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap) +{ + B2_ASSERT(base->type == b2_revoluteJoint); + + b2RevoluteJoint* joint = &base->revoluteJoint; + + b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; + b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); + const float aA = bodyA->angle + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); + const float aB = bodyB->angle + bodyB->deltaAngle; + + float mA = joint->invMassA, mB = joint->invMassB; + float iA = joint->invIA, iB = joint->invIB; + + bool fixedRotation = (iA + iB == 0.0f); + + // Solve motor constraint. + if (joint->enableMotor && fixedRotation == false) + { + float Cdot = wB - wA - joint->motorSpeed; + float impulse = -joint->axialMass * Cdot; + float oldImpulse = joint->motorImpulse; + float maxImpulse = context->dt * joint->maxMotorTorque; + joint->motorImpulse = B2_CLAMP(joint->motorImpulse + impulse, -maxImpulse, maxImpulse); + impulse = joint->motorImpulse - oldImpulse; + + wA -= iA * impulse; + wB += iB * impulse; + } + + if (joint->enableLimit && fixedRotation == false) + { + // Lower limit + { + float C = joint->angle - joint->lowerAngle; + float Cdot = wB - wA; + float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float oldImpulse = joint->lowerImpulse; + joint->lowerImpulse = B2_MAX(joint->lowerImpulse + impulse, 0.0f); + impulse = joint->lowerImpulse - oldImpulse; + + wA -= iA * impulse; + wB += iB * impulse; + } + + // Upper limit + // Note: signs are flipped to keep C positive when the constraint is satisfied. + // This also keeps the impulse positive when the limit is active. + { + float C = joint->upperAngle - joint->angle; + float Cdot = wA - wB; + float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float oldImpulse = joint->upperImpulse; + joint->upperImpulse = B2_MAX(joint->upperImpulse + impulse, 0.0f); + impulse = joint->upperImpulse - oldImpulse; + + wA += iA * impulse; + wB -= iB * impulse; + } + } + + // Solve point-to-point constraint + { + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + + b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); + b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + + b2Mat22 K; + K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; + K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; + K.cx.y = K.cy.x; + K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; + + b2Vec2 separation = b2Add(b2Sub(rB, rA), b2Sub(cB, cA)); + + b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, rB)), b2Add(vA, b2CrossSV(wA, rA))); + + float biasScale = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (removeOverlap) + { + biasScale = joint->biasCoefficient; + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + + b2Vec2 b = b2Solve22(K, b2MulAdd(Cdot, biasScale, separation)); + b2Vec2 impulse; + impulse.x = -massScale * b.x - impulseScale * joint->impulse.x; + impulse.y = -massScale * b.y - impulseScale * joint->impulse.y; + + joint->impulse.x += impulse.x; + joint->impulse.y += impulse.y; + + vA = b2MulSub(vA, mA, impulse); + wA -= iA * b2Cross(rA, impulse); + + vB = b2MulAdd(vB, mB, impulse); + wB += iB * b2Cross(rB, impulse); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; +} + bool b2SolveRevolutePosition(b2Joint* base, b2StepContext* context) { B2_ASSERT(base->type == b2_revoluteJoint); diff --git a/src/table.c b/src/table.c index 00316c00..a1a16251 100644 --- a/src/table.c +++ b/src/table.c @@ -156,6 +156,8 @@ static void b2GrowTable(b2Set* set) bool b2ContainsKey(const b2Set* set, uint64_t key) { + // key of zero is a sentinel + B2_ASSERT(key != 0); uint32_t hash = b2KeyHash(key); int32_t index = b2FindSlot(set, key, hash); return set->items[index].key == key; @@ -163,7 +165,12 @@ bool b2ContainsKey(const b2Set* set, uint64_t key) bool b2AddKey(b2Set* set, uint64_t key) { + // key of zero is a sentinel + B2_ASSERT(key != 0); + uint32_t hash = b2KeyHash(key); + B2_ASSERT(hash != 0); + int32_t index = b2FindSlot(set, key, hash); if (set->items[index].hash != 0) { diff --git a/src/table.h b/src/table.h index dc492e68..16074c2b 100644 --- a/src/table.h +++ b/src/table.h @@ -26,7 +26,7 @@ void b2DestroySet(b2Set* set); void b2ClearSet(b2Set* set); - // Returns true if key was already in set +// Returns true if key was already in set bool b2AddKey(b2Set* set, uint64_t key); // Returns true if the key was found diff --git a/src/world.c b/src/world.c index 59057536..4055c9a1 100644 --- a/src/world.c +++ b/src/world.c @@ -895,7 +895,6 @@ static void b2Solve(b2World* world, b2StepContext* context) B2_ASSERT(B2_PROXY_TYPE(proxyKey) == b2_dynamicBody); // all fast shapes should already be in the move buffer - b2DynamicTree_EnlargeProxy(tree, proxyId, shape->fatAABB); shapeIndex = shape->nextShapeIndex; @@ -939,8 +938,8 @@ static void b2Solve2(b2World* world, b2StepContext* context) //b2SolveGraphSoftPGS(world, context); //b2SolveGraphPGS(world, context); - //b2SolveGraphSoftTGS(world, context); - b2SolveGraphStickyTGS(world, context); + b2SolveGraphSoftTGS(world, context); + //b2SolveGraphStickyTGS(world, context); b2ValidateNoEnlarged(&world->broadPhase); @@ -1477,6 +1476,18 @@ void b2World_EnableSleeping(b2WorldId worldId, bool flag) } } +void b2World_EnableWarmStarting(b2WorldId worldId, bool flag) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->warmStarting = flag; +} + void b2World_EnableContinuo(b2WorldId worldId, bool flag) { b2World* world = b2GetWorldFromId(worldId); From 9a212e3cddb97f0c9e0a90c22048cb281b2676a0 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 4 Sep 2023 00:10:43 -0700 Subject: [PATCH 14/51] weld joint testing --- include/box2d/box2d.h | 2 + include/box2d/joint_types.h | 46 +++++ include/box2d/math.h | 33 +++- include/box2d/types.h | 13 ++ samples/collection/behavior.cpp | 2 +- samples/collection/sample_joints.cpp | 242 +++++++++++++++++++++++---- src/CMakeLists.txt | 1 + src/joint.c | 57 +++++++ src/joint.h | 26 +++ src/revolute_joint.c | 26 ++- src/solver_data.h | 1 + src/weld_joint.c | 191 +++++++++++++++++++++ 12 files changed, 599 insertions(+), 41 deletions(-) create mode 100644 src/weld_joint.c diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index c3defb11..f6845241 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -77,6 +77,7 @@ BOX2D_API bool b2Shape_TestPoint(b2ShapeId shapeId, b2Vec2 point); BOX2D_API b2JointId b2World_CreateMouseJoint(b2WorldId worldId, const b2MouseJointDef* def); BOX2D_API b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDef* def); +BOX2D_API b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def); BOX2D_API void b2World_DestroyJoint(b2JointId jointId); BOX2D_API void b2MouseJoint_SetTarget(b2JointId jointId, b2Vec2 target); @@ -85,6 +86,7 @@ BOX2D_API void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit); BOX2D_API void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor); BOX2D_API void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed); BOX2D_API float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep); +BOX2D_API void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque); /// This function receives shapes found in the AABB query. /// @return true if the query should continue diff --git a/include/box2d/joint_types.h b/include/box2d/joint_types.h index cb9fa245..a78f3f77 100644 --- a/include/box2d/joint_types.h +++ b/include/box2d/joint_types.h @@ -115,3 +115,49 @@ static inline struct b2RevoluteJointDef b2DefaultRevoluteJointDef(void) def.collideConnected = false; return def; } + +typedef struct b2WeldJointDef +{ + /// The first attached body. + b2BodyId bodyIdA; + + /// The second attached body. + b2BodyId bodyIdB; + + /// The local anchor point relative to bodyA's origin. + b2Vec2 localAnchorA; + + /// The local anchor point relative to bodyB's origin. + b2Vec2 localAnchorB; + + /// The bodyB angle minus bodyA angle in the reference state (radians). + /// This defines the zero angle for the joint limit. + float referenceAngle; + + /// Stiffness expressed as hertz (oscillations per second). Use zero for maximum stiffness. + float linearHertz; + float angularHertz; + + /// Damping ratio, non-dimensional. Use 1 for critical damping. + float linearDampingRatio; + float angularDampingRatio; + + /// Set this flag to true if the attached bodies should collide. + bool collideConnected; +} b2WeldJointDef; + +static inline struct b2WeldJointDef b2DefaultWeldJointDef(void) +{ + b2WeldJointDef def = {0}; + def.bodyIdA = b2_nullBodyId; + def.bodyIdB = b2_nullBodyId; + def.localAnchorA = B2_LITERAL(b2Vec2){0.0f, 0.0f}; + def.localAnchorB = B2_LITERAL(b2Vec2){0.0f, 0.0f}; + def.referenceAngle = 0.0f; + def.linearHertz = 0.0f; + def.angularHertz = 0.0f; + def.linearDampingRatio = 1.0f; + def.angularDampingRatio = 1.0f; + def.collideConnected = false; + return def; +} \ No newline at end of file diff --git a/include/box2d/math.h b/include/box2d/math.h index 315fefa0..a287765e 100644 --- a/include/box2d/math.h +++ b/include/box2d/math.h @@ -18,9 +18,11 @@ extern "C" #define B2_CLAMP(A, B, C) B2_MIN(B2_MAX(A, B), C) static const b2Vec2 b2Vec2_zero = {0.0f, 0.0f}; +static const b2Vec3 b2Vec3_zero = {0.0f, 0.0f, 0.0f}; static const b2Rot b2Rot_identity = {0.0f, 1.0f}; static const b2Transform b2Transform_identity = {{0.0f, 0.0f}, {0.0f, 1.0f}}; static const b2Mat22 b2Mat22_zero = {{0.0f, 0.0f}, {0.0f, 0.0f}}; +static const b2Mat33 b2Mat33_zero = {{0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f}}; bool b2IsValid(float a); bool b2IsValidVec2(b2Vec2 v); @@ -43,6 +45,19 @@ static inline float b2Cross(b2Vec2 a, b2Vec2 b) return a.x * b.y - a.y * b.x; } +/// Perform the dot product on two 3-vectors. +static inline float b2Dot3(b2Vec3 a, b2Vec3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +/// Perform the cross product on two 3-vectors. +static inline b2Vec3 b2Cross3(b2Vec3 a, b2Vec3 b) +{ + return B2_LITERAL(b2Vec3){a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; +} + + /// Perform the cross product on a vector and a scalar. In 2D this produces /// a vector. static inline b2Vec2 b2CrossVS(b2Vec2 v, float s) @@ -305,8 +320,7 @@ static inline b2Mat22 b2GetInverse22(b2Mat22 A) return B; } -/// Solve A * x = b, where b is a column vector. This is more efficient -/// than computing the inverse in one-shot cases. +/// Solve A * x = b, where b is a column vector. static inline b2Vec2 b2Solve22(b2Mat22 A, b2Vec2 b) { float a11 = A.cx.x, a12 = A.cy.x, a21 = A.cx.y, a22 = A.cy.y; @@ -319,6 +333,21 @@ static inline b2Vec2 b2Solve22(b2Mat22 A, b2Vec2 b) return x; } +/// Solve A * x = b, where b is a column vector. +static inline b2Vec3 b2Solve33(b2Mat33 A, b2Vec3 b) +{ + float det = b2Dot3(A.cx, b2Cross3(A.cy, A.cz)); + if (det != 0.0f) + { + det = 1.0f / det; + } + b2Vec3 x; + x.x = det * b2Dot3(b, b2Cross3(A.cy, A.cz)); + x.y = det * b2Dot3(A.cx, b2Cross3(b, A.cz)); + x.z = det * b2Dot3(A.cx, b2Cross3(A.cy, b)); + return x; +} + #ifdef __cplusplus } #endif diff --git a/include/box2d/types.h b/include/box2d/types.h index a77aa0ea..a3cab401 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -27,6 +27,12 @@ typedef struct b2Vec2 float x, y; } b2Vec2; +/// 3D vector +typedef struct b2Vec3 +{ + float x, y, z; +} b2Vec3; + /// 2D rotation typedef struct b2Rot { @@ -48,6 +54,13 @@ typedef struct b2Mat22 b2Vec2 cx, cy; } b2Mat22; +/// A 3-by-3 Matrix +typedef struct b2Mat33 +{ + /// columns + b2Vec3 cx, cy, cz; +} b2Mat33; + /// Axis-aligned bounding box typedef struct b2AABB { diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index a53a1ef6..a65d6118 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -218,7 +218,7 @@ class OverlapRecovery : public Sample b2Polygon box = b2MakeBox(extent, extent); - int count = 4; + int count = 2; float fraction = 0.75f; float y = fraction * extent; while (count > 0) diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index a154ee5c..f91fd6eb 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -1,15 +1,18 @@ // SPDX-FileCopyrightText: 2022 Erin Catto // SPDX-License-Identifier: MIT +#include "sample.h" + #include "box2d/box2d.h" #include "box2d/geometry.h" -#include "sample.h" +#include "box2d/hull.h" + +//#include +#include -// TODO_ERIN test more joint types -// TODO_ERIN try to stabilize revolute class BenchmarkJointGrid : public Sample { -public: + public: BenchmarkJointGrid(const Settings& settings) : Sample(settings) { @@ -102,12 +105,7 @@ class Bridge : public Sample b2BodyId groundId = b2_nullBodyId; { b2BodyDef bd = b2DefaultBodyDef(); - bd.position = {0.0f, -1.0f}; groundId = b2World_CreateBody(m_worldId, &bd); - - //b2Segment segment = {{-80.0f, 0.0f}, {80.0f, 0.0f}}; - //b2ShapeDef sd = b2DefaultShapeDef(); - //b2Body_CreateSegment(groundId, &sd, &segment); } { @@ -132,6 +130,8 @@ class Bridge : public Sample jd.bodyIdB = bodyId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + // jd.enableMotor = true; + // jd.maxMotorTorque = 1000.0f; b2World_CreateRevoluteJoint(m_worldId, &jd); prevBodyId = bodyId; @@ -142,47 +142,41 @@ class Bridge : public Sample jd.bodyIdB = groundId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + // jd.enableMotor = true; + // jd.maxMotorTorque = 1000.0f; b2World_CreateRevoluteJoint(m_worldId, &jd); } -#if 0 - for (int32 i = 0; i < 2; ++i) + for (int32_t i = 0; i < 2; ++i) { - b2Vec2 vertices[3]; - vertices[0].Set(-0.5f, 0.0f); - vertices[1].Set(0.5f, 0.0f); - vertices[2].Set(0.0f, 1.5f); + b2Vec2 vertices[3] = {{-0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 1.5f}}; - b2PolygonShape shape; - shape.Set(vertices, 3); + b2Hull hull = b2ComputeHull(vertices, 3); + b2Polygon triangle = b2MakePolygon(&hull, 0.0f); - b2FixtureDef fd; - fd.shape = &shape; - fd.density = 1.0f; + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; - b2BodyDef bd; + b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.position.Set(-8.0f + 8.0f * i, 12.0f); - b2Body* body = m_world->CreateBody(&bd); - body->CreateFixture(&fd); + bd.position = {-8.0f + 8.0f * i, 22.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &triangle); } - for (int32 i = 0; i < 3; ++i) + for (int32_t i = 0; i < 3; ++i) { - b2CircleShape shape; - shape.m_radius = 0.5f; + b2Circle circle = {{0.0f, 0.0f}, 0.5f}; - b2FixtureDef fd; - fd.shape = &shape; - fd.density = 1.0f; + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; - b2BodyDef bd; + b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.position.Set(-6.0f + 6.0f * i, 10.0f); - b2Body* body = m_world->CreateBody(&bd); - body->CreateFixture(&fd); + bd.position = {-6.0f + 6.0f * i, 25.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); } -#endif } static Sample* Create(const Settings& settings) @@ -192,3 +186,181 @@ class Bridge : public Sample }; static int sampleBridgeIndex = RegisterSample("Joints", "Bridge", Bridge::Create); + +class BallAndChain : public Sample +{ + public: + enum + { + e_count = 30 + }; + + BallAndChain(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + groundId = b2World_CreateBody(m_worldId, &bd); + } + + m_maxMotorTorque = 0.0f; + + { + float hx = 0.5f; + b2Polygon box = b2MakeBox(hx, 0.125f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + int32_t jointIndex = 0; + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * i) * hx, e_count * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {(2.0f * i) * hx, e_count * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = 0.0f; + m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); + jointIndex += 1; + + prevBodyId = bodyId; + } + + b2Circle circle = {{0.0f, 0.0f}, 4.0f}; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); + + b2Vec2 pivot = {(2.0f * e_count) * hx, e_count * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = 0.0f; + m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); + jointIndex += 1; + assert(jointIndex == e_count + 1); + } + } + + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + ImGui::SetNextWindowSize(ImVec2(260.0f, 60.0f)); + ImGui::Begin("Options", nullptr, ImGuiWindowFlags_NoResize); + + bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 100000.0f, "%1.f"); + if (updateFriction) + { + for (int32_t i = 0; i <= e_count; ++i) + { + b2RevoluteJoint_SetMaxMotorTorque(m_jointIds[i], m_maxMotorTorque); + } + } + + ImGui::End(); + } + + static Sample* Create(const Settings& settings) + { + return new BallAndChain(settings); + } + + b2JointId m_jointIds[e_count + 1]; + float m_maxMotorTorque; +}; + +static int sampleBallAndChainIndex = RegisterSample("Joints", "BallAndChain", BallAndChain::Create); + + +class Cantilever : public Sample +{ + public: + enum + { + e_count = 8 + }; + + Cantilever(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + groundId = b2World_CreateBody(m_worldId, &bd); + } + + { + float hx = 0.5f; + b2Polygon box = b2MakeBox(hx, 0.125f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2WeldJointDef jd = b2DefaultWeldJointDef(); + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * i) * hx, e_count * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {(2.0f * i) * hx, e_count * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.linearHertz = 5.0f; + b2World_CreateWeldJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + //b2Circle circle = {{0.0f, 0.0f}, 4.0f}; + + //b2BodyDef bd = b2DefaultBodyDef(); + //bd.type = b2_dynamicBody; + //bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; + //b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + //b2Body_CreateCircle(bodyId, &sd, &circle); + + //b2Vec2 pivot = {(2.0f * e_count) * hx, e_count * hx}; + //jd.bodyIdA = prevBodyId; + //jd.bodyIdB = bodyId; + //jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + //jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + //jd.enableMotor = true; + //jd.maxMotorTorque = 0.0f; + //m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); + //jointIndex += 1; + //assert(jointIndex == e_count + 1); + } + } + + static Sample* Create(const Settings& settings) + { + return new Cantilever(settings); + } +}; + +static int sampleCantileverIndex = RegisterSample("Joints", "Cantilever", Cantilever::Create); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4affce80..a7203fff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,6 +45,7 @@ set(BOX2D_SOURCE_FILES table.h timer.c types.c + weld_joint.c world.c world.h ) diff --git a/src/joint.c b/src/joint.c index 8f2e6597..858aa970 100644 --- a/src/joint.c +++ b/src/joint.c @@ -249,6 +249,50 @@ b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDe return jointId; } +b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def) +{ + b2World* world = b2GetWorldFromId(worldId); + + B2_ASSERT(world->locked == false); + + if (world->locked) + { + return b2_nullJointId; + } + + B2_ASSERT(b2IsBodyIdValid(world, def->bodyIdA)); + B2_ASSERT(b2IsBodyIdValid(world, def->bodyIdB)); + + b2Body* bodyA = world->bodies + def->bodyIdA.index; + b2Body* bodyB = world->bodies + def->bodyIdB.index; + + b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); + + joint->type = b2_weldJoint; + + joint->localAnchorA = def->localAnchorA; + joint->localAnchorB = def->localAnchorB; + + b2WeldJoint empty = {0}; + joint->weldJoint = empty; + joint->weldJoint.referenceAngle = def->referenceAngle; + joint->weldJoint.linearHertz = def->linearHertz; + joint->weldJoint.linearDampingRatio = def->linearDampingRatio; + joint->weldJoint.angularHertz = def->angularHertz; + joint->weldJoint.angularDampingRatio = def->angularDampingRatio; + joint->weldJoint.impulse = b2Vec3_zero; + + // If the joint prevents collisions, then destroy all contacts between attached bodies + if (def->collideConnected == false) + { + b2DestroyContactsBetweenBodies(world, bodyA, bodyB); + } + + b2JointId jointId = {joint->object.index, world->index, joint->object.revision}; + + return jointId; +} + void b2World_DestroyJoint(b2JointId jointId) { b2World* world = b2GetWorldFromIndex(jointId.world); @@ -325,6 +369,7 @@ void b2World_DestroyJoint(b2JointId jointId) extern void b2PrepareMouse(b2Joint* base, const b2StepContext* context); extern void b2PrepareRevolute(b2Joint* base, const b2StepContext* context); +extern void b2PrepareWeld(b2Joint* base, const b2StepContext* context); void b2PrepareJoint(b2Joint* joint, const b2StepContext* context) { @@ -338,6 +383,10 @@ void b2PrepareJoint(b2Joint* joint, const b2StepContext* context) b2PrepareRevolute(joint, context); break; + case b2_weldJoint: + b2PrepareWeld(joint, context); + break; + default: B2_ASSERT(false); } @@ -358,12 +407,16 @@ void b2SolveJointVelocity(b2Joint* joint, const b2StepContext* context) b2SolveRevoluteVelocity(joint, context); break; + case b2_weldJoint: + break; + default: B2_ASSERT(false); } } extern void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap); +extern void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap); void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool removeOverlap) { @@ -380,6 +433,10 @@ void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool b2SolveRevoluteVelocitySoft(joint, context, removeOverlap); break; + case b2_weldJoint: + b2SolveWeldVelocitySoft(joint, context, removeOverlap); + break; + default: B2_ASSERT(false); } diff --git a/src/joint.h b/src/joint.h index 047ae305..08104dc4 100644 --- a/src/joint.h +++ b/src/joint.h @@ -93,6 +93,31 @@ typedef struct b2RevoluteJoint float axialMass; } b2RevoluteJoint; +typedef struct b2WeldJoint +{ + // Solver shared + float referenceAngle; + float linearHertz; + float linearDampingRatio; + float angularHertz; + float angularDampingRatio; + float linearBiasCoefficient; + float linearMassCoefficient; + float linearImpulseCoefficient; + float angularBiasCoefficient; + float angularMassCoefficient; + float angularImpulseCoefficient; + b2Vec3 impulse; + + // Solver temp + b2Vec2 localCenterA; + b2Vec2 localCenterB; + float invMassA; + float invMassB; + float invIA; + float invIB; +} b2WeldJoint; + /// The base joint class. Joints are used to constraint two bodies together in /// various fashions. Some joints also feature limits and motors. typedef struct b2Joint @@ -114,6 +139,7 @@ typedef struct b2Joint { b2MouseJoint mouseJoint; b2RevoluteJoint revoluteJoint; + b2WeldJoint weldJoint; }; bool isMarked; diff --git a/src/revolute_joint.c b/src/revolute_joint.c index 79b39ada..ca47296a 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -87,8 +87,9 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) } // TODO_ERIN softness experiment - const float hertz = 120.0f; - const float zeta = 4.0f; + // hertz = 6.0f * subStep/dt + const float hertz = 0.25f * context->velocityIterations * context->inv_dt; + const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; float h = context->dt; @@ -119,7 +120,8 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) float dtRatio = context->dtRatio; // Scale impulses to support a variable time step. - joint->impulse = b2MulSV(dtRatio, joint->impulse); + //joint->impulse = b2MulSV(dtRatio, joint->impulse); + joint->impulse = b2Vec2_zero; joint->motorImpulse *= dtRatio; joint->lowerImpulse *= dtRatio; joint->upperImpulse *= dtRatio; @@ -507,6 +509,24 @@ float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep) return inverseTimeStep * joint->revoluteJoint.motorImpulse; } +void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); + + b2Joint* joint = world->joints + jointId.index; + B2_ASSERT(joint->object.index == joint->object.next); + B2_ASSERT(joint->object.revision == jointId.revision); + B2_ASSERT(joint->type == b2_revoluteJoint); + joint->revoluteJoint.maxMotorTorque = torque; +} + #if 0 void b2RevoluteJoint::Dump() { diff --git a/src/solver_data.h b/src/solver_data.h index b09797d2..31372d72 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -14,6 +14,7 @@ typedef struct b2StepContext // inverse time step (0 if dt == 0). float inv_dt; + // TODO_ERIN eliminate support for variable time step // ratio between current and previous time step (dt * inv_dt0) float dtRatio; diff --git a/src/weld_joint.c b/src/weld_joint.c new file mode 100644 index 00000000..0d5ee4b5 --- /dev/null +++ b/src/weld_joint.c @@ -0,0 +1,191 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "body.h" +#include "core.h" +#include "joint.h" +#include "solver_data.h" +#include "world.h" + +#include "box2d/debug_draw.h" + +// Point-to-point constraint +// C = p2 - p1 +// Cdot = v2 - v1 +// = v2 + cross(w2, r2) - v1 - cross(w1, r1) +// J = [-I -r1_skew I r2_skew ] +// Identity used: +// w k % (rx i + ry j) = w * (-ry i + rx j) + +// Angle constraint +// C = angle2 - angle1 - referenceAngle +// Cdot = w2 - w1 +// J = [0 0 -1 0 0 1] +// K = invI1 + invI2 + +void b2PrepareWeld(b2Joint* base, b2StepContext* context) +{ + B2_ASSERT(base->type == b2_weldJoint); + + int32_t indexA = base->edges[0].bodyIndex; + int32_t indexB = base->edges[1].bodyIndex; + B2_ASSERT(0 <= indexA && indexA < context->bodyCapacity); + B2_ASSERT(0 <= indexB && indexB < context->bodyCapacity); + + b2Body* bodyA = context->bodies + indexA; + b2Body* bodyB = context->bodies + indexB; + B2_ASSERT(bodyA->object.index == bodyA->object.next); + B2_ASSERT(bodyB->object.index == bodyB->object.next); + + b2WeldJoint* joint = &base->weldJoint; + joint->localCenterA = bodyA->localCenter; + joint->invMassA = bodyA->invMass; + joint->invIA = bodyA->invI; + + joint->localCenterB = bodyB->localCenter; + joint->invMassB = bodyB->invMass; + joint->invIB = bodyB->invI; + + const float h = context->dt; + + float linearHertz = joint->linearHertz; + if (linearHertz == 0.0f) + { + linearHertz = 0.25f * context->velocityIterations * context->inv_dt; + } + + { + const float zeta = joint->linearDampingRatio; + const float omega = 2.0f * b2_pi * linearHertz; + joint->linearBiasCoefficient = omega / (2.0f * zeta + h * omega); + float c = h * omega * (2.0f * zeta + h * omega); + joint->linearImpulseCoefficient = 1.0f / (1.0f + c); + joint->linearMassCoefficient = c * joint->linearImpulseCoefficient; + } + + float angularHertz = joint->angularHertz; + if (angularHertz == 0.0f) + { + angularHertz = 0.25f * context->velocityIterations * context->inv_dt; + } + + { + const float zeta = joint->angularDampingRatio; + const float omega = 2.0f * b2_pi * angularHertz; + joint->angularBiasCoefficient = omega / (2.0f * zeta + h * omega); + float c = h * omega * (2.0f * zeta + h * omega); + joint->angularImpulseCoefficient = 1.0f / (1.0f + c); + joint->angularMassCoefficient = c * joint->angularImpulseCoefficient; + } + + joint->impulse = b2Vec3_zero; +} + +void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap) +{ + B2_ASSERT(base->type == b2_weldJoint); + + b2WeldJoint* joint = &base->weldJoint; + + b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; + b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); + const float aA = bodyA->angle + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); + const float aB = bodyB->angle + bodyB->deltaAngle; + + float mA = joint->invMassA, mB = joint->invMassB; + float iA = joint->invIA, iB = joint->invIB; + + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + + b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); + b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + + b2Mat33 K; + K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; + K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; + K.cz.x = -rA.y * iA - rB.y * iB; + K.cx.y = K.cy.x; + K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; + K.cz.y = rA.x * iA + rB.x * iB; + K.cx.z = K.cz.x; + K.cy.z = K.cz.y; + K.cz.z = iA + iB; + + b2Vec2 Cdot1 = b2Add(b2Sub(vB, vA), b2Sub(b2CrossSV(wB, rB), b2CrossSV(wA, rA))); + float Cdot2 = wB - wA; + + float linearBiasScale = 0.0f; + float linearMassScale = 1.0f; + float linearImpulseScale = 0.0f; + float angularBiasScale = 0.0f; + float angularMassScale = 1.0f; + float angularImpulseScale = 0.0f; + if (removeOverlap) + { + linearBiasScale = joint->linearBiasCoefficient; + linearMassScale = joint->linearMassCoefficient; + linearImpulseScale = joint->linearImpulseCoefficient; + angularBiasScale = joint->angularBiasCoefficient; + angularMassScale = joint->angularMassCoefficient; + angularImpulseScale = joint->angularImpulseCoefficient; + } + + b2Vec2 C1 = b2Add(b2Sub(cB, cA), b2Sub(rB, rA)); + float C2 = aB - aA - joint->referenceAngle; + + b2Vec3 c; + c.x = Cdot1.x + linearBiasScale * C1.x; + c.y = Cdot1.y + linearBiasScale * C1.y; + c.z = Cdot2 + angularBiasScale * C2; + + b2Vec3 b = b2Solve33(K, c); + b2Vec3 impulse; + impulse.x = -linearMassScale * b.x - linearImpulseScale * joint->impulse.x; + impulse.y = -linearMassScale * b.y - linearImpulseScale * joint->impulse.y; + impulse.z = -angularMassScale * b.z - angularImpulseScale * joint->impulse.z; + + joint->impulse.x += impulse.x; + joint->impulse.y += impulse.y; + joint->impulse.z += impulse.z; + + b2Vec2 P = {impulse.x, impulse.y}; + + vA = b2MulSub(vA, mA, P); + wA -= iA * (b2Cross(rA, P) + impulse.z); + + vB = b2MulAdd(vB, mB, P); + wB += iB * (b2Cross(rB, P) + impulse.z); + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; +} + +#if 0 +void b2WeldJoint::Dump() +{ + int32 indexA = m_bodyA->m_islandIndex; + int32 indexB = m_bodyB->m_islandIndex; + + b2Dump(" b2WeldJointDef jd;\n"); + b2Dump(" jd.bodyA = bodies[%d];\n", indexA); + b2Dump(" jd.bodyB = bodies[%d];\n", indexB); + b2Dump(" jd.collideConnected = bool(%d);\n", m_collideConnected); + b2Dump(" jd.localAnchorA.Set(%.9g, %.9g);\n", m_localAnchorA.x, m_localAnchorA.y); + b2Dump(" jd.localAnchorB.Set(%.9g, %.9g);\n", m_localAnchorB.x, m_localAnchorB.y); + b2Dump(" jd.referenceAngle = %.9g;\n", m_referenceAngle); + b2Dump(" jd.stiffness = %.9g;\n", m_stiffness); + b2Dump(" jd.damping = %.9g;\n", m_damping); + b2Dump(" joints[%d] = m_world->CreateJoint(&jd);\n", m_index); +} +#endif From 266cd4d4abd40fb2ec59aaaeb02a54861e588c1f Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 4 Sep 2023 15:30:25 -0700 Subject: [PATCH 15/51] dirk step testing --- samples/collection/sample_joints.cpp | 94 ++++++++++++++--------- samples/main.cpp | 2 +- src/body.c | 2 + src/body.h | 5 ++ src/contact_solver.c | 4 +- src/graph.c | 107 ++++++++++----------------- src/graph.h | 3 +- src/mouse_joint.c | 2 +- src/revolute_joint.c | 19 +++-- src/solver_data.h | 2 +- src/weld_joint.c | 17 ++++- src/world.c | 4 +- 12 files changed, 141 insertions(+), 120 deletions(-) diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index f91fd6eb..c112a565 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -7,7 +7,7 @@ #include "box2d/geometry.h" #include "box2d/hull.h" -//#include +// #include #include class BenchmarkJointGrid : public Sample @@ -115,6 +115,8 @@ class Bridge : public Sample sd.density = 20.0f; b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + int32_t jointIndex = 0; + m_maxMotorTorque = 5000.0f; b2BodyId prevBodyId = groundId; for (int32_t i = 0; i < e_count; ++i) @@ -130,9 +132,9 @@ class Bridge : public Sample jd.bodyIdB = bodyId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); - // jd.enableMotor = true; - // jd.maxMotorTorque = 1000.0f; - b2World_CreateRevoluteJoint(m_worldId, &jd); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); prevBodyId = bodyId; } @@ -142,9 +144,11 @@ class Bridge : public Sample jd.bodyIdB = groundId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); - // jd.enableMotor = true; - // jd.maxMotorTorque = 1000.0f; - b2World_CreateRevoluteJoint(m_worldId, &jd); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + assert(jointIndex == e_count + 1); } for (int32_t i = 0; i < 2; ++i) @@ -179,10 +183,34 @@ class Bridge : public Sample } } + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + + // Automatic window size + ImGui::Begin("Options", nullptr, ImGuiWindowFlags_AlwaysAutoResize); + + // Slider takes half the window + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.5f); + bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 10000.0f, "%2.f"); + if (updateFriction) + { + for (int32_t i = 0; i <= e_count; ++i) + { + b2RevoluteJoint_SetMaxMotorTorque(m_jointIds[i], m_maxMotorTorque); + } + } + + ImGui::End(); + } + static Sample* Create(const Settings& settings) { return new Bridge(settings); } + + b2JointId m_jointIds[e_count + 1]; + float m_maxMotorTorque; }; static int sampleBridgeIndex = RegisterSample("Joints", "Bridge", Bridge::Create); @@ -204,7 +232,7 @@ class BallAndChain : public Sample groundId = b2World_CreateBody(m_worldId, &bd); } - m_maxMotorTorque = 0.0f; + m_maxMotorTorque = 10000.0f; { float hx = 0.5f; @@ -233,8 +261,7 @@ class BallAndChain : public Sample jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); jd.enableMotor = true; jd.maxMotorTorque = 0.0f; - m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); - jointIndex += 1; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); prevBodyId = bodyId; } @@ -244,6 +271,7 @@ class BallAndChain : public Sample b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; + //bd.linearVelocity = {100.0f, -100.0f}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2Body_CreateCircle(bodyId, &sd, &circle); @@ -254,8 +282,7 @@ class BallAndChain : public Sample jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); jd.enableMotor = true; jd.maxMotorTorque = 0.0f; - m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); - jointIndex += 1; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); assert(jointIndex == e_count + 1); } } @@ -263,10 +290,10 @@ class BallAndChain : public Sample void UpdateUI() override { ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); - ImGui::SetNextWindowSize(ImVec2(260.0f, 60.0f)); + ImGui::SetNextWindowSize(ImVec2(300.0f, 60.0f)); ImGui::Begin("Options", nullptr, ImGuiWindowFlags_NoResize); - bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 100000.0f, "%1.f"); + bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 10000.0f, "%2.f"); if (updateFriction) { for (int32_t i = 0; i <= e_count; ++i) @@ -274,7 +301,7 @@ class BallAndChain : public Sample b2RevoluteJoint_SetMaxMotorTorque(m_jointIds[i], m_maxMotorTorque); } } - + ImGui::End(); } @@ -289,7 +316,6 @@ class BallAndChain : public Sample static int sampleBallAndChainIndex = RegisterSample("Joints", "BallAndChain", BallAndChain::Create); - class Cantilever : public Sample { public: @@ -321,46 +347,40 @@ class Cantilever : public Sample { b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.position = {(1.0f + 2.0f * i) * hx, e_count * hx}; + bd.position = {(1.0f + 2.0f * i) * hx, 0.0f}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2Body_CreatePolygon(bodyId, &sd, &box); - b2Vec2 pivot = {(2.0f * i) * hx, e_count * hx}; + b2Vec2 pivot = {(2.0f * i) * hx, 0.0f}; jd.bodyIdA = prevBodyId; jd.bodyIdB = bodyId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); - jd.linearHertz = 5.0f; + //jd.linearHertz = 5.0f; b2World_CreateWeldJoint(m_worldId, &jd); prevBodyId = bodyId; } - //b2Circle circle = {{0.0f, 0.0f}, 4.0f}; - - //b2BodyDef bd = b2DefaultBodyDef(); - //bd.type = b2_dynamicBody; - //bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; - //b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); - //b2Body_CreateCircle(bodyId, &sd, &circle); - - //b2Vec2 pivot = {(2.0f * e_count) * hx, e_count * hx}; - //jd.bodyIdA = prevBodyId; - //jd.bodyIdB = bodyId; - //jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); - //jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); - //jd.enableMotor = true; - //jd.maxMotorTorque = 0.0f; - //m_jointIds[jointIndex] = b2World_CreateRevoluteJoint(m_worldId, &jd); - //jointIndex += 1; - //assert(jointIndex == e_count + 1); + m_tipId = prevBodyId; } } + void Step(Settings& settings) override + { + Sample::Step(settings); + + b2Vec2 tipPosition = b2Body_GetPosition(m_tipId); + g_draw.DrawString(5, m_textLine, "tip-y = %.2f", tipPosition.y); + m_textLine += m_textIncrement; + } + static Sample* Create(const Settings& settings) { return new Cantilever(settings); } + + b2BodyId m_tipId; }; static int sampleCantileverIndex = RegisterSample("Joints", "Cantilever", Cantilever::Create); diff --git a/samples/main.cpp b/samples/main.cpp index 71148022..dd88c02d 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -694,7 +694,7 @@ int main(int, char**) UpdateUI(); - // ImGui::ShowDemoWindow(); + //ImGui::ShowDemoWindow(); // if (g_draw.m_showUI) { diff --git a/src/body.c b/src/body.c index e0fef239..81b65609 100644 --- a/src/body.c +++ b/src/body.c @@ -49,6 +49,8 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->angularVelocity = def->angularVelocity; b->deltaPosition = b2Vec2_zero; b->deltaAngle = 0.0f; + b->deltaPositionIter = b2Vec2_zero; + b->deltaAngleIter = 0.0f; b->force = b2Vec2_zero; b->torque = 0.0f; b->shapeList = B2_NULL_INDEX; diff --git a/src/body.h b/src/body.h index 931619cb..6ee2b6ab 100644 --- a/src/body.h +++ b/src/body.h @@ -36,9 +36,14 @@ typedef struct b2Body b2Vec2 linearVelocity; float angularVelocity; + // These are the change in position/angle that accumulate across constraint substeps b2Vec2 deltaPosition; float deltaAngle; + // These are the change in position/angle that occur within a single constraint substep + b2Vec2 deltaPositionIter; + float deltaAngleIter; + b2Vec2 force; float torque; diff --git a/src/contact_solver.c b/src/contact_solver.c index f0c439c5..f85f1b0e 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -137,7 +137,7 @@ void b2ContactSolver_Initialize(b2ContactSolver* solver) const b2ManifoldPoint* cp = manifold->points + j; b2VelocityConstraintPoint* vcp = vc->points + j; - if (context->warmStarting) + if (context->enableWarmStarting) { vcp->normalImpulse = context->dtRatio * cp->normalImpulse; vcp->tangentImpulse = context->dtRatio * cp->tangentImpulse; @@ -219,7 +219,7 @@ void b2ContactSolver_Initialize(b2ContactSolver* solver) solver->constraintCount = constraintCount; // Warm start - if (context->warmStarting) + if (context->enableWarmStarting) { for (int32_t i = 0; i < constraintCount; ++i) { diff --git a/src/graph.c b/src/graph.c index 5211e39c..a16b5385 100644 --- a/src/graph.c +++ b/src/graph.c @@ -235,58 +235,11 @@ static void b2IntegrateVelocities(b2World* world, float h) body->deltaAngle = 0.0f; body->deltaPosition = b2Vec2_zero; - } -} - -#if 0 // no need? -static void b2IntegrateVelocitiesSoft(b2World* world, float h) -{ - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; - b2Vec2 gravity = world->gravity; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCapacity; ++i) - { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } - - if (body->type != b2_dynamicBody) - { - continue; - } - - float invMass = body->invMass; - float invI = body->invI; - - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + h * invI * body->torque; - - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); - w *= 1.0f / (1.0f + h * body->angularDamping); - - body->linearVelocity = v; - body->angularVelocity = w; - body->deltaAngle = 0.0f; - body->deltaPosition = b2Vec2_zero; + body->deltaAngleIter = 0.0f; + body->deltaPositionIter = b2Vec2_zero; } } -#endif static void b2IntegrateDeltaTransform(b2World* world, float h) { @@ -308,6 +261,11 @@ static void b2IntegrateDeltaTransform(b2World* world, float h) body->deltaAngle += h * body->angularVelocity; body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + + body->deltaAngleIter = body->deltaAngle; + body->deltaPositionIter = body->deltaPosition; + + // breakpoint helper i += 0; } } @@ -365,7 +323,8 @@ typedef struct b2Constraint int32_t pointCount; } b2Constraint; -static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, bool warmStart) +// h is full time step +static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float stiffHertz, bool warmStart) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -437,7 +396,7 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, b // Soft contact with speculation //const float hertz = mA == 0.0f ? 60.0f : 30.0f; - const float hertz = 30.0f; + const float hertz = stiffHertz; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; // float d = 2.0f * zeta * omega / kNormal; @@ -972,7 +931,8 @@ static void b2SolveVelocityConstraintsSorted(b2World* world, b2Constraint* const } } -static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) +// inv_dt is full time step inverse, h is sub time step +static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, float h, bool removeOverlap) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -995,10 +955,10 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 dpA = bodyA->deltaPosition; - const float daA = bodyA->deltaAngle; - const b2Vec2 dpB = bodyB->deltaPosition; - const float daB = bodyB->deltaAngle; + const b2Vec2 dpA = bodyA->deltaPositionIter; + const float daA = bodyA->deltaAngleIter; + const b2Vec2 dpB = bodyB->deltaPositionIter; + const float daB = bodyB->deltaAngleIter; b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); @@ -1013,7 +973,7 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); b2Vec2 dv = b2Sub(vrB, vrA); - // Compute change in separation + // Compute change in separation (small angle approximation of sin(angle) == angle) b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); float ds = b2Dot(b2Sub(prB, prA), normal); @@ -1023,7 +983,7 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt float impulseScale = 0.0f; if (s > 0.0f) { - // Speculative + // Speculative (inverse of full time step) bias = s * inv_dt; } else if (removeOverlap) @@ -1082,6 +1042,16 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt wB += iB * b2Cross(cp->rB, P); } + if (removeOverlap) + { + B2_MAYBE_UNUSED(h); + // Iteratively update delta angle/position using for sub-step + bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; + bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); + bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; + bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); + } + bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; @@ -1627,19 +1597,20 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) int32_t velocityIterations = stepContext->velocityIterations; int32_t positionIterations = stepContext->positionIterations; float h = stepContext->dt; + float stiffHertz = 0.25f * stepContext->velocityIterations * stepContext->inv_dt; b2IntegrateVelocities(world, h); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2PrepareSoftContact(world, colors + i, h, true); + b2PrepareSoftContact(world, colors + i, h, stiffHertz, true); } for (int32_t iter = 0; iter < velocityIterations; ++iter) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveSoftContact(world, colors + i, stepContext->inv_dt, true); + b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, true); } } @@ -1649,7 +1620,7 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) { for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveSoftContact(world, colors + i, stepContext->inv_dt, false); + b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, false); } } @@ -1688,11 +1659,14 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) // Full step apply gravity b2IntegrateVelocities(world, stepContext->dt); + float stiffHertz = 30.0f; + //0.125f * stepContext->velocityIterations * stepContext->inv_dt; + for (int32_t i = 0; i < b2_graphColorCount; ++i) { // Soft constraints initialized with full time step - bool warmStart = true; - b2PrepareSoftContact(world, colors + i, stepContext->dt, warmStart); + bool warmStart = stepContext->enableWarmStarting; + b2PrepareSoftContact(world, colors + i, stepContext->dt, stiffHertz, warmStart); } int32_t jointCapacity = world->jointPool.capacity; @@ -1710,7 +1684,6 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) int32_t substepCount = stepContext->velocityIterations; float h = stepContext->dt / substepCount; - float inv_h = 1.0f / h; for (int32_t substep = 0; substep < substepCount; ++substep) { @@ -1730,12 +1703,14 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = true; - b2SolveSoftContact(world, colors + i, inv_h, removeOverlap); + b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, removeOverlap); } + // TODO_ERIN final iteration should update world positions b2IntegrateDeltaTransform(world, h); } + // TODO_ERIN wasteful since I just looped over bodies in b2IntegrateDeltaTransform b2UpdatePositions(world); int32_t positionIterations = stepContext->positionIterations; @@ -1756,7 +1731,7 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = false; - b2SolveSoftContact(world, colors + i, 0.0f, removeOverlap); + b2SolveSoftContact(world, colors + i, 0.0f, 0.0f, removeOverlap); } } diff --git a/src/graph.h b/src/graph.h index a498e5e3..061d11af 100644 --- a/src/graph.h +++ b/src/graph.h @@ -13,7 +13,8 @@ typedef struct b2Contact b2Contact; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; -#define b2_graphColorCount 16 +// TODO_ERIN fixme +#define b2_graphColorCount 64 typedef struct b2GraphColor { diff --git a/src/mouse_joint.c b/src/mouse_joint.c index 3a5addee..f6cee647 100644 --- a/src/mouse_joint.c +++ b/src/mouse_joint.c @@ -89,7 +89,7 @@ void b2PrepareMouse(b2Joint* base, b2StepContext* context) // Cheat with some damping wB *= B2_MAX(0.0f, 1.0f - 0.02f * (60.0f * h)); - if (context->warmStarting) + if (context->enableWarmStarting) { joint->impulse = b2MulSV(context->dtRatio, joint->impulse); vB = b2MulAdd(vB, joint->invMassB, joint->impulse); diff --git a/src/revolute_joint.c b/src/revolute_joint.c index ca47296a..7fa356b3 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -115,7 +115,7 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->motorImpulse = 0.0f; } - if (context->warmStarting) + if (context->enableWarmStarting) { float dtRatio = context->dtRatio; @@ -248,10 +248,10 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); - const float aA = bodyA->angle + bodyA->deltaAngle; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); - const float aB = bodyB->angle + bodyB->deltaAngle; + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPositionIter); + const float aA = bodyA->angle + bodyA->deltaAngleIter; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPositionIter); + const float aB = bodyB->angle + bodyB->deltaAngleIter; float mA = joint->invMassA, mB = joint->invMassB; float iA = joint->invIA, iB = joint->invIB; @@ -346,6 +346,15 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo wB += iB * b2Cross(rB, impulse); } + if (removeOverlap) + { + float h = context->dt / context->velocityIterations; + bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; + bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); + bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; + bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); + } + bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; diff --git a/src/solver_data.h b/src/solver_data.h index 31372d72..fab10569 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -30,5 +30,5 @@ typedef struct b2StepContext struct b2Body* bodies; int32_t bodyCapacity; - bool warmStarting; + bool enableWarmStarting; } b2StepContext; diff --git a/src/weld_joint.c b/src/weld_joint.c index 0d5ee4b5..f72557e5 100644 --- a/src/weld_joint.c +++ b/src/weld_joint.c @@ -95,10 +95,10 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); - const float aA = bodyA->angle + bodyA->deltaAngle; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); - const float aB = bodyB->angle + bodyB->deltaAngle; + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPositionIter); + const float aA = bodyA->angle + bodyA->deltaAngleIter; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPositionIter); + const float aB = bodyB->angle + bodyB->deltaAngleIter; float mA = joint->invMassA, mB = joint->invMassB; float iA = joint->invIA, iB = joint->invIB; @@ -109,6 +109,9 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + // TODO_ERIN handle fixed rotation + //bool fixedRotation = (iA + iB == 0.0f); + b2Mat33 K; K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; @@ -165,6 +168,12 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r vB = b2MulAdd(vB, mB, P); wB += iB * (b2Cross(rB, P) + impulse.z); + float h = context->dt / context->velocityIterations; + bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; + bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); + bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; + bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); + bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; diff --git a/src/world.c b/src/world.c index 4055c9a1..106672fe 100644 --- a/src/world.c +++ b/src/world.c @@ -1097,7 +1097,7 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, context.dtRatio = world->inv_dt0 * timeStep; context.restitutionThreshold = world->restitutionThreshold; - context.warmStarting = world->warmStarting; + context.enableWarmStarting = world->warmStarting; context.bodies = world->bodies; context.bodyCapacity = world->bodyPool.capacity; @@ -1184,7 +1184,7 @@ void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations context.dtRatio = world->inv_dt0 * timeStep; context.restitutionThreshold = world->restitutionThreshold; - context.warmStarting = world->warmStarting; + context.enableWarmStarting = world->warmStarting; context.bodies = world->bodies; context.bodyCapacity = world->bodyPool.capacity; From 609d1cc7cfcf7342118c63eac0b68cfa869bd1ba Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 4 Sep 2023 23:21:50 -0700 Subject: [PATCH 16/51] remove Dirk stepper testing --- samples/collection/behavior.cpp | 4 +- samples/collection/benchmark_pyramid.cpp | 4 +- samples/collection/sample_joints.cpp | 15 ++- samples/collection/sample_vertical_stack.cpp | 4 +- src/body.c | 2 - src/body.h | 4 - src/graph.c | 120 +++++++++++++------ src/graph.h | 2 +- src/revolute_joint.c | 59 ++++++--- src/weld_joint.c | 14 +-- src/world.c | 4 +- 11 files changed, 144 insertions(+), 88 deletions(-) diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index a65d6118..d7e46753 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -57,7 +57,7 @@ class HighMassRatio1 : public Sample { float coeff = i - 0.5f * count; - float yy = count == 1 ? y + 2.0f : y; + float yy = count == 1 ? y + 0.0f : y; bodyDef.position = {2.0f * coeff * extent + offset, yy}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); @@ -218,7 +218,7 @@ class OverlapRecovery : public Sample b2Polygon box = b2MakeBox(extent, extent); - int count = 2; + int count = 4; float fraction = 0.75f; float y = fraction * extent; while (count > 0) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 83c03de3..41b82fc0 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -22,8 +22,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 60; - m_rowCount = g_sampleDebug ? 1 : 16; - m_columnCount = g_sampleDebug ? 1 : 16; + m_rowCount = g_sampleDebug ? 1 : 1; + m_columnCount = g_sampleDebug ? 1 : 1; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index c112a565..f9df5fe9 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -96,7 +96,7 @@ class Bridge : public Sample public: enum { - e_count = 200 + e_count = 20 }; Bridge(const Settings& settings) @@ -116,7 +116,7 @@ class Bridge : public Sample b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); int32_t jointIndex = 0; - m_maxMotorTorque = 5000.0f; + m_maxMotorTorque = 0.0f; b2BodyId prevBodyId = groundId; for (int32_t i = 0; i < e_count; ++i) @@ -124,6 +124,8 @@ class Bridge : public Sample b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; bd.position = {-34.5f + 1.0f * i, 20.0f}; + //bd.linearDamping = 0.1f; + //bd.angularDamping = 0.1f; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2Body_CreatePolygon(bodyId, &sd, &box); @@ -232,7 +234,7 @@ class BallAndChain : public Sample groundId = b2World_CreateBody(m_worldId, &bd); } - m_maxMotorTorque = 10000.0f; + m_maxMotorTorque = 0.0f; { float hx = 0.5f; @@ -260,7 +262,7 @@ class BallAndChain : public Sample jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); jd.enableMotor = true; - jd.maxMotorTorque = 0.0f; + jd.maxMotorTorque = m_maxMotorTorque; m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); prevBodyId = bodyId; @@ -271,6 +273,9 @@ class BallAndChain : public Sample b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; + //bd.linearDamping = 0.1f; + //bd.angularDamping = 0.1f; + //bd.linearVelocity = {100.0f, -100.0f}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2Body_CreateCircle(bodyId, &sd, &circle); @@ -281,7 +286,7 @@ class BallAndChain : public Sample jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); jd.enableMotor = true; - jd.maxMotorTorque = 0.0f; + jd.maxMotorTorque = m_maxMotorTorque; m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); assert(jointIndex == e_count + 1); } diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index 9f00a5d3..319ec6c8 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -56,8 +56,8 @@ class VerticalStack : public Sample } m_shapeType = e_boxShape; - m_rowCount = g_sampleDebug ? 14 : 50; - m_columnCount = g_sampleDebug ? 1 : 200; + m_rowCount = 14; + m_columnCount = g_sampleDebug ? 1 : e_maxColumns; m_bulletCount = 1; m_bulletType = e_circleShape; diff --git a/src/body.c b/src/body.c index 81b65609..e0fef239 100644 --- a/src/body.c +++ b/src/body.c @@ -49,8 +49,6 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->angularVelocity = def->angularVelocity; b->deltaPosition = b2Vec2_zero; b->deltaAngle = 0.0f; - b->deltaPositionIter = b2Vec2_zero; - b->deltaAngleIter = 0.0f; b->force = b2Vec2_zero; b->torque = 0.0f; b->shapeList = B2_NULL_INDEX; diff --git a/src/body.h b/src/body.h index 6ee2b6ab..65532864 100644 --- a/src/body.h +++ b/src/body.h @@ -40,10 +40,6 @@ typedef struct b2Body b2Vec2 deltaPosition; float deltaAngle; - // These are the change in position/angle that occur within a single constraint substep - b2Vec2 deltaPositionIter; - float deltaAngleIter; - b2Vec2 force; float torque; diff --git a/src/graph.c b/src/graph.c index a16b5385..e1fea36e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -235,9 +235,6 @@ static void b2IntegrateVelocities(b2World* world, float h) body->deltaAngle = 0.0f; body->deltaPosition = b2Vec2_zero; - - body->deltaAngleIter = 0.0f; - body->deltaPositionIter = b2Vec2_zero; } } @@ -262,9 +259,6 @@ static void b2IntegrateDeltaTransform(b2World* world, float h) body->deltaAngle += h * body->angularVelocity; body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); - body->deltaAngleIter = body->deltaAngle; - body->deltaPositionIter = body->deltaPosition; - // breakpoint helper i += 0; } @@ -324,7 +318,7 @@ typedef struct b2Constraint } b2Constraint; // h is full time step -static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float stiffHertz, bool warmStart) +static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float contactHertz, bool warmStart) { const int32_t constraintCount = b2Array(color->contactArray).count; int32_t* contactIndices = color->contactArray; @@ -396,7 +390,7 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f // Soft contact with speculation //const float hertz = mA == 0.0f ? 60.0f : 30.0f; - const float hertz = stiffHertz; + const float hertz = mA == 0.0f ? contactHertz : 0.5f * contactHertz; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; // float d = 2.0f * zeta * omega / kNormal; @@ -931,8 +925,8 @@ static void b2SolveVelocityConstraintsSorted(b2World* world, b2Constraint* const } } -// inv_dt is full time step inverse, h is sub time step -static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, float h, bool removeOverlap) +// inv_dt is full time step inverse +static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) { const int32_t constraintCount = b2Array(color->contactArray).count; b2Body* bodies = world->bodies; @@ -955,10 +949,10 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 dpA = bodyA->deltaPositionIter; - const float daA = bodyA->deltaAngleIter; - const b2Vec2 dpB = bodyB->deltaPositionIter; - const float daB = bodyB->deltaAngleIter; + const b2Vec2 dpA = bodyA->deltaPosition; + const float daA = bodyA->deltaAngle; + const b2Vec2 dpB = bodyB->deltaPosition; + const float daB = bodyB->deltaAngle; b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); @@ -1042,16 +1036,6 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt wB += iB * b2Cross(cp->rB, P); } - if (removeOverlap) - { - B2_MAYBE_UNUSED(h); - // Iteratively update delta angle/position using for sub-step - bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; - bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); - bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; - bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); - } - bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; @@ -1572,10 +1556,12 @@ void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); } +// inferior joint stability to soft step void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; + b2Joint* joints = world->joints; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -1596,31 +1582,69 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) int32_t velocityIterations = stepContext->velocityIterations; int32_t positionIterations = stepContext->positionIterations; - float h = stepContext->dt; - float stiffHertz = 0.25f * stepContext->velocityIterations * stepContext->inv_dt; + float dt = stepContext->dt; + float inv_dt = stepContext->inv_dt; + float contactHertz = 120.0f; - b2IntegrateVelocities(world, h); + b2IntegrateVelocities(world, dt); for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2PrepareSoftContact(world, colors + i, h, stiffHertz, true); + b2PrepareSoftContact(world, colors + i, dt, contactHertz, true); + } + + int32_t jointCapacity = world->jointPool.capacity; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2PrepareJoint(joint, stepContext); } for (int32_t iter = 0; iter < velocityIterations; ++iter) { + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + bool removeOverlap = true; + b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + } + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, true); + b2SolveSoftContact(world, colors + i, inv_dt, true); } } - b2IntegratePositions(world, h); + b2IntegratePositions(world, dt); for (int32_t iter = 0; iter < positionIterations; ++iter) { + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + bool removeOverlap = false; + b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + } + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, false); + b2SolveSoftContact(world, colors + i, inv_dt, false); } } @@ -1631,9 +1655,25 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); } -// Soft constraints with substepping. Allows for stiffer contacts with a small performance hit. Includes a -// bias removal stage to help remove excess warm starting energy. -void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) +// Threading: +// 1. build array of awake bodies, maybe copy to contiguous array +// 2. parallel-for integrate velocities +// 3. parallel prepare constraints by color +// Loop sub-steps: +// 4. parallel solve constraints by color +// 5. parallel-for update position deltas (and positions on last iter) +// End Loop +// Loop bias-removal: +// 6. parallel solve constraints by color +// End loop +// 7. parallel-for store impulses +// 8. parallel-for update aabbs, build proxy update set, build awake contact set + +// Soft constraints with constraint error substepping. Allows for stiffer contacts with a small performance hit. Includes a +// bias removal stage to help remove excess bias energy. +// http://mmacklin.com/smallsteps.pdf +// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf +void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; @@ -1659,14 +1699,16 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) // Full step apply gravity b2IntegrateVelocities(world, stepContext->dt); - float stiffHertz = 30.0f; - //0.125f * stepContext->velocityIterations * stepContext->inv_dt; + // 30 is a bit soft, 60 oscillates too much + //const float contactHertz = 45.0f; + //const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 120.0f; for (int32_t i = 0; i < b2_graphColorCount; ++i) { // Soft constraints initialized with full time step bool warmStart = stepContext->enableWarmStarting; - b2PrepareSoftContact(world, colors + i, stepContext->dt, stiffHertz, warmStart); + b2PrepareSoftContact(world, colors + i, stepContext->dt, contactHertz, warmStart); } int32_t jointCapacity = world->jointPool.capacity; @@ -1703,7 +1745,7 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = true; - b2SolveSoftContact(world, colors + i, stepContext->inv_dt, h, removeOverlap); + b2SolveSoftContact(world, colors + i, h, removeOverlap); } // TODO_ERIN final iteration should update world positions @@ -1731,7 +1773,7 @@ void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { bool removeOverlap = false; - b2SolveSoftContact(world, colors + i, 0.0f, 0.0f, removeOverlap); + b2SolveSoftContact(world, colors + i, h, removeOverlap); } } diff --git a/src/graph.h b/src/graph.h index 061d11af..b2b15ff2 100644 --- a/src/graph.h +++ b/src/graph.h @@ -37,5 +37,5 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphSoftTGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext); void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext); diff --git a/src/revolute_joint.c b/src/revolute_joint.c index 7fa356b3..01a4f157 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -119,8 +119,7 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) { float dtRatio = context->dtRatio; - // Scale impulses to support a variable time step. - //joint->impulse = b2MulSV(dtRatio, joint->impulse); + // Soft step works best when bilateral constraints have no warm starting. joint->impulse = b2Vec2_zero; joint->motorImpulse *= dtRatio; joint->lowerImpulse *= dtRatio; @@ -248,10 +247,10 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPositionIter); - const float aA = bodyA->angle + bodyA->deltaAngleIter; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPositionIter); - const float aB = bodyB->angle + bodyB->deltaAngleIter; + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); + const float aA = bodyA->angle + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); + const float aB = bodyB->angle + bodyB->deltaAngle; float mA = joint->invMassA, mB = joint->invMassB; float iA = joint->invIA, iB = joint->invIB; @@ -274,11 +273,27 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo if (joint->enableLimit && fixedRotation == false) { + float jointAngle = aB - aA - joint->referenceAngle; + // Lower limit { - float C = joint->angle - joint->lowerAngle; + float C = jointAngle - joint->lowerAngle; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (C > 0.0f) + { + bias = C * context->inv_dt; + } + else if (removeOverlap) + { + bias = joint->biasCoefficient * C; + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + float Cdot = wB - wA; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->lowerImpulse; float oldImpulse = joint->lowerImpulse; joint->lowerImpulse = B2_MAX(joint->lowerImpulse + impulse, 0.0f); impulse = joint->lowerImpulse - oldImpulse; @@ -291,9 +306,24 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo // Note: signs are flipped to keep C positive when the constraint is satisfied. // This also keeps the impulse positive when the limit is active. { - float C = joint->upperAngle - joint->angle; + float C = joint->upperAngle - jointAngle; + + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (C > 0.0f) + { + bias = C * context->inv_dt; + } + else if (removeOverlap) + { + bias = joint->biasCoefficient * C; + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + float Cdot = wA - wB; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->lowerImpulse; float oldImpulse = joint->upperImpulse; joint->upperImpulse = B2_MAX(joint->upperImpulse + impulse, 0.0f); impulse = joint->upperImpulse - oldImpulse; @@ -346,15 +376,6 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo wB += iB * b2Cross(rB, impulse); } - if (removeOverlap) - { - float h = context->dt / context->velocityIterations; - bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; - bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); - bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; - bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); - } - bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; diff --git a/src/weld_joint.c b/src/weld_joint.c index f72557e5..45dacf2d 100644 --- a/src/weld_joint.c +++ b/src/weld_joint.c @@ -95,10 +95,10 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPositionIter); - const float aA = bodyA->angle + bodyA->deltaAngleIter; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPositionIter); - const float aB = bodyB->angle + bodyB->deltaAngleIter; + const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); + const float aA = bodyA->angle + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); + const float aB = bodyB->angle + bodyB->deltaAngle; float mA = joint->invMassA, mB = joint->invMassB; float iA = joint->invIA, iB = joint->invIB; @@ -168,12 +168,6 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r vB = b2MulAdd(vB, mB, P); wB += iB * (b2Cross(rB, P) + impulse.z); - float h = context->dt / context->velocityIterations; - bodyA->deltaAngleIter = bodyA->deltaAngle + h * wA; - bodyA->deltaPositionIter = b2MulAdd(bodyA->deltaPosition, h, vA); - bodyB->deltaAngleIter = bodyB->deltaAngle + h * wB; - bodyB->deltaPositionIter = b2MulAdd(bodyB->deltaPosition, h, vB); - bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; diff --git a/src/world.c b/src/world.c index 106672fe..bc7ec552 100644 --- a/src/world.c +++ b/src/world.c @@ -936,9 +936,9 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - //b2SolveGraphSoftPGS(world, context); + b2SolveGraphSoftPGS(world, context); //b2SolveGraphPGS(world, context); - b2SolveGraphSoftTGS(world, context); + //b2SolveGraphSoftStep(world, context); //b2SolveGraphStickyTGS(world, context); b2ValidateNoEnlarged(&world->broadPhase); From fb4999c286f9d0736f35f96a33cee6a39cc8ff4c Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 9 Sep 2023 22:43:59 -0700 Subject: [PATCH 17/51] wip --- samples/collection/sample_joints.cpp | 65 +- src/graph.c | 1748 ++++++++------------------ src/graph.h | 12 +- src/revolute_joint.c | 20 +- src/world.c | 5 +- 5 files changed, 617 insertions(+), 1233 deletions(-) diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index f9df5fe9..e6824a2d 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -124,8 +124,8 @@ class Bridge : public Sample b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; bd.position = {-34.5f + 1.0f * i, 20.0f}; - //bd.linearDamping = 0.1f; - //bd.angularDamping = 0.1f; + // bd.linearDamping = 0.1f; + // bd.angularDamping = 0.1f; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2Body_CreatePolygon(bodyId, &sd, &box); @@ -236,6 +236,7 @@ class BallAndChain : public Sample m_maxMotorTorque = 0.0f; +#if 0 { float hx = 0.5f; b2Polygon box = b2MakeBox(hx, 0.125f); @@ -290,6 +291,64 @@ class BallAndChain : public Sample m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); assert(jointIndex == e_count + 1); } +#else + { + float hx = 0.5f; + b2Polygon box = b2MakeBox(0.125f, hx); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + sd.filter.categoryBits = 1; + sd.filter.maskBits = 0; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + int32_t jointIndex = 0; + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {0.0, -(1.0f + 2.0f * i) * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {0.0f, -(2.0f * i) * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = false; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + b2Circle circle = {{0.0f, 0.0f}, 20.0f}; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {0.0f, -(1.0f + 2.0f * e_count) * hx - circle.radius + hx}; + // bd.linearDamping = 0.1f; + // bd.angularDamping = 0.1f; + + // bd.linearVelocity = {100.0f, -100.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); + + b2Vec2 pivot = {0.0f, -(2.0f * e_count) * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = false; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + assert(jointIndex == e_count + 1); + } +#endif } void UpdateUI() override @@ -361,7 +420,7 @@ class Cantilever : public Sample jd.bodyIdB = bodyId; jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); - //jd.linearHertz = 5.0f; + // jd.linearHertz = 5.0f; b2World_CreateWeldJoint(m_worldId, &jd); prevBodyId = bodyId; diff --git a/src/graph.c b/src/graph.c index e1fea36e..e2e0ba2a 100644 --- a/src/graph.c +++ b/src/graph.c @@ -17,10 +17,49 @@ #include "box2d/aabb.h" #include -#include +//#include #define maxBaumgarteVelocity 3.0f +typedef enum b2SolverStage +{ + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageFinalizePositions, + b2_stageStoreImpulses +} b2SolverStage; + +typedef struct b2SolverTaskEntry +{ + uint16_t startIndex; + uint16_t endIndex; + + // b2SolverStage + uint8_t stage; + uint8_t color; +} b2SolverTaskEntry; + +typedef struct b2SolverTaskContext +{ + b2World* world; + b2Body** awakeBodies; + b2Graph* graph; + + b2SolverTaskEntry* taskEntries; + int32_t taskCount; + int32_t* segmentIndices; + + _Atomic int startIndex; + _Atomic int endIndex; + _Atomic int completionCount; +} b2SolverTaskContext; + void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { bodyCapacity = B2_MAX(bodyCapacity, 8); @@ -34,6 +73,8 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); } + + graph->solverTaskEntries = b2CreateArray(sizeof(b2SolverTaskEntry), 32); } void b2DestroyGraph(b2Graph* graph) @@ -44,6 +85,8 @@ void b2DestroyGraph(b2Graph* graph) b2DestroyBitSet(&color->bodySet); b2DestroyArray(color->contactArray, sizeof(int32_t)); } + + b2DestroyArray(graph->solverTaskEntries, sizeof(b2SolverTaskEntry)); } void b2AddContactToGraph(b2World* world, b2Contact* contact) @@ -139,7 +182,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) { b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); - + int32_t colorContactIndex = contact->colorContactIndex; b2Array_RemoveSwap(color->contactArray, colorContactIndex); if (colorContactIndex < b2Array(color->contactArray).count) @@ -190,20 +233,14 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->flags &= ~b2_contactStatic; } -static void b2IntegrateVelocities(b2World* world, float h) +static void b2IntegrateVelocities2(b2World* world, b2Body** bodies, int32_t bodyCount, float h) { - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; b2Vec2 gravity = world->gravity; // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCapacity; ++i) + for (int32_t i = 0; i < bodyCount; ++i) { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } + b2Body* body = bodies[i]; if (body->type != b2_dynamicBody) { @@ -238,52 +275,149 @@ static void b2IntegrateVelocities(b2World* world, float h) } } -static void b2IntegrateDeltaTransform(b2World* world, float h) +typedef struct b2BodyContext { - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; + b2World* world; + b2Body** bodies; + float h; +} b2BodyContext; - for (int32_t i = 0; i < bodyCapacity; ++i) - { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } +static void b2UpdateDeltasTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +{ + B2_MAYBE_UNUSED(threadIndex); - if (body->type == b2_staticBody) - { - continue; - } + b2TracyCZoneNC(update_deltas, "Deltas", b2_colorDarkSeaGreen, true); + + b2BodyContext* bodyContext = taskContext; + b2Body** bodies = bodyContext->bodies; + float h = bodyContext->h; + + B2_ASSERT(startIndex <= endIndex); + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; body->deltaAngle += h * body->angularVelocity; body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + } + + b2TracyCZoneEnd(update_deltas); +} + +static void b2UpdateDeltas(b2World* world, b2Body** bodies, int32_t count, float h) +{ + if (count == 0) + { + return; + } + + b2BodyContext context = {world, bodies, h}; - // breakpoint helper - i += 0; + int32_t minRange = 128; + if (count < minRange) + { + b2UpdateDeltasTask(0, count, 0, &context); + } + else + { + void* userTask = world->enqueueTaskFcn(&b2UpdateDeltasTask, count, minRange, &context, world->userTaskContext); + world->finishTaskFcn(userTask, world->userTaskContext); } } -static void b2UpdatePositions(b2World* world) +static void b2UpdatePositionsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; + b2TracyCZoneNC(update_positions, "Positions", b2_colorViolet, true); - for (int32_t i = 0; i < bodyCapacity; ++i) + b2BodyContext* bodyContext = taskContext; + b2World* world = bodyContext->world; + b2Body** bodies = bodyContext->bodies; + b2Contact* contacts = world->contacts; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + float h = bodyContext->h; + + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= world->bodyPool.capacity); + B2_ASSERT(endIndex <= world->bodyPool.capacity); + + for (int32_t i = startIndex; i < endIndex; ++i) { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) + b2Body* body = bodies[i]; + + // Final substep + body->deltaAngle += h * body->angularVelocity; + body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + + body->position = b2Add(body->position, body->deltaPosition); + body->angle += body->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + + body->force = b2Vec2_zero; + body->torque = 0.0f; + + // Update shapes AABBs + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) { - continue; + b2Shape* shape = world->shapes + shapeIndex; + + B2_ASSERT(shape->isFast == false); + + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + + shapeIndex = shape->nextShapeIndex; } - if (body->type == b2_staticBody) + // TODO_ERIN legacy + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) { - continue; + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; } + } - body->position = b2Add(body->position, body->deltaPosition); - body->angle += body->deltaAngle; + b2TracyCZoneEnd(update_positions); +} + +static void b2UpdatePositions(b2World* world, b2Body** bodies, int32_t count, float h) +{ + if (count == 0) + { + return; + } + + b2BodyContext context = {world, bodies, h}; + + int32_t minRange = 32; + if (count < minRange) + { + b2UpdatePositionsTask(0, count, 0, &context); + } + else + { + void* userTask = world->enqueueTaskFcn(&b2UpdatePositionsTask, count, minRange, &context, world->userTaskContext); + world->finishTaskFcn(userTask, world->userTaskContext); } } @@ -317,15 +451,36 @@ typedef struct b2Constraint int32_t pointCount; } b2Constraint; -// h is full time step -static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float contactHertz, bool warmStart) +typedef struct b2GraphContext { - const int32_t constraintCount = b2Array(color->contactArray).count; + b2World* world; + b2GraphColor* color; + float timeStep; + float contactHertz; + bool enableWarmStarting; +} b2GraphContext; + +static void b2PrepareSoftContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +{ + B2_MAYBE_UNUSED(threadIndex); + + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); + + b2GraphContext* graphContext = taskContext; + b2GraphColor* color = graphContext->color; int32_t* contactIndices = color->contactArray; - b2Contact* contacts = world->contacts; - b2Body* bodies = world->bodies; + b2Contact* contacts = graphContext->world->contacts; + b2Body* bodies = graphContext->world->bodies; - for (int32_t i = 0; i < constraintCount; ++i) + float contactHertz = graphContext->contactHertz; + float h = graphContext->timeStep; + bool enableWarmStarting = graphContext->enableWarmStarting; + + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + + for (int32_t i = startIndex; i < endIndex; ++i) { b2Contact* contact = contacts + contactIndices[i]; @@ -339,7 +494,7 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f b2Body* bodyA = bodies + indexA; b2Body* bodyB = bodies + indexB; - b2Constraint* constraint = color->constraints + i; + b2Constraint* constraint = color->contacts + i; constraint->contact = contact; constraint->indexA = indexA; constraint->indexB = indexB; @@ -388,9 +543,8 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - // Soft contact with speculation - //const float hertz = mA == 0.0f ? 60.0f : 30.0f; - const float hertz = mA == 0.0f ? contactHertz : 0.5f * contactHertz; + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; // float d = 2.0f * zeta * omega / kNormal; @@ -412,7 +566,7 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f // TODO_ERIN this can be expanded cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - //cp->normalMass = 1.0f / (kNormal + cp->gamma); + // cp->normalMass = 1.0f / (kNormal + cp->gamma); float c = h * omega * (2.0f * zeta + h * omega); cp->impulseCoefficient = 1.0f / (1.0f + c); @@ -423,7 +577,7 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f // = -meff * mscale * (vn + bias) - imp_scale * impulse // Warm start - if (warmStart) + if (enableWarmStarting) { b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); wA -= iA * b2Cross(cp->rA, P); @@ -438,1135 +592,283 @@ static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, f bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; } + + b2TracyCZoneEnd(prepare_contact); } -static void b2InitializePGSConstraints(b2World* world, b2GraphColor* color) +// h is full time step +static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float contactHertz, bool warmStart) { - const int32_t constraintCount = b2Array(color->contactArray).count; - int32_t* contactIndices = color->contactArray; - b2Contact* contacts = world->contacts; - b2Body* bodies = world->bodies; + int32_t count = b2Array(color->contactArray).count; + if (count == 0) + { + return; + } - for (int32_t i = 0; i < constraintCount; ++i) + b2GraphContext context = {world, color, h, contactHertz, warmStart}; + + int32_t minRange = 64; + if (count < minRange) { - b2Contact* contact = contacts + contactIndices[i]; - b2Manifold* manifold = &contact->manifold; + b2PrepareSoftContactTask(0, count, 0, &context); + } + else + { + void* userPrepareTask = world->enqueueTaskFcn(&b2PrepareSoftContactTask, count, minRange, &context, world->userTaskContext); + world->finishTaskFcn(userPrepareTask, world->userTaskContext); + } +} - int32_t pointCount = manifold->pointCount; +typedef struct b2ContactContext +{ + b2World* world; + b2GraphColor* color; + float inv_dt; + bool removeOverlap; +} b2ContactContext; - B2_ASSERT(0 < pointCount && pointCount <= 2); +static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +{ + B2_MAYBE_UNUSED(threadIndex); + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + b2ContactContext* contactContext = taskContext; + b2Body* bodies = contactContext->world->bodies; + b2Constraint* constraints = contactContext->color->contacts; - b2Constraint* constraint = color->constraints + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - constraint->pointCount = pointCount; + float inv_dt = contactContext->inv_dt; + bool removeOverlap = contactContext->removeOverlap; + + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= b2Array(contactContext->color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(contactContext->color->contactArray).count); + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Constraint* constraint = constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; float mA = bodyA->invMass; float iA = bodyA->invI; float mB = bodyB->invMass; float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + const b2Vec2 dpA = bodyA->deltaPosition; + const float daA = bodyA->deltaAngle; + const b2Vec2 dpB = bodyB->deltaPosition; + const float daB = bodyB->deltaAngle; b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; for (int32_t j = 0; j < pointCount; ++j) { - const b2ManifoldPoint* mp = manifold->points + j; b2ConstraintPoint* cp = constraint->points + j; - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - cp->rA = b2Sub(mp->point, cA); - cp->rB = b2Sub(mp->point, cB); - cp->localAnchorA = b2InvRotateVector(qA, cp->rA); - cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - cp->separation = mp->separation; + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (removeOverlap) + { + bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = cp->massCoefficient; + impulseScale = cp->impulseCoefficient; + } - cp->baumgarte = 0.0f; - cp->biasCoefficient = mp->separation > 0.0f ? 1.0f : 0.0f; + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } - } -} -static void b2InitializeStickyConstraints(b2World* world, b2GraphColor* color) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - int32_t* contactIndices = color->contactArray; - b2Contact* contacts = world->contacts; - b2Body* bodies = world->bodies; + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - b2Manifold* manifold = &contact->manifold; + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - int32_t pointCount = manifold->pointCount; + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - B2_ASSERT(0 < pointCount && pointCount <= 2); + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - b2Constraint* constraint = color->constraints + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - constraint->pointCount = pointCount; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = 0.0f; - cp->tangentImpulse = 0.0f; - - cp->rA = b2Sub(mp->point, cA); - cp->rB = b2Sub(mp->point, cB); - cp->localAnchorA = b2InvRotateVector(qA, cp->rA); - cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - cp->separation = mp->separation; - - cp->baumgarte = 0.8f; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - } - - bool frictionConfirmed = false; - if (manifold->frictionPersisted) - { - int32_t confirmCount = 0; - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - b2Vec2 normalA = b2RotateVector(qA, mp->localNormalA); - b2Vec2 normalB = b2RotateVector(qB, mp->localNormalB); - - float nn = b2Dot(normalA, normalB); - if (nn < 0.98f) - { - // Relative rotation has invalidated cached friction anchors - break; - } - - b2Vec2 anchorA = b2RotateVector(qA, mp->localAnchorA); - b2Vec2 anchorB = b2RotateVector(qB, mp->localAnchorB); - b2Vec2 offset = b2Add(b2Sub(cB, cA), b2Sub(anchorB, anchorA)); - float normalSeparation = b2Dot(offset, normalA); - if (B2_ABS(normalSeparation) > 2.0f * b2_linearSlop) - { - // Normal separation has invalidated cached friction anchors - break; - } - - cp->rAf = anchorA; - cp->rBf = anchorB; - cp->tangentSeparation = b2Dot(offset, tangent); - - float rtA = b2Cross(anchorA, tangent); - float rtB = b2Cross(anchorB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - confirmCount += 1; - } - - if (confirmCount == pointCount) - { - frictionConfirmed = true; - } - } - - if (frictionConfirmed == false) - { - for (int32_t j = 0; j < pointCount; ++j) - { - b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - mp->localNormalA = b2InvRotateVector(qA, normal); - mp->localNormalB = b2InvRotateVector(qB, normal); - mp->localAnchorA = b2InvRotateVector(qA, cp->rA); - mp->localAnchorB = b2InvRotateVector(qB, cp->rB); - - cp->rAf = cp->rA; - cp->rBf = cp->rB; - cp->tangentSeparation = 0.0f; - - float rtA = b2Cross(cp->rAf, tangent); - float rtB = b2Cross(cp->rBf, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - } - } - - manifold->frictionPersisted = true; - } -} - -static void b2WarmStart(b2World* world, b2GraphColor* color) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - int32_t pointCount = constraint->pointCount; - B2_ASSERT(0 < pointCount && pointCount <= 2); - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2WarmStartAll(b2World* world, b2Constraint* constraints, int32_t constraintCount) -{ - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = constraints + i; - - int32_t pointCount = constraint->pointCount; - B2_ASSERT(0 < pointCount && pointCount <= 2); - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2SolveVelocityConstraints(b2World* world, b2GraphColor* color, float inv_dt) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); - float friction = constraint->friction; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * (vn + cp->biasCoefficient * cp->separation * inv_dt); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2SolveVelocityConstraintsSorted(b2World* world, b2Constraint* constraints, int32_t constraintCount, float inv_dt) -{ - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); - float friction = constraint->friction; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * (vn + cp->biasCoefficient * cp->separation * inv_dt); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -// inv_dt is full time step inverse -static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - const b2Vec2 dpA = bodyA->deltaPosition; - const float daA = bodyA->deltaAngle; - const b2Vec2 dpB = bodyB->deltaPosition; - const float daB = bodyB->deltaAngle; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (removeOverlap) - { - bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); - //bias = cp->biasCoefficient * s; - massScale = cp->massCoefficient; - impulseScale = cp->impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - //float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2SolveVelocityConstraintsSticky(b2World* world, b2GraphColor* color, float minSeparation, float invh) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - const b2Vec2 dpA = bodyA->deltaPosition; - const float daA = bodyA->deltaAngle; - const b2Vec2 dpB = bodyB->deltaPosition; - const float daB = bodyB->deltaAngle; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = 0.3f; //constraint->friction; - - float totalNormalImpulse = 0.0f; - - // Non-penetration constraints - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - - float bias = 0.0f; - if (s > 0.0f) - { - // Speculative - bias = s * invh; - - } - else if (minSeparation < 0.0f) - { - bias = B2_MAX(-maxBaumgarteVelocity, cp->baumgarte * s * invh); - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * (vn + bias); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - totalNormalImpulse += cp->normalImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - // Sticky friction constraints - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rBf)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rAf)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rBf)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rAf)); - float ds = b2Dot(b2Sub(prB, prA), tangent); - float s = cp->tangentSeparation + ds; - float bias = 0.5f * s * invh; - - // Compute tangent impulse - float vt = b2Dot(dv, tangent); - float impulse = -cp->tangentMass * (vt + bias); - - // max friction uses an average of the total normal impulse because persistent friction anchors don't line up with normal anchors - float maxFriction = 0.5f * friction * totalNormalImpulse; - - // Clamp the accumulated impulse - float newImpulse = cp->tangentImpulse + impulse; - if (newImpulse < -maxFriction) - { - newImpulse = -maxFriction; - constraint->contact->manifold.frictionPersisted = false; - } - else if (newImpulse > maxFriction) - { - newImpulse = maxFriction; - constraint->contact->manifold.frictionPersisted = false; - } - - impulse = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} - -static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) -{ - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = constraints + i; - b2Contact* contact = constraint->contact; - - b2Manifold* manifold = &contact->manifold; - - for (int32_t j = 0; j < constraint->pointCount; ++j) - { - manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; - manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; - } - } -} - -static void b2IntegratePositions(b2World* world, float h) -{ - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCapacity; ++i) - { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } - - if (body->type == b2_staticBody) - { - continue; - } - - b2Vec2 c = body->position; - float a = body->angle; - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; - - // Clamp large velocities - b2Vec2 translation = b2MulSV(h, v); - if (b2Dot(translation, translation) > b2_maxTranslationSquared) - { - float ratio = b2_maxTranslation / b2Length(translation); - v = b2MulSV(ratio, v); - } - - float rotation = h * w; - if (rotation * rotation > b2_maxRotationSquared) - { - float ratio = b2_maxRotation / B2_ABS(rotation); - w *= ratio; - } - - // Integrate - c = b2MulAdd(c, h, v); - a += h * w; - - body->position = c; - body->angle = a; - body->linearVelocity = v; - body->angularVelocity = w; - } -} - -static void b2SolveContactPosition(b2World* world, b2GraphColor* color) -{ - const int32_t constraintCount = b2Array(color->contactArray).count; - b2Body* bodies = world->bodies; - float slop = b2_linearSlop; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = color->constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - - b2Vec2 normal = constraint->normal; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); - - b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); - b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); - - // Current separation - b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - float separation = b2Dot(d, normal) + cp->separation; - - // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. - // This improves stacking stability significantly. - float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); - - // Compute the effective mass. - float rnA = b2Cross(rA, normal); - float rnB = b2Cross(rB, normal); - float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - // Compute normal impulse - float impulse = K > 0.0f ? -C / K : 0.0f; - - b2Vec2 P = b2MulSV(impulse, normal); - - cA = b2MulSub(cA, mA, P); - aA -= iA * b2Cross(cp->rA, P); - - cB = b2MulAdd(cB, mB, P); - aB += iB * b2Cross(cp->rB, P); - } - - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; - } -} - -static void b2SolvePositionConstraintsSorted(b2World* world, b2Constraint* constraints, int32_t constraintCount) -{ - b2Body* bodies = world->bodies; - - for (int32_t i = 0; i < constraintCount; ++i) - { - b2Constraint* constraint = constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - - b2Vec2 normal = constraint->normal; - float slop = b2_linearSlop; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); - - b2Vec2 rA = b2RotateVector(qA, cp->localAnchorA); - b2Vec2 rB = b2RotateVector(qB, cp->localAnchorB); - - // Current separation - b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - float separation = b2Dot(d, normal) + cp->separation; - - // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. - // This improves stacking stability significantly. - float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); - - // Compute the effective mass. - float rnA = b2Cross(rA, normal); - float rnB = b2Cross(rB, normal); - float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - // Compute normal impulse - float impulse = K > 0.0f ? -C / K : 0.0f; - - b2Vec2 P = b2MulSV(impulse, normal); - - cA = b2MulSub(cA, mA, P); - aA -= iA * b2Cross(cp->rA, P); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - cB = b2MulAdd(cB, mB, P); - aB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } + + b2TracyCZoneEnd(solve_contact); } -// Update body transform, mark broadphase AABB, build awake contact bits -static void b2FinalizeSolve(b2World* world) +// inv_dt is full time step inverse +static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) { - b2Body* bodies = world->bodies; - int32_t bodyCapacity = world->bodyPool.capacity; - b2Contact* contacts = world->contacts; - - b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[0].shapeBitSet; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCapacity; ++i) + int32_t count = b2Array(color->contactArray).count; + if (count == 0) { - b2Body* body = bodies + i; - if (b2ObjectValid(&body->object) == false) - { - continue; - } - - if (body->type == b2_staticBody) - { - continue; - } - - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - - body->force = b2Vec2_zero; - body->torque = 0.0f; - - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; - - B2_ASSERT(shape->isFast == false); - - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - - shapeIndex = shape->nextShapeIndex; - } + return; + } - // TODO_ERIN legacy - int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; + b2ContactContext context = {world, color, inv_dt, removeOverlap}; - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } + int32_t minRange = 128; + if (count < minRange) + { + b2SolveContactTask(0, count, 0, &context); + } + else + { + void* userSolveTask = world->enqueueTaskFcn(&b2SolveContactTask, count, minRange, &context, world->userTaskContext); + world->finishTaskFcn(userSolveTask, world->userTaskContext); } } -int b2CompareConstraints(const void* ptr1, const void* ptr2) +static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) { - const b2Constraint* c1 = ptr1; - const b2Constraint* c2 = ptr2; + for (int32_t i = 0; i < constraintCount; ++i) + { + b2Constraint* constraint = constraints + i; + b2Contact* contact = constraint->contact; - b2Vec2 point1 = c1->contact->manifold.points[0].point; - b2Vec2 point2 = c2->contact->manifold.points[0].point; + b2Manifold* manifold = &contact->manifold; - if (B2_ABS(point1.y - point2.y) > 5.0f * b2_linearSlop) - { - if (point1.y < point2.y) + for (int32_t j = 0; j < constraint->pointCount; ++j) { - return 1; + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; } - - return -1; - } - else if (point1.x < point2.x) - { - return -1; } - - return 1; } -int b2RandomizeConstraints(const void* ptr1, const void* ptr2) +void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { - B2_MAYBE_UNUSED(ptr1); - B2_MAYBE_UNUSED(ptr2); + B2_MAYBE_UNUSED(startIndex); + B2_MAYBE_UNUSED(endIndex); + + b2SolverTaskContext* context = taskContext; + B2_MAYBE_UNUSED(context); - if (rand() & 1) + if (threadIndex == 0) { - return -1; + // Manage and execute tasks + } + else + { + // Execute tasks } - - return 1; } -void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext) +void b2SolveGraph(b2World* world, , const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; + b2Joint* joints = world->joints; - int32_t constraintCount = 0; - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - constraintCount += b2Array(colors[i].contactArray).count; - } - - b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); - int32_t base = 0; - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - colors[i].constraints = constraints + base; - base += b2Array(colors[i].contactArray).count; - } - - B2_ASSERT(base == constraintCount); - - int32_t velocityIterations = stepContext->velocityIterations; - int32_t positionIterations = stepContext->positionIterations; - float h = stepContext->dt; - float inv_h = stepContext->inv_dt; - - b2IntegrateVelocities(world, h); - - for (int32_t i = 0; i < b2_graphColorCount; ++i) + int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; + int32_t awakeBodyCount = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) { - b2InitializePGSConstraints(world, colors + i); + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + awakeBodyCount += island->bodyCount; } - b2WarmStartAll(world, constraints, constraintCount); - - for (int32_t i = 0; i < constraintCount; ++i) + if (awakeBodyCount == 0) { - constraints[i].contact->manifold.constraintIndex = i; + return; } - for (int32_t iter = 0; iter < velocityIterations; ++iter) + b2Body* bodies = world->bodies; + b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "body pointers"); + int32_t index = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) { - for (int32_t i = 0; i < b2_graphColorCount; ++i) + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) { - b2SolveVelocityConstraints(world, colors + i, inv_h); - } - } + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); - b2StoreImpulses(constraints, constraintCount); - - b2IntegratePositions(world, h); - - for (int32_t iter = 0; iter < positionIterations; ++iter) - { - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveContactPosition(world, colors + i); + awakeBodies[index++] = body; + bodyIndex = body->islandNext; } } - b2FinalizeSolve(world); - - b2FreeStackItem(world->stackAllocator, constraints); -} + int32_t bodyBlockSize = 1 << 6; + int32_t bodyTaskCount = ((awakeBodyCount - 1) >> 6) + 1; -// inferior joint stability to soft step -void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) -{ - b2Graph* graph = &world->graph; - b2GraphColor* colors = graph->colors; - b2Joint* joints = world->joints; + B2_ASSERT(index == awakeBodyCount); + int32_t perColorTaskCount[b2_graphColorCount]; + + int32_t contactBlockSize = 1 << 5; + int32_t constraintTaskCount = 0; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { - constraintCount += b2Array(colors[i].contactArray).count; + int32_t count = b2Array(colors[i].contactArray).count; + perColorTaskCount[i] = count > 0 ? ((count - 1) >> 5) + 1 : 0; + constraintTaskCount += perColorTaskCount[i]; + constraintCount += count; } b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); @@ -1574,85 +876,126 @@ void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - colors[i].constraints = constraints + base; + colors[i].contacts = constraints + base; base += b2Array(colors[i].contactArray).count; } - B2_ASSERT(base == constraintCount); - - int32_t velocityIterations = stepContext->velocityIterations; - int32_t positionIterations = stepContext->positionIterations; - float dt = stepContext->dt; - float inv_dt = stepContext->inv_dt; - float contactHertz = 120.0f; + int32_t storeBlockSize = 1 << 6; + int32_t storeTaskCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; + int32_t velIters = stepContext->velocityIterations; + int32_t posIters = stepContext->positionIterations; - b2IntegrateVelocities(world, dt); + // TODO_ERIN joint tasks + int32_t taskCount = bodyTaskCount + constraintTaskCount + velIters * (constraintTaskCount + bodyTaskCount) + + posIters * (constraintTaskCount) + bodyTaskCount + storeTaskCount; + + b2SolverTaskEntry* entries = b2AllocateStackItem(world->stackAllocator, taskCount * sizeof(b2SolverTaskEntry), "task entries"); - for (int32_t i = 0; i < b2_graphColorCount; ++i) + int32_t taskIndex = 0; + for (int32_t i = 0; i < bodyTaskCount; ++i) { - b2PrepareSoftContact(world, colors + i, dt, contactHertz, true); + int32_t startIndex = i * bodyBlockSize; + int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageIntegrateVelocities, 0xFF}; } - int32_t jointCapacity = world->jointPool.capacity; - - for (int32_t i = 0; i < jointCapacity; ++i) + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) + int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; + int32_t colorTaskCount = perColorTaskCount[i]; + + for (int32_t j = 0; j < colorTaskCount; ++j) { - continue; + int32_t startIndex = j * contactBlockSize; + int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stagePrepareContacts, (uint8_t)i}; } - - b2PrepareJoint(joint, stepContext); } - - for (int32_t iter = 0; iter < velocityIterations; ++iter) + + for (int32_t iter = 0; iter < velIters; ++iter) { - for (int32_t i = 0; i < jointCapacity; ++i) + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) + int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; + int32_t colorTaskCount = perColorTaskCount[i]; + + for (int32_t j = 0; j < colorTaskCount; ++j) { - continue; + int32_t startIndex = j * contactBlockSize; + int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageSolveContacts, (uint8_t)i}; } - - bool removeOverlap = true; - b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) + + for (int32_t i = 0; i < bodyTaskCount; ++i) { - b2SolveSoftContact(world, colors + i, inv_dt, true); + int32_t startIndex = i * bodyBlockSize; + int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageIntegratePositions, 0xFF}; } } - b2IntegratePositions(world, dt); - - for (int32_t iter = 0; iter < positionIterations; ++iter) + for (int32_t iter = 0; iter < posIters; ++iter) { - for (int32_t i = 0; i < jointCapacity; ++i) + for (int32_t i = 0; i < b2_graphColorCount; ++i) { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) + int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; + int32_t colorTaskCount = perColorTaskCount[i]; + + for (int32_t j = 0; j < colorTaskCount; ++j) { - continue; + int32_t startIndex = j * contactBlockSize; + int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageCalmContacts, (uint8_t)i}; } - - bool removeOverlap = false; - b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); } + } - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveSoftContact(world, colors + i, inv_dt, false); - } + for (int32_t i = 0; i < bodyTaskCount; ++i) + { + int32_t startIndex = i * bodyBlockSize; + int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageFinalizePositions, 0xFF}; } - b2StoreImpulses(constraints, constraintCount); + for (int32_t i = 0; i < storeTaskCount; ++i) + { + int32_t startIndex = i * storeBlockSize; + int32_t endIndex = B2_MIN(startIndex + storeBlockSize, constraintCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageStoreImpulses, 0xFF}; + } - b2FinalizeSolve(world); + B2_ASSERT(taskIndex == taskCount); + /* + typedef enum b2SolverStage + { + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageFinalizePositions, + b2_stageStoreImpulses + } b2SolverStage; + */ + + b2SolverTaskContext context; + context.world = world; + context.awakeBodies = awakeBodies; + context.graph = graph; + context.taskCount = taskCount; + context.taskEnties = entries; + context.startIndex = 0; + context.endIndex = 0; + context.completionCount = 0; + + b2FreeStackItem(world->stackAllocator, entries); b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, awakeBodies); } // Threading: @@ -1679,6 +1022,35 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) b2GraphColor* colors = graph->colors; b2Joint* joints = world->joints; + int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; + int32_t awakeBodyCount = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + awakeBodyCount += island->bodyCount; + } + + b2Body* bodies = world->bodies; + b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "body pointers"); + int32_t index = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); + + awakeBodies[index++] = body; + bodyIndex = body->islandNext; + } + } + + B2_ASSERT(index == awakeBodyCount); + int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -1690,19 +1062,19 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { - colors[i].constraints = constraints + base; + colors[i].contacts = constraints + base; base += b2Array(colors[i].contactArray).count; } B2_ASSERT(base == constraintCount); // Full step apply gravity - b2IntegrateVelocities(world, stepContext->dt); + b2IntegrateVelocities2(world, awakeBodies, awakeBodyCount, stepContext->dt); // 30 is a bit soft, 60 oscillates too much - //const float contactHertz = 45.0f; - //const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 120.0f; + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -1748,13 +1120,16 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) b2SolveSoftContact(world, colors + i, h, removeOverlap); } - // TODO_ERIN final iteration should update world positions - b2IntegrateDeltaTransform(world, h); + if (substep < substepCount - 1) + { + b2UpdateDeltas(world, awakeBodies, awakeBodyCount, h); + } + else + { + b2UpdatePositions(world, awakeBodies, awakeBodyCount, h); + } } - // TODO_ERIN wasteful since I just looped over bodies in b2IntegrateDeltaTransform - b2UpdatePositions(world); - int32_t positionIterations = stepContext->positionIterations; for (int32_t iter = 0; iter < positionIterations; ++iter) { @@ -1779,69 +1154,6 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) b2StoreImpulses(constraints, constraintCount); - b2FinalizeSolve(world); - - b2FreeStackItem(world->stackAllocator, constraints); -} - -// Sticky -void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext) -{ - b2Graph* graph = &world->graph; - b2GraphColor* colors = graph->colors; - - int32_t constraintCount = 0; - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - constraintCount += b2Array(colors[i].contactArray).count; - } - - b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); - int32_t base = 0; - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - colors[i].constraints = constraints + base; - base += b2Array(colors[i].contactArray).count; - } - - B2_ASSERT(base == constraintCount); - - b2IntegrateVelocities(world, stepContext->dt); - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2InitializeStickyConstraints(world, colors + i); - } - - int32_t substepCount = stepContext->velocityIterations; - float h = stepContext->dt / substepCount; - float invh = substepCount / stepContext->dt; - - for (int32_t substep = 0; substep < substepCount; ++substep) - { - // One constraint iteration - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveVelocityConstraintsSticky(world, colors + i, -b2_huge, invh); - } - - b2IntegrateDeltaTransform(world, h); - } - - b2UpdatePositions(world); - - int32_t positionIterations = stepContext->positionIterations; - for (int32_t iter = 0; iter < positionIterations; ++iter) - { - // Solve with no baumgarte and no affect on position - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - b2SolveVelocityConstraintsSticky(world, colors + i, 0.0f, 0.0f); - } - } - - b2FinalizeSolve(world); - b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, awakeBodies); } diff --git a/src/graph.h b/src/graph.h index b2b15ff2..aececa13 100644 --- a/src/graph.h +++ b/src/graph.h @@ -10,8 +10,10 @@ #include "box2d/dynamic_tree.h" typedef struct b2Contact b2Contact; +typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; +typedef struct b2SolverTaskEntry b2SolverTaskEntry; // TODO_ERIN fixme #define b2_graphColorCount 64 @@ -20,13 +22,18 @@ typedef struct b2GraphColor { b2BitSet bodySet; int32_t* contactArray; - struct b2Constraint* constraints; + int32_t* jointArray; + + // transient + struct b2Constraint* contacts; } b2GraphColor; typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; int32_t colorCount; + + b2SolverTaskEntry* solverTaskEntries; } b2Graph; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); @@ -35,6 +42,9 @@ void b2DestroyGraph(b2Graph* graph); void b2AddContactToGraph(b2World* world, b2Contact* contact); void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); +void b2AddJointToGraph(b2World* world, b2Joint* contact); +void b2RemoveJointFromGraph(b2World* world, b2Joint* contact); + void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext); void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext); diff --git a/src/revolute_joint.c b/src/revolute_joint.c index 01a4f157..be6ddb19 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -86,9 +86,8 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) fixedRotation = true; } - // TODO_ERIN softness experiment - // hertz = 6.0f * subStep/dt - const float hertz = 0.25f * context->velocityIterations * context->inv_dt; + // hertz = 1/4 * substep Hz + const float hertz = (1.0f / 4.0f) * context->velocityIterations * context->inv_dt; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; float h = context->dt; @@ -99,10 +98,6 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->impulseCoefficient = 1.0f / (1.0f + c); joint->massCoefficient = c * joint->impulseCoefficient; - //joint->biasCoefficient = 0.5f; - //joint->impulseCoefficient = 0.0f; - //joint->massCoefficient = 1.0f; - joint->angle = aB - aA - joint->referenceAngle; if (joint->enableLimit == false || fixedRotation) { @@ -121,6 +116,7 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) // Soft step works best when bilateral constraints have no warm starting. joint->impulse = b2Vec2_zero; + //joint->impulse.x = 0.0f; joint->motorImpulse *= dtRatio; joint->lowerImpulse *= dtRatio; joint->upperImpulse *= dtRatio; @@ -133,6 +129,11 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) vB = b2MulAdd(vB, mB, P); wB += iB * (b2Cross(joint->rB, P) + axialImpulse); + + //vA.x = 0.0f; + //wA = 0.0f; + //vB.x = 0.0f; + //wB = 0.0f; } else { @@ -375,6 +376,11 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo vB = b2MulAdd(vB, mB, impulse); wB += iB * b2Cross(rB, impulse); } + + //vA.x = 0.0f; + //wA = 0.0f; + //vB.x = 0.0f; + //wB = 0.0f; bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; diff --git a/src/world.c b/src/world.c index bc7ec552..8bc4db89 100644 --- a/src/world.c +++ b/src/world.c @@ -936,10 +936,7 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - b2SolveGraphSoftPGS(world, context); - //b2SolveGraphPGS(world, context); - //b2SolveGraphSoftStep(world, context); - //b2SolveGraphStickyTGS(world, context); + b2SolveGraphSoftStep(world, context); b2ValidateNoEnlarged(&world->broadPhase); From a1e48ad4bf3a8d70b57f1f41ec64a7316f60c763 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 10 Sep 2023 23:00:22 -0700 Subject: [PATCH 18/51] wip --- src/graph.c | 356 ++++++++++++++++++++++++++++++++++++++++++++-------- src/graph.h | 3 - 2 files changed, 303 insertions(+), 56 deletions(-) diff --git a/src/graph.c b/src/graph.c index e2e0ba2a..2230b0c6 100644 --- a/src/graph.c +++ b/src/graph.c @@ -17,11 +17,12 @@ #include "box2d/aabb.h" #include +#include //#include #define maxBaumgarteVelocity 3.0f -typedef enum b2SolverStage +typedef enum b2SolverStageType { b2_stageIntegrateVelocities = 0, b2_stagePrepareContacts, @@ -33,17 +34,24 @@ typedef enum b2SolverStage b2_stageCalmContacts, b2_stageFinalizePositions, b2_stageStoreImpulses -} b2SolverStage; +} b2SolverStageType; typedef struct b2SolverTaskEntry { - uint16_t startIndex; - uint16_t endIndex; + int32_t startIndex; + int32_t endIndex; +} b2SolverTaskEntry; - // b2SolverStage - uint8_t stage; +// Each stage must be completed before going to the next stage. +typedef struct b2SolverStage +{ + b2SolverTaskEntry* taskEntries; + _Atomic int taskIndex; + _Atomic int completionCount; + int32_t taskCount; uint8_t color; -} b2SolverTaskEntry; + b2SolverStageType type; +} b2SolverStage; typedef struct b2SolverTaskContext { @@ -51,13 +59,10 @@ typedef struct b2SolverTaskContext b2Body** awakeBodies; b2Graph* graph; - b2SolverTaskEntry* taskEntries; - int32_t taskCount; - int32_t* segmentIndices; + b2SolverStage* stages; + int32_t stageCount; - _Atomic int startIndex; - _Atomic int endIndex; - _Atomic int completionCount; + _Atomic int stageIndex; } b2SolverTaskContext; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) @@ -73,8 +78,6 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); } - - graph->solverTaskEntries = b2CreateArray(sizeof(b2SolverTaskEntry), 32); } void b2DestroyGraph(b2Graph* graph) @@ -85,8 +88,6 @@ void b2DestroyGraph(b2Graph* graph) b2DestroyBitSet(&color->bodySet); b2DestroyArray(color->contactArray, sizeof(int32_t)); } - - b2DestroyArray(graph->solverTaskEntries, sizeof(b2SolverTaskEntry)); } void b2AddContactToGraph(b2World* world, b2Contact* contact) @@ -797,6 +798,87 @@ static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) } } +void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mainThread) +{ + int32_t taskCount = stage->taskCount; + b2SolverStageType type = stage->type; + + // TODO_ERIN + if (type == b2_stagePrepareJoints) + { + if (mainThread) + { + b2PrepareJointsTask(context); + } + return; + } + + // TODO_ERIN + if (type == b2_stageSolveJoints) + { + if (mainThread) + { + bool removeOverlap = true; + b2SolveJointsTask(context, removeOverlap); + } + return; + } + + // TODO_ERIN + if (type == b2_stageCalmJoints) + { + if (mainThread) + { + bool removeOverlap = false; + b2SolveJointsTask(context, removeOverlap); + } + return; + } + + while (true) + { + int32_t taskIndex = atomic_fetch_add(&stage->taskIndex, 1); + if (taskIndex == taskCount) + { + return; + } + + const b2SolverTaskEntry* entry = stage->taskEntries + taskIndex; + switch (type) + { + case b2_stageIntegrateVelocities: + b2IntegrationVelocitiesTask(entry, context); + break; + + case b2_stagePrepareContacts: + b2PrepareContactsTask(entry, context, stage->color); + break; + + case b2_stageSolveContacts: + b2SolveContactsTask(entry, context, stage->color, true); + break; + + case b2_stageIntegratePositions: + b2IntegrationPositionsTask(entry, context); + break; + + case b2_stageCalmContacts: + b2SolveContactsTask(entry, context, stage->color, false); + break; + + case b2_stageFinalizePositions: + b2FinalizePositionsTask(entry, context); + break; + + case b2_stageStoreImpulses: + b2StoreImpulsesTask(entry, context); + break; + } + + atomic_fetch_add(&stage->completionCount, 1); + } +} + void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { B2_MAYBE_UNUSED(startIndex); @@ -805,21 +887,74 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo b2SolverTaskContext* context = taskContext; B2_MAYBE_UNUSED(context); + b2SolverStage* stages = context->stages; + int32_t stageCount = context->stageCount; + if (threadIndex == 0) { - // Manage and execute tasks + // Main thread + while (true) + { + b2SolverStage* stage = stages + context->stageIndex; + + // Manage and execute tasks + b2ExecuteStage(stage, context, true); + + // Wait for stage completion + int32_t stageTaskCount = stage->count; + while (atomic_load(&stage->completionCount) < stageTaskCount) + { + _mm_pause(); + _mm_pause(); + _mm_pause(); + } + + // Next stage + int32_t stageIndex = atomic_fetch_add(&context->stageIndex, 1); + + if (stageIndex == stageCount - 1) + { + // All done + return; + } + } } - else + + // Worker + while(true) { - // Execute tasks + b2SolverStage* stage = stages + stageIndex; + + // Manage and execute tasks + b2ExecuteStage(stage, context, false); + + // Wait for next stage + while (true) + { + int32_t newStageIndex = atomic_load(&context->stageIndex); + if (newStageIndex > stageIndex) + { + if (newStageIndex == stageCount) + { + return; + } + + stageIndex = newStageIndex; + break; + } + + // spin + _mm_pause(); + _mm_pause(); + _mm_pause(); + } } } -void b2SolveGraph(b2World* world, , const b2StepContext* stepContext) +void b2SolveGraph(b2World* world, const b2StepContext* stepContext) { b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; - b2Joint* joints = world->joints; int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; int32_t awakeBodyCount = 0; @@ -861,12 +996,21 @@ void b2SolveGraph(b2World* world, , const b2StepContext* stepContext) int32_t perColorTaskCount[b2_graphColorCount]; int32_t contactBlockSize = 1 << 5; + int32_t activeColorCount = 0; int32_t constraintTaskCount = 0; int32_t constraintCount = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t count = b2Array(colors[i].contactArray).count; - perColorTaskCount[i] = count > 0 ? ((count - 1) >> 5) + 1 : 0; + if (count > 0) + { + activeColorCount += 1; + perColorTaskCount[i] = ((count - 1) >> 5) + 1; + } + else + { + perColorTaskCount[i] = 0; + } constraintTaskCount += perColorTaskCount[i]; constraintCount += count; } @@ -886,114 +1030,218 @@ void b2SolveGraph(b2World* world, , const b2StepContext* stepContext) int32_t posIters = stepContext->positionIterations; // TODO_ERIN joint tasks + int32_t stageCount = 1 + 1 + activeColorCount + velIters * (1 + activeColorCount + 1) + posIters * (1 + activeColorCount) + 1 + 1; + b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); + int32_t taskCount = bodyTaskCount + constraintTaskCount + velIters * (constraintTaskCount + bodyTaskCount) + posIters * (constraintTaskCount) + bodyTaskCount + storeTaskCount; b2SolverTaskEntry* entries = b2AllocateStackItem(world->stackAllocator, taskCount * sizeof(b2SolverTaskEntry), "task entries"); int32_t taskIndex = 0; + int32_t stageIndex = 0; + + // Integrate velocities task setup + stages[stageIndex].type = b2_stageIntegrateVelocities; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; for (int32_t i = 0; i < bodyTaskCount; ++i) { int32_t startIndex = i * bodyBlockSize; int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageIntegrateVelocities, 0xFF}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } - + stageIndex += 1; + + // Prepare joints + stages[stageIndex].type = b2_stagePrepareJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = 0; + stages[stageIndex].currentIndex = 0; + stageIndex += 1; + + // Prepare constraints task setup for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; int32_t colorTaskCount = perColorTaskCount[i]; + if (colorTaskCount == 0) + { + continue; + } + + stages[stageIndex].type = b2_stagePrepareContacts; + stages[stageIndex].color = (uint8_t)i; + stages[stageIndex].count = colorTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; for (int32_t j = 0; j < colorTaskCount; ++j) { int32_t startIndex = j * contactBlockSize; int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stagePrepareContacts, (uint8_t)i}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + + stageIndex += 1; } + // Velocity iterations task setup for (int32_t iter = 0; iter < velIters; ++iter) { + // Joints + stages[stageIndex].type = b2_stageSolveJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = 0; + stages[stageIndex].currentIndex = 0; + stageIndex += 1; + + // Constraint graph for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; int32_t colorTaskCount = perColorTaskCount[i]; + if (colorTaskCount == 0) + { + continue; + } + + stages[stageIndex].type = b2_stageSolveContacts; + stages[stageIndex].color = (uint8_t)i; + stages[stageIndex].count = colorTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; + for (int32_t j = 0; j < colorTaskCount; ++j) { int32_t startIndex = j * contactBlockSize; int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageSolveContacts, (uint8_t)i}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + + stageIndex += 1; } - + + // Integrate positions + stages[stageIndex].type = b2_stageIntegratePositions; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; + for (int32_t i = 0; i < bodyTaskCount; ++i) { int32_t startIndex = i * bodyBlockSize; int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageIntegratePositions, 0xFF}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + + stageIndex += 1; } + // Calming iterations task setup for (int32_t iter = 0; iter < posIters; ++iter) { + // Joints + stages[stageIndex].type = b2_stageCalmJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = 0; + stages[stageIndex].currentIndex = 0; + stageIndex += 1; + for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; int32_t colorTaskCount = perColorTaskCount[i]; + if (colorTaskCount == 0) + { + continue; + } + + stages[stageIndex].type = b2_stageCalmContacts; + stages[stageIndex].color = (uint8_t)i; + stages[stageIndex].count = colorTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; + for (int32_t j = 0; j < colorTaskCount; ++j) { int32_t startIndex = j * contactBlockSize; int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageCalmContacts, (uint8_t)i}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + stageIndex += 1; } } + // Prepare finalize position stage + stages[stageIndex].type = b2_stageFinalizePositions; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; + for (int32_t i = 0; i < bodyTaskCount; ++i) { int32_t startIndex = i * bodyBlockSize; int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageFinalizePositions, 0xFF}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + stageIndex += 1; + + // Prepare store impulses stage + stages[stageIndex].type = b2_stageStoreImpulses; + stages[stageIndex].color = 0xFF; + stages[stageIndex].count = constraintCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].entries = entries + taskIndex; + stages[stageIndex].currentIndex = 0; for (int32_t i = 0; i < storeTaskCount; ++i) { int32_t startIndex = i * storeBlockSize; int32_t endIndex = B2_MIN(startIndex + storeBlockSize, constraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex, b2_stageStoreImpulses, 0xFF}; + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; } + stageIndex += 1; + B2_ASSERT(stageIndex == stageCount); B2_ASSERT(taskIndex == taskCount); - /* - typedef enum b2SolverStage - { - b2_stageIntegrateVelocities = 0, - b2_stagePrepareContacts, - b2_stagePrepareJoints, - b2_stageSolveJoints, - b2_stageSolveContacts, - b2_stageIntegratePositions, - b2_stageCalmJoints, - b2_stageCalmContacts, - b2_stageFinalizePositions, - b2_stageStoreImpulses - } b2SolverStage; - */ - b2SolverTaskContext context; context.world = world; context.awakeBodies = awakeBodies; context.graph = graph; - context.taskCount = taskCount; - context.taskEnties = entries; - context.startIndex = 0; - context.endIndex = 0; - context.completionCount = 0; + context.stageCount = taskCount; + context.stages = stages; + context.stageIndex = 0; + + int32_t workerCount = world->workerCount; + for (int32_t i = 0; i < workerCount; ++i) + { + world->enqueueTaskFcn(b2SolverTask, 1, 1, &context, world->userTaskContext); + } + + world->finishAllTasksFcn(world->userTaskContext); b2FreeStackItem(world->stackAllocator, entries); + b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, awakeBodies); } @@ -1018,6 +1266,8 @@ void b2SolveGraph(b2World* world, , const b2StepContext* stepContext) // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) { + b2SolveGraph(world, stepContext); + b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; b2Joint* joints = world->joints; diff --git a/src/graph.h b/src/graph.h index aececa13..831fbe53 100644 --- a/src/graph.h +++ b/src/graph.h @@ -13,7 +13,6 @@ typedef struct b2Contact b2Contact; typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; -typedef struct b2SolverTaskEntry b2SolverTaskEntry; // TODO_ERIN fixme #define b2_graphColorCount 64 @@ -32,8 +31,6 @@ typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; int32_t colorCount; - - b2SolverTaskEntry* solverTaskEntries; } b2Graph; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); From 2c100a054ec0bb8f2185362b641ebcc522e0f2ab Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 12 Sep 2023 22:32:40 -0700 Subject: [PATCH 19/51] wip --- samples/collection/sample_joints.cpp | 2 +- src/graph.c | 987 ++++++++++++++++++++------- src/graph.h | 4 +- src/world.c | 11 +- src/world.h | 2 +- 5 files changed, 754 insertions(+), 252 deletions(-) diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index e6824a2d..146bd20b 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -236,7 +236,7 @@ class BallAndChain : public Sample m_maxMotorTorque = 0.0f; -#if 0 +#if 1 { float hx = 0.5f; b2Polygon box = b2MakeBox(hx, 0.125f); diff --git a/src/graph.c b/src/graph.c index 2230b0c6..13c18636 100644 --- a/src/graph.c +++ b/src/graph.c @@ -22,6 +22,41 @@ #define maxBaumgarteVelocity 3.0f +// TODO_ERIN clean this up +typedef struct b2ConstraintPoint +{ + b2Vec2 rA, rB; + b2Vec2 localAnchorA, localAnchorB; + float separation; + float normalImpulse; + float tangentImpulse; + float normalMass; + float tangentMass; + float massCoefficient; + float biasCoefficient; + float impulseCoefficient; +} b2ConstraintPoint; + +typedef struct b2Constraint +{ + b2Contact* contact; + int32_t indexA; + int32_t indexB; + b2ConstraintPoint points[2]; + b2Vec2 normal; + float friction; + int32_t pointCount; +} b2Constraint; + +typedef struct b2GraphContext +{ + b2World* world; + b2GraphColor* color; + float timeStep; + float contactHertz; + bool enableWarmStarting; +} b2GraphContext; + typedef enum b2SolverStageType { b2_stageIntegrateVelocities = 0, @@ -45,12 +80,12 @@ typedef struct b2SolverTaskEntry // Each stage must be completed before going to the next stage. typedef struct b2SolverStage { + b2SolverStageType type; b2SolverTaskEntry* taskEntries; _Atomic int taskIndex; _Atomic int completionCount; int32_t taskCount; uint8_t color; - b2SolverStageType type; } b2SolverStage; typedef struct b2SolverTaskContext @@ -58,6 +93,13 @@ typedef struct b2SolverTaskContext b2World* world; b2Body** awakeBodies; b2Graph* graph; + const b2StepContext* stepContext; + b2Constraint* constraints; + + float timeStep; + float invTimeStep; + float subStep; + float invSubStep; b2SolverStage* stages; int32_t stageCount; @@ -134,146 +176,614 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) continue; } - b2SetBitGrow(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexA); + + contact->colorContactIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + else if (typeB == b2_dynamicBody) + { + // Static contacts never in color 0 + for (int32_t i = 1; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexB); + + contact->colorContactIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorContactIndex != B2_NULL_INDEX); +} + +void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) +{ + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorContactIndex != B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); + int32_t bodyIndexA = contact->edges[0].bodyIndex; + int32_t bodyIndexB = contact->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + b2ClearBit(&color->bodySet, bodyIndexB); + } + else if (typeA == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + } + else if (typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorContactIndex = contact->colorContactIndex; + b2Array_RemoveSwap(color->contactArray, colorContactIndex); + if (colorContactIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedContactIndex = color->contactArray[colorContactIndex]; + world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexB); + } + + contact->colorIndex = B2_NULL_INDEX; + contact->colorContactIndex = B2_NULL_INDEX; + contact->flags &= ~b2_contactStatic; +} + +static void b2IntegrateVelocities2(b2World* world, b2Body** bodies, int32_t bodyCount, float h) +{ + b2Vec2 gravity = world->gravity; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = 0; i < bodyCount; ++i) + { + b2Body* body = bodies[i]; + + if (body->type != b2_dynamicBody) + { + continue; + } + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + body->linearVelocity = v; + body->angularVelocity = w; + + body->deltaAngle = 0.0f; + body->deltaPosition = b2Vec2_zero; + } +} + +static void b2IntegrateVelocitiesTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +{ + b2Vec2 gravity = context->world->gravity; + b2Body** bodies = context->awakeBodies; + int32_t endIndex = entry->endIndex; + float h = context->timeStep; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = entry->startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; + + if (body->type != b2_dynamicBody) + { + continue; + } + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + body->linearVelocity = v; + body->angularVelocity = w; + + body->deltaAngle = 0.0f; + body->deltaPosition = b2Vec2_zero; + } +} + +static void b2PrepareJointsTask(b2SolverTaskContext* context) +{ + b2World* world = context->world; + b2Joint* joints = world->joints; + int32_t jointCapacity = world->jointPool.capacity; + const b2StepContext* stepContext = context->stepContext; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2PrepareJoint(joint, stepContext); + } +} + +static void b2PrepareContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t colorIndex) +{ + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); + + b2World* world = context->world; + b2Graph* graph = context->graph; + b2GraphColor* color = graph->colors + colorIndex; + int32_t* contactIndices = color->contactArray; + b2Contact* contacts = world->contacts; + b2Body* bodies = world->bodies; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; + + float h = context->timeStep; + bool enableWarmStarting = world->enableWarmStarting; + + int32_t endIndex = entry->endIndex; + + B2_ASSERT(entry->startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + + for (int32_t i = entry->startIndex; i < endIndex; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = contact->edges[0].bodyIndex; + int32_t indexB = contact->edges[1].bodyIndex; + b2Body* bodyA = bodies + indexA; + b2Body* bodyB = bodies + indexB; + + b2Constraint* constraint = color->contacts + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 cA = bodyA->position; + b2Vec2 cB = bodyB->position; + b2Rot qA = b2MakeRot(bodyA->angle); + b2Rot qB = b2MakeRot(bodyB->angle); + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + + cp->rA = b2Sub(mp->point, cA); + cp->rB = b2Sub(mp->point, cB); + cp->localAnchorA = b2InvRotateVector(qA, cp->rA); + cp->localAnchorB = b2InvRotateVector(qB, cp->rB); + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + // float d = 2.0f * zeta * omega / kNormal; + // float k = omega * omega / kNormal; + + // cp->gamma = 1.0f / (h * (d + h * k)); + // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); + // cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + + cp->separation = mp->separation; + + // cp->bias = h * k * cp->gamma * mp->separation; + // cp->bias = k / (d + h * k) * mp->separation; + // cp->bias = + // (omega * omega / kNormal) / (2 * zeta * omega / kNormal + h * omega * omega / kNormal) * mp->separation; + cp->biasCoefficient = omega / (2.0f * zeta + h * omega); + // cp->gamma = 0.0f; + // cp->bias = (0.2f / h) * mp->separation; + + // TODO_ERIN this can be expanded + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + // cp->normalMass = 1.0f / (kNormal + cp->gamma); + + float c = h * omega * (2.0f * zeta + h * omega); + cp->impulseCoefficient = 1.0f / (1.0f + c); + cp->massCoefficient = c * cp->impulseCoefficient; + + // meff = 1.0f / kNormal * 1.0f / (1.0f + 1.0f / (h * omega * (2 * zeta + h * omega))) + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + // = -meff * mscale * (vn + bias) - imp_scale * impulse + + // Warm start + if (enableWarmStarting) + { + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } + + b2TracyCZoneEnd(prepare_contact); +} + +static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) +{ + b2World* world = context->world; + b2Joint* joints = world->joints; + int32_t jointCapacity = world->jointPool.capacity; + const b2StepContext* stepContext = context->stepContext; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2SolveJointVelocitySoft(joint, stepContext, useBias); + } +} + +static void b2SolveContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +{ + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); + + b2World* world = context->world; + b2Graph* graph = context->graph; + b2GraphColor* color = graph->colors + colorIndex; + b2Body* bodies = world->bodies; + b2Constraint* constraints = color->contacts; + + float inv_dt = context->invTimeStep; + int32_t endIndex = entry->endIndex; + + B2_ASSERT(entry->startIndex <= endIndex); + B2_ASSERT(entry->startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + + for (int32_t i = entry->startIndex; i < endIndex; ++i) + { + b2Constraint* constraint = constraints + i; + + b2Body* bodyA = bodies + constraint->indexA; + b2Body* bodyB = bodies + constraint->indexB; + + float mA = bodyA->invMass; + float iA = bodyA->invI; + float mB = bodyB->invMass; + float iB = bodyB->invI; + int32_t pointCount = constraint->pointCount; + + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + + const b2Vec2 dpA = bodyA->deltaPosition; + const float daA = bodyA->deltaAngle; + const b2Vec2 dpB = bodyB->deltaPosition; + const float daB = bodyB->deltaAngle; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = cp->massCoefficient; + impulseScale = cp->impulseCoefficient; + } + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - contact->colorContactIndex = b2Array(color->contactArray).count; - b2Array_Push(color->contactArray, contact->object.index); - contact->colorIndex = i; - break; + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } - else if (typeB == b2_dynamicBody) - { - // Static contacts never in color 0 - for (int32_t i = 1; i < b2_graphColorCount; ++i) - { - b2GraphColor* color = graph->colors + i; - if (b2GetBit(&color->bodySet, bodyIndexB)) - { - continue; - } - b2SetBitGrow(&color->bodySet, bodyIndexB); + b2TracyCZoneEnd(solve_contact); +} - contact->colorContactIndex = b2Array(color->contactArray).count; - b2Array_Push(color->contactArray, contact->object.index); - contact->colorIndex = i; - break; - } +static void b2IntegratePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); + + b2Body** bodies = context->awakeBodies; + float h = context->subStep; + + int32_t endIndex = entry->endIndex; + B2_ASSERT(entry->startIndex <= endIndex); + + for (int32_t i = entry->startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; + body->deltaAngle += h * body->angularVelocity; + body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); } - B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorContactIndex != B2_NULL_INDEX); + b2TracyCZoneEnd(integrate_positions); } -void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) +static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t threadIndex) { - B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); - B2_ASSERT(contact->colorContactIndex != B2_NULL_INDEX); + b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); - b2Graph* graph = &world->graph; + b2World* world = context->world; + b2Body** bodies = context->awakeBodies; + b2Contact* contacts = world->contacts; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); - int32_t bodyIndexA = contact->edges[0].bodyIndex; - int32_t bodyIndexB = contact->edges[1].bodyIndex; + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + int32_t endIndex = entry->endIndex; - b2BodyType typeA = world->bodies[bodyIndexA].type; - b2BodyType typeB = world->bodies[bodyIndexB].type; + B2_ASSERT(entry->startIndex <= endIndex); + B2_ASSERT(entry->startIndex <= world->bodyPool.capacity); + B2_ASSERT(endIndex <= world->bodyPool.capacity); - if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + for (int32_t i = entry->startIndex; i < endIndex; ++i) { - b2GraphColor* color = graph->colors + contact->colorIndex; - B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); - - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) - { - // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; - } + b2Body* body = bodies[i]; - b2ClearBit(&color->bodySet, bodyIndexA); - b2ClearBit(&color->bodySet, bodyIndexB); - } - else if (typeA == b2_dynamicBody) - { - b2GraphColor* color = graph->colors + contact->colorIndex; - B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + body->position = b2Add(body->position, body->deltaPosition); + body->angle += body->deltaAngle; - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) - { - // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; - } + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - b2ClearBit(&color->bodySet, bodyIndexA); - } - else if (typeB == b2_dynamicBody) - { - b2GraphColor* color = graph->colors + contact->colorIndex; - B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + body->force = b2Vec2_zero; + body->torque = 0.0f; - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) + // Update shapes AABBs + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) { - // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; - } + b2Shape* shape = world->shapes + shapeIndex; - b2ClearBit(&color->bodySet, bodyIndexB); - } + B2_ASSERT(shape->isFast == false); - contact->colorIndex = B2_NULL_INDEX; - contact->colorContactIndex = B2_NULL_INDEX; - contact->flags &= ~b2_contactStatic; -} + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); -static void b2IntegrateVelocities2(b2World* world, b2Body** bodies, int32_t bodyCount, float h) -{ - b2Vec2 gravity = world->gravity; + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCount; ++i) - { - b2Body* body = bodies[i]; + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } - if (body->type != b2_dynamicBody) + shapeIndex = shape->nextShapeIndex; + } + + // TODO_ERIN legacy + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) { - continue; + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; } + } - float invMass = body->invMass; - float invI = body->invI; + b2TracyCZoneEnd(finalize_positions); +} - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; +static void b2StoreImpulsesTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + h * invI * body->torque; + b2Constraint* constraints = context->constraints; + int32_t endIndex = entry->endIndex; - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); - w *= 1.0f / (1.0f + h * body->angularDamping); + for (int32_t i = entry->startIndex; i < endIndex; ++i) + { + b2Constraint* constraint = constraints + i; + b2Contact* contact = constraint->contact; - body->linearVelocity = v; - body->angularVelocity = w; + b2Manifold* manifold = &contact->manifold; - body->deltaAngle = 0.0f; - body->deltaPosition = b2Vec2_zero; + for (int32_t j = 0; j < constraint->pointCount; ++j) + { + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; + } } + + b2TracyCZoneEnd(store_impulses); } typedef struct b2BodyContext @@ -422,44 +932,6 @@ static void b2UpdatePositions(b2World* world, b2Body** bodies, int32_t count, fl } } -typedef struct b2ConstraintPoint -{ - b2Vec2 rA, rB; - b2Vec2 rAf, rBf; - b2Vec2 localAnchorA, localAnchorB; - float tangentSeparation; - float separation; - float normalImpulse; - float tangentImpulse; - float normalMass; - float tangentMass; - float gamma; - float massCoefficient; - float biasCoefficient; - float impulseCoefficient; - float baumgarte; - bool frictionValid; -} b2ConstraintPoint; - -typedef struct b2Constraint -{ - b2Contact* contact; - int32_t indexA; - int32_t indexB; - b2ConstraintPoint points[2]; - b2Vec2 normal; - float friction; - int32_t pointCount; -} b2Constraint; - -typedef struct b2GraphContext -{ - b2World* world; - b2GraphColor* color; - float timeStep; - float contactHertz; - bool enableWarmStarting; -} b2GraphContext; static void b2PrepareSoftContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { @@ -553,7 +1025,7 @@ static void b2PrepareSoftContactTask(int32_t startIndex, int32_t endIndex, uint3 // cp->gamma = 1.0f / (h * (d + h * k)); // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); - cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); + // cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); cp->separation = mp->separation; @@ -625,7 +1097,7 @@ typedef struct b2ContactContext b2World* world; b2GraphColor* color; float inv_dt; - bool removeOverlap; + bool useBias; } b2ContactContext; static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) @@ -638,7 +1110,7 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th b2Constraint* constraints = contactContext->color->contacts; float inv_dt = contactContext->inv_dt; - bool removeOverlap = contactContext->removeOverlap; + bool useBias = contactContext->useBias; B2_ASSERT(startIndex <= endIndex); B2_ASSERT(startIndex <= b2Array(contactContext->color->contactArray).count); @@ -693,7 +1165,7 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th // Speculative (inverse of full time step) bias = s * inv_dt; } - else if (removeOverlap) + else if (useBias) { bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); // bias = cp->biasCoefficient * s; @@ -759,7 +1231,7 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th } // inv_dt is full time step inverse -static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool removeOverlap) +static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool useBias) { int32_t count = b2Array(color->contactArray).count; if (count == 0) @@ -767,7 +1239,7 @@ static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt return; } - b2ContactContext context = {world, color, inv_dt, removeOverlap}; + b2ContactContext context = {world, color, inv_dt, useBias}; int32_t minRange = 128; if (count < minRange) @@ -798,39 +1270,39 @@ static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) } } -void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mainThread) +void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t threadIndex) { int32_t taskCount = stage->taskCount; b2SolverStageType type = stage->type; - // TODO_ERIN + // TODO_ERIN only main thread if (type == b2_stagePrepareJoints) { - if (mainThread) + if (threadIndex == 0) { b2PrepareJointsTask(context); } return; } - // TODO_ERIN + // TODO_ERIN only main thread if (type == b2_stageSolveJoints) { - if (mainThread) + if (threadIndex == 0) { - bool removeOverlap = true; - b2SolveJointsTask(context, removeOverlap); + bool useBias = true; + b2SolveJointsTask(context, useBias); } return; } - // TODO_ERIN + // TODO_ERIN only main thread if (type == b2_stageCalmJoints) { - if (mainThread) + if (threadIndex == 0) { - bool removeOverlap = false; - b2SolveJointsTask(context, removeOverlap); + bool useBias = false; + b2SolveJointsTask(context, useBias); } return; } @@ -838,7 +1310,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mai while (true) { int32_t taskIndex = atomic_fetch_add(&stage->taskIndex, 1); - if (taskIndex == taskCount) + if (taskIndex >= taskCount) { return; } @@ -847,7 +1319,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mai switch (type) { case b2_stageIntegrateVelocities: - b2IntegrationVelocitiesTask(entry, context); + b2IntegrateVelocitiesTask(entry, context); break; case b2_stagePrepareContacts: @@ -859,7 +1331,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mai break; case b2_stageIntegratePositions: - b2IntegrationPositionsTask(entry, context); + b2IntegratePositionsTask(entry, context); break; case b2_stageCalmContacts: @@ -867,7 +1339,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, bool mai break; case b2_stageFinalizePositions: - b2FinalizePositionsTask(entry, context); + b2FinalizePositionsTask(entry, context, threadIndex); break; case b2_stageStoreImpulses: @@ -893,15 +1365,17 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo if (threadIndex == 0) { // Main thread + int32_t stageIndex = 0; + while (true) { - b2SolverStage* stage = stages + context->stageIndex; + b2SolverStage* stage = stages + stageIndex; // Manage and execute tasks - b2ExecuteStage(stage, context, true); + b2ExecuteStage(stage, context, threadIndex); // Wait for stage completion - int32_t stageTaskCount = stage->count; + int32_t stageTaskCount = stage->taskCount; while (atomic_load(&stage->completionCount) < stageTaskCount) { _mm_pause(); @@ -910,9 +1384,10 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo } // Next stage - int32_t stageIndex = atomic_fetch_add(&context->stageIndex, 1); + stageIndex += 1; + atomic_store(&context->stageIndex, stageIndex); - if (stageIndex == stageCount - 1) + if (stageIndex == stageCount) { // All done return; @@ -923,10 +1398,12 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo // Worker while(true) { + int32_t stageIndex = atomic_load(&context->stageIndex); + b2SolverStage* stage = stages + stageIndex; // Manage and execute tasks - b2ExecuteStage(stage, context, false); + b2ExecuteStage(stage, context, threadIndex); // Wait for next stage while (true) @@ -1024,13 +1501,21 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) base += b2Array(colors[i].contactArray).count; } + int32_t jointCount = world->jointPool.count; + int32_t storeBlockSize = 1 << 6; int32_t storeTaskCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; - int32_t velIters = stepContext->velocityIterations; + int32_t velIters = B2_MAX(1, stepContext->velocityIterations); int32_t posIters = stepContext->positionIterations; // TODO_ERIN joint tasks - int32_t stageCount = 1 + 1 + activeColorCount + velIters * (1 + activeColorCount + 1) + posIters * (1 + activeColorCount) + 1 + 1; + int32_t stageCount = 1; + stageCount += jointCount > 0 ? 1 : 0; + stageCount += activeColorCount; + stageCount += jointCount > 0 ? velIters * (1 + activeColorCount + 1) : velIters * (activeColorCount + 1); + stageCount += jointCount > 0 ? posIters * (1 + activeColorCount) : posIters * activeColorCount; + stageCount += constraintCount > 0 ? 2 : 1; + b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); int32_t taskCount = bodyTaskCount + constraintTaskCount + velIters * (constraintTaskCount + bodyTaskCount) + @@ -1044,10 +1529,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) // Integrate velocities task setup stages[stageIndex].type = b2_stageIntegrateVelocities; stages[stageIndex].color = 0xFF; - stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].taskCount = bodyTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t i = 0; i < bodyTaskCount; ++i) { int32_t startIndex = i * bodyBlockSize; @@ -1057,13 +1542,16 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageIndex += 1; // Prepare joints - stages[stageIndex].type = b2_stagePrepareJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].count = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = 0; - stages[stageIndex].currentIndex = 0; - stageIndex += 1; + if (jointCount > 0) + { + stages[stageIndex].type = b2_stagePrepareJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].taskCount = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].taskEntries = 0; + stages[stageIndex].taskIndex = 0; + stageIndex += 1; + } // Prepare constraints task setup for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -1077,10 +1565,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stages[stageIndex].type = b2_stagePrepareContacts; stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].count = colorTaskCount; + stages[stageIndex].taskCount = colorTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t j = 0; j < colorTaskCount; ++j) { @@ -1096,13 +1584,16 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) for (int32_t iter = 0; iter < velIters; ++iter) { // Joints - stages[stageIndex].type = b2_stageSolveJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].count = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = 0; - stages[stageIndex].currentIndex = 0; - stageIndex += 1; + if (jointCount > 0) + { + stages[stageIndex].type = b2_stageSolveJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].taskCount = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].taskEntries = 0; + stages[stageIndex].taskIndex = 0; + stageIndex += 1; + } // Constraint graph for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -1117,10 +1608,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stages[stageIndex].type = b2_stageSolveContacts; stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].count = colorTaskCount; + stages[stageIndex].taskCount = colorTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t j = 0; j < colorTaskCount; ++j) { @@ -1135,10 +1626,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) // Integrate positions stages[stageIndex].type = b2_stageIntegratePositions; stages[stageIndex].color = 0xFF; - stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].taskCount = bodyTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t i = 0; i < bodyTaskCount; ++i) { @@ -1154,13 +1645,16 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) for (int32_t iter = 0; iter < posIters; ++iter) { // Joints - stages[stageIndex].type = b2_stageCalmJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].count = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = 0; - stages[stageIndex].currentIndex = 0; - stageIndex += 1; + if (jointCount > 0) + { + stages[stageIndex].type = b2_stageCalmJoints; + stages[stageIndex].color = 0xFF; + stages[stageIndex].taskCount = 0; + stages[stageIndex].completionCount = 0; + stages[stageIndex].taskEntries = 0; + stages[stageIndex].taskIndex = 0; + stageIndex += 1; + } for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -1174,10 +1668,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stages[stageIndex].type = b2_stageCalmContacts; stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].count = colorTaskCount; + stages[stageIndex].taskCount = colorTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t j = 0; j < colorTaskCount; ++j) { @@ -1192,10 +1686,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) // Prepare finalize position stage stages[stageIndex].type = b2_stageFinalizePositions; stages[stageIndex].color = 0xFF; - stages[stageIndex].count = bodyTaskCount; + stages[stageIndex].taskCount = bodyTaskCount; stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; for (int32_t i = 0; i < bodyTaskCount; ++i) { @@ -1206,39 +1700,50 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageIndex += 1; // Prepare store impulses stage - stages[stageIndex].type = b2_stageStoreImpulses; - stages[stageIndex].color = 0xFF; - stages[stageIndex].count = constraintCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].entries = entries + taskIndex; - stages[stageIndex].currentIndex = 0; - - for (int32_t i = 0; i < storeTaskCount; ++i) + if (constraintCount > 0) { - int32_t startIndex = i * storeBlockSize; - int32_t endIndex = B2_MIN(startIndex + storeBlockSize, constraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; + stages[stageIndex].type = b2_stageStoreImpulses; + stages[stageIndex].color = 0xFF; + stages[stageIndex].taskCount = storeTaskCount; + stages[stageIndex].completionCount = 0; + stages[stageIndex].taskEntries = entries + taskIndex; + stages[stageIndex].taskIndex = 0; + + for (int32_t i = 0; i < storeTaskCount; ++i) + { + int32_t startIndex = i * storeBlockSize; + int32_t endIndex = B2_MIN(startIndex + storeBlockSize, constraintCount); + entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; + } + stageIndex += 1; } - stageIndex += 1; B2_ASSERT(stageIndex == stageCount); B2_ASSERT(taskIndex == taskCount); b2SolverTaskContext context; + context.stepContext = stepContext; context.world = world; context.awakeBodies = awakeBodies; context.graph = graph; - context.stageCount = taskCount; + context.constraints = constraints; + context.stageCount = stageCount; context.stages = stages; context.stageIndex = 0; + context.timeStep = stepContext->dt; + context.invTimeStep = stepContext->inv_dt; + context.subStep = context.timeStep / velIters; + context.invSubStep = velIters * stepContext->inv_dt; - int32_t workerCount = world->workerCount; - for (int32_t i = 0; i < workerCount; ++i) - { - world->enqueueTaskFcn(b2SolverTask, 1, 1, &context, world->userTaskContext); - } + b2SolverTask(0, 0, 0, &context); - world->finishAllTasksFcn(world->userTaskContext); + //int32_t workerCount = world->workerCount; + //for (int32_t i = 0; i < workerCount; ++i) + //{ + // world->enqueueTaskFcn(b2SolverTask, 1, 1, &context, world->userTaskContext); + //} + + //world->finishAllTasksFcn(world->userTaskContext); b2FreeStackItem(world->stackAllocator, entries); b2FreeStackItem(world->stackAllocator, stages); @@ -1266,8 +1771,6 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) { - b2SolveGraph(world, stepContext); - b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; b2Joint* joints = world->joints; @@ -1360,14 +1863,14 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) continue; } - bool removeOverlap = true; - b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + bool useBias = true; + b2SolveJointVelocitySoft(joint, stepContext, useBias); } for (int32_t i = 0; i < b2_graphColorCount; ++i) { - bool removeOverlap = true; - b2SolveSoftContact(world, colors + i, h, removeOverlap); + bool useBias = true; + b2SolveSoftContact(world, colors + i, h, useBias); } if (substep < substepCount - 1) @@ -1391,14 +1894,14 @@ void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) continue; } - bool removeOverlap = false; - b2SolveJointVelocitySoft(joint, stepContext, removeOverlap); + bool useBias = false; + b2SolveJointVelocitySoft(joint, stepContext, useBias); } for (int32_t i = 0; i < b2_graphColorCount; ++i) { - bool removeOverlap = false; - b2SolveSoftContact(world, colors + i, h, removeOverlap); + bool useBias = false; + b2SolveSoftContact(world, colors + i, h, useBias); } } diff --git a/src/graph.h b/src/graph.h index 831fbe53..344ed56b 100644 --- a/src/graph.h +++ b/src/graph.h @@ -42,7 +42,5 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); void b2AddJointToGraph(b2World* world, b2Joint* contact); void b2RemoveJointFromGraph(b2World* world, b2Joint* contact); -void b2SolveGraphPGS(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphSoftPGS(b2World* world, const b2StepContext* stepContext); +void b2SolveGraph(b2World* world, const b2StepContext* stepContext); void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphStickyTGS(b2World* world, const b2StepContext* stepContext); diff --git a/src/world.c b/src/world.c index 8bc4db89..012e5312 100644 --- a/src/world.c +++ b/src/world.c @@ -135,7 +135,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->inv_dt0 = 0.0f; world->enableSleep = true; world->locked = false; - world->warmStarting = true; + world->enableWarmStarting = true; world->enableContinuous = true; world->profile = b2_emptyProfile; @@ -936,7 +936,8 @@ static void b2Solve2(b2World* world, b2StepContext* context) b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); - b2SolveGraphSoftStep(world, context); + b2SolveGraph(world, context); + //b2SolveGraphSoftStep(world, context); b2ValidateNoEnlarged(&world->broadPhase); @@ -1094,7 +1095,7 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, context.dtRatio = world->inv_dt0 * timeStep; context.restitutionThreshold = world->restitutionThreshold; - context.enableWarmStarting = world->warmStarting; + context.enableWarmStarting = world->enableWarmStarting; context.bodies = world->bodies; context.bodyCapacity = world->bodyPool.capacity; @@ -1181,7 +1182,7 @@ void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations context.dtRatio = world->inv_dt0 * timeStep; context.restitutionThreshold = world->restitutionThreshold; - context.enableWarmStarting = world->warmStarting; + context.enableWarmStarting = world->enableWarmStarting; context.bodies = world->bodies; context.bodyCapacity = world->bodyPool.capacity; @@ -1482,7 +1483,7 @@ void b2World_EnableWarmStarting(b2WorldId worldId, bool flag) return; } - world->warmStarting = flag; + world->enableWarmStarting = flag; } void b2World_EnableContinuo(b2WorldId worldId, bool flag) diff --git a/src/world.h b/src/world.h index 00ed2e77..c968a272 100644 --- a/src/world.h +++ b/src/world.h @@ -112,7 +112,7 @@ typedef struct b2World bool enableSleep; bool locked; - bool warmStarting; + bool enableWarmStarting; bool enableContinuous; } b2World; From ee95f57b650ef119c46d82f46c436f953aed7676 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 12 Sep 2023 23:36:46 -0700 Subject: [PATCH 20/51] parallel --- samples/collection/benchmark_pyramid.cpp | 6 +-- src/graph.c | 54 +++++++++++++++--------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 41b82fc0..9d5d6eee 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,9 +21,9 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 60; - m_rowCount = g_sampleDebug ? 1 : 1; - m_columnCount = g_sampleDebug ? 1 : 1; + m_baseCount = 10; + m_rowCount = g_sampleDebug ? 1 : 32; + m_columnCount = g_sampleDebug ? 1 : 32; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/src/graph.c b/src/graph.c index 13c18636..ce62bea2 100644 --- a/src/graph.c +++ b/src/graph.c @@ -107,6 +107,12 @@ typedef struct b2SolverTaskContext _Atomic int stageIndex; } b2SolverTaskContext; +typedef struct b2WorkerContext +{ + b2SolverTaskContext* context; + int32_t workerIndex; +} b2WorkerContext; + void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { bodyCapacity = B2_MAX(bodyCapacity, 8); @@ -693,7 +699,7 @@ static void b2IntegratePositionsTask(const b2SolverTaskEntry* entry, b2SolverTas b2TracyCZoneEnd(integrate_positions); } -static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t threadIndex) +static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t workerIndex) { b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); @@ -702,8 +708,8 @@ static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTask b2Contact* contacts = world->contacts; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[workerIndex].shapeBitSet; int32_t endIndex = entry->endIndex; B2_ASSERT(entry->startIndex <= endIndex); @@ -1270,7 +1276,7 @@ static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) } } -void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t threadIndex) +void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t workerIndex) { int32_t taskCount = stage->taskCount; b2SolverStageType type = stage->type; @@ -1278,7 +1284,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t // TODO_ERIN only main thread if (type == b2_stagePrepareJoints) { - if (threadIndex == 0) + if (workerIndex == 0) { b2PrepareJointsTask(context); } @@ -1288,7 +1294,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t // TODO_ERIN only main thread if (type == b2_stageSolveJoints) { - if (threadIndex == 0) + if (workerIndex == 0) { bool useBias = true; b2SolveJointsTask(context, useBias); @@ -1299,7 +1305,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t // TODO_ERIN only main thread if (type == b2_stageCalmJoints) { - if (threadIndex == 0) + if (workerIndex == 0) { bool useBias = false; b2SolveJointsTask(context, useBias); @@ -1339,7 +1345,7 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t break; case b2_stageFinalizePositions: - b2FinalizePositionsTask(entry, context, threadIndex); + b2FinalizePositionsTask(entry, context, workerIndex); break; case b2_stageStoreImpulses: @@ -1355,14 +1361,16 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo { B2_MAYBE_UNUSED(startIndex); B2_MAYBE_UNUSED(endIndex); + B2_MAYBE_UNUSED(threadIndex); - b2SolverTaskContext* context = taskContext; - B2_MAYBE_UNUSED(context); + b2WorkerContext* workerContext = taskContext; + int32_t workerIndex = workerContext->workerIndex; + b2SolverTaskContext* context = workerContext->context; b2SolverStage* stages = context->stages; int32_t stageCount = context->stageCount; - if (threadIndex == 0) + if (workerIndex == 0) { // Main thread int32_t stageIndex = 0; @@ -1372,7 +1380,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo b2SolverStage* stage = stages + stageIndex; // Manage and execute tasks - b2ExecuteStage(stage, context, threadIndex); + b2ExecuteStage(stage, context, workerIndex); // Wait for stage completion int32_t stageTaskCount = stage->taskCount; @@ -1399,11 +1407,15 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo while(true) { int32_t stageIndex = atomic_load(&context->stageIndex); + if (stageIndex == stageCount) + { + return; + } b2SolverStage* stage = stages + stageIndex; // Manage and execute tasks - b2ExecuteStage(stage, context, threadIndex); + b2ExecuteStage(stage, context, workerIndex); // Wait for next stage while (true) @@ -1735,15 +1747,17 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.subStep = context.timeStep / velIters; context.invSubStep = velIters * stepContext->inv_dt; - b2SolverTask(0, 0, 0, &context); + b2WorkerContext workerContext[16]; - //int32_t workerCount = world->workerCount; - //for (int32_t i = 0; i < workerCount; ++i) - //{ - // world->enqueueTaskFcn(b2SolverTask, 1, 1, &context, world->userTaskContext); - //} + int32_t workerCount = B2_MIN(16, world->workerCount); + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + } - //world->finishAllTasksFcn(world->userTaskContext); + world->finishAllTasksFcn(world->userTaskContext); b2FreeStackItem(world->stackAllocator, entries); b2FreeStackItem(world->stackAllocator, stages); From 8b484aefe8e93bc0d4d9e83474ea39966d288cf7 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 17 Sep 2023 23:21:36 -0700 Subject: [PATCH 21/51] wip --- src/graph.c | 496 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 342 insertions(+), 154 deletions(-) diff --git a/src/graph.c b/src/graph.c index ce62bea2..1a18b099 100644 --- a/src/graph.c +++ b/src/graph.c @@ -16,9 +16,12 @@ #include "box2d/aabb.h" +#include #include #include -//#include + +// #include +// #include #define maxBaumgarteVelocity 3.0f @@ -60,41 +63,51 @@ typedef struct b2GraphContext typedef enum b2SolverStageType { b2_stageIntegrateVelocities = 0, - b2_stagePrepareContacts, b2_stagePrepareJoints, + b2_stagePrepareContacts, b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, + b2_stageFinalizePositions, b2_stageCalmJoints, b2_stageCalmContacts, - b2_stageFinalizePositions, b2_stageStoreImpulses } b2SolverStageType; -typedef struct b2SolverTaskEntry +// Each block of work has a sync index that gets incremented when a worker claims the block. This ensures only a single worker claims a +// block, yet lets work be distributed dynamically across multiple workers (work stealing). This also reduces contention on a single block +// index atomic. For non-iterative stages the sync index is simply set to one. For iterative stages (solver iteration) the same block of +// work is executed once per iteration and the atomic sync index is shared across iterations, so it increases monotonically. +typedef struct { int32_t startIndex; int32_t endIndex; -} b2SolverTaskEntry; + _Atomic int syncIndex; +} b2SolverBlock; // Each stage must be completed before going to the next stage. -typedef struct b2SolverStage +// Non-iterative stages use a stage instance once while iterative stages re-use the same instance each iteration. +typedef struct { b2SolverStageType type; - b2SolverTaskEntry* taskEntries; - _Atomic int taskIndex; + b2SolverBlock* blocks; + int32_t blockCount; + //int32_t colorIndex; _Atomic int completionCount; - int32_t taskCount; - uint8_t color; } b2SolverStage; -typedef struct b2SolverTaskContext +typedef struct { b2World* world; b2Body** awakeBodies; b2Graph* graph; + const b2StepContext* stepContext; b2Constraint* constraints; + int32_t activeColorCount; + int32_t velocityIterations; + int32_t calmIterations; + int32_t workerCount; float timeStep; float invTimeStep; @@ -104,7 +117,8 @@ typedef struct b2SolverTaskContext b2SolverStage* stages; int32_t stageCount; - _Atomic int stageIndex; + // sync index (16-bits) | stage type (16-bits) + _Atomic unsigned int syncBits; } b2SolverTaskContext; typedef struct b2WorkerContext @@ -324,15 +338,14 @@ static void b2IntegrateVelocities2(b2World* world, b2Body** bodies, int32_t body } } -static void b2IntegrateVelocitiesTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2Vec2 gravity = context->world->gravity; b2Body** bodies = context->awakeBodies; - int32_t endIndex = entry->endIndex; float h = context->timeStep; // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Body* body = bodies[i]; @@ -388,7 +401,7 @@ static void b2PrepareJointsTask(b2SolverTaskContext* context) } } -static void b2PrepareContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t colorIndex) +static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); @@ -407,12 +420,10 @@ static void b2PrepareContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskCo float h = context->timeStep; bool enableWarmStarting = world->enableWarmStarting; - int32_t endIndex = entry->endIndex; - - B2_ASSERT(entry->startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Contact* contact = contacts + contactIndices[i]; @@ -547,7 +558,7 @@ static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) } } -static void b2SolveContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); @@ -558,13 +569,12 @@ static void b2SolveContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskCont b2Constraint* constraints = color->contacts; float inv_dt = context->invTimeStep; - int32_t endIndex = entry->endIndex; - B2_ASSERT(entry->startIndex <= endIndex); - B2_ASSERT(entry->startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Constraint* constraint = constraints + i; @@ -679,17 +689,16 @@ static void b2SolveContactsTask(const b2SolverTaskEntry* entry, b2SolverTaskCont b2TracyCZoneEnd(solve_contact); } -static void b2IntegratePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); b2Body** bodies = context->awakeBodies; float h = context->subStep; - int32_t endIndex = entry->endIndex; - B2_ASSERT(entry->startIndex <= endIndex); + B2_ASSERT(startIndex <= endIndex); - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Body* body = bodies[i]; body->deltaAngle += h * body->angularVelocity; @@ -699,7 +708,7 @@ static void b2IntegratePositionsTask(const b2SolverTaskEntry* entry, b2SolverTas b2TracyCZoneEnd(integrate_positions); } -static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context, int32_t workerIndex) +static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, uint32_t threadIndex) { b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); @@ -708,15 +717,14 @@ static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTask b2Contact* contacts = world->contacts; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[workerIndex].shapeBitSet; - int32_t endIndex = entry->endIndex; + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; - B2_ASSERT(entry->startIndex <= endIndex); - B2_ASSERT(entry->startIndex <= world->bodyPool.capacity); + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= world->bodyPool.capacity); B2_ASSERT(endIndex <= world->bodyPool.capacity); - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Body* body = bodies[i]; @@ -768,14 +776,13 @@ static void b2FinalizePositionsTask(const b2SolverTaskEntry* entry, b2SolverTask b2TracyCZoneEnd(finalize_positions); } -static void b2StoreImpulsesTask(const b2SolverTaskEntry* entry, b2SolverTaskContext* context) +static void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); b2Constraint* constraints = context->constraints; - int32_t endIndex = entry->endIndex; - for (int32_t i = entry->startIndex; i < endIndex; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { b2Constraint* constraint = constraints + i; b2Contact* contact = constraint->contact; @@ -938,7 +945,6 @@ static void b2UpdatePositions(b2World* world, b2Body** bodies, int32_t count, fl } } - static void b2PrepareSoftContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { B2_MAYBE_UNUSED(threadIndex); @@ -1276,9 +1282,107 @@ static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) } } -void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t workerIndex) +static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, uint32_t threadIndex) +{ + b2SolverStageType type = stage->type; + + switch (type) + { + case b2_stageIntegrateVelocities: + b2IntegrateVelocitiesTask(startIndex, endIndex, context); + break; + + case b2_stagePrepareContacts: + b2PrepareContactsTask(startIndex, endIndex, context, stage->colorIndex); + break; + + case b2_stageSolveContacts: + b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, true); + break; + + case b2_stageIntegratePositions: + b2IntegratePositionsTask(startIndex, endIndex, context); + break; + + case b2_stageFinalizePositions: + b2FinalizePositionsTask(startIndex, endIndex, context, threadIndex); + break; + + case b2_stageCalmContacts: + b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, false); + break; + + case b2_stageStoreImpulses: + b2StoreImpulsesTask(startIndex, endIndex, context); + break; + } +} + +static inline int32_t GetWorkerBlockIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) +{ + if (blockCount <= workerCount) + { + return workerIndex < blockCount ? workerIndex : B2_NULL_INDEX; + } + + int32_t blocksPerWorker = blockCount / workerCount; + int32_t remainder = blockCount - blocksPerWorker * workerCount; + return blocksPerWorker * workerIndex + B2_MIN(remainder, workerIndex); +} + +static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, int32_t workerIndex, int previousSyncIndex, int syncIndex, + uint32_t threadIndex) { - int32_t taskCount = stage->taskCount; + int32_t completedCount = 0; + b2SolverBlock* blocks = stage->blocks; + int32_t blockCount = stage->blockCount; + + int32_t colorIndex = (stageBits >> 8) & 0xFF; + int32_t iterIndex = (stageBits >> 16) & 0xFF; + int32_t expectedSyncIndex = previousSyncIndex; + + int32_t startIndex = GetWorkerBlockIndex(workerIndex, blockCount, context->workerCount); + int32_t blockIndex = startIndex; + + // Caution: this can change expectedSyncIndex + while (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == true) + { + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, threadIndex); + + completedCount += 1; + blockIndex += 1; + if (blockIndex > blockCount) + { + // Keep looking for work + blockIndex = 0; + } + + expectedSyncIndex = previousSyncIndex; + } + + // Search backwards for blocks + blockIndex = startIndex - 1; + while (true) + { + if (blockIndex < 0) + { + blockIndex = blockCount - 1; + } + + expectedSyncIndex = previousSyncIndex; + + // Caution: this can change expectedSyncIndex + if (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == false) + { + break; + } + + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, threadIndex); + completedCount += 1; + blockIndex -= 1; + } + +#if 0 b2SolverStageType type = stage->type; // TODO_ERIN only main thread @@ -1312,131 +1416,194 @@ void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t } return; } +#endif +} - while (true) +static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* context, int32_t workerIndex, uint32_t syncBits) +{ + int32_t blockCount = stage->blockCount; + if (blockCount == 0) { - int32_t taskIndex = atomic_fetch_add(&stage->taskIndex, 1); - if (taskIndex >= taskCount) - { - return; - } - - const b2SolverTaskEntry* entry = stage->taskEntries + taskIndex; - switch (type) - { - case b2_stageIntegrateVelocities: - b2IntegrateVelocitiesTask(entry, context); - break; - - case b2_stagePrepareContacts: - b2PrepareContactsTask(entry, context, stage->color); - break; - - case b2_stageSolveContacts: - b2SolveContactsTask(entry, context, stage->color, true); - break; + return; + } - case b2_stageIntegratePositions: - b2IntegratePositionsTask(entry, context); - break; + if (blockCount == 1) + { + b2ExecuteBlock(stage, context, stage->blocks[0].startIndex, stage->blocks[0].endIndex, 0); + } + else + { + atomic_store(&context->syncBits, syncBits); - case b2_stageCalmContacts: - b2SolveContactsTask(entry, context, stage->color, false); - break; + int syncIndex = (syncBits >> 16) & 0xFFFF; + B2_ASSERT(syncIndex > 0); + int previousSyncIndex = syncIndex - 1; - case b2_stageFinalizePositions: - b2FinalizePositionsTask(entry, context, workerIndex); - break; + b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, 0); - case b2_stageStoreImpulses: - b2StoreImpulsesTask(entry, context); - break; + while (atomic_load(&stage->completionCount) != blockCount) + { + _mm_pause(); } - atomic_fetch_add(&stage->completionCount, 1); + atomic_store(&stage->completionCount, 0); } } +/* +b2_stageIntegrateVelocities = 0, +b2_stagePrepareJoints, +b2_stagePrepareContacts, +b2_stageSolveJoints, +b2_stageSolveContacts, +b2_stageIntegratePositions, +b2_stageFinalizePositions, +b2_stageCalmJoints, +b2_stageCalmContacts, +b2_stageStoreImpulses +*/ + void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { B2_MAYBE_UNUSED(startIndex); B2_MAYBE_UNUSED(endIndex); - B2_MAYBE_UNUSED(threadIndex); b2WorkerContext* workerContext = taskContext; int32_t workerIndex = workerContext->workerIndex; - b2SolverTaskContext* context = workerContext->context; + b2Graph* graph = context->graph; + int32_t activeColorCount = context->activeColorCount; + b2SolverStage* stages = context->stages; int32_t stageCount = context->stageCount; - if (workerIndex == 0) + // TODO_ERIN ? + int32_t maximumSyncStagesPerSubstep = 2 + 2 * activeColorCount; + + if (threadIndex == 0) { - // Main thread - int32_t stageIndex = 0; + // Stages are re-used for loops so that I don't need more stages for large iteration counts. + // The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks. + // The stage index and sync indices are combined in to sync bits for atomic synchronization. + // The workers need to compute the previous sync index for a given stage so that CAS works correctly. This + // setup makes this easy to do. + + uint32_t bodySyncIndex = 1; + uint32_t stageIndex = 0; + uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, 0, syncBits); + bodySyncIndex += 1; + + // TODO_ERIN single threaded + B2_ASSERT(stages[1].type == b2_stagePrepareJoints); + b2PrepareJointsTask(context); + + uint32_t baseStageIndex = 2; + uint32_t graphSyncIndex = 1; + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) + { + stageIndex = baseStageIndex + colorIndex; + syncBits = (graphSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + } + graphSyncIndex += 1; - while (true) + baseStageIndex += 2 + colorIndex; + int32_t velocityIterations = context->velocityIterations; + for (int32_t i = 0; i < velocityIterations; ++i) { - b2SolverStage* stage = stages + stageIndex; + // stage index restarted each iteration + stageIndex = baseStageIndex; - // Manage and execute tasks - b2ExecuteStage(stage, context, workerIndex); + B2_ASSERT(stages[stageIndex].type == b2_stageSolveJoints); + b2SolveJointsTask(context, true); + stageIndex += 1; - // Wait for stage completion - int32_t stageTaskCount = stage->taskCount; - while (atomic_load(&stage->completionCount) < stageTaskCount) + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { - _mm_pause(); - _mm_pause(); - _mm_pause(); + syncBits = (graphSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageSolveContacts); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + stageIndex += 1; } + graphSyncIndex += 1; + + B2_ASSERT(stages[stageIndex].type == b2_stageIntegratePositions); + syncBits = (bodySyncIndex << 16) | stageIndex; + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + bodySyncIndex += 1; + } - // Next stage + baseStageIndex += 1 + activeColorCount + 1; + + stageIndex = baseStageIndex; + syncBits = (bodySyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + baseStageIndex += 1; + + int32_t calmIterations = context->calmIterations; + for (int32_t i = 0; i < calmIterations; ++i) + { + // stage index restarted each iteration + stageIndex = baseStageIndex; + + B2_ASSERT(stages[stageIndex].type == b2_stageCalmJoints); + b2SolveJointsTask(context, false); stageIndex += 1; - atomic_store(&context->stageIndex, stageIndex); - if (stageIndex == stageCount) + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { - // All done - return; + syncBits = (graphSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageCalmContacts); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + stageIndex += 1; } + graphSyncIndex += 1; } + + baseStageIndex += 1 + activeColorCount; + stageIndex = baseStageIndex; + + uint32_t constraintSyncIndex = 1; + syncBits = (constraintSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + + return; } // Worker - while(true) + uint32_t lastStageBits = 0; + + while (true) { - int32_t stageIndex = atomic_load(&context->stageIndex); - if (stageIndex == stageCount) + // Spin until main thread bumps the sync index + uint32_t syncBits = atomic_load(&context->syncBits); + while (syncBits == lastStageBits) { - return; + _mm_pause(); + syncBits = atomic_load(&context->syncBits); } - b2SolverStage* stage = stages + stageIndex; - - // Manage and execute tasks - b2ExecuteStage(stage, context, workerIndex); - - // Wait for next stage - while (true) + if (syncBits == UINT_MAX) { - int32_t newStageIndex = atomic_load(&context->stageIndex); - if (newStageIndex > stageIndex) - { - if (newStageIndex == stageCount) - { - return; - } + // sentinel hit + break; + } - stageIndex = newStageIndex; - break; - } + uint32_t stageIndex = syncBits & 0xFFFF; + B2_ASSERT(stageIndex < context->stageCount); - // spin - _mm_pause(); - _mm_pause(); - _mm_pause(); - } + uint32_t syncIndex = (syncBits >> 16) & 0xFFFF; + B2_ASSERT(syncIndex > 0); + + int32_t previousSyncIndex = syncIndex - 1; + + b2SolverStage* stage = stages + stageIndex; + b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, threadIndex); } } @@ -1476,32 +1643,30 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) bodyIndex = body->islandNext; } } + B2_ASSERT(index == awakeBodyCount); int32_t bodyBlockSize = 1 << 6; - int32_t bodyTaskCount = ((awakeBodyCount - 1) >> 6) + 1; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 6) + 1; - B2_ASSERT(index == awakeBodyCount); + int32_t colorBlockCounts[b2_graphColorCount]; + int32_t activeColorIndices[b2_graphColorCount]; - int32_t perColorTaskCount[b2_graphColorCount]; - - int32_t contactBlockSize = 1 << 5; + int32_t graphBlockSize = 1 << 5; int32_t activeColorCount = 0; - int32_t constraintTaskCount = 0; + int32_t graphBlockCount = 0; int32_t constraintCount = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t count = b2Array(colors[i].contactArray).count; if (count > 0) { - activeColorCount += 1; - perColorTaskCount[i] = ((count - 1) >> 5) + 1; + activeColorIndices[activeColorCount++] = i; + int32_t blockCount = ((count - 1) >> 5) + 1; + colorBlockCounts[i] = blockCount; + graphBlockCount += blockCount; + constraintCount += count; } - else - { - perColorTaskCount[i] = 0; - } - constraintTaskCount += perColorTaskCount[i]; - constraintCount += count; } b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); @@ -1516,31 +1681,52 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) int32_t jointCount = world->jointPool.count; int32_t storeBlockSize = 1 << 6; - int32_t storeTaskCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; int32_t velIters = B2_MAX(1, stepContext->velocityIterations); - int32_t posIters = stepContext->positionIterations; + int32_t calmIters = stepContext->positionIterations; + + /* + b2_stageIntegrateVelocities = 0, + b2_stagePrepareJoints, + b2_stagePrepareContacts, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageStoreImpulses + */ // TODO_ERIN joint tasks - int32_t stageCount = 1; - stageCount += jointCount > 0 ? 1 : 0; + int32_t stageCount = 0; + + // b2_stageIntegrateVelocities + stageCount += 1; + // b2_stagePrepareJoints + stageCount += 1; + // b2_stagePrepareContacts stageCount += activeColorCount; - stageCount += jointCount > 0 ? velIters * (1 + activeColorCount + 1) : velIters * (activeColorCount + 1); - stageCount += jointCount > 0 ? posIters * (1 + activeColorCount) : posIters * activeColorCount; - stageCount += constraintCount > 0 ? 2 : 1; + // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions + stageCount += 1 + activeColorCount + 1; + // b2_stageFinalizePositions + stageCount += 1; + // b2_stageCalmJoints, b2_stageCalmContacts + stageCount += 1 + activeColorCount; + // b2_stageStoreImpulses + stageCount += 1; b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); - int32_t taskCount = bodyTaskCount + constraintTaskCount + velIters * (constraintTaskCount + bodyTaskCount) + - posIters * (constraintTaskCount) + bodyTaskCount + storeTaskCount; - - b2SolverTaskEntry* entries = b2AllocateStackItem(world->stackAllocator, taskCount * sizeof(b2SolverTaskEntry), "task entries"); + b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); + b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); + b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); int32_t taskIndex = 0; int32_t stageIndex = 0; // Integrate velocities task setup stages[stageIndex].type = b2_stageIntegrateVelocities; - stages[stageIndex].color = 0xFF; stages[stageIndex].taskCount = bodyTaskCount; stages[stageIndex].completionCount = 0; stages[stageIndex].taskEntries = entries + taskIndex; @@ -1591,7 +1777,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageIndex += 1; } - + // Velocity iterations task setup for (int32_t iter = 0; iter < velIters; ++iter) { @@ -1634,7 +1820,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageIndex += 1; } - + // Integrate positions stages[stageIndex].type = b2_stageIntegratePositions; stages[stageIndex].color = 0xFF; @@ -1652,7 +1838,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageIndex += 1; } - + // Calming iterations task setup for (int32_t iter = 0; iter < posIters; ++iter) { @@ -1733,12 +1919,17 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) B2_ASSERT(stageIndex == stageCount); B2_ASSERT(taskIndex == taskCount); + // TODO_ERIN increase? + int32_t workerCount = B2_MIN(16, world->workerCount); + b2WorkerContext workerContext[16]; + b2SolverTaskContext context; context.stepContext = stepContext; context.world = world; context.awakeBodies = awakeBodies; context.graph = graph; context.constraints = constraints; + context.workerCount = workerCount; context.stageCount = stageCount; context.stages = stages; context.stageIndex = 0; @@ -1747,9 +1938,6 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.subStep = context.timeStep / velIters; context.invSubStep = velIters * stepContext->inv_dt; - b2WorkerContext workerContext[16]; - - int32_t workerCount = B2_MIN(16, world->workerCount); for (int32_t i = 0; i < workerCount; ++i) { workerContext[i].context = &context; From a86f78ef2cc1a5a8bdf7b7ee9272d217f66bad7c Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 18 Sep 2023 16:40:08 -0700 Subject: [PATCH 22/51] refactored graph solver --- samples/collection/benchmark_pyramid.cpp | 4 +- samples/sample.cpp | 2 +- src/graph.c | 498 ++++++++++------------- 3 files changed, 209 insertions(+), 295 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 9d5d6eee..6b6d1746 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -22,8 +22,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 10; - m_rowCount = g_sampleDebug ? 1 : 32; - m_columnCount = g_sampleDebug ? 1 : 32; + m_rowCount = g_sampleDebug ? 1 : 13; + m_columnCount = g_sampleDebug ? 1 : 14; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/samples/sample.cpp b/samples/sample.cpp index c8d4cd3d..da5935d3 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -63,7 +63,7 @@ Sample::Sample(const Settings& settings) b2Vec2 gravity = {0.0f, -10.0f}; // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = 16; //enki::GetNumHardwareThreads() / 2; m_scheduler.Initialize(maxThreads); m_taskCount = 0; diff --git a/src/graph.c b/src/graph.c index 1a18b099..91cbc572 100644 --- a/src/graph.c +++ b/src/graph.c @@ -92,7 +92,7 @@ typedef struct b2SolverStageType type; b2SolverBlock* blocks; int32_t blockCount; - //int32_t colorIndex; + int32_t colorIndex; _Atomic int completionCount; } b2SolverStage; @@ -1318,7 +1318,7 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i } } -static inline int32_t GetWorkerBlockIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) +static inline int32_t GetWorkerStartIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) { if (blockCount <= workerCount) { @@ -1337,21 +1337,28 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i b2SolverBlock* blocks = stage->blocks; int32_t blockCount = stage->blockCount; - int32_t colorIndex = (stageBits >> 8) & 0xFF; - int32_t iterIndex = (stageBits >> 16) & 0xFF; int32_t expectedSyncIndex = previousSyncIndex; - int32_t startIndex = GetWorkerBlockIndex(workerIndex, blockCount, context->workerCount); + int32_t startIndex = GetWorkerStartIndex(workerIndex, blockCount, context->workerCount); + if (startIndex == B2_NULL_INDEX) + { + return; + } + + B2_ASSERT(0 <= startIndex && startIndex < blockCount); + int32_t blockIndex = startIndex; // Caution: this can change expectedSyncIndex while (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == true) { + B2_ASSERT(completedCount < blockCount); + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, threadIndex); completedCount += 1; blockIndex += 1; - if (blockIndex > blockCount) + if (blockIndex >= blockCount) { // Keep looking for work blockIndex = 0; @@ -1382,41 +1389,7 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i blockIndex -= 1; } -#if 0 - b2SolverStageType type = stage->type; - - // TODO_ERIN only main thread - if (type == b2_stagePrepareJoints) - { - if (workerIndex == 0) - { - b2PrepareJointsTask(context); - } - return; - } - - // TODO_ERIN only main thread - if (type == b2_stageSolveJoints) - { - if (workerIndex == 0) - { - bool useBias = true; - b2SolveJointsTask(context, useBias); - } - return; - } - - // TODO_ERIN only main thread - if (type == b2_stageCalmJoints) - { - if (workerIndex == 0) - { - bool useBias = false; - b2SolveJointsTask(context, useBias); - } - return; - } -#endif + (void)atomic_fetch_add(&stage->completionCount, completedCount); } static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* context, int32_t workerIndex, uint32_t syncBits) @@ -1450,19 +1423,6 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } -/* -b2_stageIntegrateVelocities = 0, -b2_stagePrepareJoints, -b2_stagePrepareContacts, -b2_stageSolveJoints, -b2_stageSolveContacts, -b2_stageIntegratePositions, -b2_stageFinalizePositions, -b2_stageCalmJoints, -b2_stageCalmContacts, -b2_stageStoreImpulses -*/ - void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { B2_MAYBE_UNUSED(startIndex); @@ -1471,118 +1431,128 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo b2WorkerContext* workerContext = taskContext; int32_t workerIndex = workerContext->workerIndex; b2SolverTaskContext* context = workerContext->context; - b2Graph* graph = context->graph; int32_t activeColorCount = context->activeColorCount; - b2SolverStage* stages = context->stages; - int32_t stageCount = context->stageCount; - - // TODO_ERIN ? - int32_t maximumSyncStagesPerSubstep = 2 + 2 * activeColorCount; if (threadIndex == 0) { + // Main thread synchronizes the workers and does work itself. + // // Stages are re-used for loops so that I don't need more stages for large iteration counts. // The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks. // The stage index and sync indices are combined in to sync bits for atomic synchronization. // The workers need to compute the previous sync index for a given stage so that CAS works correctly. This // setup makes this easy to do. - uint32_t bodySyncIndex = 1; - uint32_t stageIndex = 0; + /* + b2_stageIntegrateVelocities = 0, + b2_stagePrepareJoints, + b2_stagePrepareContacts, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageStoreImpulses + */ + + int32_t bodySyncIndex = 1; + int32_t stageIndex = 0; uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, 0, syncBits); + b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + stageIndex += 1; bodySyncIndex += 1; // TODO_ERIN single threaded - B2_ASSERT(stages[1].type == b2_stagePrepareJoints); + B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); b2PrepareJointsTask(context); + stageIndex += 1; - uint32_t baseStageIndex = 2; - uint32_t graphSyncIndex = 1; + int32_t graphSyncIndex = 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { - stageIndex = baseStageIndex + colorIndex; syncBits = (graphSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + stageIndex += 1; } graphSyncIndex += 1; - baseStageIndex += 2 + colorIndex; int32_t velocityIterations = context->velocityIterations; for (int32_t i = 0; i < velocityIterations; ++i) { // stage index restarted each iteration - stageIndex = baseStageIndex; + int32_t iterStageIndex = stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageSolveJoints); + B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveJoints); b2SolveJointsTask(context, true); - stageIndex += 1; + iterStageIndex += 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { - syncBits = (graphSyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageSolveContacts); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); - stageIndex += 1; + syncBits = (graphSyncIndex << 16) | iterStageIndex; + B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveContacts); + b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); + iterStageIndex += 1; } graphSyncIndex += 1; - B2_ASSERT(stages[stageIndex].type == b2_stageIntegratePositions); - syncBits = (bodySyncIndex << 16) | stageIndex; - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + B2_ASSERT(stages[iterStageIndex].type == b2_stageIntegratePositions); + syncBits = (bodySyncIndex << 16) | iterStageIndex; + b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); bodySyncIndex += 1; } - baseStageIndex += 1 + activeColorCount + 1; + stageIndex += 1 + activeColorCount + 1; - stageIndex = baseStageIndex; syncBits = (bodySyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); - baseStageIndex += 1; + stageIndex += 1; int32_t calmIterations = context->calmIterations; for (int32_t i = 0; i < calmIterations; ++i) { // stage index restarted each iteration - stageIndex = baseStageIndex; + int32_t iterStageIndex = stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageCalmJoints); + B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmJoints); b2SolveJointsTask(context, false); - stageIndex += 1; + iterStageIndex += 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { - syncBits = (graphSyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageCalmContacts); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); - stageIndex += 1; + syncBits = (graphSyncIndex << 16) | iterStageIndex; + B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmContacts); + b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); + iterStageIndex += 1; } graphSyncIndex += 1; } - baseStageIndex += 1 + activeColorCount; - stageIndex = baseStageIndex; + stageIndex += 1 + activeColorCount; uint32_t constraintSyncIndex = 1; syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + atomic_store(&context->syncBits, UINT_MAX); + + B2_ASSERT(stageIndex + 1 == context->stageCount); return; } // Worker - uint32_t lastStageBits = 0; + uint32_t lastSyncBits = 0; while (true) { - // Spin until main thread bumps the sync index + // Spin until main thread bumps changes the sync bits uint32_t syncBits = atomic_load(&context->syncBits); - while (syncBits == lastStageBits) + while (syncBits == lastSyncBits) { _mm_pause(); syncBits = atomic_load(&context->syncBits); @@ -1594,16 +1564,18 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo break; } - uint32_t stageIndex = syncBits & 0xFFFF; + int32_t stageIndex = syncBits & 0xFFFF; B2_ASSERT(stageIndex < context->stageCount); - uint32_t syncIndex = (syncBits >> 16) & 0xFFFF; + int32_t syncIndex = (syncBits >> 16) & 0xFFFF; B2_ASSERT(syncIndex > 0); int32_t previousSyncIndex = syncIndex - 1; b2SolverStage* stage = stages + stageIndex; b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, threadIndex); + + lastSyncBits = syncBits; } } @@ -1648,42 +1620,44 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) int32_t bodyBlockSize = 1 << 6; int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 6) + 1; - int32_t colorBlockCounts[b2_graphColorCount]; int32_t activeColorIndices[b2_graphColorCount]; + int32_t colorConstraintCounts[b2_graphColorCount]; + int32_t colorBlockCounts[b2_graphColorCount]; int32_t graphBlockSize = 1 << 5; int32_t activeColorCount = 0; int32_t graphBlockCount = 0; int32_t constraintCount = 0; + int32_t c = 0; for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t count = b2Array(colors[i].contactArray).count; if (count > 0) { - activeColorIndices[activeColorCount++] = i; + activeColorIndices[c] = i; + colorConstraintCounts[c] = count; int32_t blockCount = ((count - 1) >> 5) + 1; - colorBlockCounts[i] = blockCount; + colorBlockCounts[c] = blockCount; graphBlockCount += blockCount; constraintCount += count; + c += 1; } } + activeColorCount = c; b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); int32_t base = 0; - for (int32_t i = 0; i < b2_graphColorCount; ++i) + for (int32_t i = 0; i < activeColorCount; ++i) { - colors[i].contacts = constraints + base; - base += b2Array(colors[i].contactArray).count; + int32_t j = activeColorIndices[i]; + colors[j].contacts = constraints + base; + base += b2Array(colors[j].contactArray).count; } - int32_t jointCount = world->jointPool.count; - int32_t storeBlockSize = 1 << 6; int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; - int32_t velIters = B2_MAX(1, stepContext->velocityIterations); - int32_t calmIters = stepContext->positionIterations; /* b2_stageIntegrateVelocities = 0, @@ -1717,227 +1691,165 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stageCount += 1; b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); - b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); - int32_t taskIndex = 0; - int32_t stageIndex = 0; - - // Integrate velocities task setup - stages[stageIndex].type = b2_stageIntegrateVelocities; - stages[stageIndex].taskCount = bodyTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - for (int32_t i = 0; i < bodyTaskCount; ++i) + for (int32_t i = 0; i < bodyBlockCount; ++i) { - int32_t startIndex = i * bodyBlockSize; - int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; + b2SolverBlock* block = bodyBlocks + i; + block->startIndex = i * bodyBlockSize; + block->endIndex = block->startIndex + bodyBlockSize; + block->syncIndex = 0; } - stageIndex += 1; + bodyBlocks[bodyBlockCount - 1].endIndex = awakeBodyCount; - // Prepare joints - if (jointCount > 0) - { - stages[stageIndex].type = b2_stagePrepareJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = 0; - stages[stageIndex].taskIndex = 0; - stageIndex += 1; - } + b2SolverBlock* colorBlocks[b2_graphColorCount]; + b2SolverBlock* baseGraphBlock = graphBlocks; - // Prepare constraints task setup - for (int32_t i = 0; i < b2_graphColorCount; ++i) + for (int32_t i = 0; i < activeColorCount; ++i) { - int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; - int32_t colorTaskCount = perColorTaskCount[i]; - if (colorTaskCount == 0) - { - continue; - } - - stages[stageIndex].type = b2_stagePrepareContacts; - stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].taskCount = colorTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - - for (int32_t j = 0; j < colorTaskCount; ++j) + int32_t blockCount = colorBlockCounts[i]; + for (int32_t j = 0; j < blockCount; ++j) { - int32_t startIndex = j * contactBlockSize; - int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; - } - - stageIndex += 1; - } - - // Velocity iterations task setup - for (int32_t iter = 0; iter < velIters; ++iter) - { - // Joints - if (jointCount > 0) - { - stages[stageIndex].type = b2_stageSolveJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = 0; - stages[stageIndex].taskIndex = 0; - stageIndex += 1; - } - - // Constraint graph - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; - int32_t colorTaskCount = perColorTaskCount[i]; - - if (colorTaskCount == 0) - { - continue; - } - - stages[stageIndex].type = b2_stageSolveContacts; - stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].taskCount = colorTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - - for (int32_t j = 0; j < colorTaskCount; ++j) - { - int32_t startIndex = j * contactBlockSize; - int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; - } - - stageIndex += 1; - } - - // Integrate positions - stages[stageIndex].type = b2_stageIntegratePositions; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = bodyTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - - for (int32_t i = 0; i < bodyTaskCount; ++i) - { - int32_t startIndex = i * bodyBlockSize; - int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; - } - - stageIndex += 1; - } - - // Calming iterations task setup - for (int32_t iter = 0; iter < posIters; ++iter) - { - // Joints - if (jointCount > 0) - { - stages[stageIndex].type = b2_stageCalmJoints; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = 0; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = 0; - stages[stageIndex].taskIndex = 0; - stageIndex += 1; - } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - int32_t colorConstraintCount = b2Array(colors[i].contactArray).count; - int32_t colorTaskCount = perColorTaskCount[i]; - - if (colorTaskCount == 0) - { - continue; - } - - stages[stageIndex].type = b2_stageCalmContacts; - stages[stageIndex].color = (uint8_t)i; - stages[stageIndex].taskCount = colorTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - - for (int32_t j = 0; j < colorTaskCount; ++j) - { - int32_t startIndex = j * contactBlockSize; - int32_t endIndex = B2_MIN(startIndex + contactBlockSize, colorConstraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; - } - stageIndex += 1; + b2SolverBlock* block = baseGraphBlock + j; + block->startIndex = j * graphBlockSize; + block->endIndex = block->startIndex + graphBlockSize; + block->syncIndex = 0; } + baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; + + colorBlocks[i] = baseGraphBlock; + baseGraphBlock += blockCount; } - // Prepare finalize position stage - stages[stageIndex].type = b2_stageFinalizePositions; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = bodyTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; - - for (int32_t i = 0; i < bodyTaskCount; ++i) + for (int32_t i = 0; i < storeBlockCount; ++i) { - int32_t startIndex = i * bodyBlockSize; - int32_t endIndex = B2_MIN(startIndex + bodyBlockSize, awakeBodyCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; + b2SolverBlock* block = storeBlocks + i; + block->startIndex = i * storeBlockSize; + block->endIndex = block->startIndex + storeBlockSize; + block->syncIndex = 0; } - stageIndex += 1; - - // Prepare store impulses stage - if (constraintCount > 0) - { - stages[stageIndex].type = b2_stageStoreImpulses; - stages[stageIndex].color = 0xFF; - stages[stageIndex].taskCount = storeTaskCount; - stages[stageIndex].completionCount = 0; - stages[stageIndex].taskEntries = entries + taskIndex; - stages[stageIndex].taskIndex = 0; + storeBlocks[storeBlockCount - 1].endIndex = constraintCount; - for (int32_t i = 0; i < storeTaskCount; ++i) - { - int32_t startIndex = i * storeBlockSize; - int32_t endIndex = B2_MIN(startIndex + storeBlockSize, constraintCount); - entries[taskIndex++] = (b2SolverTaskEntry){startIndex, endIndex}; - } - stageIndex += 1; - } + b2SolverStage* stage = stages; - B2_ASSERT(stageIndex == stageCount); - B2_ASSERT(taskIndex == taskCount); + // Integrate velocities + stage->type = b2_stageIntegrateVelocities; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; - // TODO_ERIN increase? + // Prepare joints + stage->type = b2_stagePrepareJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Prepare constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stagePrepareContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Solve joints + stage->type = b2_stageSolveJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Solve constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageSolveContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Integrate positions + stage->type = b2_stageIntegratePositions; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Finalize positions + stage->type = b2_stageFinalizePositions; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm joints + stage->type = b2_stageCalmJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageCalmContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Store impulses + stage->type = b2_stageStoreImpulses; + stage->blocks = storeBlocks; + stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + B2_ASSERT((int32_t)(stage - stages) == stageCount); + + // TODO_ERIN increase min? int32_t workerCount = B2_MIN(16, world->workerCount); b2WorkerContext workerContext[16]; + int32_t velIters = B2_MAX(1, stepContext->velocityIterations); + b2SolverTaskContext context; context.stepContext = stepContext; context.world = world; context.awakeBodies = awakeBodies; context.graph = graph; context.constraints = constraints; + context.activeColorCount = activeColorCount; + context.velocityIterations = velIters; + context.calmIterations = stepContext->positionIterations; context.workerCount = workerCount; context.stageCount = stageCount; context.stages = stages; - context.stageIndex = 0; context.timeStep = stepContext->dt; context.invTimeStep = stepContext->inv_dt; context.subStep = context.timeStep / velIters; context.invSubStep = velIters * stepContext->inv_dt; + context.syncBits = 0; + // TODO_ERIN use workerIndex or threadIndex? for (int32_t i = 0; i < workerCount; ++i) { workerContext[i].context = &context; @@ -1947,7 +1859,9 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) world->finishAllTasksFcn(world->userTaskContext); - b2FreeStackItem(world->stackAllocator, entries); + b2FreeStackItem(world->stackAllocator, storeBlocks); + b2FreeStackItem(world->stackAllocator, graphBlocks); + b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, awakeBodies); From a7cdad5faefdc6bd8a09c991a7951f9790e01c2c Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 18 Sep 2023 22:42:51 -0700 Subject: [PATCH 23/51] clean up --- samples/collection/benchmark_pyramid.cpp | 4 +- samples/sample.cpp | 2 +- src/body.h | 10 + src/graph.c | 1113 +++++----------------- 4 files changed, 232 insertions(+), 897 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 6b6d1746..2ab3634d 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -22,8 +22,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 10; - m_rowCount = g_sampleDebug ? 1 : 13; - m_columnCount = g_sampleDebug ? 1 : 14; + m_rowCount = g_sampleDebug ? 4 : 13; + m_columnCount = g_sampleDebug ? 3 : 14; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/samples/sample.cpp b/samples/sample.cpp index da5935d3..8c61f29c 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -63,7 +63,7 @@ Sample::Sample(const Settings& settings) b2Vec2 gravity = {0.0f, -10.0f}; // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = 16; //enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = 8; //enki::GetNumHardwareThreads() / 2; m_scheduler.Initialize(maxThreads); m_taskCount = 0; diff --git a/src/body.h b/src/body.h index 65532864..719cc8c0 100644 --- a/src/body.h +++ b/src/body.h @@ -83,6 +83,16 @@ typedef struct b2Body bool enlargeAABB; } b2Body; +typedef struct b2SolverBody +{ + b2Vec2 linearVelocity; + float angularVelocity; + + // These are the change in position/angle that accumulate across constraint substeps + b2Vec2 deltaPosition; + float deltaAngle; +} b2SolverBody; + bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB); b2ShapeId b2Body_CreatePolygon(b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon); diff --git a/src/graph.c b/src/graph.c index 91cbc572..58b459d5 100644 --- a/src/graph.c +++ b/src/graph.c @@ -17,16 +17,12 @@ #include "box2d/aabb.h" #include +#include #include -#include - -// #include -// #include #define maxBaumgarteVelocity 3.0f -// TODO_ERIN clean this up -typedef struct b2ConstraintPoint +typedef struct { b2Vec2 rA, rB; b2Vec2 localAnchorA, localAnchorB; @@ -35,9 +31,6 @@ typedef struct b2ConstraintPoint float tangentImpulse; float normalMass; float tangentMass; - float massCoefficient; - float biasCoefficient; - float impulseCoefficient; } b2ConstraintPoint; typedef struct b2Constraint @@ -48,19 +41,13 @@ typedef struct b2Constraint b2ConstraintPoint points[2]; b2Vec2 normal; float friction; + float massCoefficient; + float biasCoefficient; + float impulseCoefficient; int32_t pointCount; } b2Constraint; -typedef struct b2GraphContext -{ - b2World* world; - b2GraphColor* color; - float timeStep; - float contactHertz; - bool enableWarmStarting; -} b2GraphContext; - -typedef enum b2SolverStageType +typedef enum { b2_stageIntegrateVelocities = 0, b2_stagePrepareJoints, @@ -296,92 +283,6 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->flags &= ~b2_contactStatic; } -static void b2IntegrateVelocities2(b2World* world, b2Body** bodies, int32_t bodyCount, float h) -{ - b2Vec2 gravity = world->gravity; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = 0; i < bodyCount; ++i) - { - b2Body* body = bodies[i]; - - if (body->type != b2_dynamicBody) - { - continue; - } - - float invMass = body->invMass; - float invI = body->invI; - - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + h * invI * body->torque; - - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); - w *= 1.0f / (1.0f + h * body->angularDamping); - - body->linearVelocity = v; - body->angularVelocity = w; - - body->deltaAngle = 0.0f; - body->deltaPosition = b2Vec2_zero; - } -} - -static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2Vec2 gravity = context->world->gravity; - b2Body** bodies = context->awakeBodies; - float h = context->timeStep; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - - if (body->type != b2_dynamicBody) - { - continue; - } - - float invMass = body->invMass; - float invI = body->invI; - - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + h * invI * body->torque; - - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); - w *= 1.0f / (1.0f + h * body->angularDamping); - - body->linearVelocity = v; - body->angularVelocity = w; - - body->deltaAngle = 0.0f; - body->deltaPosition = b2Vec2_zero; - } -} - static void b2PrepareJointsTask(b2SolverTaskContext* context) { b2World* world = context->world; @@ -415,7 +316,7 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; + const float contactHertz = 45.0f; float h = context->timeStep; bool enableWarmStarting = world->enableWarmStarting; @@ -450,6 +351,15 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver float mB = bodyB->invMass; float iB = bodyB->invI; + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float c = h * omega * (2.0f * zeta + h * omega); + constraint->impulseCoefficient = 1.0f / (1.0f + c); + constraint->massCoefficient = c * constraint->impulseCoefficient; + constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); + b2Vec2 cA = bodyA->position; b2Vec2 cB = bodyB->position; b2Rot qA = b2MakeRot(bodyA->angle); @@ -485,39 +395,8 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - // float d = 2.0f * zeta * omega / kNormal; - // float k = omega * omega / kNormal; - - // cp->gamma = 1.0f / (h * (d + h * k)); - // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); - // cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - cp->separation = mp->separation; - - // cp->bias = h * k * cp->gamma * mp->separation; - // cp->bias = k / (d + h * k) * mp->separation; - // cp->bias = - // (omega * omega / kNormal) / (2 * zeta * omega / kNormal + h * omega * omega / kNormal) * mp->separation; - cp->biasCoefficient = omega / (2.0f * zeta + h * omega); - // cp->gamma = 0.0f; - // cp->bias = (0.2f / h) * mp->separation; - - // TODO_ERIN this can be expanded cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - // cp->normalMass = 1.0f / (kNormal + cp->gamma); - - float c = h * omega * (2.0f * zeta + h * omega); - cp->impulseCoefficient = 1.0f / (1.0f + c); - cp->massCoefficient = c * cp->impulseCoefficient; - - // meff = 1.0f / kNormal * 1.0f / (1.0f + 1.0f / (h * omega * (2 * zeta + h * omega))) - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - // = -meff * mscale * (vn + bias) - imp_scale * impulse // Warm start if (enableWarmStarting) @@ -539,6 +418,49 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver b2TracyCZoneEnd(prepare_contact); } +static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(integrate_velocity, "IntVel", b2_colorDeepPink, true); + + b2Vec2 gravity = context->world->gravity; + b2Body** bodies = context->awakeBodies; + float h = context->timeStep; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + body->linearVelocity = v; + body->angularVelocity = w; + + body->deltaAngle = 0.0f; + body->deltaPosition = b2Vec2_zero; + } + + b2TracyCZoneEnd(integrate_velocity); +} + static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) { b2World* world = context->world; @@ -556,577 +478,23 @@ static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) b2SolveJointVelocitySoft(joint, stepContext, useBias); } -} - -static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) -{ - b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - - b2World* world = context->world; - b2Graph* graph = context->graph; - b2GraphColor* color = graph->colors + colorIndex; - b2Body* bodies = world->bodies; - b2Constraint* constraints = color->contacts; - - float inv_dt = context->invTimeStep; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Constraint* constraint = constraints + i; - - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - const b2Vec2 dpA = bodyA->deltaPosition; - const float daA = bodyA->deltaAngle; - const b2Vec2 dpB = bodyB->deltaPosition; - const float daB = bodyB->deltaAngle; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = cp->massCoefficient; - impulseScale = cp->impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(solve_contact); -} - -static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); - - b2Body** bodies = context->awakeBodies; - float h = context->subStep; - - B2_ASSERT(startIndex <= endIndex); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - body->deltaAngle += h * body->angularVelocity; - body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); - } - - b2TracyCZoneEnd(integrate_positions); -} - -static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, uint32_t threadIndex) -{ - b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); - - b2World* world = context->world; - b2Body** bodies = context->awakeBodies; - b2Contact* contacts = world->contacts; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - - b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= world->bodyPool.capacity); - B2_ASSERT(endIndex <= world->bodyPool.capacity); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - - body->position = b2Add(body->position, body->deltaPosition); - body->angle += body->deltaAngle; - - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - - body->force = b2Vec2_zero; - body->torque = 0.0f; - - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; - - B2_ASSERT(shape->isFast == false); - - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - - shapeIndex = shape->nextShapeIndex; - } - - // TODO_ERIN legacy - int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; - - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } - } - - b2TracyCZoneEnd(finalize_positions); -} - -static void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - - b2Constraint* constraints = context->constraints; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Constraint* constraint = constraints + i; - b2Contact* contact = constraint->contact; - - b2Manifold* manifold = &contact->manifold; - - for (int32_t j = 0; j < constraint->pointCount; ++j) - { - manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; - manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; - } - } - - b2TracyCZoneEnd(store_impulses); -} - -typedef struct b2BodyContext -{ - b2World* world; - b2Body** bodies; - float h; -} b2BodyContext; - -static void b2UpdateDeltasTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) -{ - B2_MAYBE_UNUSED(threadIndex); - - b2TracyCZoneNC(update_deltas, "Deltas", b2_colorDarkSeaGreen, true); - - b2BodyContext* bodyContext = taskContext; - b2Body** bodies = bodyContext->bodies; - float h = bodyContext->h; - - B2_ASSERT(startIndex <= endIndex); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - - body->deltaAngle += h * body->angularVelocity; - body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); - } - - b2TracyCZoneEnd(update_deltas); -} - -static void b2UpdateDeltas(b2World* world, b2Body** bodies, int32_t count, float h) -{ - if (count == 0) - { - return; - } - - b2BodyContext context = {world, bodies, h}; - - int32_t minRange = 128; - if (count < minRange) - { - b2UpdateDeltasTask(0, count, 0, &context); - } - else - { - void* userTask = world->enqueueTaskFcn(&b2UpdateDeltasTask, count, minRange, &context, world->userTaskContext); - world->finishTaskFcn(userTask, world->userTaskContext); - } -} - -static void b2UpdatePositionsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) -{ - b2TracyCZoneNC(update_positions, "Positions", b2_colorViolet, true); - - b2BodyContext* bodyContext = taskContext; - b2World* world = bodyContext->world; - b2Body** bodies = bodyContext->bodies; - b2Contact* contacts = world->contacts; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - float h = bodyContext->h; - - b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= world->bodyPool.capacity); - B2_ASSERT(endIndex <= world->bodyPool.capacity); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - - // Final substep - body->deltaAngle += h * body->angularVelocity; - body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); - - body->position = b2Add(body->position, body->deltaPosition); - body->angle += body->deltaAngle; - - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - - body->force = b2Vec2_zero; - body->torque = 0.0f; - - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; - - B2_ASSERT(shape->isFast == false); - - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - - shapeIndex = shape->nextShapeIndex; - } - - // TODO_ERIN legacy - int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; - - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } - } - - b2TracyCZoneEnd(update_positions); -} - -static void b2UpdatePositions(b2World* world, b2Body** bodies, int32_t count, float h) -{ - if (count == 0) - { - return; - } - - b2BodyContext context = {world, bodies, h}; - - int32_t minRange = 32; - if (count < minRange) - { - b2UpdatePositionsTask(0, count, 0, &context); - } - else - { - void* userTask = world->enqueueTaskFcn(&b2UpdatePositionsTask, count, minRange, &context, world->userTaskContext); - world->finishTaskFcn(userTask, world->userTaskContext); - } -} - -static void b2PrepareSoftContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) -{ - B2_MAYBE_UNUSED(threadIndex); - - b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); - - b2GraphContext* graphContext = taskContext; - b2GraphColor* color = graphContext->color; - int32_t* contactIndices = color->contactArray; - b2Contact* contacts = graphContext->world->contacts; - b2Body* bodies = graphContext->world->bodies; - - float contactHertz = graphContext->contactHertz; - float h = graphContext->timeStep; - bool enableWarmStarting = graphContext->enableWarmStarting; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; - - B2_ASSERT(0 < pointCount && pointCount <= 2); - - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - - b2Constraint* constraint = color->contacts + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - constraint->pointCount = pointCount; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; - - cp->rA = b2Sub(mp->point, cA); - cp->rB = b2Sub(mp->point, cB); - cp->localAnchorA = b2InvRotateVector(qA, cp->rA); - cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - // float d = 2.0f * zeta * omega / kNormal; - // float k = omega * omega / kNormal; - - // cp->gamma = 1.0f / (h * (d + h * k)); - // cp->gamma = 1.0f / (h * (2.0f * zeta * omega / kNormal + h * omega * omega / kNormal)); - // cp->gamma = kNormal / (h * omega * (2.0f * zeta + h * omega)); - - cp->separation = mp->separation; - - // cp->bias = h * k * cp->gamma * mp->separation; - // cp->bias = k / (d + h * k) * mp->separation; - // cp->bias = - // (omega * omega / kNormal) / (2 * zeta * omega / kNormal + h * omega * omega / kNormal) * mp->separation; - cp->biasCoefficient = omega / (2.0f * zeta + h * omega); - // cp->gamma = 0.0f; - // cp->bias = (0.2f / h) * mp->separation; - - // TODO_ERIN this can be expanded - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - // cp->normalMass = 1.0f / (kNormal + cp->gamma); - - float c = h * omega * (2.0f * zeta + h * omega); - cp->impulseCoefficient = 1.0f / (1.0f + c); - cp->massCoefficient = c * cp->impulseCoefficient; - - // meff = 1.0f / kNormal * 1.0f / (1.0f + 1.0f / (h * omega * (2 * zeta + h * omega))) - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - // = -meff * mscale * (vn + bias) - imp_scale * impulse - - // Warm start - if (enableWarmStarting) - { - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - } - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(prepare_contact); -} - -// h is full time step -static void b2PrepareSoftContact(b2World* world, b2GraphColor* color, float h, float contactHertz, bool warmStart) -{ - int32_t count = b2Array(color->contactArray).count; - if (count == 0) - { - return; - } - - b2GraphContext context = {world, color, h, contactHertz, warmStart}; - - int32_t minRange = 64; - if (count < minRange) - { - b2PrepareSoftContactTask(0, count, 0, &context); - } - else - { - void* userPrepareTask = world->enqueueTaskFcn(&b2PrepareSoftContactTask, count, minRange, &context, world->userTaskContext); - world->finishTaskFcn(userPrepareTask, world->userTaskContext); - } -} - -typedef struct b2ContactContext -{ - b2World* world; - b2GraphColor* color; - float inv_dt; - bool useBias; -} b2ContactContext; +} -static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { - B2_MAYBE_UNUSED(threadIndex); b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - b2ContactContext* contactContext = taskContext; - b2Body* bodies = contactContext->world->bodies; - b2Constraint* constraints = contactContext->color->contacts; + b2World* world = context->world; + b2Graph* graph = context->graph; + b2GraphColor* color = graph->colors + colorIndex; + b2Body* bodies = world->bodies; + b2Constraint* constraints = color->contacts; - float inv_dt = contactContext->inv_dt; - bool useBias = contactContext->useBias; + float inv_dt = context->invTimeStep; B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(contactContext->color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(contactContext->color->contactArray).count); + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); for (int32_t i = startIndex; i < endIndex; ++i) { @@ -1154,6 +522,9 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; for (int32_t j = 0; j < pointCount; ++j) { @@ -1174,15 +545,16 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th float impulseScale = 0.0f; if (s > 0.0f) { + // TODO_ERIN what time to use? // Speculative (inverse of full time step) bias = s * inv_dt; } else if (useBias) { - bias = B2_MAX(cp->biasCoefficient * s, -maxBaumgarteVelocity); + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); // bias = cp->biasCoefficient * s; - massScale = cp->massCoefficient; - impulseScale = cp->impulseCoefficient; + massScale = massCoefficient; + impulseScale = impulseCoefficient; } // Compute normal impulse @@ -1242,32 +614,100 @@ static void b2SolveContactTask(int32_t startIndex, int32_t endIndex, uint32_t th b2TracyCZoneEnd(solve_contact); } -// inv_dt is full time step inverse -static void b2SolveSoftContact(b2World* world, b2GraphColor* color, float inv_dt, bool useBias) +static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { - int32_t count = b2Array(color->contactArray).count; - if (count == 0) - { - return; - } + b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); - b2ContactContext context = {world, color, inv_dt, useBias}; + b2Body** bodies = context->awakeBodies; + float h = context->subStep; + + B2_ASSERT(startIndex <= endIndex); - int32_t minRange = 128; - if (count < minRange) + for (int32_t i = startIndex; i < endIndex; ++i) { - b2SolveContactTask(0, count, 0, &context); + b2Body* body = bodies[i]; + body->deltaAngle += h * body->angularVelocity; + body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); } - else + + b2TracyCZoneEnd(integrate_positions); +} + +static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, uint32_t threadIndex) +{ + b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); + + b2World* world = context->world; + b2Body** bodies = context->awakeBodies; + b2Contact* contacts = world->contacts; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= world->bodyPool.capacity); + B2_ASSERT(endIndex <= world->bodyPool.capacity); + + for (int32_t i = startIndex; i < endIndex; ++i) { - void* userSolveTask = world->enqueueTaskFcn(&b2SolveContactTask, count, minRange, &context, world->userTaskContext); - world->finishTaskFcn(userSolveTask, world->userTaskContext); + b2Body* body = bodies[i]; + + body->position = b2Add(body->position, body->deltaPosition); + body->angle += body->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + + body->force = b2Vec2_zero; + body->torque = 0.0f; + + // Update shapes AABBs + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) + { + b2Shape* shape = world->shapes + shapeIndex; + + B2_ASSERT(shape->isFast == false); + + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + + shapeIndex = shape->nextShapeIndex; + } + + // TODO_ERIN legacy + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; + } } + + b2TracyCZoneEnd(finalize_positions); } -static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) +static void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { - for (int32_t i = 0; i < constraintCount; ++i) + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); + + b2Constraint* constraints = context->constraints; + + for (int32_t i = startIndex; i < endIndex; ++i) { b2Constraint* constraint = constraints + i; b2Contact* contact = constraint->contact; @@ -1280,6 +720,8 @@ static void b2StoreImpulses(b2Constraint* constraints, int32_t constraintCount) manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; } } + + b2TracyCZoneEnd(store_impulses); } static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, uint32_t threadIndex) @@ -1437,7 +879,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo if (threadIndex == 0) { // Main thread synchronizes the workers and does work itself. - // + // // Stages are re-used for loops so that I don't need more stages for large iteration counts. // The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks. // The stage index and sync indices are combined in to sync bits for atomic synchronization. @@ -1569,7 +1011,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo int32_t syncIndex = (syncBits >> 16) & 0xFFFF; B2_ASSERT(syncIndex > 0); - + int32_t previousSyncIndex = syncIndex - 1; b2SolverStage* stage = stages + stageIndex; @@ -1579,8 +1021,28 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo } } +// Threading: +// 1. build array of awake bodies, maybe copy to contiguous array +// 2. parallel-for integrate velocities +// 3. parallel prepare constraints by color +// Loop sub-steps: +// 4. parallel solve constraints by color +// 5. parallel-for update position deltas (and positions on last iter) +// End Loop +// Loop bias-removal: +// 6. parallel solve constraints by color +// End loop +// 7. parallel-for store impulses +// 8. parallel-for update aabbs, build proxy update set, build awake contact set + +// Soft constraints with constraint error substepping. Allows for stiffer contacts with a small performance hit. Includes a +// bias removal stage to help remove excess bias energy. +// http://mmacklin.com/smallsteps.pdf +// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf void b2SolveGraph(b2World* world, const b2StepContext* stepContext) { + b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); + b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; @@ -1617,14 +1079,23 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) } B2_ASSERT(index == awakeBodyCount); - int32_t bodyBlockSize = 1 << 6; - int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 6) + 1; + int32_t workerCount = world->workerCount; + const int32_t blocksPerWorker = 6; + + int32_t bodyBlockSize = 1 << 3; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 3) + 1; + if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) + { + bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); + bodyBlockCount = blocksPerWorker * workerCount; + } int32_t activeColorIndices[b2_graphColorCount]; int32_t colorConstraintCounts[b2_graphColorCount]; + int32_t colorBlockSize[b2_graphColorCount]; int32_t colorBlockCounts[b2_graphColorCount]; - int32_t graphBlockSize = 1 << 5; + int32_t graphBlockSize = 1 << 2; int32_t activeColorCount = 0; int32_t graphBlockCount = 0; int32_t constraintCount = 0; @@ -1637,7 +1108,14 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) { activeColorIndices[c] = i; colorConstraintCounts[c] = count; - int32_t blockCount = ((count - 1) >> 5) + 1; + int32_t blockCount = ((count - 1) >> 2) + 1; + colorBlockSize[c] = graphBlockSize; + if (count > blocksPerWorker * graphBlockSize * workerCount) + { + colorBlockSize[c] = count / (blocksPerWorker * workerCount); + blockCount = blocksPerWorker * workerCount; + } + colorBlockCounts[c] = blockCount; graphBlockCount += blockCount; constraintCount += count; @@ -1656,8 +1134,13 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) base += b2Array(colors[j].contactArray).count; } - int32_t storeBlockSize = 1 << 6; - int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 6) + 1 : 0; + int32_t storeBlockSize = 1 << 4; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 4) + 1 : 0; + if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) + { + storeBlockSize = constraintCount / (blocksPerWorker * workerCount); + storeBlockCount = blocksPerWorker * workerCount; + } /* b2_stageIntegrateVelocities = 0, @@ -1674,7 +1157,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) // TODO_ERIN joint tasks int32_t stageCount = 0; - + // b2_stageIntegrateVelocities stageCount += 1; // b2_stagePrepareJoints @@ -1710,15 +1193,16 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) for (int32_t i = 0; i < activeColorCount; ++i) { int32_t blockCount = colorBlockCounts[i]; + int32_t blockSize = colorBlockSize[i]; for (int32_t j = 0; j < blockCount; ++j) { b2SolverBlock* block = baseGraphBlock + j; - block->startIndex = j * graphBlockSize; - block->endIndex = block->startIndex + graphBlockSize; + block->startIndex = j * blockSize; + block->endIndex = block->startIndex + blockSize; block->syncIndex = 0; } baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; - + colorBlocks[i] = baseGraphBlock; baseGraphBlock += blockCount; } @@ -1825,8 +1309,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) B2_ASSERT((int32_t)(stage - stages) == stageCount); - // TODO_ERIN increase min? - int32_t workerCount = B2_MIN(16, world->workerCount); + B2_ASSERT(workerCount <= 16); b2WorkerContext workerContext[16]; int32_t velIters = B2_MAX(1, stepContext->velocityIterations); @@ -1849,6 +1332,8 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.invSubStep = velIters * stepContext->inv_dt; context.syncBits = 0; + b2TracyCZoneEnd(prepare_stages); + // TODO_ERIN use workerIndex or threadIndex? for (int32_t i = 0; i < workerCount; ++i) { @@ -1866,163 +1351,3 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, awakeBodies); } - -// Threading: -// 1. build array of awake bodies, maybe copy to contiguous array -// 2. parallel-for integrate velocities -// 3. parallel prepare constraints by color -// Loop sub-steps: -// 4. parallel solve constraints by color -// 5. parallel-for update position deltas (and positions on last iter) -// End Loop -// Loop bias-removal: -// 6. parallel solve constraints by color -// End loop -// 7. parallel-for store impulses -// 8. parallel-for update aabbs, build proxy update set, build awake contact set - -// Soft constraints with constraint error substepping. Allows for stiffer contacts with a small performance hit. Includes a -// bias removal stage to help remove excess bias energy. -// http://mmacklin.com/smallsteps.pdf -// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf -void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext) -{ - b2Graph* graph = &world->graph; - b2GraphColor* colors = graph->colors; - b2Joint* joints = world->joints; - - int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; - int32_t awakeBodyCount = 0; - for (int32_t i = 0; i < awakeIslandCount; ++i) - { - int32_t islandIndex = world->awakeIslandArray[i]; - b2Island* island = world->islands + islandIndex; - awakeBodyCount += island->bodyCount; - } - - b2Body* bodies = world->bodies; - b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "body pointers"); - int32_t index = 0; - for (int32_t i = 0; i < awakeIslandCount; ++i) - { - int32_t islandIndex = world->awakeIslandArray[i]; - b2Island* island = world->islands + islandIndex; - int32_t bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - B2_ASSERT(b2ObjectValid(&body->object)); - - awakeBodies[index++] = body; - bodyIndex = body->islandNext; - } - } - - B2_ASSERT(index == awakeBodyCount); - - int32_t constraintCount = 0; - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - constraintCount += b2Array(colors[i].contactArray).count; - } - - b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); - int32_t base = 0; - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - colors[i].contacts = constraints + base; - base += b2Array(colors[i].contactArray).count; - } - - B2_ASSERT(base == constraintCount); - - // Full step apply gravity - b2IntegrateVelocities2(world, awakeBodies, awakeBodyCount, stepContext->dt); - - // 30 is a bit soft, 60 oscillates too much - // const float contactHertz = 45.0f; - // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - // Soft constraints initialized with full time step - bool warmStart = stepContext->enableWarmStarting; - b2PrepareSoftContact(world, colors + i, stepContext->dt, contactHertz, warmStart); - } - - int32_t jointCapacity = world->jointPool.capacity; - - for (int32_t i = 0; i < jointCapacity; ++i) - { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) - { - continue; - } - - b2PrepareJoint(joint, stepContext); - } - - int32_t substepCount = stepContext->velocityIterations; - float h = stepContext->dt / substepCount; - - for (int32_t substep = 0; substep < substepCount; ++substep) - { - // One constraint iteration - for (int32_t i = 0; i < jointCapacity; ++i) - { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) - { - continue; - } - - bool useBias = true; - b2SolveJointVelocitySoft(joint, stepContext, useBias); - } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - bool useBias = true; - b2SolveSoftContact(world, colors + i, h, useBias); - } - - if (substep < substepCount - 1) - { - b2UpdateDeltas(world, awakeBodies, awakeBodyCount, h); - } - else - { - b2UpdatePositions(world, awakeBodies, awakeBodyCount, h); - } - } - - int32_t positionIterations = stepContext->positionIterations; - for (int32_t iter = 0; iter < positionIterations; ++iter) - { - for (int32_t i = 0; i < jointCapacity; ++i) - { - b2Joint* joint = joints + i; - if (b2ObjectValid(&joint->object) == false) - { - continue; - } - - bool useBias = false; - b2SolveJointVelocitySoft(joint, stepContext, useBias); - } - - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - bool useBias = false; - b2SolveSoftContact(world, colors + i, h, useBias); - } - } - - b2StoreImpulses(constraints, constraintCount); - - b2FreeStackItem(world->stackAllocator, constraints); - b2FreeStackItem(world->stackAllocator, awakeBodies); -} From 6bafbd4d6f64529eed20647952469de0239817ad Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 19 Sep 2023 23:25:56 -0700 Subject: [PATCH 24/51] something broke --- samples/collection/benchmark_pyramid.cpp | 8 +- src/body.c | 2 +- src/body.h | 7 + src/graph.c | 347 +++++++++++++++++------ 4 files changed, 266 insertions(+), 98 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 2ab3634d..dae0ad84 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,9 +21,9 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 10; - m_rowCount = g_sampleDebug ? 4 : 13; - m_columnCount = g_sampleDebug ? 3 : 14; + m_baseCount = 3; + m_rowCount = g_sampleDebug ? 1 : 13; + m_columnCount = g_sampleDebug ? 1 : 14; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; @@ -57,7 +57,7 @@ class BenchmarkPyramid : public Sample float h = m_extent - m_round; b2Polygon cuboid = b2MakeRoundedBox(h, h, m_round); - float shift = 1.0f * h; + float shift = 0.8f * h; for (int32_t i = 0; i < m_baseCount; ++i) { diff --git a/src/body.c b/src/body.c index e0fef239..60dce6eb 100644 --- a/src/body.c +++ b/src/body.c @@ -67,7 +67,6 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->sleepTime = 0.0f; b->userData = def->userData; b->world = worldId.index; - b->islandIndex = 0; b->enableSleep = def->enableSleep; b->fixedRotation = def->fixedRotation; b->isEnabled = def->isEnabled; @@ -77,6 +76,7 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->islandIndex = B2_NULL_INDEX; b->islandPrev = B2_NULL_INDEX; b->islandNext = B2_NULL_INDEX; + b->solverIndex = B2_NULL_INDEX; if (b->type != b2_staticBody) { diff --git a/src/body.h b/src/body.h index 719cc8c0..feafa37f 100644 --- a/src/body.h +++ b/src/body.h @@ -59,6 +59,8 @@ typedef struct b2Body int32_t islandPrev; int32_t islandNext; + int32_t solverIndex; + float mass, invMass; // Rotational inertia about the center of mass. @@ -91,6 +93,11 @@ typedef struct b2SolverBody // These are the change in position/angle that accumulate across constraint substeps b2Vec2 deltaPosition; float deltaAngle; + + float invMass; + float invI; + + int32_t bodyIndex; } b2SolverBody; bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB); diff --git a/src/graph.c b/src/graph.c index 58b459d5..21533563 100644 --- a/src/graph.c +++ b/src/graph.c @@ -87,6 +87,7 @@ typedef struct { b2World* world; b2Body** awakeBodies; + b2SolverBody* solverBodies; b2Graph* graph; const b2StepContext* stepContext; @@ -283,6 +284,60 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) contact->flags &= ~b2_contactStatic; } +static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(integrate_velocity, "IntVel", b2_colorDeepPink, true); + + b2Vec2 gravity = context->world->gravity; + b2Body** bodies = context->awakeBodies; + b2SolverBody* solverBodies = context->solverBodies; + + float h = context->timeStep; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; + //_m_prefetch(bodies[i + 1]); + + B2_ASSERT(body->solverIndex == i); + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. + // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + b2SolverBody* solverBody = solverBodies + i; + solverBody->linearVelocity = v; + solverBody->angularVelocity = w; + + solverBody->deltaAngle = 0.0f; + solverBody->deltaPosition = b2Vec2_zero; + + solverBody->invMass = invMass; + solverBody->invI = invI; + + solverBody->bodyIndex = body->object.index; + } + + b2TracyCZoneEnd(integrate_velocity); +} + static void b2PrepareJointsTask(b2SolverTaskContext* context) { b2World* world = context->world; @@ -312,6 +367,7 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; b2Body* bodies = world->bodies; + b2SolverBody* solverBodies = context->solverBodies; // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; @@ -333,10 +389,11 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver B2_ASSERT(0 < pointCount && pointCount <= 2); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + b2Body* bodyA = bodies + contact->edges[0].bodyIndex; + b2Body* bodyB = bodies + contact->edges[1].bodyIndex; + + int32_t indexA = bodyA->solverIndex; + int32_t indexB = bodyB->solverIndex; b2Constraint* constraint = color->contacts + i; constraint->contact = contact; @@ -346,10 +403,35 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver constraint->friction = contact->friction; constraint->pointCount = pointCount; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; + b2SolverBody* solverBodyA; + b2Vec2 vA; + float wA; + float mA; + float iA; + + if (indexA != B2_NULL_INDEX) + { + solverBodyA = solverBodies + indexA; + vA = solverBodyA->linearVelocity; + wA = solverBodyA->angularVelocity; + mA = solverBodyA->invMass; + iA = solverBodyA->invI; + } + else + { + solverBodyA = NULL; + vA.x = vA.y = 0.0; + wA = 0.0f; + mA = 0.0f; + iA = 0.0f; + } + + B2_ASSERT(indexB != B2_NULL_INDEX); + b2SolverBody* solverBodyB = solverBodies + indexB; + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; // Stiffer for static contacts to avoid bodies getting pushed through the ground const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; @@ -362,13 +444,8 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver b2Vec2 cA = bodyA->position; b2Vec2 cB = bodyB->position; - b2Rot qA = b2MakeRot(bodyA->angle); - b2Rot qB = b2MakeRot(bodyB->angle); - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + b2Rot qA = bodyA->transform.q; + b2Rot qB = bodyB->transform.q; b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(constraint->normal); @@ -409,58 +486,18 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver } } - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + if (solverBodyA != NULL) + { + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; + } + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; } b2TracyCZoneEnd(prepare_contact); } -static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2TracyCZoneNC(integrate_velocity, "IntVel", b2_colorDeepPink, true); - - b2Vec2 gravity = context->world->gravity; - b2Body** bodies = context->awakeBodies; - float h = context->timeStep; - - // Integrate velocities and apply damping. Initialize the body state. - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Body* body = bodies[i]; - - float invMass = body->invMass; - float invI = body->invI; - - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); - w = w + h * invI * body->torque; - - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); - w *= 1.0f / (1.0f + h * body->angularDamping); - - body->linearVelocity = v; - body->angularVelocity = w; - - body->deltaAngle = 0.0f; - body->deltaPosition = b2Vec2_zero; - } - - b2TracyCZoneEnd(integrate_velocity); -} - static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) { b2World* world = context->world; @@ -484,10 +521,9 @@ static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTa { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - b2World* world = context->world; b2Graph* graph = context->graph; b2GraphColor* color = graph->colors + colorIndex; - b2Body* bodies = world->bodies; + b2SolverBody* bodies = context->solverBodies; b2Constraint* constraints = color->contacts; float inv_dt = context->invTimeStep; @@ -500,25 +536,45 @@ static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTa { b2Constraint* constraint = constraints + i; - b2Body* bodyA = bodies + constraint->indexA; - b2Body* bodyB = bodies + constraint->indexB; + int32_t indexA = constraint->indexA; + b2SolverBody* bodyA; + b2Vec2 vA; + float wA; + float mA; + float iA; + b2Vec2 dpA; + float daA; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = constraint->pointCount; + if (indexA != B2_NULL_INDEX) + { + bodyA = bodies + indexA; + vA = bodyA->linearVelocity; + wA = bodyA->angularVelocity; + dpA = bodyA->deltaPosition; + daA = bodyA->deltaAngle; + mA = bodyA->invMass; + iA = bodyA->invI; + } + else + { + bodyA = NULL; + vA = b2Vec2_zero; + wA = 0.0f; + dpA = b2Vec2_zero; + daA = 0.0f; + mA = 0.0f; + iA = 0.0f; + } - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; + b2SolverBody* bodyB = bodies + constraint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; - const b2Vec2 dpA = bodyA->deltaPosition; - const float daA = bodyA->deltaAngle; - const b2Vec2 dpB = bodyB->deltaPosition; - const float daB = bodyB->deltaAngle; - + int32_t pointCount = constraint->pointCount; b2Vec2 normal = constraint->normal; b2Vec2 tangent = b2RightPerp(normal); float friction = constraint->friction; @@ -605,8 +661,11 @@ static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTa wB += iB * b2Cross(cp->rB, P); } - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; + if (bodyA != NULL) + { + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + } bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; } @@ -618,14 +677,14 @@ static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2Sol { b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); - b2Body** bodies = context->awakeBodies; + b2SolverBody* bodies = context->solverBodies; float h = context->subStep; B2_ASSERT(startIndex <= endIndex); for (int32_t i = startIndex; i < endIndex; ++i) { - b2Body* body = bodies[i]; + b2SolverBody* body = bodies + i; body->deltaAngle += h * body->angularVelocity; body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); } @@ -638,7 +697,8 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); b2World* world = context->world; - b2Body** bodies = context->awakeBodies; + b2Body* bodies = world->bodies; + b2SolverBody* solverBodies = context->solverBodies; b2Contact* contacts = world->contacts; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; @@ -651,10 +711,15 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv for (int32_t i = startIndex; i < endIndex; ++i) { - b2Body* body = bodies[i]; + b2SolverBody* solverBody = solverBodies + i; + + b2Body* body = bodies + solverBody->bodyIndex; + B2_ASSERT(body->solverIndex == i); - body->position = b2Add(body->position, body->deltaPosition); - body->angle += body->deltaAngle; + body->linearVelocity = solverBody->linearVelocity; + body->angularVelocity = solverBody->angularVelocity; + body->position = b2Add(body->position, solverBody->deltaPosition); + body->angle += solverBody->deltaAngle; body->transform.q = b2MakeRot(body->angle); body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); @@ -854,6 +919,14 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex B2_ASSERT(syncIndex > 0); int previousSyncIndex = syncIndex - 1; + if (stage->type == b2_stagePrepareContacts) + { + if (stage->blockCount > 0 && stage->blocks[0].syncIndex > 1) + { + stage->type += 0; + } + } + b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, 0); while (atomic_load(&stage->completionCount) != blockCount) @@ -899,6 +972,17 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo b2_stageStoreImpulses */ + if (stages[3].type == b2_stagePrepareContacts) + { + for (int32_t i = 0; i < stages[3].blockCount; ++i) + { + if (stages[3].blocks[i].syncIndex > 0) + { + i += 0; + } + } + } + int32_t bodySyncIndex = 1; int32_t stageIndex = 0; uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; @@ -907,16 +991,45 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo stageIndex += 1; bodySyncIndex += 1; + if (stages[3].type == b2_stagePrepareContacts) + { + for (int32_t i = 0; i < stages[3].blockCount; ++i) + { + if (stages[3].blocks[i].syncIndex > 0) + { + i += 0; + } + } + } + // TODO_ERIN single threaded B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); b2PrepareJointsTask(context); stageIndex += 1; + if (stages[3].type == b2_stagePrepareContacts) + { + for (int32_t i = 0; i < stages[3].blockCount; ++i) + { + if (stages[3].blocks[i].syncIndex > 0) + { + i += 0; + } + } + } + int32_t graphSyncIndex = 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { syncBits = (graphSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); + for (int32_t i = 0; i < stages[stageIndex].blockCount; ++i) + { + if (stages[stageIndex].blocks[i].syncIndex > 0) + { + i += 0; + } + } b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); stageIndex += 1; } @@ -1061,7 +1174,8 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) } b2Body* bodies = world->bodies; - b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "body pointers"); + b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); + b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { @@ -1073,8 +1187,13 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2Body* body = bodies + bodyIndex; B2_ASSERT(b2ObjectValid(&body->object)); - awakeBodies[index++] = body; + awakeBodies[index] = body; + + // cache miss bodyIndex = body->islandNext; + body->solverIndex = index; + + index += 1; } } B2_ASSERT(index == awakeBodyCount); @@ -1175,7 +1294,11 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); - b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); + //b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); + + B2_ASSERT(graphBlockCount <= 32); + static b2SolverBlock graphBlocks[32]; + b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); for (int32_t i = 0; i < bodyBlockCount; ++i) @@ -1199,7 +1322,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2SolverBlock* block = baseGraphBlock + j; block->startIndex = j * blockSize; block->endIndex = block->startIndex + blockSize; - block->syncIndex = 0; + atomic_store(&block->syncIndex, 0); } baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; @@ -1214,7 +1337,11 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) block->endIndex = block->startIndex + storeBlockSize; block->syncIndex = 0; } - storeBlocks[storeBlockCount - 1].endIndex = constraintCount; + + if (storeBlockCount > 0) + { + storeBlocks[storeBlockCount - 1].endIndex = constraintCount; + } b2SolverStage* stage = stages; @@ -1307,6 +1434,14 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stage->completionCount = 0; stage += 1; + for (int32_t i = 0; i < graphBlockCount; ++i) + { + if (graphBlocks[i].syncIndex > 0) + { + i += 0; + } + } + B2_ASSERT((int32_t)(stage - stages) == stageCount); B2_ASSERT(workerCount <= 16); @@ -1318,6 +1453,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.stepContext = stepContext; context.world = world; context.awakeBodies = awakeBodies; + context.solverBodies = solverBodies; context.graph = graph; context.constraints = constraints; context.activeColorCount = activeColorCount; @@ -1330,24 +1466,49 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.invTimeStep = stepContext->inv_dt; context.subStep = context.timeStep / velIters; context.invSubStep = velIters * stepContext->inv_dt; - context.syncBits = 0; + atomic_store(&context.syncBits, 0); b2TracyCZoneEnd(prepare_stages); + for (int32_t i = 0; i < graphBlockCount; ++i) + { + if (graphBlocks[i].syncIndex > 0) + { + i += 0; + } + } + // TODO_ERIN use workerIndex or threadIndex? for (int32_t i = 0; i < workerCount; ++i) { + for (int32_t j = 0; j < graphBlockCount; ++j) + { + if (graphBlocks[j].syncIndex > 0) + { + j += 0; + } + } + workerContext[i].context = &context; workerContext[i].workerIndex = i; world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); } + for (int32_t i = 0; i < graphBlockCount; ++i) + { + if (graphBlocks[i].syncIndex > 0) + { + i += 0; + } + } + world->finishAllTasksFcn(world->userTaskContext); b2FreeStackItem(world->stackAllocator, storeBlocks); - b2FreeStackItem(world->stackAllocator, graphBlocks); + //b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, solverBodies); b2FreeStackItem(world->stackAllocator, awakeBodies); } From 96e1eea6557cd4fc7d5d97c9a4c828db8e1e20f9 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 20 Sep 2023 23:18:56 -0700 Subject: [PATCH 25/51] fix worker re-entrance on thread 0 --- samples/collection/benchmark_pyramid.cpp | 8 +- samples/sample.cpp | 4 +- src/graph.c | 138 +++++++---------------- src/stack_allocator.c | 1 - 4 files changed, 49 insertions(+), 102 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index dae0ad84..37e3e228 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,9 +21,9 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 3; - m_rowCount = g_sampleDebug ? 1 : 13; - m_columnCount = g_sampleDebug ? 1 : 14; + m_baseCount = 60; + m_rowCount = g_sampleDebug ? 4 : 4; + m_columnCount = g_sampleDebug ? 4 : 4; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; @@ -57,7 +57,7 @@ class BenchmarkPyramid : public Sample float h = m_extent - m_round; b2Polygon cuboid = b2MakeRoundedBox(h, h, m_round); - float shift = 0.8f * h; + float shift = 1.0f * h; for (int32_t i = 0; i < m_baseCount; ++i) { diff --git a/samples/sample.cpp b/samples/sample.cpp index 8c61f29c..6e27e5fe 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -63,7 +63,7 @@ Sample::Sample(const Settings& settings) b2Vec2 gravity = {0.0f, -10.0f}; // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = 8; //enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = 8;// enki::GetNumHardwareThreads() / 2; m_scheduler.Initialize(maxThreads); m_taskCount = 0; @@ -355,7 +355,7 @@ void Sample::Step(Settings& settings) { // graph color g_draw.DrawPoint(point->position, 5.0f, b2MakeColor(colors[point->color], 1.0f)); - g_draw.DrawString(point->position, "%d", point->color); + //g_draw.DrawString(point->position, "%d", point->color); } else if (point->separation > b2_linearSlop) { diff --git a/src/graph.c b/src/graph.c index 21533563..d5f27682 100644 --- a/src/graph.c +++ b/src/graph.c @@ -115,6 +115,9 @@ typedef struct b2WorkerContext int32_t workerIndex; } b2WorkerContext; +// TODO_ERIN debugggin +_Atomic int firstWorkerEnterCount = 0; + void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { bodyCapacity = B2_MAX(bodyCapacity, 8); @@ -372,7 +375,7 @@ static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2Solver // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 45.0f; + const float contactHertz = 30.0f; float h = context->timeStep; bool enableWarmStarting = world->enableWarmStarting; @@ -692,7 +695,7 @@ static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2Sol b2TracyCZoneEnd(integrate_positions); } -static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, uint32_t threadIndex) +static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t workerIndex) { b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); @@ -702,8 +705,8 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2Contact* contacts = world->contacts; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[workerIndex].shapeBitSet; B2_ASSERT(startIndex <= endIndex); B2_ASSERT(startIndex <= world->bodyPool.capacity); @@ -789,7 +792,7 @@ static void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTa b2TracyCZoneEnd(store_impulses); } -static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, uint32_t threadIndex) +static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) { b2SolverStageType type = stage->type; @@ -812,7 +815,7 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stageFinalizePositions: - b2FinalizePositionsTask(startIndex, endIndex, context, threadIndex); + b2FinalizePositionsTask(startIndex, endIndex, context, workerIndex); break; case b2_stageCalmContacts: @@ -837,8 +840,7 @@ static inline int32_t GetWorkerStartIndex(int32_t workerIndex, int32_t blockCoun return blocksPerWorker * workerIndex + B2_MIN(remainder, workerIndex); } -static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, int32_t workerIndex, int previousSyncIndex, int syncIndex, - uint32_t threadIndex) +static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, int previousSyncIndex, int syncIndex, int32_t workerIndex) { int32_t completedCount = 0; b2SolverBlock* blocks = stage->blocks; @@ -859,9 +861,11 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i // Caution: this can change expectedSyncIndex while (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == true) { + B2_ASSERT(stage->type != b2_stagePrepareContacts || syncIndex < 2); + B2_ASSERT(completedCount < blockCount); - b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, threadIndex); + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, workerIndex); completedCount += 1; blockIndex += 1; @@ -891,7 +895,7 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i break; } - b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, threadIndex); + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, workerIndex); completedCount += 1; blockIndex -= 1; } @@ -899,7 +903,7 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i (void)atomic_fetch_add(&stage->completionCount, completedCount); } -static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* context, int32_t workerIndex, uint32_t syncBits) +static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t syncBits) { int32_t blockCount = stage->blockCount; if (blockCount == 0) @@ -927,7 +931,7 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } - b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, 0); + b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, 0); while (atomic_load(&stage->completionCount) != blockCount) { @@ -938,10 +942,12 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } -void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +// This should not use the thread index because thread 0 should not be called twice, which is possible with work stealing. +void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) { B2_MAYBE_UNUSED(startIndex); B2_MAYBE_UNUSED(endIndex); + B2_MAYBE_UNUSED(threadIndexDontUse); b2WorkerContext* workerContext = taskContext; int32_t workerIndex = workerContext->workerIndex; @@ -949,8 +955,11 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo int32_t activeColorCount = context->activeColorCount; b2SolverStage* stages = context->stages; - if (threadIndex == 0) + if (workerIndex == 0) { + B2_ASSERT(atomic_load(&firstWorkerEnterCount) == 0); + atomic_fetch_add(&firstWorkerEnterCount, 1); + // Main thread synchronizes the workers and does work itself. // // Stages are re-used for loops so that I don't need more stages for large iteration counts. @@ -976,10 +985,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo { for (int32_t i = 0; i < stages[3].blockCount; ++i) { - if (stages[3].blocks[i].syncIndex > 0) - { - i += 0; - } + B2_ASSERT(stages[3].blocks[i].syncIndex == 0); } } @@ -987,50 +993,21 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo int32_t stageIndex = 0; uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); stageIndex += 1; bodySyncIndex += 1; - if (stages[3].type == b2_stagePrepareContacts) - { - for (int32_t i = 0; i < stages[3].blockCount; ++i) - { - if (stages[3].blocks[i].syncIndex > 0) - { - i += 0; - } - } - } - // TODO_ERIN single threaded B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); b2PrepareJointsTask(context); stageIndex += 1; - if (stages[3].type == b2_stagePrepareContacts) - { - for (int32_t i = 0; i < stages[3].blockCount; ++i) - { - if (stages[3].blocks[i].syncIndex > 0) - { - i += 0; - } - } - } - int32_t graphSyncIndex = 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { syncBits = (graphSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); - for (int32_t i = 0; i < stages[stageIndex].blockCount; ++i) - { - if (stages[stageIndex].blocks[i].syncIndex > 0) - { - i += 0; - } - } - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); stageIndex += 1; } graphSyncIndex += 1; @@ -1049,14 +1026,14 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo { syncBits = (graphSyncIndex << 16) | iterStageIndex; B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveContacts); - b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); iterStageIndex += 1; } graphSyncIndex += 1; B2_ASSERT(stages[iterStageIndex].type == b2_stageIntegratePositions); syncBits = (bodySyncIndex << 16) | iterStageIndex; - b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); bodySyncIndex += 1; } @@ -1064,7 +1041,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo syncBits = (bodySyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); stageIndex += 1; int32_t calmIterations = context->calmIterations; @@ -1081,7 +1058,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo { syncBits = (graphSyncIndex << 16) | iterStageIndex; B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmContacts); - b2ExecuteMainStage(stages + iterStageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); iterStageIndex += 1; } graphSyncIndex += 1; @@ -1092,8 +1069,9 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo uint32_t constraintSyncIndex = 1; syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); - b2ExecuteMainStage(stages + stageIndex, context, workerIndex, syncBits); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + // Signal workers to finish atomic_store(&context->syncBits, UINT_MAX); B2_ASSERT(stageIndex + 1 == context->stageCount); @@ -1128,7 +1106,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, vo int32_t previousSyncIndex = syncIndex - 1; b2SolverStage* stage = stages + stageIndex; - b2ExecuteStage(stage, context, workerIndex, previousSyncIndex, syncIndex, threadIndex); + b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, workerIndex); lastSyncBits = syncBits; } @@ -1176,6 +1154,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2Body* bodies = world->bodies; b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); + int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { @@ -1201,8 +1180,8 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) int32_t workerCount = world->workerCount; const int32_t blocksPerWorker = 6; - int32_t bodyBlockSize = 1 << 3; - int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 3) + 1; + int32_t bodyBlockSize = 1 << 4; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 4) + 1; if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) { bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); @@ -1244,6 +1223,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) activeColorCount = c; b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + int32_t base = 0; for (int32_t i = 0; i < activeColorCount; ++i) @@ -1294,11 +1274,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); - //b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); - - B2_ASSERT(graphBlockCount <= 32); - static b2SolverBlock graphBlocks[32]; - + b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); for (int32_t i = 0; i < bodyBlockCount; ++i) @@ -1434,14 +1410,6 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) stage->completionCount = 0; stage += 1; - for (int32_t i = 0; i < graphBlockCount; ++i) - { - if (graphBlocks[i].syncIndex > 0) - { - i += 0; - } - } - B2_ASSERT((int32_t)(stage - stages) == stageCount); B2_ASSERT(workerCount <= 16); @@ -1466,46 +1434,26 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) context.invTimeStep = stepContext->inv_dt; context.subStep = context.timeStep / velIters; context.invSubStep = velIters * stepContext->inv_dt; - atomic_store(&context.syncBits, 0); + context.syncBits = 0; b2TracyCZoneEnd(prepare_stages); - for (int32_t i = 0; i < graphBlockCount; ++i) - { - if (graphBlocks[i].syncIndex > 0) - { - i += 0; - } - } + atomic_store(&firstWorkerEnterCount, 0); // TODO_ERIN use workerIndex or threadIndex? for (int32_t i = 0; i < workerCount; ++i) { - for (int32_t j = 0; j < graphBlockCount; ++j) - { - if (graphBlocks[j].syncIndex > 0) - { - j += 0; - } - } - workerContext[i].context = &context; workerContext[i].workerIndex = i; world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); } - for (int32_t i = 0; i < graphBlockCount; ++i) - { - if (graphBlocks[i].syncIndex > 0) - { - i += 0; - } - } - world->finishAllTasksFcn(world->userTaskContext); + atomic_store(&firstWorkerEnterCount, 0); + b2FreeStackItem(world->stackAllocator, storeBlocks); - //b2FreeStackItem(world->stackAllocator, graphBlocks); + b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); diff --git a/src/stack_allocator.c b/src/stack_allocator.c index 55bc2766..a8412651 100644 --- a/src/stack_allocator.c +++ b/src/stack_allocator.c @@ -79,7 +79,6 @@ void* b2AllocateStackItem(b2StackAllocator* alloc, int32_t size, const char* nam } b2Array_Push(alloc->entries, entry); - return entry.data; } From 1323884116e2c0e4292acb9ed7e0ebe9733bc292 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 22 Sep 2023 23:23:18 -0700 Subject: [PATCH 26/51] wip --- include/box2d/box2d.h | 1 - include/box2d/manifold.h | 9 +- samples/collection/benchmark_pyramid.cpp | 4 +- samples/sample.cpp | 5 +- src/body.c | 1 - src/body.h | 4 +- src/broad_phase.c | 1 + src/contact.c | 28 +- src/contact_solver.c | 1371 ++++++++++------------ src/contact_solver.h | 63 +- src/graph.c | 464 +------- src/graph.h | 9 +- src/island.c | 450 ------- src/island.h | 5 - src/joint.c | 57 +- src/joint.h | 44 +- src/revolute_joint.c | 287 +---- src/solver_data.h | 71 +- src/world.c | 91 +- 19 files changed, 828 insertions(+), 2137 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index f6845241..f4ee3d06 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -27,7 +27,6 @@ BOX2D_API void b2DestroyWorld(b2WorldId worldId); /// @param velocityIterations for the velocity constraint solver. /// @param positionIterations for the position constraint solver. BOX2D_API void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations); -BOX2D_API void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations); /// Call this to draw shapes and other debug draw data. This is intentionally non-const. BOX2D_API void b2World_Draw(b2WorldId worldId, b2DebugDraw* debugDraw); diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index d5242e48..5f05db76 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -24,11 +24,8 @@ typedef struct b2ManifoldPoint /// world coordinates of contact point b2Vec2 point; - // Friction anchors - b2Vec2 localAnchorA; - b2Vec2 localAnchorB; - b2Vec2 localNormalA; - b2Vec2 localNormalB; + /// Body anchors used by solver + b2Vec2 anchorA, anchorB; /// the separation of the contact point, negative if penetrating float separation; @@ -52,8 +49,6 @@ typedef struct b2Manifold b2ManifoldPoint points[2]; b2Vec2 normal; int32_t pointCount; - int32_t constraintIndex; - bool frictionPersisted; } b2Manifold; static const b2Manifold b2_emptyManifold = {0}; diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 37e3e228..d86415fb 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -22,8 +22,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 60; - m_rowCount = g_sampleDebug ? 4 : 4; - m_columnCount = g_sampleDebug ? 4 : 4; + m_rowCount = g_sampleDebug ? 4 : 1; + m_columnCount = g_sampleDebug ? 4 : 1; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/samples/sample.cpp b/samples/sample.cpp index 6e27e5fe..c1fed0b3 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -63,7 +63,7 @@ Sample::Sample(const Settings& settings) b2Vec2 gravity = {0.0f, -10.0f}; // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = 8;// enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = 16;// enki::GetNumHardwareThreads() / 2; m_scheduler.Initialize(maxThreads); m_taskCount = 0; @@ -89,7 +89,7 @@ Sample::Sample(const Settings& settings) // m_world->SetContactListener(this); // TODO_ERIN too expensive - b2World_SetPreSolveCallback(m_worldId, PreSolveFcn, this); + //b2World_SetPreSolveCallback(m_worldId, PreSolveFcn, this); m_stepCount = 0; @@ -428,7 +428,6 @@ bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifo cp->normalImpulse = manifold->points[j].normalImpulse; cp->tangentImpulse = manifold->points[j].tangentImpulse; cp->persisted = manifold->points[j].persisted; - cp->constraintIndex = manifold->constraintIndex; cp->color = color; ++j; } diff --git a/src/body.c b/src/body.c index 60dce6eb..1b6bea0e 100644 --- a/src/body.c +++ b/src/body.c @@ -76,7 +76,6 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->islandIndex = B2_NULL_INDEX; b->islandPrev = B2_NULL_INDEX; b->islandNext = B2_NULL_INDEX; - b->solverIndex = B2_NULL_INDEX; if (b->type != b2_staticBody) { diff --git a/src/body.h b/src/body.h index feafa37f..ed0c1730 100644 --- a/src/body.h +++ b/src/body.h @@ -59,8 +59,6 @@ typedef struct b2Body int32_t islandPrev; int32_t islandNext; - int32_t solverIndex; - float mass, invMass; // Rotational inertia about the center of mass. @@ -85,6 +83,8 @@ typedef struct b2Body bool enlargeAABB; } b2Body; +// TODO_ERIN every non-static body gets a solver body. No solver bodies for static bodies to avoid cross thread sharing and the cache misses they bring. +// Keep two solver body arrays: awake and sleeping typedef struct b2SolverBody { b2Vec2 linearVelocity; diff --git a/src/broad_phase.c b/src/broad_phase.c index d6dd280f..3065bd7c 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -234,6 +234,7 @@ static bool b2PairQueryCallback(int32_t proxyId, int32_t shapeIndex, void* conte return true; } + // TODO_ERIN per thread to eliminate atomic? int pairIndex = atomic_fetch_add(&bp->movePairIndex, 1); b2MovePair* pair; diff --git a/src/contact.c b/src/contact.c index f3e38fa2..bc46e711 100644 --- a/src/contact.c +++ b/src/contact.c @@ -413,21 +413,13 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body touching = contact->manifold.pointCount > 0; - contact->manifold.frictionPersisted = true; - - if (contact->manifold.pointCount != oldManifold.pointCount) - { - contact->manifold.frictionPersisted = false; - } - - // TODO_ERIN testing - contact->manifold.constraintIndex = oldManifold.constraintIndex; - // Match old contact ids to new contact ids and copy the // stored impulses to warm start the solver. for (int32_t i = 0; i < contact->manifold.pointCount; ++i) { b2ManifoldPoint* mp2 = contact->manifold.points + i; + mp2->anchorA = b2Sub(mp2->point, bodyA->position); + mp2->anchorB = b2Sub(mp2->point, bodyB->position); mp2->normalImpulse = 0.0f; mp2->tangentImpulse = 0.0f; mp2->persisted = false; @@ -439,28 +431,12 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body if (mp1->id == id2) { - mp2->localNormalA = mp1->localNormalA; - mp2->localNormalB = mp1->localNormalB; - mp2->localAnchorA = mp1->localAnchorA; - mp2->localAnchorB = mp1->localAnchorB; - mp2->normalImpulse = mp1->normalImpulse; mp2->tangentImpulse = mp1->tangentImpulse; mp2->persisted = true; break; } } - - if (mp2->persisted == false) - { - contact->manifold.frictionPersisted = false; - } - - // For debugging ids - // if (mp2->persisted == false && contact->manifold.pointCount == oldManifold.pointCount) - //{ - // i += 0; - //} } if (touching && world->preSolveFcn) diff --git a/src/contact_solver.c b/src/contact_solver.c index f85f1b0e..b33b7eda 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -7,904 +7,735 @@ #include "body.h" #include "contact.h" #include "core.h" -#include "stack_allocator.h" +#include "graph.h" #include "world.h" -// Solver debugging is normally disabled because the block solver sometimes has to deal with a poorly conditioned -// effective mass matrix. -#define B2_DEBUG_SOLVER 0 +#define maxBaumgarteVelocity 3.0f -typedef struct b2VelocityConstraintPoint +// TODO_ERIN prepare contact constraints directly in collision phase? +void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { - b2Vec2 rA; - b2Vec2 rB; - float normalImpulse; - float tangentImpulse; - float normalMass; - float tangentMass; - float velocityBias; - float relativeVelocity; -} b2VelocityConstraintPoint; - -typedef struct b2ContactVelocityConstraint -{ - b2Contact* contact; - b2VelocityConstraintPoint points[2]; - b2Vec2 normal; - b2Mat22 normalMass; - b2Mat22 K; - float friction; - float restitution; - float tangentSpeed; - int32_t pointCount; -} b2ContactVelocityConstraint; - -typedef struct b2ContactPositionConstraint -{ - b2Contact* contact; - b2Vec2 localAnchorsA[2]; - b2Vec2 localAnchorsB[2]; - float separations[2]; - float lambdas[2]; - b2Vec2 normal; - int32_t pointCount; -} b2ContactPositionConstraint; - -b2ContactSolver* b2CreateContactSolver(b2ContactSolverDef* def) -{ - b2StackAllocator* alloc = def->world->stackAllocator; - - b2ContactSolver* solver = b2AllocateStackItem(alloc, sizeof(b2ContactSolver), "contact solver"); - solver->context = def->context; - solver->contactList = def->contactList; - solver->contactCount = def->contactCount; - - // These are allocated conservatively because some island contacts may not have contact points - solver->positionConstraints = - b2AllocateStackItem(alloc, solver->contactCount * sizeof(b2ContactPositionConstraint), "position constraints"); - solver->velocityConstraints = - b2AllocateStackItem(alloc, solver->contactCount * sizeof(b2ContactVelocityConstraint), "velocity constraints"); - - solver->world = def->world; - solver->constraintCount = 0; - return solver; -} + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); -void b2ContactSolver_Initialize(b2ContactSolver* solver) -{ - b2World* world = solver->world; + b2World* world = context->world; + b2Graph* graph = context->graph; + b2GraphColor* color = graph->colors + colorIndex; + int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; - const b2StepContext* context = solver->context; - b2Body* bodies = world->bodies; + const int32_t* bodyMap = context->bodyMap; + b2SolverBody* solverBodies = context->solverBodies; - // Initialize position independent portions of the constraints. - int32_t constraintCount = 0; - int32_t contactIndex = solver->contactList; - while (contactIndex != B2_NULL_INDEX) + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; + + float h = context->timeStep; + bool enableWarmStarting = world->enableWarmStarting; + + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + + for (int32_t i = startIndex; i < endIndex; ++i) { - b2Contact* contact = contacts + contactIndex; - contactIndex = contact->islandNext; + b2Contact* contact = contacts + contactIndices[i]; const b2Manifold* manifold = &contact->manifold; int32_t pointCount = manifold->pointCount; - if (pointCount == 0) + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + b2ContactConstraint* constraint = color->contactConstraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + + b2SolverBody* solverBodyA; + b2Vec2 vA; + float wA; + float mA; + float iA; + + float hertz; + + if (indexA == B2_NULL_INDEX) { - continue; + solverBodyA = NULL; + vA.x = vA.y = 0.0; + wA = 0.0f; + mA = 0.0f; + iA = 0.0f; + hertz = 2.0f * contactHertz; + constraint->type = pointCount == 1 ? b2_onePointStaticType : b2_twoPointStaticType; } + else + { + solverBodyA = solverBodies + indexA; + vA = solverBodyA->linearVelocity; + wA = solverBodyA->angularVelocity; + mA = solverBodyA->invMass; + iA = solverBodyA->invI; + hertz = contactHertz; + constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; + } + + B2_ASSERT(indexB != B2_NULL_INDEX); + b2SolverBody* solverBodyB = solverBodies + indexB; + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - - b2ContactVelocityConstraint* vc = solver->velocityConstraints + constraintCount; - vc->contact = contact; - vc->normal = manifold->normal; - vc->friction = contact->friction; - vc->restitution = contact->restitution; - vc->tangentSpeed = contact->tangentSpeed; - vc->pointCount = pointCount; - vc->K = b2Mat22_zero; - vc->normalMass = b2Mat22_zero; - - b2ContactPositionConstraint* pc = solver->positionConstraints + constraintCount; - pc->contact = contact; - pc->normal = manifold->normal; - pc->pointCount = pointCount; - - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Rot qA = bodyA->transform.q; - b2Vec2 cA = bodyA->position; - b2Rot qB = bodyB->transform.q; - b2Vec2 cB = bodyB->position; - - // TODO_ERIN testing - // qA = b2MakeRot(bodyA->angle); - // qB = b2MakeRot(bodyB->angle); - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float c = h * omega * (2.0f * zeta + h * omega); + constraint->impulseCoefficient = 1.0f / (1.0f + c); + constraint->massCoefficient = c * constraint->impulseCoefficient; + constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); for (int32_t j = 0; j < pointCount; ++j) { - const b2ManifoldPoint* cp = manifold->points + j; - b2VelocityConstraintPoint* vcp = vc->points + j; + const b2ManifoldPoint* mp = manifold->points + j; + b2ContactConstraintPoint* cp = constraint->points + j; - if (context->enableWarmStarting) - { - vcp->normalImpulse = context->dtRatio * cp->normalImpulse; - vcp->tangentImpulse = context->dtRatio * cp->tangentImpulse; - } - else - { - vcp->normalImpulse = 0.0f; - vcp->tangentImpulse = 0.0f; - } + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; - vcp->rA = b2Sub(cp->point, cA); - vcp->rB = b2Sub(cp->point, cB); - - float rnA = b2Cross(vcp->rA, vc->normal); - float rnB = b2Cross(vcp->rB, vc->normal); + cp->rA = mp->anchorA; + cp->rB = mp->anchorB; + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - vcp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - - b2Vec2 tangent = b2CrossVS(vc->normal, 1.0f); + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - float rtA = b2Cross(vcp->rA, tangent); - float rtB = b2Cross(vcp->rB, tangent); + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + cp->separation = mp->separation; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + // Warm start + if (enableWarmStarting) + { + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + } - vcp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + if (solverBodyA != NULL) + { + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; + } + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; + } - // Velocity bias for speculative collision - vcp->velocityBias = -B2_MAX(0.0f, cp->separation * context->inv_dt); + b2TracyCZoneEnd(prepare_contact); +} - // Relative velocity - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - vcp->relativeVelocity = b2Dot(vc->normal, b2Sub(vrB, vrA)); +static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) +{ + b2SolverBody* bodyA = bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 dpA = bodyA->deltaPosition; + float daA = bodyA->deltaAngle; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; - pc->localAnchorsA[j] = b2InvRotateVector(qA, vcp->rA); - pc->localAnchorsB[j] = b2InvRotateVector(qB, vcp->rB); - pc->separations[j] = cp->separation; - pc->lambdas[j] = 0.0f; + { + b2ContactConstraintPoint* cp = constraint->points + 0; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; } - - // If we have two points, then prepare the block solver. - if (vc->pointCount == 2) + else if (useBias) { - b2VelocityConstraintPoint* vcp1 = vc->points + 0; - b2VelocityConstraintPoint* vcp2 = vc->points + 1; + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } - float rn1A = b2Cross(vcp1->rA, vc->normal); - float rn1B = b2Cross(vcp1->rB, vc->normal); - float rn2A = b2Cross(vcp2->rA, vc->normal); - float rn2B = b2Cross(vcp2->rB, vc->normal); + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - float k11 = mA + mB + iA * rn1A * rn1A + iB * rn1B * rn1B; - float k22 = mA + mB + iA * rn2A * rn2A + iB * rn2B * rn2B; - float k12 = mA + mB + iA * rn1A * rn2A + iB * rn1B * rn2B; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - // Ensure a reasonable condition number. - const float k_maxConditionNumber = 1000.0f; - if (k11 * k11 < k_maxConditionNumber * (k11 * k22 - k12 * k12)) - { - // K is safe to invert. - vc->K.cx = (b2Vec2){k11, k12}; - vc->K.cy = (b2Vec2){k12, k22}; - vc->normalMass = b2GetInverse22(vc->K); - } - else - { - // The constraints are redundant, just use one. - // TODO_ERIN use deepest? - vc->pointCount = 1; - } - } + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - constraintCount += 1; + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } - solver->constraintCount = constraintCount; - - // Warm start - if (context->enableWarmStarting) { - for (int32_t i = 0; i < constraintCount; ++i) - { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; + b2ContactConstraintPoint* cp = constraint->points + 0; - const b2Contact* contact = vc->contact; + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - b2Vec2 normal = vc->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; - b2Vec2 P = b2Add(b2MulSV(vcp->normalImpulse, normal), b2MulSV(vcp->tangentImpulse, tangent)); - wA -= iA * b2Cross(vcp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(vcp->rB, P); - vB = b2MulAdd(vB, mB, P); - } + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } -void b2ContactSolver_SolveVelocityConstraints(b2ContactSolver* solver) +static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { - int32_t count = solver->constraintCount; - - b2World* world = solver->world; - b2Body* bodies = world->bodies; + b2SolverBody* bodyA = bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 dpA = bodyA->deltaPosition; + float daA = bodyA->deltaAngle; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; - for (int32_t i = 0; i < count; ++i) { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; + b2ContactConstraintPoint* cp = constraint->points + 0; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } - const b2Contact* contact = vc->contact; + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - b2Vec2 normal = vc->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); - float friction = vc->friction; + { + b2ContactConstraintPoint* cp = constraint->points + 1; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } - B2_ASSERT(pointCount == 1 || pointCount == 2); + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - // Solve tangent constraints first because non-penetration is more important - // than friction. - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - // Compute tangent force - float vt = b2Dot(dv, tangent) - vc->tangentSpeed; - float lambda = vcp->tangentMass * (-vt); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - // Clamp the accumulated force - float maxFriction = friction * vcp->normalImpulse; - float newImpulse = B2_CLAMP(vcp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - vcp->tangentImpulse; - vcp->tangentImpulse = newImpulse; + { + b2ContactConstraintPoint* cp = constraint->points + 0; - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(vcp->rB, P); - } + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - // Solve normal constraints - if (pointCount == 1) - { - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - // Compute normal impulse - float vn = b2Dot(dv, normal); - float lambda = -vcp->normalMass * (vn - vcp->velocityBias); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - // Clamp the accumulated impulse - float newImpulse = B2_MAX(vcp->normalImpulse + lambda, 0.0f); - lambda = newImpulse - vcp->normalImpulse; - vcp->normalImpulse = newImpulse; + { + b2ContactConstraintPoint* cp = constraint->points + 1; - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(vcp->rB, P); - } - } - else - { - // Block solver developed in collaboration with Dirk Gregorius (back in 01/07 on Box2D_Lite). - // Build the mini LCP for this contact patch - // - // vn = A * x + b, vn >= 0, x >= 0 and vn_i * x_i = 0 with i = 1..2 - // - // A = J * W * JT and J = ( -n, -r1 x n, n, r2 x n ) - // b = vn0 - velocityBias - // - // The system is solved using the "Total enumeration method" (s. Murty). The complementary constraint vn_i * - // x_i implies that we must have in any solution either vn_i = 0 or x_i = 0. So for the 2D contact problem - // the cases vn1 = 0 and vn2 = 0, x1 = 0 and x2 = 0, x1 = 0 and vn2 = 0, x2 = 0 and vn1 = 0 need to be - // tested. The first valid solution that satisfies the problem is chosen. - // - // In order to account of the accumulated impulse 'a' (because of the iterative nature of the solver which - // only requires that the accumulated impulse is clamped and not the incremental impulse) we change the - // impulse variable (x_i). - // - // Substitute: - // - // x = a + d - // - // a := old total impulse - // x := new total impulse - // d := incremental impulse - // - // For the current iteration we extend the formula for the incremental impulse - // to compute the new total impulse: - // - // vn = A * d + b - // = A * (x - a) + b - // = A * x + b - A * a - // = A * x + b' - // b' = b - A * a; - - b2VelocityConstraintPoint* cp1 = vc->points + 0; - b2VelocityConstraintPoint* cp2 = vc->points + 1; - - b2Vec2 a = {cp1->normalImpulse, cp2->normalImpulse}; - B2_ASSERT(a.x >= 0.0f && a.y >= 0.0f); - - // Relative velocity at contact - b2Vec2 vrA, vrB; - vrA = b2Add(vA, b2CrossSV(wA, cp1->rA)); - vrB = b2Add(vB, b2CrossSV(wB, cp1->rB)); - b2Vec2 dv1 = b2Sub(vrB, vrA); - vrA = b2Add(vA, b2CrossSV(wA, cp2->rA)); - vrB = b2Add(vB, b2CrossSV(wB, cp2->rB)); - b2Vec2 dv2 = b2Sub(vrB, vrA); - - // Compute normal velocity - float vn1 = b2Dot(dv1, normal); - float vn2 = b2Dot(dv2, normal); - - b2Vec2 b = {vn1 - cp1->velocityBias, vn2 - cp2->velocityBias}; - - // Compute b' - b = b2Sub(b, b2MulMV(vc->K, a)); - - const float k_errorTol = 1e-3f; - B2_MAYBE_UNUSED(k_errorTol); - - for (;;) - { - // - // Case 1: vn = 0 - // - // 0 = A * x + b' - // - // Solve for x: - // - // x = - inv(A) * b' - // - b2Vec2 x = b2Neg(b2MulMV(vc->normalMass, b)); - - if (x.x >= 0.0f && x.y >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = b2Sub(x, a); - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); - - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); - - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; - -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv1 = vB + b2Cross(wB, cp1->rB) - vA - b2Cross(wA, cp1->rA); - dv2 = vB + b2Cross(wB, cp2->rB) - vA - b2Cross(wA, cp2->rA); - - // Compute normal velocity - vn1 = b2Dot(dv1, normal); - vn2 = b2Dot(dv2, normal); - - B2_ASSERT(b2Abs(vn1 - cp1->velocityBias) < k_errorTol); - B2_ASSERT(b2Abs(vn2 - cp2->velocityBias) < k_errorTol); -#endif - break; - } - - // - // Case 2: vn1 = 0 and x2 = 0 - // - // 0 = a11 * x1 + a12 * 0 + b1' - // vn2 = a21 * x1 + a22 * 0 + b2' - // - x.x = -cp1->normalMass * b.x; - x.y = 0.0f; - vn1 = 0.0f; - vn2 = vc->K.cx.y * x.x + b.y; - if (x.x >= 0.0f && vn2 >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = b2Sub(x, a); - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); - - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); - - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; - -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv1 = vB + b2Cross(wB, cp1->rB) - vA - b2Cross(wA, cp1->rA); - - // Compute normal velocity - vn1 = b2Dot(dv1, normal); - - B2_ASSERT(b2Abs(vn1 - cp1->velocityBias) < k_errorTol); -#endif - break; - } - - // - // Case 3: vn2 = 0 and x1 = 0 - // - // vn1 = a11 * 0 + a12 * x2 + b1' - // 0 = a21 * 0 + a22 * x2 + b2' - // - x.x = 0.0f; - x.y = -cp2->normalMass * b.y; - vn1 = vc->K.cy.x * x.y + b.x; - vn2 = 0.0f; - - if (x.y >= 0.0f && vn1 >= 0.0f) - { - // Resubstitute for the incremental impulse - b2Vec2 d = b2Sub(x, a); - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); - - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); - - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; - -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv2 = vB + b2Cross(wB, cp2->rB) - vA - b2Cross(wA, cp2->rA); - - // Compute normal velocity - vn2 = b2Dot(dv2, normal); - - B2_ASSERT(b2Abs(vn2 - cp2->velocityBias) < k_errorTol); -#endif - break; - } - - // - // Case 4: x1 = 0 and x2 = 0 - // - // vn1 = b1 - // vn2 = b2; - x.x = 0.0f; - x.y = 0.0f; - vn1 = b.x; - vn2 = b.y; - - if (vn1 >= 0.0f && vn2 >= 0.0f) - { - // Resubstitute for the incremental impulse - b2Vec2 d = b2Sub(x, a); - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); - - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); - - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; - - break; - } - - // No solution, give up. This is hit sometimes, but it doesn't seem to matter. - break; - } - } + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } -void b2ContactSolver_ApplyRestitution(b2ContactSolver* solver) +static void b2SolveContactOnePointStatic(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { - int32_t count = solver->constraintCount; - float threshold = solver->context->restitutionThreshold; - - b2World* world = solver->world; - b2Body* bodies = world->bodies; + b2SolverBody* bodyB = bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; - for (int32_t i = 0; i < count; ++i) { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; - const b2Contact* contact = vc->contact; - - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - - if (vc->restitution == 0.0f) + b2ContactConstraintPoint* cp = constraint->points + 0; + + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + float ds = b2Dot(prB, normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) { - continue; + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; } - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - b2Vec2 normal = vc->normal; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - // if the normal impulse is zero then there was no collision - if (vcp->relativeVelocity > -threshold || vcp->normalImpulse == 0.0f) - { - continue; - } + { + b2ContactConstraintPoint* cp = constraint->points + 0; - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - // Compute normal impulse - float vn = b2Dot(dv, normal); - float lambda = -vcp->normalMass * (vn + vc->restitution * vcp->relativeVelocity); + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(vcp->rB, P); - } + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } + + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } -void b2ContactSolver_StoreImpulses(b2ContactSolver* solver) +static void b2SolveContactTwoPointsStatic(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { - int32_t count = solver->constraintCount; + b2SolverBody* bodyB = bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; - for (int32_t i = 0; i < count; ++i) { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; - b2Contact* contact = vc->contact; - - b2Manifold* manifold = &contact->manifold; - - for (int32_t j = 0; j < vc->pointCount; ++j) + b2ContactConstraintPoint* cp = constraint->points + 0; + + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + float ds = b2Dot(prB, normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) { - manifold->points[j].normalImpulse = vc->points[j].normalImpulse; - manifold->points[j].tangentImpulse = vc->points[j].tangentImpulse; + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; } - } -} -bool b2ContactSolver_SolvePositionConstraintsBlock(b2ContactSolver* solver) -{ - float minSeparation = 0.0f; - int32_t count = solver->constraintCount; - float slop = b2_linearSlop; + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - b2World* world = solver->world; - b2Body* bodies = world->bodies; + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - for (int32_t i = 0; i < count; ++i) { - b2ContactPositionConstraint* pc = solver->positionConstraints + i; - const b2Contact* contact = pc->contact; + b2ContactConstraintPoint* cp = constraint->points + 1; + + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + float ds = b2Dot(prB, normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - int32_t pointCount = pc->pointCount; + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; + { + b2ContactConstraintPoint* cp = constraint->points + 0; - b2Vec2 normal = pc->normal; + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - if (pointCount == 2) - { - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - b2Vec2 rA1 = b2RotateVector(qA, pc->localAnchorsA[0]); - b2Vec2 rB1 = b2RotateVector(qB, pc->localAnchorsB[0]); - b2Vec2 rA2 = b2RotateVector(qA, pc->localAnchorsA[1]); - b2Vec2 rB2 = b2RotateVector(qB, pc->localAnchorsB[1]); + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - // Current separation - b2Vec2 d1 = b2Sub(b2Add(cB, rB1), b2Add(cA, rA1)); - float separation1 = b2Dot(d1, normal) + pc->separations[0]; + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - b2Vec2 d2 = b2Sub(b2Add(cB, rB2), b2Add(cA, rA2)); - float separation2 = b2Dot(d2, normal) + pc->separations[1]; + { + b2ContactConstraintPoint* cp = constraint->points + 1; - // Track max constraint error. - minSeparation = B2_MIN(minSeparation, separation1); - minSeparation = B2_MIN(minSeparation, separation2); + // Relative velocity at contact + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - float C1 = B2_CLAMP(b2_baumgarte * (separation1 + slop), -b2_maxLinearCorrection, 0.0f); - float C2 = B2_CLAMP(b2_baumgarte * (separation2 + slop), -b2_maxLinearCorrection, 0.0f); + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - b2Vec2 b = {C1, C2}; + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - float rn1A = b2Cross(rA1, normal); - float rn1B = b2Cross(rB1, normal); - float rn2A = b2Cross(rA2, normal); - float rn2B = b2Cross(rB2, normal); + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } - float k11 = mA + mB + iA * rn1A * rn1A + iB * rn1B * rn1B; - float k22 = mA + mB + iA * rn2A * rn2A + iB * rn2B * rn2B; - float k12 = mA + mB + iA * rn1A * rn2A + iB * rn1B * rn2B; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; +} - b2Mat22 K, invK; +void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +{ + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - // Ensure a reasonable condition number. - const float k_maxConditionNumber = 10000.0f; - if (k11 * k11 < k_maxConditionNumber * (k11 * k22 - k12 * k12)) - { - // K is safe to invert. - K.cx = (b2Vec2){k11, k12}; - K.cy = (b2Vec2){k12, k22}; - invK = b2GetInverse22(K); - } - else - { - // The constraints are redundant, however one may be deeper than the other. - // This can happen when a capsule is deeply embedded in a box. - goto manifold_degenerate; - } + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraint* constraints = context->graph->colors[colorIndex].contactConstraints; + float inv_dt = context->invTimeStep; - const float k_errorTol = 1e-3f; - B2_MAYBE_UNUSED(k_errorTol); + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraint* constraint = constraints + i; - for (;;) - { - // - // Case 1: vn = 0 - // - // 0 = A * x + b' - // - // Solve for x: - // - // x = - inv(A) * b' - // - b2Vec2 x = b2Neg(b2MulMV(invK, b)); - - if (x.x >= 0.0f && x.y >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = x; - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); - - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + b2Cross(rB2, P2)); - break; - } - - // - // Case 2: vn1 = 0 and x2 = 0 - // - // 0 = a11 * x1 + a12 * 0 + b1' - // vn2 = a21 * x1 + a22 * 0 + b2' - // - x.x = -b.x / k11; - x.y = 0.0f; - float vn2 = K.cx.y * x.x + b.y; - if (x.x >= 0.0f && vn2 >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = x; - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); - - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + b2Cross(rB2, P2)); - break; - } - - // - // Case 3: vn2 = 0 and x1 = 0 - // - // vn1 = a11 * 0 + a12 * x2 + b1' - // 0 = a21 * 0 + a22 * x2 + b2' - // - x.x = 0.0f; - x.y = -b.y / k22; - float vn1 = K.cy.x * x.y + b.x; - if (x.y >= 0.0f && vn1 >= 0.0f) - { - // Resubstitute for the incremental impulse - b2Vec2 d = x; - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); - - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + b2Cross(rB2, P2)); - break; - } - break; - } - } - else + switch (constraint->type) { - manifold_degenerate: - for (int32_t j = 0; j < pointCount; ++j) - { - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); + case b2_onePointType: + b2SolveContactOnePoint(constraint, bodies, inv_dt, useBias); + break; - b2Vec2 rA = b2RotateVector(qA, pc->localAnchorsA[j]); - b2Vec2 rB = b2RotateVector(qB, pc->localAnchorsB[j]); + case b2_twoPointType: + b2SolveContactTwoPoints(constraint, bodies, inv_dt, useBias); + break; - // Current separation - b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - float separation = b2Dot(d, normal) + pc->separations[j]; + case b2_onePointStaticType: + b2SolveContactOnePointStatic(constraint, bodies, inv_dt, useBias); + break; - // Track max constraint error. - minSeparation = B2_MIN(minSeparation, separation); + case b2_twoPointStaticType: + b2SolveContactTwoPointsStatic(constraint, bodies, inv_dt, useBias); + break; - // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. - // This improves stacking stability significantly. - float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); + default: + B2_ASSERT(false); + } + } - // Compute the effective mass. - float rnA = b2Cross(rA, normal); - float rnB = b2Cross(rB, normal); - float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + b2TracyCZoneEnd(solve_contact); +} - // Compute normal impulse - float impulse = K > 0.0f ? -C / K : 0.0f; +void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - b2Vec2 P = b2MulSV(impulse, normal); + b2ContactConstraint* constraints = context->constraints; - cA = b2MulSub(cA, mA, P); - aA -= iA * b2Cross(rA, P); + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraint* constraint = constraints + i; + b2Contact* contact = constraint->contact; + b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; - cB = b2MulAdd(cB, mB, P); - aB += iB * b2Cross(rB, P); - } + for (int32_t j = 0; j < pointCount; ++j) + { + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; } - - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; } - // We can't expect minSpeparation >= -b2_linearSlop because we don't - // push the separation above -b2_linearSlop. - return minSeparation >= -3.0f * b2_linearSlop; + b2TracyCZoneEnd(store_impulses); } diff --git a/src/contact_solver.h b/src/contact_solver.h index 916008ac..5c7ffdf7 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -4,40 +4,41 @@ #pragma once #include "solver_data.h" -#include "stack_allocator.h" -#include "box2d/callbacks.h" +typedef struct b2Contact b2Contact; -typedef struct b2ContactSolverDef +typedef struct b2ContactConstraintPoint { - const b2StepContext* context; - struct b2World* world; - int32_t contactList; - int32_t contactCount; -} b2ContactSolverDef; - -typedef struct b2ContactSolver + b2Vec2 rA, rB; + float separation; + float normalImpulse; + float tangentImpulse; + float normalMass; + float tangentMass; +} b2ContactConstraintPoint; + +typedef enum b2ContactConstraintType { - const b2StepContext* context; - struct b2World* world; - struct b2ContactPositionConstraint* positionConstraints; - struct b2ContactVelocityConstraint* velocityConstraints; - int32_t contactList; - int32_t contactCount; - int32_t constraintCount; -} b2ContactSolver; - -b2ContactSolver* b2CreateContactSolver(b2ContactSolverDef* def); + b2_onePointType, + b2_twoPointType, + b2_onePointStaticType, + b2_twoPointStaticType, +} b2ContactConstraintType; -static inline void b2DestroyContactSolver(b2ContactSolver* solver, b2StackAllocator* alloc) +typedef struct b2ContactConstraint { - b2FreeStackItem(alloc, solver->velocityConstraints); - b2FreeStackItem(alloc, solver->positionConstraints); - b2FreeStackItem(alloc, solver); -} - -void b2ContactSolver_Initialize(b2ContactSolver* solver); -void b2ContactSolver_SolveVelocityConstraints(b2ContactSolver* solver); -void b2ContactSolver_ApplyRestitution(b2ContactSolver* solver); -void b2ContactSolver_StoreImpulses(b2ContactSolver* solver); -bool b2ContactSolver_SolvePositionConstraintsBlock(b2ContactSolver* solver); + b2Contact* contact; + int32_t indexA; + int32_t indexB; + b2ContactConstraintPoint points[2]; + b2Vec2 normal; + float friction; + float massCoefficient; + float biasCoefficient; + float impulseCoefficient; + b2ContactConstraintType type; +} b2ContactConstraint; + +void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.c b/src/graph.c index d5f27682..c98cf580 100644 --- a/src/graph.c +++ b/src/graph.c @@ -7,6 +7,7 @@ #include "array.h" #include "body.h" #include "contact.h" +#include "contact_solver.h" #include "core.h" #include "joint.h" #include "shape.h" @@ -20,104 +21,12 @@ #include #include -#define maxBaumgarteVelocity 3.0f - -typedef struct -{ - b2Vec2 rA, rB; - b2Vec2 localAnchorA, localAnchorB; - float separation; - float normalImpulse; - float tangentImpulse; - float normalMass; - float tangentMass; -} b2ConstraintPoint; - -typedef struct b2Constraint -{ - b2Contact* contact; - int32_t indexA; - int32_t indexB; - b2ConstraintPoint points[2]; - b2Vec2 normal; - float friction; - float massCoefficient; - float biasCoefficient; - float impulseCoefficient; - int32_t pointCount; -} b2Constraint; - -typedef enum -{ - b2_stageIntegrateVelocities = 0, - b2_stagePrepareJoints, - b2_stagePrepareContacts, - b2_stageSolveJoints, - b2_stageSolveContacts, - b2_stageIntegratePositions, - b2_stageFinalizePositions, - b2_stageCalmJoints, - b2_stageCalmContacts, - b2_stageStoreImpulses -} b2SolverStageType; - -// Each block of work has a sync index that gets incremented when a worker claims the block. This ensures only a single worker claims a -// block, yet lets work be distributed dynamically across multiple workers (work stealing). This also reduces contention on a single block -// index atomic. For non-iterative stages the sync index is simply set to one. For iterative stages (solver iteration) the same block of -// work is executed once per iteration and the atomic sync index is shared across iterations, so it increases monotonically. -typedef struct -{ - int32_t startIndex; - int32_t endIndex; - _Atomic int syncIndex; -} b2SolverBlock; - -// Each stage must be completed before going to the next stage. -// Non-iterative stages use a stage instance once while iterative stages re-use the same instance each iteration. -typedef struct -{ - b2SolverStageType type; - b2SolverBlock* blocks; - int32_t blockCount; - int32_t colorIndex; - _Atomic int completionCount; -} b2SolverStage; - -typedef struct -{ - b2World* world; - b2Body** awakeBodies; - b2SolverBody* solverBodies; - b2Graph* graph; - - const b2StepContext* stepContext; - b2Constraint* constraints; - int32_t activeColorCount; - int32_t velocityIterations; - int32_t calmIterations; - int32_t workerCount; - - float timeStep; - float invTimeStep; - float subStep; - float invSubStep; - - b2SolverStage* stages; - int32_t stageCount; - - // sync index (16-bits) | stage type (16-bits) - _Atomic unsigned int syncBits; -} b2SolverTaskContext; - typedef struct b2WorkerContext { b2SolverTaskContext* context; int32_t workerIndex; } b2WorkerContext; -// TODO_ERIN debugggin -_Atomic int firstWorkerEnterCount = 0; - void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) { bodyCapacity = B2_MAX(bodyCapacity, 8); @@ -294,6 +203,7 @@ static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2So b2Vec2 gravity = context->world->gravity; b2Body** bodies = context->awakeBodies; b2SolverBody* solverBodies = context->solverBodies; + int32_t* bodyMap = context->bodyMap; float h = context->timeStep; @@ -303,7 +213,9 @@ static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2So b2Body* body = bodies[i]; //_m_prefetch(bodies[i + 1]); - B2_ASSERT(body->solverIndex == i); + // create body map used to prepare constraints + B2_ASSERT(body->object.index < context->world->bodyPool.capacity); + bodyMap[body->object.index] = i; float invMass = body->invMass; float invI = body->invI; @@ -346,7 +258,7 @@ static void b2PrepareJointsTask(b2SolverTaskContext* context) b2World* world = context->world; b2Joint* joints = world->joints; int32_t jointCapacity = world->jointPool.capacity; - const b2StepContext* stepContext = context->stepContext; + b2StepContext* stepContext = context->stepContext; for (int32_t i = 0; i < jointCapacity; ++i) { @@ -360,153 +272,12 @@ static void b2PrepareJointsTask(b2SolverTaskContext* context) } } -static void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) -{ - b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); - - b2World* world = context->world; - b2Graph* graph = context->graph; - b2GraphColor* color = graph->colors + colorIndex; - int32_t* contactIndices = color->contactArray; - b2Contact* contacts = world->contacts; - b2Body* bodies = world->bodies; - b2SolverBody* solverBodies = context->solverBodies; - - // 30 is a bit soft, 60 oscillates too much - // const float contactHertz = 45.0f; - // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; - - float h = context->timeStep; - bool enableWarmStarting = world->enableWarmStarting; - - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; - - B2_ASSERT(0 < pointCount && pointCount <= 2); - - b2Body* bodyA = bodies + contact->edges[0].bodyIndex; - b2Body* bodyB = bodies + contact->edges[1].bodyIndex; - - int32_t indexA = bodyA->solverIndex; - int32_t indexB = bodyB->solverIndex; - - b2Constraint* constraint = color->contacts + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - constraint->pointCount = pointCount; - - b2SolverBody* solverBodyA; - b2Vec2 vA; - float wA; - float mA; - float iA; - - if (indexA != B2_NULL_INDEX) - { - solverBodyA = solverBodies + indexA; - vA = solverBodyA->linearVelocity; - wA = solverBodyA->angularVelocity; - mA = solverBodyA->invMass; - iA = solverBodyA->invI; - } - else - { - solverBodyA = NULL; - vA.x = vA.y = 0.0; - wA = 0.0f; - mA = 0.0f; - iA = 0.0f; - } - - B2_ASSERT(indexB != B2_NULL_INDEX); - b2SolverBody* solverBodyB = solverBodies + indexB; - b2Vec2 vB = solverBodyB->linearVelocity; - float wB = solverBodyB->angularVelocity; - float mB = solverBodyB->invMass; - float iB = solverBodyB->invI; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float hertz = mA == 0.0f ? 2.0f * contactHertz : contactHertz; - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - float c = h * omega * (2.0f * zeta + h * omega); - constraint->impulseCoefficient = 1.0f / (1.0f + c); - constraint->massCoefficient = c * constraint->impulseCoefficient; - constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); - - b2Vec2 cA = bodyA->position; - b2Vec2 cB = bodyB->position; - b2Rot qA = bodyA->transform.q; - b2Rot qB = bodyB->transform.q; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; - - cp->rA = b2Sub(mp->point, cA); - cp->rB = b2Sub(mp->point, cB); - cp->localAnchorA = b2InvRotateVector(qA, cp->rA); - cp->localAnchorB = b2InvRotateVector(qB, cp->rB); - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - cp->separation = mp->separation; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - - // Warm start - if (enableWarmStarting) - { - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - } - } - - if (solverBodyA != NULL) - { - solverBodyA->linearVelocity = vA; - solverBodyA->angularVelocity = wA; - } - solverBodyB->linearVelocity = vB; - solverBodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(prepare_contact); -} - static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) { b2World* world = context->world; b2Joint* joints = world->joints; int32_t jointCapacity = world->jointPool.capacity; - const b2StepContext* stepContext = context->stepContext; + b2StepContext* stepContext = context->stepContext; for (int32_t i = 0; i < jointCapacity; ++i) { @@ -516,166 +287,10 @@ static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) continue; } - b2SolveJointVelocitySoft(joint, stepContext, useBias); + b2SolveJointVelocity(joint, stepContext, useBias); } } -static void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) -{ - b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - - b2Graph* graph = context->graph; - b2GraphColor* color = graph->colors + colorIndex; - b2SolverBody* bodies = context->solverBodies; - b2Constraint* constraints = color->contacts; - - float inv_dt = context->invTimeStep; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Constraint* constraint = constraints + i; - - int32_t indexA = constraint->indexA; - b2SolverBody* bodyA; - b2Vec2 vA; - float wA; - float mA; - float iA; - b2Vec2 dpA; - float daA; - - if (indexA != B2_NULL_INDEX) - { - bodyA = bodies + indexA; - vA = bodyA->linearVelocity; - wA = bodyA->angularVelocity; - dpA = bodyA->deltaPosition; - daA = bodyA->deltaAngle; - mA = bodyA->invMass; - iA = bodyA->invI; - } - else - { - bodyA = NULL; - vA = b2Vec2_zero; - wA = 0.0f; - dpA = b2Vec2_zero; - daA = 0.0f; - mA = 0.0f; - iA = 0.0f; - } - - b2SolverBody* bodyB = bodies + constraint->indexB; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 dpB = bodyB->deltaPosition; - float daB = bodyB->deltaAngle; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - int32_t pointCount = constraint->pointCount; - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - float biasCoefficient = constraint->biasCoefficient; - float massCoefficient = constraint->massCoefficient; - float impulseCoefficient = constraint->impulseCoefficient; - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - for (int32_t j = 0; j < pointCount; ++j) - { - b2ConstraintPoint* cp = constraint->points + j; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - if (bodyA != NULL) - { - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - } - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(solve_contact); -} - static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); @@ -717,7 +332,6 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2SolverBody* solverBody = solverBodies + i; b2Body* body = bodies + solverBody->bodyIndex; - B2_ASSERT(body->solverIndex == i); body->linearVelocity = solverBody->linearVelocity; body->angularVelocity = solverBody->angularVelocity; @@ -769,29 +383,6 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2TracyCZoneEnd(finalize_positions); } -static void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - - b2Constraint* constraints = context->constraints; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Constraint* constraint = constraints + i; - b2Contact* contact = constraint->contact; - - b2Manifold* manifold = &contact->manifold; - - for (int32_t j = 0; j < constraint->pointCount; ++j) - { - manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; - manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; - } - } - - b2TracyCZoneEnd(store_impulses); -} - static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) { b2SolverStageType type = stage->type; @@ -957,9 +548,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont if (workerIndex == 0) { - B2_ASSERT(atomic_load(&firstWorkerEnterCount) == 0); - atomic_fetch_add(&firstWorkerEnterCount, 1); - // Main thread synchronizes the workers and does work itself. // // Stages are re-used for loops so that I don't need more stages for large iteration counts. @@ -1130,7 +718,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont // bias removal stage to help remove excess bias energy. // http://mmacklin.com/smallsteps.pdf // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf -void b2SolveGraph(b2World* world, const b2StepContext* stepContext) +void b2SolveGraph(b2World* world, b2StepContext* stepContext) { b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); @@ -1155,6 +743,10 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); + int32_t bodyCapacity = world->bodyPool.capacity; + int32_t* bodyMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); + memset(bodyMap, 0xFF, bodyCapacity * sizeof(int32_t)); + int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { @@ -1165,12 +757,15 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) { b2Body* body = bodies + bodyIndex; B2_ASSERT(b2ObjectValid(&body->object)); + B2_ASSERT(body->object.index == bodyIndex); awakeBodies[index] = body; + B2_ASSERT(0 < bodyIndex && bodyIndex < bodyCapacity); + bodyMap[bodyIndex] = index; + // cache miss bodyIndex = body->islandNext; - body->solverIndex = index; index += 1; } @@ -1180,8 +775,8 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) int32_t workerCount = world->workerCount; const int32_t blocksPerWorker = 6; - int32_t bodyBlockSize = 1 << 4; - int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 4) + 1; + int32_t bodyBlockSize = 1 << 5; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 5) + 1; if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) { bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); @@ -1222,14 +817,15 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) } activeColorCount = c; - b2Constraint* constraints = b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2Constraint), "constraint"); + b2ContactConstraint* constraints = + b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraint), "constraint"); int32_t base = 0; for (int32_t i = 0; i < activeColorCount; ++i) { int32_t j = activeColorIndices[i]; - colors[j].contacts = constraints + base; + colors[j].contactConstraints = constraints + base; base += b2Array(colors[j].contactArray).count; } @@ -1417,12 +1013,17 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) int32_t velIters = B2_MAX(1, stepContext->velocityIterations); + stepContext->bodyMap = bodyMap; + stepContext->solverBodies = solverBodies; + stepContext->bodyCount = awakeBodyCount; + b2SolverTaskContext context; - context.stepContext = stepContext; context.world = world; + context.graph = graph; context.awakeBodies = awakeBodies; context.solverBodies = solverBodies; - context.graph = graph; + context.bodyMap = bodyMap; + context.stepContext = stepContext; context.constraints = constraints; context.activeColorCount = activeColorCount; context.velocityIterations = velIters; @@ -1438,9 +1039,7 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) b2TracyCZoneEnd(prepare_stages); - atomic_store(&firstWorkerEnterCount, 0); - - // TODO_ERIN use workerIndex or threadIndex? + // Must use worker index because thread 0 can be assigned multiple tasks by enkiTS for (int32_t i = 0; i < workerCount; ++i) { workerContext[i].context = &context; @@ -1450,13 +1049,12 @@ void b2SolveGraph(b2World* world, const b2StepContext* stepContext) world->finishAllTasksFcn(world->userTaskContext); - atomic_store(&firstWorkerEnterCount, 0); - b2FreeStackItem(world->stackAllocator, storeBlocks); b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, bodyMap); b2FreeStackItem(world->stackAllocator, solverBodies); b2FreeStackItem(world->stackAllocator, awakeBodies); } diff --git a/src/graph.h b/src/graph.h index 344ed56b..5716bf45 100644 --- a/src/graph.h +++ b/src/graph.h @@ -5,11 +5,9 @@ #include "array.h" #include "bitset.h" -#include "table.h" - -#include "box2d/dynamic_tree.h" typedef struct b2Contact b2Contact; +typedef struct b2ContactConstraint b2ContactConstraint; typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; @@ -24,7 +22,7 @@ typedef struct b2GraphColor int32_t* jointArray; // transient - struct b2Constraint* contacts; + b2ContactConstraint* contactConstraints; } b2GraphColor; typedef struct b2Graph @@ -42,5 +40,4 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); void b2AddJointToGraph(b2World* world, b2Joint* contact); void b2RemoveJointFromGraph(b2World* world, b2Joint* contact); -void b2SolveGraph(b2World* world, const b2StepContext* stepContext); -void b2SolveGraphSoftStep(b2World* world, const b2StepContext* stepContext); +void b2SolveGraph(b2World* world, b2StepContext* stepContext); diff --git a/src/island.c b/src/island.c index 82bb9b3c..d4bbb5ff 100644 --- a/src/island.c +++ b/src/island.c @@ -140,8 +140,6 @@ void b2CreateIsland(b2Island* island) island->awakeIndex = B2_NULL_INDEX; island->constraintRemoveCount = 0; island->maySplit = false; - island->stepContext = NULL; - island->contactSolver = NULL; } void b2DestroyIsland(b2Island* island) @@ -669,94 +667,6 @@ static int b2CompareIslands(const void* A, const void* B) #define B2_CONTACT_REMOVE_THRESHOLD 1 -// Sort islands so that the largest islands are solved first to avoid -// long tails in the island parallel-for loop. -void b2SortIslands(b2World* world, b2Island** islands, int32_t count) -{ - // Sort descending order (largest island first) - qsort(islands, count, sizeof(b2Island*), b2CompareIslands); - - // Look for an island to split. Large islands have priority. - world->splitIslandIndex = B2_NULL_INDEX; - for (int32_t i = 0; i < count; ++i) - { - if (islands[i]->constraintRemoveCount >= B2_CONTACT_REMOVE_THRESHOLD) - { - // This and only this island may split this time step - islands[i]->maySplit = true; - world->splitIslandIndex = islands[i]->object.index; - break; - } - } -} - -void b2PrepareIsland(b2Island* island, b2StepContext* stepContext) -{ - island->stepContext = stepContext; - - b2ContactSolverDef contactSolverDef; - contactSolverDef.context = island->stepContext; - contactSolverDef.world = island->world; - contactSolverDef.contactList = island->headContact; - contactSolverDef.contactCount = island->contactCount; - island->contactSolver = b2CreateContactSolver(&contactSolverDef); -} - -#if 0 -if (island->bodyCount > 16) -{ - int32_t k = 4; - b2Vec2 clusterCenters[4] = { 0 }; - int32_t clusterCounts[4] = { 0 }; - int32_t m = island->bodyCount / k; - - // seed cluster positions - for (int32_t i = 0; i < k; ++i) - { - int32_t j = (i * m) % island->bodyCount; - clusterCenters[i] = island->bodies[j]->position; - } - - for (int32_t i = 0; i < island->bodyCount; ++i) - { - b2Body* b = island->bodies[i]; - b2Vec2 p = b->position; - float bestDist = b2DistanceSquared(clusterCenters[0], p); - b->cluster = 0; - - for (int32_t j = 1; j < k; ++j) - { - float dist = b2DistanceSquared(clusterCenters[j], p); - if (dist < bestDist) - { - bestDist = dist; - b->cluster = j; - } - } - } - - int32_t maxIter = 4; - for (int32_t iter = 0; iter < maxIter; ++iter) - { - // reset clusters - for (int32_t i = 0; i < k; ++i) - { - clusterCenters[i] = b2Vec2_zero; - clusterCounts[i] = 0; - } - - // computer new clusters - for (int32_t i = 0; i < island->bodyCount; ++i) - { - b2Body* b = island->bodies[i]; - int32_t j = b->cluster; - clusterCenters[j] = b2Add(clusterCenters[j], b->position); - clusterCounts[j] += 1; - } - } -} -#endif - // Split an island because some contacts and/or joints have been removed // Note: contacts/joints connecting to static bodies must belong to an island but don't affect island connectivity // Note: static bodies are never in an island @@ -1002,366 +912,6 @@ static void b2SplitIsland(b2Island* baseIsland) b2TracyCZoneEnd(split); } -// This must be thread safe -void b2SolveIsland(b2Island* island, uint32_t threadIndex) -{ - b2World* world = island->world; - b2Body* bodies = world->bodies; - b2StepContext* context = island->stepContext; - b2Joint* joints = world->joints; - - b2Vec2 gravity = world->gravity; - - float h = context->dt; - - // Integrate velocities and apply damping. Initialize the body state. - int32_t bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - float invMass = b->invMass; - float invI = b->invI; - - if (b->type == b2_dynamicBody) - { - b2Vec2 v = b->linearVelocity; - float w = b->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(b->force, b->gravityScale * b->mass, gravity))); - w = w + h * invI * b->torque; - - // Apply damping. - // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * b->linearDamping), v); - w *= 1.0f / (1.0f + h * b->angularDamping); - - b->linearVelocity = v; - b->angularVelocity = w; - } - - bodyIndex = b->islandNext; - } - - // Solver data - b2ContactSolver_Initialize(island->contactSolver); - - int32_t jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - b2PrepareJoint(joint, context); - jointIndex = joint->islandNext; - } - - b2TracyCZoneNC(velc, "Velocity Constraints", b2_colorCadetBlue, true); - // Solve velocity constraints - for (int32_t i = 0; i < context->velocityIterations; ++i) - { - jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - b2SolveJointVelocity(joint, context); - jointIndex = joint->islandNext; - } - - b2ContactSolver_SolveVelocityConstraints(island->contactSolver); - } - b2TracyCZoneEnd(velc); - - // Special handling for restitution - b2ContactSolver_ApplyRestitution(island->contactSolver); - - // Store impulses for warm starting - b2ContactSolver_StoreImpulses(island->contactSolver); - - // Integrate positions - bool enableContinuous = world->enableContinuous; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - b2Vec2 c = b->position; - float a = b->angle; - b2Vec2 v = b->linearVelocity; - float w = b->angularVelocity; - - // Clamp large velocities - b2Vec2 translation = b2MulSV(h, v); - if (b2Dot(translation, translation) > b2_maxTranslationSquared) - { - float ratio = b2_maxTranslation / b2Length(translation); - v = b2MulSV(ratio, v); - } - - float rotation = h * w; - if (rotation * rotation > b2_maxRotationSquared) - { - float ratio = b2_maxRotation / B2_ABS(rotation); - w *= ratio; - } - - // Integrate - c = b2MulAdd(c, h, v); - a += h * w; - - b->position = c; - b->angle = a; - b->linearVelocity = v; - b->angularVelocity = w; - - const float saftetyFactor = 0.5f; - if (enableContinuous && (b2Length(v) + B2_ABS(w) * b->maxExtent) * h > saftetyFactor * b->minExtent) - { - // Store in fast array for the continuous collision stage - int fastIndex = atomic_fetch_add(&world->fastBodyCount, 1); - world->fastBodies[fastIndex] = bodyIndex; - b->isFast = true; - } - else - { - // Body is safe to advance - b->isFast = false; - b->position0 = b->position; - b->angle0 = b->angle; - } - - bodyIndex = b->islandNext; - } - - b2TracyCZoneNC(posc, "Position Constraints", b2_colorBurlywood, true); - - // Solve position constraints - bool positionSolved = false; - for (int32_t i = 0; i < context->positionIterations; ++i) - { - bool contactsOkay = b2ContactSolver_SolvePositionConstraintsBlock(island->contactSolver); - - bool jointsOkay = true; - jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - - bool jointOkay = b2SolveJointPosition(joint, context); - jointsOkay = jointsOkay && jointOkay; - - jointIndex = joint->islandNext; - } - - if (contactsOkay && jointsOkay) - { - // Exit early if the position errors are small. - positionSolved = true; - break; - } - } - - b2TracyCZoneEnd(posc); - - b2TracyCZoneNC(sleep, "Sleep", b2_colorSalmon2, true); - - // Update transform - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - bodyIndex = body->islandNext; - } - - // Update sleep - bool isIslandAwake = true; - - // Don't allow an island that will be split to fall asleep just yet - if (world->enableSleep && island->maySplit == false) - { - float minSleepTime = FLT_MAX; - - const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; - const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - if (b->enableSleep == false || b->angularVelocity * b->angularVelocity > angTolSqr || - b2Dot(b->linearVelocity, b->linearVelocity) > linTolSqr) - { - b->sleepTime = 0.0f; - minSleepTime = 0.0f; - } - else - { - b->sleepTime += h; - minSleepTime = B2_MIN(minSleepTime, b->sleepTime); - } - - bodyIndex = b->islandNext; - } - - if (minSleepTime >= b2_timeToSleep && positionSolved) - { - isIslandAwake = false; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - B2_ASSERT(b->isFast == false); - - b->sleepTime = 0.0f; - b->linearVelocity = b2Vec2_zero; - b->angularVelocity = 0.0f; - b->force = b2Vec2_zero; - b->torque = 0.0f; - - bodyIndex = b->islandNext; - } - } - } - - if (isIslandAwake == false) - { - // This signals that this island should not be added to awake island array - island->awakeIndex = B2_NULL_INDEX; - } - else - { - b2Contact* contacts = world->contacts; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - - body->force = b2Vec2_zero; - body->torque = 0.0f; - - bool isFast = body->isFast; - - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; - - B2_ASSERT(shape->isFast == false); - - if (isFast) - { - // The AABB is updated after continuous collision. - // Add to moved shapes regardless of AABB changes. - shape->isFast = true; - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - else - { - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - } - - shapeIndex = shape->nextShapeIndex; - } - - // Prepare awake contacts. May include contacts that are not touching - // so they may not be island contacts. - int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; - - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } - - bodyIndex = body->islandNext; - } - } - - if (island->maySplit) - { - b2SplitIsland(island); - } - - b2TracyCZoneEnd(sleep); -} - -// Single threaded work -void b2CompleteIsland(b2Island* island) -{ - b2World* world = island->world; - -#if 0 - // Report impulses - b2PostSolveFcn* postSolveFcn = world->postSolveFcn; - if (postSolveFcn != NULL) - { - b2Contact* contacts = world->contacts; - int16_t worldIndex = world->index; - const b2Shape* shapes = world->shapes; - - int32_t contactIndex = island->headContact; - while (contactIndex != B2_NULL_INDEX) - { - const b2Contact* contact = contacts + contactIndex; - - const b2Shape* shapeA = shapes + contact->shapeIndexA; - const b2Shape* shapeB = shapes + contact->shapeIndexB; - - b2ShapeId idA = {shapeA->object.index, worldIndex, shapeA->object.revision}; - b2ShapeId idB = {shapeB->object.index, worldIndex, shapeB->object.revision}; - postSolveFcn(idA, idB, &contact->manifold, world->postSolveContext); - } - } -#endif - - // Destroy in reverse order - b2DestroyContactSolver(island->contactSolver, world->stackAllocator); - island->contactSolver = NULL; - - // Wake island - if (island->awakeIndex != B2_NULL_INDEX) - { - island->awakeIndex = B2_NULL_INDEX; - b2WakeIsland(island); - } -} - -// This island was just split. Handle any remaining single threaded cleanup. -void b2CompleteBaseSplitIsland(b2Island* island) -{ - b2DestroyContactSolver(island->contactSolver, island->world->stackAllocator); - island->contactSolver = NULL; -} - // This island was just created through splitting. Handle single thread work. void b2CompleteSplitIsland(b2Island* island) { diff --git a/src/island.h b/src/island.h index 6cf31330..571e1669 100644 --- a/src/island.h +++ b/src/island.h @@ -56,10 +56,6 @@ typedef struct b2Island // This island has been chosen to be split up into smaller islands because a sufficient // number of contacts have been removed. bool maySplit; - - // Transient solver data - b2StepContext* stepContext; - struct b2ContactSolver* contactSolver; } b2Island; void b2CreateIsland(b2Island* island); @@ -80,7 +76,6 @@ void b2LinkJoint(b2World* world, b2Joint* joint); void b2UnlinkJoint(b2World* world, b2Joint* joint); void b2MergeAwakeIslands(b2World* world); -void b2SortIslands(b2World* world, b2Island** islands, int32_t count); void b2PrepareIsland(b2Island* island, b2StepContext* stepContext); diff --git a/src/joint.c b/src/joint.c index 858aa970..ceb8b493 100644 --- a/src/joint.c +++ b/src/joint.c @@ -367,11 +367,11 @@ void b2World_DestroyJoint(b2JointId jointId) b2FreeObject(&world->jointPool, &joint->object); } -extern void b2PrepareMouse(b2Joint* base, const b2StepContext* context); -extern void b2PrepareRevolute(b2Joint* base, const b2StepContext* context); -extern void b2PrepareWeld(b2Joint* base, const b2StepContext* context); +extern void b2PrepareMouse(b2Joint* base, b2StepContext* context); +extern void b2PrepareRevolute(b2Joint* base, b2StepContext* context); +extern void b2PrepareWeld(b2Joint* base, b2StepContext* context); -void b2PrepareJoint(b2Joint* joint, const b2StepContext* context) +void b2PrepareJoint(b2Joint* joint, b2StepContext* context) { switch (joint->type) { @@ -392,33 +392,11 @@ void b2PrepareJoint(b2Joint* joint, const b2StepContext* context) } } -extern void b2SolveMouseVelocity(b2Joint* base, const b2StepContext* context); -extern void b2SolveRevoluteVelocity(b2Joint* base, const b2StepContext* context); +extern void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context); +extern void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap); +extern void b2SolveWeldVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap); -void b2SolveJointVelocity(b2Joint* joint, const b2StepContext* context) -{ - switch (joint->type) - { - case b2_mouseJoint: - b2SolveMouseVelocity(joint, context); - break; - - case b2_revoluteJoint: - b2SolveRevoluteVelocity(joint, context); - break; - - case b2_weldJoint: - break; - - default: - B2_ASSERT(false); - } -} - -extern void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap); -extern void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap); - -void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool removeOverlap) +void b2SolveJointVelocity(b2Joint* joint, b2StepContext* context, bool removeOverlap) { switch (joint->type) { @@ -430,11 +408,11 @@ void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool break; case b2_revoluteJoint: - b2SolveRevoluteVelocitySoft(joint, context, removeOverlap); + b2SolveRevoluteVelocity(joint, context, removeOverlap); break; case b2_weldJoint: - b2SolveWeldVelocitySoft(joint, context, removeOverlap); + b2SolveWeldVelocity(joint, context, removeOverlap); break; default: @@ -442,21 +420,6 @@ void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool } } -extern bool b2SolveRevolutePosition(b2Joint* base, const b2StepContext* context); - -// This returns true if the position errors are within tolerance. -bool b2SolveJointPosition(b2Joint* joint, const b2StepContext* context) -{ - switch (joint->type) - { - case b2_revoluteJoint: - return b2SolveRevolutePosition(joint, context); - - default: - return true; - } -} - extern void b2DrawRevolute(b2DebugDraw* draw, b2Joint* base, b2Body* bodyA, b2Body* bodyB); void b2DrawJoint(b2DebugDraw* draw, b2World* world, b2Joint* joint) diff --git a/src/joint.h b/src/joint.h index 08104dc4..248597c2 100644 --- a/src/joint.h +++ b/src/joint.h @@ -1,11 +1,10 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT -#include "box2d/id.h" -#include "box2d/types.h" - #include "pool.h" +#include "box2d/types.h" + #include typedef struct b2DebugDraw b2DebugDraw; @@ -22,7 +21,7 @@ typedef enum b2JointType b2_mouseJoint, b2_gearJoint, b2_wheelJoint, - b2_weldJoint, + b2_weldJoint, b2_frictionJoint, b2_motorJoint } b2JointType; @@ -52,10 +51,10 @@ typedef struct b2MouseJoint float gamma; // Solver temp + int32_t solverIndexB; + b2Vec2 positionB; b2Vec2 rB; b2Vec2 localCenterB; - float invMassB; - float invIB; b2Mat22 mass; b2Vec2 C; } b2MouseJoint; @@ -76,16 +75,14 @@ typedef struct b2RevoluteJoint float upperAngle; // Solver temp - b2Vec2 rA; - b2Vec2 rB; + int32_t solverIndexA; + int32_t solverIndexB; + b2Vec2 positionA; + b2Vec2 positionB; + float angleA; + float angleB; b2Vec2 localCenterA; b2Vec2 localCenterB; - float invMassA; - float invMassB; - float invIA; - float invIB; - b2Mat22 K; - b2Vec2 separation; float biasCoefficient; float massCoefficient; float impulseCoefficient; @@ -110,12 +107,12 @@ typedef struct b2WeldJoint b2Vec3 impulse; // Solver temp + int32_t solverIndexA; + int32_t solverIndexB; + b2Vec2 positionA; + b2Vec2 positionB; b2Vec2 localCenterA; b2Vec2 localCenterB; - float invMassA; - float invMassB; - float invIA; - float invIB; } b2WeldJoint; /// The base joint class. Joints are used to constraint two bodies together in @@ -123,9 +120,7 @@ typedef struct b2WeldJoint typedef struct b2Joint { b2Object object; - b2JointType type; - b2JointEdge edges[2]; int32_t islandIndex; @@ -146,11 +141,6 @@ typedef struct b2Joint bool collideConnected; } b2Joint; -void b2PrepareJoint(b2Joint* joint, const b2StepContext* context); -void b2SolveJointVelocity(b2Joint* joint, const b2StepContext* context); -void b2SolveJointVelocitySoft(b2Joint* joint, const b2StepContext* context, bool removeOverlap); - -// This returns true if the position errors are within tolerance. -bool b2SolveJointPosition(b2Joint* joint, const b2StepContext* context); - +void b2PrepareJoint(b2Joint* joint, b2StepContext* context); +void b2SolveJointVelocity(b2Joint* joint, b2StepContext* context, bool removeOverlap); void b2DrawJoint(b2DebugDraw* draw, b2World* world, b2Joint* joint); diff --git a/src/revolute_joint.c b/src/revolute_joint.c index be6ddb19..529a18d3 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -28,51 +28,27 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) int32_t indexA = base->edges[0].bodyIndex; int32_t indexB = base->edges[1].bodyIndex; - B2_ASSERT(0 <= indexA && indexA < context->bodyCapacity); - B2_ASSERT(0 <= indexB && indexB < context->bodyCapacity); - b2Body* bodyA = context->bodies + indexA; b2Body* bodyB = context->bodies + indexB; - B2_ASSERT(bodyA->object.index == bodyA->object.next); - B2_ASSERT(bodyB->object.index == bodyB->object.next); + B2_ASSERT(b2ObjectValid(&bodyA->object)); + B2_ASSERT(b2ObjectValid(&bodyB->object)); b2RevoluteJoint* joint = &base->revoluteJoint; - joint->localCenterA = bodyA->localCenter; - joint->invMassA = bodyA->invMass; - joint->invIA = bodyA->invI; + joint->solverIndexA = indexA == B2_NULL_INDEX ? B2_NULL_INDEX : context->bodyMap[indexA]; + joint->solverIndexB = context->bodyMap[indexB]; + joint->localCenterA = bodyA->localCenter; joint->localCenterB = bodyB->localCenter; - joint->invMassB = bodyB->invMass; - joint->invIB = bodyB->invI; + joint->positionA = bodyA->position; + joint->positionB = bodyB->position; + joint->angleA = bodyA->angle; + joint->angleB = bodyB->angle; - float aA = bodyA->angle; - b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; - - float aB = bodyB->angle; - b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); - - joint->rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); - joint->rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); - - // J = [-I -r1_skew I r2_skew] - // r_skew = [-ry; rx] - - // Matlab - // K = [ mA+r1y^2*iA+mB+r2y^2*iB, -r1y*iA*r1x-r2y*iB*r2x] - // [ -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB] - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; - - joint->K.cx.x = mA + mB + joint->rA.y * joint->rA.y * iA + joint->rB.y * joint->rB.y * iB; - joint->K.cy.x = -joint->rA.y * joint->rA.x * iA - joint->rB.y * joint->rB.x * iB; - joint->K.cx.y = joint->K.cy.x; - joint->K.cy.y = mA + mB + joint->rA.x * joint->rA.x * iA + joint->rB.x * joint->rB.x * iB; + float iA = bodyA->invI; + float iB = bodyB->invI; joint->axialMass = iA + iB; bool fixedRotation; @@ -92,13 +68,12 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) float omega = 2.0f * b2_pi * hertz; float h = context->dt; - joint->separation = b2Add(b2Sub(joint->rB, joint->rA), b2Sub(bodyB->position, bodyA->position)); joint->biasCoefficient = omega / (2.0f * zeta + h * omega); float c = h * omega * (2.0f * zeta + h * omega); joint->impulseCoefficient = 1.0f / (1.0f + c); joint->massCoefficient = c * joint->impulseCoefficient; - joint->angle = aB - aA - joint->referenceAngle; + joint->angle = bodyB->angle - bodyA->angle - joint->referenceAngle; if (joint->enableLimit == false || fixedRotation) { joint->lowerImpulse = 0.0f; @@ -116,24 +91,14 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) // Soft step works best when bilateral constraints have no warm starting. joint->impulse = b2Vec2_zero; - //joint->impulse.x = 0.0f; joint->motorImpulse *= dtRatio; joint->lowerImpulse *= dtRatio; joint->upperImpulse *= dtRatio; float axialImpulse = joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse; - b2Vec2 P = {joint->impulse.x, joint->impulse.y}; - - vA = b2MulSub(vA, mA, P); - wA -= iA * (b2Cross(joint->rA, P) + axialImpulse); - vB = b2MulAdd(vB, mB, P); - wB += iB * (b2Cross(joint->rB, P) + axialImpulse); - - //vA.x = 0.0f; - //wA = 0.0f; - //vB.x = 0.0f; - //wB = 0.0f; + wA -= iA * axialImpulse; + wB += iB * axialImpulse; } else { @@ -143,118 +108,46 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->upperImpulse = 0.0f; } - bodyA->linearVelocity = vA; bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; } -void b2SolveRevoluteVelocity(b2Joint* base, const b2StepContext* context) +void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap) { B2_ASSERT(base->type == b2_revoluteJoint); b2RevoluteJoint* joint = &base->revoluteJoint; - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; - - bool fixedRotation = (iA + iB == 0.0f); - - // Solve motor constraint. - if (joint->enableMotor && fixedRotation == false) + b2SolverBody* bodyA = NULL; + float mA, iA, wA, aA; + b2Vec2 vA, cA; + if (joint->solverIndexA == B2_NULL_INDEX) { - float Cdot = wB - wA - joint->motorSpeed; - float impulse = -joint->axialMass * Cdot; - float oldImpulse = joint->motorImpulse; - float maxImpulse = context->dt * joint->maxMotorTorque; - joint->motorImpulse = B2_CLAMP(joint->motorImpulse + impulse, -maxImpulse, maxImpulse); - impulse = joint->motorImpulse - oldImpulse; - - wA -= iA * impulse; - wB += iB * impulse; - } - - if (joint->enableLimit && fixedRotation == false) - { - // Lower limit - { - float C = joint->angle - joint->lowerAngle; - float Cdot = wB - wA; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); - float oldImpulse = joint->lowerImpulse; - joint->lowerImpulse = B2_MAX(joint->lowerImpulse + impulse, 0.0f); - impulse = joint->lowerImpulse - oldImpulse; - - wA -= iA * impulse; - wB += iB * impulse; - } - - // Upper limit - // Note: signs are flipped to keep C positive when the constraint is satisfied. - // This also keeps the impulse positive when the limit is active. - { - float C = joint->upperAngle - joint->angle; - float Cdot = wA - wB; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); - float oldImpulse = joint->upperImpulse; - joint->upperImpulse = B2_MAX(joint->upperImpulse + impulse, 0.0f); - impulse = joint->upperImpulse - oldImpulse; - - wA += iA * impulse; - wB -= iB * impulse; - } + mA = 0.0f; + iA = 0.0f; + cA = joint->positionA; + aA = joint->angleA; + vA = b2Vec2_zero; + wA = 0.0f; } - - // Solve point-to-point constraint + else { - b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, joint->rB)), b2Add(vA, b2CrossSV(wA, joint->rA))); - b2Vec2 impulse = b2Solve22(joint->K, b2Neg(Cdot)); - - joint->impulse.x += impulse.x; - joint->impulse.y += impulse.y; - - vA = b2MulSub(vA, mA, impulse); - wA -= iA * b2Cross(joint->rA, impulse); - - vB = b2MulAdd(vB, mB, impulse); - wB += iB * b2Cross(joint->rB, impulse); + bodyA = context->solverBodies + joint->solverIndexA; + mA = bodyA->invMass; + iA = bodyA->invI; + cA = b2Add(joint->positionA, bodyA->deltaPosition); + aA = joint->angleA + bodyA->deltaAngle; + vA = bodyA->linearVelocity; + wA = bodyA->angularVelocity; } - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - -void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap) -{ - B2_ASSERT(base->type == b2_revoluteJoint); - - b2RevoluteJoint* joint = &base->revoluteJoint; - - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; + b2SolverBody* bodyB = context->solverBodies + joint->solverIndexB; + float mB = bodyB->invMass; + float iB = bodyB->invI; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); - const float aA = bodyA->angle + bodyA->deltaAngle; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); - const float aB = bodyB->angle + bodyB->deltaAngle; - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; + b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); + float aB = joint->angleB + bodyB->deltaAngle; bool fixedRotation = (iA + iB == 0.0f); @@ -342,6 +235,13 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + // J = [-I -r1_skew I r2_skew] + // r_skew = [-ry; rx] + + // Matlab + // K = [ mA+r1y^2*iA+mB+r2y^2*iB, -r1y*iA*r1x-r2y*iB*r2x] + // [ -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB] + b2Mat22 K; K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; @@ -349,7 +249,6 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; b2Vec2 separation = b2Add(b2Sub(rB, rA), b2Sub(cB, cA)); - b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, rB)), b2Add(vA, b2CrossSV(wA, rA))); float biasScale = 0.0f; @@ -377,100 +276,14 @@ void b2SolveRevoluteVelocitySoft(b2Joint* base, const b2StepContext* context, bo wB += iB * b2Cross(rB, impulse); } - //vA.x = 0.0f; - //wA = 0.0f; - //vB.x = 0.0f; - //wB = 0.0f; - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - -bool b2SolveRevolutePosition(b2Joint* base, b2StepContext* context) -{ - B2_ASSERT(base->type == b2_revoluteJoint); - - b2RevoluteJoint* joint = &base->revoluteJoint; - - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; - - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - - b2Rot qA = b2MakeRot(aA), qB = b2MakeRot(aB); - - float angularError = 0.0f; - float positionError = 0.0f; - - bool fixedRotation = (joint->invIA + joint->invIB == 0.0f); - - // Solve angular limit constraint - if (joint->enableLimit && fixedRotation == false) - { - float angle = aB - aA - joint->referenceAngle; - float C = 0.0f; - - if (B2_ABS(joint->upperAngle - joint->lowerAngle) < 2.0f * b2_angularSlop) - { - // Prevent large angular corrections - C = B2_CLAMP(angle - joint->lowerAngle, -b2_maxAngularCorrection, b2_maxAngularCorrection); - } - else if (angle <= joint->lowerAngle) - { - // Prevent large angular corrections and allow some slop. - C = B2_CLAMP(angle - joint->lowerAngle + b2_angularSlop, -b2_maxAngularCorrection, 0.0f); - } - else if (angle >= joint->upperAngle) - { - // Prevent large angular corrections and allow some slop. - C = B2_CLAMP(angle - joint->upperAngle - b2_angularSlop, 0.0f, b2_maxAngularCorrection); - } - - float limitImpulse = -joint->axialMass * C; - aA -= joint->invIA * limitImpulse; - aB += joint->invIB * limitImpulse; - angularError = B2_ABS(C); - } - - // Solve point-to-point constraint. + if (bodyA != NULL) { - qA = b2MakeRot(aA); - qB = b2MakeRot(aB); - b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); - b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); - - b2Vec2 C = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - positionError = b2Length(C); - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; - - b2Mat22 K; - K.cx.x = mA + mB + iA * rA.y * rA.y + iB * rB.y * rB.y; - K.cx.y = -iA * rA.x * rA.y - iB * rB.x * rB.y; - K.cy.x = K.cx.y; - K.cy.y = mA + mB + iA * rA.x * rA.x + iB * rB.x * rB.x; - - b2Vec2 impulse = b2Solve22(K, b2Neg(C)); - - cA = b2MulSub(cA, mA, impulse); - aA -= iA * b2Cross(rA, impulse); - - cB = b2MulAdd(cB, mB, impulse); - aB += iB * b2Cross(rB, impulse); + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; } - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; - - return positionError <= b2_linearSlop && angularError <= b2_angularSlop; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit) diff --git a/src/solver_data.h b/src/solver_data.h index fab10569..c51252a9 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -26,9 +26,78 @@ typedef struct b2StepContext float restitutionThreshold; - // From b2World::bodies for convenience + // TODO_ERIN for joints struct b2Body* bodies; int32_t bodyCapacity; + // Map from world body pool index to solver body + int32_t* bodyMap; + + struct b2SolverBody* solverBodies; + int32_t bodyCount; + bool enableWarmStarting; } b2StepContext; + +typedef enum b2SolverStageType +{ + b2_stageIntegrateVelocities = 0, + b2_stagePrepareJoints, + b2_stagePrepareContacts, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageStoreImpulses +} b2SolverStageType; + +// Each block of work has a sync index that gets incremented when a worker claims the block. This ensures only a single worker claims a +// block, yet lets work be distributed dynamically across multiple workers (work stealing). This also reduces contention on a single block +// index atomic. For non-iterative stages the sync index is simply set to one. For iterative stages (solver iteration) the same block of +// work is executed once per iteration and the atomic sync index is shared across iterations, so it increases monotonically. +typedef struct b2SolverBlock +{ + int32_t startIndex; + int32_t endIndex; + _Atomic int syncIndex; +} b2SolverBlock; + +// Each stage must be completed before going to the next stage. +// Non-iterative stages use a stage instance once while iterative stages re-use the same instance each iteration. +typedef struct b2SolverStage +{ + b2SolverStageType type; + b2SolverBlock* blocks; + int32_t blockCount; + int32_t colorIndex; + _Atomic int completionCount; +} b2SolverStage; + +typedef struct b2SolverTaskContext +{ + struct b2World* world; + struct b2Graph* graph; + struct b2Body** awakeBodies; + struct b2SolverBody* solverBodies; + int32_t* bodyMap; + + b2StepContext* stepContext; + struct b2ContactConstraint* constraints; + int32_t activeColorCount; + int32_t velocityIterations; + int32_t calmIterations; + int32_t workerCount; + + float timeStep; + float invTimeStep; + float subStep; + float invSubStep; + + b2SolverStage* stages; + int32_t stageCount; + + // sync index (16-bits) | stage type (16-bits) + _Atomic unsigned int syncBits; +} b2SolverTaskContext; diff --git a/src/world.c b/src/world.c index 012e5312..99ce9444 100644 --- a/src/world.c +++ b/src/world.c @@ -627,6 +627,7 @@ static void b2ContinuousParallelForTask(int32_t startIndex, int32_t endIndex, ui b2TracyCZoneEnd(continuous_task); } +#if 0 static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); @@ -913,9 +914,10 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(solve); } +#endif // Graph coloring experiment -static void b2Solve2(b2World* world, b2StepContext* context) +static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); @@ -1136,93 +1138,6 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, b2TracyCZoneEnd(world_step); } -void b2World_Step2(b2WorldId worldId, float timeStep, int32_t velocityIterations, int32_t positionIterations) -{ - if (timeStep == 0.0f) - { - // TODO_ERIN would be useful to still process collision while paused - return; - } - - b2TracyCZoneNC(world_step, "Step", b2_colorChartreuse, true); - - b2World* world = b2GetWorldFromId(worldId); - B2_ASSERT(world->locked == false); - if (world->locked) - { - return; - } - - world->profile = b2_emptyProfile; - - b2Timer stepTimer = b2CreateTimer(); - - // Update collision pairs and create contacts - { - b2Timer timer = b2CreateTimer(); - b2UpdateBroadPhasePairs(world); - world->profile.pairs = b2GetMilliseconds(&timer); - } - - // TODO_ERIN atomic - world->locked = true; - - b2StepContext context = {0}; - context.dt = timeStep; - context.velocityIterations = velocityIterations; - context.positionIterations = positionIterations; - if (timeStep > 0.0f) - { - context.inv_dt = 1.0f / timeStep; - } - else - { - context.inv_dt = 0.0f; - } - - context.dtRatio = world->inv_dt0 * timeStep; - context.restitutionThreshold = world->restitutionThreshold; - context.enableWarmStarting = world->enableWarmStarting; - context.bodies = world->bodies; - context.bodyCapacity = world->bodyPool.capacity; - - // Update contacts - { - b2Timer timer = b2CreateTimer(); - b2Collide(world); - world->profile.collide = b2GetMilliseconds(&timer); - } - - if (b2_parallel) - { - world->finishAllTasksFcn(world->userTaskContext); - } - - // Integrate velocities, solve velocity constraints, and integrate positions. - if (context.dt > 0.0f) - { - b2Timer timer = b2CreateTimer(); - b2Solve2(world, &context); - world->profile.solve = b2GetMilliseconds(&timer); - } - - if (context.dt > 0.0f) - { - world->inv_dt0 = context.inv_dt; - } - - world->locked = false; - - world->profile.step = b2GetMilliseconds(&stepTimer); - - B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); - - // Ensure stack is large enough - b2GrowStack(world->stackAllocator); - - b2TracyCZoneEnd(world_step); -} - static void b2DrawShape(b2DebugDraw* draw, b2Shape* shape, b2Transform xf, b2Color color) { switch (shape->type) From dffd3c24033aa261fac92cda3ee110347a2309e0 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 23 Sep 2023 19:58:11 -0700 Subject: [PATCH 27/51] fix joints --- include/box2d/box2d.h | 3 +- include/box2d/debug_draw.h | 2 +- samples/collection/behavior.cpp | 7 +- samples/collection/benchmark_pyramid.cpp | 6 +- samples/collection/sample_joints.cpp | 4 +- samples/draw.cpp | 2 +- samples/main.cpp | 2 +- samples/sample.cpp | 6 +- samples/settings.cpp | 2 +- samples/settings.h | 6 +- src/body.c | 2 +- src/contact_solver.c | 308 ++--------------------- src/contact_solver.h | 2 - src/joint.c | 6 +- src/joint.h | 12 +- src/mouse_joint.c | 43 ++-- src/revolute_joint.c | 136 +++++----- src/weld_joint.c | 59 +++-- src/world.c | 31 ++- 19 files changed, 208 insertions(+), 431 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index f4ee3d06..cb457ea3 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -86,8 +86,9 @@ BOX2D_API void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor); BOX2D_API void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed); BOX2D_API float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep); BOX2D_API void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque); +BOX2D_API b2Vec2 b2RevoluteJoint_GetConstraintForce(b2JointId jointId); -/// This function receives shapes found in the AABB query. + /// This function receives shapes found in the AABB query. /// @return true if the query should continue typedef bool b2QueryCallbackFcn(b2ShapeId shapeId, void* context); diff --git a/include/box2d/debug_draw.h b/include/box2d/debug_draw.h index 2a86be0e..5b8b8755 100644 --- a/include/box2d/debug_draw.h +++ b/include/box2d/debug_draw.h @@ -45,6 +45,6 @@ typedef struct b2DebugDraw bool drawShapes; bool drawJoints; bool drawAABBs; - bool drawCOMs; + bool drawMass; void* context; } b2DebugDraw; diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index d7e46753..3488da32 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -6,6 +6,7 @@ #include "box2d/box2d.h" #include "box2d/geometry.h" #include "box2d/hull.h" +#include "box2d/joint_util.h" #include #include @@ -108,19 +109,19 @@ class HighMassRatio2 : public Sample b2Polygon bigBox = b2MakeBox(10.0f * extent, 10.0f * extent); { - bodyDef.position = {-9.5f * extent, 0.5f * extent}; + bodyDef.position = {-9.0f * extent, 0.5f * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); } { - bodyDef.position = {9.5f * extent, 0.5f * extent}; + bodyDef.position = {9.0f * extent, 0.5f * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); } { - bodyDef.position = {0.0f, (10.0f + 1.0f) * extent}; + bodyDef.position = {0.0f, (10.0f + 16.0f) * extent}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); b2Body_CreatePolygon(bodyId, &shapeDef, &bigBox); } diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index d86415fb..504f3c29 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -21,9 +21,9 @@ class BenchmarkPyramid : public Sample { m_extent = 0.5f; m_round = 0.0f; - m_baseCount = 60; - m_rowCount = g_sampleDebug ? 4 : 1; - m_columnCount = g_sampleDebug ? 4 : 1; + m_baseCount = 10; + m_rowCount = g_sampleDebug ? 1 : 14; + m_columnCount = g_sampleDebug ? 1 : 13; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index 146bd20b..551f171e 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -239,7 +239,7 @@ class BallAndChain : public Sample #if 1 { float hx = 0.5f; - b2Polygon box = b2MakeBox(hx, 0.125f); + b2Capsule capsule = {{-hx, 0.0f}, {hx, 0.0f}, 0.125f}; b2ShapeDef sd = b2DefaultShapeDef(); sd.density = 20.0f; @@ -255,7 +255,7 @@ class BallAndChain : public Sample bd.type = b2_dynamicBody; bd.position = {(1.0f + 2.0f * i) * hx, e_count * hx}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); - b2Body_CreatePolygon(bodyId, &sd, &box); + b2Body_CreateCapsule(bodyId, &sd, &capsule); b2Vec2 pivot = {(2.0f * i) * hx, e_count * hx}; jd.bodyIdA = prevBodyId; diff --git a/samples/draw.cpp b/samples/draw.cpp index 1dfbe0c6..5d5b1140 100644 --- a/samples/draw.cpp +++ b/samples/draw.cpp @@ -1319,7 +1319,7 @@ void Draw::DrawString(b2Vec2 pw, const char* string, ...) ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoScrollbar); ImGui::SetCursorPos(ImVec2(ps.x, ps.y)); - ImGui::TextColoredV(ImColor(230, 153, 153, 255), string, arg); + ImGui::TextColoredV(ImColor(230, 230, 230, 255), string, arg); ImGui::End(); va_end(arg); } diff --git a/samples/main.cpp b/samples/main.cpp index dd88c02d..6a89e273 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -410,7 +410,7 @@ static void UpdateUI() ImGui::Checkbox("Contact Normals", &s_settings.m_drawContactNormals); ImGui::Checkbox("Contact Impulses", &s_settings.m_drawContactImpulse); ImGui::Checkbox("Friction Impulses", &s_settings.m_drawFrictionImpulse); - ImGui::Checkbox("Center of Masses", &s_settings.m_drawCOMs); + ImGui::Checkbox("Center of Masses", &s_settings.m_drawMass); ImGui::Checkbox("Statistics", &s_settings.m_drawStats); ImGui::Checkbox("Profile", &s_settings.m_drawProfile); diff --git a/samples/sample.cpp b/samples/sample.cpp index c1fed0b3..af7ebd59 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -166,7 +166,7 @@ void Sample::MouseDown(b2Vec2 p, int button, int mod) float dampingRatio = 0.7f; float mass = b2Body_GetMass(queryContext.bodyId); - b2MouseJointDef jd; + b2MouseJointDef jd = b2DefaultMouseJointDef(); jd.bodyIdA = m_groundBodyId; jd.bodyIdB = queryContext.bodyId; jd.target = p; @@ -226,7 +226,7 @@ void Sample::Step(Settings& settings) g_draw.m_debugDraw.drawShapes = settings.m_drawShapes; g_draw.m_debugDraw.drawJoints = settings.m_drawJoints; g_draw.m_debugDraw.drawAABBs = settings.m_drawAABBs; - g_draw.m_debugDraw.drawCOMs = settings.m_drawCOMs; + g_draw.m_debugDraw.drawMass = settings.m_drawMass; b2World_EnableSleeping(m_worldId, settings.m_enableSleep); b2World_EnableWarmStarting(m_worldId, settings.m_enableWarmStarting); @@ -238,7 +238,7 @@ void Sample::Step(Settings& settings) for (int32_t i = 0; i < 1; ++i) { - b2World_Step2(m_worldId, timeStep, settings.m_velocityIterations, settings.m_positionIterations); + b2World_Step(m_worldId, timeStep, settings.m_velocityIterations, settings.m_positionIterations); } b2World_Draw(m_worldId, &g_draw.m_debugDraw); diff --git a/samples/settings.cpp b/samples/settings.cpp index 237b46da..42d4fcbb 100644 --- a/samples/settings.cpp +++ b/samples/settings.cpp @@ -51,7 +51,7 @@ void Settings::Save() fprintf(file, " \"drawContactNormals\": %s,\n", m_drawContactNormals ? "true" : "false"); fprintf(file, " \"drawContactImpulse\": %s,\n", m_drawContactImpulse ? "true" : "false"); fprintf(file, " \"drawFrictionImpulse\": %s,\n", m_drawFrictionImpulse ? "true" : "false"); - fprintf(file, " \"drawCOMs\": %s,\n", m_drawCOMs ? "true" : "false"); + fprintf(file, " \"drawMass\": %s,\n", m_drawMass ? "true" : "false"); fprintf(file, " \"drawStats\": %s,\n", m_drawStats ? "true" : "false"); fprintf(file, " \"drawProfile\": %s,\n", m_drawProfile ? "true" : "false"); fprintf(file, " \"enableWarmStarting\": %s,\n", m_enableWarmStarting ? "true" : "false"); diff --git a/samples/settings.h b/samples/settings.h index ae1e4100..404cec40 100644 --- a/samples/settings.h +++ b/samples/settings.h @@ -9,8 +9,8 @@ struct Settings void Load(); int m_sampleIndex = 0; - int m_windowWidth = 1280; - int m_windowHeight = 720; + int m_windowWidth = 1920; + int m_windowHeight = 1080; float m_hertz = 60.0f; int m_velocityIterations = 8; int m_positionIterations = 3; @@ -21,7 +21,7 @@ struct Settings bool m_drawContactNormals = false; bool m_drawContactImpulse = false; bool m_drawFrictionImpulse = false; - bool m_drawCOMs = false; + bool m_drawMass = false; bool m_drawStats = false; bool m_drawProfile = false; bool m_enableWarmStarting = true; diff --git a/src/body.c b/src/body.c index 1b6bea0e..65a321f3 100644 --- a/src/body.c +++ b/src/body.c @@ -686,7 +686,7 @@ bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB) int32_t otherEdgeIndex = edgeIndex ^ 1; b2Joint* joint = world->joints + jointIndex; - if (joint->edges[otherEdgeIndex].bodyIndex == otherBodyIndex) + if (joint->collideConnected == false && joint->edges[otherEdgeIndex].bodyIndex == otherBodyIndex) { return false; } diff --git a/src/contact_solver.c b/src/contact_solver.c index b33b7eda..5aeabe82 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -25,6 +25,9 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon const int32_t* bodyMap = context->bodyMap; b2SolverBody* solverBodies = context->solverBodies; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); @@ -55,41 +58,21 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon constraint->normal = manifold->normal; constraint->friction = contact->friction; - b2SolverBody* solverBodyA; - b2Vec2 vA; - float wA; - float mA; - float iA; - - float hertz; + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; - if (indexA == B2_NULL_INDEX) - { - solverBodyA = NULL; - vA.x = vA.y = 0.0; - wA = 0.0f; - mA = 0.0f; - iA = 0.0f; - hertz = 2.0f * contactHertz; - constraint->type = pointCount == 1 ? b2_onePointStaticType : b2_twoPointStaticType; - } - else - { - solverBodyA = solverBodies + indexA; - vA = solverBodyA->linearVelocity; - wA = solverBodyA->angularVelocity; - mA = solverBodyA->invMass; - iA = solverBodyA->invI; - hertz = contactHertz; - constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; - } + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; + b2Vec2 vA = solverBodyA->linearVelocity; + float wA = solverBodyA->angularVelocity; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; - B2_ASSERT(indexB != B2_NULL_INDEX); - b2SolverBody* solverBodyB = solverBodies + indexB; b2Vec2 vB = solverBodyB->linearVelocity; float wB = solverBodyB->angularVelocity; float mB = solverBodyB->invMass; float iB = solverBodyB->invI; + + constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; // Stiffer for static contacts to avoid bodies getting pushed through the ground const float zeta = 1.0f; @@ -136,11 +119,8 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon } } - if (solverBodyA != NULL) - { - solverBodyA->linearVelocity = vA; - solverBodyA->angularVelocity = wA; - } + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; solverBodyB->linearVelocity = vB; solverBodyB->angularVelocity = wB; } @@ -150,7 +130,10 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { - b2SolverBody* bodyA = bodies + constraint->indexA; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; b2Vec2 dpA = bodyA->deltaPosition; @@ -158,7 +141,7 @@ static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody float mA = bodyA->invMass; float iA = bodyA->invI; - b2SolverBody* bodyB = bodies + constraint->indexB; + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; b2Vec2 dpB = bodyB->deltaPosition; @@ -258,7 +241,10 @@ static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { - b2SolverBody* bodyA = bodies + constraint->indexA; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; b2Vec2 dpA = bodyA->deltaPosition; @@ -266,7 +252,7 @@ static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBod float mA = bodyA->invMass; float iA = bodyA->invI; - b2SolverBody* bodyB = bodies + constraint->indexB; + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; b2Vec2 dpB = bodyB->deltaPosition; @@ -441,244 +427,6 @@ static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBod bodyB->angularVelocity = wB; } -static void b2SolveContactOnePointStatic(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) -{ - b2SolverBody* bodyB = bodies + constraint->indexB; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 dpB = bodyB->deltaPosition; - float daB = bodyB->deltaAngle; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - float biasCoefficient = constraint->biasCoefficient; - float massCoefficient = constraint->massCoefficient; - float impulseCoefficient = constraint->impulseCoefficient; - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - float ds = b2Dot(prB, normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - -static void b2SolveContactTwoPointsStatic(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) -{ - b2SolverBody* bodyB = bodies + constraint->indexB; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 dpB = bodyB->deltaPosition; - float daB = bodyB->deltaAngle; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - float biasCoefficient = constraint->biasCoefficient; - float massCoefficient = constraint->massCoefficient; - float impulseCoefficient = constraint->impulseCoefficient; - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - float ds = b2Dot(prB, normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 1; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - float ds = b2Dot(prB, normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 1; - - // Relative velocity at contact - b2Vec2 dv = b2Add(vB, b2CrossSV(wB, cp->rB)); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); @@ -701,14 +449,6 @@ void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskConte b2SolveContactTwoPoints(constraint, bodies, inv_dt, useBias); break; - case b2_onePointStaticType: - b2SolveContactOnePointStatic(constraint, bodies, inv_dt, useBias); - break; - - case b2_twoPointStaticType: - b2SolveContactTwoPointsStatic(constraint, bodies, inv_dt, useBias); - break; - default: B2_ASSERT(false); } diff --git a/src/contact_solver.h b/src/contact_solver.h index 5c7ffdf7..74463213 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -21,8 +21,6 @@ typedef enum b2ContactConstraintType { b2_onePointType, b2_twoPointType, - b2_onePointStaticType, - b2_twoPointStaticType, } b2ContactConstraintType; typedef struct b2ContactConstraint diff --git a/src/joint.c b/src/joint.c index ceb8b493..c8f1c96f 100644 --- a/src/joint.c +++ b/src/joint.c @@ -181,9 +181,9 @@ b2JointId b2World_CreateMouseJoint(b2WorldId worldId, const b2MouseJointDef* def b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); joint->type = b2_mouseJoint; - joint->localAnchorA = b2InvTransformPoint(bodyA->transform, def->target); joint->localAnchorB = b2InvTransformPoint(bodyB->transform, def->target); + joint->collideConnected = true; b2MouseJoint empty = {0}; joint->mouseJoint = empty; @@ -217,9 +217,9 @@ b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDe b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); joint->type = b2_revoluteJoint; - joint->localAnchorA = def->localAnchorA; joint->localAnchorB = def->localAnchorB; + joint->collideConnected = def->collideConnected; b2RevoluteJoint empty = {0}; joint->revoluteJoint = empty; @@ -269,9 +269,9 @@ b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def) b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); joint->type = b2_weldJoint; - joint->localAnchorA = def->localAnchorA; joint->localAnchorB = def->localAnchorB; + joint->collideConnected = def->collideConnected; b2WeldJoint empty = {0}; joint->weldJoint = empty; diff --git a/src/joint.h b/src/joint.h index 248597c2..6a8073af 100644 --- a/src/joint.h +++ b/src/joint.h @@ -51,7 +51,7 @@ typedef struct b2MouseJoint float gamma; // Solver temp - int32_t solverIndexB; + int32_t indexB; b2Vec2 positionB; b2Vec2 rB; b2Vec2 localCenterB; @@ -75,8 +75,8 @@ typedef struct b2RevoluteJoint float upperAngle; // Solver temp - int32_t solverIndexA; - int32_t solverIndexB; + int32_t indexA; + int32_t indexB; b2Vec2 positionA; b2Vec2 positionB; float angleA; @@ -107,10 +107,12 @@ typedef struct b2WeldJoint b2Vec3 impulse; // Solver temp - int32_t solverIndexA; - int32_t solverIndexB; + int32_t indexA; + int32_t indexB; b2Vec2 positionA; b2Vec2 positionB; + float angleA; + float angleB; b2Vec2 localCenterA; b2Vec2 localCenterB; } b2WeldJoint; diff --git a/src/mouse_joint.c b/src/mouse_joint.c index f6cee647..d6988481 100644 --- a/src/mouse_joint.c +++ b/src/mouse_joint.c @@ -44,20 +44,23 @@ void b2PrepareMouse(b2Joint* base, b2StepContext* context) B2_ASSERT(bodyB->object.index == bodyB->object.next); b2MouseJoint* joint = &base->mouseJoint; + joint->indexB = context->bodyMap[indexB]; joint->localCenterB = bodyB->localCenter; - joint->invMassB = bodyB->invMass; - joint->invIB = bodyB->invI; b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + b2Rot qB = bodyB->transform.q; + + b2SolverBody* solverBodyB = context->solverBodies + joint->indexB; + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; - b2Rot qB = b2MakeRot(aB); + float mB = bodyB->invMass; + float iB = bodyB->invI; float d = joint->damping; float k = joint->stiffness; + // TODO_ERIN convert to bias/mass/impulse scales // magic formulas // gamma has units of inverse mass. // beta has units of inverse time. @@ -76,10 +79,10 @@ void b2PrepareMouse(b2Joint* base, b2StepContext* context) // = [1/m1+1/m2 0 ] + invI1 * [r1.y*r1.y -r1.x*r1.y] + invI2 * [r1.y*r1.y -r1.x*r1.y] // [ 0 1/m1+1/m2] [-r1.x*r1.y r1.x*r1.x] [-r1.x*r1.y r1.x*r1.x] b2Mat22 K; - K.cx.x = joint->invMassB + joint->invIB * joint->rB.y * joint->rB.y + joint->gamma; - K.cx.y = -joint->invIB * joint->rB.x * joint->rB.y; + K.cx.x = mB + iB * joint->rB.y * joint->rB.y + joint->gamma; + K.cx.y = -iB * joint->rB.x * joint->rB.y; K.cy.x = K.cx.y; - K.cy.y = joint->invMassB + joint->invIB * joint->rB.x * joint->rB.x + joint->gamma; + K.cy.y = mB + iB * joint->rB.x * joint->rB.x + joint->gamma; joint->mass = b2GetInverse22(K); @@ -92,30 +95,30 @@ void b2PrepareMouse(b2Joint* base, b2StepContext* context) if (context->enableWarmStarting) { joint->impulse = b2MulSV(context->dtRatio, joint->impulse); - vB = b2MulAdd(vB, joint->invMassB, joint->impulse); - wB += joint->invIB * b2Cross(joint->rB, joint->impulse); + vB = b2MulAdd(vB, mB, joint->impulse); + wB += iB * b2Cross(joint->rB, joint->impulse); } else { joint->impulse = b2Vec2_zero; } - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; } void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context) { b2MouseJoint* joint = &base->mouseJoint; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + b2SolverBody* bodyB = context->solverBodies + joint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - // Cdot = v + cross(w, r) - b2Vec2 Cdot = b2Add(vB, b2CrossSV(wB, joint->rB)); - b2Vec2 SoftCdot = b2Add(Cdot, b2MulAdd(joint->C, joint->gamma, joint->impulse)); - b2Vec2 impulse = b2Neg(b2MulMV(joint->mass, SoftCdot)); + // dv = v + cross(w, r) + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, joint->rB)); + b2Vec2 Cdot = b2Add(dv, b2MulAdd(joint->C, joint->gamma, joint->impulse)); + b2Vec2 impulse = b2Neg(b2MulMV(joint->mass, Cdot)); b2Vec2 oldImpulse = joint->impulse; joint->impulse = b2Add(joint->impulse, impulse); @@ -126,8 +129,8 @@ void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context) } impulse = b2Sub(joint->impulse, oldImpulse); - vB = b2MulAdd(vB, joint->invMassB, impulse); - wB += joint->invIB * b2Cross(joint->rB, impulse); + vB = b2MulAdd(vB, bodyB->invMass, impulse); + wB += bodyB->invI * b2Cross(joint->rB, impulse); bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; diff --git a/src/revolute_joint.c b/src/revolute_joint.c index 529a18d3..a4ef1f84 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT +#define _CRT_SECURE_NO_WARNINGS + #include "body.h" #include "core.h" #include "joint.h" @@ -9,6 +11,8 @@ #include "box2d/debug_draw.h" +#include + // Point-to-point constraint // C = p2 - p1 // Cdot = v2 - v1 @@ -35,8 +39,8 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) b2RevoluteJoint* joint = &base->revoluteJoint; - joint->solverIndexA = indexA == B2_NULL_INDEX ? B2_NULL_INDEX : context->bodyMap[indexA]; - joint->solverIndexB = context->bodyMap[indexB]; + joint->indexA = context->bodyMap[indexA]; + joint->indexB = context->bodyMap[indexB]; joint->localCenterA = bodyA->localCenter; joint->localCenterB = bodyB->localCenter; joint->positionA = bodyA->position; @@ -44,11 +48,15 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->angleA = bodyA->angle; joint->angleB = bodyB->angle; - float wA = bodyA->angularVelocity; - float wB = bodyB->angularVelocity; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; - float iA = bodyA->invI; - float iB = bodyB->invI; + // Note: must warm start solver bodies + b2SolverBody* solverBodyA = joint->indexA == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexA; + float iA = solverBodyA->invI; + + b2SolverBody* solverBodyB = joint->indexB == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexB; + float iB = solverBodyB->invI; joint->axialMass = iA + iB; bool fixedRotation; @@ -63,7 +71,7 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) } // hertz = 1/4 * substep Hz - const float hertz = (1.0f / 4.0f) * context->velocityIterations * context->inv_dt; + const float hertz = 0.25f * context->velocityIterations * context->inv_dt; const float zeta = 1.0f; float omega = 2.0f * b2_pi * hertz; float h = context->dt; @@ -95,10 +103,11 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->lowerImpulse *= dtRatio; joint->upperImpulse *= dtRatio; + // TODO_ERIN is warm starting axial stuff useful? float axialImpulse = joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse; - wA -= iA * axialImpulse; - wB += iB * axialImpulse; + solverBodyA->angularVelocity -= iA * axialImpulse; + solverBodyB->angularVelocity += iB * axialImpulse; } else { @@ -107,47 +116,33 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) joint->lowerImpulse = 0.0f; joint->upperImpulse = 0.0f; } - - bodyA->angularVelocity = wA; - bodyB->angularVelocity = wB; } -void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap) +void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool useBias) { B2_ASSERT(base->type == b2_revoluteJoint); b2RevoluteJoint* joint = &base->revoluteJoint; - b2SolverBody* bodyA = NULL; - float mA, iA, wA, aA; - b2Vec2 vA, cA; - if (joint->solverIndexA == B2_NULL_INDEX) - { - mA = 0.0f; - iA = 0.0f; - cA = joint->positionA; - aA = joint->angleA; - vA = b2Vec2_zero; - wA = 0.0f; - } - else - { - bodyA = context->solverBodies + joint->solverIndexA; - mA = bodyA->invMass; - iA = bodyA->invI; - cA = b2Add(joint->positionA, bodyA->deltaPosition); - aA = joint->angleA + bodyA->deltaAngle; - vA = bodyA->linearVelocity; - wA = bodyA->angularVelocity; - } + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; - b2SolverBody* bodyB = context->solverBodies + joint->solverIndexB; - float mB = bodyB->invMass; - float iB = bodyB->invI; + b2SolverBody* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = joint->indexB == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); - float aB = joint->angleB + bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + const b2Vec2 cA = b2Add(joint->positionA, bodyA->deltaPosition); + const float aA = joint->angleA + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); + const float aB = joint->angleB + bodyB->deltaAngle; bool fixedRotation = (iA + iB == 0.0f); @@ -177,9 +172,10 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO float impulseScale = 0.0f; if (C > 0.0f) { + // speculation bias = C * context->inv_dt; } - else if (removeOverlap) + else if (useBias) { bias = joint->biasCoefficient * C; massScale = joint->massCoefficient; @@ -187,7 +183,7 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO } float Cdot = wB - wA; - float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->lowerImpulse; + float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->lowerImpulse; float oldImpulse = joint->lowerImpulse; joint->lowerImpulse = B2_MAX(joint->lowerImpulse + impulse, 0.0f); impulse = joint->lowerImpulse - oldImpulse; @@ -201,7 +197,7 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO // This also keeps the impulse positive when the limit is active. { float C = joint->upperAngle - jointAngle; - + float bias = 0.0f; float massScale = 1.0f; float impulseScale = 0.0f; @@ -209,7 +205,7 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO { bias = C * context->inv_dt; } - else if (removeOverlap) + else if (useBias) { bias = joint->biasCoefficient * C; massScale = joint->massCoefficient; @@ -229,12 +225,6 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO // Solve point-to-point constraint { - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); - - b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); - b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); - // J = [-I -r1_skew I r2_skew] // r_skew = [-ry; rx] @@ -242,26 +232,32 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO // K = [ mA+r1y^2*iA+mB+r2y^2*iB, -r1y*iA*r1x-r2y*iB*r2x] // [ -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB] + // TODO_ERIN approximate the separation similar to contacts. Test if updating K makes a difference. + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); + b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + b2Mat22 K; K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; K.cx.y = K.cy.x; K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; - b2Vec2 separation = b2Add(b2Sub(rB, rA), b2Sub(cB, cA)); b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, rB)), b2Add(vA, b2CrossSV(wA, rA))); - float biasScale = 0.0f; + b2Vec2 bias = b2Vec2_zero; float massScale = 1.0f; float impulseScale = 0.0f; - if (removeOverlap) + if (useBias) { - biasScale = joint->biasCoefficient; + b2Vec2 separation = b2Add(b2Sub(rB, rA), b2Sub(cB, cA)); + bias = b2MulSV(joint->biasCoefficient, separation); massScale = joint->massCoefficient; impulseScale = joint->impulseCoefficient; } - b2Vec2 b = b2Solve22(K, b2MulAdd(Cdot, biasScale, separation)); + b2Vec2 b = b2Solve22(K, b2Add(Cdot, bias)); b2Vec2 impulse; impulse.x = -massScale * b.x - impulseScale * joint->impulse.x; impulse.y = -massScale * b.y - impulseScale * joint->impulse.y; @@ -275,13 +271,9 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeO vB = b2MulAdd(vB, mB, impulse); wB += iB * b2Cross(rB, impulse); } - - if (bodyA != NULL) - { - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - } + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; } @@ -376,6 +368,24 @@ void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque) joint->revoluteJoint.maxMotorTorque = torque; } +b2Vec2 b2RevoluteJoint_GetConstraintForce(b2JointId jointId) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return b2Vec2_zero; + } + + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); + + b2Joint* joint = world->joints + jointId.index; + B2_ASSERT(joint->object.index == joint->object.next); + B2_ASSERT(joint->object.revision == jointId.revision); + B2_ASSERT(joint->type == b2_revoluteJoint); + return joint->revoluteJoint.impulse; +} + #if 0 void b2RevoluteJoint::Dump() { @@ -442,4 +452,8 @@ void b2DrawRevolute(b2DebugDraw* draw, b2Joint* base, b2Body* bodyA, b2Body* bod draw->DrawSegment(xfA.p, pA, color, draw->context); draw->DrawSegment(pA, pB, color, draw->context); draw->DrawSegment(xfB.p, pB, color, draw->context); + + //char buffer[32]; + //sprintf(buffer, "%.1f", b2Length(joint->impulse)); + //draw->DrawString(pA, buffer, draw->context); } diff --git a/src/weld_joint.c b/src/weld_joint.c index 45dacf2d..a512fc4a 100644 --- a/src/weld_joint.c +++ b/src/weld_joint.c @@ -38,13 +38,14 @@ void b2PrepareWeld(b2Joint* base, b2StepContext* context) B2_ASSERT(bodyB->object.index == bodyB->object.next); b2WeldJoint* joint = &base->weldJoint; + joint->indexA = context->bodyMap[indexA]; + joint->indexB = context->bodyMap[indexB]; joint->localCenterA = bodyA->localCenter; - joint->invMassA = bodyA->invMass; - joint->invIA = bodyA->invI; - joint->localCenterB = bodyB->localCenter; - joint->invMassB = bodyB->invMass; - joint->invIB = bodyB->invI; + joint->positionA = bodyA->position; + joint->positionB = bodyB->position; + joint->angleA = bodyA->angle; + joint->angleB = bodyB->angle; const float h = context->dt; @@ -58,9 +59,9 @@ void b2PrepareWeld(b2Joint* base, b2StepContext* context) const float zeta = joint->linearDampingRatio; const float omega = 2.0f * b2_pi * linearHertz; joint->linearBiasCoefficient = omega / (2.0f * zeta + h * omega); - float c = h * omega * (2.0f * zeta + h * omega); - joint->linearImpulseCoefficient = 1.0f / (1.0f + c); - joint->linearMassCoefficient = c * joint->linearImpulseCoefficient; + float a = h * omega * (2.0f * zeta + h * omega); + joint->linearImpulseCoefficient = 1.0f / (1.0f + a); + joint->linearMassCoefficient = a * joint->linearImpulseCoefficient; } float angularHertz = joint->angularHertz; @@ -73,35 +74,39 @@ void b2PrepareWeld(b2Joint* base, b2StepContext* context) const float zeta = joint->angularDampingRatio; const float omega = 2.0f * b2_pi * angularHertz; joint->angularBiasCoefficient = omega / (2.0f * zeta + h * omega); - float c = h * omega * (2.0f * zeta + h * omega); - joint->angularImpulseCoefficient = 1.0f / (1.0f + c); - joint->angularMassCoefficient = c * joint->angularImpulseCoefficient; + float a = h * omega * (2.0f * zeta + h * omega); + joint->angularImpulseCoefficient = 1.0f / (1.0f + a); + joint->angularMassCoefficient = a * joint->angularImpulseCoefficient; } joint->impulse = b2Vec3_zero; } -void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool removeOverlap) +void b2SolveWeldVelocity(b2Joint* base, const b2StepContext* context, bool useBias) { B2_ASSERT(base->type == b2_weldJoint); b2WeldJoint* joint = &base->weldJoint; - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + b2SolverBody* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexA; b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = joint->indexB == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + float mB = bodyB->invMass; + float iB = bodyB->invI; - const b2Vec2 cA = b2Add(bodyA->position, bodyA->deltaPosition); - const float aA = bodyA->angle + bodyA->deltaAngle; - const b2Vec2 cB = b2Add(bodyB->position, bodyB->deltaPosition); - const float aB = bodyB->angle + bodyB->deltaAngle; - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; + const b2Vec2 cA = b2Add(joint->positionA, bodyA->deltaPosition); + const float aA = joint->angleA + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); + const float aB = joint->angleB + bodyB->deltaAngle; b2Rot qA = b2MakeRot(aA); b2Rot qB = b2MakeRot(aB); @@ -132,7 +137,7 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r float angularBiasScale = 0.0f; float angularMassScale = 1.0f; float angularImpulseScale = 0.0f; - if (removeOverlap) + if (useBias) { linearBiasScale = joint->linearBiasCoefficient; linearMassScale = joint->linearMassCoefficient; @@ -145,12 +150,12 @@ void b2SolveWeldVelocitySoft(b2Joint* base, const b2StepContext* context, bool r b2Vec2 C1 = b2Add(b2Sub(cB, cA), b2Sub(rB, rA)); float C2 = aB - aA - joint->referenceAngle; - b2Vec3 c; - c.x = Cdot1.x + linearBiasScale * C1.x; - c.y = Cdot1.y + linearBiasScale * C1.y; - c.z = Cdot2 + angularBiasScale * C2; + b2Vec3 Cdot; + Cdot.x = Cdot1.x + linearBiasScale * C1.x; + Cdot.y = Cdot1.y + linearBiasScale * C1.y; + Cdot.z = Cdot2 + angularBiasScale * C2; - b2Vec3 b = b2Solve33(K, c); + b2Vec3 b = b2Solve33(K, Cdot); b2Vec3 impulse; impulse.x = -linearMassScale * b.x - linearImpulseScale * joint->impulse.x; impulse.y = -linearMassScale * b.y - linearImpulseScale * joint->impulse.y; diff --git a/src/world.c b/src/world.c index 99ce9444..d69028a4 100644 --- a/src/world.c +++ b/src/world.c @@ -1347,15 +1347,28 @@ void b2World_Draw(b2WorldId worldId, b2DebugDraw* draw) //} } - // if (flags & b2Draw::e_centerOfMassBit) - //{ - // for (b2Body* b = m_bodyList; b; b = b->GetNext()) - // { - // b2Transform xf = b->GetTransform(); - // xf.p = b->GetWorldCenter(); - // m_debugDraw->DrawTransform(xf); - // } - // } + if (draw->drawMass) + { + b2Vec2 offset = {0.1f, 0.1f}; + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + draw->DrawTransform(body->transform, draw->context); + + b2Vec2 p = b2TransformPoint(body->transform, offset); + + char buffer[32]; + sprintf(buffer, "%.1f", body->mass); + draw->DrawString(p, buffer, draw->context); + } + } } void b2World_EnableSleeping(b2WorldId worldId, bool flag) From 0d58f34357c85d4e49bbaa0f9162671624900ed2 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 24 Sep 2023 23:22:59 -0700 Subject: [PATCH 28/51] AVX wip --- src/CMakeLists.txt | 6 +- src/body.h | 15 ++- src/contact_solver.c | 288 ++++++++++++++++++++++++++++++++++++++++++- src/graph.c | 4 + src/solver_data.h | 5 +- src/world.c | 7 +- 6 files changed, 309 insertions(+), 16 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a7203fff..b06fcdcd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -84,15 +84,15 @@ message(STATUS "CMake system name: ${CMAKE_SYSTEM_NAME}") if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") target_compile_options(box2d PRIVATE /W4 /WX /experimental:c11atomics) - # target_compile_options(box2d PRIVATE /arch:AVX2 /fp:fast /fp:except-) + target_compile_options(box2d PRIVATE /arch:AVX) # target_compile_options(box2d PRIVATE /arch:SSE2 /fp:except-) # target_compile_definitions(box2d PUBLIC "$<$:B2_ENABLE_ASSERT>") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") message(STATUS "CMake using Clang-CL") - target_compile_options(box2d PRIVATE /W4 /WX /std:c17) + target_compile_options(box2d PRIVATE /W4 /WX /std:c17 /arch:AVX) endif() else() - target_compile_options(box2d PRIVATE -Wall -Wextra -Wpedantic -Werror) + target_compile_options(box2d PRIVATE -Wall -Wextra -Wpedantic -Werror -mavx) endif() find_library(MATH_LIBRARY m) diff --git a/src/body.h b/src/body.h index ed0c1730..55b6821b 100644 --- a/src/body.h +++ b/src/body.h @@ -85,19 +85,18 @@ typedef struct b2Body // TODO_ERIN every non-static body gets a solver body. No solver bodies for static bodies to avoid cross thread sharing and the cache misses they bring. // Keep two solver body arrays: awake and sleeping +// 12 + 12 + 8 = 32 bytes typedef struct b2SolverBody { - b2Vec2 linearVelocity; - float angularVelocity; + b2Vec2 linearVelocity; // 8 + float angularVelocity; // 4 // These are the change in position/angle that accumulate across constraint substeps - b2Vec2 deltaPosition; - float deltaAngle; - - float invMass; - float invI; + b2Vec2 deltaPosition; // 8 + float deltaAngle; // 4 - int32_t bodyIndex; + float invMass; // 4 + float invI; // 4 } b2SolverBody; bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB); diff --git a/src/contact_solver.c b/src/contact_solver.c index 5aeabe82..43837c8c 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -10,6 +10,10 @@ #include "graph.h" #include "world.h" +#include +// or superset +// #include + #define maxBaumgarteVelocity 3.0f // TODO_ERIN prepare contact constraints directly in collision phase? @@ -71,7 +75,7 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon float wB = solverBodyB->angularVelocity; float mB = solverBodyB->invMass; float iB = solverBodyB->invI; - + constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; // Stiffer for static contacts to avoid bodies getting pushed through the ground @@ -427,6 +431,288 @@ static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBod bodyB->angularVelocity = wB; } +typedef struct b2ContactConstraintAVX +{ + b2Contact* contacts[8]; + int32_t indexA[8]; + int32_t indexB[8]; + + __m256 rAx1, rAy1, rAz1; + __m256 rBx1, rBy1, rBz1; + __m256 rAx2, rAy2, rAz2; + __m256 rBx2, rBy2, rBz2; + __m256 separation1, separation2; + __m256 normalImpulse1, normalImpulse2; + __m256 tangentImpulse1, tangentImpulse2; + __m256 normalMass1, tangentMass1; + __m256 normalMass2, tangentMass2; + __m256 normalX, normalY, normalZ; + __m256 friction; + __m256 massCoefficient; + __m256 biasCoefficient; + __m256 impulseCoefficient; +} b2ContactConstraintAVX; + +typedef struct b2SimdBody +{ + __m256 vx, vy; + __m256 w; + __m256 dpx, dpy; + __m256 da; + __m256 invM, invI; +} b2SimdBody; + +// This is a load and 8x8 transpose +static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) +{ + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 zero = _mm256_setzero_ps(); + __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); + __m256 b1 = indices[1] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[1])); + __m256 b2 = indices[2] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[2])); + __m256 b3 = indices[3] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[3])); + __m256 b4 = indices[4] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[4])); + __m256 b5 = indices[5] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[5])); + __m256 b6 = indices[6] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[6])); + __m256 b7 = indices[7] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[7])); + + __m256 t0 = _mm256_unpacklo_ps(b0, b1); + __m256 t1 = _mm256_unpackhi_ps(b0, b1); + __m256 t2 = _mm256_unpacklo_ps(b2, b3); + __m256 t3 = _mm256_unpackhi_ps(b2, b3); + __m256 t4 = _mm256_unpacklo_ps(b4, b5); + __m256 t5 = _mm256_unpackhi_ps(b4, b5); + __m256 t6 = _mm256_unpacklo_ps(b6, b7); + __m256 t7 = _mm256_unpackhi_ps(b6, b7); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + b2SimdBody simdBody; + simdBody.vx = _mm256_permute2f128_ps(tt0, tt4, 0x20); + simdBody.vy = _mm256_permute2f128_ps(tt1, tt5, 0x20); + simdBody.w = _mm256_permute2f128_ps(tt2, tt6, 0x20); + simdBody.dpx = _mm256_permute2f128_ps(tt3, tt7, 0x20); + simdBody.dpy = _mm256_permute2f128_ps(tt0, tt4, 0x31); + simdBody.da = _mm256_permute2f128_ps(tt1, tt5, 0x31); + simdBody.invM = _mm256_permute2f128_ps(tt2, tt6, 0x31); + simdBody.invI = _mm256_permute2f128_ps(tt3, tt7, 0x31); + + return simdBody; +} + +// This writes everything back but only the velocities change +static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) +{ + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 t0 = _mm256_unpacklo_ps(simdBody->vx, simdBody->vy); + __m256 t1 = _mm256_unpackhi_ps(simdBody->vx, simdBody->vy); + __m256 t2 = _mm256_unpacklo_ps(simdBody->w, simdBody->dpx); + __m256 t3 = _mm256_unpackhi_ps(simdBody->w, simdBody->dpx); + __m256 t4 = _mm256_unpacklo_ps(simdBody->dpy, simdBody->da); + __m256 t5 = _mm256_unpackhi_ps(simdBody->dpy, simdBody->da); + __m256 t6 = _mm256_unpacklo_ps(simdBody->invM, simdBody->invI); + __m256 t7 = _mm256_unpackhi_ps(simdBody->invM, simdBody->invI); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + if (indices[0] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[0]), _mm256_permute2f128_ps(tt0, tt4, 0x20)); + if (indices[1] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[1]), _mm256_permute2f128_ps(tt1, tt5, 0x20)); + if (indices[2] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[2]), _mm256_permute2f128_ps(tt2, tt6, 0x20)); + if (indices[3] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[3]), _mm256_permute2f128_ps(tt3, tt7, 0x20)); + if (indices[4] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[4]), _mm256_permute2f128_ps(tt0, tt4, 0x31)); + if (indices[5] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[5]), _mm256_permute2f128_ps(tt1, tt5, 0x31)); + if (indices[6] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); + if (indices[7] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); +} + +#define add(a, b) _mm256_add_ps((a), (b)) +#define sub(a, b) _mm256_sub_ps((a), (b)) +#define mul(a, b) _mm256_mul_ps((a), (b)) + +static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, bool useBias) +{ + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); + + __m256 useBiasMul = useBias ? _mm256_setzero_ps() : _mm256_set1_ps(1.0f); + __m256 invDtMul = _mm256_set1_ps(inv_dt); + __m256 minBiasVel = _mm256_set1_ps(-maxBaumgarteVelocity); + + // b2Vec2 tangent = b2RightPerp(normal); + // float friction = constraint->friction; + // float biasCoefficient = constraint->biasCoefficient; + // float massCoefficient = constraint->massCoefficient; + // float impulseCoefficient = constraint->impulseCoefficient; + + { + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dpx, mul(bB.da, c->rBy1)), sub(bA.dpx, mul(bA.da, c->rAy1))); + __m256 pry = sub(add(bB.dpy, mul(bB.da, c->rBx1)), add(bA.dpy, mul(bA.da, c->rAx1))); + __m256 ds = add(mul(prx, c->normalX), mul(pry, c->normalY)); + + __m256 s = add(c->separation1, ds); + + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = mul(_mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); + + // Relative velocity at contact + __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy1)), sub(bA.vx, mul(bA.w, c->rAy1))); + __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx1)), add(bA.vy, mul(bA.w, c->rAx1))); + __m256 vn = add(mul(dvx, c->normalX), mul(dvy, c->normalY)); + + //// Compute normal impulse + __m256 impulse = sub(_mm256_setzero_ps(), add(mul(cp->normalMass, mul(massScale, add(vn + bias) - impulseScale * cp->normalImpulse; + //// float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + + //// Clamp the accumulated impulse + // float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + // impulse = newImpulse - cp->normalImpulse; + // cp->normalImpulse = newImpulse; + + //// Apply contact impulse + // b2Vec2 P = b2MulSV(impulse, normal); + // vA = b2MulSub(vA, mA, P); + // wA -= iA * b2Cross(cp->rA, P); + + // vB = b2MulAdd(vB, mB, P); + // wB += iB * b2Cross(cp->rB, P); + } + +#if 0 + { + b2ContactConstraintPoint* cp = constraint->points + 1; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + { + b2ContactConstraintPoint* cp = constraint->points + 0; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + { + b2ContactConstraintPoint* cp = constraint->points + 1; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; +#endif +} + void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); diff --git a/src/graph.c b/src/graph.c index c98cf580..6922488e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -743,6 +743,10 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); + // Map from solver body to body + // TODO_ERIN have body directly reference solver body for user access + int32_t* solverBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); + int32_t bodyCapacity = world->bodyPool.capacity; int32_t* bodyMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); memset(bodyMap, 0xFF, bodyCapacity * sizeof(int32_t)); diff --git a/src/solver_data.h b/src/solver_data.h index c51252a9..12ee7e32 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -33,8 +33,11 @@ typedef struct b2StepContext // Map from world body pool index to solver body int32_t* bodyMap; + // Map from solver body to world body + int32_t* solverBodyMap; + struct b2SolverBody* solverBodies; - int32_t bodyCount; + int32_t solverBodyCount; bool enableWarmStarting; } b2StepContext; diff --git a/src/world.c b/src/world.c index d69028a4..486abd00 100644 --- a/src/world.c +++ b/src/world.c @@ -934,16 +934,17 @@ static void b2Solve(b2World* world, b2StepContext* context) b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); } + b2MergeAwakeIslands(world); + world->profile.buildIslands = 0.0f; - b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); + b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); b2SolveGraph(world, context); - //b2SolveGraphSoftStep(world, context); b2ValidateNoEnlarged(&world->broadPhase); - b2TracyCZoneEnd(island_solver); + b2TracyCZoneEnd(graph_solver); world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); From 92b7c81ff9f44c70a21ab8574b8c795f3ad0169b Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Mon, 25 Sep 2023 22:44:40 -0700 Subject: [PATCH 29/51] wip --- src/contact_solver.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index 43837c8c..81a85567 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -574,7 +574,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); __m256 specBias = mul(s, invDtMul); - __m256 softBias = mul(_mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); + __m256 softBias = _mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); // Relative velocity at contact @@ -582,22 +582,26 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx1)), add(bA.vy, mul(bA.w, c->rAx1))); __m256 vn = add(mul(dvx, c->normalX), mul(dvy, c->normalY)); - //// Compute normal impulse - __m256 impulse = sub(_mm256_setzero_ps(), add(mul(cp->normalMass, mul(massScale, add(vn + bias) - impulseScale * cp->normalImpulse; - //// float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + // Compute normal impulse + __m256 negImpulse = add(mul(c->normalMass1, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse1)); + // float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - //// Clamp the accumulated impulse - // float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - // impulse = newImpulse - cp->normalImpulse; - // cp->normalImpulse = newImpulse; + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse1); + c->normalImpulse1 = newImpulse; - //// Apply contact impulse - // b2Vec2 P = b2MulSV(impulse, normal); - // vA = b2MulSub(vA, mA, P); - // wA -= iA * b2Cross(cp->rA, P); + // Apply contact impulse + __m256 Px = mul(impulse, c->normalX); + __m256 Py = mul(impulse, c->normalY); - // vB = b2MulAdd(vB, mB, P); - // wB += iB * b2Cross(cp->rB, P); + bA.vx = sub(bA.vx, mul(bA.invM, Px)); + bA.vy = sub(bA.vy, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx1, Py), mul(c->rAy1, Px)))); + + bB.vx = add(bB.vx, mul(bB.invM, Px)); + bB.vy = add(bB.vy, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx1, Py), mul(c->rBy1, Px)))); } #if 0 @@ -634,7 +638,6 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol // Compute normal impulse float vn = b2Dot(dv, normal); float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); // Clamp the accumulated impulse float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); From add7ec68d6d5d216b4f25ca78d147f34bb6dde2a Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 26 Sep 2023 23:01:46 -0700 Subject: [PATCH 30/51] more avx --- src/CMakeLists.txt | 1 + src/contact_solver.c | 136 ++++++++++++++++++++++++++----------------- 2 files changed, 83 insertions(+), 54 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b06fcdcd..840fe321 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -83,6 +83,7 @@ message(STATUS "CMake system name: ${CMAKE_SYSTEM_NAME}") if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + # target_compile_options(box2d PRIVATE /W4 /WX) target_compile_options(box2d PRIVATE /W4 /WX /experimental:c11atomics) target_compile_options(box2d PRIVATE /arch:AVX) # target_compile_options(box2d PRIVATE /arch:SSE2 /fp:except-) diff --git a/src/contact_solver.c b/src/contact_solver.c index 81a85567..4a7ea61d 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -558,12 +558,11 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 invDtMul = _mm256_set1_ps(inv_dt); __m256 minBiasVel = _mm256_set1_ps(-maxBaumgarteVelocity); - // b2Vec2 tangent = b2RightPerp(normal); - // float friction = constraint->friction; // float biasCoefficient = constraint->biasCoefficient; // float massCoefficient = constraint->massCoefficient; // float impulseCoefficient = constraint->impulseCoefficient; + // first point non-penetration constraint { // Compute change in separation (small angle approximation of sin(angle) == angle) __m256 prx = sub(sub(bB.dpx, mul(bB.da, c->rBy1)), sub(bA.dpx, mul(bA.da, c->rAy1))); @@ -604,83 +603,112 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx1, Py), mul(c->rBy1, Px)))); } -#if 0 + // second point non-penetration constraint { - b2ContactConstraintPoint* cp = constraint->points + 1; + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dpx, mul(bB.da, c->rBy2)), sub(bA.dpx, mul(bA.da, c->rAy2))); + __m256 pry = sub(add(bB.dpy, mul(bB.da, c->rBx2)), add(bA.dpy, mul(bA.da, c->rAx2))); + __m256 ds = add(mul(prx, c->normalX), mul(pry, c->normalY)); - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + __m256 s = add(c->separation2, ds); - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = _mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); + + // Relative velocity at contact + __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy2)), sub(bA.vx, mul(bA.w, c->rAy2))); + __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx2)), add(bA.vy, mul(bA.w, c->rAx2))); + __m256 vn = add(mul(dvx, c->normalX), mul(dvy, c->normalY)); // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + __m256 negImpulse = add(mul(c->normalMass2, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse2)); // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse2); + c->normalImpulse2 = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); + __m256 Px = mul(impulse, c->normalX); + __m256 Py = mul(impulse, c->normalY); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); + bA.vx = sub(bA.vx, mul(bA.invM, Px)); + bA.vy = sub(bA.vy, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx2, Py), mul(c->rAy2, Px)))); + + bB.vx = add(bB.vx, mul(bB.invM, Px)); + bB.vy = add(bB.vy, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx2, Py), mul(c->rBy2, Px)))); } + __m256 tangentX = c->normalY; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normalX); + // float friction = constraint->friction; + + // first point friction constraint { - b2ContactConstraintPoint* cp = constraint->points + 0; + // Relative velocity at contact + __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy1)), sub(bA.vx, mul(bA.w, c->rAy1))); + __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx1)), add(bA.vy, mul(bA.w, c->rAx1))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); + + // Compute tangent force + __m256 negImpulse = mul(c->tangentMass1, vt); + + // Clamp the accumulated force + __m256 maxFriction = mul(c->friction, c->normalImpulse1); + __m256 newImpulse = sub(c->tangentImpulse1, negImpulse); + newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse1); + c->tangentImpulse1 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); + + bA.vx = sub(bA.vx, mul(bA.invM, Px)); + bA.vy = sub(bA.vy, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx1, Py), mul(c->rAy1, Px)))); + + bB.vx = add(bB.vx, mul(bB.invM, Px)); + bB.vy = add(bB.vy, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx1, Py), mul(c->rBy1, Px)))); + } + // second point friction constraint + { // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy2)), sub(bA.vx, mul(bA.w, c->rAy2))); + __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx2)), add(bA.vy, mul(bA.w, c->rAx2))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); + __m256 negImpulse = mul(c->tangentMass2, vt); // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; + __m256 maxFriction = mul(c->friction, c->normalImpulse2); + __m256 newImpulse = sub(c->tangentImpulse2, negImpulse); + newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse2); + c->tangentImpulse2 = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); + bA.vx = sub(bA.vx, mul(bA.invM, Px)); + bA.vy = sub(bA.vy, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx2, Py), mul(c->rAy2, Px)))); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); + bB.vx = add(bB.vx, mul(bB.invM, Px)); + bB.vy = add(bB.vy, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx2, Py), mul(c->rBy2, Px)))); } +#if 0 + { b2ContactConstraintPoint* cp = constraint->points + 1; From 2a3b07c2552b6fc374bd4b3cca852966a8ca92e4 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 4 Oct 2023 23:21:34 -0700 Subject: [PATCH 31/51] avx stuff --- src/contact_solver.c | 609 +++++++++++++++++++++++++++++-------------- src/contact_solver.h | 31 +++ src/graph.h | 2 + src/solver_data.h | 2 + 4 files changed, 444 insertions(+), 200 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index 4a7ea61d..5c84807d 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -16,6 +16,107 @@ #define maxBaumgarteVelocity 3.0f +#define add(a, b) _mm256_add_ps((a), (b)) +#define sub(a, b) _mm256_sub_ps((a), (b)) +#define mul(a, b) _mm256_mul_ps((a), (b)) + +static inline __m256 b2CrossW(b2Vec2W a, b2Vec2W b) +{ + return sub(mul(a.X, b.Y), mul(a.Y, b.X)); +} + +typedef struct b2SimdBody +{ + b2Vec2W v; + __m256 w; + b2Vec2W dp; + __m256 da; + __m256 invM, invI; +} b2SimdBody; + +// This is a load and 8x8 transpose +static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) +{ + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 zero = _mm256_setzero_ps(); + __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); + __m256 b1 = indices[1] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[1])); + __m256 b2 = indices[2] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[2])); + __m256 b3 = indices[3] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[3])); + __m256 b4 = indices[4] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[4])); + __m256 b5 = indices[5] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[5])); + __m256 b6 = indices[6] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[6])); + __m256 b7 = indices[7] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[7])); + + __m256 t0 = _mm256_unpacklo_ps(b0, b1); + __m256 t1 = _mm256_unpackhi_ps(b0, b1); + __m256 t2 = _mm256_unpacklo_ps(b2, b3); + __m256 t3 = _mm256_unpackhi_ps(b2, b3); + __m256 t4 = _mm256_unpacklo_ps(b4, b5); + __m256 t5 = _mm256_unpackhi_ps(b4, b5); + __m256 t6 = _mm256_unpacklo_ps(b6, b7); + __m256 t7 = _mm256_unpackhi_ps(b6, b7); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + b2SimdBody simdBody; + simdBody.v.X = _mm256_permute2f128_ps(tt0, tt4, 0x20); + simdBody.v.Y = _mm256_permute2f128_ps(tt1, tt5, 0x20); + simdBody.w = _mm256_permute2f128_ps(tt2, tt6, 0x20); + simdBody.dp.X = _mm256_permute2f128_ps(tt3, tt7, 0x20); + simdBody.dp.Y = _mm256_permute2f128_ps(tt0, tt4, 0x31); + simdBody.da = _mm256_permute2f128_ps(tt1, tt5, 0x31); + simdBody.invM = _mm256_permute2f128_ps(tt2, tt6, 0x31); + simdBody.invI = _mm256_permute2f128_ps(tt3, tt7, 0x31); + + return simdBody; +} + +// This writes everything back but only the velocities change +static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) +{ + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 t0 = _mm256_unpacklo_ps(simdBody->v.X, simdBody->v.Y); + __m256 t1 = _mm256_unpackhi_ps(simdBody->v.X, simdBody->v.Y); + __m256 t2 = _mm256_unpacklo_ps(simdBody->w, simdBody->dp.X); + __m256 t3 = _mm256_unpackhi_ps(simdBody->w, simdBody->dp.X); + __m256 t4 = _mm256_unpacklo_ps(simdBody->dp.Y, simdBody->da); + __m256 t5 = _mm256_unpackhi_ps(simdBody->dp.Y, simdBody->da); + __m256 t6 = _mm256_unpacklo_ps(simdBody->invM, simdBody->invI); + __m256 t7 = _mm256_unpackhi_ps(simdBody->invM, simdBody->invI); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + if (indices[0] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[0]), _mm256_permute2f128_ps(tt0, tt4, 0x20)); + if (indices[1] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[1]), _mm256_permute2f128_ps(tt1, tt5, 0x20)); + if (indices[2] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[2]), _mm256_permute2f128_ps(tt2, tt6, 0x20)); + if (indices[3] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[3]), _mm256_permute2f128_ps(tt3, tt7, 0x20)); + if (indices[4] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[4]), _mm256_permute2f128_ps(tt0, tt4, 0x31)); + if (indices[5] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[5]), _mm256_permute2f128_ps(tt1, tt5, 0x31)); + if (indices[6] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); + if (indices[7] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); +} + // TODO_ERIN prepare contact constraints directly in collision phase? void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { @@ -132,6 +233,187 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon b2TracyCZoneEnd(prepare_contact); } +void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); + + b2World* world = context->world; + b2Contact* contacts = world->contacts; + const int32_t* bodyMap = context->bodyMap; + const int32_t* contactIndices = context->contactIndices; + b2SolverBody* solverBodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->constraintAVXs; + + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; + + float h = context->timeStep; + + B2_ASSERT((startIndex & 0x7) == 0); + B2_ASSERT((endIndex & 0x7) == 0); + + int32_t vectorIndex = 0; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + b2ContactConstraintAVX* constraint = NULL; + constraint = constraints + (i >> 3); + constraint->indexA[vectorIndex] = indexA; + constraint->indexB[vectorIndex] = indexB; + + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; + + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float d = (2.0f * zeta + h * omega); + float c = h * omega * d; + float impulseCoefficient = 1.0f / (1.0f + c); + + ((float*)&constraint->friction)[vectorIndex] = contact->friction; + ((float*)&constraint->impulseCoefficient)[vectorIndex] = impulseCoefficient; + ((float*)&constraint->massCoefficient)[vectorIndex] = c * impulseCoefficient; + ((float*)&constraint->biasCoefficient)[vectorIndex] = omega / d; + + b2Vec2 normal = manifold->normal; + ((float*)&constraint->normal.X)[vectorIndex] = normal.x; + ((float*)&constraint->normal.Y)[vectorIndex] = normal.y; + + b2Vec2 tangent = b2RightPerp(normal); + + { + const b2ManifoldPoint* mp = manifold->points + 0; + ((float*)&constraint->separation1)[vectorIndex] = mp->separation; + ((float*)&constraint->normalImpulse1)[vectorIndex] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse1)[vectorIndex] = mp->tangentImpulse; + + ((float*)&constraint->rA1.X)[vectorIndex] = mp->anchorA.x; + ((float*)&constraint->rA1.Y)[vectorIndex] = mp->anchorA.y; + ((float*)&constraint->rB1.X)[vectorIndex] = mp->anchorB.x; + ((float*)&constraint->rB1.Y)[vectorIndex] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass1)[vectorIndex] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass1)[vectorIndex] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + } + + int32_t pointCount = manifold->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); + + if (pointCount == 2) + { + const b2ManifoldPoint* mp = manifold->points + 1; + ((float*)&constraint->separation2)[vectorIndex] = mp->separation; + ((float*)&constraint->normalImpulse2)[vectorIndex] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse2)[vectorIndex] = mp->tangentImpulse; + + ((float*)&constraint->rA2.X)[vectorIndex] = mp->anchorA.x; + ((float*)&constraint->rA2.Y)[vectorIndex] = mp->anchorA.y; + ((float*)&constraint->rB2.X)[vectorIndex] = mp->anchorB.x; + ((float*)&constraint->rB2.Y)[vectorIndex] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass2)[vectorIndex] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass2)[vectorIndex] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + } + else + { + // dummy data that has no effect + ((float*)&constraint->separation2)[vectorIndex] = 0.0f; + ((float*)&constraint->normalImpulse2)[vectorIndex] = 0.0f; + ((float*)&constraint->tangentImpulse2)[vectorIndex] = 0.0f; + ((float*)&constraint->rA2.X)[vectorIndex] = 0.0f; + ((float*)&constraint->rA2.Y)[vectorIndex] = 0.0f; + ((float*)&constraint->rB2.X)[vectorIndex] = 0.0f; + ((float*)&constraint->rB2.Y)[vectorIndex] = 0.0f; + ((float*)&constraint->normalMass2)[vectorIndex] = 0.0f; + ((float*)&constraint->tangentMass2)[vectorIndex] = 0.0f; + } + + // Cycle [0-7] + vectorIndex = (vectorIndex + 1) & 0x7; + } + + b2TracyCZoneEnd(prepare_contact); +} + +void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +{ + b2TracyCZoneNC(warm_start_contact, "Warm Start Contact", b2_colorGreen1, true); + + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraintAVX* c = constraints + i; + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); + + __m256 tangentX = c->normal.Y; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); + + { + b2Vec2W P; + P.X = add(mul(c->normalImpulse1, c->normal.X), mul(c->tangentImpulse1, tangentX)); + P.Y = add(mul(c->normalImpulse1, c->normal.Y), mul(c->tangentImpulse1, tangentY)); + bA.w = _mm256_fnmadd_ps(bA.invI, b2CrossW(c->rA1, P), bA.w); + bA.v.X = _mm256_fnmadd_ps(bA.invM, P.X, bA.v.X); + bA.v.Y = _mm256_fnmadd_ps(bA.invM, P.Y, bA.v.Y); + bB.w = _mm256_fmadd_ps(bB.invI, b2CrossW(c->rB1, P), bB.w); + bB.v.X = _mm256_fmadd_ps(bB.invM, P.X, bB.v.X); + bB.v.Y = _mm256_fmadd_ps(bB.invM, P.Y, bB.v.Y); + } + + { + b2Vec2W P; + P.X = add(mul(c->normalImpulse2, c->normal.X), mul(c->tangentImpulse2, tangentX)); + P.Y = add(mul(c->normalImpulse2, c->normal.Y), mul(c->tangentImpulse2, tangentY)); + bA.w = _mm256_fnmadd_ps(bA.invI, b2CrossW(c->rA2, P), bA.w); + bA.v.X = _mm256_fnmadd_ps(bA.invM, P.X, bA.v.X); + bA.v.Y = _mm256_fnmadd_ps(bA.invM, P.Y, bA.v.Y); + bB.w = _mm256_fmadd_ps(bB.invI, b2CrossW(c->rB2, P), bB.w); + bB.v.X = _mm256_fmadd_ps(bB.invM, P.X, bB.v.X); + bB.v.Y = _mm256_fmadd_ps(bB.invM, P.Y, bB.v.Y); + } + + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); + } + + b2TracyCZoneEnd(prepare_contact); +} + static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) { // This is a dummy body to represent a static body since static bodies don't have a solver body. @@ -431,124 +713,6 @@ static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBod bodyB->angularVelocity = wB; } -typedef struct b2ContactConstraintAVX -{ - b2Contact* contacts[8]; - int32_t indexA[8]; - int32_t indexB[8]; - - __m256 rAx1, rAy1, rAz1; - __m256 rBx1, rBy1, rBz1; - __m256 rAx2, rAy2, rAz2; - __m256 rBx2, rBy2, rBz2; - __m256 separation1, separation2; - __m256 normalImpulse1, normalImpulse2; - __m256 tangentImpulse1, tangentImpulse2; - __m256 normalMass1, tangentMass1; - __m256 normalMass2, tangentMass2; - __m256 normalX, normalY, normalZ; - __m256 friction; - __m256 massCoefficient; - __m256 biasCoefficient; - __m256 impulseCoefficient; -} b2ContactConstraintAVX; - -typedef struct b2SimdBody -{ - __m256 vx, vy; - __m256 w; - __m256 dpx, dpy; - __m256 da; - __m256 invM, invI; -} b2SimdBody; - -// This is a load and 8x8 transpose -static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) -{ - B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); - __m256 zero = _mm256_setzero_ps(); - __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); - __m256 b1 = indices[1] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[1])); - __m256 b2 = indices[2] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[2])); - __m256 b3 = indices[3] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[3])); - __m256 b4 = indices[4] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[4])); - __m256 b5 = indices[5] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[5])); - __m256 b6 = indices[6] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[6])); - __m256 b7 = indices[7] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[7])); - - __m256 t0 = _mm256_unpacklo_ps(b0, b1); - __m256 t1 = _mm256_unpackhi_ps(b0, b1); - __m256 t2 = _mm256_unpacklo_ps(b2, b3); - __m256 t3 = _mm256_unpackhi_ps(b2, b3); - __m256 t4 = _mm256_unpacklo_ps(b4, b5); - __m256 t5 = _mm256_unpackhi_ps(b4, b5); - __m256 t6 = _mm256_unpacklo_ps(b6, b7); - __m256 t7 = _mm256_unpackhi_ps(b6, b7); - __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); - - b2SimdBody simdBody; - simdBody.vx = _mm256_permute2f128_ps(tt0, tt4, 0x20); - simdBody.vy = _mm256_permute2f128_ps(tt1, tt5, 0x20); - simdBody.w = _mm256_permute2f128_ps(tt2, tt6, 0x20); - simdBody.dpx = _mm256_permute2f128_ps(tt3, tt7, 0x20); - simdBody.dpy = _mm256_permute2f128_ps(tt0, tt4, 0x31); - simdBody.da = _mm256_permute2f128_ps(tt1, tt5, 0x31); - simdBody.invM = _mm256_permute2f128_ps(tt2, tt6, 0x31); - simdBody.invI = _mm256_permute2f128_ps(tt3, tt7, 0x31); - - return simdBody; -} - -// This writes everything back but only the velocities change -static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) -{ - B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); - __m256 t0 = _mm256_unpacklo_ps(simdBody->vx, simdBody->vy); - __m256 t1 = _mm256_unpackhi_ps(simdBody->vx, simdBody->vy); - __m256 t2 = _mm256_unpacklo_ps(simdBody->w, simdBody->dpx); - __m256 t3 = _mm256_unpackhi_ps(simdBody->w, simdBody->dpx); - __m256 t4 = _mm256_unpacklo_ps(simdBody->dpy, simdBody->da); - __m256 t5 = _mm256_unpackhi_ps(simdBody->dpy, simdBody->da); - __m256 t6 = _mm256_unpacklo_ps(simdBody->invM, simdBody->invI); - __m256 t7 = _mm256_unpackhi_ps(simdBody->invM, simdBody->invI); - __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); - __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); - __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); - - if (indices[0] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[0]), _mm256_permute2f128_ps(tt0, tt4, 0x20)); - if (indices[1] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[1]), _mm256_permute2f128_ps(tt1, tt5, 0x20)); - if (indices[2] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[2]), _mm256_permute2f128_ps(tt2, tt6, 0x20)); - if (indices[3] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[3]), _mm256_permute2f128_ps(tt3, tt7, 0x20)); - if (indices[4] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[4]), _mm256_permute2f128_ps(tt0, tt4, 0x31)); - if (indices[5] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[5]), _mm256_permute2f128_ps(tt1, tt5, 0x31)); - if (indices[6] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); - if (indices[7] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); -} - -#define add(a, b) _mm256_add_ps((a), (b)) -#define sub(a, b) _mm256_sub_ps((a), (b)) -#define mul(a, b) _mm256_mul_ps((a), (b)) - static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, bool useBias) { b2SimdBody bA = b2GatherBodies(bodies, c->indexA); @@ -565,9 +729,9 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol // first point non-penetration constraint { // Compute change in separation (small angle approximation of sin(angle) == angle) - __m256 prx = sub(sub(bB.dpx, mul(bB.da, c->rBy1)), sub(bA.dpx, mul(bA.da, c->rAy1))); - __m256 pry = sub(add(bB.dpy, mul(bB.da, c->rBx1)), add(bA.dpy, mul(bA.da, c->rAx1))); - __m256 ds = add(mul(prx, c->normalX), mul(pry, c->normalY)); + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB1.Y)), sub(bA.dp.X, mul(bA.da, c->rA1.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB1.X)), add(bA.dp.Y, mul(bA.da, c->rA1.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); __m256 s = add(c->separation1, ds); @@ -577,9 +741,9 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); // Relative velocity at contact - __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy1)), sub(bA.vx, mul(bA.w, c->rAy1))); - __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx1)), add(bA.vy, mul(bA.w, c->rAx1))); - __m256 vn = add(mul(dvx, c->normalX), mul(dvy, c->normalY)); + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); // Compute normal impulse __m256 negImpulse = add(mul(c->normalMass1, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse1)); @@ -591,24 +755,24 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol c->normalImpulse1 = newImpulse; // Apply contact impulse - __m256 Px = mul(impulse, c->normalX); - __m256 Py = mul(impulse, c->normalY); + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - bA.vx = sub(bA.vx, mul(bA.invM, Px)); - bA.vy = sub(bA.vy, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx1, Py), mul(c->rAy1, Px)))); + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - bB.vx = add(bB.vx, mul(bB.invM, Px)); - bB.vy = add(bB.vy, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx1, Py), mul(c->rBy1, Px)))); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); } // second point non-penetration constraint { // Compute change in separation (small angle approximation of sin(angle) == angle) - __m256 prx = sub(sub(bB.dpx, mul(bB.da, c->rBy2)), sub(bA.dpx, mul(bA.da, c->rAy2))); - __m256 pry = sub(add(bB.dpy, mul(bB.da, c->rBx2)), add(bA.dpy, mul(bA.da, c->rAx2))); - __m256 ds = add(mul(prx, c->normalX), mul(pry, c->normalY)); + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB2.Y)), sub(bA.dp.X, mul(bA.da, c->rA2.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB2.X)), add(bA.dp.Y, mul(bA.da, c->rA2.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); __m256 s = add(c->separation2, ds); @@ -618,9 +782,9 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); // Relative velocity at contact - __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy2)), sub(bA.vx, mul(bA.w, c->rAy2))); - __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx2)), add(bA.vy, mul(bA.w, c->rAx2))); - __m256 vn = add(mul(dvx, c->normalX), mul(dvy, c->normalY)); + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); // Compute normal impulse __m256 negImpulse = add(mul(c->normalMass2, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse2)); @@ -631,27 +795,27 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol c->normalImpulse2 = newImpulse; // Apply contact impulse - __m256 Px = mul(impulse, c->normalX); - __m256 Py = mul(impulse, c->normalY); + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - bA.vx = sub(bA.vx, mul(bA.invM, Px)); - bA.vy = sub(bA.vy, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx2, Py), mul(c->rAy2, Px)))); + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - bB.vx = add(bB.vx, mul(bB.invM, Px)); - bB.vy = add(bB.vy, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx2, Py), mul(c->rBy2, Px)))); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); } - __m256 tangentX = c->normalY; - __m256 tangentY = sub(_mm256_setzero_ps(), c->normalX); + __m256 tangentX = c->normal.Y; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); // float friction = constraint->friction; // first point friction constraint { // Relative velocity at contact - __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy1)), sub(bA.vx, mul(bA.w, c->rAy1))); - __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx1)), add(bA.vy, mul(bA.w, c->rAx1))); + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); // Compute tangent force @@ -668,20 +832,20 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 Px = mul(impulse, tangentX); __m256 Py = mul(impulse, tangentY); - bA.vx = sub(bA.vx, mul(bA.invM, Px)); - bA.vy = sub(bA.vy, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx1, Py), mul(c->rAy1, Px)))); + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - bB.vx = add(bB.vx, mul(bB.invM, Px)); - bB.vy = add(bB.vy, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx1, Py), mul(c->rBy1, Px)))); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); } // second point friction constraint { // Relative velocity at contact - __m256 dvx = sub(sub(bB.vx, mul(bB.w, c->rBy2)), sub(bA.vx, mul(bA.w, c->rAy2))); - __m256 dvy = sub(add(bB.vy, mul(bB.w, c->rBx2)), add(bA.vy, mul(bA.w, c->rAx2))); + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); // Compute tangent force @@ -698,50 +862,14 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 Px = mul(impulse, tangentX); __m256 Py = mul(impulse, tangentY); - bA.vx = sub(bA.vx, mul(bA.invM, Px)); - bA.vy = sub(bA.vy, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rAx2, Py), mul(c->rAy2, Px)))); - - bB.vx = add(bB.vx, mul(bB.invM, Px)); - bB.vy = add(bB.vy, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rBx2, Py), mul(c->rBy2, Px)))); - } - -#if 0 - - { - b2ContactConstraintPoint* cp = constraint->points + 1; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -#endif } void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) @@ -796,3 +924,84 @@ void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskConte b2TracyCZoneEnd(store_impulses); } + +void b2StoreImpulsesTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); + + b2Contact* contacts = context->world->contacts; + const b2ContactConstraintAVX* constraints = context->constraintAVXs; + const int32_t* indices = context->contactIndices; + + b2Manifold dummy = {0}; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + const b2ContactConstraintAVX* c = constraints + i; + const float* normalImpulse1 = (float*)&c->normalImpulse1; + const float* normalImpulse2 = (float*)&c->normalImpulse2; + const float* tangentImpulse1 = (float*)&c->tangentImpulse1; + const float* tangentImpulse2 = (float*)&c->tangentImpulse2; + + const int32_t* base = indices + 8 * i; + int32_t index0 = base[0]; + int32_t index1 = base[1]; + int32_t index2 = base[2]; + int32_t index3 = base[3]; + int32_t index4 = base[4]; + int32_t index5 = base[5]; + int32_t index6 = base[6]; + int32_t index7 = base[7]; + + b2Manifold* m0 = index0 == B2_NULL_INDEX ? &dummy : &contacts[index0].manifold; + b2Manifold* m1 = index1 == B2_NULL_INDEX ? &dummy : &contacts[index1].manifold; + b2Manifold* m2 = index2 == B2_NULL_INDEX ? &dummy : &contacts[index2].manifold; + b2Manifold* m3 = index3 == B2_NULL_INDEX ? &dummy : &contacts[index3].manifold; + b2Manifold* m4 = index4 == B2_NULL_INDEX ? &dummy : &contacts[index4].manifold; + b2Manifold* m5 = index5 == B2_NULL_INDEX ? &dummy : &contacts[index5].manifold; + b2Manifold* m6 = index6 == B2_NULL_INDEX ? &dummy : &contacts[index6].manifold; + b2Manifold* m7 = index7 == B2_NULL_INDEX ? &dummy : &contacts[index7].manifold; + + m0->points[0].normalImpulse = normalImpulse1[0]; + m0->points[0].tangentImpulse = tangentImpulse1[0]; + m0->points[1].normalImpulse = normalImpulse2[0]; + m0->points[1].tangentImpulse = tangentImpulse2[0]; + + m1->points[0].normalImpulse = normalImpulse1[1]; + m1->points[0].tangentImpulse = tangentImpulse1[1]; + m1->points[1].normalImpulse = normalImpulse2[1]; + m1->points[1].tangentImpulse = tangentImpulse2[1]; + + m2->points[0].normalImpulse = normalImpulse1[2]; + m2->points[0].tangentImpulse = tangentImpulse1[2]; + m2->points[1].normalImpulse = normalImpulse2[2]; + m2->points[1].tangentImpulse = tangentImpulse2[2]; + + m3->points[0].normalImpulse = normalImpulse1[3]; + m3->points[0].tangentImpulse = tangentImpulse1[3]; + m3->points[1].normalImpulse = normalImpulse2[3]; + m3->points[1].tangentImpulse = tangentImpulse2[3]; + + m4->points[0].normalImpulse = normalImpulse1[4]; + m4->points[0].tangentImpulse = tangentImpulse1[4]; + m4->points[1].normalImpulse = normalImpulse2[4]; + m4->points[1].tangentImpulse = tangentImpulse2[4]; + + m5->points[0].normalImpulse = normalImpulse1[5]; + m5->points[0].tangentImpulse = tangentImpulse1[5]; + m5->points[1].normalImpulse = normalImpulse2[5]; + m5->points[1].tangentImpulse = tangentImpulse2[5]; + + m6->points[0].normalImpulse = normalImpulse1[6]; + m6->points[0].tangentImpulse = tangentImpulse1[6]; + m6->points[1].normalImpulse = normalImpulse2[6]; + m6->points[1].tangentImpulse = tangentImpulse2[6]; + + m7->points[0].normalImpulse = normalImpulse1[7]; + m7->points[0].tangentImpulse = tangentImpulse1[7]; + m7->points[1].normalImpulse = normalImpulse2[7]; + m7->points[1].tangentImpulse = tangentImpulse2[7]; + } + + b2TracyCZoneEnd(store_impulses); +} diff --git a/src/contact_solver.h b/src/contact_solver.h index 74463213..f4376d79 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -5,6 +5,8 @@ #include "solver_data.h" +#include + typedef struct b2Contact b2Contact; typedef struct b2ContactConstraintPoint @@ -37,6 +39,35 @@ typedef struct b2ContactConstraint b2ContactConstraintType type; } b2ContactConstraint; +typedef __m256 b2Float8; + +typedef struct b2Vec2W +{ + b2Float8 X, Y; +} b2Vec2W; + +typedef struct b2ContactConstraintAVX +{ + int32_t indexA[8]; + int32_t indexB[8]; + + b2Vec2W normal; + __m256 friction; + b2Vec2W rA1, rB1; + b2Vec2W rA2, rB2; + __m256 separation1, separation2; + __m256 normalImpulse1, normalImpulse2; + __m256 tangentImpulse1, tangentImpulse2; + __m256 normalMass1, tangentMass1; + __m256 normalMass2, tangentMass2; + __m256 massCoefficient; + __m256 biasCoefficient; + __m256 impulseCoefficient; +} b2ContactConstraintAVX; + +void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); + void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.h b/src/graph.h index 5716bf45..f9c8fdb8 100644 --- a/src/graph.h +++ b/src/graph.h @@ -8,6 +8,7 @@ typedef struct b2Contact b2Contact; typedef struct b2ContactConstraint b2ContactConstraint; +typedef struct b2ContactConstraintAVX b2ContactConstraintAVX; typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; @@ -23,6 +24,7 @@ typedef struct b2GraphColor // transient b2ContactConstraint* contactConstraints; + b2ContactConstraintAVX* contactConstraintAVXs; } b2GraphColor; typedef struct b2Graph diff --git a/src/solver_data.h b/src/solver_data.h index 12ee7e32..0a246c02 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -85,9 +85,11 @@ typedef struct b2SolverTaskContext struct b2Body** awakeBodies; struct b2SolverBody* solverBodies; int32_t* bodyMap; + int32_t* contactIndices; b2StepContext* stepContext; struct b2ContactConstraint* constraints; + struct b2ContactConstraintAVX* constraintAVXs; int32_t activeColorCount; int32_t velocityIterations; int32_t calmIterations; From 3a3eb3620805fdf14e0f8c3ba83b8b5054ec9d79 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Thu, 5 Oct 2023 23:02:53 -0700 Subject: [PATCH 32/51] wip hooking up avx --- samples/sample.cpp | 8 +- src/contact_solver.c | 27 +- src/contact_solver.h | 2 + src/graph.c | 637 ++++++++++++++++++++++++++++++++++++++++--- src/graph.h | 2 + src/mouse_joint.c | 2 +- src/revolute_joint.c | 4 +- src/solver_data.h | 10 +- src/weld_joint.c | 4 +- 9 files changed, 643 insertions(+), 53 deletions(-) diff --git a/samples/sample.cpp b/samples/sample.cpp index af7ebd59..617f8641 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -68,10 +68,10 @@ Sample::Sample(const Settings& settings) m_taskCount = 0; b2WorldDef worldDef = b2DefaultWorldDef(); - worldDef.workerCount = maxThreads; - worldDef.enqueueTask = &EnqueueTask; - worldDef.finishTask = &FinishTask; - worldDef.finishAllTasks = &FinishAllTasks; + //worldDef.workerCount = maxThreads; + //worldDef.enqueueTask = &EnqueueTask; + //worldDef.finishTask = &FinishTask; + //worldDef.finishAllTasks = &FinishAllTasks; worldDef.bodyCapacity = 1024; worldDef.contactCapacity = 4 * 1024; worldDef.userTaskContext = this; diff --git a/src/contact_solver.c b/src/contact_solver.c index 5c84807d..e8fd49d6 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -127,7 +127,7 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon b2GraphColor* color = graph->colors + colorIndex; int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; - const int32_t* bodyMap = context->bodyMap; + const int32_t* bodyMap = context->bodyToSolverMap; b2SolverBody* solverBodies = context->solverBodies; // This is a dummy body to represent a static body since static bodies don't have a solver body. @@ -233,16 +233,17 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon b2TracyCZoneEnd(prepare_contact); } +// TODO_ERIN use b2GraphColor::contactArray to handle empty AVX lanes at the end of each color constraint array, but how to parallel-for? void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); b2World* world = context->world; b2Contact* contacts = world->contacts; - const int32_t* bodyMap = context->bodyMap; - const int32_t* contactIndices = context->contactIndices; + const int32_t* bodyMap = context->bodyToSolverMap; b2SolverBody* solverBodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->constraintAVXs; + b2Graph* graph = context->graph; // This is a dummy body to represent a static body since static bodies don't have a solver body. b2SolverBody dummyBody = {0}; @@ -254,9 +255,6 @@ void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTask float h = context->timeStep; - B2_ASSERT((startIndex & 0x7) == 0); - B2_ASSERT((endIndex & 0x7) == 0); - int32_t vectorIndex = 0; for (int32_t i = startIndex; i < endIndex; ++i) @@ -902,6 +900,23 @@ void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskConte b2TracyCZoneEnd(solve_contact); } +void b2SolveContactAVXsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +{ + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); + + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; + float inv_dt = context->invTimeStep; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraintAVX* constraint = constraints + i; + b2SolveContactTwoPointsAVX(constraint, bodies, inv_dt, useBias); + } + + b2TracyCZoneEnd(solve_contact); +} + void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); diff --git a/src/contact_solver.h b/src/contact_solver.h index f4376d79..dc7d7ced 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -67,6 +67,8 @@ typedef struct b2ContactConstraintAVX void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContactAVXsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2StoreImpulsesTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); diff --git a/src/graph.c b/src/graph.c index 6922488e..653f17e8 100644 --- a/src/graph.c +++ b/src/graph.c @@ -203,7 +203,7 @@ static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2So b2Vec2 gravity = context->world->gravity; b2Body** bodies = context->awakeBodies; b2SolverBody* solverBodies = context->solverBodies; - int32_t* bodyMap = context->bodyMap; + int32_t* bodyToSolverMap = context->bodyToSolverMap; float h = context->timeStep; @@ -215,7 +215,7 @@ static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2So // create body map used to prepare constraints B2_ASSERT(body->object.index < context->world->bodyPool.capacity); - bodyMap[body->object.index] = i; + bodyToSolverMap[body->object.index] = i; float invMass = body->invMass; float invI = body->invI; @@ -246,8 +246,6 @@ static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2So solverBody->invMass = invMass; solverBody->invI = invI; - - solverBody->bodyIndex = body->object.index; } b2TracyCZoneEnd(integrate_velocity); @@ -316,8 +314,9 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2World* world = context->world; b2Body* bodies = world->bodies; - b2SolverBody* solverBodies = context->solverBodies; + const b2SolverBody* solverBodies = context->solverBodies; b2Contact* contacts = world->contacts; + const int32_t* solverToBodyMap = context->solverToBodyMap; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; @@ -329,9 +328,9 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv for (int32_t i = startIndex; i < endIndex; ++i) { - b2SolverBody* solverBody = solverBodies + i; + const b2SolverBody* solverBody = solverBodies + i; - b2Body* body = bodies + solverBody->bodyIndex; + b2Body* body = bodies + solverToBodyMap[i]; body->linearVelocity = solverBody->linearVelocity; body->angularVelocity = solverBody->angularVelocity; @@ -383,6 +382,7 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2TracyCZoneEnd(finalize_positions); } +#if 0 static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) { b2SolverStageType type = stage->type; @@ -419,6 +419,50 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i } } +#else + +// AVX +static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) +{ + b2SolverStageType type = stage->type; + + switch (type) + { + case b2_stageIntegrateVelocities: + b2IntegrateVelocitiesTask(startIndex, endIndex, context); + break; + + case b2_stagePrepareContacts: + b2PrepareContactsTaskAVX(startIndex, endIndex, context); + break; + + case b2_stageWarmStartContacts: + b2WarmStartContactConstraints(startIndex, endIndex, context, stage->colorIndex); + break; + + case b2_stageSolveContacts: + b2SolveContactAVXsTask(startIndex, endIndex, context, stage->colorIndex, true); + break; + + case b2_stageIntegratePositions: + b2IntegratePositionsTask(startIndex, endIndex, context); + break; + + case b2_stageFinalizePositions: + b2FinalizePositionsTask(startIndex, endIndex, context, workerIndex); + break; + + case b2_stageCalmContacts: + b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, false); + break; + + case b2_stageStoreImpulses: + b2StoreImpulsesTaskAVX(startIndex, endIndex, context); + break; + } +} +#endif + static inline int32_t GetWorkerStartIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) { if (blockCount <= workerCount) @@ -514,14 +558,6 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex B2_ASSERT(syncIndex > 0); int previousSyncIndex = syncIndex - 1; - if (stage->type == b2_stagePrepareContacts) - { - if (stage->blockCount > 0 && stage->blocks[0].syncIndex > 1) - { - stage->type += 0; - } - } - b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, 0); while (atomic_load(&stage->completionCount) != blockCount) @@ -533,7 +569,8 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } -// This should not use the thread index because thread 0 should not be called twice, which is possible with work stealing. +#if 0 +// This should not use the thread index because thread 0 can be called twice by enkiTS. void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) { B2_MAYBE_UNUSED(startIndex); @@ -558,8 +595,9 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont /* b2_stageIntegrateVelocities = 0, - b2_stagePrepareJoints, b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, @@ -569,14 +607,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2_stageStoreImpulses */ - if (stages[3].type == b2_stagePrepareContacts) - { - for (int32_t i = 0; i < stages[3].blockCount; ++i) - { - B2_ASSERT(stages[3].blocks[i].syncIndex == 0); - } - } - int32_t bodySyncIndex = 1; int32_t stageIndex = 0; uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; @@ -699,6 +729,177 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont lastSyncBits = syncBits; } } +#else + +// AVX +// This should not use the thread index because thread 0 can be called twice by enkiTS. +void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) +{ + B2_MAYBE_UNUSED(startIndex); + B2_MAYBE_UNUSED(endIndex); + B2_MAYBE_UNUSED(threadIndexDontUse); + + b2WorkerContext* workerContext = taskContext; + int32_t workerIndex = workerContext->workerIndex; + b2SolverTaskContext* context = workerContext->context; + int32_t activeColorCount = context->activeColorCount; + b2SolverStage* stages = context->stages; + + if (workerIndex == 0) + { + // Main thread synchronizes the workers and does work itself. + // + // Stages are re-used for loops so that I don't need more stages for large iteration counts. + // The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks. + // The stage index and sync indices are combined in to sync bits for atomic synchronization. + // The workers need to compute the previous sync index for a given stage so that CAS works correctly. This + // setup makes this easy to do. + + /* + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageStoreImpulses + */ + + int32_t bodySyncIndex = 1; + int32_t stageIndex = 0; + uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + stageIndex += 1; + bodySyncIndex += 1; + + uint32_t constraintSyncIndex = 1; + syncBits = (constraintSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + stageIndex += 1; + constraintSyncIndex += 1; + + int32_t graphSyncIndex = 1; + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) + { + syncBits = (graphSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageWarmStartContacts); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + stageIndex += 1; + } + graphSyncIndex += 1; + + // TODO_ERIN single threaded + B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); + b2PrepareJointsTask(context); + stageIndex += 1; + + int32_t velocityIterations = context->velocityIterations; + for (int32_t i = 0; i < velocityIterations; ++i) + { + // stage index restarted each iteration + int32_t iterStageIndex = stageIndex; + + // TODO_ERIN single threaded + B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveJoints); + b2SolveJointsTask(context, true); + iterStageIndex += 1; + + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) + { + syncBits = (graphSyncIndex << 16) | iterStageIndex; + B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveContacts); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); + iterStageIndex += 1; + } + graphSyncIndex += 1; + + B2_ASSERT(stages[iterStageIndex].type == b2_stageIntegratePositions); + syncBits = (bodySyncIndex << 16) | iterStageIndex; + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); + bodySyncIndex += 1; + } + + stageIndex += 1 + activeColorCount + 1; + + syncBits = (bodySyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + stageIndex += 1; + + int32_t calmIterations = context->calmIterations; + for (int32_t i = 0; i < calmIterations; ++i) + { + // stage index restarted each iteration + int32_t iterStageIndex = stageIndex; + + B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmJoints); + b2SolveJointsTask(context, false); + iterStageIndex += 1; + + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) + { + syncBits = (graphSyncIndex << 16) | iterStageIndex; + B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmContacts); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); + iterStageIndex += 1; + } + graphSyncIndex += 1; + } + + stageIndex += 1 + activeColorCount; + + syncBits = (constraintSyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + + // Signal workers to finish + atomic_store(&context->syncBits, UINT_MAX); + + B2_ASSERT(stageIndex + 1 == context->stageCount); + return; + } + + // Worker + uint32_t lastSyncBits = 0; + + while (true) + { + // Spin until main thread bumps changes the sync bits + uint32_t syncBits = atomic_load(&context->syncBits); + while (syncBits == lastSyncBits) + { + _mm_pause(); + syncBits = atomic_load(&context->syncBits); + } + + if (syncBits == UINT_MAX) + { + // sentinel hit + break; + } + + int32_t stageIndex = syncBits & 0xFFFF; + B2_ASSERT(stageIndex < context->stageCount); + + int32_t syncIndex = (syncBits >> 16) & 0xFFFF; + B2_ASSERT(syncIndex > 0); + + int32_t previousSyncIndex = syncIndex - 1; + + b2SolverStage* stage = stages + stageIndex; + b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, workerIndex); + + lastSyncBits = syncBits; + } +} +#endif + // Threading: // 1. build array of awake bodies, maybe copy to contiguous array @@ -718,6 +919,8 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont // bias removal stage to help remove excess bias energy. // http://mmacklin.com/smallsteps.pdf // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf + +#if 0 void b2SolveGraph(b2World* world, b2StepContext* stepContext) { b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); @@ -745,11 +948,11 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) // Map from solver body to body // TODO_ERIN have body directly reference solver body for user access - int32_t* solverBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); + int32_t* solverToBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); int32_t bodyCapacity = world->bodyPool.capacity; - int32_t* bodyMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); - memset(bodyMap, 0xFF, bodyCapacity * sizeof(int32_t)); + int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); + memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) @@ -766,7 +969,8 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) awakeBodies[index] = body; B2_ASSERT(0 < bodyIndex && bodyIndex < bodyCapacity); - bodyMap[bodyIndex] = index; + bodyToSolverMap[bodyIndex] = index; + solverToBodyMap[index] = bodyIndex; // cache miss bodyIndex = body->islandNext; @@ -843,8 +1047,9 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) /* b2_stageIntegrateVelocities = 0, - b2_stagePrepareJoints, b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, @@ -1017,16 +1222,17 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t velIters = B2_MAX(1, stepContext->velocityIterations); - stepContext->bodyMap = bodyMap; stepContext->solverBodies = solverBodies; - stepContext->bodyCount = awakeBodyCount; + stepContext->solverToBodyMap = solverToBodyMap; + stepContext->bodyToSolverMap = bodyToSolverMap; b2SolverTaskContext context; context.world = world; context.graph = graph; context.awakeBodies = awakeBodies; context.solverBodies = solverBodies; - context.bodyMap = bodyMap; + context.bodyToSolverMap = bodyToSolverMap; + context.solverToBodyMap = solverToBodyMap; context.stepContext = stepContext; context.constraints = constraints; context.activeColorCount = activeColorCount; @@ -1058,7 +1264,370 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); b2FreeStackItem(world->stackAllocator, constraints); - b2FreeStackItem(world->stackAllocator, bodyMap); + b2FreeStackItem(world->stackAllocator, bodyToSolverMap); + b2FreeStackItem(world->stackAllocator, solverToBodyMap); b2FreeStackItem(world->stackAllocator, solverBodies); b2FreeStackItem(world->stackAllocator, awakeBodies); } + +#else + +void b2SolveGraph(b2World* world, b2StepContext* stepContext) +{ + b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); + + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; + int32_t awakeBodyCount = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + awakeBodyCount += island->bodyCount; + } + + if (awakeBodyCount == 0) + { + return; + } + + b2Body* bodies = world->bodies; + b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); + b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); + + // Map from solver body to body + // TODO_ERIN have body directly reference solver body for user access + int32_t* solverToBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); + + int32_t bodyCapacity = world->bodyPool.capacity; + int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); + memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); + + int32_t index = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); + B2_ASSERT(body->object.index == bodyIndex); + + awakeBodies[index] = body; + + B2_ASSERT(0 < bodyIndex && bodyIndex < bodyCapacity); + bodyToSolverMap[bodyIndex] = index; + solverToBodyMap[index] = bodyIndex; + + // cache miss + bodyIndex = body->islandNext; + + index += 1; + } + } + B2_ASSERT(index == awakeBodyCount); + + int32_t workerCount = world->workerCount; + const int32_t blocksPerWorker = 6; + + int32_t bodyBlockSize = 1 << 5; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 5) + 1; + if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) + { + bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); + bodyBlockCount = blocksPerWorker * workerCount; + } + + int32_t activeColorIndices[b2_graphColorCount]; + int32_t colorConstraintCounts[b2_graphColorCount]; + int32_t colorBlockSize[b2_graphColorCount]; + int32_t colorBlockCounts[b2_graphColorCount]; + + int32_t activeColorCount = 0; + int32_t graphBlockCount = 0; + int32_t constraintCount = 0; + + int32_t c = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + int32_t count = b2Array(colors[i].contactArray).count; + if (count > 0) + { + int32_t avxCount = ((count - 1) >> 3) + 1; + activeColorIndices[c] = i; + colorConstraintCounts[c] = avxCount; + int32_t blockCount = avxCount; + + // TODO_ERIN work stealing unit is 1 AVX constraint + colorBlockSize[c] = 1; + + colorBlockCounts[c] = blockCount; + graphBlockCount += blockCount; + constraintCount += avxCount; + c += 1; + } + } + activeColorCount = c; + + b2ContactConstraintAVX* constraints = + b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "constraint"); + + int32_t base = 0; + + for (int32_t i = 0; i < activeColorCount; ++i) + { + int32_t j = activeColorIndices[i]; + colors[j].contactConstraintAVXs = constraints + base; + base += colorConstraintCounts[j]; + } + + int32_t storeBlockSize = 1 << 4; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 4) + 1 : 0; + if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) + { + storeBlockSize = constraintCount / (blocksPerWorker * workerCount); + storeBlockCount = blocksPerWorker * workerCount; + } + + /* + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageStoreImpulses + */ + + // TODO_ERIN joint tasks + int32_t stageCount = 0; + + // b2_stageIntegrateVelocities + stageCount += 1; + // b2_stagePrepareContacts + stageCount += 1; + // b2_stageWarmStartContacts + stageCount += activeColorCount; + // b2_stagePrepareJoints + stageCount += 1; + // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions + stageCount += 1 + activeColorCount + 1; + // b2_stageFinalizePositions + stageCount += 1; + // b2_stageCalmJoints, b2_stageCalmContacts + stageCount += 1 + activeColorCount; + // b2_stageStoreImpulses + stageCount += 1; + + b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); + b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); + b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); + b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); + + for (int32_t i = 0; i < bodyBlockCount; ++i) + { + b2SolverBlock* block = bodyBlocks + i; + block->startIndex = i * bodyBlockSize; + block->endIndex = block->startIndex + bodyBlockSize; + block->syncIndex = 0; + } + bodyBlocks[bodyBlockCount - 1].endIndex = awakeBodyCount; + + b2SolverBlock* colorBlocks[b2_graphColorCount]; + b2SolverBlock* baseGraphBlock = graphBlocks; + + for (int32_t i = 0; i < activeColorCount; ++i) + { + int32_t blockCount = colorBlockCounts[i]; + int32_t blockSize = colorBlockSize[i]; + for (int32_t j = 0; j < blockCount; ++j) + { + b2SolverBlock* block = baseGraphBlock + j; + block->startIndex = j * blockSize; + block->endIndex = block->startIndex + blockSize; + atomic_store(&block->syncIndex, 0); + } + baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; + + colorBlocks[i] = baseGraphBlock; + baseGraphBlock += blockCount; + } + + for (int32_t i = 0; i < storeBlockCount; ++i) + { + b2SolverBlock* block = storeBlocks + i; + block->startIndex = i * storeBlockSize; + block->endIndex = block->startIndex + storeBlockSize; + block->syncIndex = 0; + } + + if (storeBlockCount > 0) + { + storeBlocks[storeBlockCount - 1].endIndex = constraintCount; + } + + b2SolverStage* stage = stages; + + // Integrate velocities + stage->type = b2_stageIntegrateVelocities; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Prepare constraints + stage->type = b2_stagePrepareContacts; + stage->blocks = storeBlocks; + stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Warm start contacts + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageWarmStartContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Prepare joints + stage->type = b2_stagePrepareJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Solve joints + stage->type = b2_stageSolveJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Solve constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageSolveContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Integrate positions + stage->type = b2_stageIntegratePositions; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Finalize positions + stage->type = b2_stageFinalizePositions; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm joints + stage->type = b2_stageCalmJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageCalmContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Store impulses + stage->type = b2_stageStoreImpulses; + stage->blocks = storeBlocks; + stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + B2_ASSERT((int32_t)(stage - stages) == stageCount); + + B2_ASSERT(workerCount <= 16); + b2WorkerContext workerContext[16]; + + int32_t velIters = B2_MAX(1, stepContext->velocityIterations); + + stepContext->solverBodies = solverBodies; + stepContext->solverToBodyMap = solverToBodyMap; + stepContext->bodyToSolverMap = bodyToSolverMap; + + b2SolverTaskContext context; + context.world = world; + context.graph = graph; + context.awakeBodies = awakeBodies; + context.solverBodies = solverBodies; + context.bodyToSolverMap = bodyToSolverMap; + context.solverToBodyMap = solverToBodyMap; + context.stepContext = stepContext; + context.constraints = NULL; + context.constraintAVXs = constraints; + context.activeColorCount = activeColorCount; + context.velocityIterations = velIters; + context.calmIterations = stepContext->positionIterations; + context.workerCount = workerCount; + context.stageCount = stageCount; + context.stages = stages; + context.timeStep = stepContext->dt; + context.invTimeStep = stepContext->inv_dt; + context.subStep = context.timeStep / velIters; + context.invSubStep = velIters * stepContext->inv_dt; + context.syncBits = 0; + + b2TracyCZoneEnd(prepare_stages); + + // Must use worker index because thread 0 can be assigned multiple tasks by enkiTS + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + } + + world->finishAllTasksFcn(world->userTaskContext); + + b2FreeStackItem(world->stackAllocator, storeBlocks); + b2FreeStackItem(world->stackAllocator, graphBlocks); + b2FreeStackItem(world->stackAllocator, bodyBlocks); + b2FreeStackItem(world->stackAllocator, stages); + b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, bodyToSolverMap); + b2FreeStackItem(world->stackAllocator, solverToBodyMap); + b2FreeStackItem(world->stackAllocator, solverBodies); + b2FreeStackItem(world->stackAllocator, awakeBodies); +} + +#endif + diff --git a/src/graph.h b/src/graph.h index f9c8fdb8..a8f7d9a8 100644 --- a/src/graph.h +++ b/src/graph.h @@ -24,6 +24,8 @@ typedef struct b2GraphColor // transient b2ContactConstraint* contactConstraints; + + // TODO_ERIN these could be split up by worker so that workers get a contiguous array of constraints across colors b2ContactConstraintAVX* contactConstraintAVXs; } b2GraphColor; diff --git a/src/mouse_joint.c b/src/mouse_joint.c index d6988481..2142c07c 100644 --- a/src/mouse_joint.c +++ b/src/mouse_joint.c @@ -44,7 +44,7 @@ void b2PrepareMouse(b2Joint* base, b2StepContext* context) B2_ASSERT(bodyB->object.index == bodyB->object.next); b2MouseJoint* joint = &base->mouseJoint; - joint->indexB = context->bodyMap[indexB]; + joint->indexB = context->bodyToSolverMap[indexB]; joint->localCenterB = bodyB->localCenter; b2Vec2 cB = bodyB->position; diff --git a/src/revolute_joint.c b/src/revolute_joint.c index a4ef1f84..002f0b1f 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -39,8 +39,8 @@ void b2PrepareRevolute(b2Joint* base, b2StepContext* context) b2RevoluteJoint* joint = &base->revoluteJoint; - joint->indexA = context->bodyMap[indexA]; - joint->indexB = context->bodyMap[indexB]; + joint->indexA = context->bodyToSolverMap[indexA]; + joint->indexB = context->bodyToSolverMap[indexB]; joint->localCenterA = bodyA->localCenter; joint->localCenterB = bodyB->localCenter; joint->positionA = bodyA->position; diff --git a/src/solver_data.h b/src/solver_data.h index 0a246c02..bad501cf 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -31,10 +31,10 @@ typedef struct b2StepContext int32_t bodyCapacity; // Map from world body pool index to solver body - int32_t* bodyMap; + const int32_t* bodyToSolverMap; // Map from solver body to world body - int32_t* solverBodyMap; + const int32_t* solverToBodyMap; struct b2SolverBody* solverBodies; int32_t solverBodyCount; @@ -45,8 +45,9 @@ typedef struct b2StepContext typedef enum b2SolverStageType { b2_stageIntegrateVelocities = 0, - b2_stagePrepareJoints, b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, @@ -84,7 +85,8 @@ typedef struct b2SolverTaskContext struct b2Graph* graph; struct b2Body** awakeBodies; struct b2SolverBody* solverBodies; - int32_t* bodyMap; + int32_t* bodyToSolverMap; + int32_t* solverToBodyMap; int32_t* contactIndices; b2StepContext* stepContext; diff --git a/src/weld_joint.c b/src/weld_joint.c index a512fc4a..85c00edd 100644 --- a/src/weld_joint.c +++ b/src/weld_joint.c @@ -38,8 +38,8 @@ void b2PrepareWeld(b2Joint* base, b2StepContext* context) B2_ASSERT(bodyB->object.index == bodyB->object.next); b2WeldJoint* joint = &base->weldJoint; - joint->indexA = context->bodyMap[indexA]; - joint->indexB = context->bodyMap[indexB]; + joint->indexA = context->bodyToSolverMap[indexA]; + joint->indexB = context->bodyToSolverMap[indexB]; joint->localCenterA = bodyA->localCenter; joint->localCenterB = bodyB->localCenter; joint->positionA = bodyA->position; From 2906e7ead2dff61545b6208de855c312d62003e9 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 6 Oct 2023 21:48:26 -0700 Subject: [PATCH 33/51] avx working --- samples/collection/sample_vertical_stack.cpp | 4 +- samples/main.cpp | 8 +- samples/sample.cpp | 10 +- src/CMakeLists.txt | 7 +- src/allocate.c | 12 +- src/box2d.natvis | 27 +++ src/contact_solver.c | 243 +++++++++++-------- src/graph.c | 86 ++++--- src/stack_allocator.c | 16 +- 9 files changed, 258 insertions(+), 155 deletions(-) create mode 100644 src/box2d.natvis diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index 319ec6c8..b97758e8 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -55,8 +55,8 @@ class VerticalStack : public Sample m_bullets[i] = b2_nullBodyId; } - m_shapeType = e_boxShape; - m_rowCount = 14; + m_shapeType = e_circleShape; + m_rowCount = 1; m_columnCount = g_sampleDebug ? 1 : e_maxColumns; m_bulletCount = 1; m_bulletType = e_circleShape; diff --git a/samples/main.cpp b/samples/main.cpp index 6a89e273..6dab3954 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -55,12 +55,12 @@ static float s_framebufferScale = 1.0f; void* AllocFcn(uint32_t size) { - size_t size16 = ((size - 1) | 0xF) + 1; - assert((size16 & 0xF) == 0); + size_t size32 = ((size - 1) | 0x1F) + 1; + assert((size32 & 0x1F) == 0); #if defined(_WIN64) - void* ptr = _aligned_malloc(size16, 16); + void* ptr = _aligned_malloc(size32, 32); #else - void* ptr = aligned_alloc(16, size16); + void* ptr = aligned_alloc(32, size32); #endif return ptr; } diff --git a/samples/sample.cpp b/samples/sample.cpp index 617f8641..9892935b 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -63,15 +63,15 @@ Sample::Sample(const Settings& settings) b2Vec2 gravity = {0.0f, -10.0f}; // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = 16;// enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = 8;// enki::GetNumHardwareThreads() / 2; m_scheduler.Initialize(maxThreads); m_taskCount = 0; b2WorldDef worldDef = b2DefaultWorldDef(); - //worldDef.workerCount = maxThreads; - //worldDef.enqueueTask = &EnqueueTask; - //worldDef.finishTask = &FinishTask; - //worldDef.finishAllTasks = &FinishAllTasks; + worldDef.workerCount = maxThreads; + worldDef.enqueueTask = &EnqueueTask; + worldDef.finishTask = &FinishTask; + worldDef.finishAllTasks = &FinishAllTasks; worldDef.bodyCapacity = 1024; worldDef.contactCapacity = 4 * 1024; worldDef.userTaskContext = this; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 840fe321..c4eb1009 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,7 +71,12 @@ set(BOX2D_API_FILES ../include/box2d/types.h ) -add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES}) +if (MSVC) + add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES} box2d.natvis) +else() + add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES}) +endif() + target_include_directories(box2d PUBLIC $ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/allocate.c b/src/allocate.c index 66ba872f..f1840766 100644 --- a/src/allocate.c +++ b/src/allocate.c @@ -50,17 +50,23 @@ void* b2Alloc(uint32_t size) { void* ptr = b2_allocFcn(size); b2TracyCAlloc(ptr, size); + + B2_ASSERT(((uintptr_t)ptr & 0x1F) == 0); + return ptr; } - uint32_t size16 = ((size - 1) | 0xF) + 1; + uint32_t size32 = ((size - 1) | 0x1F) + 1; #ifdef B2_PLATFORM_WINDOWS - void* ptr = _aligned_malloc(size16, 16); + void* ptr = _aligned_malloc(size32, 32); #else - void* ptr = aligned_alloc(16, size16); + void* ptr = aligned_alloc(32, size32); #endif b2TracyCAlloc(ptr, size); + + B2_ASSERT(((uintptr_t)ptr & 0x1F) == 0); + return ptr; } diff --git a/src/box2d.natvis b/src/box2d.natvis new file mode 100644 index 00000000..89e4f92b --- /dev/null +++ b/src/box2d.natvis @@ -0,0 +1,27 @@ + + + + [{m128_f32[0]}, {m128_f32[1]}, {m128_f32[2]}, {m128_f32[3]}] + + m128_f32[0] + m128_f32[1] + m128_f32[2] + m128_f32[3] + (void*)this + + + + [{m256_f32[0]}, {m256_f32[1]}, {m256_f32[2]}, {m256_f32[3]}, {m256_f32[4]}, {m256_f32[5]}, {m256_f32[6]}, {m256_f32[7]}] + + m256_f32[0] + m256_f32[1] + m256_f32[2] + m256_f32[3] + m256_f32[4] + m256_f32[5] + m256_f32[6] + m256_f32[7] + (void*)this + + + diff --git a/src/contact_solver.c b/src/contact_solver.c index e8fd49d6..cc84cbb9 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -37,6 +37,7 @@ typedef struct b2SimdBody // This is a load and 8x8 transpose static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) { + _Static_assert(sizeof(b2SolverBody) == 32); B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); __m256 zero = _mm256_setzero_ps(); __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); @@ -81,6 +82,7 @@ static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* r // This writes everything back but only the velocities change static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) { + _Static_assert(sizeof(b2SolverBody) == 32); B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); __m256 t0 = _mm256_unpacklo_ps(simdBody->v.X, simdBody->v.Y); __m256 t1 = _mm256_unpackhi_ps(simdBody->v.X, simdBody->v.Y); @@ -233,7 +235,6 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon b2TracyCZoneEnd(prepare_contact); } -// TODO_ERIN use b2GraphColor::contactArray to handle empty AVX lanes at the end of each color constraint array, but how to parallel-for? void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); @@ -243,7 +244,7 @@ void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTask const int32_t* bodyMap = context->bodyToSolverMap; b2SolverBody* solverBodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->constraintAVXs; - b2Graph* graph = context->graph; + const int32_t* contactIndices = context->contactIndices; // This is a dummy body to represent a static body since static bodies don't have a solver body. b2SolverBody dummyBody = {0}; @@ -255,111 +256,146 @@ void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTask float h = context->timeStep; - int32_t vectorIndex = 0; - for (int32_t i = startIndex; i < endIndex; ++i) { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; - int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; - - b2ContactConstraintAVX* constraint = NULL; - constraint = constraints + (i >> 3); - constraint->indexA[vectorIndex] = indexA; - constraint->indexB[vectorIndex] = indexB; - - b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; - b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; - float mA = solverBodyA->invMass; - float iA = solverBodyA->invI; - float mB = solverBodyB->invMass; - float iB = solverBodyB->invI; - - float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - float d = (2.0f * zeta + h * omega); - float c = h * omega * d; - float impulseCoefficient = 1.0f / (1.0f + c); - - ((float*)&constraint->friction)[vectorIndex] = contact->friction; - ((float*)&constraint->impulseCoefficient)[vectorIndex] = impulseCoefficient; - ((float*)&constraint->massCoefficient)[vectorIndex] = c * impulseCoefficient; - ((float*)&constraint->biasCoefficient)[vectorIndex] = omega / d; - - b2Vec2 normal = manifold->normal; - ((float*)&constraint->normal.X)[vectorIndex] = normal.x; - ((float*)&constraint->normal.Y)[vectorIndex] = normal.y; - - b2Vec2 tangent = b2RightPerp(normal); - - { - const b2ManifoldPoint* mp = manifold->points + 0; - ((float*)&constraint->separation1)[vectorIndex] = mp->separation; - ((float*)&constraint->normalImpulse1)[vectorIndex] = mp->normalImpulse; - ((float*)&constraint->tangentImpulse1)[vectorIndex] = mp->tangentImpulse; - - ((float*)&constraint->rA1.X)[vectorIndex] = mp->anchorA.x; - ((float*)&constraint->rA1.Y)[vectorIndex] = mp->anchorA.y; - ((float*)&constraint->rB1.X)[vectorIndex] = mp->anchorB.x; - ((float*)&constraint->rB1.Y)[vectorIndex] = mp->anchorB.y; - - float rnA = b2Cross(mp->anchorA, normal); - float rnB = b2Cross(mp->anchorB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - ((float*)&constraint->normalMass1)[vectorIndex] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - - float rtA = b2Cross(mp->anchorA, tangent); - float rtB = b2Cross(mp->anchorB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - ((float*)&constraint->tangentMass1)[vectorIndex] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - } - - int32_t pointCount = manifold->pointCount; - B2_ASSERT(0 < pointCount && pointCount <= 2); + b2ContactConstraintAVX* constraint = constraints + i; - if (pointCount == 2) + for (int32_t j = 0; j < 8; ++j) { - const b2ManifoldPoint* mp = manifold->points + 1; - ((float*)&constraint->separation2)[vectorIndex] = mp->separation; - ((float*)&constraint->normalImpulse2)[vectorIndex] = mp->normalImpulse; - ((float*)&constraint->tangentImpulse2)[vectorIndex] = mp->tangentImpulse; - - ((float*)&constraint->rA2.X)[vectorIndex] = mp->anchorA.x; - ((float*)&constraint->rA2.Y)[vectorIndex] = mp->anchorA.y; - ((float*)&constraint->rB2.X)[vectorIndex] = mp->anchorB.x; - ((float*)&constraint->rB2.Y)[vectorIndex] = mp->anchorB.y; - - float rnA = b2Cross(mp->anchorA, normal); - float rnB = b2Cross(mp->anchorB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - ((float*)&constraint->normalMass2)[vectorIndex] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + int32_t contactIndex = contactIndices[8 * i + j]; - float rtA = b2Cross(mp->anchorA, tangent); - float rtB = b2Cross(mp->anchorB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - ((float*)&constraint->tangentMass2)[vectorIndex] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - } - else - { - // dummy data that has no effect - ((float*)&constraint->separation2)[vectorIndex] = 0.0f; - ((float*)&constraint->normalImpulse2)[vectorIndex] = 0.0f; - ((float*)&constraint->tangentImpulse2)[vectorIndex] = 0.0f; - ((float*)&constraint->rA2.X)[vectorIndex] = 0.0f; - ((float*)&constraint->rA2.Y)[vectorIndex] = 0.0f; - ((float*)&constraint->rB2.X)[vectorIndex] = 0.0f; - ((float*)&constraint->rB2.Y)[vectorIndex] = 0.0f; - ((float*)&constraint->normalMass2)[vectorIndex] = 0.0f; - ((float*)&constraint->tangentMass2)[vectorIndex] = 0.0f; + if (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = contacts + contactIndex; + + const b2Manifold* manifold = &contact->manifold; + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + constraint->indexA[j] = indexA; + constraint->indexB[j] = indexB; + + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; + + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float d = (2.0f * zeta + h * omega); + float c = h * omega * d; + float impulseCoefficient = 1.0f / (1.0f + c); + + ((float*)&constraint->friction)[j] = contact->friction; + ((float*)&constraint->impulseCoefficient)[j] = impulseCoefficient; + ((float*)&constraint->massCoefficient)[j] = c * impulseCoefficient; + ((float*)&constraint->biasCoefficient)[j] = omega / d; + + b2Vec2 normal = manifold->normal; + ((float*)&constraint->normal.X)[j] = normal.x; + ((float*)&constraint->normal.Y)[j] = normal.y; + + b2Vec2 tangent = b2RightPerp(normal); + + { + const b2ManifoldPoint* mp = manifold->points + 0; + ((float*)&constraint->separation1)[j] = mp->separation; + ((float*)&constraint->normalImpulse1)[j] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse1)[j] = mp->tangentImpulse; + + ((float*)&constraint->rA1.X)[j] = mp->anchorA.x; + ((float*)&constraint->rA1.Y)[j] = mp->anchorA.y; + ((float*)&constraint->rB1.X)[j] = mp->anchorB.x; + ((float*)&constraint->rB1.Y)[j] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass1)[j] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass1)[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + } + + int32_t pointCount = manifold->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); + + if (pointCount == 2) + { + const b2ManifoldPoint* mp = manifold->points + 1; + ((float*)&constraint->separation2)[j] = mp->separation; + ((float*)&constraint->normalImpulse2)[j] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse2)[j] = mp->tangentImpulse; + + ((float*)&constraint->rA2.X)[j] = mp->anchorA.x; + ((float*)&constraint->rA2.Y)[j] = mp->anchorA.y; + ((float*)&constraint->rB2.X)[j] = mp->anchorB.x; + ((float*)&constraint->rB2.Y)[j] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass2)[j] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass2)[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + } + else + { + // dummy data that has no effect + ((float*)&constraint->separation2)[j] = 0.0f; + ((float*)&constraint->normalImpulse2)[j] = 0.0f; + ((float*)&constraint->tangentImpulse2)[j] = 0.0f; + ((float*)&constraint->rA2.X)[j] = 0.0f; + ((float*)&constraint->rA2.Y)[j] = 0.0f; + ((float*)&constraint->rB2.X)[j] = 0.0f; + ((float*)&constraint->rB2.Y)[j] = 0.0f; + ((float*)&constraint->normalMass2)[j] = 0.0f; + ((float*)&constraint->tangentMass2)[j] = 0.0f; + } + } + else + { + // remainder + constraint->indexA[j] = B2_NULL_INDEX; + constraint->indexB[j] = B2_NULL_INDEX; + ((float*)&constraint->friction)[j] = 0.0f; + ((float*)&constraint->impulseCoefficient)[j] = 0.0f; + ((float*)&constraint->massCoefficient)[j] = 0.0f; + ((float*)&constraint->biasCoefficient)[j] = 0.0f; + ((float*)&constraint->normal.X)[j] = 0.0f; + ((float*)&constraint->normal.Y)[j] = 0.0f; + + ((float*)&constraint->separation1)[j] = 0.0f; + ((float*)&constraint->normalImpulse1)[j] = 0.0f; + ((float*)&constraint->tangentImpulse1)[j] = 0.0f; + ((float*)&constraint->rA1.X)[j] = 0.0f; + ((float*)&constraint->rA1.Y)[j] = 0.0f; + ((float*)&constraint->rB1.X)[j] = 0.0f; + ((float*)&constraint->rB1.Y)[j] = 0.0f; + ((float*)&constraint->normalMass1)[j] = 0.0f; + ((float*)&constraint->tangentMass1)[j] = 0.0f; + + ((float*)&constraint->separation2)[j] = 0.0f; + ((float*)&constraint->normalImpulse2)[j] = 0.0f; + ((float*)&constraint->tangentImpulse2)[j] = 0.0f; + ((float*)&constraint->rA2.X)[j] = 0.0f; + ((float*)&constraint->rA2.Y)[j] = 0.0f; + ((float*)&constraint->rB2.X)[j] = 0.0f; + ((float*)&constraint->rB2.Y)[j] = 0.0f; + ((float*)&constraint->normalMass2)[j] = 0.0f; + ((float*)&constraint->tangentMass2)[j] = 0.0f; + } } - - // Cycle [0-7] - vectorIndex = (vectorIndex + 1) & 0x7; } b2TracyCZoneEnd(prepare_contact); @@ -716,7 +752,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol b2SimdBody bA = b2GatherBodies(bodies, c->indexA); b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - __m256 useBiasMul = useBias ? _mm256_setzero_ps() : _mm256_set1_ps(1.0f); + __m256 useBiasMul = useBias ? _mm256_set1_ps(1.0f) : _mm256_setzero_ps(); __m256 invDtMul = _mm256_set1_ps(inv_dt); __m256 minBiasVel = _mm256_set1_ps(-maxBaumgarteVelocity); @@ -868,6 +904,9 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); } + + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); } void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) diff --git a/src/graph.c b/src/graph.c index 653f17e8..df67c889 100644 --- a/src/graph.c +++ b/src/graph.c @@ -21,6 +21,8 @@ #include #include +#define B2_AVX 1 + typedef struct b2WorkerContext { b2SolverTaskContext* context; @@ -382,7 +384,8 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv b2TracyCZoneEnd(finalize_positions); } -#if 0 +#if B2_AVX == 0 + static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) { b2SolverStageType type = stage->type; @@ -453,7 +456,7 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stageCalmContacts: - b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, false); + b2SolveContactAVXsTask(startIndex, endIndex, context, stage->colorIndex, false); break; case b2_stageStoreImpulses: @@ -569,7 +572,7 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } -#if 0 +#if B2_AVX == 0 // This should not use the thread index because thread 0 can be called twice by enkiTS. void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) { @@ -615,11 +618,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont stageIndex += 1; bodySyncIndex += 1; - // TODO_ERIN single threaded - B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); - b2PrepareJointsTask(context); - stageIndex += 1; - int32_t graphSyncIndex = 1; for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { @@ -630,6 +628,11 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont } graphSyncIndex += 1; + // TODO_ERIN single threaded + B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); + b2PrepareJointsTask(context); + stageIndex += 1; + int32_t velocityIterations = context->velocityIterations; for (int32_t i = 0; i < velocityIterations; ++i) { @@ -857,7 +860,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, syncBits); - + // Signal workers to finish atomic_store(&context->syncBits, UINT_MAX); @@ -900,7 +903,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont } #endif - // Threading: // 1. build array of awake bodies, maybe copy to contiguous array // 2. parallel-for integrate velocities @@ -920,7 +922,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont // http://mmacklin.com/smallsteps.pdf // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf -#if 0 +#if B2_AVX == 0 void b2SolveGraph(b2World* world, b2StepContext* stepContext) { b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); @@ -1064,10 +1066,10 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) // b2_stageIntegrateVelocities stageCount += 1; - // b2_stagePrepareJoints - stageCount += 1; // b2_stagePrepareContacts stageCount += activeColorCount; + // b2_stagePrepareJoints + stageCount += 1; // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions stageCount += 1 + activeColorCount + 1; // b2_stageFinalizePositions @@ -1134,14 +1136,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage->completionCount = 0; stage += 1; - // Prepare joints - stage->type = b2_stagePrepareJoints; - stage->blocks = NULL; - stage->blockCount = 0; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - // Prepare constraints for (int32_t i = 0; i < activeColorCount; ++i) { @@ -1153,6 +1147,14 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; } + // Prepare joints + stage->type = b2_stagePrepareJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + // Solve joints stage->type = b2_stageSolveJoints; stage->blocks = NULL; @@ -1374,25 +1376,42 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) activeColorCount = c; b2ContactConstraintAVX* constraints = - b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "constraint"); + b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "contact constraint"); - int32_t base = 0; + int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); + int32_t base = 0; for (int32_t i = 0; i < activeColorCount; ++i) { int32_t j = activeColorIndices[i]; - colors[j].contactConstraintAVXs = constraints + base; - base += colorConstraintCounts[j]; - } + b2GraphColor* color = colors + j; - int32_t storeBlockSize = 1 << 4; - int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 4) + 1 : 0; - if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) - { - storeBlockSize = constraintCount / (blocksPerWorker * workerCount); - storeBlockCount = blocksPerWorker * workerCount; + color->contactConstraintAVXs = constraints + base; + + int32_t colorContactCount = b2Array(color->contactArray).count; + for (int32_t k = 0; k < colorContactCount; ++k) + { + contactIndices[8 * base + k] = color->contactArray[k]; + } + + // remainder + int32_t colorConstraintCount = colorConstraintCounts[i]; + for (int32_t k = colorContactCount; k < 8 * colorConstraintCount; ++k) + { + contactIndices[8 * base + k] = B2_NULL_INDEX; + } + + base += colorConstraintCount; } + int32_t storeBlockSize = 1; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 3) + 1 : 0; + //if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) + //{ + // storeBlockSize = constraintCount / (blocksPerWorker * workerCount); + // storeBlockCount = blocksPerWorker * workerCount; + //} + /* b2_stageIntegrateVelocities = 0, b2_stagePrepareContacts, @@ -1594,6 +1613,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) context.stepContext = stepContext; context.constraints = NULL; context.constraintAVXs = constraints; + context.contactIndices = contactIndices; context.activeColorCount = activeColorCount; context.velocityIterations = velIters; context.calmIterations = stepContext->positionIterations; @@ -1622,6 +1642,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); + b2FreeStackItem(world->stackAllocator, contactIndices); b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, bodyToSolverMap); b2FreeStackItem(world->stackAllocator, solverToBodyMap); @@ -1630,4 +1651,3 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) } #endif - diff --git a/src/stack_allocator.c b/src/stack_allocator.c index a8412651..0e0e3a3f 100644 --- a/src/stack_allocator.c +++ b/src/stack_allocator.c @@ -56,23 +56,29 @@ void b2DestroyStackAllocator(b2StackAllocator* allocator) void* b2AllocateStackItem(b2StackAllocator* alloc, int32_t size, const char* name) { + int32_t size32 = ((size - 1) | 0x1F) + 1; + b2StackEntry entry; - entry.size = size; + entry.size = size32; entry.name = name; - if (alloc->index + size > alloc->capacity) + if (alloc->index + size32 > alloc->capacity) { // fall back to the heap (undesirable) - entry.data = (char*)b2Alloc(size); + entry.data = (char*)b2Alloc(size32); entry.usedMalloc = true; + + B2_ASSERT(((uintptr_t)entry.data & 0x1F) == 0); } else { entry.data = alloc->data + alloc->index; entry.usedMalloc = false; - alloc->index += size; + alloc->index += size32; + + B2_ASSERT(((uintptr_t)entry.data & 0x1F) == 0); } - alloc->allocation += size; + alloc->allocation += size32; if (alloc->allocation > alloc->maxAllocation) { alloc->maxAllocation = alloc->allocation; From d6fb34a7d925d3eee25c3f59d4741f483dc0a300 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 6 Oct 2023 22:31:41 -0700 Subject: [PATCH 34/51] fixes --- samples/collection/benchmark_pyramid.cpp | 4 ++-- src/contact_solver.c | 2 +- src/graph.c | 20 ++++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index 504f3c29..fa01a9cf 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -22,8 +22,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 10; - m_rowCount = g_sampleDebug ? 1 : 14; - m_columnCount = g_sampleDebug ? 1 : 13; + m_rowCount = g_sampleDebug ? 4 : 14; + m_columnCount = g_sampleDebug ? 4 : 13; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; diff --git a/src/contact_solver.c b/src/contact_solver.c index cc84cbb9..d0f4ef89 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -445,7 +445,7 @@ void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2Solve b2ScatterBodies(bodies, c->indexB, &bB); } - b2TracyCZoneEnd(prepare_contact); + b2TracyCZoneEnd(warm_start_contact); } static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) diff --git a/src/graph.c b/src/graph.c index df67c889..621c9c6e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -1362,11 +1362,11 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t avxCount = ((count - 1) >> 3) + 1; activeColorIndices[c] = i; colorConstraintCounts[c] = avxCount; - int32_t blockCount = avxCount; - // TODO_ERIN work stealing unit is 1 AVX constraint - colorBlockSize[c] = 1; + int32_t blockSize = 4; + int32_t blockCount = ((avxCount - 1) >> 2) + 1; + colorBlockSize[c] = blockSize; colorBlockCounts[c] = blockCount; graphBlockCount += blockCount; constraintCount += avxCount; @@ -1404,13 +1404,13 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) base += colorConstraintCount; } - int32_t storeBlockSize = 1; - int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 3) + 1 : 0; - //if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) - //{ - // storeBlockSize = constraintCount / (blocksPerWorker * workerCount); - // storeBlockCount = blocksPerWorker * workerCount; - //} + int32_t storeBlockSize = 4; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 2) + 1 : 0; + if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) + { + storeBlockSize = constraintCount / (blocksPerWorker * workerCount); + storeBlockCount = blocksPerWorker * workerCount; + } /* b2_stageIntegrateVelocities = 0, From ae8a007343a9b2db535f7483bb0b0ce6fb8cef98 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Thu, 12 Oct 2023 23:01:14 -0700 Subject: [PATCH 35/51] island sleeping wip --- src/bitset.c | 2 +- src/graph.h | 9 +++++++++ src/island.c | 14 ++++++++++++++ src/world.c | 36 +++++++++++++++++++++++++++++++++++- src/world.h | 2 ++ 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/bitset.c b/src/bitset.c index fbecc26d..ddb87847 100644 --- a/src/bitset.c +++ b/src/bitset.c @@ -57,7 +57,7 @@ void b2GrowBitSet(b2BitSet* bitSet, uint32_t wordCount) bitSet->wordCount = wordCount; } -void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB) +void b2InPlaceUnion(b2BitSet* restrict setA, const b2BitSet* restrict setB) { B2_ASSERT(setA->wordCount == setB->wordCount); uint32_t wordCount = setA->wordCount; diff --git a/src/graph.h b/src/graph.h index a8f7d9a8..ac5feb19 100644 --- a/src/graph.h +++ b/src/graph.h @@ -29,6 +29,15 @@ typedef struct b2GraphColor b2ContactConstraintAVX* contactConstraintAVXs; } b2GraphColor; +// This holds constraints that cannot fit the graph color limit. This happens when a single dynamic body +// is touching many other bodies. +typedef struct +{ + int32_t* contactArray; + int32_t* jointArray; + b2ContactConstraint* contactConstraints; +} b2GraphOverflow; + typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; diff --git a/src/island.c b/src/island.c index d4bbb5ff..f7f00c14 100644 --- a/src/island.c +++ b/src/island.c @@ -184,6 +184,8 @@ void b2WakeIsland(b2Island* island) island->awakeIndex = b2Array(world->awakeIslandArray).count; b2Array_Push(world->awakeIslandArray, island->object.index); + + // TODO_ISLAND add constraints to graph } #if B2_GRAPH_COLOR == 1 @@ -912,6 +914,18 @@ static void b2SplitIsland(b2Island* baseIsland) b2TracyCZoneEnd(split); } +void b2CompleteIsland(b2Island* island) +{ + b2World* world = island->world; + + // Wake island + if (island->awakeIndex != B2_NULL_INDEX) + { + island->awakeIndex = B2_NULL_INDEX; + b2WakeIsland(island); + } +} + // This island was just created through splitting. Handle single thread work. void b2CompleteSplitIsland(b2Island* island) { diff --git a/src/world.c b/src/world.c index 486abd00..1da72c54 100644 --- a/src/world.c +++ b/src/world.c @@ -164,6 +164,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->taskContextArray[i].contactStateBitSet = b2CreateBitSet(def->contactCapacity); world->taskContextArray[i].awakeContactBitSet = b2CreateBitSet(def->contactCapacity); world->taskContextArray[i].shapeBitSet = b2CreateBitSet(def->shapeCapacity); + world->taskContextArray[i].awakeIslandBitSet = b2CreateBitSet(256); } return id; @@ -178,6 +179,7 @@ void b2DestroyWorld(b2WorldId id) b2DestroyBitSet(&world->taskContextArray[i].contactStateBitSet); b2DestroyBitSet(&world->taskContextArray[i].awakeContactBitSet); b2DestroyBitSet(&world->taskContextArray[i].shapeBitSet); + b2DestroyBitSet(&world->taskContextArray[i].awakeIslandBitSet); } b2DestroyArray(world->taskContextArray, sizeof(b2TaskContext)); @@ -916,7 +918,7 @@ static void b2Solve(b2World* world, b2StepContext* context) } #endif -// Graph coloring experiment +// Solve with graph coloring static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); @@ -928,10 +930,12 @@ static void b2Solve(b2World* world, b2StepContext* context) // Prepare contact and shape bit-sets int32_t contactCapacity = world->contactPool.capacity; int32_t shapeCapacity = world->shapePool.capacity; + int32_t islandCapacity = world->islandPool.capacity; for (uint32_t i = 0; i < world->workerCount; ++i) { b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); } b2MergeAwakeIslands(world); @@ -997,6 +1001,36 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(enlarge_proxies); + b2TracyCZoneNC(awake_islands, "Awake Islands", b2_colorGainsboro, true); + { + b2BitSet* bitSet = &world->taskContextArray[0].awakeIslandBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) + { + b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeIslandBitSet); + } + + int32_t count = b2Array(world->awakeIslandArray).count; + for (int32_t i = 0; i < count; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + if (b2GetBit(bitSet, islandIndex) == true) + { + continue; + } + + // TODO_ISLAND + // Put island to sleep. Remove edges from graph. + } + + // TODO_ISLAND + // Clear awake island array + // Use bitSet to build awake island array. No need to add edges. + } + + for (int32_t i = 0; i ) + + b2TracyCZoneEnd(awake_islands); + b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); // Build awake contact array diff --git a/src/world.h b/src/world.h index c968a272..ba737925 100644 --- a/src/world.h +++ b/src/world.h @@ -29,6 +29,8 @@ typedef struct b2TaskContext // Used to sort shapes that have enlarged AABBs b2BitSet shapeBitSet; + // Used to wake islands + b2BitSet awakeIslandBitSet; } b2TaskContext; /// The world class manages all physics entities, dynamic simulation, From f34c0f5d0cba9c7463d098b92f90db82e65b5c76 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 13 Oct 2023 23:05:45 -0700 Subject: [PATCH 36/51] island sleeping --- src/contact.c | 6 +- src/contact.h | 7 +- src/contact_solver.c | 31 ++++-- src/graph.c | 257 +++++++++++++++++++++++++++++++++++-------- src/graph.h | 6 +- src/island.c | 93 +++++----------- src/island.h | 1 - src/joint.c | 7 ++ src/joint.h | 6 + src/solver_data.h | 2 +- src/world.c | 53 ++++++++- 11 files changed, 329 insertions(+), 140 deletions(-) diff --git a/src/contact.c b/src/contact.c index bc46e711..8e98654a 100644 --- a/src/contact.c +++ b/src/contact.c @@ -197,7 +197,7 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) contact->islandIndex = B2_NULL_INDEX; contact->islandPrev = B2_NULL_INDEX; contact->islandNext = B2_NULL_INDEX; - contact->colorContactIndex = B2_NULL_INDEX; + contact->colorSubIndex = B2_NULL_INDEX; contact->colorIndex = B2_NULL_INDEX; b2Body* bodyA = world->bodies + shapeA->bodyIndex; @@ -443,10 +443,6 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body { // TODO_ERIN this call assumes thread safety int32_t colorIndex = contact->colorIndex; - if (contact->flags & b2_contactStatic) - { - colorIndex += 8; - } bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, colorIndex, world->preSolveContext); if (collide == false) { diff --git a/src/contact.h b/src/contact.h index e9d88a44..7ffff896 100644 --- a/src/contact.h +++ b/src/contact.h @@ -52,8 +52,6 @@ enum b2ContactFlags b2_contactStoppedTouching = 0x00000080, b2_contactIslandFlag = 0x00000100, - - b2_contactStatic = 0x00000200 }; /// The class manages contact between two shapes. A contact exists for each overlapping @@ -68,10 +66,11 @@ typedef struct b2Contact // This is too hot and has been moved to a separate array //int32_t awakeIndex; + // The color of this constraint in the graph coloring int32_t colorIndex; - // For fast removal from graph color - int32_t colorContactIndex; + // Index of contact within color + int32_t colorSubIndex; b2ContactEdge edges[2]; diff --git a/src/contact_solver.c b/src/contact_solver.c index d0f4ef89..1a0766a8 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -752,14 +752,23 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol b2SimdBody bA = b2GatherBodies(bodies, c->indexA); b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - __m256 useBiasMul = useBias ? _mm256_set1_ps(1.0f) : _mm256_setzero_ps(); + __m256 biasCoeff, massCoeff, impulseCoeff; + if (useBias) + { + biasCoeff = c->biasCoefficient; + massCoeff = c->massCoefficient; + impulseCoeff = c->impulseCoefficient; + } + else + { + biasCoeff = _mm256_setzero_ps(); + massCoeff = _mm256_set1_ps(1.0f); + impulseCoeff = _mm256_setzero_ps(); + } + __m256 invDtMul = _mm256_set1_ps(inv_dt); __m256 minBiasVel = _mm256_set1_ps(-maxBaumgarteVelocity); - // float biasCoefficient = constraint->biasCoefficient; - // float massCoefficient = constraint->massCoefficient; - // float impulseCoefficient = constraint->impulseCoefficient; - // first point non-penetration constraint { // Compute change in separation (small angle approximation of sin(angle) == angle) @@ -771,8 +780,8 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); __m256 specBias = mul(s, invDtMul); - __m256 softBias = _mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); - __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(softBias, specBias, test); // Relative velocity at contact __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); @@ -780,7 +789,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); // Compute normal impulse - __m256 negImpulse = add(mul(c->normalMass1, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse1)); + __m256 negImpulse = add(mul(c->normalMass1, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse1)); // float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; // Clamp the accumulated impulse @@ -812,8 +821,8 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); __m256 specBias = mul(s, invDtMul); - __m256 softBias = _mm256_max_ps(mul(c->biasCoefficient, s), minBiasVel); - __m256 bias = _mm256_blendv_ps(specBias, mul(softBias, useBiasMul), test); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(softBias, specBias, test); // Relative velocity at contact __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); @@ -821,7 +830,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); // Compute normal impulse - __m256 negImpulse = add(mul(c->normalMass2, mul(c->massCoefficient, add(vn, bias))), mul(c->impulseCoefficient, c->normalImpulse2)); + __m256 negImpulse = add(mul(c->normalMass2, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse2)); // Clamp the accumulated impulse __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); diff --git a/src/graph.c b/src/graph.c index 621c9c6e..7745d738 100644 --- a/src/graph.c +++ b/src/graph.c @@ -29,10 +29,11 @@ typedef struct b2WorkerContext int32_t workerIndex; } b2WorkerContext; -void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity) +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity) { bodyCapacity = B2_MAX(bodyCapacity, 8); contactCapacity = B2_MAX(contactCapacity, 8); + jointCapacity = B2_MAX(jointCapacity, 8); for (int32_t i = 0; i < b2_graphColorCount; ++i) { @@ -41,6 +42,7 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity b2SetBitCountAndClear(&color->bodySet, bodyCapacity); color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + color->jointArray = b2CreateArray(sizeof(int32_t), jointCapacity); } } @@ -51,13 +53,14 @@ void b2DestroyGraph(b2Graph* graph) b2GraphColor* color = graph->colors + i; b2DestroyBitSet(&color->bodySet); b2DestroyArray(color->contactArray, sizeof(int32_t)); + b2DestroyArray(color->jointArray, sizeof(int32_t)); } } void b2AddContactToGraph(b2World* world, b2Contact* contact) { - B2_ASSERT(contact->colorContactIndex == B2_NULL_INDEX); B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex == B2_NULL_INDEX); b2Graph* graph = &world->graph; @@ -80,10 +83,9 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) b2SetBitGrow(&color->bodySet, bodyIndexA); b2SetBitGrow(&color->bodySet, bodyIndexB); - contact->colorContactIndex = b2Array(color->contactArray).count; + contact->colorSubIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); contact->colorIndex = i; - contact->flags &= ~b2_contactStatic; break; } } @@ -100,7 +102,7 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) b2SetBitGrow(&color->bodySet, bodyIndexA); - contact->colorContactIndex = b2Array(color->contactArray).count; + contact->colorSubIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); contact->colorIndex = i; break; @@ -119,20 +121,20 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) b2SetBitGrow(&color->bodySet, bodyIndexB); - contact->colorContactIndex = b2Array(color->contactArray).count; + contact->colorSubIndex = b2Array(color->contactArray).count; b2Array_Push(color->contactArray, contact->object.index); contact->colorIndex = i; break; } } - B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorContactIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorSubIndex != B2_NULL_INDEX); } void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) { B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); - B2_ASSERT(contact->colorContactIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex != B2_NULL_INDEX); b2Graph* graph = &world->graph; @@ -148,13 +150,13 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) { // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; } b2ClearBit(&color->bodySet, bodyIndexA); @@ -165,13 +167,13 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) { // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; } b2ClearBit(&color->bodySet, bodyIndexA); @@ -181,21 +183,160 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2GraphColor* color = graph->colors + contact->colorIndex; B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); - int32_t colorContactIndex = contact->colorContactIndex; - b2Array_RemoveSwap(color->contactArray, colorContactIndex); - if (colorContactIndex < b2Array(color->contactArray).count) + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) { // Fix index on swapped contact - int32_t swappedContactIndex = color->contactArray[colorContactIndex]; - world->contacts[swappedContactIndex].colorContactIndex = colorContactIndex; + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; } b2ClearBit(&color->bodySet, bodyIndexB); } contact->colorIndex = B2_NULL_INDEX; - contact->colorContactIndex = B2_NULL_INDEX; - contact->flags &= ~b2_contactStatic; + contact->colorSubIndex = B2_NULL_INDEX; +} + +void b2AddJointToGraph(b2World* world, b2Joint* joint) +{ + B2_ASSERT(joint->colorIndex == B2_NULL_INDEX); + B2_ASSERT(joint->colorSubIndex == B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + int32_t bodyIndexA = joint->edges[0].bodyIndex; + int32_t bodyIndexB = joint->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA) || b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexB); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + else if (typeA == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + else if (typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexB); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + + B2_ASSERT(joint->colorIndex != B2_NULL_INDEX && joint->colorSubIndex != B2_NULL_INDEX); +} + +void b2RemoveJointFromGraph(b2World* world, b2Joint* joint) +{ + B2_ASSERT(joint->colorIndex != B2_NULL_INDEX); + B2_ASSERT(joint->colorSubIndex != B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + B2_ASSERT(0 <= joint->colorIndex && joint->colorIndex < b2_graphColorCount); + int32_t bodyIndexA = joint->edges[0].bodyIndex; + int32_t bodyIndexB = joint->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + b2ClearBit(&color->bodySet, bodyIndexB); + } + else if (typeA == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + } + else if (typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexB); + } + + joint->colorIndex = B2_NULL_INDEX; + joint->colorSubIndex = B2_NULL_INDEX; } static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) @@ -310,19 +451,22 @@ static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2Sol b2TracyCZoneEnd(integrate_positions); } -static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t workerIndex) +static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t workerIndex) { b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); b2World* world = context->world; + bool enableSleep = world->enableSleep; b2Body* bodies = world->bodies; const b2SolverBody* solverBodies = context->solverBodies; b2Contact* contacts = world->contacts; const int32_t* solverToBodyMap = context->solverToBodyMap; const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + float timeStep = context->timeStep; b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; b2BitSet* shapeBitSet = &world->taskContextArray[workerIndex].shapeBitSet; + b2BitSet* awakeIslandBitSet = &world->taskContextArray[workerIndex].awakeIslandBitSet; B2_ASSERT(startIndex <= endIndex); B2_ASSERT(startIndex <= world->bodyPool.capacity); @@ -345,6 +489,28 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv body->force = b2Vec2_zero; body->torque = 0.0f; + // Update sleep + const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; + const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; + + if (enableSleep == false || body->enableSleep == false || + body->angularVelocity * body->angularVelocity > angTolSqr || + b2Dot(body->linearVelocity, body->linearVelocity) > linTolSqr) + { + body->sleepTime = 0.0f; + } + else + { + body->sleepTime += timeStep; + } + + // Any single body in an island can keep it awake + if (body->sleepTime < b2_timeToSleep) + { + B2_ASSERT(0 <= body->islandIndex && body->islandIndex < world->islandPool.capacity); + b2SetBit(awakeIslandBitSet, body->islandIndex); + } + // Update shapes AABBs int32_t shapeIndex = body->shapeList; while (shapeIndex != B2_NULL_INDEX) @@ -367,7 +533,6 @@ static void b2FinalizePositionsTask(int32_t startIndex, int32_t endIndex, b2Solv shapeIndex = shape->nextShapeIndex; } - // TODO_ERIN legacy int32_t contactKey = body->contactList; while (contactKey != B2_NULL_INDEX) { @@ -451,14 +616,14 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i b2IntegratePositionsTask(startIndex, endIndex, context); break; - case b2_stageFinalizePositions: - b2FinalizePositionsTask(startIndex, endIndex, context, workerIndex); - break; - case b2_stageCalmContacts: b2SolveContactAVXsTask(startIndex, endIndex, context, stage->colorIndex, false); break; + case b2_stageFinalizeBodies: + b2FinalizeBodiesTask(startIndex, endIndex, context, workerIndex); + break; + case b2_stageStoreImpulses: b2StoreImpulsesTaskAVX(startIndex, endIndex, context); break; @@ -604,9 +769,9 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, - b2_stageFinalizePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageFinalizeBodies, b2_stageStoreImpulses */ @@ -766,9 +931,9 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, - b2_stageFinalizePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageFinalizeBodies, b2_stageStoreImpulses */ @@ -830,11 +995,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont stageIndex += 1 + activeColorCount + 1; - syncBits = (bodySyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - stageIndex += 1; - int32_t calmIterations = context->calmIterations; for (int32_t i = 0; i < calmIterations; ++i) { @@ -857,6 +1017,11 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont stageIndex += 1 + activeColorCount; + syncBits = (bodySyncIndex << 16) | stageIndex; + B2_ASSERT(stages[stageIndex].type == b2_stageFinalizeBodies); + b2ExecuteMainStage(stages + stageIndex, context, syncBits); + stageIndex += 1; + syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, syncBits); @@ -1557,14 +1722,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage->completionCount = 0; stage += 1; - // Finalize positions - stage->type = b2_stageFinalizePositions; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - // Calm joints stage->type = b2_stageCalmJoints; stage->blocks = NULL; @@ -1584,6 +1741,14 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; } + // Finalize bodies + stage->type = b2_stageFinalizeBodies; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + // Store impulses stage->type = b2_stageStoreImpulses; stage->blocks = storeBlocks; diff --git a/src/graph.h b/src/graph.h index ac5feb19..c3d2181f 100644 --- a/src/graph.h +++ b/src/graph.h @@ -44,13 +44,13 @@ typedef struct b2Graph int32_t colorCount; } b2Graph; -void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity); +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity); void b2DestroyGraph(b2Graph* graph); void b2AddContactToGraph(b2World* world, b2Contact* contact); void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); -void b2AddJointToGraph(b2World* world, b2Joint* contact); -void b2RemoveJointFromGraph(b2World* world, b2Joint* contact); +void b2AddJointToGraph(b2World* world, b2Joint* joint); +void b2RemoveJointFromGraph(b2World* world, b2Joint* joint); void b2SolveGraph(b2World* world, b2StepContext* stepContext); diff --git a/src/island.c b/src/island.c index f7f00c14..b7b17ca5 100644 --- a/src/island.c +++ b/src/island.c @@ -178,33 +178,46 @@ void b2WakeIsland(b2Island* island) if (island->awakeIndex != B2_NULL_INDEX) { + // already awake B2_ASSERT(world->awakeIslandArray[island->awakeIndex] == island->object.index); return; } + int32_t islandIndex = island->object.index; island->awakeIndex = b2Array(world->awakeIslandArray).count; - b2Array_Push(world->awakeIslandArray, island->object.index); - - // TODO_ISLAND add constraints to graph -} - -#if B2_GRAPH_COLOR == 1 + b2Array_Push(world->awakeIslandArray, islandIndex); -void b2LinkContact(b2World* world, b2Contact* contact) -{ - B2_MAYBE_UNUSED(world); - B2_MAYBE_UNUSED(contact); -} + // Reset sleep timers on bodies + // TODO_ERIN make this parallel somehow? + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = world->bodies + bodyIndex; + B2_ASSERT(body->islandIndex == islandIndex); + body->sleepTime = 0.0f; + bodyIndex = body->islandNext; + } -void b2UnlinkContact(b2World* world, b2Contact* contact) -{ - B2_MAYBE_UNUSED(world); - B2_MAYBE_UNUSED(contact); + // Add constraints to graph + int32_t contactIndex = island->headContact; + while (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = world->contacts + contactIndex; + B2_ASSERT(contact->islandIndex == islandIndex); + b2AddContactToGraph(world, contact); + contactIndex = contact->islandNext; + } + int32_t jointIndex = island->headJoint; + while (jointIndex != B2_NULL_INDEX) + { + b2Joint* joint = world->joints + jointIndex; + B2_ASSERT(joint->islandIndex == islandIndex); + b2AddJointToGraph(world, joint); + jointIndex = joint->islandNext; + } } -#else - // https://en.wikipedia.org/wiki/Disjoint-set_data_structure void b2LinkContact(b2World* world, b2Contact* contact) { @@ -329,8 +342,6 @@ void b2UnlinkContact(b2World* world, b2Contact* contact) contact->islandNext = B2_NULL_INDEX; } -#endif - static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) { B2_ASSERT(joint->islandIndex == B2_NULL_INDEX); @@ -660,13 +671,6 @@ void b2MergeAwakeIslands(b2World* world) world->islands = (b2Island*)world->islandPool.memory; } -static int b2CompareIslands(const void* A, const void* B) -{ - const b2Island* islandA = *(const b2Island**)A; - const b2Island* islandB = *(const b2Island**)B; - return islandB->bodyCount - islandA->bodyCount; -} - #define B2_CONTACT_REMOVE_THRESHOLD 1 // Split an island because some contacts and/or joints have been removed @@ -914,46 +918,9 @@ static void b2SplitIsland(b2Island* baseIsland) b2TracyCZoneEnd(split); } -void b2CompleteIsland(b2Island* island) -{ - b2World* world = island->world; - - // Wake island - if (island->awakeIndex != B2_NULL_INDEX) - { - island->awakeIndex = B2_NULL_INDEX; - b2WakeIsland(island); - } -} - // This island was just created through splitting. Handle single thread work. void b2CompleteSplitIsland(b2Island* island) { -// Report impulses -#if 0 - b2World* world = island->world; - b2PostSolveFcn* postSolveFcn = island->world->postSolveFcn; - if (postSolveFcn != NULL) - { - b2Contact* contacts = world->contacts; - int16_t worldIndex = world->index; - const b2Shape* shapes = world->shapes; - - int32_t contactIndex = island->headContact; - while (contactIndex != B2_NULL_INDEX) - { - const b2Contact* contact = contacts + contactIndex; - - const b2Shape* shapeA = shapes + contact->shapeIndexA; - const b2Shape* shapeB = shapes + contact->shapeIndexB; - - b2ShapeId idA = {shapeA->object.index, worldIndex, shapeA->object.revision}; - b2ShapeId idB = {shapeB->object.index, worldIndex, shapeB->object.revision}; - postSolveFcn(idA, idB, &contact->manifold, world->postSolveContext); - } - } -#endif - // Split islands are kept awake as part of the splitting process. They can // fall asleep the next time step. island->awakeIndex = B2_NULL_INDEX; diff --git a/src/island.h b/src/island.h index 571e1669..29e10077 100644 --- a/src/island.h +++ b/src/island.h @@ -81,7 +81,6 @@ void b2PrepareIsland(b2Island* island, b2StepContext* stepContext); void b2SolveIsland(b2Island* island, uint32_t threadIndex); -void b2CompleteIsland(b2Island* island); void b2CompleteBaseSplitIsland(b2Island* island); void b2CompleteSplitIsland(b2Island* island); diff --git a/src/joint.c b/src/joint.c index c8f1c96f..af0a1749 100644 --- a/src/joint.c +++ b/src/joint.c @@ -116,6 +116,8 @@ static b2Joint* b2CreateJoint(b2World* world, b2Body* bodyA, b2Body* bodyB) joint->islandIndex = B2_NULL_INDEX; joint->islandPrev = B2_NULL_INDEX; joint->islandNext = B2_NULL_INDEX; + joint->colorIndex = B2_NULL_INDEX; + joint->colorSubIndex = B2_NULL_INDEX; joint->isMarked = false; @@ -123,6 +125,11 @@ static b2Joint* b2CreateJoint(b2World* world, b2Body* bodyA, b2Body* bodyB) { // Add edge to island graph b2LinkJoint(world, joint); + + if (b2IsBodyAwake(world, bodyA) || b2IsBodyAwake(world, bodyB)) + { + b2AddJointToGraph(world, joint); + } } return joint; diff --git a/src/joint.h b/src/joint.h index 6a8073af..d21c5514 100644 --- a/src/joint.h +++ b/src/joint.h @@ -129,6 +129,12 @@ typedef struct b2Joint int32_t islandPrev; int32_t islandNext; + // The color of this constraint in the graph coloring + int32_t colorIndex; + + // Index of joint within color + int32_t colorSubIndex; + b2Vec2 localAnchorA; b2Vec2 localAnchorB; diff --git a/src/solver_data.h b/src/solver_data.h index bad501cf..46b17e6f 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -51,9 +51,9 @@ typedef enum b2SolverStageType b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions, - b2_stageFinalizePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageFinalizeBodies, b2_stageStoreImpulses } b2SolverStageType; diff --git a/src/world.c b/src/world.c index 1da72c54..adcfdd32 100644 --- a/src/world.c +++ b/src/world.c @@ -100,7 +100,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->stackAllocator = b2CreateStackAllocator(def->stackAllocatorCapacity); b2CreateBroadPhase(&world->broadPhase); - b2CreateGraph(&world->graph, def->bodyCapacity, def->contactCapacity); + b2CreateGraph(&world->graph, def->bodyCapacity, def->contactCapacity, def->jointCapacity); // pools world->bodyPool = b2CreatePool(sizeof(b2Body), B2_MAX(def->bodyCapacity, 1)); @@ -942,6 +942,8 @@ static void b2Solve(b2World* world, b2StepContext* context) world->profile.buildIslands = 0.0f; + // TODO_ISLAND task to split island + b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); b2SolveGraph(world, context); @@ -1018,16 +1020,55 @@ static void b2Solve(b2World* world, b2StepContext* context) continue; } - // TODO_ISLAND - // Put island to sleep. Remove edges from graph. + // Put island to sleep + b2Island* island = world->islands + islandIndex; + island->awakeIndex = B2_NULL_INDEX; + + // Remove edges from graph + int32_t contactIndex = island->headContact; + while (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = world->contacts + contactIndex; + b2RemoveContactFromGraph(world, contact); + contactIndex = contact->islandNext; + } + + int32_t jointIndex = island->headJoint; + while (jointIndex != B2_NULL_INDEX) + { + b2Joint* joint = world->joints + jointIndex; + b2RemoveJointFromGraph(world, joint); + jointIndex = joint->islandNext; + } } - // TODO_ISLAND // Clear awake island array + b2Array_Clear(world->awakeIslandArray); + // Use bitSet to build awake island array. No need to add edges. - } + uint64_t word; + uint32_t wordCount = bitSet->wordCount; + uint64_t* bits = bitSet->bits; + int32_t awakeIndex = 0; + for (uint32_t k = 0; k < wordCount; ++k) + { + word = bits[k]; + while (word != 0) + { + uint32_t ctz = b2CTZ(word); + uint32_t islandIndex = 64 * k + ctz; + + b2Array_Push(world->awakeIslandArray, islandIndex); - for (int32_t i = 0; i ) + // Reference index. This tells the island and bodies they are awake. + world->islands[islandIndex].awakeIndex = awakeIndex; + awakeIndex += 1; + + // Clear the smallest set bit + word = word & (word - 1); + } + } + } b2TracyCZoneEnd(awake_islands); From 8aab30e841954545634d88adc933cd8d25a75e6b Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 14 Oct 2023 15:26:23 -0700 Subject: [PATCH 37/51] wip --- src/contact_solver.c | 203 +++++++++++-- src/contact_solver.h | 23 +- src/graph.c | 684 +++++++------------------------------------ src/graph.h | 6 +- 4 files changed, 293 insertions(+), 623 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index 1a0766a8..d8260e7a 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -11,8 +11,10 @@ #include "world.h" #include -// or superset -// #include + +// Soft constraints with constraint error substepping. Includes a bias removal stage to help remove excess energy. +// http://mmacklin.com/smallsteps.pdf +// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf #define maxBaumgarteVelocity 3.0f @@ -79,7 +81,7 @@ static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* r return simdBody; } -// This writes everything back but only the velocities change +// This writes everything back to the solver bodies but only the velocities change static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) { _Static_assert(sizeof(b2SolverBody) == 32); @@ -101,6 +103,8 @@ static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict ind __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + // I don't use any dummy body in the body array because this will lead to multithreaded sharing and the + // associated cache flushing. if (indices[0] != B2_NULL_INDEX) _mm256_store_ps((float*)(bodies + indices[0]), _mm256_permute2f128_ps(tt0, tt4, 0x20)); if (indices[1] != B2_NULL_INDEX) @@ -119,20 +123,36 @@ static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict ind _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); } -// TODO_ERIN prepare contact constraints directly in collision phase? -void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +void b2PrepareContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); b2World* world = context->world; b2Graph* graph = context->graph; - b2GraphColor* color = graph->colors + colorIndex; - int32_t* contactIndices = color->contactArray; b2Contact* contacts = world->contacts; const int32_t* bodyMap = context->bodyToSolverMap; b2SolverBody* solverBodies = context->solverBodies; - // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2ContactConstraint* constraints; + int32_t* contactIndices; + + if (colorIndex == b2_overflowIndex) + { + B2_ASSERT(startIndex == 0); + B2_ASSERT(endIndex == b2Array(graph->overflow.contactArray).count); + contactIndices = graph->overflow.contactArray; + constraints = graph->overflow.contactConstraints; + } + else + { + b2GraphColor* color = graph->colors + colorIndex; + contactIndices = color->contactArray; + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + constraints = color->contactConstraints; + } + + // This is a dummy body to represent a static body because static bodies don't have a solver body. b2SolverBody dummyBody = {0}; // 30 is a bit soft, 60 oscillates too much @@ -141,10 +161,7 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon const float contactHertz = 30.0f; float h = context->timeStep; - bool enableWarmStarting = world->enableWarmStarting; - - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + //bool enableWarmStarting = world->enableWarmStarting; for (int32_t i = startIndex; i < endIndex; ++i) { @@ -158,7 +175,7 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; - b2ContactConstraint* constraint = color->contactConstraints + i; + b2ContactConstraint* constraint = constraints + i; constraint->contact = contact; constraint->indexA = indexA; constraint->indexB = indexB; @@ -216,14 +233,14 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; // Warm start - if (enableWarmStarting) - { - b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - wA -= iA * b2Cross(cp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(cp->rB, P); - vB = b2MulAdd(vB, mB, P); - } + //if (enableWarmStarting) + //{ + // b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + // wA -= iA * b2Cross(cp->rA, P); + // vA = b2MulAdd(vA, -mA, P); + // wB += iB * b2Cross(cp->rB, P); + // vB = b2MulAdd(vB, mB, P); + //} } solverBodyA->linearVelocity = vA; @@ -235,7 +252,7 @@ void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCon b2TracyCZoneEnd(prepare_contact); } -void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); @@ -401,9 +418,138 @@ void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTask b2TracyCZoneEnd(prepare_contact); } -void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +void b2WarmStartContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { - b2TracyCZoneNC(warm_start_contact, "Warm Start Contact", b2_colorGreen1, true); + b2TracyCZoneNC(prepare_contact, "Warm Start", b2_colorYellow, true); + + b2World* world = context->world; + b2Graph* graph = context->graph; + b2Contact* contacts = world->contacts; + const int32_t* bodyMap = context->bodyToSolverMap; + b2SolverBody* solverBodies = context->solverBodies; + + b2ContactConstraint* constraints; + int32_t* contactIndices; + + if (colorIndex == b2_overflowIndex) + { + B2_ASSERT(startIndex == 0); + B2_ASSERT(endIndex == b2Array(graph->overflow.contactArray).count); + contactIndices = graph->overflow.contactArray; + constraints = graph->overflow.contactConstraints; + } + else + { + b2GraphColor* color = graph->colors + colorIndex; + contactIndices = color->contactArray; + B2_ASSERT(startIndex <= b2Array(color->contactArray).count); + B2_ASSERT(endIndex <= b2Array(color->contactArray).count); + constraints = color->contactConstraints; + } + + // This is a dummy body to represent a static body because static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; + + float h = context->timeStep; + // bool enableWarmStarting = world->enableWarmStarting; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + b2ContactConstraint* constraint = constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; + b2Vec2 vA = solverBodyA->linearVelocity; + float wA = solverBodyA->angularVelocity; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; + + constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float c = h * omega * (2.0f * zeta + h * omega); + constraint->impulseCoefficient = 1.0f / (1.0f + c); + constraint->massCoefficient = c * constraint->impulseCoefficient; + constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ContactConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + + cp->rA = mp->anchorA; + cp->rB = mp->anchorB; + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + cp->separation = mp->separation; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + // Warm start + // if (enableWarmStarting) + //{ + // b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + // wA -= iA * b2Cross(cp->rA, P); + // vA = b2MulAdd(vA, -mA, P); + // wB += iB * b2Cross(cp->rB, P); + // vB = b2MulAdd(vB, mB, P); + //} + } + + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; + } + + b2TracyCZoneEnd(prepare_contact); +} + +void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +{ + b2TracyCZoneNC(warm_start_contact, "Warm Start", b2_colorGreen1, true); b2SolverBody* bodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; @@ -747,6 +893,7 @@ static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBod bodyB->angularVelocity = wB; } + static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, bool useBias) { b2SimdBody bA = b2GatherBodies(bodies, c->indexA); @@ -918,7 +1065,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol b2ScatterBodies(bodies, c->indexB, &bB); } -void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +void b2SolveContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); @@ -948,7 +1095,7 @@ void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskConte b2TracyCZoneEnd(solve_contact); } -void b2SolveContactAVXsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); @@ -965,7 +1112,7 @@ void b2SolveContactAVXsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskCo b2TracyCZoneEnd(solve_contact); } -void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +void b2StoreImpulses(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); @@ -988,7 +1135,7 @@ void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskConte b2TracyCZoneEnd(store_impulses); } -void b2StoreImpulsesTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); diff --git a/src/contact_solver.h b/src/contact_solver.h index dc7d7ced..606e4bb4 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -39,11 +39,13 @@ typedef struct b2ContactConstraint b2ContactConstraintType type; } b2ContactConstraint; -typedef __m256 b2Float8; +// Wide float +typedef __m256 b2FloatW; +// Wide vec2 typedef struct b2Vec2W { - b2Float8 X, Y; + b2FloatW X, Y; } b2Vec2W; typedef struct b2ContactConstraintAVX @@ -65,11 +67,14 @@ typedef struct b2ContactConstraintAVX __m256 impulseCoefficient; } b2ContactConstraintAVX; -void b2PrepareContactsTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); -void b2WarmStartContactConstraints(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2SolveContactAVXsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); -void b2StoreImpulsesTaskAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +// Scalar +void b2PrepareContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2WarmStartContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2StoreImpulses(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); -void b2PrepareContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2SolveContactsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); -void b2StoreImpulsesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +// AVX versions +void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.c b/src/graph.c index 7745d738..f54d6571 100644 --- a/src/graph.c +++ b/src/graph.c @@ -43,7 +43,14 @@ void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); color->jointArray = b2CreateArray(sizeof(int32_t), jointCapacity); + + color->contactConstraints = NULL; + color->contactConstraintAVXs = NULL; } + + graph->overflow.contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + graph->overflow.jointArray = b2CreateArray(sizeof(int32_t), jointCapacity); + graph->overflow.contactConstraints = NULL; } void b2DestroyGraph(b2Graph* graph) @@ -55,6 +62,9 @@ void b2DestroyGraph(b2Graph* graph) b2DestroyArray(color->contactArray, sizeof(int32_t)); b2DestroyArray(color->jointArray, sizeof(int32_t)); } + + b2DestroyArray(graph->overflow.contactArray, sizeof(int32_t)); + b2DestroyArray(graph->overflow.jointArray, sizeof(int32_t)); } void b2AddContactToGraph(b2World* world, b2Contact* contact) @@ -128,7 +138,13 @@ void b2AddContactToGraph(b2World* world, b2Contact* contact) } } - B2_ASSERT(contact->colorIndex != B2_NULL_INDEX && contact->colorSubIndex != B2_NULL_INDEX); + // Overflow + if (contact->colorIndex == B2_NULL_INDEX) + { + contact->colorSubIndex = b2Array(graph->overflow.contactArray).count; + b2Array_Push(graph->overflow.contactArray, contact->object.index); + contact->colorIndex = b2_overflowIndex; + } } void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) @@ -138,6 +154,22 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) b2Graph* graph = &world->graph; + // Overflow + if (contact->colorIndex == b2_overflowIndex) + { + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(graph->overflow.contactArray, colorSubIndex); + if (colorSubIndex < b2Array(graph->overflow.contactArray).count) + { + // Fix index on swapped contact + int32_t swappedIndex = graph->overflow.contactArray[colorSubIndex]; + B2_ASSERT(world->contacts[swappedIndex].colorIndex == b2_overflowIndex); + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; + } + + return; + } + B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); int32_t bodyIndexA = contact->edges[0].bodyIndex; int32_t bodyIndexB = contact->edges[1].bodyIndex; @@ -268,6 +300,8 @@ void b2AddJointToGraph(b2World* world, b2Joint* joint) } } + // TODO_ERIN handle joint overflow + B2_ASSERT(joint->colorIndex != B2_NULL_INDEX && joint->colorSubIndex != B2_NULL_INDEX); } @@ -601,15 +635,15 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stagePrepareContacts: - b2PrepareContactsTaskAVX(startIndex, endIndex, context); + b2PrepareContactsAVX(startIndex, endIndex, context); break; case b2_stageWarmStartContacts: - b2WarmStartContactConstraints(startIndex, endIndex, context, stage->colorIndex); + b2WarmStartContactsAVX(startIndex, endIndex, context, stage->colorIndex); break; case b2_stageSolveContacts: - b2SolveContactAVXsTask(startIndex, endIndex, context, stage->colorIndex, true); + b2SolveContactsAVX(startIndex, endIndex, context, stage->colorIndex, true); break; case b2_stageIntegratePositions: @@ -617,7 +651,7 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stageCalmContacts: - b2SolveContactAVXsTask(startIndex, endIndex, context, stage->colorIndex, false); + b2SolveContactsAVX(startIndex, endIndex, context, stage->colorIndex, false); break; case b2_stageFinalizeBodies: @@ -625,7 +659,7 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stageStoreImpulses: - b2StoreImpulsesTaskAVX(startIndex, endIndex, context); + b2StoreImpulsesAVX(startIndex, endIndex, context); break; } } @@ -737,169 +771,6 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex } } -#if B2_AVX == 0 -// This should not use the thread index because thread 0 can be called twice by enkiTS. -void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) -{ - B2_MAYBE_UNUSED(startIndex); - B2_MAYBE_UNUSED(endIndex); - B2_MAYBE_UNUSED(threadIndexDontUse); - - b2WorkerContext* workerContext = taskContext; - int32_t workerIndex = workerContext->workerIndex; - b2SolverTaskContext* context = workerContext->context; - int32_t activeColorCount = context->activeColorCount; - b2SolverStage* stages = context->stages; - - if (workerIndex == 0) - { - // Main thread synchronizes the workers and does work itself. - // - // Stages are re-used for loops so that I don't need more stages for large iteration counts. - // The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks. - // The stage index and sync indices are combined in to sync bits for atomic synchronization. - // The workers need to compute the previous sync index for a given stage so that CAS works correctly. This - // setup makes this easy to do. - - /* - b2_stageIntegrateVelocities = 0, - b2_stagePrepareContacts, - b2_stageWarmStartContacts, - b2_stagePrepareJoints, - b2_stageSolveJoints, - b2_stageSolveContacts, - b2_stageIntegratePositions, - b2_stageCalmJoints, - b2_stageCalmContacts, - b2_stageFinalizeBodies, - b2_stageStoreImpulses - */ - - int32_t bodySyncIndex = 1; - int32_t stageIndex = 0; - uint32_t syncBits = (bodySyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - stageIndex += 1; - bodySyncIndex += 1; - - int32_t graphSyncIndex = 1; - for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) - { - syncBits = (graphSyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - stageIndex += 1; - } - graphSyncIndex += 1; - - // TODO_ERIN single threaded - B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints); - b2PrepareJointsTask(context); - stageIndex += 1; - - int32_t velocityIterations = context->velocityIterations; - for (int32_t i = 0; i < velocityIterations; ++i) - { - // stage index restarted each iteration - int32_t iterStageIndex = stageIndex; - - B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveJoints); - b2SolveJointsTask(context, true); - iterStageIndex += 1; - - for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) - { - syncBits = (graphSyncIndex << 16) | iterStageIndex; - B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveContacts); - b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); - iterStageIndex += 1; - } - graphSyncIndex += 1; - - B2_ASSERT(stages[iterStageIndex].type == b2_stageIntegratePositions); - syncBits = (bodySyncIndex << 16) | iterStageIndex; - b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); - bodySyncIndex += 1; - } - - stageIndex += 1 + activeColorCount + 1; - - syncBits = (bodySyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageFinalizePositions); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - stageIndex += 1; - - int32_t calmIterations = context->calmIterations; - for (int32_t i = 0; i < calmIterations; ++i) - { - // stage index restarted each iteration - int32_t iterStageIndex = stageIndex; - - B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmJoints); - b2SolveJointsTask(context, false); - iterStageIndex += 1; - - for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) - { - syncBits = (graphSyncIndex << 16) | iterStageIndex; - B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmContacts); - b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); - iterStageIndex += 1; - } - graphSyncIndex += 1; - } - - stageIndex += 1 + activeColorCount; - - uint32_t constraintSyncIndex = 1; - syncBits = (constraintSyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - - // Signal workers to finish - atomic_store(&context->syncBits, UINT_MAX); - - B2_ASSERT(stageIndex + 1 == context->stageCount); - return; - } - - // Worker - uint32_t lastSyncBits = 0; - - while (true) - { - // Spin until main thread bumps changes the sync bits - uint32_t syncBits = atomic_load(&context->syncBits); - while (syncBits == lastSyncBits) - { - _mm_pause(); - syncBits = atomic_load(&context->syncBits); - } - - if (syncBits == UINT_MAX) - { - // sentinel hit - break; - } - - int32_t stageIndex = syncBits & 0xFFFF; - B2_ASSERT(stageIndex < context->stageCount); - - int32_t syncIndex = (syncBits >> 16) & 0xFFFF; - B2_ASSERT(syncIndex > 0); - - int32_t previousSyncIndex = syncIndex - 1; - - b2SolverStage* stage = stages + stageIndex; - b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, workerIndex); - - lastSyncBits = syncBits; - } -} -#else - -// AVX // This should not use the thread index because thread 0 can be called twice by enkiTS. void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext) { @@ -967,6 +838,8 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2PrepareJointsTask(context); stageIndex += 1; + b2PrepareContactsTask(); + int32_t velocityIterations = context->velocityIterations; for (int32_t i = 0; i < velocityIterations; ++i) { @@ -1066,7 +939,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont lastSyncBits = syncBits; } } -#endif // Threading: // 1. build array of awake bodies, maybe copy to contiguous array @@ -1081,13 +953,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont // End loop // 7. parallel-for store impulses // 8. parallel-for update aabbs, build proxy update set, build awake contact set - -// Soft constraints with constraint error substepping. Allows for stiffer contacts with a small performance hit. Includes a -// bias removal stage to help remove excess bias energy. -// http://mmacklin.com/smallsteps.pdf -// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf - -#if B2_AVX == 0 void b2SolveGraph(b2World* world, b2StepContext* stepContext) { b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); @@ -1163,7 +1028,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t colorBlockSize[b2_graphColorCount]; int32_t colorBlockCounts[b2_graphColorCount]; - int32_t graphBlockSize = 1 << 2; int32_t activeColorCount = 0; int32_t graphBlockCount = 0; int32_t constraintCount = 0; @@ -1174,44 +1038,63 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t count = b2Array(colors[i].contactArray).count; if (count > 0) { + int32_t avxCount = ((count - 1) >> 3) + 1; activeColorIndices[c] = i; - colorConstraintCounts[c] = count; - int32_t blockCount = ((count - 1) >> 2) + 1; - colorBlockSize[c] = graphBlockSize; - if (count > blocksPerWorker * graphBlockSize * workerCount) - { - colorBlockSize[c] = count / (blocksPerWorker * workerCount); - blockCount = blocksPerWorker * workerCount; - } + colorConstraintCounts[c] = avxCount; + + int32_t blockSize = 4; + int32_t blockCount = ((avxCount - 1) >> 2) + 1; + colorBlockSize[c] = blockSize; colorBlockCounts[c] = blockCount; graphBlockCount += blockCount; - constraintCount += count; + constraintCount += avxCount; c += 1; } } activeColorCount = c; - b2ContactConstraint* constraints = - b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraint), "constraint"); + b2ContactConstraintAVX* constraints = + b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "contact constraint"); + + int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); int32_t base = 0; - for (int32_t i = 0; i < activeColorCount; ++i) { int32_t j = activeColorIndices[i]; - colors[j].contactConstraints = constraints + base; - base += b2Array(colors[j].contactArray).count; + b2GraphColor* color = colors + j; + + color->contactConstraintAVXs = constraints + base; + + int32_t colorContactCount = b2Array(color->contactArray).count; + for (int32_t k = 0; k < colorContactCount; ++k) + { + contactIndices[8 * base + k] = color->contactArray[k]; + } + + // remainder + int32_t colorConstraintCount = colorConstraintCounts[i]; + for (int32_t k = colorContactCount; k < 8 * colorConstraintCount; ++k) + { + contactIndices[8 * base + k] = B2_NULL_INDEX; + } + + base += colorConstraintCount; } - int32_t storeBlockSize = 1 << 4; - int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 4) + 1 : 0; + int32_t storeBlockSize = 4; + int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 2) + 1 : 0; if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) { storeBlockSize = constraintCount / (blocksPerWorker * workerCount); storeBlockCount = blocksPerWorker * workerCount; } + int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; + graph->overflow.contactConstraints = + b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); + /* b2_stageIntegrateVelocities = 0, b2_stagePrepareContacts, @@ -1232,6 +1115,8 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) // b2_stageIntegrateVelocities stageCount += 1; // b2_stagePrepareContacts + stageCount += 1; + // b2_stageWarmStartContacts stageCount += activeColorCount; // b2_stagePrepareJoints stageCount += 1; @@ -1302,9 +1187,17 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; // Prepare constraints + stage->type = b2_stagePrepareContacts; + stage->blocks = storeBlocks; + stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Warm start contacts for (int32_t i = 0; i < activeColorCount; ++i) { - stage->type = b2_stagePrepareContacts; + stage->type = b2_stageWarmStartContacts; stage->blocks = colorBlocks[i]; stage->blockCount = colorBlockCounts[i]; stage->colorIndex = activeColorIndices[i]; @@ -1347,14 +1240,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage->completionCount = 0; stage += 1; - // Finalize positions - stage->type = b2_stageFinalizePositions; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - // Calm joints stage->type = b2_stageCalmJoints; stage->blocks = NULL; @@ -1374,6 +1259,14 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; } + // Finalize bodies + stage->type = b2_stageFinalizeBodies; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + // Store impulses stage->type = b2_stageStoreImpulses; stage->blocks = storeBlocks; @@ -1401,7 +1294,9 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) context.bodyToSolverMap = bodyToSolverMap; context.solverToBodyMap = solverToBodyMap; context.stepContext = stepContext; - context.constraints = constraints; + context.constraints = NULL; + context.constraintAVXs = constraints; + context.contactIndices = contactIndices; context.activeColorCount = activeColorCount; context.velocityIterations = velIters; context.calmIterations = stepContext->positionIterations; @@ -1430,389 +1325,10 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); + b2FreeStackItem(world->stackAllocator, contactIndices); b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, bodyToSolverMap); b2FreeStackItem(world->stackAllocator, solverToBodyMap); b2FreeStackItem(world->stackAllocator, solverBodies); b2FreeStackItem(world->stackAllocator, awakeBodies); } - -#else - -void b2SolveGraph(b2World* world, b2StepContext* stepContext) -{ - b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); - - b2Graph* graph = &world->graph; - b2GraphColor* colors = graph->colors; - - int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; - int32_t awakeBodyCount = 0; - for (int32_t i = 0; i < awakeIslandCount; ++i) - { - int32_t islandIndex = world->awakeIslandArray[i]; - b2Island* island = world->islands + islandIndex; - awakeBodyCount += island->bodyCount; - } - - if (awakeBodyCount == 0) - { - return; - } - - b2Body* bodies = world->bodies; - b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); - b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); - - // Map from solver body to body - // TODO_ERIN have body directly reference solver body for user access - int32_t* solverToBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); - - int32_t bodyCapacity = world->bodyPool.capacity; - int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); - memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); - - int32_t index = 0; - for (int32_t i = 0; i < awakeIslandCount; ++i) - { - int32_t islandIndex = world->awakeIslandArray[i]; - b2Island* island = world->islands + islandIndex; - int32_t bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - B2_ASSERT(b2ObjectValid(&body->object)); - B2_ASSERT(body->object.index == bodyIndex); - - awakeBodies[index] = body; - - B2_ASSERT(0 < bodyIndex && bodyIndex < bodyCapacity); - bodyToSolverMap[bodyIndex] = index; - solverToBodyMap[index] = bodyIndex; - - // cache miss - bodyIndex = body->islandNext; - - index += 1; - } - } - B2_ASSERT(index == awakeBodyCount); - - int32_t workerCount = world->workerCount; - const int32_t blocksPerWorker = 6; - - int32_t bodyBlockSize = 1 << 5; - int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 5) + 1; - if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) - { - bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); - bodyBlockCount = blocksPerWorker * workerCount; - } - - int32_t activeColorIndices[b2_graphColorCount]; - int32_t colorConstraintCounts[b2_graphColorCount]; - int32_t colorBlockSize[b2_graphColorCount]; - int32_t colorBlockCounts[b2_graphColorCount]; - - int32_t activeColorCount = 0; - int32_t graphBlockCount = 0; - int32_t constraintCount = 0; - - int32_t c = 0; - for (int32_t i = 0; i < b2_graphColorCount; ++i) - { - int32_t count = b2Array(colors[i].contactArray).count; - if (count > 0) - { - int32_t avxCount = ((count - 1) >> 3) + 1; - activeColorIndices[c] = i; - colorConstraintCounts[c] = avxCount; - - int32_t blockSize = 4; - int32_t blockCount = ((avxCount - 1) >> 2) + 1; - - colorBlockSize[c] = blockSize; - colorBlockCounts[c] = blockCount; - graphBlockCount += blockCount; - constraintCount += avxCount; - c += 1; - } - } - activeColorCount = c; - - b2ContactConstraintAVX* constraints = - b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "contact constraint"); - - int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); - - int32_t base = 0; - for (int32_t i = 0; i < activeColorCount; ++i) - { - int32_t j = activeColorIndices[i]; - b2GraphColor* color = colors + j; - - color->contactConstraintAVXs = constraints + base; - - int32_t colorContactCount = b2Array(color->contactArray).count; - for (int32_t k = 0; k < colorContactCount; ++k) - { - contactIndices[8 * base + k] = color->contactArray[k]; - } - - // remainder - int32_t colorConstraintCount = colorConstraintCounts[i]; - for (int32_t k = colorContactCount; k < 8 * colorConstraintCount; ++k) - { - contactIndices[8 * base + k] = B2_NULL_INDEX; - } - - base += colorConstraintCount; - } - - int32_t storeBlockSize = 4; - int32_t storeBlockCount = constraintCount > 0 ? ((constraintCount - 1) >> 2) + 1 : 0; - if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) - { - storeBlockSize = constraintCount / (blocksPerWorker * workerCount); - storeBlockCount = blocksPerWorker * workerCount; - } - - /* - b2_stageIntegrateVelocities = 0, - b2_stagePrepareContacts, - b2_stageWarmStartContacts, - b2_stagePrepareJoints, - b2_stageSolveJoints, - b2_stageSolveContacts, - b2_stageIntegratePositions, - b2_stageFinalizePositions, - b2_stageCalmJoints, - b2_stageCalmContacts, - b2_stageStoreImpulses - */ - - // TODO_ERIN joint tasks - int32_t stageCount = 0; - - // b2_stageIntegrateVelocities - stageCount += 1; - // b2_stagePrepareContacts - stageCount += 1; - // b2_stageWarmStartContacts - stageCount += activeColorCount; - // b2_stagePrepareJoints - stageCount += 1; - // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions - stageCount += 1 + activeColorCount + 1; - // b2_stageFinalizePositions - stageCount += 1; - // b2_stageCalmJoints, b2_stageCalmContacts - stageCount += 1 + activeColorCount; - // b2_stageStoreImpulses - stageCount += 1; - - b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); - b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); - b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); - b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); - - for (int32_t i = 0; i < bodyBlockCount; ++i) - { - b2SolverBlock* block = bodyBlocks + i; - block->startIndex = i * bodyBlockSize; - block->endIndex = block->startIndex + bodyBlockSize; - block->syncIndex = 0; - } - bodyBlocks[bodyBlockCount - 1].endIndex = awakeBodyCount; - - b2SolverBlock* colorBlocks[b2_graphColorCount]; - b2SolverBlock* baseGraphBlock = graphBlocks; - - for (int32_t i = 0; i < activeColorCount; ++i) - { - int32_t blockCount = colorBlockCounts[i]; - int32_t blockSize = colorBlockSize[i]; - for (int32_t j = 0; j < blockCount; ++j) - { - b2SolverBlock* block = baseGraphBlock + j; - block->startIndex = j * blockSize; - block->endIndex = block->startIndex + blockSize; - atomic_store(&block->syncIndex, 0); - } - baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; - - colorBlocks[i] = baseGraphBlock; - baseGraphBlock += blockCount; - } - - for (int32_t i = 0; i < storeBlockCount; ++i) - { - b2SolverBlock* block = storeBlocks + i; - block->startIndex = i * storeBlockSize; - block->endIndex = block->startIndex + storeBlockSize; - block->syncIndex = 0; - } - - if (storeBlockCount > 0) - { - storeBlocks[storeBlockCount - 1].endIndex = constraintCount; - } - - b2SolverStage* stage = stages; - - // Integrate velocities - stage->type = b2_stageIntegrateVelocities; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Prepare constraints - stage->type = b2_stagePrepareContacts; - stage->blocks = storeBlocks; - stage->blockCount = storeBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Warm start contacts - for (int32_t i = 0; i < activeColorCount; ++i) - { - stage->type = b2_stageWarmStartContacts; - stage->blocks = colorBlocks[i]; - stage->blockCount = colorBlockCounts[i]; - stage->colorIndex = activeColorIndices[i]; - stage->completionCount = 0; - stage += 1; - } - - // Prepare joints - stage->type = b2_stagePrepareJoints; - stage->blocks = NULL; - stage->blockCount = 0; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Solve joints - stage->type = b2_stageSolveJoints; - stage->blocks = NULL; - stage->blockCount = 0; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Solve constraints - for (int32_t i = 0; i < activeColorCount; ++i) - { - stage->type = b2_stageSolveContacts; - stage->blocks = colorBlocks[i]; - stage->blockCount = colorBlockCounts[i]; - stage->colorIndex = activeColorIndices[i]; - stage->completionCount = 0; - stage += 1; - } - - // Integrate positions - stage->type = b2_stageIntegratePositions; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Calm joints - stage->type = b2_stageCalmJoints; - stage->blocks = NULL; - stage->blockCount = 0; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Calm constraints - for (int32_t i = 0; i < activeColorCount; ++i) - { - stage->type = b2_stageCalmContacts; - stage->blocks = colorBlocks[i]; - stage->blockCount = colorBlockCounts[i]; - stage->colorIndex = activeColorIndices[i]; - stage->completionCount = 0; - stage += 1; - } - - // Finalize bodies - stage->type = b2_stageFinalizeBodies; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - // Store impulses - stage->type = b2_stageStoreImpulses; - stage->blocks = storeBlocks; - stage->blockCount = storeBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - - B2_ASSERT((int32_t)(stage - stages) == stageCount); - - B2_ASSERT(workerCount <= 16); - b2WorkerContext workerContext[16]; - - int32_t velIters = B2_MAX(1, stepContext->velocityIterations); - - stepContext->solverBodies = solverBodies; - stepContext->solverToBodyMap = solverToBodyMap; - stepContext->bodyToSolverMap = bodyToSolverMap; - - b2SolverTaskContext context; - context.world = world; - context.graph = graph; - context.awakeBodies = awakeBodies; - context.solverBodies = solverBodies; - context.bodyToSolverMap = bodyToSolverMap; - context.solverToBodyMap = solverToBodyMap; - context.stepContext = stepContext; - context.constraints = NULL; - context.constraintAVXs = constraints; - context.contactIndices = contactIndices; - context.activeColorCount = activeColorCount; - context.velocityIterations = velIters; - context.calmIterations = stepContext->positionIterations; - context.workerCount = workerCount; - context.stageCount = stageCount; - context.stages = stages; - context.timeStep = stepContext->dt; - context.invTimeStep = stepContext->inv_dt; - context.subStep = context.timeStep / velIters; - context.invSubStep = velIters * stepContext->inv_dt; - context.syncBits = 0; - - b2TracyCZoneEnd(prepare_stages); - - // Must use worker index because thread 0 can be assigned multiple tasks by enkiTS - for (int32_t i = 0; i < workerCount; ++i) - { - workerContext[i].context = &context; - workerContext[i].workerIndex = i; - world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); - } - - world->finishAllTasksFcn(world->userTaskContext); - - b2FreeStackItem(world->stackAllocator, storeBlocks); - b2FreeStackItem(world->stackAllocator, graphBlocks); - b2FreeStackItem(world->stackAllocator, bodyBlocks); - b2FreeStackItem(world->stackAllocator, stages); - b2FreeStackItem(world->stackAllocator, contactIndices); - b2FreeStackItem(world->stackAllocator, constraints); - b2FreeStackItem(world->stackAllocator, bodyToSolverMap); - b2FreeStackItem(world->stackAllocator, solverToBodyMap); - b2FreeStackItem(world->stackAllocator, solverBodies); - b2FreeStackItem(world->stackAllocator, awakeBodies); -} - -#endif diff --git a/src/graph.h b/src/graph.h index c3d2181f..db651347 100644 --- a/src/graph.h +++ b/src/graph.h @@ -13,8 +13,8 @@ typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; -// TODO_ERIN fixme -#define b2_graphColorCount 64 +#define b2_graphColorCount 16 +#define b2_overflowIndex b2_graphColorCount typedef struct b2GraphColor { @@ -42,6 +42,8 @@ typedef struct b2Graph { b2GraphColor colors[b2_graphColorCount]; int32_t colorCount; + + b2GraphOverflow overflow; } b2Graph; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity); From ef603d1d0df4ecb21d47d24fc39c88a487fc376f Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 15 Oct 2023 22:30:49 -0700 Subject: [PATCH 38/51] overflow contact contraints support --- samples/collection/benchmark_tumbler.cpp | 4 +- src/contact_solver.c | 883 +++++++---------------- src/contact_solver.h | 15 +- src/graph.c | 12 +- src/island.c | 3 +- src/joint.c | 6 +- src/world.c | 3 +- 7 files changed, 295 insertions(+), 631 deletions(-) diff --git a/samples/collection/benchmark_tumbler.cpp b/samples/collection/benchmark_tumbler.cpp index cb47a91f..d6087a73 100644 --- a/samples/collection/benchmark_tumbler.cpp +++ b/samples/collection/benchmark_tumbler.cpp @@ -79,11 +79,11 @@ class BenchmarkTumbler : public Sample { Sample::Step(settings); - for (int32_t i = 0; i < 10 && m_count < m_maxCount; ++i) + for (int32_t i = 0; i < 5 && m_count < m_maxCount; ++i) { b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.position = {0.25f * i, 10.0f}; + bd.position = {0.25f * i, 10.0f + 1.0f * (m_stepCount & 1)}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); b2ShapeDef sd = b2DefaultShapeDef(); diff --git a/src/contact_solver.c b/src/contact_solver.c index d8260e7a..0abc2599 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -18,6 +18,271 @@ #define maxBaumgarteVelocity 3.0f +void b2PrepareOverflowContacts(b2SolverTaskContext* context) +{ + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); + + b2World* world = context->world; + b2Graph* graph = context->graph; + b2Contact* contacts = world->contacts; + const int32_t* bodyMap = context->bodyToSolverMap; + b2SolverBody* solverBodies = context->solverBodies; + + b2ContactConstraint* constraints = graph->overflow.contactConstraints; + int32_t* contactIndices = graph->overflow.contactArray; + int32_t contactCount = b2Array(graph->overflow.contactArray).count; + + // This is a dummy body to represent a static body because static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = 30.0f; + + float h = context->timeStep; + bool enableWarmStarting = world->enableWarmStarting; + + for (int32_t i = 0; i < contactCount; ++i) + { + b2Contact* contact = contacts + contactIndices[i]; + + const b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; + + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + b2ContactConstraint* constraint = constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->pointCount = pointCount; + + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; + b2Vec2 vA = solverBodyA->linearVelocity; + float wA = solverBodyA->angularVelocity; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float c = h * omega * (2.0f * zeta + h * omega); + constraint->impulseCoefficient = 1.0f / (1.0f + c); + constraint->massCoefficient = c * constraint->impulseCoefficient; + constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) + { + const b2ManifoldPoint* mp = manifold->points + j; + b2ContactConstraintPoint* cp = constraint->points + j; + + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; + + cp->rA = mp->anchorA; + cp->rB = mp->anchorB; + + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + cp->separation = mp->separation; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + // Warm start + if (enableWarmStarting) + { + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); + } + } + + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; + } + + b2TracyCZoneEnd(prepare_contact); +} + +void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias) +{ + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); + + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; + float inv_dt = context->invTimeStep; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + for (int32_t i = 0; i < count; ++i) + { + b2ContactConstraint* constraint = constraints + i; + + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 dpA = bodyA->deltaPosition; + float daA = bodyA->deltaAngle; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; + + int32_t pointCount = constraint->pointCount; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ContactConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) + { + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; + } + else if (useBias) + { + bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; + } + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ContactConstraintPoint* cp = constraint->points + j; + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); + + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); + + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } + + b2TracyCZoneEnd(solve_contact); +} + +void b2StoreOverflowImpulses(b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); + + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; + + for (int32_t i = 0; i < count; ++i) + { + b2ContactConstraint* constraint = constraints + i; + b2Contact* contact = constraint->contact; + b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; + + for (int32_t j = 0; j < pointCount; ++j) + { + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; + } + } + + b2TracyCZoneEnd(store_impulses); +} + #define add(a, b) _mm256_add_ps((a), (b)) #define sub(a, b) _mm256_sub_ps((a), (b)) #define mul(a, b) _mm256_mul_ps((a), (b)) @@ -117,139 +382,10 @@ static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict ind _mm256_store_ps((float*)(bodies + indices[4]), _mm256_permute2f128_ps(tt0, tt4, 0x31)); if (indices[5] != B2_NULL_INDEX) _mm256_store_ps((float*)(bodies + indices[5]), _mm256_permute2f128_ps(tt1, tt5, 0x31)); - if (indices[6] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); - if (indices[7] != B2_NULL_INDEX) - _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); -} - -void b2PrepareContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) -{ - b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); - - b2World* world = context->world; - b2Graph* graph = context->graph; - b2Contact* contacts = world->contacts; - const int32_t* bodyMap = context->bodyToSolverMap; - b2SolverBody* solverBodies = context->solverBodies; - - b2ContactConstraint* constraints; - int32_t* contactIndices; - - if (colorIndex == b2_overflowIndex) - { - B2_ASSERT(startIndex == 0); - B2_ASSERT(endIndex == b2Array(graph->overflow.contactArray).count); - contactIndices = graph->overflow.contactArray; - constraints = graph->overflow.contactConstraints; - } - else - { - b2GraphColor* color = graph->colors + colorIndex; - contactIndices = color->contactArray; - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - constraints = color->contactConstraints; - } - - // This is a dummy body to represent a static body because static bodies don't have a solver body. - b2SolverBody dummyBody = {0}; - - // 30 is a bit soft, 60 oscillates too much - // const float contactHertz = 45.0f; - // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; - - float h = context->timeStep; - //bool enableWarmStarting = world->enableWarmStarting; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; - - B2_ASSERT(0 < pointCount && pointCount <= 2); - - int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; - int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; - - b2ContactConstraint* constraint = constraints + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - - b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; - b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; - - float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; - b2Vec2 vA = solverBodyA->linearVelocity; - float wA = solverBodyA->angularVelocity; - float mA = solverBodyA->invMass; - float iA = solverBodyA->invI; - - b2Vec2 vB = solverBodyB->linearVelocity; - float wB = solverBodyB->angularVelocity; - float mB = solverBodyB->invMass; - float iB = solverBodyB->invI; - - constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - float c = h * omega * (2.0f * zeta + h * omega); - constraint->impulseCoefficient = 1.0f / (1.0f + c); - constraint->massCoefficient = c * constraint->impulseCoefficient; - constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ContactConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; - - cp->rA = mp->anchorA; - cp->rB = mp->anchorB; - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - cp->separation = mp->separation; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - - // Warm start - //if (enableWarmStarting) - //{ - // b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - // wA -= iA * b2Cross(cp->rA, P); - // vA = b2MulAdd(vA, -mA, P); - // wB += iB * b2Cross(cp->rB, P); - // vB = b2MulAdd(vB, mB, P); - //} - } - - solverBodyA->linearVelocity = vA; - solverBodyA->angularVelocity = wA; - solverBodyB->linearVelocity = vB; - solverBodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(prepare_contact); + if (indices[6] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); + if (indices[7] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); } void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) @@ -418,135 +554,6 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont b2TracyCZoneEnd(prepare_contact); } -void b2WarmStartContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) -{ - b2TracyCZoneNC(prepare_contact, "Warm Start", b2_colorYellow, true); - - b2World* world = context->world; - b2Graph* graph = context->graph; - b2Contact* contacts = world->contacts; - const int32_t* bodyMap = context->bodyToSolverMap; - b2SolverBody* solverBodies = context->solverBodies; - - b2ContactConstraint* constraints; - int32_t* contactIndices; - - if (colorIndex == b2_overflowIndex) - { - B2_ASSERT(startIndex == 0); - B2_ASSERT(endIndex == b2Array(graph->overflow.contactArray).count); - contactIndices = graph->overflow.contactArray; - constraints = graph->overflow.contactConstraints; - } - else - { - b2GraphColor* color = graph->colors + colorIndex; - contactIndices = color->contactArray; - B2_ASSERT(startIndex <= b2Array(color->contactArray).count); - B2_ASSERT(endIndex <= b2Array(color->contactArray).count); - constraints = color->contactConstraints; - } - - // This is a dummy body to represent a static body because static bodies don't have a solver body. - b2SolverBody dummyBody = {0}; - - // 30 is a bit soft, 60 oscillates too much - // const float contactHertz = 45.0f; - // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; - - float h = context->timeStep; - // bool enableWarmStarting = world->enableWarmStarting; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2Contact* contact = contacts + contactIndices[i]; - - const b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; - - B2_ASSERT(0 < pointCount && pointCount <= 2); - - int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; - int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; - - b2ContactConstraint* constraint = constraints + i; - constraint->contact = contact; - constraint->indexA = indexA; - constraint->indexB = indexB; - constraint->normal = manifold->normal; - constraint->friction = contact->friction; - - b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; - b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; - - float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 2.0f * contactHertz : contactHertz; - b2Vec2 vA = solverBodyA->linearVelocity; - float wA = solverBodyA->angularVelocity; - float mA = solverBodyA->invMass; - float iA = solverBodyA->invI; - - b2Vec2 vB = solverBodyB->linearVelocity; - float wB = solverBodyB->angularVelocity; - float mB = solverBodyB->invMass; - float iB = solverBodyB->invI; - - constraint->type = pointCount == 1 ? b2_onePointType : b2_twoPointType; - - // Stiffer for static contacts to avoid bodies getting pushed through the ground - const float zeta = 1.0f; - float omega = 2.0f * b2_pi * hertz; - float c = h * omega * (2.0f * zeta + h * omega); - constraint->impulseCoefficient = 1.0f / (1.0f + c); - constraint->massCoefficient = c * constraint->impulseCoefficient; - constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(constraint->normal); - - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* mp = manifold->points + j; - b2ContactConstraintPoint* cp = constraint->points + j; - - cp->normalImpulse = mp->normalImpulse; - cp->tangentImpulse = mp->tangentImpulse; - - cp->rA = mp->anchorA; - cp->rB = mp->anchorB; - - float rnA = b2Cross(cp->rA, normal); - float rnB = b2Cross(cp->rB, normal); - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - - float rtA = b2Cross(cp->rA, tangent); - float rtB = b2Cross(cp->rB, tangent); - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - - cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; - cp->separation = mp->separation; - cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - - // Warm start - // if (enableWarmStarting) - //{ - // b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); - // wA -= iA * b2Cross(cp->rA, P); - // vA = b2MulAdd(vA, -mA, P); - // wB += iB * b2Cross(cp->rB, P); - // vB = b2MulAdd(vB, mB, P); - //} - } - - solverBodyA->linearVelocity = vA; - solverBodyA->angularVelocity = wA; - solverBodyB->linearVelocity = vB; - solverBodyB->angularVelocity = wB; - } - - b2TracyCZoneEnd(prepare_contact); -} - void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { b2TracyCZoneNC(warm_start_contact, "Warm Start", b2_colorGreen1, true); @@ -594,305 +601,6 @@ void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCo b2TracyCZoneEnd(warm_start_contact); } -static void b2SolveContactOnePoint(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) -{ - // This is a dummy body to represent a static body since static bodies don't have a solver body. - b2SolverBody dummyBody = {0}; - - b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 dpA = bodyA->deltaPosition; - float daA = bodyA->deltaAngle; - float mA = bodyA->invMass; - float iA = bodyA->invI; - - b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 dpB = bodyB->deltaPosition; - float daB = bodyB->deltaAngle; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - float biasCoefficient = constraint->biasCoefficient; - float massCoefficient = constraint->massCoefficient; - float impulseCoefficient = constraint->impulseCoefficient; - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - -static void b2SolveContactTwoPoints(b2ContactConstraint* constraint, b2SolverBody* bodies, float inv_dt, bool useBias) -{ - // This is a dummy body to represent a static body since static bodies don't have a solver body. - b2SolverBody dummyBody = {0}; - - b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 dpA = bodyA->deltaPosition; - float daA = bodyA->deltaAngle; - float mA = bodyA->invMass; - float iA = bodyA->invI; - - b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - b2Vec2 dpB = bodyB->deltaPosition; - float daB = bodyB->deltaAngle; - float mB = bodyB->invMass; - float iB = bodyB->invI; - - b2Vec2 normal = constraint->normal; - b2Vec2 tangent = b2RightPerp(normal); - float friction = constraint->friction; - float biasCoefficient = constraint->biasCoefficient; - float massCoefficient = constraint->massCoefficient; - float impulseCoefficient = constraint->impulseCoefficient; - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 1; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute change in separation (small angle approximation of sin(angle) == angle) - b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); - b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); - float ds = b2Dot(b2Sub(prB, prA), normal); - float s = cp->separation + ds; - float bias = 0.0f; - float massScale = 1.0f; - float impulseScale = 0.0f; - if (s > 0.0f) - { - // TODO_ERIN what time to use? - // Speculative (inverse of full time step) - bias = s * inv_dt; - } - else if (useBias) - { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); - // bias = cp->biasCoefficient * s; - massScale = massCoefficient; - impulseScale = impulseCoefficient; - } - - // Compute normal impulse - float vn = b2Dot(dv, normal); - float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - - // Clamp the accumulated impulse - float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); - impulse = newImpulse - cp->normalImpulse; - cp->normalImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(impulse, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 0; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - { - b2ContactConstraintPoint* cp = constraint->points + 1; - - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); - - // Compute tangent force - float vt = b2Dot(dv, tangent); - float lambda = cp->tangentMass * (-vt); - - // Clamp the accumulated force - float maxFriction = friction * cp->normalImpulse; - float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - cp->tangentImpulse; - cp->tangentImpulse = newImpulse; - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(cp->rA, P); - - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(cp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; -} - static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, bool useBias) { @@ -1065,36 +773,6 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol b2ScatterBodies(bodies, c->indexB, &bB); } -void b2SolveContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) -{ - b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - - b2SolverBody* bodies = context->solverBodies; - b2ContactConstraint* constraints = context->graph->colors[colorIndex].contactConstraints; - float inv_dt = context->invTimeStep; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2ContactConstraint* constraint = constraints + i; - - switch (constraint->type) - { - case b2_onePointType: - b2SolveContactOnePoint(constraint, bodies, inv_dt, useBias); - break; - - case b2_twoPointType: - b2SolveContactTwoPoints(constraint, bodies, inv_dt, useBias); - break; - - default: - B2_ASSERT(false); - } - } - - b2TracyCZoneEnd(solve_contact); -} - void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); @@ -1112,29 +790,6 @@ void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContex b2TracyCZoneEnd(solve_contact); } -void b2StoreImpulses(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) -{ - b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - - b2ContactConstraint* constraints = context->constraints; - - for (int32_t i = startIndex; i < endIndex; ++i) - { - b2ContactConstraint* constraint = constraints + i; - b2Contact* contact = constraint->contact; - b2Manifold* manifold = &contact->manifold; - int32_t pointCount = manifold->pointCount; - - for (int32_t j = 0; j < pointCount; ++j) - { - manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; - manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; - } - } - - b2TracyCZoneEnd(store_impulses); -} - void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); diff --git a/src/contact_solver.h b/src/contact_solver.h index 606e4bb4..cde232b6 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -19,12 +19,6 @@ typedef struct b2ContactConstraintPoint float tangentMass; } b2ContactConstraintPoint; -typedef enum b2ContactConstraintType -{ - b2_onePointType, - b2_twoPointType, -} b2ContactConstraintType; - typedef struct b2ContactConstraint { b2Contact* contact; @@ -36,7 +30,7 @@ typedef struct b2ContactConstraint float massCoefficient; float biasCoefficient; float impulseCoefficient; - b2ContactConstraintType type; + int32_t pointCount; } b2ContactConstraint; // Wide float @@ -68,10 +62,9 @@ typedef struct b2ContactConstraintAVX } b2ContactConstraintAVX; // Scalar -void b2PrepareContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2WarmStartContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2SolveContacts(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); -void b2StoreImpulses(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2PrepareOverflowContacts(b2SolverTaskContext* context); +void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias); +void b2StoreOverflowImpulses(b2SolverTaskContext* context); // AVX versions void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.c b/src/graph.c index f54d6571..e60f2858 100644 --- a/src/graph.c +++ b/src/graph.c @@ -166,6 +166,9 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) B2_ASSERT(world->contacts[swappedIndex].colorIndex == b2_overflowIndex); world->contacts[swappedIndex].colorSubIndex = colorSubIndex; } + + contact->colorIndex = B2_NULL_INDEX; + contact->colorSubIndex = B2_NULL_INDEX; return; } @@ -838,7 +841,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2PrepareJointsTask(context); stageIndex += 1; - b2PrepareContactsTask(); + b2PrepareOverflowContacts(context); int32_t velocityIterations = context->velocityIterations; for (int32_t i = 0; i < velocityIterations; ++i) @@ -851,6 +854,8 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2SolveJointsTask(context, true); iterStageIndex += 1; + b2SolveOverflowContacts(context, true); + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { syncBits = (graphSyncIndex << 16) | iterStageIndex; @@ -878,6 +883,8 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2SolveJointsTask(context, false); iterStageIndex += 1; + b2SolveOverflowContacts(context, false); + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) { syncBits = (graphSyncIndex << 16) | iterStageIndex; @@ -899,6 +906,8 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, syncBits); + b2StoreOverflowImpulses(context); + // Signal workers to finish atomic_store(&context->syncBits, UINT_MAX); @@ -1325,6 +1334,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2FreeStackItem(world->stackAllocator, graphBlocks); b2FreeStackItem(world->stackAllocator, bodyBlocks); b2FreeStackItem(world->stackAllocator, stages); + b2FreeStackItem(world->stackAllocator, graph->overflow.contactConstraints); b2FreeStackItem(world->stackAllocator, contactIndices); b2FreeStackItem(world->stackAllocator, constraints); b2FreeStackItem(world->stackAllocator, bodyToSolverMap); diff --git a/src/island.c b/src/island.c index b7b17ca5..93346579 100644 --- a/src/island.c +++ b/src/island.c @@ -213,7 +213,8 @@ void b2WakeIsland(b2Island* island) { b2Joint* joint = world->joints + jointIndex; B2_ASSERT(joint->islandIndex == islandIndex); - b2AddJointToGraph(world, joint); + // TODO_JOINT_GRAPH + //b2AddJointToGraph(world, joint); jointIndex = joint->islandNext; } } diff --git a/src/joint.c b/src/joint.c index af0a1749..0531c964 100644 --- a/src/joint.c +++ b/src/joint.c @@ -128,7 +128,8 @@ static b2Joint* b2CreateJoint(b2World* world, b2Body* bodyA, b2Body* bodyB) if (b2IsBodyAwake(world, bodyA) || b2IsBodyAwake(world, bodyB)) { - b2AddJointToGraph(world, joint); + // TODO_JOINT_GRAPH + //b2AddJointToGraph(world, joint); } } @@ -371,6 +372,9 @@ void b2World_DestroyJoint(b2JointId jointId) b2UnlinkJoint(world, joint); + // TODO_JOINT_GRAPH + // b2RemoveJointFromGraph(joint); + b2FreeObject(&world->jointPool, &joint->object); } diff --git a/src/world.c b/src/world.c index adcfdd32..c92a89f1 100644 --- a/src/world.c +++ b/src/world.c @@ -1037,7 +1037,8 @@ static void b2Solve(b2World* world, b2StepContext* context) while (jointIndex != B2_NULL_INDEX) { b2Joint* joint = world->joints + jointIndex; - b2RemoveJointFromGraph(world, joint); + // TODO_JOINT_GRAPH + //b2RemoveJointFromGraph(world, joint); jointIndex = joint->islandNext; } } From 88c1562db9d65a87c2f93532376d2ea7097a3c58 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 18 Oct 2023 22:59:17 -0700 Subject: [PATCH 39/51] wip split islands (crashes) --- samples/collection/benchmark_tumbler.cpp | 3 +- src/body.c | 15 - src/contact.c | 1 + src/contact.h | 7 +- src/graph.c | 100 ++--- src/island.c | 72 ++-- src/island.h | 10 +- src/world.c | 483 ++++++----------------- src/world.h | 8 +- 9 files changed, 225 insertions(+), 474 deletions(-) diff --git a/samples/collection/benchmark_tumbler.cpp b/samples/collection/benchmark_tumbler.cpp index d6087a73..5a1a8bfa 100644 --- a/samples/collection/benchmark_tumbler.cpp +++ b/samples/collection/benchmark_tumbler.cpp @@ -57,7 +57,8 @@ class BenchmarkTumbler : public Sample m_jointId = b2World_CreateRevoluteJoint(m_worldId, &jd); } - m_maxCount = g_sampleDebug ? 500 : 2000; + //m_maxCount = g_sampleDebug ? 500 : 2000; + m_maxCount = 2000; m_count = 0; } diff --git a/src/body.c b/src/body.c index 65a321f3..5a36d8a6 100644 --- a/src/body.c +++ b/src/body.c @@ -236,23 +236,8 @@ void b2World_DestroyBody(b2BodyId bodyId) B2_ASSERT(island->contactCount == 0); B2_ASSERT(island->jointCount == 0); - // Remove from awake islands array - if (island->awakeIndex != B2_NULL_INDEX) - { - int32_t islandCount = b2Array(world->awakeIslandArray).count; - B2_ASSERT(islandCount > 0); - b2Array_RemoveSwap(world->awakeIslandArray, island->awakeIndex); - if (island->awakeIndex < islandCount - 1) - { - // Fix awake index on swapped island - int32_t swappedIslandIndex = world->awakeIslandArray[island->awakeIndex]; - world->islands[swappedIslandIndex].awakeIndex = island->awakeIndex; - } - } - // Free the island b2DestroyIsland(island); - b2FreeObject(&world->islandPool, &island->object); islandDestroyed = true; } } diff --git a/src/contact.c b/src/contact.c index 8e98654a..ae928121 100644 --- a/src/contact.c +++ b/src/contact.c @@ -199,6 +199,7 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) contact->islandNext = B2_NULL_INDEX; contact->colorSubIndex = B2_NULL_INDEX; contact->colorIndex = B2_NULL_INDEX; + contact->isMarked = false; b2Body* bodyA = world->bodies + shapeA->bodyIndex; b2Body* bodyB = world->bodies + shapeB->bodyIndex; diff --git a/src/contact.h b/src/contact.h index 7ffff896..26dcd498 100644 --- a/src/contact.h +++ b/src/contact.h @@ -50,8 +50,6 @@ enum b2ContactFlags // This contact stopped touching b2_contactStoppedTouching = 0x00000080, - - b2_contactIslandFlag = 0x00000100, }; /// The class manages contact between two shapes. A contact exists for each overlapping @@ -63,9 +61,6 @@ typedef struct b2Contact uint32_t flags; - // This is too hot and has been moved to a separate array - //int32_t awakeIndex; - // The color of this constraint in the graph coloring int32_t colorIndex; @@ -91,6 +86,8 @@ typedef struct b2Contact // For conveyor belts float tangentSpeed; + + bool isMarked; } b2Contact; void b2InitializeContactRegisters(void); diff --git a/src/graph.c b/src/graph.c index e60f2858..e6d0463e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -27,6 +27,7 @@ typedef struct b2WorkerContext { b2SolverTaskContext* context; int32_t workerIndex; + void* userTask; } b2WorkerContext; void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity) @@ -586,47 +587,6 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverT b2TracyCZoneEnd(finalize_positions); } -#if B2_AVX == 0 - -static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) -{ - b2SolverStageType type = stage->type; - - switch (type) - { - case b2_stageIntegrateVelocities: - b2IntegrateVelocitiesTask(startIndex, endIndex, context); - break; - - case b2_stagePrepareContacts: - b2PrepareContactsTask(startIndex, endIndex, context, stage->colorIndex); - break; - - case b2_stageSolveContacts: - b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, true); - break; - - case b2_stageIntegratePositions: - b2IntegratePositionsTask(startIndex, endIndex, context); - break; - - case b2_stageFinalizePositions: - b2FinalizePositionsTask(startIndex, endIndex, context, workerIndex); - break; - - case b2_stageCalmContacts: - b2SolveContactsTask(startIndex, endIndex, context, stage->colorIndex, false); - break; - - case b2_stageStoreImpulses: - b2StoreImpulsesTask(startIndex, endIndex, context); - break; - } -} - -#else - -// AVX static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) { b2SolverStageType type = stage->type; @@ -666,7 +626,6 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; } } -#endif static inline int32_t GetWorkerStartIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) { @@ -995,11 +954,23 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); + // Search for an awake island to split + int32_t splitIslandIndex = B2_NULL_INDEX; + int32_t maxRemovedContacts = 0; + + // Build array of awake bodies int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { int32_t islandIndex = world->awakeIslandArray[i]; b2Island* island = world->islands + islandIndex; + + if (island->constraintRemoveCount > maxRemovedContacts) + { + maxRemovedContacts = island->constraintRemoveCount; + splitIslandIndex = islandIndex; + } + int32_t bodyIndex = island->headBody; while (bodyIndex != B2_NULL_INDEX) { @@ -1067,6 +1038,9 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "contact constraint"); int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); + int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; + graph->overflow.contactConstraints = + b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); int32_t base = 0; for (int32_t i = 0; i < activeColorCount; ++i) @@ -1100,10 +1074,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) storeBlockCount = blocksPerWorker * workerCount; } - int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; - graph->overflow.contactConstraints = - b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); - /* b2_stageIntegrateVelocities = 0, b2_stagePrepareContacts, @@ -1143,6 +1113,28 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); + // Split an awake island. This modifies: + // - stack allocator + // - awake island array + // - island pool + // - island indices on bodies, contacts, and joints + // I'm squeezing this task in here because it may be expensive and this + // is a safe place to put it. + world->splitIslandIndex = splitIslandIndex; + void* splitIslandTask = NULL; + if (splitIslandIndex != B2_NULL_INDEX) + { + extern bool b2_parallel; + if (b2_parallel) + { + splitIslandTask = world->enqueueTaskFcn(&b2SplitIslandTask, 1, 1, world, world->userTaskContext); + } + else + { + b2SplitIslandTask(0, 1, 0, world); + } + } + for (int32_t i = 0; i < bodyBlockCount; ++i) { b2SolverBlock* block = bodyBlocks + i; @@ -1325,10 +1317,22 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) { workerContext[i].context = &context; workerContext[i].workerIndex = i; - world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + workerContext[i].userTask = world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + } + + // Finish solve + for (int32_t i = 0; i < workerCount; ++i) + { + world->finishTaskFcn(workerContext[i].userTask, world->userTaskContext); + } + + // Finish split + if (splitIslandTask != NULL) + { + world->finishTaskFcn(splitIslandTask, world->userTaskContext); } - world->finishAllTasksFcn(world->userTaskContext); + world->splitIslandIndex = B2_NULL_INDEX; b2FreeStackItem(world->stackAllocator, storeBlocks); b2FreeStackItem(world->stackAllocator, graphBlocks); diff --git a/src/island.c b/src/island.c index 93346579..24f6cec5 100644 --- a/src/island.c +++ b/src/island.c @@ -139,12 +139,26 @@ void b2CreateIsland(b2Island* island) island->parentIsland = B2_NULL_INDEX; island->awakeIndex = B2_NULL_INDEX; island->constraintRemoveCount = 0; - island->maySplit = false; } void b2DestroyIsland(b2Island* island) { - B2_MAYBE_UNUSED(island); + // Remove from awake islands array + if (island->awakeIndex != B2_NULL_INDEX) + { + b2World* world = island->world; + int32_t islandCount = b2Array(world->awakeIslandArray).count; + B2_ASSERT(islandCount > 0); + b2Array_RemoveSwap(world->awakeIslandArray, island->awakeIndex); + if (island->awakeIndex < islandCount - 1) + { + // Fix awake index on swapped island + int32_t swappedIslandIndex = world->awakeIslandArray[island->awakeIndex]; + world->islands[swappedIslandIndex].awakeIndex = island->awakeIndex; + } + } + + b2FreeObject(&island->world->islandPool, &island->object); } static void b2AddContactToIsland(b2World* world, b2Island* island, b2Contact* contact) @@ -653,18 +667,7 @@ void b2MergeAwakeIslands(b2World* world) int32_t mergedBodyCount = b2MergeIsland(island); maxBodyCount = B2_MAX(maxBodyCount, mergedBodyCount); - int32_t count = b2Array(world->awakeIslandArray).count; - int32_t awakeIndex = island->awakeIndex; - b2Array_RemoveSwap(world->awakeIslandArray, awakeIndex); - if (awakeIndex < count - 1) - { - // Fix awake index on swapped island - int32_t swappedIslandIndex = world->awakeIslandArray[awakeIndex]; - world->islands[swappedIslandIndex].awakeIndex = awakeIndex; - } - b2DestroyIsland(island); - b2FreeObject(&world->islandPool, &island->object); } // Step 3: ensure island pool has sufficient space to split the largest island @@ -675,16 +678,27 @@ void b2MergeAwakeIslands(b2World* world) #define B2_CONTACT_REMOVE_THRESHOLD 1 // Split an island because some contacts and/or joints have been removed -// Note: contacts/joints connecting to static bodies must belong to an island but don't affect island connectivity +// Note: contacts/joints connected to static bodies must belong to an island but don't affect island connectivity // Note: static bodies are never in an island -// TODO_ERIN I think this can be done during collision -static void b2SplitIsland(b2Island* baseIsland) +// Note: this task interacts with some allocators without locks under the assumption that no other tasks +// are interacting with these data structures. +// WARNING: this cannot be done during the narrow-phase because this is when contacts start and stop touching +void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context) { b2TracyCZoneNC(split, "Split Island", b2_colorHoneydew2, true); + B2_MAYBE_UNUSED(startIndex); + B2_MAYBE_UNUSED(endIndex); + B2_MAYBE_UNUSED(threadIndex); + + b2World* world = context; + + B2_ASSERT(world->splitIslandIndex != B2_NULL_INDEX); + + b2Island* baseIsland = world->islands + world->splitIslandIndex; + b2ValidateIsland(baseIsland); - b2World* world = baseIsland->world; int32_t bodyCount = baseIsland->bodyCount; b2Body* bodies = world->bodies; @@ -693,7 +707,7 @@ static void b2SplitIsland(b2Island* baseIsland) b2StackAllocator* alloc = world->stackAllocator; - // No lock is needed because only one island can split per time step. + // No lock is needed because I ensure these are not used while this task is active. int32_t* stack = b2AllocateStackItem(alloc, bodyCount * sizeof(int32_t), "island stack"); int32_t* bodyIndices = b2AllocateStackItem(alloc, bodyCount * sizeof(int32_t), "body indices"); @@ -719,7 +733,7 @@ static void b2SplitIsland(b2Island* baseIsland) while (nextContact != B2_NULL_INDEX) { b2Contact* contact = contacts + nextContact; - contact->flags &= ~b2_contactIslandFlag; + contact->isMarked = false; nextContact = contact->islandNext; } @@ -732,6 +746,10 @@ static void b2SplitIsland(b2Island* baseIsland) nextJoint = joint->islandNext; } + // Done with the base split island. + b2DestroyIsland(baseIsland); + baseIsland = NULL; + // Each island is found as a depth first search starting from a seed body for (int32_t i = 0; i < bodyCount; ++i) { @@ -802,7 +820,7 @@ static void b2SplitIsland(b2Island* baseIsland) contactKey = contact->edges[edgeIndex].nextKey; // Has this contact already been added to this island? - if (contact->flags & b2_contactIslandFlag) + if (contact->isMarked) { continue; } @@ -819,7 +837,7 @@ static void b2SplitIsland(b2Island* baseIsland) continue; } - contact->flags |= b2_contactIslandFlag; + contact->isMarked = true; int32_t otherEdgeIndex = edgeIndex ^ 1; int32_t otherBodyIndex = contact->edges[otherEdgeIndex].bodyIndex; @@ -910,7 +928,8 @@ static void b2SplitIsland(b2Island* baseIsland) } b2ValidateIsland(island); - b2Array_Push(world->splitIslandArray, island->object.index); + island->awakeIndex = b2Array(world->awakeIslandArray).count; + b2Array_Push(world->awakeIslandArray, islandIndex); } b2FreeStackItem(alloc, bodyIndices); @@ -919,15 +938,6 @@ static void b2SplitIsland(b2Island* baseIsland) b2TracyCZoneEnd(split); } -// This island was just created through splitting. Handle single thread work. -void b2CompleteSplitIsland(b2Island* island) -{ - // Split islands are kept awake as part of the splitting process. They can - // fall asleep the next time step. - island->awakeIndex = B2_NULL_INDEX; - b2WakeIsland(island); -} - #if B2_VALIDATE void b2ValidateIsland(b2Island* island) diff --git a/src/island.h b/src/island.h index 29e10077..f5c73f24 100644 --- a/src/island.h +++ b/src/island.h @@ -52,10 +52,6 @@ typedef struct b2Island // Keeps track of how many contacts have been removed from this island. int32_t constraintRemoveCount; - - // This island has been chosen to be split up into smaller islands because a sufficient - // number of contacts have been removed. - bool maySplit; } b2Island; void b2CreateIsland(b2Island* island); @@ -77,11 +73,7 @@ void b2UnlinkJoint(b2World* world, b2Joint* joint); void b2MergeAwakeIslands(b2World* world); -void b2PrepareIsland(b2Island* island, b2StepContext* stepContext); - -void b2SolveIsland(b2Island* island, uint32_t threadIndex); - -void b2CompleteBaseSplitIsland(b2Island* island); +void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context); void b2CompleteSplitIsland(b2Island* island); void b2ValidateIsland(b2Island* island); diff --git a/src/world.c b/src/world.c index c92a89f1..1e9dc570 100644 --- a/src/world.c +++ b/src/world.c @@ -119,12 +119,10 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->islands = (b2Island*)world->islandPool.memory; world->awakeIslandArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->bodyCapacity, 1)); - world->splitIslandArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->bodyCapacity, 1)); world->awakeContactArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->contactCapacity, 1)); world->contactAwakeIndexArray = b2CreateArray(sizeof(int32_t), world->contactPool.capacity); - world->splitIslandIndex = B2_NULL_INDEX; world->stepId = 0; // Globals start at 0. It should be fine for this to roll over. @@ -138,6 +136,8 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->enableWarmStarting = true; world->enableContinuous = true; world->profile = b2_emptyProfile; + world->userTreeTask = NULL; + world->splitIslandIndex = B2_NULL_INDEX; id.revision = world->revision; @@ -188,18 +188,6 @@ void b2DestroyWorld(b2WorldId id) b2DestroyArray(world->awakeIslandArray, sizeof(int32_t)); b2DestroyArray(world->contactAwakeIndexArray, sizeof(int32_t)); - b2DestroyArray(world->splitIslandArray, sizeof(int32_t)); - - b2Island* islands = world->islands; - int32_t islandCapacity = world->islandPool.capacity; - for (int32_t i = 0; i < islandCapacity; ++i) - { - b2Island* island = islands + i; - if (b2ObjectValid(&island->object) == true) - { - b2DestroyIsland(island); - } - } b2DestroyPool(&world->islandPool); b2DestroyPool(&world->jointPool); @@ -216,7 +204,6 @@ void b2DestroyWorld(b2WorldId id) memset(world, 0, sizeof(b2World)); } -// Locked version static void b2CollideTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context) { b2TracyCZoneNC(collide_task, "Collide Task", b2_colorDodgerBlue1, true); @@ -307,13 +294,15 @@ static void b2UpdateTreesTask(int32_t startIndex, int32_t endIndex, uint32_t thr b2TracyCZoneEnd(tree_task); } +// Narrow-phase collision static void b2Collide(b2World* world) { B2_ASSERT(world->workerCount > 0); b2TracyCZoneNC(collide, "Collide", b2_colorDarkOrchid, true); - // Rebuild the collision tree for dynamic and kinematic bodies to keep their query performance good. + // Tasks that can be done in parallel with the narrow-phase + // - rebuild the collision tree for dynamic and kinematic bodies to keep their query performance good if (b2_parallel) { world->userTreeTask = world->enqueueTaskFcn(&b2UpdateTreesTask, 1, 1, world, world->userTaskContext); @@ -328,6 +317,7 @@ static void b2Collide(b2World* world) if (awakeContactCount == 0) { + b2TracyCZoneEnd(collide); return; } @@ -389,6 +379,10 @@ static void b2Collide(b2World* world) else { B2_ASSERT(contact->flags & b2_contactStoppedTouching); + if (contact->colorIndex == B2_NULL_INDEX) + { + contact->colorIndex = B2_NULL_INDEX; + } b2UnlinkContact(world, contact); b2RemoveContactFromGraph(world, contact); @@ -400,32 +394,11 @@ static void b2Collide(b2World* world) } } - // TODO_ERIN clear awake contact array here? - b2TracyCZoneEnd(contact_state); b2TracyCZoneEnd(collide); } -static void b2IslandParallelForTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) -{ - b2TracyCZoneNC(island_task, "Island Task", b2_colorYellow, true); - - b2World* world = taskContext; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(world->awakeIslandArray).count); - B2_ASSERT(endIndex <= b2Array(world->awakeIslandArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - int32_t index = world->awakeIslandArray[i]; - b2SolveIsland(world->islands + index, threadIndex); - } - - b2TracyCZoneEnd(island_task); -} - struct b2ContinuousContext { b2World* world; @@ -629,125 +602,102 @@ static void b2ContinuousParallelForTask(int32_t startIndex, int32_t endIndex, ui b2TracyCZoneEnd(continuous_task); } -#if 0 +// Solve with graph coloring static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); - b2TracyCZoneNC(prepare_islands, "Prepare Islands", b2_colorDarkSalmon, true); b2Timer timer = b2CreateTimer(); - b2Array_Clear(world->splitIslandArray); world->stepId += 1; + b2MergeAwakeIslands(world); + + world->profile.buildIslands = b2GetMillisecondsAndReset(&timer); + + b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); + // Prepare contact and shape bit-sets int32_t contactCapacity = world->contactPool.capacity; int32_t shapeCapacity = world->shapePool.capacity; + int32_t islandCapacity = world->islandPool.capacity; for (uint32_t i = 0; i < world->workerCount; ++i) { b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); } - b2MergeAwakeIslands(world); - - // Careful, this is modified by island merging - int32_t count = b2Array(world->awakeIslandArray).count; - - int32_t fastBodyCapacity = 0; - b2Island** islands = b2AllocateStackItem(world->stackAllocator, count * sizeof(b2Island*), "island array"); - for (int32_t i = 0; i < count; ++i) - { - b2Island* island = world->islands + world->awakeIslandArray[i]; - B2_ASSERT(island->awakeIndex == i); - islands[i] = island; - fastBodyCapacity += island->bodyCount; - } - - world->fastBodyCapacity = fastBodyCapacity; - world->fastBodyCount = 0; - world->fastBodies = b2AllocateStackItem(world->stackAllocator, fastBodyCapacity * sizeof(int32_t), "fast bodies"); - - // Sort islands to improve task distribution - b2SortIslands(world, islands, count); - - // Now create the island solvers - for (int32_t i = 0; i < count; ++i) - { - b2PrepareIsland(islands[i], context); - } + // Solve constraints using graph coloring + b2SolveGraph(world, context); - b2TracyCZoneEnd(prepare_islands); + b2ValidateNoEnlarged(&world->broadPhase); - world->profile.buildIslands = b2GetMillisecondsAndReset(&timer); + b2TracyCZoneEnd(graph_solver); - b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); + world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); - if (b2_parallel) + b2TracyCZoneNC(awake_islands, "Awake Islands", b2_colorGainsboro, true); { - int32_t minRange = b2_islandMinRange; - void* userIslandTask = world->enqueueTaskFcn(&b2IslandParallelForTask, count, minRange, world, world->userTaskContext); - world->finishTaskFcn(userIslandTask, world->userTaskContext); - - // Finish the user tree task that was queued early in the time step - if (world->userTreeTask != NULL) + b2BitSet* bitSet = &world->taskContextArray[0].awakeIslandBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) { - world->finishTaskFcn(world->userTreeTask, world->userTaskContext); + b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeIslandBitSet); } - world->userTreeTask = NULL; - } - else - { - b2IslandParallelForTask(0, count, 0, world); - } - - b2ValidateNoEnlarged(&world->broadPhase); - - b2TracyCZoneEnd(island_solver); - - world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); - - b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); + int32_t count = b2Array(world->awakeIslandArray).count; + for (int32_t i = 0; i < count; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + if (b2GetBit(bitSet, islandIndex) == true) + { + continue; + } - b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); + // Put island to sleep + b2Island* island = world->islands + islandIndex; + island->awakeIndex = B2_NULL_INDEX; - // Enlarge broad-phase proxies and build move array - { - b2BroadPhase* broadPhase = &world->broadPhase; + // Remove edges from graph + int32_t contactIndex = island->headContact; + while (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = world->contacts + contactIndex; + b2RemoveContactFromGraph(world, contact); + contactIndex = contact->islandNext; + } - // Gather bits for all shapes that have enlarged AABBs - b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); + int32_t jointIndex = island->headJoint; + while (jointIndex != B2_NULL_INDEX) + { + b2Joint* joint = world->joints + jointIndex; + // TODO_JOINT_GRAPH + // b2RemoveJointFromGraph(world, joint); + jointIndex = joint->islandNext; + } } - // Apply shape AABB changes to broadphase. This also create the move array which must be - // ordered to ensure determinism. - b2Shape* shapes = world->shapes; + // Clear awake island array + b2Array_Clear(world->awakeIslandArray); + + // Use bitSet to build awake island array. No need to add edges. uint64_t word; uint32_t wordCount = bitSet->wordCount; uint64_t* bits = bitSet->bits; + int32_t awakeIndex = 0; for (uint32_t k = 0; k < wordCount; ++k) { word = bits[k]; while (word != 0) { uint32_t ctz = b2CTZ(word); - uint32_t shapeIndex = 64 * k + ctz; + uint32_t islandIndex = 64 * k + ctz; - b2Shape* shape = shapes + shapeIndex; - B2_ASSERT(b2ObjectValid(&shape->object)); - if (shape->isFast == false) - { - b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); - } - else - { - // Shape is fast. It's aabb will be enlarged in continuous collision. - b2BufferMove(broadPhase, shape->proxyKey); - } + b2Array_Push(world->awakeIslandArray, islandIndex); + + // Reference index. This tells the island and bodies they are awake. + world->islands[islandIndex].awakeIndex = awakeIndex; + awakeIndex += 1; // Clear the smallest set bit word = word & (word - 1); @@ -755,7 +705,7 @@ static void b2Solve(b2World* world, b2StepContext* context) } } - b2TracyCZoneEnd(enlarge_proxies); + b2TracyCZoneEnd(awake_islands); b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); @@ -801,50 +751,73 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(awake_contacts); - b2TracyCZoneNC(complete_island, "Complete Island", b2_colorBlueViolet, true); - - // Complete islands (reverse order for stack allocator) - // This rebuilds the awake island array and awake contact array - b2Array_Clear(world->awakeIslandArray); - - for (int32_t i = count - 1; i >= 0; --i) + // Finish the user tree task that was queued early in the time step. This must be done before touching the broadphase. + if (b2_parallel) { - b2Island* island = islands[i]; - if (island->object.index == world->splitIslandIndex) - { - b2CompleteBaseSplitIsland(island); - } - else + if (world->userTreeTask != NULL) { - b2CompleteIsland(island); + world->finishTaskFcn(world->userTreeTask, world->userTaskContext); } + + world->userTreeTask = NULL; } - // Handle islands created from splitting - if (world->splitIslandIndex != B2_NULL_INDEX) + b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); + + b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); + + // Enlarge broad-phase proxies and build move array { - b2Island* baseIsland = world->islands + world->splitIslandIndex; - int32_t splitCount = b2Array(world->splitIslandArray).count; - for (int32_t i = 0; i < splitCount; ++i) + b2BroadPhase* broadPhase = &world->broadPhase; + + // Gather bits for all shapes that have enlarged AABBs + b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) { - int32_t index = world->splitIslandArray[i]; - b2Island* splitIsland = world->islands + index; - b2CompleteSplitIsland(splitIsland); + b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); } - // Done with the base split island. - b2DestroyIsland(baseIsland); - b2FreeObject(&world->islandPool, &baseIsland->object); + // Apply shape AABB changes to broadphase. This also create the move array which must be + // ordered to ensure determinism. + b2Shape* shapes = world->shapes; + uint64_t word; + uint32_t wordCount = bitSet->wordCount; + uint64_t* bits = bitSet->bits; + for (uint32_t k = 0; k < wordCount; ++k) + { + word = bits[k]; + while (word != 0) + { + uint32_t ctz = b2CTZ(word); + uint32_t shapeIndex = 64 * k + ctz; + + b2Shape* shape = shapes + shapeIndex; + B2_ASSERT(b2ObjectValid(&shape->object)); + if (shape->isFast == false) + { + b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); + } + else + { + // Shape is fast. It's aabb will be enlarged in continuous collision. + b2BufferMove(broadPhase, shape->proxyKey); + } + + // Clear the smallest set bit + word = word & (word - 1); + } + } } - b2ValidateBroadphase(&world->broadPhase); + b2TracyCZoneEnd(enlarge_proxies); - b2TracyCZoneEnd(complete_island); + b2ValidateBroadphase(&world->broadPhase); world->profile.broadphase = b2GetMilliseconds(&timer); b2TracyCZoneEnd(broad_phase); +#if 0 b2TracyCZoneNC(continuous_collision, "Continuous", b2_colorDarkGoldenrod, true); // Parallel continuous collision @@ -898,6 +871,7 @@ static void b2Solve(b2World* world, b2StepContext* context) B2_ASSERT(B2_PROXY_TYPE(proxyKey) == b2_dynamicBody); // all fast shapes should already be in the move buffer + b2DynamicTree_EnlargeProxy(tree, proxyId, shape->fatAABB); shapeIndex = shape->nextShapeIndex; @@ -911,218 +885,8 @@ static void b2Solve(b2World* world, b2StepContext* context) world->fastBodies = NULL; world->profile.continuous = b2GetMilliseconds(&timer); - - b2FreeStackItem(world->stackAllocator, islands); - - b2TracyCZoneEnd(solve); -} #endif -// Solve with graph coloring -static void b2Solve(b2World* world, b2StepContext* context) -{ - b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); - - b2Timer timer = b2CreateTimer(); - - world->stepId += 1; - - // Prepare contact and shape bit-sets - int32_t contactCapacity = world->contactPool.capacity; - int32_t shapeCapacity = world->shapePool.capacity; - int32_t islandCapacity = world->islandPool.capacity; - for (uint32_t i = 0; i < world->workerCount; ++i) - { - b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); - b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); - b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); - } - - b2MergeAwakeIslands(world); - - world->profile.buildIslands = 0.0f; - - // TODO_ISLAND task to split island - - b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); - - b2SolveGraph(world, context); - - b2ValidateNoEnlarged(&world->broadPhase); - - b2TracyCZoneEnd(graph_solver); - - world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); - - b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); - - b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); - - // Enlarge broad-phase proxies and build move array - { - b2BroadPhase* broadPhase = &world->broadPhase; - - // Gather bits for all shapes that have enlarged AABBs - b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); - } - - // Apply shape AABB changes to broadphase. This also create the move array which must be - // ordered to ensure determinism. - b2Shape* shapes = world->shapes; - uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; - for (uint32_t k = 0; k < wordCount; ++k) - { - word = bits[k]; - while (word != 0) - { - uint32_t ctz = b2CTZ(word); - uint32_t shapeIndex = 64 * k + ctz; - - b2Shape* shape = shapes + shapeIndex; - B2_ASSERT(b2ObjectValid(&shape->object)); - if (shape->isFast == false) - { - b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); - } - else - { - // Shape is fast. It's aabb will be enlarged in continuous collision. - b2BufferMove(broadPhase, shape->proxyKey); - } - - // Clear the smallest set bit - word = word & (word - 1); - } - } - } - - b2TracyCZoneEnd(enlarge_proxies); - - b2TracyCZoneNC(awake_islands, "Awake Islands", b2_colorGainsboro, true); - { - b2BitSet* bitSet = &world->taskContextArray[0].awakeIslandBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeIslandBitSet); - } - - int32_t count = b2Array(world->awakeIslandArray).count; - for (int32_t i = 0; i < count; ++i) - { - int32_t islandIndex = world->awakeIslandArray[i]; - if (b2GetBit(bitSet, islandIndex) == true) - { - continue; - } - - // Put island to sleep - b2Island* island = world->islands + islandIndex; - island->awakeIndex = B2_NULL_INDEX; - - // Remove edges from graph - int32_t contactIndex = island->headContact; - while (contactIndex != B2_NULL_INDEX) - { - b2Contact* contact = world->contacts + contactIndex; - b2RemoveContactFromGraph(world, contact); - contactIndex = contact->islandNext; - } - - int32_t jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = world->joints + jointIndex; - // TODO_JOINT_GRAPH - //b2RemoveJointFromGraph(world, joint); - jointIndex = joint->islandNext; - } - } - - // Clear awake island array - b2Array_Clear(world->awakeIslandArray); - - // Use bitSet to build awake island array. No need to add edges. - uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; - int32_t awakeIndex = 0; - for (uint32_t k = 0; k < wordCount; ++k) - { - word = bits[k]; - while (word != 0) - { - uint32_t ctz = b2CTZ(word); - uint32_t islandIndex = 64 * k + ctz; - - b2Array_Push(world->awakeIslandArray, islandIndex); - - // Reference index. This tells the island and bodies they are awake. - world->islands[islandIndex].awakeIndex = awakeIndex; - awakeIndex += 1; - - // Clear the smallest set bit - word = word & (word - 1); - } - } - } - - b2TracyCZoneEnd(awake_islands); - - b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); - - // Build awake contact array - { - b2BitSet* bitSet = &world->taskContextArray[0].awakeContactBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeContactBitSet); - } - - b2Array_Clear(world->awakeContactArray); - - int32_t* contactAwakeIndexArray = world->contactAwakeIndexArray; - - // Iterate the bit set - // The order of the awake contact array doesn't matter, but I don't want duplicates. It is possible - // that body A or body B or both bodies wake the contact. - uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; - for (uint32_t k = 0; k < wordCount; ++k) - { - word = bits[k]; - while (word != 0) - { - uint32_t ctz = b2CTZ(word); - uint32_t contactIndex = 64 * k + ctz; - - B2_ASSERT(contactAwakeIndexArray[contactIndex] == B2_NULL_INDEX); - - // This cache miss is brutal but is necessary to make contact destruction reasonably quick. - contactAwakeIndexArray[contactIndex] = b2Array(world->awakeContactArray).count; - - // This is fast - b2Array_Push(world->awakeContactArray, contactIndex); - - // Clear the smallest set bit - word = word & (word - 1); - } - } - } - - b2TracyCZoneEnd(awake_contacts); - - b2ValidateBroadphase(&world->broadPhase); - - world->profile.broadphase = b2GetMilliseconds(&timer); - - b2TracyCZoneEnd(broad_phase); - world->profile.continuous = 0.0f; b2TracyCZoneEnd(solve); @@ -1202,16 +966,17 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, world->profile.step = b2GetMilliseconds(&stepTimer); - B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); - - // Ensure stack is large enough - b2GrowStack(world->stackAllocator); - if (b2_parallel) { + // This finishes tree rebuild and split island tasks world->finishAllTasksFcn(world->userTaskContext); } + B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); + + // Ensure stack is large enough + b2GrowStack(world->stackAllocator); + b2TracyCZoneEnd(world_step); } diff --git a/src/world.h b/src/world.h index ba737925..5cc0eddb 100644 --- a/src/world.h +++ b/src/world.h @@ -74,12 +74,6 @@ typedef struct b2World // Hot data split from b2Contact int32_t* contactAwakeIndexArray; - // This transient array holds islands created from splitting a larger island. - int32_t* splitIslandArray; - - // Transient index of the island being split this time step. May be B2_NULL_INDEX. - int32_t splitIslandIndex; - // Array of fast bodies that need continuous collision handling int32_t* fastBodies; int32_t fastBodyCapacity; @@ -112,6 +106,8 @@ typedef struct b2World void* userTreeTask; + int32_t splitIslandIndex; + bool enableSleep; bool locked; bool enableWarmStarting; From 74eb27fa7a1fbdd2516ead9ecd5cdb5f1b1078a3 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Thu, 19 Oct 2023 22:47:47 -0700 Subject: [PATCH 40/51] fixed island splitting bug --- src/graph.c | 83 +++++++++++++++++++++++++++-------------------- src/island.c | 7 ++-- src/solver_data.h | 1 - src/world.c | 69 ++++++++++++++++++++++++++------------- 4 files changed, 96 insertions(+), 64 deletions(-) diff --git a/src/graph.c b/src/graph.c index e6d0463e..98aa31d6 100644 --- a/src/graph.c +++ b/src/graph.c @@ -23,6 +23,8 @@ #define B2_AVX 1 +extern bool b2_parallel; + typedef struct b2WorkerContext { b2SolverTaskContext* context; @@ -489,10 +491,11 @@ static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2Sol b2TracyCZoneEnd(integrate_positions); } -static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t workerIndex) +static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) { - b2TracyCZoneNC(finalize_positions, "FinPos", b2_colorViolet, true); + b2TracyCZoneNC(finalize_bodies, "FinalizeBodies", b2_colorViolet, true); + b2SolverTaskContext* context = taskContext; b2World* world = context->world; bool enableSleep = world->enableSleep; b2Body* bodies = world->bodies; @@ -502,9 +505,9 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverT const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; float timeStep = context->timeStep; - b2BitSet* awakeContactBitSet = &world->taskContextArray[workerIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[workerIndex].shapeBitSet; - b2BitSet* awakeIslandBitSet = &world->taskContextArray[workerIndex].awakeIslandBitSet; + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + b2BitSet* awakeIslandBitSet = &world->taskContextArray[threadIndex].awakeIslandBitSet; B2_ASSERT(startIndex <= endIndex); B2_ASSERT(startIndex <= world->bodyPool.capacity); @@ -516,6 +519,8 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverT b2Body* body = bodies + solverToBodyMap[i]; + // Integrate positions + // TODO_ERIN clamping body->linearVelocity = solverBody->linearVelocity; body->angularVelocity = solverBody->angularVelocity; body->position = b2Add(body->position, solverBody->deltaPosition); @@ -571,6 +576,7 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverT shapeIndex = shape->nextShapeIndex; } + // Wake contacts int32_t contactKey = body->contactList; while (contactKey != B2_NULL_INDEX) { @@ -584,10 +590,10 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, b2SolverT } } - b2TracyCZoneEnd(finalize_positions); + b2TracyCZoneEnd(finalize_bodies); } -static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex, int32_t workerIndex) +static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex) { b2SolverStageType type = stage->type; @@ -617,10 +623,6 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i b2SolveContactsAVX(startIndex, endIndex, context, stage->colorIndex, false); break; - case b2_stageFinalizeBodies: - b2FinalizeBodiesTask(startIndex, endIndex, context, workerIndex); - break; - case b2_stageStoreImpulses: b2StoreImpulsesAVX(startIndex, endIndex, context); break; @@ -664,7 +666,7 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i B2_ASSERT(completedCount < blockCount); - b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, workerIndex); + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex); completedCount += 1; blockIndex += 1; @@ -694,7 +696,7 @@ static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, i break; } - b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex, workerIndex); + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex); completedCount += 1; blockIndex -= 1; } @@ -712,7 +714,7 @@ static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* contex if (blockCount == 1) { - b2ExecuteBlock(stage, context, stage->blocks[0].startIndex, stage->blocks[0].endIndex, 0); + b2ExecuteBlock(stage, context, stage->blocks[0].startIndex, stage->blocks[0].endIndex); } else { @@ -766,7 +768,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2_stageIntegratePositions, b2_stageCalmJoints, b2_stageCalmContacts, - b2_stageFinalizeBodies, b2_stageStoreImpulses */ @@ -856,11 +857,6 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont stageIndex += 1 + activeColorCount; - syncBits = (bodySyncIndex << 16) | stageIndex; - B2_ASSERT(stages[stageIndex].type == b2_stageFinalizeBodies); - b2ExecuteMainStage(stages + stageIndex, context, syncBits); - stageIndex += 1; - syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, syncBits); @@ -908,6 +904,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont } } +// TODO_ERIN this comment is out of data // Threading: // 1. build array of awake bodies, maybe copy to contiguous array // 2. parallel-for integrate velocities @@ -1101,8 +1098,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stageCount += 1; // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions stageCount += 1 + activeColorCount + 1; - // b2_stageFinalizePositions - stageCount += 1; // b2_stageCalmJoints, b2_stageCalmContacts stageCount += 1 + activeColorCount; // b2_stageStoreImpulses @@ -1113,6 +1108,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); + // TODO_ERIN cannot do this in parallel with FinalizeBodies // Split an awake island. This modifies: // - stack allocator // - awake island array @@ -1124,7 +1120,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) void* splitIslandTask = NULL; if (splitIslandIndex != B2_NULL_INDEX) { - extern bool b2_parallel; if (b2_parallel) { splitIslandTask = world->enqueueTaskFcn(&b2SplitIslandTask, 1, 1, world, world->userTaskContext); @@ -1132,6 +1127,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) else { b2SplitIslandTask(0, 1, 0, world); + world->splitIslandIndex = B2_NULL_INDEX; } } @@ -1260,14 +1256,6 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; } - // Finalize bodies - stage->type = b2_stageFinalizeBodies; - stage->blocks = bodyBlocks; - stage->blockCount = bodyBlockCount; - stage->colorIndex = -1; - stage->completionCount = 0; - stage += 1; - // Store impulses stage->type = b2_stageStoreImpulses; stage->blocks = storeBlocks; @@ -1278,6 +1266,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) B2_ASSERT((int32_t)(stage - stages) == stageCount); + // TODO_ERIN B2_ASSERT(workerCount <= 16); b2WorkerContext workerContext[16]; @@ -1320,19 +1309,41 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) workerContext[i].userTask = world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); } + // Finish split + if (splitIslandTask != NULL) + { + world->finishTaskFcn(splitIslandTask, world->userTaskContext); + world->splitIslandIndex = B2_NULL_INDEX; + } + // Finish solve for (int32_t i = 0; i < workerCount; ++i) { world->finishTaskFcn(workerContext[i].userTask, world->userTaskContext); } - // Finish split - if (splitIslandTask != NULL) + // Prepare contact and shape bit-sets + int32_t contactCapacity = world->contactPool.capacity; + int32_t shapeCapacity = world->shapePool.capacity; + int32_t islandCapacity = world->islandPool.capacity; + for (uint32_t i = 0; i < world->workerCount; ++i) { - world->finishTaskFcn(splitIslandTask, world->userTaskContext); + b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); + } + + // Finalize bodies. Must happen after the constraint solver and after island splitting. + void* finalizeBodiesTask = NULL; + if (b2_parallel) + { + finalizeBodiesTask = world->enqueueTaskFcn(b2FinalizeBodiesTask, awakeBodyCount, 16, &context, world->userTaskContext); + world->finishTaskFcn(finalizeBodiesTask, world->userTaskContext); + } + else + { + b2FinalizeBodiesTask(0, awakeBodyCount, 0, &context); } - - world->splitIslandIndex = B2_NULL_INDEX; b2FreeStackItem(world->stackAllocator, storeBlocks); b2FreeStackItem(world->stackAllocator, graphBlocks); diff --git a/src/island.c b/src/island.c index 24f6cec5..d4f2f769 100644 --- a/src/island.c +++ b/src/island.c @@ -677,12 +677,13 @@ void b2MergeAwakeIslands(b2World* world) #define B2_CONTACT_REMOVE_THRESHOLD 1 -// Split an island because some contacts and/or joints have been removed +// Split an island because some contacts and/or joints have been removed. +// This is called during the constraint solve while islands are not being touched. This uses DFS and touches a lot of memory, +// so it can be quite slow. // Note: contacts/joints connected to static bodies must belong to an island but don't affect island connectivity // Note: static bodies are never in an island // Note: this task interacts with some allocators without locks under the assumption that no other tasks // are interacting with these data structures. -// WARNING: this cannot be done during the narrow-phase because this is when contacts start and stop touching void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context) { b2TracyCZoneNC(split, "Split Island", b2_colorHoneydew2, true); @@ -928,8 +929,6 @@ void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadInde } b2ValidateIsland(island); - island->awakeIndex = b2Array(world->awakeIslandArray).count; - b2Array_Push(world->awakeIslandArray, islandIndex); } b2FreeStackItem(alloc, bodyIndices); diff --git a/src/solver_data.h b/src/solver_data.h index 46b17e6f..d6b48dd5 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -53,7 +53,6 @@ typedef enum b2SolverStageType b2_stageIntegratePositions, b2_stageCalmJoints, b2_stageCalmContacts, - b2_stageFinalizeBodies, b2_stageStoreImpulses } b2SolverStageType; diff --git a/src/world.c b/src/world.c index 1e9dc570..98b569d2 100644 --- a/src/world.c +++ b/src/world.c @@ -617,17 +617,6 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); - // Prepare contact and shape bit-sets - int32_t contactCapacity = world->contactPool.capacity; - int32_t shapeCapacity = world->shapePool.capacity; - int32_t islandCapacity = world->islandPool.capacity; - for (uint32_t i = 0; i < world->workerCount; ++i) - { - b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); - b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); - b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); - } - // Solve constraints using graph coloring b2SolveGraph(world, context); @@ -638,6 +627,17 @@ static void b2Solve(b2World* world, b2StepContext* context) world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); b2TracyCZoneNC(awake_islands, "Awake Islands", b2_colorGainsboro, true); + + // TODO_ERIN this code is related to body finalization b2SolveGraph. Reorganize? + + // Prepare awake contact bit set so that putting islands to sleep can clear bits + // for the associated contacts. + b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) + { + b2InPlaceUnion(awakeContactBitSet, &world->taskContextArray[i].awakeContactBitSet); + } + { b2BitSet* bitSet = &world->taskContextArray[0].awakeIslandBitSet; for (uint32_t i = 1; i < world->workerCount; ++i) @@ -645,6 +645,10 @@ static void b2Solve(b2World* world, b2StepContext* context) b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeIslandBitSet); } + b2Body* bodies = world->bodies; + b2Contact* contacts = world->contacts; + b2Joint* joints = world->joints; + int32_t count = b2Array(world->awakeIslandArray).count; for (int32_t i = 0; i < count; ++i) { @@ -658,19 +662,44 @@ static void b2Solve(b2World* world, b2StepContext* context) b2Island* island = world->islands + islandIndex; island->awakeIndex = B2_NULL_INDEX; + // Put contacts to sleep. Remember only touching contacts are in the island. + // So a body may have more contacts than those in the island. + // This is expensive on the main thread, but this only happens when an island goes + // to sleep. + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = bodies + bodyIndex; + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // IMPORTANT: clear awake contact bit + b2ClearBit(awakeContactBitSet, contactIndex); + + contactKey = contact->edges[edgeIndex].nextKey; + } + + bodyIndex = body->islandNext; + } + // Remove edges from graph int32_t contactIndex = island->headContact; while (contactIndex != B2_NULL_INDEX) { - b2Contact* contact = world->contacts + contactIndex; + b2Contact* contact = contacts + contactIndex; b2RemoveContactFromGraph(world, contact); + contactIndex = contact->islandNext; } int32_t jointIndex = island->headJoint; while (jointIndex != B2_NULL_INDEX) { - b2Joint* joint = world->joints + jointIndex; + b2Joint* joint = joints + jointIndex; // TODO_JOINT_GRAPH // b2RemoveJointFromGraph(world, joint); jointIndex = joint->islandNext; @@ -711,12 +740,6 @@ static void b2Solve(b2World* world, b2StepContext* context) // Build awake contact array { - b2BitSet* bitSet = &world->taskContextArray[0].awakeContactBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeContactBitSet); - } - b2Array_Clear(world->awakeContactArray); int32_t* contactAwakeIndexArray = world->contactAwakeIndexArray; @@ -725,8 +748,8 @@ static void b2Solve(b2World* world, b2StepContext* context) // The order of the awake contact array doesn't matter, but I don't want duplicates. It is possible // that body A or body B or both bodies wake the contact. uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; + uint32_t wordCount = awakeContactBitSet->wordCount; + uint64_t* bits = awakeContactBitSet->bits; for (uint32_t k = 0; k < wordCount; ++k) { word = bits[k]; @@ -757,9 +780,8 @@ static void b2Solve(b2World* world, b2StepContext* context) if (world->userTreeTask != NULL) { world->finishTaskFcn(world->userTreeTask, world->userTaskContext); + world->userTreeTask = NULL; } - - world->userTreeTask = NULL; } b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); @@ -817,6 +839,7 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(broad_phase); + // TODO_ERIN continuous #if 0 b2TracyCZoneNC(continuous_collision, "Continuous", b2_colorDarkGoldenrod, true); From a9f22ac3e9d5f96f75e384c7acf32dc5159689d7 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Fri, 20 Oct 2023 15:44:31 -0700 Subject: [PATCH 41/51] color stats --- .clang-format | 1 + include/box2d/color.h | 902 +++++++++--------- include/box2d/constants.h | 3 + include/box2d/timer.h | 1 + .../collection/benchmark_many_tumblers.cpp | 2 +- samples/sample.cpp | 40 +- src/graph.c | 15 +- src/graph.h | 5 +- src/world.c | 4 + 9 files changed, 502 insertions(+), 471 deletions(-) diff --git a/.clang-format b/.clang-format index ed8cfc26..9aa3635a 100644 --- a/.clang-format +++ b/.clang-format @@ -11,6 +11,7 @@ BreakBeforeBraces: Custom BraceWrapping: AfterCaseLabel: true AfterUnion: true + BeforeWhile: true ColumnLimit: 140 PointerAlignment: Left diff --git a/include/box2d/color.h b/include/box2d/color.h index eed9e4a8..f1b60890 100644 --- a/include/box2d/color.h +++ b/include/box2d/color.h @@ -11,479 +11,171 @@ typedef struct b2Color enum b2HexColor { - b2_colorSnow = 0xfffafa, - b2_colorGhostWhite = 0xf8f8ff, - b2_colorWhiteSmoke = 0xf5f5f5, - b2_colorGainsboro = 0xdcdcdc, - b2_colorFloralWhite = 0xfffaf0, - b2_colorOldLace = 0xfdf5e6, - b2_colorLinen = 0xfaf0e6, - b2_colorAntiqueWhite = 0xfaebd7, - b2_colorPapayaWhip = 0xffefd5, - b2_colorBlanchedAlmond = 0xffebcd, - b2_colorBisque = 0xffe4c4, - b2_colorPeachPuff = 0xffdab9, - b2_colorNavajoWhite = 0xffdead, - b2_colorMoccasin = 0xffe4b5, - b2_colorCornsilk = 0xfff8dc, - b2_colorIvory = 0xfffff0, - b2_colorLemonChiffon = 0xfffacd, - b2_colorSeashell = 0xfff5ee, - b2_colorHoneydew = 0xf0fff0, - b2_colorMintCream = 0xf5fffa, - b2_colorAzure = 0xf0ffff, b2_colorAliceBlue = 0xf0f8ff, - b2_colorLavender = 0xe6e6fa, - b2_colorLavenderBlush = 0xfff0f5, - b2_colorMistyRose = 0xffe4e1, - b2_colorWhite = 0xffffff, - b2_colorBlack = 0x000000, - b2_colorDarkSlateGray = 0x2f4f4f, - b2_colorDimGray = 0x696969, - b2_colorSlateGray = 0x708090, - b2_colorLightSlateGray = 0x778899, - b2_colorGray = 0xbebebe, - b2_colorX11Gray = 0xbebebe, - b2_colorWebGray = 0x808080, - b2_colorLightGray = 0xd3d3d3, - b2_colorMidnightBlue = 0x191970, - b2_colorNavy = 0x000080, - b2_colorNavyBlue = 0x000080, - b2_colorCornflowerBlue = 0x6495ed, - b2_colorDarkSlateBlue = 0x483d8b, - b2_colorSlateBlue = 0x6a5acd, - b2_colorMediumSlateBlue = 0x7b68ee, - b2_colorLightSlateBlue = 0x8470ff, - b2_colorMediumBlue = 0x0000cd, - b2_colorRoyalBlue = 0x4169e1, - b2_colorBlue = 0x0000ff, - b2_colorDodgerBlue = 0x1e90ff, - b2_colorDeepSkyBlue = 0x00bfff, - b2_colorSkyBlue = 0x87ceeb, - b2_colorLightSkyBlue = 0x87cefa, - b2_colorSteelBlue = 0x4682b4, - b2_colorLightSteelBlue = 0xb0c4de, - b2_colorLightBlue = 0xadd8e6, - b2_colorPowderBlue = 0xb0e0e6, - b2_colorPaleTurquoise = 0xafeeee, - b2_colorDarkTurquoise = 0x00ced1, - b2_colorMediumTurquoise = 0x48d1cc, - b2_colorTurquoise = 0x40e0d0, - b2_colorCyan = 0x00ffff, - b2_colorAqua = 0x00ffff, - b2_colorLightCyan = 0xe0ffff, - b2_colorCadetBlue = 0x5f9ea0, - b2_colorMediumAquamarine = 0x66cdaa, - b2_colorAquamarine = 0x7fffd4, - b2_colorDarkGreen = 0x006400, - b2_colorDarkOliveGreen = 0x556b2f, - b2_colorDarkSeaGreen = 0x8fbc8f, - b2_colorSeaGreen = 0x2e8b57, - b2_colorMediumSeaGreen = 0x3cb371, - b2_colorLightSeaGreen = 0x20b2aa, - b2_colorPaleGreen = 0x98fb98, - b2_colorSpringGreen = 0x00ff7f, - b2_colorLawnGreen = 0x7cfc00, - b2_colorGreen = 0x00ff00, - b2_colorLime = 0x00ff00, - b2_colorX11Green = 0x00ff00, - b2_colorWebGreen = 0x008000, - b2_colorChartreuse = 0x7fff00, - b2_colorMediumSpringGreen = 0x00fa9a, - b2_colorGreenYellow = 0xadff2f, - b2_colorLimeGreen = 0x32cd32, - b2_colorYellowGreen = 0x9acd32, - b2_colorForestGreen = 0x228b22, - b2_colorOliveDrab = 0x6b8e23, - b2_colorDarkKhaki = 0xbdb76b, - b2_colorKhaki = 0xf0e68c, - b2_colorPaleGoldenrod = 0xeee8aa, - b2_colorLightGoldenrodYellow = 0xfafad2, - b2_colorLightYellow = 0xffffe0, - b2_colorYellow = 0xffff00, - b2_colorGold = 0xffd700, - b2_colorLightGoldenrod = 0xeedd82, - b2_colorGoldenrod = 0xdaa520, - b2_colorDarkGoldenrod = 0xb8860b, - b2_colorRosyBrown = 0xbc8f8f, - b2_colorIndianRed = 0xcd5c5c, - b2_colorSaddleBrown = 0x8b4513, - b2_colorSienna = 0xa0522d, - b2_colorPeru = 0xcd853f, - b2_colorBurlywood = 0xdeb887, - b2_colorBeige = 0xf5f5dc, - b2_colorWheat = 0xf5deb3, - b2_colorSandyBrown = 0xf4a460, - b2_colorTan = 0xd2b48c, - b2_colorChocolate = 0xd2691e, - b2_colorFirebrick = 0xb22222, - b2_colorBrown = 0xa52a2a, - b2_colorDarkSalmon = 0xe9967a, - b2_colorSalmon = 0xfa8072, - b2_colorLightSalmon = 0xffa07a, - b2_colorOrange = 0xffa500, - b2_colorDarkOrange = 0xff8c00, - b2_colorCoral = 0xff7f50, - b2_colorLightCoral = 0xf08080, - b2_colorTomato = 0xff6347, - b2_colorOrangeRed = 0xff4500, - b2_colorRed = 0xff0000, - b2_colorHotPink = 0xff69b4, - b2_colorDeepPink = 0xff1493, - b2_colorPink = 0xffc0cb, - b2_colorLightPink = 0xffb6c1, - b2_colorPaleVioletRed = 0xdb7093, - b2_colorMaroon = 0xb03060, - b2_colorX11Maroon = 0xb03060, - b2_colorWebMaroon = 0x800000, - b2_colorMediumVioletRed = 0xc71585, - b2_colorVioletRed = 0xd02090, - b2_colorMagenta = 0xff00ff, - b2_colorFuchsia = 0xff00ff, - b2_colorViolet = 0xee82ee, - b2_colorPlum = 0xdda0dd, - b2_colorOrchid = 0xda70d6, - b2_colorMediumOrchid = 0xba55d3, - b2_colorDarkOrchid = 0x9932cc, - b2_colorDarkViolet = 0x9400d3, - b2_colorBlueViolet = 0x8a2be2, - b2_colorPurple = 0xa020f0, - b2_colorX11Purple = 0xa020f0, - b2_colorWebPurple = 0x800080, - b2_colorMediumPurple = 0x9370db, - b2_colorThistle = 0xd8bfd8, - b2_colorSnow1 = 0xfffafa, - b2_colorSnow2 = 0xeee9e9, - b2_colorSnow3 = 0xcdc9c9, - b2_colorSnow4 = 0x8b8989, - b2_colorSeashell1 = 0xfff5ee, - b2_colorSeashell2 = 0xeee5de, - b2_colorSeashell3 = 0xcdc5bf, - b2_colorSeashell4 = 0x8b8682, + b2_colorAntiqueWhite = 0xfaebd7, b2_colorAntiqueWhite1 = 0xffefdb, b2_colorAntiqueWhite2 = 0xeedfcc, b2_colorAntiqueWhite3 = 0xcdc0b0, b2_colorAntiqueWhite4 = 0x8b8378, - b2_colorBisque1 = 0xffe4c4, - b2_colorBisque2 = 0xeed5b7, - b2_colorBisque3 = 0xcdb79e, - b2_colorBisque4 = 0x8b7d6b, - b2_colorPeachPuff1 = 0xffdab9, - b2_colorPeachPuff2 = 0xeecbad, - b2_colorPeachPuff3 = 0xcdaf95, - b2_colorPeachPuff4 = 0x8b7765, - b2_colorNavajoWhite1 = 0xffdead, - b2_colorNavajoWhite2 = 0xeecfa1, - b2_colorNavajoWhite3 = 0xcdb38b, - b2_colorNavajoWhite4 = 0x8b795e, - b2_colorLemonChiffon1 = 0xfffacd, - b2_colorLemonChiffon2 = 0xeee9bf, - b2_colorLemonChiffon3 = 0xcdc9a5, - b2_colorLemonChiffon4 = 0x8b8970, - b2_colorCornsilk1 = 0xfff8dc, - b2_colorCornsilk2 = 0xeee8cd, - b2_colorCornsilk3 = 0xcdc8b1, - b2_colorCornsilk4 = 0x8b8878, - b2_colorIvory1 = 0xfffff0, - b2_colorIvory2 = 0xeeeee0, - b2_colorIvory3 = 0xcdcdc1, - b2_colorIvory4 = 0x8b8b83, - b2_colorHoneydew1 = 0xf0fff0, - b2_colorHoneydew2 = 0xe0eee0, - b2_colorHoneydew3 = 0xc1cdc1, - b2_colorHoneydew4 = 0x838b83, - b2_colorLavenderBlush1 = 0xfff0f5, - b2_colorLavenderBlush2 = 0xeee0e5, - b2_colorLavenderBlush3 = 0xcdc1c5, - b2_colorLavenderBlush4 = 0x8b8386, - b2_colorMistyRose1 = 0xffe4e1, - b2_colorMistyRose2 = 0xeed5d2, - b2_colorMistyRose3 = 0xcdb7b5, - b2_colorMistyRose4 = 0x8b7d7b, + b2_colorAqua = 0x00ffff, + b2_colorAquamarine = 0x7fffd4, + b2_colorAquamarine1 = 0x7fffd4, + b2_colorAquamarine2 = 0x76eec6, + b2_colorAquamarine3 = 0x66cdaa, + b2_colorAquamarine4 = 0x458b74, + b2_colorAzure = 0xf0ffff, b2_colorAzure1 = 0xf0ffff, b2_colorAzure2 = 0xe0eeee, b2_colorAzure3 = 0xc1cdcd, b2_colorAzure4 = 0x838b8b, - b2_colorSlateBlue1 = 0x836fff, - b2_colorSlateBlue2 = 0x7a67ee, - b2_colorSlateBlue3 = 0x6959cd, - b2_colorSlateBlue4 = 0x473c8b, - b2_colorRoyalBlue1 = 0x4876ff, - b2_colorRoyalBlue2 = 0x436eee, - b2_colorRoyalBlue3 = 0x3a5fcd, - b2_colorRoyalBlue4 = 0x27408b, + b2_colorBeige = 0xf5f5dc, + b2_colorBisque = 0xffe4c4, + b2_colorBisque1 = 0xffe4c4, + b2_colorBisque2 = 0xeed5b7, + b2_colorBisque3 = 0xcdb79e, + b2_colorBisque4 = 0x8b7d6b, + b2_colorBlack = 0x000000, + b2_colorBlanchedAlmond = 0xffebcd, + b2_colorBlue = 0x0000ff, b2_colorBlue1 = 0x0000ff, b2_colorBlue2 = 0x0000ee, b2_colorBlue3 = 0x0000cd, b2_colorBlue4 = 0x00008b, - b2_colorDodgerBlue1 = 0x1e90ff, - b2_colorDodgerBlue2 = 0x1c86ee, - b2_colorDodgerBlue3 = 0x1874cd, - b2_colorDodgerBlue4 = 0x104e8b, - b2_colorSteelBlue1 = 0x63b8ff, - b2_colorSteelBlue2 = 0x5cacee, - b2_colorSteelBlue3 = 0x4f94cd, - b2_colorSteelBlue4 = 0x36648b, - b2_colorDeepSkyBlue1 = 0x00bfff, - b2_colorDeepSkyBlue2 = 0x00b2ee, - b2_colorDeepSkyBlue3 = 0x009acd, - b2_colorDeepSkyBlue4 = 0x00688b, - b2_colorSkyBlue1 = 0x87ceff, - b2_colorSkyBlue2 = 0x7ec0ee, - b2_colorSkyBlue3 = 0x6ca6cd, - b2_colorSkyBlue4 = 0x4a708b, - b2_colorLightSkyBlue1 = 0xb0e2ff, - b2_colorLightSkyBlue2 = 0xa4d3ee, - b2_colorLightSkyBlue3 = 0x8db6cd, - b2_colorLightSkyBlue4 = 0x607b8b, - b2_colorSlateGray1 = 0xc6e2ff, - b2_colorSlateGray2 = 0xb9d3ee, - b2_colorSlateGray3 = 0x9fb6cd, - b2_colorSlateGray4 = 0x6c7b8b, - b2_colorLightSteelBlue1 = 0xcae1ff, - b2_colorLightSteelBlue2 = 0xbcd2ee, - b2_colorLightSteelBlue3 = 0xa2b5cd, - b2_colorLightSteelBlue4 = 0x6e7b8b, - b2_colorLightBlue1 = 0xbfefff, - b2_colorLightBlue2 = 0xb2dfee, - b2_colorLightBlue3 = 0x9ac0cd, - b2_colorLightBlue4 = 0x68838b, - b2_colorLightCyan1 = 0xe0ffff, - b2_colorLightCyan2 = 0xd1eeee, - b2_colorLightCyan3 = 0xb4cdcd, - b2_colorLightCyan4 = 0x7a8b8b, - b2_colorPaleTurquoise1 = 0xbbffff, - b2_colorPaleTurquoise2 = 0xaeeeee, - b2_colorPaleTurquoise3 = 0x96cdcd, - b2_colorPaleTurquoise4 = 0x668b8b, - b2_colorCadetBlue1 = 0x98f5ff, - b2_colorCadetBlue2 = 0x8ee5ee, - b2_colorCadetBlue3 = 0x7ac5cd, - b2_colorCadetBlue4 = 0x53868b, - b2_colorTurquoise1 = 0x00f5ff, - b2_colorTurquoise2 = 0x00e5ee, - b2_colorTurquoise3 = 0x00c5cd, - b2_colorTurquoise4 = 0x00868b, - b2_colorCyan1 = 0x00ffff, - b2_colorCyan2 = 0x00eeee, - b2_colorCyan3 = 0x00cdcd, - b2_colorCyan4 = 0x008b8b, - b2_colorDarkSlateGray1 = 0x97ffff, - b2_colorDarkSlateGray2 = 0x8deeee, - b2_colorDarkSlateGray3 = 0x79cdcd, - b2_colorDarkSlateGray4 = 0x528b8b, - b2_colorAquamarine1 = 0x7fffd4, - b2_colorAquamarine2 = 0x76eec6, - b2_colorAquamarine3 = 0x66cdaa, - b2_colorAquamarine4 = 0x458b74, - b2_colorDarkSeaGreen1 = 0xc1ffc1, - b2_colorDarkSeaGreen2 = 0xb4eeb4, - b2_colorDarkSeaGreen3 = 0x9bcd9b, - b2_colorDarkSeaGreen4 = 0x698b69, - b2_colorSeaGreen1 = 0x54ff9f, - b2_colorSeaGreen2 = 0x4eee94, - b2_colorSeaGreen3 = 0x43cd80, - b2_colorSeaGreen4 = 0x2e8b57, - b2_colorPaleGreen1 = 0x9aff9a, - b2_colorPaleGreen2 = 0x90ee90, - b2_colorPaleGreen3 = 0x7ccd7c, - b2_colorPaleGreen4 = 0x548b54, - b2_colorSpringGreen1 = 0x00ff7f, - b2_colorSpringGreen2 = 0x00ee76, - b2_colorSpringGreen3 = 0x00cd66, - b2_colorSpringGreen4 = 0x008b45, - b2_colorGreen1 = 0x00ff00, - b2_colorGreen2 = 0x00ee00, - b2_colorGreen3 = 0x00cd00, - b2_colorGreen4 = 0x008b00, - b2_colorChartreuse1 = 0x7fff00, - b2_colorChartreuse2 = 0x76ee00, - b2_colorChartreuse3 = 0x66cd00, - b2_colorChartreuse4 = 0x458b00, - b2_colorOliveDrab1 = 0xc0ff3e, - b2_colorOliveDrab2 = 0xb3ee3a, - b2_colorOliveDrab3 = 0x9acd32, - b2_colorOliveDrab4 = 0x698b22, - b2_colorDarkOliveGreen1 = 0xcaff70, - b2_colorDarkOliveGreen2 = 0xbcee68, - b2_colorDarkOliveGreen3 = 0xa2cd5a, - b2_colorDarkOliveGreen4 = 0x6e8b3d, - b2_colorKhaki1 = 0xfff68f, - b2_colorKhaki2 = 0xeee685, - b2_colorKhaki3 = 0xcdc673, - b2_colorKhaki4 = 0x8b864e, - b2_colorLightGoldenrod1 = 0xffec8b, - b2_colorLightGoldenrod2 = 0xeedc82, - b2_colorLightGoldenrod3 = 0xcdbe70, - b2_colorLightGoldenrod4 = 0x8b814c, - b2_colorLightYellow1 = 0xffffe0, - b2_colorLightYellow2 = 0xeeeed1, - b2_colorLightYellow3 = 0xcdcdb4, - b2_colorLightYellow4 = 0x8b8b7a, - b2_colorYellow1 = 0xffff00, - b2_colorYellow2 = 0xeeee00, - b2_colorYellow3 = 0xcdcd00, - b2_colorYellow4 = 0x8b8b00, - b2_colorGold1 = 0xffd700, - b2_colorGold2 = 0xeec900, - b2_colorGold3 = 0xcdad00, - b2_colorGold4 = 0x8b7500, - b2_colorGoldenrod1 = 0xffc125, - b2_colorGoldenrod2 = 0xeeb422, - b2_colorGoldenrod3 = 0xcd9b1d, - b2_colorGoldenrod4 = 0x8b6914, - b2_colorDarkGoldenrod1 = 0xffb90f, - b2_colorDarkGoldenrod2 = 0xeead0e, - b2_colorDarkGoldenrod3 = 0xcd950c, - b2_colorDarkGoldenrod4 = 0x8b6508, - b2_colorRosyBrown1 = 0xffc1c1, - b2_colorRosyBrown2 = 0xeeb4b4, - b2_colorRosyBrown3 = 0xcd9b9b, - b2_colorRosyBrown4 = 0x8b6969, - b2_colorIndianRed1 = 0xff6a6a, - b2_colorIndianRed2 = 0xee6363, - b2_colorIndianRed3 = 0xcd5555, - b2_colorIndianRed4 = 0x8b3a3a, - b2_colorSienna1 = 0xff8247, - b2_colorSienna2 = 0xee7942, - b2_colorSienna3 = 0xcd6839, - b2_colorSienna4 = 0x8b4726, + b2_colorBlueViolet = 0x8a2be2, + b2_colorBrown = 0xa52a2a, + b2_colorBrown1 = 0xff4040, + b2_colorBrown2 = 0xee3b3b, + b2_colorBrown3 = 0xcd3333, + b2_colorBrown4 = 0x8b2323, + b2_colorBurlywood = 0xdeb887, b2_colorBurlywood1 = 0xffd39b, b2_colorBurlywood2 = 0xeec591, b2_colorBurlywood3 = 0xcdaa7d, b2_colorBurlywood4 = 0x8b7355, - b2_colorWheat1 = 0xffe7ba, - b2_colorWheat2 = 0xeed8ae, - b2_colorWheat3 = 0xcdba96, - b2_colorWheat4 = 0x8b7e66, - b2_colorTan1 = 0xffa54f, - b2_colorTan2 = 0xee9a49, - b2_colorTan3 = 0xcd853f, - b2_colorTan4 = 0x8b5a2b, + b2_colorCadetBlue = 0x5f9ea0, + b2_colorCadetBlue1 = 0x98f5ff, + b2_colorCadetBlue2 = 0x8ee5ee, + b2_colorCadetBlue3 = 0x7ac5cd, + b2_colorCadetBlue4 = 0x53868b, + b2_colorChartreuse = 0x7fff00, + b2_colorChartreuse1 = 0x7fff00, + b2_colorChartreuse2 = 0x76ee00, + b2_colorChartreuse3 = 0x66cd00, + b2_colorChartreuse4 = 0x458b00, + b2_colorChocolate = 0xd2691e, b2_colorChocolate1 = 0xff7f24, b2_colorChocolate2 = 0xee7621, b2_colorChocolate3 = 0xcd661d, b2_colorChocolate4 = 0x8b4513, - b2_colorFirebrick1 = 0xff3030, - b2_colorFirebrick2 = 0xee2c2c, - b2_colorFirebrick3 = 0xcd2626, - b2_colorFirebrick4 = 0x8b1a1a, - b2_colorBrown1 = 0xff4040, - b2_colorBrown2 = 0xee3b3b, - b2_colorBrown3 = 0xcd3333, - b2_colorBrown4 = 0x8b2323, - b2_colorSalmon1 = 0xff8c69, - b2_colorSalmon2 = 0xee8262, - b2_colorSalmon3 = 0xcd7054, - b2_colorSalmon4 = 0x8b4c39, - b2_colorLightSalmon1 = 0xffa07a, - b2_colorLightSalmon2 = 0xee9572, - b2_colorLightSalmon3 = 0xcd8162, - b2_colorLightSalmon4 = 0x8b5742, - b2_colorOrange1 = 0xffa500, - b2_colorOrange2 = 0xee9a00, - b2_colorOrange3 = 0xcd8500, - b2_colorOrange4 = 0x8b5a00, - b2_colorDarkOrange1 = 0xff7f00, - b2_colorDarkOrange2 = 0xee7600, - b2_colorDarkOrange3 = 0xcd6600, - b2_colorDarkOrange4 = 0x8b4500, + b2_colorCoral = 0xff7f50, b2_colorCoral1 = 0xff7256, b2_colorCoral2 = 0xee6a50, b2_colorCoral3 = 0xcd5b45, b2_colorCoral4 = 0x8b3e2f, - b2_colorTomato1 = 0xff6347, - b2_colorTomato2 = 0xee5c42, - b2_colorTomato3 = 0xcd4f39, - b2_colorTomato4 = 0x8b3626, - b2_colorOrangeRed1 = 0xff4500, - b2_colorOrangeRed2 = 0xee4000, - b2_colorOrangeRed3 = 0xcd3700, - b2_colorOrangeRed4 = 0x8b2500, - b2_colorRed1 = 0xff0000, - b2_colorRed2 = 0xee0000, - b2_colorRed3 = 0xcd0000, - b2_colorRed4 = 0x8b0000, - b2_colorDeepPink1 = 0xff1493, - b2_colorDeepPink2 = 0xee1289, - b2_colorDeepPink3 = 0xcd1076, - b2_colorDeepPink4 = 0x8b0a50, - b2_colorHotPink1 = 0xff6eb4, - b2_colorHotPink2 = 0xee6aa7, - b2_colorHotPink3 = 0xcd6090, - b2_colorHotPink4 = 0x8b3a62, - b2_colorPink1 = 0xffb5c5, - b2_colorPink2 = 0xeea9b8, - b2_colorPink3 = 0xcd919e, - b2_colorPink4 = 0x8b636c, - b2_colorLightPink1 = 0xffaeb9, - b2_colorLightPink2 = 0xeea2ad, - b2_colorLightPink3 = 0xcd8c95, - b2_colorLightPink4 = 0x8b5f65, - b2_colorPaleVioletRed1 = 0xff82ab, - b2_colorPaleVioletRed2 = 0xee799f, - b2_colorPaleVioletRed3 = 0xcd6889, - b2_colorPaleVioletRed4 = 0x8b475d, - b2_colorMaroon1 = 0xff34b3, - b2_colorMaroon2 = 0xee30a7, - b2_colorMaroon3 = 0xcd2990, - b2_colorMaroon4 = 0x8b1c62, - b2_colorVioletRed1 = 0xff3e96, - b2_colorVioletRed2 = 0xee3a8c, - b2_colorVioletRed3 = 0xcd3278, - b2_colorVioletRed4 = 0x8b2252, - b2_colorMagenta1 = 0xff00ff, - b2_colorMagenta2 = 0xee00ee, - b2_colorMagenta3 = 0xcd00cd, - b2_colorMagenta4 = 0x8b008b, - b2_colorOrchid1 = 0xff83fa, - b2_colorOrchid2 = 0xee7ae9, - b2_colorOrchid3 = 0xcd69c9, - b2_colorOrchid4 = 0x8b4789, - b2_colorPlum1 = 0xffbbff, - b2_colorPlum2 = 0xeeaeee, - b2_colorPlum3 = 0xcd96cd, - b2_colorPlum4 = 0x8b668b, - b2_colorMediumOrchid1 = 0xe066ff, - b2_colorMediumOrchid2 = 0xd15fee, - b2_colorMediumOrchid3 = 0xb452cd, - b2_colorMediumOrchid4 = 0x7a378b, + b2_colorCornflowerBlue = 0x6495ed, + b2_colorCornsilk = 0xfff8dc, + b2_colorCornsilk1 = 0xfff8dc, + b2_colorCornsilk2 = 0xeee8cd, + b2_colorCornsilk3 = 0xcdc8b1, + b2_colorCornsilk4 = 0x8b8878, + b2_colorCrimson = 0xdc143c, + b2_colorCyan = 0x00ffff, + b2_colorCyan1 = 0x00ffff, + b2_colorCyan2 = 0x00eeee, + b2_colorCyan3 = 0x00cdcd, + b2_colorCyan4 = 0x008b8b, + b2_colorDarkBlue = 0x00008b, + b2_colorDarkCyan = 0x008b8b, + b2_colorDarkGoldenrod = 0xb8860b, + b2_colorDarkGoldenrod1 = 0xffb90f, + b2_colorDarkGoldenrod2 = 0xeead0e, + b2_colorDarkGoldenrod3 = 0xcd950c, + b2_colorDarkGoldenrod4 = 0x8b6508, + b2_colorDarkGray = 0xa9a9a9, + b2_colorDarkGreen = 0x006400, + b2_colorDarkKhaki = 0xbdb76b, + b2_colorDarkMagenta = 0x8b008b, + b2_colorDarkOliveGreen = 0x556b2f, + b2_colorDarkOliveGreen1 = 0xcaff70, + b2_colorDarkOliveGreen2 = 0xbcee68, + b2_colorDarkOliveGreen3 = 0xa2cd5a, + b2_colorDarkOliveGreen4 = 0x6e8b3d, + b2_colorDarkOrange = 0xff8c00, + b2_colorDarkOrange1 = 0xff7f00, + b2_colorDarkOrange2 = 0xee7600, + b2_colorDarkOrange3 = 0xcd6600, + b2_colorDarkOrange4 = 0x8b4500, + b2_colorDarkOrchid = 0x9932cc, b2_colorDarkOrchid1 = 0xbf3eff, b2_colorDarkOrchid2 = 0xb23aee, b2_colorDarkOrchid3 = 0x9a32cd, b2_colorDarkOrchid4 = 0x68228b, - b2_colorPurple1 = 0x9b30ff, - b2_colorPurple2 = 0x912cee, - b2_colorPurple3 = 0x7d26cd, - b2_colorPurple4 = 0x551a8b, - b2_colorMediumPurple1 = 0xab82ff, - b2_colorMediumPurple2 = 0x9f79ee, - b2_colorMediumPurple3 = 0x8968cd, - b2_colorMediumPurple4 = 0x5d478b, - b2_colorThistle1 = 0xffe1ff, - b2_colorThistle2 = 0xeed2ee, - b2_colorThistle3 = 0xcdb5cd, - b2_colorThistle4 = 0x8b7b8b, - b2_colorGray0 = 0x000000, - b2_colorGray1 = 0x030303, - b2_colorGray2 = 0x050505, - b2_colorGray3 = 0x080808, - b2_colorGray4 = 0x0a0a0a, - b2_colorGray5 = 0x0d0d0d, - b2_colorGray6 = 0x0f0f0f, - b2_colorGray7 = 0x121212, - b2_colorGray8 = 0x141414, - b2_colorGray9 = 0x171717, - b2_colorGray10 = 0x1a1a1a, - b2_colorGray11 = 0x1c1c1c, - b2_colorGray12 = 0x1f1f1f, - b2_colorGray13 = 0x212121, - b2_colorGray14 = 0x242424, - b2_colorGray15 = 0x262626, - b2_colorGray16 = 0x292929, - b2_colorGray17 = 0x2b2b2b, + b2_colorDarkRed = 0x8b0000, + b2_colorDarkSalmon = 0xe9967a, + b2_colorDarkSeaGreen = 0x8fbc8f, + b2_colorDarkSeaGreen1 = 0xc1ffc1, + b2_colorDarkSeaGreen2 = 0xb4eeb4, + b2_colorDarkSeaGreen3 = 0x9bcd9b, + b2_colorDarkSeaGreen4 = 0x698b69, + b2_colorDarkSlateBlue = 0x483d8b, + b2_colorDarkSlateGray = 0x2f4f4f, + b2_colorDarkSlateGray1 = 0x97ffff, + b2_colorDarkSlateGray2 = 0x8deeee, + b2_colorDarkSlateGray3 = 0x79cdcd, + b2_colorDarkSlateGray4 = 0x528b8b, + b2_colorDarkTurquoise = 0x00ced1, + b2_colorDarkViolet = 0x9400d3, + b2_colorDeepPink = 0xff1493, + b2_colorDeepPink1 = 0xff1493, + b2_colorDeepPink2 = 0xee1289, + b2_colorDeepPink3 = 0xcd1076, + b2_colorDeepPink4 = 0x8b0a50, + b2_colorDeepSkyBlue = 0x00bfff, + b2_colorDeepSkyBlue1 = 0x00bfff, + b2_colorDeepSkyBlue2 = 0x00b2ee, + b2_colorDeepSkyBlue3 = 0x009acd, + b2_colorDeepSkyBlue4 = 0x00688b, + b2_colorDimGray = 0x696969, + b2_colorDodgerBlue = 0x1e90ff, + b2_colorDodgerBlue1 = 0x1e90ff, + b2_colorDodgerBlue2 = 0x1c86ee, + b2_colorDodgerBlue3 = 0x1874cd, + b2_colorDodgerBlue4 = 0x104e8b, + b2_colorFirebrick = 0xb22222, + b2_colorFirebrick1 = 0xff3030, + b2_colorFirebrick2 = 0xee2c2c, + b2_colorFirebrick3 = 0xcd2626, + b2_colorFirebrick4 = 0x8b1a1a, + b2_colorFloralWhite = 0xfffaf0, + b2_colorForestGreen = 0x228b22, + b2_colorFuchsia = 0xff00ff, + b2_colorGainsboro = 0xdcdcdc, + b2_colorGhostWhite = 0xf8f8ff, + b2_colorGold = 0xffd700, + b2_colorGold1 = 0xffd700, + b2_colorGold2 = 0xeec900, + b2_colorGold3 = 0xcdad00, + b2_colorGold4 = 0x8b7500, + b2_colorGoldenrod = 0xdaa520, + b2_colorGoldenrod1 = 0xffc125, + b2_colorGoldenrod2 = 0xeeb422, + b2_colorGoldenrod3 = 0xcd9b1d, + b2_colorGoldenrod4 = 0x8b6914, + b2_colorGray = 0xbebebe, + b2_colorGray0 = 0x000000, + b2_colorGray1 = 0x030303, + b2_colorGray10 = 0x1a1a1a, + b2_colorGray100 = 0xffffff, + b2_colorGray11 = 0x1c1c1c, + b2_colorGray12 = 0x1f1f1f, + b2_colorGray13 = 0x212121, + b2_colorGray14 = 0x242424, + b2_colorGray15 = 0x262626, + b2_colorGray16 = 0x292929, + b2_colorGray17 = 0x2b2b2b, b2_colorGray18 = 0x2e2e2e, b2_colorGray19 = 0x303030, + b2_colorGray2 = 0x050505, b2_colorGray20 = 0x333333, b2_colorGray21 = 0x363636, b2_colorGray22 = 0x383838, @@ -494,6 +186,7 @@ enum b2HexColor b2_colorGray27 = 0x454545, b2_colorGray28 = 0x474747, b2_colorGray29 = 0x4a4a4a, + b2_colorGray3 = 0x080808, b2_colorGray30 = 0x4d4d4d, b2_colorGray31 = 0x4f4f4f, b2_colorGray32 = 0x525252, @@ -504,6 +197,7 @@ enum b2HexColor b2_colorGray37 = 0x5e5e5e, b2_colorGray38 = 0x616161, b2_colorGray39 = 0x636363, + b2_colorGray4 = 0x0a0a0a, b2_colorGray40 = 0x666666, b2_colorGray41 = 0x696969, b2_colorGray42 = 0x6b6b6b, @@ -514,6 +208,7 @@ enum b2HexColor b2_colorGray47 = 0x787878, b2_colorGray48 = 0x7a7a7a, b2_colorGray49 = 0x7d7d7d, + b2_colorGray5 = 0x0d0d0d, b2_colorGray50 = 0x7f7f7f, b2_colorGray51 = 0x828282, b2_colorGray52 = 0x858585, @@ -524,6 +219,7 @@ enum b2HexColor b2_colorGray57 = 0x919191, b2_colorGray58 = 0x949494, b2_colorGray59 = 0x969696, + b2_colorGray6 = 0x0f0f0f, b2_colorGray60 = 0x999999, b2_colorGray61 = 0x9c9c9c, b2_colorGray62 = 0x9e9e9e, @@ -534,6 +230,7 @@ enum b2HexColor b2_colorGray67 = 0xababab, b2_colorGray68 = 0xadadad, b2_colorGray69 = 0xb0b0b0, + b2_colorGray7 = 0x121212, b2_colorGray70 = 0xb3b3b3, b2_colorGray71 = 0xb5b5b5, b2_colorGray72 = 0xb8b8b8, @@ -544,6 +241,7 @@ enum b2HexColor b2_colorGray77 = 0xc4c4c4, b2_colorGray78 = 0xc7c7c7, b2_colorGray79 = 0xc9c9c9, + b2_colorGray8 = 0x141414, b2_colorGray80 = 0xcccccc, b2_colorGray81 = 0xcfcfcf, b2_colorGray82 = 0xd1d1d1, @@ -554,6 +252,7 @@ enum b2HexColor b2_colorGray87 = 0xdedede, b2_colorGray88 = 0xe0e0e0, b2_colorGray89 = 0xe3e3e3, + b2_colorGray9 = 0x171717, b2_colorGray90 = 0xe5e5e5, b2_colorGray91 = 0xe8e8e8, b2_colorGray92 = 0xebebeb, @@ -564,19 +263,320 @@ enum b2HexColor b2_colorGray97 = 0xf7f7f7, b2_colorGray98 = 0xfafafa, b2_colorGray99 = 0xfcfcfc, - b2_colorGray100 = 0xffffff, - b2_colorDarkGray = 0xa9a9a9, - b2_colorDarkBlue = 0x00008b, - b2_colorDarkCyan = 0x008b8b, - b2_colorDarkMagenta = 0x8b008b, - b2_colorDarkRed = 0x8b0000, - b2_colorLightGreen = 0x90ee90, - b2_colorCrimson = 0xdc143c, + b2_colorGreen = 0x00ff00, + b2_colorGreen1 = 0x00ff00, + b2_colorGreen2 = 0x00ee00, + b2_colorGreen3 = 0x00cd00, + b2_colorGreen4 = 0x008b00, + b2_colorGreenYellow = 0xadff2f, + b2_colorHoneydew = 0xf0fff0, + b2_colorHoneydew1 = 0xf0fff0, + b2_colorHoneydew2 = 0xe0eee0, + b2_colorHoneydew3 = 0xc1cdc1, + b2_colorHoneydew4 = 0x838b83, + b2_colorHotPink = 0xff69b4, + b2_colorHotPink1 = 0xff6eb4, + b2_colorHotPink2 = 0xee6aa7, + b2_colorHotPink3 = 0xcd6090, + b2_colorHotPink4 = 0x8b3a62, + b2_colorIndianRed = 0xcd5c5c, + b2_colorIndianRed1 = 0xff6a6a, + b2_colorIndianRed2 = 0xee6363, + b2_colorIndianRed3 = 0xcd5555, + b2_colorIndianRed4 = 0x8b3a3a, b2_colorIndigo = 0x4b0082, + b2_colorIvory = 0xfffff0, + b2_colorIvory1 = 0xfffff0, + b2_colorIvory2 = 0xeeeee0, + b2_colorIvory3 = 0xcdcdc1, + b2_colorIvory4 = 0x8b8b83, + b2_colorKhaki = 0xf0e68c, + b2_colorKhaki1 = 0xfff68f, + b2_colorKhaki2 = 0xeee685, + b2_colorKhaki3 = 0xcdc673, + b2_colorKhaki4 = 0x8b864e, + b2_colorLavender = 0xe6e6fa, + b2_colorLavenderBlush = 0xfff0f5, + b2_colorLavenderBlush1 = 0xfff0f5, + b2_colorLavenderBlush2 = 0xeee0e5, + b2_colorLavenderBlush3 = 0xcdc1c5, + b2_colorLavenderBlush4 = 0x8b8386, + b2_colorLawnGreen = 0x7cfc00, + b2_colorLemonChiffon = 0xfffacd, + b2_colorLemonChiffon1 = 0xfffacd, + b2_colorLemonChiffon2 = 0xeee9bf, + b2_colorLemonChiffon3 = 0xcdc9a5, + b2_colorLemonChiffon4 = 0x8b8970, + b2_colorLightBlue = 0xadd8e6, + b2_colorLightBlue1 = 0xbfefff, + b2_colorLightBlue2 = 0xb2dfee, + b2_colorLightBlue3 = 0x9ac0cd, + b2_colorLightBlue4 = 0x68838b, + b2_colorLightCoral = 0xf08080, + b2_colorLightCyan = 0xe0ffff, + b2_colorLightCyan1 = 0xe0ffff, + b2_colorLightCyan2 = 0xd1eeee, + b2_colorLightCyan3 = 0xb4cdcd, + b2_colorLightCyan4 = 0x7a8b8b, + b2_colorLightGoldenrod = 0xeedd82, + b2_colorLightGoldenrod1 = 0xffec8b, + b2_colorLightGoldenrod2 = 0xeedc82, + b2_colorLightGoldenrod3 = 0xcdbe70, + b2_colorLightGoldenrod4 = 0x8b814c, + b2_colorLightGoldenrodYellow = 0xfafad2, + b2_colorLightGray = 0xd3d3d3, + b2_colorLightGreen = 0x90ee90, + b2_colorLightPink = 0xffb6c1, + b2_colorLightPink1 = 0xffaeb9, + b2_colorLightPink2 = 0xeea2ad, + b2_colorLightPink3 = 0xcd8c95, + b2_colorLightPink4 = 0x8b5f65, + b2_colorLightSalmon = 0xffa07a, + b2_colorLightSalmon1 = 0xffa07a, + b2_colorLightSalmon2 = 0xee9572, + b2_colorLightSalmon3 = 0xcd8162, + b2_colorLightSalmon4 = 0x8b5742, + b2_colorLightSeaGreen = 0x20b2aa, + b2_colorLightSkyBlue = 0x87cefa, + b2_colorLightSkyBlue1 = 0xb0e2ff, + b2_colorLightSkyBlue2 = 0xa4d3ee, + b2_colorLightSkyBlue3 = 0x8db6cd, + b2_colorLightSkyBlue4 = 0x607b8b, + b2_colorLightSlateBlue = 0x8470ff, + b2_colorLightSlateGray = 0x778899, + b2_colorLightSteelBlue = 0xb0c4de, + b2_colorLightSteelBlue1 = 0xcae1ff, + b2_colorLightSteelBlue2 = 0xbcd2ee, + b2_colorLightSteelBlue3 = 0xa2b5cd, + b2_colorLightSteelBlue4 = 0x6e7b8b, + b2_colorLightYellow = 0xffffe0, + b2_colorLightYellow1 = 0xffffe0, + b2_colorLightYellow2 = 0xeeeed1, + b2_colorLightYellow3 = 0xcdcdb4, + b2_colorLightYellow4 = 0x8b8b7a, + b2_colorLime = 0x00ff00, + b2_colorLimeGreen = 0x32cd32, + b2_colorLinen = 0xfaf0e6, + b2_colorMagenta = 0xff00ff, + b2_colorMagenta1 = 0xff00ff, + b2_colorMagenta2 = 0xee00ee, + b2_colorMagenta3 = 0xcd00cd, + b2_colorMagenta4 = 0x8b008b, + b2_colorMaroon = 0xb03060, + b2_colorMaroon1 = 0xff34b3, + b2_colorMaroon2 = 0xee30a7, + b2_colorMaroon3 = 0xcd2990, + b2_colorMaroon4 = 0x8b1c62, + b2_colorMediumAquamarine = 0x66cdaa, + b2_colorMediumBlue = 0x0000cd, + b2_colorMediumOrchid = 0xba55d3, + b2_colorMediumOrchid1 = 0xe066ff, + b2_colorMediumOrchid2 = 0xd15fee, + b2_colorMediumOrchid3 = 0xb452cd, + b2_colorMediumOrchid4 = 0x7a378b, + b2_colorMediumPurple = 0x9370db, + b2_colorMediumPurple1 = 0xab82ff, + b2_colorMediumPurple2 = 0x9f79ee, + b2_colorMediumPurple3 = 0x8968cd, + b2_colorMediumPurple4 = 0x5d478b, + b2_colorMediumSeaGreen = 0x3cb371, + b2_colorMediumSlateBlue = 0x7b68ee, + b2_colorMediumSpringGreen = 0x00fa9a, + b2_colorMediumTurquoise = 0x48d1cc, + b2_colorMediumVioletRed = 0xc71585, + b2_colorMidnightBlue = 0x191970, + b2_colorMintCream = 0xf5fffa, + b2_colorMistyRose = 0xffe4e1, + b2_colorMistyRose1 = 0xffe4e1, + b2_colorMistyRose2 = 0xeed5d2, + b2_colorMistyRose3 = 0xcdb7b5, + b2_colorMistyRose4 = 0x8b7d7b, + b2_colorMoccasin = 0xffe4b5, + b2_colorNavajoWhite = 0xffdead, + b2_colorNavajoWhite1 = 0xffdead, + b2_colorNavajoWhite2 = 0xeecfa1, + b2_colorNavajoWhite3 = 0xcdb38b, + b2_colorNavajoWhite4 = 0x8b795e, + b2_colorNavy = 0x000080, + b2_colorNavyBlue = 0x000080, + b2_colorOldLace = 0xfdf5e6, b2_colorOlive = 0x808000, + b2_colorOliveDrab = 0x6b8e23, + b2_colorOliveDrab1 = 0xc0ff3e, + b2_colorOliveDrab2 = 0xb3ee3a, + b2_colorOliveDrab3 = 0x9acd32, + b2_colorOliveDrab4 = 0x698b22, + b2_colorOrange = 0xffa500, + b2_colorOrange1 = 0xffa500, + b2_colorOrange2 = 0xee9a00, + b2_colorOrange3 = 0xcd8500, + b2_colorOrange4 = 0x8b5a00, + b2_colorOrangeRed = 0xff4500, + b2_colorOrangeRed1 = 0xff4500, + b2_colorOrangeRed2 = 0xee4000, + b2_colorOrangeRed3 = 0xcd3700, + b2_colorOrangeRed4 = 0x8b2500, + b2_colorOrchid = 0xda70d6, + b2_colorOrchid1 = 0xff83fa, + b2_colorOrchid2 = 0xee7ae9, + b2_colorOrchid3 = 0xcd69c9, + b2_colorOrchid4 = 0x8b4789, + b2_colorPaleGoldenrod = 0xeee8aa, + b2_colorPaleGreen = 0x98fb98, + b2_colorPaleGreen1 = 0x9aff9a, + b2_colorPaleGreen2 = 0x90ee90, + b2_colorPaleGreen3 = 0x7ccd7c, + b2_colorPaleGreen4 = 0x548b54, + b2_colorPaleTurquoise = 0xafeeee, + b2_colorPaleTurquoise1 = 0xbbffff, + b2_colorPaleTurquoise2 = 0xaeeeee, + b2_colorPaleTurquoise3 = 0x96cdcd, + b2_colorPaleTurquoise4 = 0x668b8b, + b2_colorPaleVioletRed = 0xdb7093, + b2_colorPaleVioletRed1 = 0xff82ab, + b2_colorPaleVioletRed2 = 0xee799f, + b2_colorPaleVioletRed3 = 0xcd6889, + b2_colorPaleVioletRed4 = 0x8b475d, + b2_colorPapayaWhip = 0xffefd5, + b2_colorPeachPuff = 0xffdab9, + b2_colorPeachPuff1 = 0xffdab9, + b2_colorPeachPuff2 = 0xeecbad, + b2_colorPeachPuff3 = 0xcdaf95, + b2_colorPeachPuff4 = 0x8b7765, + b2_colorPeru = 0xcd853f, + b2_colorPink = 0xffc0cb, + b2_colorPink1 = 0xffb5c5, + b2_colorPink2 = 0xeea9b8, + b2_colorPink3 = 0xcd919e, + b2_colorPink4 = 0x8b636c, + b2_colorPlum = 0xdda0dd, + b2_colorPlum1 = 0xffbbff, + b2_colorPlum2 = 0xeeaeee, + b2_colorPlum3 = 0xcd96cd, + b2_colorPlum4 = 0x8b668b, + b2_colorPowderBlue = 0xb0e0e6, + b2_colorPurple = 0xa020f0, + b2_colorPurple1 = 0x9b30ff, + b2_colorPurple2 = 0x912cee, + b2_colorPurple3 = 0x7d26cd, + b2_colorPurple4 = 0x551a8b, b2_colorRebeccaPurple = 0x663399, + b2_colorRed = 0xff0000, + b2_colorRed1 = 0xff0000, + b2_colorRed2 = 0xee0000, + b2_colorRed3 = 0xcd0000, + b2_colorRed4 = 0x8b0000, + b2_colorRosyBrown = 0xbc8f8f, + b2_colorRosyBrown1 = 0xffc1c1, + b2_colorRosyBrown2 = 0xeeb4b4, + b2_colorRosyBrown3 = 0xcd9b9b, + b2_colorRosyBrown4 = 0x8b6969, + b2_colorRoyalBlue = 0x4169e1, + b2_colorRoyalBlue1 = 0x4876ff, + b2_colorRoyalBlue2 = 0x436eee, + b2_colorRoyalBlue3 = 0x3a5fcd, + b2_colorRoyalBlue4 = 0x27408b, + b2_colorSaddleBrown = 0x8b4513, + b2_colorSalmon = 0xfa8072, + b2_colorSalmon1 = 0xff8c69, + b2_colorSalmon2 = 0xee8262, + b2_colorSalmon3 = 0xcd7054, + b2_colorSalmon4 = 0x8b4c39, + b2_colorSandyBrown = 0xf4a460, + b2_colorSeaGreen = 0x2e8b57, + b2_colorSeaGreen1 = 0x54ff9f, + b2_colorSeaGreen2 = 0x4eee94, + b2_colorSeaGreen3 = 0x43cd80, + b2_colorSeaGreen4 = 0x2e8b57, + b2_colorSeashell = 0xfff5ee, + b2_colorSeashell1 = 0xfff5ee, + b2_colorSeashell2 = 0xeee5de, + b2_colorSeashell3 = 0xcdc5bf, + b2_colorSeashell4 = 0x8b8682, + b2_colorSienna = 0xa0522d, + b2_colorSienna1 = 0xff8247, + b2_colorSienna2 = 0xee7942, + b2_colorSienna3 = 0xcd6839, + b2_colorSienna4 = 0x8b4726, b2_colorSilver = 0xc0c0c0, - b2_colorTeal = 0x008080 + b2_colorSkyBlue = 0x87ceeb, + b2_colorSkyBlue1 = 0x87ceff, + b2_colorSkyBlue2 = 0x7ec0ee, + b2_colorSkyBlue3 = 0x6ca6cd, + b2_colorSkyBlue4 = 0x4a708b, + b2_colorSlateBlue = 0x6a5acd, + b2_colorSlateBlue1 = 0x836fff, + b2_colorSlateBlue2 = 0x7a67ee, + b2_colorSlateBlue3 = 0x6959cd, + b2_colorSlateBlue4 = 0x473c8b, + b2_colorSlateGray = 0x708090, + b2_colorSlateGray1 = 0xc6e2ff, + b2_colorSlateGray2 = 0xb9d3ee, + b2_colorSlateGray3 = 0x9fb6cd, + b2_colorSlateGray4 = 0x6c7b8b, + b2_colorSnow = 0xfffafa, + b2_colorSnow1 = 0xfffafa, + b2_colorSnow2 = 0xeee9e9, + b2_colorSnow3 = 0xcdc9c9, + b2_colorSnow4 = 0x8b8989, + b2_colorSpringGreen = 0x00ff7f, + b2_colorSpringGreen1 = 0x00ff7f, + b2_colorSpringGreen2 = 0x00ee76, + b2_colorSpringGreen3 = 0x00cd66, + b2_colorSpringGreen4 = 0x008b45, + b2_colorSteelBlue = 0x4682b4, + b2_colorSteelBlue1 = 0x63b8ff, + b2_colorSteelBlue2 = 0x5cacee, + b2_colorSteelBlue3 = 0x4f94cd, + b2_colorSteelBlue4 = 0x36648b, + b2_colorTan = 0xd2b48c, + b2_colorTan1 = 0xffa54f, + b2_colorTan2 = 0xee9a49, + b2_colorTan3 = 0xcd853f, + b2_colorTan4 = 0x8b5a2b, + b2_colorTeal = 0x008080, + b2_colorThistle = 0xd8bfd8, + b2_colorThistle1 = 0xffe1ff, + b2_colorThistle2 = 0xeed2ee, + b2_colorThistle3 = 0xcdb5cd, + b2_colorThistle4 = 0x8b7b8b, + b2_colorTomato = 0xff6347, + b2_colorTomato1 = 0xff6347, + b2_colorTomato2 = 0xee5c42, + b2_colorTomato3 = 0xcd4f39, + b2_colorTomato4 = 0x8b3626, + b2_colorTurquoise = 0x40e0d0, + b2_colorTurquoise1 = 0x00f5ff, + b2_colorTurquoise2 = 0x00e5ee, + b2_colorTurquoise3 = 0x00c5cd, + b2_colorTurquoise4 = 0x00868b, + b2_colorViolet = 0xee82ee, + b2_colorVioletRed = 0xd02090, + b2_colorVioletRed1 = 0xff3e96, + b2_colorVioletRed2 = 0xee3a8c, + b2_colorVioletRed3 = 0xcd3278, + b2_colorVioletRed4 = 0x8b2252, + b2_colorWebGray = 0x808080, + b2_colorWebGreen = 0x008000, + b2_colorWebMaroon = 0x800000, + b2_colorWebPurple = 0x800080, + b2_colorWheat = 0xf5deb3, + b2_colorWheat1 = 0xffe7ba, + b2_colorWheat2 = 0xeed8ae, + b2_colorWheat3 = 0xcdba96, + b2_colorWheat4 = 0x8b7e66, + b2_colorWhite = 0xffffff, + b2_colorWhiteSmoke = 0xf5f5f5, + b2_colorX11Gray = 0xbebebe, + b2_colorX11Green = 0x00ff00, + b2_colorX11Maroon = 0xb03060, + b2_colorX11Purple = 0xa020f0, + b2_colorYellow = 0xffff00, + b2_colorYellow1 = 0xffff00, + b2_colorYellow2 = 0xeeee00, + b2_colorYellow3 = 0xcdcd00, + b2_colorYellow4 = 0x8b8b00, + b2_colorYellowGreen = 0x9acd32, }; #ifdef __cplusplus diff --git a/include/box2d/constants.h b/include/box2d/constants.h index b06e5b28..157a1de2 100644 --- a/include/box2d/constants.h +++ b/include/box2d/constants.h @@ -101,6 +101,9 @@ extern float b2_timeToSleep; /// Maximum parallel workers. Used to size some static arrays. #define b2_maxWorkers 64 +/// Solver graph coloring +#define b2_graphColorCount 12 + /// Version numbering scheme. /// See http://en.wikipedia.org/wiki/Software_versioning typedef struct b2Version diff --git a/include/box2d/timer.h b/include/box2d/timer.h index f848d271..e477590f 100644 --- a/include/box2d/timer.h +++ b/include/box2d/timer.h @@ -32,6 +32,7 @@ typedef struct b2Statistics int32_t stackCapacity; int32_t stackUsed; int32_t byteCount; + int32_t colorCounts[b2_graphColorCount + 1]; } b2Statistics; /// Timer for profiling. This has platform specific code and may diff --git a/samples/collection/benchmark_many_tumblers.cpp b/samples/collection/benchmark_many_tumblers.cpp index 05b922c4..89cfe1dc 100644 --- a/samples/collection/benchmark_many_tumblers.cpp +++ b/samples/collection/benchmark_many_tumblers.cpp @@ -30,7 +30,7 @@ class BenchmarkManyTumblers : public Sample m_bodyCount = 0; m_bodyIndex = 0; - m_motorSpeed = 0.0f; + m_motorSpeed = 25.0f; m_shapeType = 0; CreateScene(); diff --git a/samples/sample.cpp b/samples/sample.cpp index 9892935b..08874a72 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -62,8 +62,7 @@ Sample::Sample(const Settings& settings) { b2Vec2 gravity = {0.0f, -10.0f}; - // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = 8;// enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = B2_MIN(8, enki::GetNumHardwareThreads()); m_scheduler.Initialize(maxThreads); m_taskCount = 0; @@ -89,7 +88,7 @@ Sample::Sample(const Settings& settings) // m_world->SetContactListener(this); // TODO_ERIN too expensive - //b2World_SetPreSolveCallback(m_worldId, PreSolveFcn, this); + b2World_SetPreSolveCallback(m_worldId, PreSolveFcn, this); m_stepCount = 0; @@ -258,6 +257,22 @@ void Sample::Step(Settings& settings) g_draw.DrawString(5, m_textLine, "proxies/height = %d/%d", s.proxyCount, s.treeHeight); m_textLine += m_textIncrement; + int32_t totalCount = 0; + char buffer[256] = {0}; + int32_t offset = sprintf_s(buffer, 256, "colors: "); + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + offset += sprintf_s(buffer + offset, 256 - offset, "%d/", s.colorCounts[i]); + totalCount += s.colorCounts[i]; + } + totalCount += s.colorCounts[b2_graphColorCount]; + sprintf_s(buffer + offset, 256 - offset, "(%d)[%d]", s.colorCounts[b2_graphColorCount], totalCount); + g_draw.DrawString(5, m_textLine, buffer); + m_textLine += m_textIncrement; + + g_draw.DrawString(5, m_textLine, "tree: proxies/height = %d/%d", s.proxyCount, s.treeHeight); + m_textLine += m_textIncrement; + g_draw.DrawString(5, m_textLine, "stack allocator capacity/used = %d/%d", s.stackCapacity, s.stackUsed); m_textLine += m_textIncrement; @@ -337,25 +352,20 @@ void Sample::Step(Settings& settings) b2Color addColor = {0.3f, 0.95f, 0.3f, 1.0f}; b2Color persistColor = {0.3f, 0.3f, 0.95f, 1.0f}; - b2HexColor colors[12] = {b2_colorAquamarine, b2_colorBisque, b2_colorBlue, b2_colorBrown, - b2_colorBurlywood, b2_colorCadetBlue, b2_colorChartreuse, b2_colorChocolate, - b2_colorDarkGoldenrod, b2_colorCoral, b2_colorAqua, b2_colorHoneydew}; + b2HexColor colors[b2_graphColorCount + 1] = {b2_colorRed, b2_colorOrange, b2_colorYellow, b2_colorGreen, b2_colorCyan, + b2_colorBlue, b2_colorViolet, b2_colorPink, b2_colorChocolate, b2_colorGoldenrod, + b2_colorCoral, b2_colorAqua, b2_colorBlack}; for (int32_t i = 0; i < m_pointCount; ++i) { ContactPoint* point = m_points + i; - //if (point->constraintIndex >= 0 && point->constraintIndex < 5000) - //{ - // b2Vec2 p = point->position; - // p.y += 0.1f; - // g_draw.DrawString(p, "%d", point->constraintIndex); - //} - if (0 <= point->color && point->color < 12) + if (0 <= point->color && point->color <= b2_graphColorCount) { // graph color - g_draw.DrawPoint(point->position, 5.0f, b2MakeColor(colors[point->color], 1.0f)); - //g_draw.DrawString(point->position, "%d", point->color); + float pointSize = point->color == b2_graphColorCount ? 7.5f : 5.0f; + g_draw.DrawPoint(point->position, pointSize, b2MakeColor(colors[point->color], 1.0f)); + // g_draw.DrawString(point->position, "%d", point->color); } else if (point->separation > b2_linearSlop) { diff --git a/src/graph.c b/src/graph.c index 98aa31d6..5fd2099e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -34,6 +34,8 @@ typedef struct b2WorkerContext void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity) { + memset(graph, 0, sizeof(b2Graph)); + bodyCapacity = B2_MAX(bodyCapacity, 8); contactCapacity = B2_MAX(contactCapacity, 8); jointCapacity = B2_MAX(jointCapacity, 8); @@ -925,6 +927,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2Graph* graph = &world->graph; b2GraphColor* colors = graph->colors; + // Count awake bodies int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; int32_t awakeBodyCount = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) @@ -939,6 +942,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) return; } + // Reserve space for awake bodies b2Body* bodies = world->bodies; b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); @@ -947,15 +951,16 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) // TODO_ERIN have body directly reference solver body for user access int32_t* solverToBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); + // Map from world body to solver body + // TODO_ERIN eliminate this? int32_t bodyCapacity = world->bodyPool.capacity; int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); - // Search for an awake island to split + // Build array of awake bodies + // Also search for an awake island to split int32_t splitIslandIndex = B2_NULL_INDEX; int32_t maxRemovedContacts = 0; - - // Build array of awake bodies int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { @@ -992,6 +997,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t workerCount = world->workerCount; const int32_t blocksPerWorker = 6; + // Configure blocks for tasks that parallel-for bodies int32_t bodyBlockSize = 1 << 5; int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 5) + 1; if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) @@ -1013,6 +1019,8 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) for (int32_t i = 0; i < b2_graphColorCount; ++i) { int32_t count = b2Array(colors[i].contactArray).count; + graph->occupancy[i] = count; + if (count > 0) { int32_t avxCount = ((count - 1) >> 3) + 1; @@ -1036,6 +1044,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; + graph->occupancy[b2_overflowIndex] = overflowContactCount; graph->overflow.contactConstraints = b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); diff --git a/src/graph.h b/src/graph.h index db651347..98dd4d9f 100644 --- a/src/graph.h +++ b/src/graph.h @@ -5,6 +5,7 @@ #include "array.h" #include "bitset.h" +#include "box2d/constants.h" typedef struct b2Contact b2Contact; typedef struct b2ContactConstraint b2ContactConstraint; @@ -13,7 +14,6 @@ typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; -#define b2_graphColorCount 16 #define b2_overflowIndex b2_graphColorCount typedef struct b2GraphColor @@ -43,6 +43,9 @@ typedef struct b2Graph b2GraphColor colors[b2_graphColorCount]; int32_t colorCount; + // debug info + int32_t occupancy[b2_graphColorCount + 1]; + b2GraphOverflow overflow; } b2Graph; diff --git a/src/world.c b/src/world.c index 98b569d2..9fa6d4c4 100644 --- a/src/world.c +++ b/src/world.c @@ -1312,6 +1312,10 @@ b2Statistics b2World_GetStatistics(b2WorldId worldId) s.stackCapacity = b2GetStackCapacity(world->stackAllocator); s.stackUsed = b2GetMaxStackAllocation(world->stackAllocator); s.byteCount = b2GetByteCount(); + for (int32_t i = 0; i <= b2_graphColorCount; ++i) + { + s.colorCounts[i] = world->graph.occupancy[i]; + } return s; } From 5e6c6e5823e83d76e56d237e8ad64c65ab165cc7 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 21 Oct 2023 17:11:46 -0700 Subject: [PATCH 42/51] fix invalid islands testing and cleanup --- include/box2d/box2d.h | 43 ++++++--- include/box2d/constants.h | 36 +++----- include/box2d/types.h | 8 ++ samples/collection/behavior.cpp | 92 ++++++++++++++++--- .../collection/benchmark_many_tumblers.cpp | 3 +- samples/collection/benchmark_pyramid.cpp | 8 -- samples/collection/benchmark_tumbler.cpp | 38 ++++---- samples/sample.cpp | 22 ++++- samples/sample.h | 1 + src/body.c | 2 +- src/contact_solver.c | 20 ++-- src/core.h | 2 +- src/dynamic_tree.c | 22 ++--- src/graph.c | 43 +++++++-- src/island.c | 50 ++++++++-- src/island.h | 2 +- src/joint.c | 42 +++++++++ src/math.c | 3 - src/pool.c | 7 +- src/world.c | 75 ++++++++++++--- src/world.h | 2 + 21 files changed, 379 insertions(+), 142 deletions(-) diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index cb457ea3..a85e761f 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -31,19 +31,6 @@ BOX2D_API void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityI /// Call this to draw shapes and other debug draw data. This is intentionally non-const. BOX2D_API void b2World_Draw(b2WorldId worldId, b2DebugDraw* debugDraw); -/// Enable/disable sleep. -BOX2D_API void b2World_EnableSleeping(b2WorldId worldId, bool flag); - -BOX2D_API void b2World_EnableWarmStarting(b2WorldId worldId, bool flag); - -/// Enable/disable continuous collision. -BOX2D_API void b2World_EnableContinuous(b2WorldId worldId, bool flag); - -/// Get the current profile. -BOX2D_API struct b2Profile b2World_GetProfile(b2WorldId worldId); - -BOX2D_API struct b2Statistics b2World_GetStatistics(b2WorldId worldId); - /// Create a rigid body given a definition. No reference to the definition is retained. /// @warning This function is locked during callbacks. BOX2D_API b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def); @@ -79,6 +66,9 @@ BOX2D_API b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2Revol BOX2D_API b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def); BOX2D_API void b2World_DestroyJoint(b2JointId jointId); +BOX2D_API b2BodyId b2Joint_GetBodyA(b2JointId jointId); +BOX2D_API b2BodyId b2Joint_GetBodyB(b2JointId jointId); + BOX2D_API void b2MouseJoint_SetTarget(b2JointId jointId, b2Vec2 target); BOX2D_API void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit); @@ -96,3 +86,30 @@ typedef bool b2QueryCallbackFcn(b2ShapeId shapeId, void* context); /// @param callback a user implemented callback function. /// @param aabb the query box. BOX2D_API void b2World_QueryAABB(b2WorldId worldId, b2AABB aabb, b2QueryCallbackFcn* fcn, void* context); + + +/// Advanced API for testing and special cases + +/// Enable/disable sleep. +BOX2D_API void b2World_EnableSleeping(b2WorldId worldId, bool flag); + +/// Enable/disable contact warm starting. Improves stacking stability. +BOX2D_API void b2World_EnableWarmStarting(b2WorldId worldId, bool flag); + +/// Enable/disable continuous collision. +BOX2D_API void b2World_EnableContinuous(b2WorldId worldId, bool flag); + +/// Adjust the restitution threshold +BOX2D_API void b2World_SetRestitutionThreshold(b2WorldId worldId, float value); + +/// Adjust the maximum contact constraint push out velocity +BOX2D_API void b2World_SetMaximumPushoutVelocity(b2WorldId worldId, float value); + +/// Adjust the contact stiffness in cycles per second. +BOX2D_API void b2World_SetContactHertz(b2WorldId worldId, float value); + +/// Get the current profile +BOX2D_API struct b2Profile b2World_GetProfile(b2WorldId worldId); + +/// Get counters and sizes +BOX2D_API struct b2Statistics b2World_GetStatistics(b2WorldId worldId); diff --git a/include/box2d/constants.h b/include/box2d/constants.h index 157a1de2..cfe95311 100644 --- a/include/box2d/constants.h +++ b/include/box2d/constants.h @@ -12,12 +12,14 @@ extern "C" /// Constants used by box2d. /// box2d uses meters-kilograms-seconds (MKS) units. Angles are always in radians unless /// degrees are indicated. -/// Some values can be overridden with a define and some values can be modified at runtime. +/// Some values can be overridden by using a compiler definition. /// Other values cannot be modified without causing stability and/or performance problems. /// box2d bases all length units on meters, but you may need different units for your game. -/// You can adjust this value to use different units, normally at application startup. -extern float b2_lengthUnitsPerMeter; +/// You can override this value to use different units. +#ifndef b2_lengthUnitsPerMeter +#define b2_lengthUnitsPerMeter 1.0f +#endif #define b2_pi 3.14159265359f @@ -50,16 +52,6 @@ extern float b2_lengthUnitsPerMeter; #define b2_maxWorlds 32 #endif -/// The maximum linear position correction used when solving constraints. This helps to -/// prevent overshoot. Meters. -/// @warning modifying this can have a significant impact on stability -#define b2_maxLinearCorrection (0.2f * b2_lengthUnitsPerMeter) - -/// The maximum angular position correction used when solving constraints. This helps to -/// prevent overshoot. -/// @warning modifying this can have a significant impact on stability -#define b2_maxAngularCorrection (8.0f / 180.0f * b2_pi) - /// The maximum linear translation of a body per step. This limit is very large and is used /// to prevent numerical problems. You shouldn't need to adjust this. Meters. /// @warning modifying this can have a significant impact on stability @@ -72,24 +64,20 @@ extern float b2_lengthUnitsPerMeter; #define b2_maxRotation (0.5f * b2_pi) #define b2_maxRotationSquared (b2_maxRotation * b2_maxRotation) -/// @warning modifying this can have a significant impact on stability +/// @warning modifying this can have a significant impact on performance and stability #define b2_speculativeDistance (4.0f * b2_linearSlop) -/// This scale factor controls how fast overlap is resolved. Ideally this would be 1 so -/// that overlap is removed in one time step. However using values close to 1 often lead -/// to overshoot. -/// @warning modifying this can have a significant impact on stability -#define b2_baumgarte 0.2f - -/// The time that a body must be still before it will go to sleep. -extern float b2_timeToSleep; +/// The time that a body must be still before it will go to sleep. In seconds. +#ifndef b2_timeToSleep +#define b2_timeToSleep 0.5f +#endif -/// A body cannot sleep if its linear velocity is above this tolerance. +/// A body cannot sleep if its linear velocity is above this tolerance. Meters per second. #ifndef b2_linearSleepTolerance #define b2_linearSleepTolerance (0.01f * b2_lengthUnitsPerMeter) #endif -/// A body cannot sleep if its angular velocity is above this tolerance. +/// A body cannot sleep if its angular velocity is above this tolerance. Radians per second. #ifndef b2_angularSleepTolerance #define b2_angularSleepTolerance (2.0f / 180.0f * b2_pi) #endif diff --git a/include/box2d/types.h b/include/box2d/types.h index a3cab401..7f20406e 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -112,6 +112,12 @@ typedef struct b2WorldDef /// speed have restitution applied (will bounce). float restitutionThreshold; + /// This parameter controls how fast overlap is resolved and has units of meters per second + float maxPushoutVelocity; + + /// This parameter affects the stiffness of contacts. Cycles per second. + float contactHertz; + /// Can bodies go to sleep to improve performance bool enableSleep; @@ -252,6 +258,8 @@ static inline b2WorldDef b2DefaultWorldDef(void) b2WorldDef def = {0}; def.gravity = B2_LITERAL(b2Vec2){0.0f, -10.0f}; def.restitutionThreshold = 1.0f * b2_lengthUnitsPerMeter; + def.maxPushoutVelocity = 3.0f * b2_lengthUnitsPerMeter; + def.contactHertz = 30.0f; def.enableSleep = true; def.bodyCapacity = 8; def.shapeCapacity = 8; diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index 3488da32..64c4d8d6 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -203,46 +203,110 @@ class OverlapRecovery : public Sample OverlapRecovery(const Settings& settings) : Sample(settings) { - float extent = 1.0f; + m_bodyIds = nullptr; + m_bodyCount = 0; + m_baseCount = 4; + m_overlap = 0.5f; + m_extent = 0.1f; + m_pushout = 3.0f; + m_hertz = 30.0f; b2BodyDef bodyDef = b2DefaultBodyDef(); b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); - float groundWidth = 10.0f * extent; + float groundWidth = 40.0f; b2ShapeDef shapeDef = b2DefaultShapeDef(); shapeDef.density = 1.0f; b2Segment segment = {{-groundWidth, 0.0f}, {groundWidth, 0.0f}}; b2Body_CreateSegment(groundId, &shapeDef, &segment); - bodyDef.type = b2_dynamicBody; + CreateScene(); + } - b2Polygon box = b2MakeBox(extent, extent); + ~OverlapRecovery() override + { + free(m_bodyIds); + } - int count = 4; - float fraction = 0.75f; - float y = fraction * extent; - while (count > 0) + void CreateScene() + { + for (int32_t i = 0; i < m_bodyCount; ++i) { - for (int i = 0; i < count; ++i) - { - float coeff = i - 0.5f * count; + b2World_DestroyBody(m_bodyIds[i]); + } + + b2World_SetMaximumPushoutVelocity(m_worldId, m_pushout); + b2World_SetContactHertz(m_worldId, m_hertz); + + b2BodyDef bodyDef = b2DefaultBodyDef(); + bodyDef.type = b2_dynamicBody; - bodyDef.position = {2.0f * fraction * coeff * extent, y}; + b2Polygon box = b2MakeBox(m_extent, m_extent); + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + m_bodyCount = m_baseCount * (m_baseCount + 1) / 2; + m_bodyIds = (b2BodyId*)realloc(m_bodyIds, m_bodyCount * sizeof(b2BodyId)); + + int32_t bodyIndex = 0; + float fraction = 1.0f - m_overlap; + float y = m_extent; + for (int32_t i = 0; i < m_baseCount; ++i) + { + float x = fraction * m_extent * (i - m_baseCount); + for (int32_t j = i; j < m_baseCount; ++j) + { + bodyDef.position = {x, y}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); b2Body_CreatePolygon(bodyId, &shapeDef, &box); + + m_bodyIds[bodyIndex++] = bodyId; + + x += 2.0f * fraction * m_extent; } - --count; - y += 2.0f * fraction * extent; + y += 2.0f * fraction * m_extent; } + + assert(bodyIndex == m_bodyCount); + } + + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + ImGui::SetNextWindowSize(ImVec2(240.0f, 230.0f)); + ImGui::Begin("Stacks", nullptr, ImGuiWindowFlags_NoResize); + + bool changed = false; + changed = changed || ImGui::SliderFloat("Extent", &m_extent, 0.1f, 1.0f, "%.1f"); + changed = changed || ImGui::SliderInt("Base Count", &m_baseCount, 1, 10); + changed = changed || ImGui::SliderFloat("Overlap", &m_overlap, 0.0f, 1.0f, "%.1f"); + changed = changed || ImGui::SliderFloat("Pushout", &m_pushout, 0.0f, 10.0f, "%.1f"); + changed = changed || ImGui::SliderFloat("Hertz", &m_hertz, 0.0f, 120.0f, "%.1f"); + changed = changed || ImGui::Button("Reset Scene"); + + if (changed) + { + CreateScene(); + } + + ImGui::End(); } static Sample* Create(const Settings& settings) { return new OverlapRecovery(settings); } + + b2BodyId* m_bodyIds; + int32_t m_bodyCount; + int32_t m_baseCount; + float m_overlap; + float m_extent; + float m_pushout; + float m_hertz; }; static int sampleIndex4 = RegisterSample("Behavior", "Overlap Recovery", OverlapRecovery::Create); diff --git a/samples/collection/benchmark_many_tumblers.cpp b/samples/collection/benchmark_many_tumblers.cpp index 89cfe1dc..5bb153ba 100644 --- a/samples/collection/benchmark_many_tumblers.cpp +++ b/samples/collection/benchmark_many_tumblers.cpp @@ -9,6 +9,7 @@ #include #include +// TODO_ERIN make these kinematic class BenchmarkManyTumblers : public Sample { public: @@ -48,7 +49,6 @@ class BenchmarkManyTumblers : public Sample { b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.enableSleep = false; bd.position = {position.x, position.y}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); m_tumblerIds[index] = bodyId; @@ -154,6 +154,7 @@ class BenchmarkManyTumblers : public Sample for (int i = 0; i < m_tumblerCount; ++i) { b2RevoluteJoint_SetMotorSpeed(m_jointIds[i], (b2_pi / 180.0f) * m_motorSpeed); + b2Body_Wake(m_tumblerIds[i]); } } diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index fa01a9cf..3b53cb7e 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -8,11 +8,6 @@ #include #include -BOX2D_API int32_t b2_awakeContactCount; - -BOX2D_API int b2_collideMinRange; -BOX2D_API int b2_islandMinRange; - class BenchmarkPyramid : public Sample { public: @@ -144,9 +139,6 @@ class BenchmarkPyramid : public Sample changed = changed || ImGui::SliderFloat("Round", &m_round, 0.0f, 0.4f, "%.1f"); changed = changed || ImGui::Button("Reset Scene"); - ImGui::SliderInt("Collide Min", &b2_collideMinRange, 1, 200); - ImGui::SliderInt("Island Min", &b2_islandMinRange, 1, 10); - if (changed) { CreateScene(); diff --git a/samples/collection/benchmark_tumbler.cpp b/samples/collection/benchmark_tumbler.cpp index 5a1a8bfa..3f9ebc20 100644 --- a/samples/collection/benchmark_tumbler.cpp +++ b/samples/collection/benchmark_tumbler.cpp @@ -4,6 +4,7 @@ #include "box2d/box2d.h" #include "box2d/geometry.h" #include "sample.h" +#include "settings.h" #include #include @@ -57,15 +58,14 @@ class BenchmarkTumbler : public Sample m_jointId = b2World_CreateRevoluteJoint(m_worldId, &jd); } - //m_maxCount = g_sampleDebug ? 500 : 2000; - m_maxCount = 2000; + m_maxCount = g_sampleDebug ? 300 : 2000; m_count = 0; } void UpdateUI() override { ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); - ImGui::SetNextWindowSize(ImVec2(240.0f, 230.0f)); + ImGui::SetNextWindowSize(ImVec2(240.0f, 80.0f)); ImGui::Begin("Tumbler", nullptr, ImGuiWindowFlags_NoResize); if (ImGui::SliderFloat("Speed", &m_motorSpeed, 0.0f, 100.0f, "%.f")) @@ -78,22 +78,26 @@ class BenchmarkTumbler : public Sample void Step(Settings& settings) override { - Sample::Step(settings); - - for (int32_t i = 0; i < 5 && m_count < m_maxCount; ++i) + if (settings.m_pause == false || settings.m_singleStep == true) { - b2BodyDef bd = b2DefaultBodyDef(); - bd.type = b2_dynamicBody; - bd.position = {0.25f * i, 10.0f + 1.0f * (m_stepCount & 1)}; - b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); - - b2ShapeDef sd = b2DefaultShapeDef(); - sd.density = 1.0f; - - b2Polygon polygon = b2MakeBox(0.125f, 0.125f); - b2Body_CreatePolygon(bodyId, &sd, &polygon); - ++m_count; + float a = 0.125f; + for (int32_t i = 0; i < 5 && m_count < m_maxCount; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {5.0f * a + 2.0f * a * i, 10.0f + 2.0f * a * (m_stepCount % 5)}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 1.0f; + + b2Polygon polygon = b2MakeBox(0.125f, 0.125f); + b2Body_CreatePolygon(bodyId, &sd, &polygon); + ++m_count; + } } + + Sample::Step(settings); } static Sample* Create(const Settings& settings) diff --git a/samples/sample.cpp b/samples/sample.cpp index 08874a72..cf618596 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -19,7 +19,12 @@ bool PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color, void* context) { Sample* sample = static_cast(context); - return sample->PreSolve(shapeIdA, shapeIdB, manifold, color); + if (sample->m_collectContacts) + { + return sample->PreSolve(shapeIdA, shapeIdB, manifold, color); + } + + return true; } static void* EnqueueTask(b2TaskCallback* task, int32_t itemCount, int32_t minRange, void* taskContext, void* userContext) @@ -71,17 +76,21 @@ Sample::Sample(const Settings& settings) worldDef.enqueueTask = &EnqueueTask; worldDef.finishTask = &FinishTask; worldDef.finishAllTasks = &FinishAllTasks; - worldDef.bodyCapacity = 1024; - worldDef.contactCapacity = 4 * 1024; worldDef.userTaskContext = this; - worldDef.stackAllocatorCapacity = 20 * 1024; worldDef.enableSleep = settings.m_enableSleep; + // These are not ideal, but useful for testing Box2D + worldDef.bodyCapacity = 2; + worldDef.contactCapacity = 2; + worldDef.stackAllocatorCapacity = 0; + m_worldId = b2CreateWorld(&worldDef); m_textLine = 30; m_textIncrement = 18; m_mouseJointId = b2_nullJointId; m_pointCount = 0; + m_collectContacts = + settings.m_drawContactPoints || settings.m_drawContactNormals || settings.m_drawContactImpulse || settings.m_drawFrictionImpulse; // m_destructionListener.test = this; // m_world->SetDestructionListener(&m_destructionListener); @@ -193,6 +202,8 @@ void Sample::MouseMove(b2Vec2 p) if (B2_NON_NULL(m_mouseJointId)) { b2MouseJoint_SetTarget(m_mouseJointId, p); + b2BodyId bodyIdB = b2Joint_GetBodyB(m_mouseJointId); + b2Body_Wake(bodyIdB); } } @@ -227,6 +238,9 @@ void Sample::Step(Settings& settings) g_draw.m_debugDraw.drawAABBs = settings.m_drawAABBs; g_draw.m_debugDraw.drawMass = settings.m_drawMass; + m_collectContacts = + settings.m_drawContactPoints || settings.m_drawContactNormals || settings.m_drawContactImpulse || settings.m_drawFrictionImpulse; + b2World_EnableSleeping(m_worldId, settings.m_enableSleep); b2World_EnableWarmStarting(m_worldId, settings.m_enableWarmStarting); diff --git a/samples/sample.h b/samples/sample.h index 84beeb27..befb3598 100644 --- a/samples/sample.h +++ b/samples/sample.h @@ -135,6 +135,7 @@ class Sample int32_t m_textIncrement; b2Profile m_maxProfile; b2Profile m_totalProfile; + bool m_collectContacts; }; typedef Sample* SampleCreateFcn(const Settings& settings); diff --git a/src/body.c b/src/body.c index 5a36d8a6..681d50d3 100644 --- a/src/body.c +++ b/src/body.c @@ -249,7 +249,7 @@ void b2World_DestroyBody(b2BodyId bodyId) if (islandDestroyed == false) { b2WakeIsland(island); - b2ValidateIsland(island); + b2ValidateIsland(island, true); } } diff --git a/src/contact_solver.c b/src/contact_solver.c index 0abc2599..73a395ca 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -16,8 +16,6 @@ // http://mmacklin.com/smallsteps.pdf // https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf -#define maxBaumgarteVelocity 3.0f - void b2PrepareOverflowContacts(b2SolverTaskContext* context) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); @@ -38,7 +36,7 @@ void b2PrepareOverflowContacts(b2SolverTaskContext* context) // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; + const float contactHertz = world->contactHertz; float h = context->timeStep; bool enableWarmStarting = world->enableWarmStarting; @@ -139,6 +137,8 @@ void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias) b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; int32_t count = b2Array(context->graph->overflow.contactArray).count; float inv_dt = context->invTimeStep; + const float pushout = context->world->maximumPushoutVelocity; + // This is a dummy body to represent a static body since static bodies don't have a solver body. b2SolverBody dummyBody = {0}; @@ -196,7 +196,7 @@ void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias) } else if (useBias) { - bias = B2_MAX(biasCoefficient * s, -maxBaumgarteVelocity); + bias = B2_MAX(biasCoefficient * s, -pushout); // bias = cp->biasCoefficient * s; massScale = massCoefficient; impulseScale = impulseCoefficient; @@ -398,14 +398,14 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont b2SolverBody* solverBodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->constraintAVXs; const int32_t* contactIndices = context->contactIndices; - + // This is a dummy body to represent a static body since static bodies don't have a solver body. b2SolverBody dummyBody = {0}; // 30 is a bit soft, 60 oscillates too much // const float contactHertz = 45.0f; // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); - const float contactHertz = 30.0f; + const float contactHertz = world->contactHertz; float h = context->timeStep; @@ -601,8 +601,7 @@ void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCo b2TracyCZoneEnd(warm_start_contact); } - -static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, bool useBias) +static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, float pushout, bool useBias) { b2SimdBody bA = b2GatherBodies(bodies, c->indexA); b2SimdBody bB = b2GatherBodies(bodies, c->indexB); @@ -622,7 +621,7 @@ static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2Sol } __m256 invDtMul = _mm256_set1_ps(inv_dt); - __m256 minBiasVel = _mm256_set1_ps(-maxBaumgarteVelocity); + __m256 minBiasVel = _mm256_set1_ps(-pushout); // first point non-penetration constraint { @@ -780,11 +779,12 @@ void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContex b2SolverBody* bodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; float inv_dt = context->invTimeStep; + const float pushout = context->world->maximumPushoutVelocity; for (int32_t i = startIndex; i < endIndex; ++i) { b2ContactConstraintAVX* constraint = constraints + i; - b2SolveContactTwoPointsAVX(constraint, bodies, inv_dt, useBias); + b2SolveContactTwoPointsAVX(constraint, bodies, inv_dt, pushout, useBias); } b2TracyCZoneEnd(solve_contact); diff --git a/src/core.h b/src/core.h index 6c45c8bd..f4b10183 100644 --- a/src/core.h +++ b/src/core.h @@ -75,7 +75,7 @@ #define B2_ASSERT(...) ((void)0) #endif -#if defined(_DEBUG) || 0 +#if defined(_DEBUG) #define B2_VALIDATE 1 #else #define B2_VALIDATE 0 diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index c57ff3a6..f9c117d6 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -13,6 +13,8 @@ #include #include +#define b2_treeStackSize 1024 + // TODO_ERIN // - try incrementally sorting internal nodes by height for better cache efficiency during depth first traversal. @@ -1138,8 +1140,6 @@ void b2DynamicTree_ShiftOrigin(b2DynamicTree* tree, b2Vec2 newOrigin) } } -#define b2_treeStackSize 256 - void b2DynamicTree_QueryFiltered(const b2DynamicTree* tree, b2AABB aabb, uint32_t maskBits, b2TreeQueryCallbackFcn* callback, void* context) { int32_t stack[b2_treeStackSize]; @@ -1169,10 +1169,8 @@ void b2DynamicTree_QueryFiltered(const b2DynamicTree* tree, b2AABB aabb, uint32_ } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? - - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { stack[stackCount++] = node->child1; stack[stackCount++] = node->child2; @@ -1211,10 +1209,8 @@ void b2DynamicTree_Query(const b2DynamicTree* tree, b2AABB aabb, b2TreeQueryCall } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? - - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { stack[stackCount++] = node->child1; stack[stackCount++] = node->child2; @@ -1310,10 +1306,8 @@ void b2DynamicTree_RayCast(const b2DynamicTree* tree, const b2RayCastInput* inpu } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? - - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { // TODO_ERIN just put one node on the stack, continue on a child node // TODO_ERIN test ordering children by nearest to ray origin diff --git a/src/graph.c b/src/graph.c index 5fd2099e..57ec1467 100644 --- a/src/graph.c +++ b/src/graph.c @@ -939,6 +939,12 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) if (awakeBodyCount == 0) { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + graph->occupancy[i] = b2Array(colors[i].contactArray).count; + } + graph->occupancy[b2_overflowIndex] = b2Array(graph->overflow.contactArray).count; + return; } @@ -961,6 +967,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) // Also search for an awake island to split int32_t splitIslandIndex = B2_NULL_INDEX; int32_t maxRemovedContacts = 0; + int32_t splitIslandBodyCount = 0; int32_t index = 0; for (int32_t i = 0; i < awakeIslandCount; ++i) { @@ -971,6 +978,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) { maxRemovedContacts = island->constraintRemoveCount; splitIslandIndex = islandIndex; + splitIslandBodyCount = island->bodyCount; } int32_t bodyIndex = island->headBody; @@ -1311,11 +1319,25 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2TracyCZoneEnd(prepare_stages); // Must use worker index because thread 0 can be assigned multiple tasks by enkiTS - for (int32_t i = 0; i < workerCount; ++i) + if (b2_parallel) + { + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + workerContext[i].userTask = world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + } + } + else { - workerContext[i].context = &context; - workerContext[i].workerIndex = i; - workerContext[i].userTask = world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + // This relies on work stealing + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + workerContext[i].userTask = NULL; + b2SolverTask(0, 1, 0, workerContext + i); + } } // Finish split @@ -1326,22 +1348,25 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) } // Finish solve - for (int32_t i = 0; i < workerCount; ++i) + if (b2_parallel) { - world->finishTaskFcn(workerContext[i].userTask, world->userTaskContext); + for (int32_t i = 0; i < workerCount; ++i) + { + world->finishTaskFcn(workerContext[i].userTask, world->userTaskContext); + } } - // Prepare contact and shape bit-sets + // Prepare contact, shape, and island bit sets used in body finalization. int32_t contactCapacity = world->contactPool.capacity; int32_t shapeCapacity = world->shapePool.capacity; - int32_t islandCapacity = world->islandPool.capacity; + int32_t islandCapacity = world->islandPool.capacity + splitIslandBodyCount; for (uint32_t i = 0; i < world->workerCount; ++i) { b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); } - + // Finalize bodies. Must happen after the constraint solver and after island splitting. void* finalizeBodiesTask = NULL; if (b2_parallel) diff --git a/src/island.c b/src/island.c index d4f2f769..141a673f 100644 --- a/src/island.c +++ b/src/island.c @@ -157,7 +157,7 @@ void b2DestroyIsland(b2Island* island) world->islands[swappedIslandIndex].awakeIndex = island->awakeIndex; } } - + b2FreeObject(&island->world->islandPool, &island->object); } @@ -183,7 +183,7 @@ static void b2AddContactToIsland(b2World* world, b2Island* island, b2Contact* co island->contactCount += 1; contact->islandIndex = island->object.index; - b2ValidateIsland(island); + b2ValidateIsland(island, false); } void b2WakeIsland(b2Island* island) @@ -228,7 +228,7 @@ void b2WakeIsland(b2Island* island) b2Joint* joint = world->joints + jointIndex; B2_ASSERT(joint->islandIndex == islandIndex); // TODO_JOINT_GRAPH - //b2AddJointToGraph(world, joint); + // b2AddJointToGraph(world, joint); jointIndex = joint->islandNext; } } @@ -355,6 +355,8 @@ void b2UnlinkContact(b2World* world, b2Contact* contact) contact->islandIndex = B2_NULL_INDEX; contact->islandPrev = B2_NULL_INDEX; contact->islandNext = B2_NULL_INDEX; + + b2ValidateIsland(island, false); } static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) @@ -379,7 +381,7 @@ static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) island->jointCount += 1; joint->islandIndex = island->object.index; - b2ValidateIsland(island); + b2ValidateIsland(island, false); } void b2LinkJoint(b2World* world, b2Joint* joint) @@ -500,6 +502,8 @@ void b2UnlinkJoint(b2World* world, b2Joint* joint) joint->islandIndex = B2_NULL_INDEX; joint->islandPrev = B2_NULL_INDEX; joint->islandNext = B2_NULL_INDEX; + + b2ValidateIsland(island, false); } // Merge an island into its root island. @@ -611,7 +615,7 @@ static int32_t b2MergeIsland(b2Island* island) // Merging a dirty islands means that splitting may still be needed rootIsland->constraintRemoveCount += island->constraintRemoveCount; - b2ValidateIsland(rootIsland); + b2ValidateIsland(rootIsland, true); return rootIsland->bodyCount; } @@ -698,7 +702,7 @@ void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadInde b2Island* baseIsland = world->islands + world->splitIslandIndex; - b2ValidateIsland(baseIsland); + b2ValidateIsland(baseIsland, true); int32_t bodyCount = baseIsland->bodyCount; @@ -928,7 +932,13 @@ void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadInde } } - b2ValidateIsland(island); + // For consistency, this island must be added to the awake island array. This should + // be safe because no other task is accessing this and the solver has already gathered + // all awake bodies. + island->awakeIndex = b2Array(world->awakeIslandArray).count; + b2Array_Push(world->awakeIslandArray, islandIndex); + + b2ValidateIsland(island, true); } b2FreeStackItem(alloc, bodyIndices); @@ -939,17 +949,19 @@ void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadInde #if B2_VALIDATE -void b2ValidateIsland(b2Island* island) +void b2ValidateIsland(b2Island* island, bool checkSleep) { b2World* world = island->world; int32_t islandIndex = island->object.index; B2_ASSERT(island->object.index == island->object.next); + bool isAwake = false; if (island->awakeIndex != B2_NULL_INDEX) { b2Array_Check(world->awakeIslandArray, island->awakeIndex); B2_ASSERT(world->awakeIslandArray[island->awakeIndex] == islandIndex); + isAwake = true; } B2_ASSERT(island->headBody != B2_NULL_INDEX); @@ -1001,6 +1013,25 @@ void b2ValidateIsland(b2Island* island) B2_ASSERT(contact->islandIndex == islandIndex); count += 1; + if (checkSleep) + { + if (isAwake) + { + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex != B2_NULL_INDEX); + + //int32_t awakeIndex = world->contactAwakeIndexArray[contactIndex]; + //B2_ASSERT(0 <= awakeIndex && awakeIndex < b2Array(world->awakeContactArray).count); + //B2_ASSERT(world->awakeContactArray[awakeIndex] == contactIndex); + } + else + { + B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex == B2_NULL_INDEX); + //B2_ASSERT(world->contactAwakeIndexArray[contactIndex] == B2_NULL_INDEX); + } + } + if (count == island->contactCount) { B2_ASSERT(contactIndex == island->tailContact); @@ -1053,9 +1084,10 @@ void b2ValidateIsland(b2Island* island) #else -void b2ValidateIsland(b2Island* island) +void b2ValidateIsland(b2Island* island, bool checkSleep) { B2_MAYBE_UNUSED(island); + B2_MAYBE_UNUSED(checkSleep); } #endif diff --git a/src/island.h b/src/island.h index f5c73f24..6208f4e2 100644 --- a/src/island.h +++ b/src/island.h @@ -76,4 +76,4 @@ void b2MergeAwakeIslands(b2World* world); void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context); void b2CompleteSplitIsland(b2Island* island); -void b2ValidateIsland(b2Island* island); +void b2ValidateIsland(b2Island* island, bool checkSleep); diff --git a/src/joint.c b/src/joint.c index 0531c964..7509d8b8 100644 --- a/src/joint.c +++ b/src/joint.c @@ -378,6 +378,48 @@ void b2World_DestroyJoint(b2JointId jointId) b2FreeObject(&world->jointPool, &joint->object); } +b2BodyId b2Joint_GetBodyA(b2JointId jointId) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + + if (world->locked) + { + return b2_nullBodyId; + } + + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); + + b2Joint* joint = world->joints + jointId.index; + int32_t bodyIndex = joint->edges[0].bodyIndex; + + B2_ASSERT(0 <= bodyIndex && bodyIndex < world->bodyPool.capacity); + b2Body* body = world->bodies + bodyIndex; + b2BodyId bodyId = {bodyIndex, jointId.world, body->object.revision}; + return bodyId; +} + +b2BodyId b2Joint_GetBodyB(b2JointId jointId) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + + if (world->locked) + { + return b2_nullBodyId; + } + + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); + + b2Joint* joint = world->joints + jointId.index; + int32_t bodyIndex = joint->edges[1].bodyIndex; + + B2_ASSERT(0 <= bodyIndex && bodyIndex < world->bodyPool.capacity); + b2Body* body = world->bodies + bodyIndex; + b2BodyId bodyId = {bodyIndex, jointId.world, body->object.revision}; + return bodyId; +} + extern void b2PrepareMouse(b2Joint* base, b2StepContext* context); extern void b2PrepareRevolute(b2Joint* base, b2StepContext* context); extern void b2PrepareWeld(b2Joint* base, b2StepContext* context); diff --git a/src/math.c b/src/math.c index 84f9f303..ff156385 100644 --- a/src/math.c +++ b/src/math.c @@ -8,9 +8,6 @@ #include -float b2_lengthUnitsPerMeter = 1.0f; -float b2_timeToSleep = 0.5f; - b2Version b2_version = { 3, 0, 0 }; bool b2IsValid(float a) diff --git a/src/pool.c b/src/pool.c index 3f79a12e..fe450604 100644 --- a/src/pool.c +++ b/src/pool.c @@ -5,6 +5,8 @@ #include "allocate.h" #include "core.h" +#include "math.h" +#include "box2d/math.h" #include "box2d/types.h" @@ -122,8 +124,8 @@ b2Object* b2AllocObject(b2Pool* pool) else { int32_t oldCapacity = pool->capacity; - int32_t newCapacity = oldCapacity + oldCapacity / 2; - newCapacity = newCapacity > 2 ? newCapacity : 2; + int32_t addedCapacity = B2_MAX(2, oldCapacity / 2); + int32_t newCapacity = B2_MAX(2, oldCapacity + addedCapacity); pool->capacity = newCapacity; char* newMemory = (char*)b2Alloc(pool->capacity * pool->objectSize); memcpy(newMemory, pool->memory, oldCapacity * pool->objectSize); @@ -135,6 +137,7 @@ b2Object* b2AllocObject(b2Pool* pool) newObject->revision = 0; newObject->next = newObject->index; + // This assumes added capacity >= 2 pool->freeList = oldCapacity + 1; for (int32_t i = oldCapacity + 1; i < newCapacity - 1; ++i) { diff --git a/src/world.c b/src/world.c index 9fa6d4c4..a9e95558 100644 --- a/src/world.c +++ b/src/world.c @@ -28,13 +28,12 @@ #include "box2d/distance.h" #include "box2d/timer.h" +#include #include #include b2World b2_worlds[b2_maxWorlds]; bool b2_parallel = true; -int b2_collideMinRange = 64; -int b2_islandMinRange = 1; b2World* b2GetWorldFromId(b2WorldId id) { @@ -130,6 +129,8 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->gravity = def->gravity; world->restitutionThreshold = def->restitutionThreshold; + world->maximumPushoutVelocity = def->maxPushoutVelocity; + world->contactHertz = def->contactHertz; world->inv_dt0 = 0.0f; world->enableSleep = true; world->locked = false; @@ -329,7 +330,7 @@ static void b2Collide(b2World* world) if (b2_parallel) { // Task should take at least 40us on a 4GHz CPU (10K cycles) - int32_t minRange = b2_collideMinRange; + int32_t minRange = 64; void* userCollideTask = world->enqueueTaskFcn(&b2CollideTask, awakeContactCount, minRange, world, world->userTaskContext); world->finishTaskFcn(userCollideTask, world->userTaskContext); } @@ -639,27 +640,28 @@ static void b2Solve(b2World* world, b2StepContext* context) } { - b2BitSet* bitSet = &world->taskContextArray[0].awakeIslandBitSet; + b2BitSet* awakeIslandBitSet = &world->taskContextArray[0].awakeIslandBitSet; for (uint32_t i = 1; i < world->workerCount; ++i) { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeIslandBitSet); + b2InPlaceUnion(awakeIslandBitSet, &world->taskContextArray[i].awakeIslandBitSet); } b2Body* bodies = world->bodies; b2Contact* contacts = world->contacts; b2Joint* joints = world->joints; + b2Island* islands = world->islands; int32_t count = b2Array(world->awakeIslandArray).count; for (int32_t i = 0; i < count; ++i) { int32_t islandIndex = world->awakeIslandArray[i]; - if (b2GetBit(bitSet, islandIndex) == true) + if (b2GetBit(awakeIslandBitSet, islandIndex) == true) { continue; } // Put island to sleep - b2Island* island = world->islands + islandIndex; + b2Island* island = islands + islandIndex; island->awakeIndex = B2_NULL_INDEX; // Put contacts to sleep. Remember only touching contacts are in the island. @@ -711,8 +713,8 @@ static void b2Solve(b2World* world, b2StepContext* context) // Use bitSet to build awake island array. No need to add edges. uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; + uint32_t wordCount = awakeIslandBitSet->wordCount; + uint64_t* bits = awakeIslandBitSet->bits; int32_t awakeIndex = 0; for (uint32_t k = 0; k < wordCount; ++k) { @@ -722,10 +724,12 @@ static void b2Solve(b2World* world, b2StepContext* context) uint32_t ctz = b2CTZ(word); uint32_t islandIndex = 64 * k + ctz; + B2_ASSERT(b2ObjectValid(&islands[islandIndex].object)); + b2Array_Push(world->awakeIslandArray, islandIndex); // Reference index. This tells the island and bodies they are awake. - world->islands[islandIndex].awakeIndex = awakeIndex; + islands[islandIndex].awakeIndex = awakeIndex; awakeIndex += 1; // Clear the smallest set bit @@ -734,6 +738,19 @@ static void b2Solve(b2World* world, b2StepContext* context) } } +#if B2_VALIDATE + for (int32_t i = 0; i < world->islandPool.capacity; ++i) + { + b2Island* island = world->islands + i; + if (b2ObjectValid(&island->object) == false) + { + continue; + } + + b2ValidateIsland(island, true); + } +#endif + b2TracyCZoneEnd(awake_islands); b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); @@ -1279,7 +1296,7 @@ void b2World_EnableWarmStarting(b2WorldId worldId, bool flag) world->enableWarmStarting = flag; } -void b2World_EnableContinuo(b2WorldId worldId, bool flag) +void b2World_EnableContinuous(b2WorldId worldId, bool flag) { b2World* world = b2GetWorldFromId(worldId); B2_ASSERT(world->locked == false); @@ -1291,6 +1308,42 @@ void b2World_EnableContinuo(b2WorldId worldId, bool flag) world->enableContinuous = flag; } +void b2World_SetRestitutionThreshold(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->restitutionThreshold = B2_CLAMP(value, 0.0f, FLT_MAX); +} + +void b2World_SetMaximumPushoutVelocity(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->maximumPushoutVelocity = B2_CLAMP(value, 0.0f, FLT_MAX); +} + +void b2World_SetContactHertz(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->contactHertz = B2_CLAMP(value, 0.0f, FLT_MAX); +} + b2Profile b2World_GetProfile(b2WorldId worldId) { b2World* world = b2GetWorldFromId(worldId); diff --git a/src/world.h b/src/world.h index 5cc0eddb..ec7ac10e 100644 --- a/src/world.h +++ b/src/world.h @@ -84,6 +84,8 @@ typedef struct b2World b2Vec2 gravity; float restitutionThreshold; + float maximumPushoutVelocity; + float contactHertz; // This is used to compute the time step ratio to support a variable time step. float inv_dt0; From ddd1f580b27f68f00d71310baac2509b81b15527 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sat, 21 Oct 2023 22:37:48 -0700 Subject: [PATCH 43/51] continuous wip --- src/graph.c | 55 +++++++++++++++++++++++++++++++++++++---------------- src/world.c | 5 ----- 2 files changed, 39 insertions(+), 21 deletions(-) diff --git a/src/graph.c b/src/graph.c index 57ec1467..fc7124f1 100644 --- a/src/graph.c +++ b/src/graph.c @@ -510,42 +510,65 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; b2BitSet* awakeIslandBitSet = &world->taskContextArray[threadIndex].awakeIslandBitSet; + bool enableContinuous = world->enableContinuous; B2_ASSERT(startIndex <= endIndex); B2_ASSERT(startIndex <= world->bodyPool.capacity); B2_ASSERT(endIndex <= world->bodyPool.capacity); + // Update sleep + const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; + const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; + for (int32_t i = startIndex; i < endIndex; ++i) { const b2SolverBody* solverBody = solverBodies + i; - b2Body* body = bodies + solverToBodyMap[i]; + int32_t bodyIndex = solverToBodyMap[i]; + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); - // Integrate positions - // TODO_ERIN clamping - body->linearVelocity = solverBody->linearVelocity; - body->angularVelocity = solverBody->angularVelocity; - body->position = b2Add(body->position, solverBody->deltaPosition); - body->angle += solverBody->deltaAngle; + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + body->linearVelocity = v; + body->angularVelocity = w; body->force = b2Vec2_zero; body->torque = 0.0f; - // Update sleep - const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; - const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; - - if (enableSleep == false || body->enableSleep == false || - body->angularVelocity * body->angularVelocity > angTolSqr || - b2Dot(body->linearVelocity, body->linearVelocity) > linTolSqr) + if (enableSleep == false || body->enableSleep == false || w * w > angTolSqr || b2Dot(v, v) > linTolSqr) { body->sleepTime = 0.0f; + + const float saftetyFactor = 0.5f; + if (enableContinuous && (b2Length(v) + B2_ABS(w) * body->maxExtent) * timeStep > saftetyFactor * body->minExtent) + { + // Store in fast array for the continuous collision stage + int fastIndex = atomic_fetch_add(&world->fastBodyCount, 1); + world->fastBodies[fastIndex] = bodyIndex; + body->isFast = true; + } + else + { + // Body is safe to advance + body->isFast = false; + + body->position = b2Add(body->position, solverBody->deltaPosition); + body->angle += solverBody->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + } } else { + body->position = b2Add(body->position, solverBody->deltaPosition); + body->angle += solverBody->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + body->sleepTime += timeStep; } diff --git a/src/world.c b/src/world.c index a9e95558..e6c5b0e8 100644 --- a/src/world.c +++ b/src/world.c @@ -856,8 +856,6 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(broad_phase); - // TODO_ERIN continuous -#if 0 b2TracyCZoneNC(continuous_collision, "Continuous", b2_colorDarkGoldenrod, true); // Parallel continuous collision @@ -925,9 +923,6 @@ static void b2Solve(b2World* world, b2StepContext* context) world->fastBodies = NULL; world->profile.continuous = b2GetMilliseconds(&timer); -#endif - - world->profile.continuous = 0.0f; b2TracyCZoneEnd(solve); } From 102b3696df90683d095c382c8e1bee04f22ce713 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Sun, 22 Oct 2023 23:20:47 -0700 Subject: [PATCH 44/51] continuous working restitution wip --- samples/collection/behavior.cpp | 2 +- samples/collection/benchmark_barrel.cpp | 8 +-- samples/collection/sample_joints.cpp | 4 +- src/contact_solver.c | 85 +++++++++++++++++++++++++ src/contact_solver.h | 6 ++ src/graph.c | 66 ++++++++++++------- src/world.h | 1 - 7 files changed, 140 insertions(+), 32 deletions(-) diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp index 64c4d8d6..2b89fdf1 100644 --- a/samples/collection/behavior.cpp +++ b/samples/collection/behavior.cpp @@ -207,7 +207,7 @@ class OverlapRecovery : public Sample m_bodyCount = 0; m_baseCount = 4; m_overlap = 0.5f; - m_extent = 0.1f; + m_extent = 0.3f; m_pushout = 3.0f; m_hertz = 30.0f; diff --git a/samples/collection/benchmark_barrel.cpp b/samples/collection/benchmark_barrel.cpp index 2fd7f7f4..bf010fd6 100644 --- a/samples/collection/benchmark_barrel.cpp +++ b/samples/collection/benchmark_barrel.cpp @@ -69,9 +69,7 @@ class BenchmarkBarrel : public Sample } } - m_columnCount = g_sampleDebug ? 4 : e_maxColumns; - - m_columnCount = e_maxColumns; + m_columnCount = g_sampleDebug ? 10 : e_maxColumns; float rad = 0.5f; @@ -91,9 +89,7 @@ class BenchmarkBarrel : public Sample b2Circle circle = {0}; circle.radius = rad; - m_rowCount = g_sampleDebug ? 8 : e_maxRows; - - m_rowCount = e_maxRows; + m_rowCount = g_sampleDebug ? 40 : e_maxRows; int32_t index = 0; diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index 551f171e..561f5ba4 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -17,8 +17,8 @@ class BenchmarkJointGrid : public Sample : Sample(settings) { constexpr float rad = 0.4f; - constexpr int32_t numi = g_sampleDebug ? 100 : 100; - constexpr int32_t numk = g_sampleDebug ? 100 : 100; + constexpr int32_t numi = g_sampleDebug ? 10 : 100; + constexpr int32_t numk = g_sampleDebug ? 10 : 100; constexpr float shift = 1.0f; // Allocate to avoid huge stack usage diff --git a/src/contact_solver.c b/src/contact_solver.c index 73a395ca..e365a950 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -59,6 +59,7 @@ void b2PrepareOverflowContacts(b2SolverTaskContext* context) constraint->indexB = indexB; constraint->normal = manifold->normal; constraint->friction = contact->friction; + constraint->restitution = contact->restitution; constraint->pointCount = pointCount; b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; @@ -109,6 +110,11 @@ void b2PrepareOverflowContacts(b2SolverTaskContext* context) cp->separation = mp->separation; cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + cp->relativeVelocity = b2Dot(normal, b2Sub(vrB, vrA)); + // Warm start if (enableWarmStarting) { @@ -259,6 +265,85 @@ void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias) b2TracyCZoneEnd(solve_contact); } +void b2ApplyOverflowRestitution(b2SolverTaskContext* context) +{ + b2TracyCZoneNC(overflow_resitution, "Overflow Restitution", b2_colorViolet, true); + + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; + float threshold = context->world->restitutionThreshold; + + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + for (int32_t i = 0; i < count; ++i) + { + b2ContactConstraint* constraint = constraints + i; + + float restitution = constraint->restitution; + if (restitution == 0.0f) + { + continue; + } + + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + b2Vec2 normal = constraint->normal; + int32_t pointCount = constraint->pointCount; + + for (int32_t j = 0; j < pointCount; ++j) + { + b2ContactConstraintPoint* cp = constraint->points + j; + + // if the normal impulse is zero then there was no collision + if (cp->relativeVelocity > -threshold || cp->normalImpulse == 0.0f) + { + continue; + } + + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); + + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * (vn + restitution * cp->relativeVelocity); + + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; + + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } + + b2TracyCZoneEnd(overflow_resitution); +} + void b2StoreOverflowImpulses(b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); diff --git a/src/contact_solver.h b/src/contact_solver.h index cde232b6..ef6fb4f7 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -13,6 +13,7 @@ typedef struct b2ContactConstraintPoint { b2Vec2 rA, rB; float separation; + float relativeVelocity; float normalImpulse; float tangentImpulse; float normalMass; @@ -27,6 +28,7 @@ typedef struct b2ContactConstraint b2ContactConstraintPoint points[2]; b2Vec2 normal; float friction; + float restitution; float massCoefficient; float biasCoefficient; float impulseCoefficient; @@ -49,9 +51,11 @@ typedef struct b2ContactConstraintAVX b2Vec2W normal; __m256 friction; + __m256 restitution; b2Vec2W rA1, rB1; b2Vec2W rA2, rB2; __m256 separation1, separation2; + __m256 relativeVelocity1, relativeVelocity2; __m256 normalImpulse1, normalImpulse2; __m256 tangentImpulse1, tangentImpulse2; __m256 normalMass1, tangentMass1; @@ -64,10 +68,12 @@ typedef struct b2ContactConstraintAVX // Scalar void b2PrepareOverflowContacts(b2SolverTaskContext* context); void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias); +void b2ApplyOverflowRestitution(b2SolverTaskContext* context); void b2StoreOverflowImpulses(b2SolverTaskContext* context); // AVX versions void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2ApplyRestitutionW(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.c b/src/graph.c index fc7124f1..a31b167e 100644 --- a/src/graph.c +++ b/src/graph.c @@ -23,6 +23,11 @@ #define B2_AVX 1 +// Kinematic bodies have to be treated like dynamic bodies in graph coloring. Unlike static bodies, we cannot use a dummy solver body for +// kinematic bodies. We cannot access a kinematic body from multiple threads efficiently because the SIMD solver body scatter would write to +// the same kinematic body from multiple threads. Even if these writes don't modify the body, they will cause horrible cache stalls. To make +// this feasible I would need a way to block these writes. + extern bool b2_parallel; typedef struct b2WorkerContext @@ -171,7 +176,7 @@ void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) B2_ASSERT(world->contacts[swappedIndex].colorIndex == b2_overflowIndex); world->contacts[swappedIndex].colorSubIndex = colorSubIndex; } - + contact->colorIndex = B2_NULL_INDEX; contact->colorSubIndex = B2_NULL_INDEX; @@ -528,14 +533,21 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t b2Body* body = bodies + bodyIndex; B2_ASSERT(b2ObjectValid(&body->object)); - b2Vec2 v = body->linearVelocity; - float w = body->angularVelocity; + b2Vec2 v = solverBody->linearVelocity; + float w = solverBody->angularVelocity; body->linearVelocity = v; body->angularVelocity = w; + body->position = b2Add(body->position, solverBody->deltaPosition); + body->angle += solverBody->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + body->force = b2Vec2_zero; body->torque = 0.0f; + body->isFast = false; if (enableSleep == false || body->enableSleep == false || w * w > angTolSqr || b2Dot(v, v) > linTolSqr) { @@ -552,23 +564,15 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t else { // Body is safe to advance - body->isFast = false; - - body->position = b2Add(body->position, solverBody->deltaPosition); - body->angle += solverBody->deltaAngle; - - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); + body->position0 = body->position; + body->angle0 = body->angle; } } else { - body->position = b2Add(body->position, solverBody->deltaPosition); - body->angle += solverBody->deltaAngle; - - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - + // Body is safe to advance + body->position0 = body->position; + body->angle0 = body->angle; body->sleepTime += timeStep; } @@ -580,6 +584,7 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t } // Update shapes AABBs + bool isFast = body->isFast; int32_t shapeIndex = body->shapeList; while (shapeIndex != B2_NULL_INDEX) { @@ -587,17 +592,29 @@ static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t B2_ASSERT(shape->isFast == false); - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + if (isFast) { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + // The AABB is updated after continuous collision. + // Add to moved shapes regardless of AABB changes. + shape->isFast = true; // Bit-set to keep the move array sorted b2SetBit(shapeBitSet, shapeIndex); } + else + { + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + } + shapeIndex = shape->nextShapeIndex; } @@ -960,6 +977,11 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) awakeBodyCount += island->bodyCount; } + // Prepare world to receive fast bodies from body finalization + // TODO_ERIN scope problem + world->fastBodyCount = 0; + world->fastBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "fast bodies"); + if (awakeBodyCount == 0) { for (int32_t i = 0; i < b2_graphColorCount; ++i) @@ -1076,7 +1098,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; graph->occupancy[b2_overflowIndex] = overflowContactCount; - graph->overflow.contactConstraints = + graph->overflow.contactConstraints = b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); int32_t base = 0; diff --git a/src/world.h b/src/world.h index ec7ac10e..81d6d5db 100644 --- a/src/world.h +++ b/src/world.h @@ -76,7 +76,6 @@ typedef struct b2World // Array of fast bodies that need continuous collision handling int32_t* fastBodies; - int32_t fastBodyCapacity; _Atomic int fastBodyCount; // Id that is incremented every time step From b7d9a60119937b61c945c03987ad4ddecb334efa Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Tue, 24 Oct 2023 22:53:56 -0700 Subject: [PATCH 45/51] restitution wip --- src/contact_solver.c | 439 ++++++++++++++++++++++++++----------------- src/contact_solver.h | 12 +- src/graph.c | 51 ++++- src/graph.h | 2 +- src/solver_data.h | 3 +- 5 files changed, 323 insertions(+), 184 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index e365a950..5d8a6cad 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -473,7 +473,7 @@ static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict ind _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); } -void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +void b2PrepareContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); @@ -530,6 +530,7 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont float impulseCoefficient = 1.0f / (1.0f + c); ((float*)&constraint->friction)[j] = contact->friction; + ((float*)&constraint->restitution)[j] = contact->restitution; ((float*)&constraint->impulseCoefficient)[j] = impulseCoefficient; ((float*)&constraint->massCoefficient)[j] = c * impulseCoefficient; ((float*)&constraint->biasCoefficient)[j] = omega / d; @@ -560,6 +561,11 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont float rtB = b2Cross(mp->anchorB, tangent); float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; ((float*)&constraint->tangentMass1)[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(solverBodyA->linearVelocity, b2CrossSV(solverBodyA->angularVelocity, mp->anchorA)); + b2Vec2 vrB = b2Add(solverBodyB->linearVelocity, b2CrossSV(solverBodyB->angularVelocity, mp->anchorB)); + ((float*)&constraint->relativeVelocity1)[j] = b2Dot(normal, b2Sub(vrB, vrA)); } int32_t pointCount = manifold->pointCount; @@ -586,6 +592,11 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont float rtB = b2Cross(mp->anchorB, tangent); float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; ((float*)&constraint->tangentMass2)[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(solverBodyA->linearVelocity, b2CrossSV(solverBodyA->angularVelocity, mp->anchorA)); + b2Vec2 vrB = b2Add(solverBodyB->linearVelocity, b2CrossSV(solverBodyB->angularVelocity, mp->anchorB)); + ((float*)&constraint->relativeVelocity2)[j] = b2Dot(normal, b2Sub(vrB, vrA)); } else { @@ -599,6 +610,7 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont ((float*)&constraint->rB2.Y)[j] = 0.0f; ((float*)&constraint->normalMass2)[j] = 0.0f; ((float*)&constraint->tangentMass2)[j] = 0.0f; + ((float*)&constraint->relativeVelocity2)[j] = 0.0f; } } else @@ -622,7 +634,8 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont ((float*)&constraint->rB1.Y)[j] = 0.0f; ((float*)&constraint->normalMass1)[j] = 0.0f; ((float*)&constraint->tangentMass1)[j] = 0.0f; - + ((float*)&constraint->relativeVelocity1)[j] = 0.0f; + ((float*)&constraint->separation2)[j] = 0.0f; ((float*)&constraint->normalImpulse2)[j] = 0.0f; ((float*)&constraint->tangentImpulse2)[j] = 0.0f; @@ -632,6 +645,7 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont ((float*)&constraint->rB2.Y)[j] = 0.0f; ((float*)&constraint->normalMass2)[j] = 0.0f; ((float*)&constraint->tangentMass2)[j] = 0.0f; + ((float*)&constraint->relativeVelocity2)[j] = 0.0f; } } } @@ -639,7 +653,7 @@ void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCont b2TracyCZoneEnd(prepare_contact); } -void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +void b2WarmStartContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { b2TracyCZoneNC(warm_start_contact, "Warm Start", b2_colorGreen1, true); @@ -686,196 +700,283 @@ void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskCo b2TracyCZoneEnd(warm_start_contact); } -static void b2SolveContactTwoPointsAVX(b2ContactConstraintAVX* restrict c, b2SolverBody* restrict bodies, float inv_dt, float pushout, bool useBias) +void b2SolveContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { - b2SimdBody bA = b2GatherBodies(bodies, c->indexA); - b2SimdBody bB = b2GatherBodies(bodies, c->indexB); + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - __m256 biasCoeff, massCoeff, impulseCoeff; - if (useBias) - { - biasCoeff = c->biasCoefficient; - massCoeff = c->massCoefficient; - impulseCoeff = c->impulseCoefficient; - } - else + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; + float inv_dt = context->invTimeStep; + const float pushout = context->world->maximumPushoutVelocity; + + for (int32_t i = startIndex; i < endIndex; ++i) { - biasCoeff = _mm256_setzero_ps(); - massCoeff = _mm256_set1_ps(1.0f); - impulseCoeff = _mm256_setzero_ps(); - } + b2ContactConstraintAVX* c = constraints + i; + + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - __m256 invDtMul = _mm256_set1_ps(inv_dt); - __m256 minBiasVel = _mm256_set1_ps(-pushout); + __m256 biasCoeff, massCoeff, impulseCoeff; + if (useBias) + { + biasCoeff = c->biasCoefficient; + massCoeff = c->massCoefficient; + impulseCoeff = c->impulseCoefficient; + } + else + { + biasCoeff = _mm256_setzero_ps(); + massCoeff = _mm256_set1_ps(1.0f); + impulseCoeff = _mm256_setzero_ps(); + } - // first point non-penetration constraint - { - // Compute change in separation (small angle approximation of sin(angle) == angle) - __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB1.Y)), sub(bA.dp.X, mul(bA.da, c->rA1.Y))); - __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB1.X)), add(bA.dp.Y, mul(bA.da, c->rA1.X))); - __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); - - __m256 s = add(c->separation1, ds); - - __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); - __m256 specBias = mul(s, invDtMul); - __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); - __m256 bias = _mm256_blendv_ps(softBias, specBias, test); - - // Relative velocity at contact - __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); - __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); - __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); - - // Compute normal impulse - __m256 negImpulse = add(mul(c->normalMass1, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse1)); - // float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - - // Clamp the accumulated impulse - __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); - __m256 impulse = sub(newImpulse, c->normalImpulse1); - c->normalImpulse1 = newImpulse; - - // Apply contact impulse - __m256 Px = mul(impulse, c->normal.X); - __m256 Py = mul(impulse, c->normal.Y); - - bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); - bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - - bB.v.X = add(bB.v.X, mul(bB.invM, Px)); - bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); - } + __m256 invDtMul = _mm256_set1_ps(inv_dt); + __m256 minBiasVel = _mm256_set1_ps(-pushout); - // second point non-penetration constraint - { - // Compute change in separation (small angle approximation of sin(angle) == angle) - __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB2.Y)), sub(bA.dp.X, mul(bA.da, c->rA2.Y))); - __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB2.X)), add(bA.dp.Y, mul(bA.da, c->rA2.X))); - __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); - - __m256 s = add(c->separation2, ds); - - __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); - __m256 specBias = mul(s, invDtMul); - __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); - __m256 bias = _mm256_blendv_ps(softBias, specBias, test); - - // Relative velocity at contact - __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); - __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); - __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); - - // Compute normal impulse - __m256 negImpulse = add(mul(c->normalMass2, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse2)); - - // Clamp the accumulated impulse - __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); - __m256 impulse = sub(newImpulse, c->normalImpulse2); - c->normalImpulse2 = newImpulse; - - // Apply contact impulse - __m256 Px = mul(impulse, c->normal.X); - __m256 Py = mul(impulse, c->normal.Y); - - bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); - bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - - bB.v.X = add(bB.v.X, mul(bB.invM, Px)); - bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); - } + // first point non-penetration constraint + { + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB1.Y)), sub(bA.dp.X, mul(bA.da, c->rA1.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB1.X)), add(bA.dp.Y, mul(bA.da, c->rA1.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); - __m256 tangentX = c->normal.Y; - __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); - // float friction = constraint->friction; + __m256 s = add(c->separation1, ds); - // first point friction constraint - { - // Relative velocity at contact - __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); - __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); - __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); - - // Compute tangent force - __m256 negImpulse = mul(c->tangentMass1, vt); - - // Clamp the accumulated force - __m256 maxFriction = mul(c->friction, c->normalImpulse1); - __m256 newImpulse = sub(c->tangentImpulse1, negImpulse); - newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); - __m256 impulse = sub(newImpulse, c->tangentImpulse1); - c->tangentImpulse1 = newImpulse; - - // Apply contact impulse - __m256 Px = mul(impulse, tangentX); - __m256 Py = mul(impulse, tangentY); - - bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); - bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - - bB.v.X = add(bB.v.X, mul(bB.invM, Px)); - bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); - } + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(softBias, specBias, test); - // second point friction constraint - { - // Relative velocity at contact - __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); - __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); - __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); - - // Compute tangent force - __m256 negImpulse = mul(c->tangentMass2, vt); - - // Clamp the accumulated force - __m256 maxFriction = mul(c->friction, c->normalImpulse2); - __m256 newImpulse = sub(c->tangentImpulse2, negImpulse); - newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); - __m256 impulse = sub(newImpulse, c->tangentImpulse2); - c->tangentImpulse2 = newImpulse; - - // Apply contact impulse - __m256 Px = mul(impulse, tangentX); - __m256 Py = mul(impulse, tangentY); - - bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); - bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); - bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - - bB.v.X = add(bB.v.X, mul(bB.invM, Px)); - bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); - bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); + + // Compute normal impulse + __m256 negImpulse = add(mul(c->normalMass1, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse1)); + // float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse1); + c->normalImpulse1 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); + } + + // second point non-penetration constraint + { + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB2.Y)), sub(bA.dp.X, mul(bA.da, c->rA2.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB2.X)), add(bA.dp.Y, mul(bA.da, c->rA2.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); + + __m256 s = add(c->separation2, ds); + + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(softBias, specBias, test); + + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); + + // Compute normal impulse + __m256 negImpulse = add(mul(c->normalMass2, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse2)); + + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse2); + c->normalImpulse2 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } + + __m256 tangentX = c->normal.Y; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); + // float friction = constraint->friction; + + // first point friction constraint + { + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); + + // Compute tangent force + __m256 negImpulse = mul(c->tangentMass1, vt); + + // Clamp the accumulated force + __m256 maxFriction = mul(c->friction, c->normalImpulse1); + __m256 newImpulse = sub(c->tangentImpulse1, negImpulse); + newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse1); + c->tangentImpulse1 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); + } + + // second point friction constraint + { + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); + + // Compute tangent force + __m256 negImpulse = mul(c->tangentMass2, vt); + + // Clamp the accumulated force + __m256 maxFriction = mul(c->friction, c->normalImpulse2); + __m256 newImpulse = sub(c->tangentImpulse2, negImpulse); + newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse2); + c->tangentImpulse2 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } + + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); } - b2ScatterBodies(bodies, c->indexA, &bA); - b2ScatterBodies(bodies, c->indexB, &bB); + b2TracyCZoneEnd(solve_contact); } -void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) +void b2ApplyRestitutionSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) { - b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); + b2TracyCZoneNC(restitution, "Restitution", b2_colorDodgerBlue, true); b2SolverBody* bodies = context->solverBodies; b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; - float inv_dt = context->invTimeStep; - const float pushout = context->world->maximumPushoutVelocity; + b2FloatW threshold = _mm256_set1_ps(context->world->restitutionThreshold); + b2FloatW zero = _mm256_setzero_ps(); for (int32_t i = startIndex; i < endIndex; ++i) { - b2ContactConstraintAVX* constraint = constraints + i; - b2SolveContactTwoPointsAVX(constraint, bodies, inv_dt, pushout, useBias); + b2ContactConstraintAVX* c = constraints + i; + + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); + + // first point non-penetration constraint + { + // Set effective mass to zero if restitution should not be applied + __m256 test1 = _mm256_cmp_ps(add(c->relativeVelocity1, threshold), zero, _CMP_GT_OQ); + __m256 test2 = _mm256_cmp_ps(c->normalImpulse1, zero, _CMP_EQ_OQ); + __m256 test = _mm256_or_ps(test1, test2); + __m256 mass = _mm256_blendv_ps(zero, c->normalMass1, test); + + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); + + // Compute normal impulse + __m256 negImpulse = mul(mass, add(vn, mul(c->restitution, c->relativeVelocity1))); + + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse1); + c->normalImpulse1 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); + } + + // second point non-penetration constraint + { + // Set effective mass to zero if restitution should not be applied + __m256 test1 = _mm256_cmp_ps(add(c->relativeVelocity2, threshold), zero, _CMP_GT_OQ); + __m256 test2 = _mm256_cmp_ps(c->normalImpulse2, zero, _CMP_EQ_OQ); + __m256 test = _mm256_or_ps(test1, test2); + __m256 mass = _mm256_blendv_ps(zero, c->normalMass2, test); + + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); + + // Compute normal impulse + __m256 negImpulse = mul(mass, add(vn, mul(c->restitution, c->relativeVelocity2))); + + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse2); + c->normalImpulse2 = newImpulse; + + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); + + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } + + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); } - b2TracyCZoneEnd(solve_contact); + b2TracyCZoneEnd(restitution); } -void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +void b2StoreImpulsesSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) { b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); diff --git a/src/contact_solver.h b/src/contact_solver.h index ef6fb4f7..22c2b0a6 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -44,7 +44,7 @@ typedef struct b2Vec2W b2FloatW X, Y; } b2Vec2W; -typedef struct b2ContactConstraintAVX +typedef struct b2ContactConstraintSIMD { int32_t indexA[8]; int32_t indexB[8]; @@ -72,8 +72,8 @@ void b2ApplyOverflowRestitution(b2SolverTaskContext* context); void b2StoreOverflowImpulses(b2SolverTaskContext* context); // AVX versions -void b2PrepareContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); -void b2WarmStartContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2SolveContactsAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); -void b2ApplyRestitutionW(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); -void b2StoreImpulsesAVX(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2PrepareContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2WarmStartContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2ApplyRestitutionSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2StoreImpulsesSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/graph.c b/src/graph.c index a31b167e..c778de87 100644 --- a/src/graph.c +++ b/src/graph.c @@ -646,15 +646,15 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stagePrepareContacts: - b2PrepareContactsAVX(startIndex, endIndex, context); + b2PrepareContactsSIMD(startIndex, endIndex, context); break; case b2_stageWarmStartContacts: - b2WarmStartContactsAVX(startIndex, endIndex, context, stage->colorIndex); + b2WarmStartContactsSIMD(startIndex, endIndex, context, stage->colorIndex); break; case b2_stageSolveContacts: - b2SolveContactsAVX(startIndex, endIndex, context, stage->colorIndex, true); + b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, true); break; case b2_stageIntegratePositions: @@ -662,11 +662,15 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i break; case b2_stageCalmContacts: - b2SolveContactsAVX(startIndex, endIndex, context, stage->colorIndex, false); + b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, false); + break; + + case b2_stageRestitution: + b2ApplyRestitutionSIMD(startIndex, endIndex, context, stage->colorIndex); break; case b2_stageStoreImpulses: - b2StoreImpulsesAVX(startIndex, endIndex, context); + b2StoreImpulsesSIMD(startIndex, endIndex, context); break; } } @@ -810,6 +814,7 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont b2_stageIntegratePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageRestitution, b2_stageStoreImpulses */ @@ -899,12 +904,30 @@ void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDont stageIndex += 1 + activeColorCount; + // Restitution + { + b2ApplyOverflowRestitution(context); + + // stage index restarted each iteration + int32_t iterStageIndex = stageIndex; + for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex) + { + syncBits = (graphSyncIndex << 16) | iterStageIndex; + B2_ASSERT(stages[iterStageIndex].type == b2_stageRestitution); + b2ExecuteMainStage(stages + iterStageIndex, context, syncBits); + iterStageIndex += 1; + } + graphSyncIndex += 1; + } + + stageIndex += activeColorCount; + + b2StoreOverflowImpulses(context); + syncBits = (constraintSyncIndex << 16) | stageIndex; B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses); b2ExecuteMainStage(stages + stageIndex, context, syncBits); - b2StoreOverflowImpulses(context); - // Signal workers to finish atomic_store(&context->syncBits, UINT_MAX); @@ -1144,6 +1167,7 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) b2_stageFinalizePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageRestitution, b2_stageStoreImpulses */ @@ -1162,6 +1186,8 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stageCount += 1 + activeColorCount + 1; // b2_stageCalmJoints, b2_stageCalmContacts stageCount += 1 + activeColorCount; + // b2_stageRestitution + stageCount += activeColorCount; // b2_stageStoreImpulses stageCount += 1; @@ -1318,6 +1344,17 @@ void b2SolveGraph(b2World* world, b2StepContext* stepContext) stage += 1; } + // Restitution + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageRestitution; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + // Store impulses stage->type = b2_stageStoreImpulses; stage->blocks = storeBlocks; diff --git a/src/graph.h b/src/graph.h index 98dd4d9f..07ac79d4 100644 --- a/src/graph.h +++ b/src/graph.h @@ -9,7 +9,7 @@ typedef struct b2Contact b2Contact; typedef struct b2ContactConstraint b2ContactConstraint; -typedef struct b2ContactConstraintAVX b2ContactConstraintAVX; +typedef struct b2ContactConstraintSIMD b2ContactConstraintAVX; typedef struct b2Joint b2Joint; typedef struct b2StepContext b2StepContext; typedef struct b2World b2World; diff --git a/src/solver_data.h b/src/solver_data.h index d6b48dd5..7d7deaad 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -53,6 +53,7 @@ typedef enum b2SolverStageType b2_stageIntegratePositions, b2_stageCalmJoints, b2_stageCalmContacts, + b2_stageRestitution, b2_stageStoreImpulses } b2SolverStageType; @@ -90,7 +91,7 @@ typedef struct b2SolverTaskContext b2StepContext* stepContext; struct b2ContactConstraint* constraints; - struct b2ContactConstraintAVX* constraintAVXs; + struct b2ContactConstraintSIMD* constraintAVXs; int32_t activeColorCount; int32_t velocityIterations; int32_t calmIterations; From 1a03b847649f2be4f2b6078cbca5c3f2a77017c4 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 21:24:06 -0700 Subject: [PATCH 46/51] fix restitution --- samples/collection/benchmark_many_tumblers.cpp | 7 ++++--- samples/collection/sample_joints.cpp | 2 +- src/contact_solver.c | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/samples/collection/benchmark_many_tumblers.cpp b/samples/collection/benchmark_many_tumblers.cpp index 5bb153ba..c8f89143 100644 --- a/samples/collection/benchmark_many_tumblers.cpp +++ b/samples/collection/benchmark_many_tumblers.cpp @@ -19,8 +19,8 @@ class BenchmarkManyTumblers : public Sample b2BodyDef bd = b2DefaultBodyDef(); m_groundId = b2World_CreateBody(m_worldId, &bd); - m_rowCount = g_sampleDebug ? 1 : 19; - m_columnCount = g_sampleDebug ? 1 : 19; + m_rowCount = g_sampleDebug ? 2 : 19; + m_columnCount = g_sampleDebug ? 2 : 19; m_tumblerIds = nullptr; m_jointIds = nullptr; @@ -122,7 +122,7 @@ class BenchmarkManyTumblers : public Sample free(m_bodyIds); - int32_t bodiesPerTumbler = g_sampleDebug ? 1 : 50; + int32_t bodiesPerTumbler = g_sampleDebug ? 8 : 50; m_bodyCount = bodiesPerTumbler * m_tumblerCount; m_bodyIds = static_cast(malloc(m_bodyCount * sizeof(b2BodyId))); @@ -169,6 +169,7 @@ class BenchmarkManyTumblers : public Sample { b2ShapeDef sd = b2DefaultShapeDef(); sd.density = 1.0f; + //sd.restitution = 0.5f; b2Circle circle = {{0.0f, 0.0f}, 0.125f}; b2Polygon polygon = b2MakeBox(0.125f, 0.125f); diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp index 561f5ba4..3f69bf07 100644 --- a/samples/collection/sample_joints.cpp +++ b/samples/collection/sample_joints.cpp @@ -96,7 +96,7 @@ class Bridge : public Sample public: enum { - e_count = 20 + e_count = 80 }; Bridge(const Settings& settings) diff --git a/src/contact_solver.c b/src/contact_solver.c index 5d8a6cad..a2492050 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -619,6 +619,7 @@ void b2PrepareContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskCon constraint->indexA[j] = B2_NULL_INDEX; constraint->indexB[j] = B2_NULL_INDEX; ((float*)&constraint->friction)[j] = 0.0f; + ((float*)&constraint->restitution)[j] = 0.0f; ((float*)&constraint->impulseCoefficient)[j] = 0.0f; ((float*)&constraint->massCoefficient)[j] = 0.0f; ((float*)&constraint->biasCoefficient)[j] = 0.0f; @@ -907,7 +908,7 @@ void b2ApplyRestitutionSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskCo __m256 test1 = _mm256_cmp_ps(add(c->relativeVelocity1, threshold), zero, _CMP_GT_OQ); __m256 test2 = _mm256_cmp_ps(c->normalImpulse1, zero, _CMP_EQ_OQ); __m256 test = _mm256_or_ps(test1, test2); - __m256 mass = _mm256_blendv_ps(zero, c->normalMass1, test); + __m256 mass = _mm256_blendv_ps(c->normalMass1, zero, test); // Relative velocity at contact __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); From a72b9709b3cb4555693dc14e09c3925f11075f1b Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 21:28:44 -0700 Subject: [PATCH 47/51] CI fix --- src/contact_solver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index a2492050..6569bdc6 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -389,7 +389,7 @@ typedef struct b2SimdBody // This is a load and 8x8 transpose static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) { - _Static_assert(sizeof(b2SolverBody) == 32); + _Static_assert(sizeof(b2SolverBody) == 32, "b2SolverBody not 32 bytes"); B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); __m256 zero = _mm256_setzero_ps(); __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); @@ -434,7 +434,7 @@ static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* r // This writes everything back to the solver bodies but only the velocities change static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) { - _Static_assert(sizeof(b2SolverBody) == 32); + _Static_assert(sizeof(b2SolverBody) == 32, "b2SolverBody not 32 bytes"); B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); __m256 t0 = _mm256_unpacklo_ps(simdBody->v.X, simdBody->v.Y); __m256 t1 = _mm256_unpackhi_ps(simdBody->v.X, simdBody->v.Y); From 4255711e7f92a2547d086b6ce5a2543cbc07db7b Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 21:42:28 -0700 Subject: [PATCH 48/51] removed FMA --- src/contact_solver.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/contact_solver.c b/src/contact_solver.c index 6569bdc6..6cf23db6 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -368,9 +368,12 @@ void b2StoreOverflowImpulses(b2SolverTaskContext* context) b2TracyCZoneEnd(store_impulses); } +// SIMD WIP #define add(a, b) _mm256_add_ps((a), (b)) #define sub(a, b) _mm256_sub_ps((a), (b)) #define mul(a, b) _mm256_mul_ps((a), (b)) +#define muladd(a, b, c) _mm256_add_ps((a), _mm256_mul_ps((b), (c))) +#define mulsub(a, b, c) _mm256_sub_ps((a), _mm256_mul_ps((b), (c))) static inline __m256 b2CrossW(b2Vec2W a, b2Vec2W b) { @@ -674,24 +677,24 @@ void b2WarmStartContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskC b2Vec2W P; P.X = add(mul(c->normalImpulse1, c->normal.X), mul(c->tangentImpulse1, tangentX)); P.Y = add(mul(c->normalImpulse1, c->normal.Y), mul(c->tangentImpulse1, tangentY)); - bA.w = _mm256_fnmadd_ps(bA.invI, b2CrossW(c->rA1, P), bA.w); - bA.v.X = _mm256_fnmadd_ps(bA.invM, P.X, bA.v.X); - bA.v.Y = _mm256_fnmadd_ps(bA.invM, P.Y, bA.v.Y); - bB.w = _mm256_fmadd_ps(bB.invI, b2CrossW(c->rB1, P), bB.w); - bB.v.X = _mm256_fmadd_ps(bB.invM, P.X, bB.v.X); - bB.v.Y = _mm256_fmadd_ps(bB.invM, P.Y, bB.v.Y); + bA.w = mulsub(bA.w, bA.invI, b2CrossW(c->rA1, P)); + bA.v.X = mulsub(bA.v.X, bA.invM, P.X); + bA.v.Y = mulsub(bA.v.Y, bA.invM, P.Y); + bB.w = muladd(bB.w, bB.invI, b2CrossW(c->rB1, P)); + bB.v.X = muladd(bB.v.X, bB.invM, P.X); + bB.v.Y = muladd(bB.v.Y, bB.invM, P.Y); } { b2Vec2W P; P.X = add(mul(c->normalImpulse2, c->normal.X), mul(c->tangentImpulse2, tangentX)); P.Y = add(mul(c->normalImpulse2, c->normal.Y), mul(c->tangentImpulse2, tangentY)); - bA.w = _mm256_fnmadd_ps(bA.invI, b2CrossW(c->rA2, P), bA.w); - bA.v.X = _mm256_fnmadd_ps(bA.invM, P.X, bA.v.X); - bA.v.Y = _mm256_fnmadd_ps(bA.invM, P.Y, bA.v.Y); - bB.w = _mm256_fmadd_ps(bB.invI, b2CrossW(c->rB2, P), bB.w); - bB.v.X = _mm256_fmadd_ps(bB.invM, P.X, bB.v.X); - bB.v.Y = _mm256_fmadd_ps(bB.invM, P.Y, bB.v.Y); + bA.w = mulsub(bA.w, bA.invI, b2CrossW(c->rA2, P)); + bA.v.X = mulsub(bA.v.X, bA.invM, P.X); + bA.v.Y = mulsub(bA.v.Y, bA.invM, P.Y); + bB.w = muladd(bB.w, bB.invI, b2CrossW(c->rB2, P)); + bB.v.X = muladd(bB.v.X, bB.invM, P.X); + bB.v.Y = muladd(bB.v.Y, bB.invM, P.Y); } b2ScatterBodies(bodies, c->indexA, &bA); From a19c0eb6a6bb89f2516cb74af02f6000691c08b7 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 21:44:14 -0700 Subject: [PATCH 49/51] CI fix --- src/graph.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/graph.c b/src/graph.c index c778de87..402fc8f7 100644 --- a/src/graph.c +++ b/src/graph.c @@ -20,8 +20,7 @@ #include #include #include - -#define B2_AVX 1 +#include // Kinematic bodies have to be treated like dynamic bodies in graph coloring. Unlike static bodies, we cannot use a dummy solver body for // kinematic bodies. We cannot access a kinematic body from multiple threads efficiently because the SIMD solver body scatter would write to From 1697cafcacc99d01cf099d194532bd327f943e68 Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 21:47:08 -0700 Subject: [PATCH 50/51] CI fix --- src/graph.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/graph.c b/src/graph.c index 402fc8f7..a5c493bc 100644 --- a/src/graph.c +++ b/src/graph.c @@ -652,6 +652,12 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i b2WarmStartContactsSIMD(startIndex, endIndex, context, stage->colorIndex); break; + case b2_stagePrepareJoints: + break; + + case b2_stageSolveJoints: + break; + case b2_stageSolveContacts: b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, true); break; @@ -660,6 +666,9 @@ static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, i b2IntegratePositionsTask(startIndex, endIndex, context); break; + case b2_stageCalmJoints: + break; + case b2_stageCalmContacts: b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, false); break; From 30172557c24c8afc8d69a33b5bba1005ffdca3bc Mon Sep 17 00:00:00 2001 From: Erin Catto Date: Wed, 25 Oct 2023 22:07:49 -0700 Subject: [PATCH 51/51] CI fixes --- include/box2d/joint_types.h | 2 +- src/world.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/box2d/joint_types.h b/include/box2d/joint_types.h index a78f3f77..4f8bc87c 100644 --- a/include/box2d/joint_types.h +++ b/include/box2d/joint_types.h @@ -160,4 +160,4 @@ static inline struct b2WeldJointDef b2DefaultWeldJointDef(void) def.angularDampingRatio = 1.0f; def.collideConnected = false; return def; -} \ No newline at end of file +} diff --git a/src/world.c b/src/world.c index e6c5b0e8..983124f5 100644 --- a/src/world.c +++ b/src/world.c @@ -621,8 +621,6 @@ static void b2Solve(b2World* world, b2StepContext* context) // Solve constraints using graph coloring b2SolveGraph(world, context); - b2ValidateNoEnlarged(&world->broadPhase); - b2TracyCZoneEnd(graph_solver); world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); @@ -801,6 +799,8 @@ static void b2Solve(b2World* world, b2StepContext* context) } } + b2ValidateNoEnlarged(&world->broadPhase); + b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true);