diff --git a/.clang-format b/.clang-format index c2247d17..9aa3635a 100644 --- a/.clang-format +++ b/.clang-format @@ -11,6 +11,7 @@ BreakBeforeBraces: Custom BraceWrapping: AfterCaseLabel: true AfterUnion: true + BeforeWhile: true ColumnLimit: 140 PointerAlignment: Left @@ -18,7 +19,7 @@ UseTab: Always BreakConstructorInitializers: BeforeComma # when VS updates clang-format to v16 -# InsertNewlineAtEOF: true +InsertNewlineAtEOF: true IncludeBlocks: Regroup diff --git a/include/box2d/aabb.h b/include/box2d/aabb.h index efa99e8f..456f60f5 100644 --- a/include/box2d/aabb.h +++ b/include/box2d/aabb.h @@ -107,6 +107,13 @@ static inline bool b2AABB_Contains(b2AABB a, b2AABB b) return s; } +static inline bool b2AABB_ContainsWithMargin(b2AABB a, b2AABB b, float margin) +{ + bool s = (a.lowerBound.x <= b.lowerBound.x - margin) & (a.lowerBound.y <= b.lowerBound.y - margin) & + (b.upperBound.x + margin <= a.upperBound.x) & (b.upperBound.y + margin <= a.upperBound.y); + return s; +} + /// Do a and b overlap static inline bool b2AABB_Overlaps(b2AABB a, b2AABB b) { diff --git a/include/box2d/api.h b/include/box2d/api.h index 60ad5ea1..49cb3d4b 100644 --- a/include/box2d/api.h +++ b/include/box2d/api.h @@ -3,6 +3,8 @@ #pragma once +#include + #ifdef __cplusplus #define BOX2D_CPP extern "C" #else @@ -22,7 +24,7 @@ #define BOX2D_API BOX2D_CPP #endif -typedef void* b2AllocFcn(int size); +typedef void* b2AllocFcn(uint32_t size); typedef void b2FreeFcn(void* mem); // Return 0 to @@ -37,7 +39,7 @@ extern "C" void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn); /// Total bytes allocated by Box2D -int b2GetByteCount(void); +uint32_t b2GetByteCount(void); extern b2AssertFcn* Box2DAssertCallback; diff --git a/include/box2d/box2d.h b/include/box2d/box2d.h index 353c2e3b..a85e761f 100644 --- a/include/box2d/box2d.h +++ b/include/box2d/box2d.h @@ -31,17 +31,6 @@ BOX2D_API void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityI /// Call this to draw shapes and other 
debug draw data. This is intentionally non-const. BOX2D_API void b2World_Draw(b2WorldId worldId, b2DebugDraw* debugDraw); -/// Enable/disable sleep. -BOX2D_API void b2World_EnableSleeping(b2WorldId worldId, bool flag); - -/// Enable/disable continuous collision. -BOX2D_API void b2World_EnableContinuous(b2WorldId worldId, bool flag); - -/// Get the current profile. -BOX2D_API struct b2Profile b2World_GetProfile(b2WorldId worldId); - -BOX2D_API struct b2Statistics b2World_GetStatistics(b2WorldId worldId); - /// Create a rigid body given a definition. No reference to the definition is retained. /// @warning This function is locked during callbacks. BOX2D_API b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def); @@ -74,16 +63,22 @@ BOX2D_API bool b2Shape_TestPoint(b2ShapeId shapeId, b2Vec2 point); BOX2D_API b2JointId b2World_CreateMouseJoint(b2WorldId worldId, const b2MouseJointDef* def); BOX2D_API b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDef* def); +BOX2D_API b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def); BOX2D_API void b2World_DestroyJoint(b2JointId jointId); +BOX2D_API b2BodyId b2Joint_GetBodyA(b2JointId jointId); +BOX2D_API b2BodyId b2Joint_GetBodyB(b2JointId jointId); + BOX2D_API void b2MouseJoint_SetTarget(b2JointId jointId, b2Vec2 target); BOX2D_API void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit); BOX2D_API void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor); BOX2D_API void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed); BOX2D_API float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep); +BOX2D_API void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque); +BOX2D_API b2Vec2 b2RevoluteJoint_GetConstraintForce(b2JointId jointId); -/// This function receives shapes found in the AABB query. + /// This function receives shapes found in the AABB query. 
/// @return true if the query should continue typedef bool b2QueryCallbackFcn(b2ShapeId shapeId, void* context); @@ -91,3 +86,30 @@ typedef bool b2QueryCallbackFcn(b2ShapeId shapeId, void* context); /// @param callback a user implemented callback function. /// @param aabb the query box. BOX2D_API void b2World_QueryAABB(b2WorldId worldId, b2AABB aabb, b2QueryCallbackFcn* fcn, void* context); + + +/// Advanced API for testing and special cases + +/// Enable/disable sleep. +BOX2D_API void b2World_EnableSleeping(b2WorldId worldId, bool flag); + +/// Enable/disable contact warm starting. Improves stacking stability. +BOX2D_API void b2World_EnableWarmStarting(b2WorldId worldId, bool flag); + +/// Enable/disable continuous collision. +BOX2D_API void b2World_EnableContinuous(b2WorldId worldId, bool flag); + +/// Adjust the restitution threshold +BOX2D_API void b2World_SetRestitutionThreshold(b2WorldId worldId, float value); + +/// Adjust the maximum contact constraint push out velocity +BOX2D_API void b2World_SetMaximumPushoutVelocity(b2WorldId worldId, float value); + +/// Adjust the contact stiffness in cycles per second. +BOX2D_API void b2World_SetContactHertz(b2WorldId worldId, float value); + +/// Get the current profile +BOX2D_API struct b2Profile b2World_GetProfile(b2WorldId worldId); + +/// Get counters and sizes +BOX2D_API struct b2Statistics b2World_GetStatistics(b2WorldId worldId); diff --git a/include/box2d/callbacks.h b/include/box2d/callbacks.h index d1e4b653..05ce7038 100644 --- a/include/box2d/callbacks.h +++ b/include/box2d/callbacks.h @@ -48,7 +48,7 @@ typedef void b2EndContactFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, void* conte /// get an EndContact callback. However, you may get a BeginContact callback /// the next step. 
/// - the supplied manifold has impulse values from the previous frame -typedef bool b2PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, void* context); +typedef bool b2PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color, void* context); BOX2D_API void b2World_SetPreSolveCallback(b2WorldId worldId, b2PreSolveFcn* fcn, void* context); /// This lets you inspect a contact after the solver is finished. This is useful diff --git a/include/box2d/color.h b/include/box2d/color.h index eed9e4a8..f1b60890 100644 --- a/include/box2d/color.h +++ b/include/box2d/color.h @@ -11,479 +11,171 @@ typedef struct b2Color enum b2HexColor { - b2_colorSnow = 0xfffafa, - b2_colorGhostWhite = 0xf8f8ff, - b2_colorWhiteSmoke = 0xf5f5f5, - b2_colorGainsboro = 0xdcdcdc, - b2_colorFloralWhite = 0xfffaf0, - b2_colorOldLace = 0xfdf5e6, - b2_colorLinen = 0xfaf0e6, - b2_colorAntiqueWhite = 0xfaebd7, - b2_colorPapayaWhip = 0xffefd5, - b2_colorBlanchedAlmond = 0xffebcd, - b2_colorBisque = 0xffe4c4, - b2_colorPeachPuff = 0xffdab9, - b2_colorNavajoWhite = 0xffdead, - b2_colorMoccasin = 0xffe4b5, - b2_colorCornsilk = 0xfff8dc, - b2_colorIvory = 0xfffff0, - b2_colorLemonChiffon = 0xfffacd, - b2_colorSeashell = 0xfff5ee, - b2_colorHoneydew = 0xf0fff0, - b2_colorMintCream = 0xf5fffa, - b2_colorAzure = 0xf0ffff, b2_colorAliceBlue = 0xf0f8ff, - b2_colorLavender = 0xe6e6fa, - b2_colorLavenderBlush = 0xfff0f5, - b2_colorMistyRose = 0xffe4e1, - b2_colorWhite = 0xffffff, - b2_colorBlack = 0x000000, - b2_colorDarkSlateGray = 0x2f4f4f, - b2_colorDimGray = 0x696969, - b2_colorSlateGray = 0x708090, - b2_colorLightSlateGray = 0x778899, - b2_colorGray = 0xbebebe, - b2_colorX11Gray = 0xbebebe, - b2_colorWebGray = 0x808080, - b2_colorLightGray = 0xd3d3d3, - b2_colorMidnightBlue = 0x191970, - b2_colorNavy = 0x000080, - b2_colorNavyBlue = 0x000080, - b2_colorCornflowerBlue = 0x6495ed, - b2_colorDarkSlateBlue = 0x483d8b, - b2_colorSlateBlue = 0x6a5acd, - 
b2_colorMediumSlateBlue = 0x7b68ee, - b2_colorLightSlateBlue = 0x8470ff, - b2_colorMediumBlue = 0x0000cd, - b2_colorRoyalBlue = 0x4169e1, - b2_colorBlue = 0x0000ff, - b2_colorDodgerBlue = 0x1e90ff, - b2_colorDeepSkyBlue = 0x00bfff, - b2_colorSkyBlue = 0x87ceeb, - b2_colorLightSkyBlue = 0x87cefa, - b2_colorSteelBlue = 0x4682b4, - b2_colorLightSteelBlue = 0xb0c4de, - b2_colorLightBlue = 0xadd8e6, - b2_colorPowderBlue = 0xb0e0e6, - b2_colorPaleTurquoise = 0xafeeee, - b2_colorDarkTurquoise = 0x00ced1, - b2_colorMediumTurquoise = 0x48d1cc, - b2_colorTurquoise = 0x40e0d0, - b2_colorCyan = 0x00ffff, - b2_colorAqua = 0x00ffff, - b2_colorLightCyan = 0xe0ffff, - b2_colorCadetBlue = 0x5f9ea0, - b2_colorMediumAquamarine = 0x66cdaa, - b2_colorAquamarine = 0x7fffd4, - b2_colorDarkGreen = 0x006400, - b2_colorDarkOliveGreen = 0x556b2f, - b2_colorDarkSeaGreen = 0x8fbc8f, - b2_colorSeaGreen = 0x2e8b57, - b2_colorMediumSeaGreen = 0x3cb371, - b2_colorLightSeaGreen = 0x20b2aa, - b2_colorPaleGreen = 0x98fb98, - b2_colorSpringGreen = 0x00ff7f, - b2_colorLawnGreen = 0x7cfc00, - b2_colorGreen = 0x00ff00, - b2_colorLime = 0x00ff00, - b2_colorX11Green = 0x00ff00, - b2_colorWebGreen = 0x008000, - b2_colorChartreuse = 0x7fff00, - b2_colorMediumSpringGreen = 0x00fa9a, - b2_colorGreenYellow = 0xadff2f, - b2_colorLimeGreen = 0x32cd32, - b2_colorYellowGreen = 0x9acd32, - b2_colorForestGreen = 0x228b22, - b2_colorOliveDrab = 0x6b8e23, - b2_colorDarkKhaki = 0xbdb76b, - b2_colorKhaki = 0xf0e68c, - b2_colorPaleGoldenrod = 0xeee8aa, - b2_colorLightGoldenrodYellow = 0xfafad2, - b2_colorLightYellow = 0xffffe0, - b2_colorYellow = 0xffff00, - b2_colorGold = 0xffd700, - b2_colorLightGoldenrod = 0xeedd82, - b2_colorGoldenrod = 0xdaa520, - b2_colorDarkGoldenrod = 0xb8860b, - b2_colorRosyBrown = 0xbc8f8f, - b2_colorIndianRed = 0xcd5c5c, - b2_colorSaddleBrown = 0x8b4513, - b2_colorSienna = 0xa0522d, - b2_colorPeru = 0xcd853f, - b2_colorBurlywood = 0xdeb887, - b2_colorBeige = 0xf5f5dc, - b2_colorWheat = 
0xf5deb3, - b2_colorSandyBrown = 0xf4a460, - b2_colorTan = 0xd2b48c, - b2_colorChocolate = 0xd2691e, - b2_colorFirebrick = 0xb22222, - b2_colorBrown = 0xa52a2a, - b2_colorDarkSalmon = 0xe9967a, - b2_colorSalmon = 0xfa8072, - b2_colorLightSalmon = 0xffa07a, - b2_colorOrange = 0xffa500, - b2_colorDarkOrange = 0xff8c00, - b2_colorCoral = 0xff7f50, - b2_colorLightCoral = 0xf08080, - b2_colorTomato = 0xff6347, - b2_colorOrangeRed = 0xff4500, - b2_colorRed = 0xff0000, - b2_colorHotPink = 0xff69b4, - b2_colorDeepPink = 0xff1493, - b2_colorPink = 0xffc0cb, - b2_colorLightPink = 0xffb6c1, - b2_colorPaleVioletRed = 0xdb7093, - b2_colorMaroon = 0xb03060, - b2_colorX11Maroon = 0xb03060, - b2_colorWebMaroon = 0x800000, - b2_colorMediumVioletRed = 0xc71585, - b2_colorVioletRed = 0xd02090, - b2_colorMagenta = 0xff00ff, - b2_colorFuchsia = 0xff00ff, - b2_colorViolet = 0xee82ee, - b2_colorPlum = 0xdda0dd, - b2_colorOrchid = 0xda70d6, - b2_colorMediumOrchid = 0xba55d3, - b2_colorDarkOrchid = 0x9932cc, - b2_colorDarkViolet = 0x9400d3, - b2_colorBlueViolet = 0x8a2be2, - b2_colorPurple = 0xa020f0, - b2_colorX11Purple = 0xa020f0, - b2_colorWebPurple = 0x800080, - b2_colorMediumPurple = 0x9370db, - b2_colorThistle = 0xd8bfd8, - b2_colorSnow1 = 0xfffafa, - b2_colorSnow2 = 0xeee9e9, - b2_colorSnow3 = 0xcdc9c9, - b2_colorSnow4 = 0x8b8989, - b2_colorSeashell1 = 0xfff5ee, - b2_colorSeashell2 = 0xeee5de, - b2_colorSeashell3 = 0xcdc5bf, - b2_colorSeashell4 = 0x8b8682, + b2_colorAntiqueWhite = 0xfaebd7, b2_colorAntiqueWhite1 = 0xffefdb, b2_colorAntiqueWhite2 = 0xeedfcc, b2_colorAntiqueWhite3 = 0xcdc0b0, b2_colorAntiqueWhite4 = 0x8b8378, - b2_colorBisque1 = 0xffe4c4, - b2_colorBisque2 = 0xeed5b7, - b2_colorBisque3 = 0xcdb79e, - b2_colorBisque4 = 0x8b7d6b, - b2_colorPeachPuff1 = 0xffdab9, - b2_colorPeachPuff2 = 0xeecbad, - b2_colorPeachPuff3 = 0xcdaf95, - b2_colorPeachPuff4 = 0x8b7765, - b2_colorNavajoWhite1 = 0xffdead, - b2_colorNavajoWhite2 = 0xeecfa1, - b2_colorNavajoWhite3 = 0xcdb38b, - 
b2_colorNavajoWhite4 = 0x8b795e, - b2_colorLemonChiffon1 = 0xfffacd, - b2_colorLemonChiffon2 = 0xeee9bf, - b2_colorLemonChiffon3 = 0xcdc9a5, - b2_colorLemonChiffon4 = 0x8b8970, - b2_colorCornsilk1 = 0xfff8dc, - b2_colorCornsilk2 = 0xeee8cd, - b2_colorCornsilk3 = 0xcdc8b1, - b2_colorCornsilk4 = 0x8b8878, - b2_colorIvory1 = 0xfffff0, - b2_colorIvory2 = 0xeeeee0, - b2_colorIvory3 = 0xcdcdc1, - b2_colorIvory4 = 0x8b8b83, - b2_colorHoneydew1 = 0xf0fff0, - b2_colorHoneydew2 = 0xe0eee0, - b2_colorHoneydew3 = 0xc1cdc1, - b2_colorHoneydew4 = 0x838b83, - b2_colorLavenderBlush1 = 0xfff0f5, - b2_colorLavenderBlush2 = 0xeee0e5, - b2_colorLavenderBlush3 = 0xcdc1c5, - b2_colorLavenderBlush4 = 0x8b8386, - b2_colorMistyRose1 = 0xffe4e1, - b2_colorMistyRose2 = 0xeed5d2, - b2_colorMistyRose3 = 0xcdb7b5, - b2_colorMistyRose4 = 0x8b7d7b, + b2_colorAqua = 0x00ffff, + b2_colorAquamarine = 0x7fffd4, + b2_colorAquamarine1 = 0x7fffd4, + b2_colorAquamarine2 = 0x76eec6, + b2_colorAquamarine3 = 0x66cdaa, + b2_colorAquamarine4 = 0x458b74, + b2_colorAzure = 0xf0ffff, b2_colorAzure1 = 0xf0ffff, b2_colorAzure2 = 0xe0eeee, b2_colorAzure3 = 0xc1cdcd, b2_colorAzure4 = 0x838b8b, - b2_colorSlateBlue1 = 0x836fff, - b2_colorSlateBlue2 = 0x7a67ee, - b2_colorSlateBlue3 = 0x6959cd, - b2_colorSlateBlue4 = 0x473c8b, - b2_colorRoyalBlue1 = 0x4876ff, - b2_colorRoyalBlue2 = 0x436eee, - b2_colorRoyalBlue3 = 0x3a5fcd, - b2_colorRoyalBlue4 = 0x27408b, + b2_colorBeige = 0xf5f5dc, + b2_colorBisque = 0xffe4c4, + b2_colorBisque1 = 0xffe4c4, + b2_colorBisque2 = 0xeed5b7, + b2_colorBisque3 = 0xcdb79e, + b2_colorBisque4 = 0x8b7d6b, + b2_colorBlack = 0x000000, + b2_colorBlanchedAlmond = 0xffebcd, + b2_colorBlue = 0x0000ff, b2_colorBlue1 = 0x0000ff, b2_colorBlue2 = 0x0000ee, b2_colorBlue3 = 0x0000cd, b2_colorBlue4 = 0x00008b, - b2_colorDodgerBlue1 = 0x1e90ff, - b2_colorDodgerBlue2 = 0x1c86ee, - b2_colorDodgerBlue3 = 0x1874cd, - b2_colorDodgerBlue4 = 0x104e8b, - b2_colorSteelBlue1 = 0x63b8ff, - b2_colorSteelBlue2 = 0x5cacee, 
- b2_colorSteelBlue3 = 0x4f94cd, - b2_colorSteelBlue4 = 0x36648b, - b2_colorDeepSkyBlue1 = 0x00bfff, - b2_colorDeepSkyBlue2 = 0x00b2ee, - b2_colorDeepSkyBlue3 = 0x009acd, - b2_colorDeepSkyBlue4 = 0x00688b, - b2_colorSkyBlue1 = 0x87ceff, - b2_colorSkyBlue2 = 0x7ec0ee, - b2_colorSkyBlue3 = 0x6ca6cd, - b2_colorSkyBlue4 = 0x4a708b, - b2_colorLightSkyBlue1 = 0xb0e2ff, - b2_colorLightSkyBlue2 = 0xa4d3ee, - b2_colorLightSkyBlue3 = 0x8db6cd, - b2_colorLightSkyBlue4 = 0x607b8b, - b2_colorSlateGray1 = 0xc6e2ff, - b2_colorSlateGray2 = 0xb9d3ee, - b2_colorSlateGray3 = 0x9fb6cd, - b2_colorSlateGray4 = 0x6c7b8b, - b2_colorLightSteelBlue1 = 0xcae1ff, - b2_colorLightSteelBlue2 = 0xbcd2ee, - b2_colorLightSteelBlue3 = 0xa2b5cd, - b2_colorLightSteelBlue4 = 0x6e7b8b, - b2_colorLightBlue1 = 0xbfefff, - b2_colorLightBlue2 = 0xb2dfee, - b2_colorLightBlue3 = 0x9ac0cd, - b2_colorLightBlue4 = 0x68838b, - b2_colorLightCyan1 = 0xe0ffff, - b2_colorLightCyan2 = 0xd1eeee, - b2_colorLightCyan3 = 0xb4cdcd, - b2_colorLightCyan4 = 0x7a8b8b, - b2_colorPaleTurquoise1 = 0xbbffff, - b2_colorPaleTurquoise2 = 0xaeeeee, - b2_colorPaleTurquoise3 = 0x96cdcd, - b2_colorPaleTurquoise4 = 0x668b8b, - b2_colorCadetBlue1 = 0x98f5ff, - b2_colorCadetBlue2 = 0x8ee5ee, - b2_colorCadetBlue3 = 0x7ac5cd, - b2_colorCadetBlue4 = 0x53868b, - b2_colorTurquoise1 = 0x00f5ff, - b2_colorTurquoise2 = 0x00e5ee, - b2_colorTurquoise3 = 0x00c5cd, - b2_colorTurquoise4 = 0x00868b, - b2_colorCyan1 = 0x00ffff, - b2_colorCyan2 = 0x00eeee, - b2_colorCyan3 = 0x00cdcd, - b2_colorCyan4 = 0x008b8b, - b2_colorDarkSlateGray1 = 0x97ffff, - b2_colorDarkSlateGray2 = 0x8deeee, - b2_colorDarkSlateGray3 = 0x79cdcd, - b2_colorDarkSlateGray4 = 0x528b8b, - b2_colorAquamarine1 = 0x7fffd4, - b2_colorAquamarine2 = 0x76eec6, - b2_colorAquamarine3 = 0x66cdaa, - b2_colorAquamarine4 = 0x458b74, - b2_colorDarkSeaGreen1 = 0xc1ffc1, - b2_colorDarkSeaGreen2 = 0xb4eeb4, - b2_colorDarkSeaGreen3 = 0x9bcd9b, - b2_colorDarkSeaGreen4 = 0x698b69, - b2_colorSeaGreen1 = 
0x54ff9f, - b2_colorSeaGreen2 = 0x4eee94, - b2_colorSeaGreen3 = 0x43cd80, - b2_colorSeaGreen4 = 0x2e8b57, - b2_colorPaleGreen1 = 0x9aff9a, - b2_colorPaleGreen2 = 0x90ee90, - b2_colorPaleGreen3 = 0x7ccd7c, - b2_colorPaleGreen4 = 0x548b54, - b2_colorSpringGreen1 = 0x00ff7f, - b2_colorSpringGreen2 = 0x00ee76, - b2_colorSpringGreen3 = 0x00cd66, - b2_colorSpringGreen4 = 0x008b45, - b2_colorGreen1 = 0x00ff00, - b2_colorGreen2 = 0x00ee00, - b2_colorGreen3 = 0x00cd00, - b2_colorGreen4 = 0x008b00, - b2_colorChartreuse1 = 0x7fff00, - b2_colorChartreuse2 = 0x76ee00, - b2_colorChartreuse3 = 0x66cd00, - b2_colorChartreuse4 = 0x458b00, - b2_colorOliveDrab1 = 0xc0ff3e, - b2_colorOliveDrab2 = 0xb3ee3a, - b2_colorOliveDrab3 = 0x9acd32, - b2_colorOliveDrab4 = 0x698b22, - b2_colorDarkOliveGreen1 = 0xcaff70, - b2_colorDarkOliveGreen2 = 0xbcee68, - b2_colorDarkOliveGreen3 = 0xa2cd5a, - b2_colorDarkOliveGreen4 = 0x6e8b3d, - b2_colorKhaki1 = 0xfff68f, - b2_colorKhaki2 = 0xeee685, - b2_colorKhaki3 = 0xcdc673, - b2_colorKhaki4 = 0x8b864e, - b2_colorLightGoldenrod1 = 0xffec8b, - b2_colorLightGoldenrod2 = 0xeedc82, - b2_colorLightGoldenrod3 = 0xcdbe70, - b2_colorLightGoldenrod4 = 0x8b814c, - b2_colorLightYellow1 = 0xffffe0, - b2_colorLightYellow2 = 0xeeeed1, - b2_colorLightYellow3 = 0xcdcdb4, - b2_colorLightYellow4 = 0x8b8b7a, - b2_colorYellow1 = 0xffff00, - b2_colorYellow2 = 0xeeee00, - b2_colorYellow3 = 0xcdcd00, - b2_colorYellow4 = 0x8b8b00, - b2_colorGold1 = 0xffd700, - b2_colorGold2 = 0xeec900, - b2_colorGold3 = 0xcdad00, - b2_colorGold4 = 0x8b7500, - b2_colorGoldenrod1 = 0xffc125, - b2_colorGoldenrod2 = 0xeeb422, - b2_colorGoldenrod3 = 0xcd9b1d, - b2_colorGoldenrod4 = 0x8b6914, - b2_colorDarkGoldenrod1 = 0xffb90f, - b2_colorDarkGoldenrod2 = 0xeead0e, - b2_colorDarkGoldenrod3 = 0xcd950c, - b2_colorDarkGoldenrod4 = 0x8b6508, - b2_colorRosyBrown1 = 0xffc1c1, - b2_colorRosyBrown2 = 0xeeb4b4, - b2_colorRosyBrown3 = 0xcd9b9b, - b2_colorRosyBrown4 = 0x8b6969, - b2_colorIndianRed1 = 0xff6a6a, 
- b2_colorIndianRed2 = 0xee6363, - b2_colorIndianRed3 = 0xcd5555, - b2_colorIndianRed4 = 0x8b3a3a, - b2_colorSienna1 = 0xff8247, - b2_colorSienna2 = 0xee7942, - b2_colorSienna3 = 0xcd6839, - b2_colorSienna4 = 0x8b4726, + b2_colorBlueViolet = 0x8a2be2, + b2_colorBrown = 0xa52a2a, + b2_colorBrown1 = 0xff4040, + b2_colorBrown2 = 0xee3b3b, + b2_colorBrown3 = 0xcd3333, + b2_colorBrown4 = 0x8b2323, + b2_colorBurlywood = 0xdeb887, b2_colorBurlywood1 = 0xffd39b, b2_colorBurlywood2 = 0xeec591, b2_colorBurlywood3 = 0xcdaa7d, b2_colorBurlywood4 = 0x8b7355, - b2_colorWheat1 = 0xffe7ba, - b2_colorWheat2 = 0xeed8ae, - b2_colorWheat3 = 0xcdba96, - b2_colorWheat4 = 0x8b7e66, - b2_colorTan1 = 0xffa54f, - b2_colorTan2 = 0xee9a49, - b2_colorTan3 = 0xcd853f, - b2_colorTan4 = 0x8b5a2b, + b2_colorCadetBlue = 0x5f9ea0, + b2_colorCadetBlue1 = 0x98f5ff, + b2_colorCadetBlue2 = 0x8ee5ee, + b2_colorCadetBlue3 = 0x7ac5cd, + b2_colorCadetBlue4 = 0x53868b, + b2_colorChartreuse = 0x7fff00, + b2_colorChartreuse1 = 0x7fff00, + b2_colorChartreuse2 = 0x76ee00, + b2_colorChartreuse3 = 0x66cd00, + b2_colorChartreuse4 = 0x458b00, + b2_colorChocolate = 0xd2691e, b2_colorChocolate1 = 0xff7f24, b2_colorChocolate2 = 0xee7621, b2_colorChocolate3 = 0xcd661d, b2_colorChocolate4 = 0x8b4513, - b2_colorFirebrick1 = 0xff3030, - b2_colorFirebrick2 = 0xee2c2c, - b2_colorFirebrick3 = 0xcd2626, - b2_colorFirebrick4 = 0x8b1a1a, - b2_colorBrown1 = 0xff4040, - b2_colorBrown2 = 0xee3b3b, - b2_colorBrown3 = 0xcd3333, - b2_colorBrown4 = 0x8b2323, - b2_colorSalmon1 = 0xff8c69, - b2_colorSalmon2 = 0xee8262, - b2_colorSalmon3 = 0xcd7054, - b2_colorSalmon4 = 0x8b4c39, - b2_colorLightSalmon1 = 0xffa07a, - b2_colorLightSalmon2 = 0xee9572, - b2_colorLightSalmon3 = 0xcd8162, - b2_colorLightSalmon4 = 0x8b5742, - b2_colorOrange1 = 0xffa500, - b2_colorOrange2 = 0xee9a00, - b2_colorOrange3 = 0xcd8500, - b2_colorOrange4 = 0x8b5a00, - b2_colorDarkOrange1 = 0xff7f00, - b2_colorDarkOrange2 = 0xee7600, - b2_colorDarkOrange3 = 0xcd6600, - 
b2_colorDarkOrange4 = 0x8b4500, + b2_colorCoral = 0xff7f50, b2_colorCoral1 = 0xff7256, b2_colorCoral2 = 0xee6a50, b2_colorCoral3 = 0xcd5b45, b2_colorCoral4 = 0x8b3e2f, - b2_colorTomato1 = 0xff6347, - b2_colorTomato2 = 0xee5c42, - b2_colorTomato3 = 0xcd4f39, - b2_colorTomato4 = 0x8b3626, - b2_colorOrangeRed1 = 0xff4500, - b2_colorOrangeRed2 = 0xee4000, - b2_colorOrangeRed3 = 0xcd3700, - b2_colorOrangeRed4 = 0x8b2500, - b2_colorRed1 = 0xff0000, - b2_colorRed2 = 0xee0000, - b2_colorRed3 = 0xcd0000, - b2_colorRed4 = 0x8b0000, - b2_colorDeepPink1 = 0xff1493, - b2_colorDeepPink2 = 0xee1289, - b2_colorDeepPink3 = 0xcd1076, - b2_colorDeepPink4 = 0x8b0a50, - b2_colorHotPink1 = 0xff6eb4, - b2_colorHotPink2 = 0xee6aa7, - b2_colorHotPink3 = 0xcd6090, - b2_colorHotPink4 = 0x8b3a62, - b2_colorPink1 = 0xffb5c5, - b2_colorPink2 = 0xeea9b8, - b2_colorPink3 = 0xcd919e, - b2_colorPink4 = 0x8b636c, - b2_colorLightPink1 = 0xffaeb9, - b2_colorLightPink2 = 0xeea2ad, - b2_colorLightPink3 = 0xcd8c95, - b2_colorLightPink4 = 0x8b5f65, - b2_colorPaleVioletRed1 = 0xff82ab, - b2_colorPaleVioletRed2 = 0xee799f, - b2_colorPaleVioletRed3 = 0xcd6889, - b2_colorPaleVioletRed4 = 0x8b475d, - b2_colorMaroon1 = 0xff34b3, - b2_colorMaroon2 = 0xee30a7, - b2_colorMaroon3 = 0xcd2990, - b2_colorMaroon4 = 0x8b1c62, - b2_colorVioletRed1 = 0xff3e96, - b2_colorVioletRed2 = 0xee3a8c, - b2_colorVioletRed3 = 0xcd3278, - b2_colorVioletRed4 = 0x8b2252, - b2_colorMagenta1 = 0xff00ff, - b2_colorMagenta2 = 0xee00ee, - b2_colorMagenta3 = 0xcd00cd, - b2_colorMagenta4 = 0x8b008b, - b2_colorOrchid1 = 0xff83fa, - b2_colorOrchid2 = 0xee7ae9, - b2_colorOrchid3 = 0xcd69c9, - b2_colorOrchid4 = 0x8b4789, - b2_colorPlum1 = 0xffbbff, - b2_colorPlum2 = 0xeeaeee, - b2_colorPlum3 = 0xcd96cd, - b2_colorPlum4 = 0x8b668b, - b2_colorMediumOrchid1 = 0xe066ff, - b2_colorMediumOrchid2 = 0xd15fee, - b2_colorMediumOrchid3 = 0xb452cd, - b2_colorMediumOrchid4 = 0x7a378b, + b2_colorCornflowerBlue = 0x6495ed, + b2_colorCornsilk = 0xfff8dc, + 
b2_colorCornsilk1 = 0xfff8dc, + b2_colorCornsilk2 = 0xeee8cd, + b2_colorCornsilk3 = 0xcdc8b1, + b2_colorCornsilk4 = 0x8b8878, + b2_colorCrimson = 0xdc143c, + b2_colorCyan = 0x00ffff, + b2_colorCyan1 = 0x00ffff, + b2_colorCyan2 = 0x00eeee, + b2_colorCyan3 = 0x00cdcd, + b2_colorCyan4 = 0x008b8b, + b2_colorDarkBlue = 0x00008b, + b2_colorDarkCyan = 0x008b8b, + b2_colorDarkGoldenrod = 0xb8860b, + b2_colorDarkGoldenrod1 = 0xffb90f, + b2_colorDarkGoldenrod2 = 0xeead0e, + b2_colorDarkGoldenrod3 = 0xcd950c, + b2_colorDarkGoldenrod4 = 0x8b6508, + b2_colorDarkGray = 0xa9a9a9, + b2_colorDarkGreen = 0x006400, + b2_colorDarkKhaki = 0xbdb76b, + b2_colorDarkMagenta = 0x8b008b, + b2_colorDarkOliveGreen = 0x556b2f, + b2_colorDarkOliveGreen1 = 0xcaff70, + b2_colorDarkOliveGreen2 = 0xbcee68, + b2_colorDarkOliveGreen3 = 0xa2cd5a, + b2_colorDarkOliveGreen4 = 0x6e8b3d, + b2_colorDarkOrange = 0xff8c00, + b2_colorDarkOrange1 = 0xff7f00, + b2_colorDarkOrange2 = 0xee7600, + b2_colorDarkOrange3 = 0xcd6600, + b2_colorDarkOrange4 = 0x8b4500, + b2_colorDarkOrchid = 0x9932cc, b2_colorDarkOrchid1 = 0xbf3eff, b2_colorDarkOrchid2 = 0xb23aee, b2_colorDarkOrchid3 = 0x9a32cd, b2_colorDarkOrchid4 = 0x68228b, - b2_colorPurple1 = 0x9b30ff, - b2_colorPurple2 = 0x912cee, - b2_colorPurple3 = 0x7d26cd, - b2_colorPurple4 = 0x551a8b, - b2_colorMediumPurple1 = 0xab82ff, - b2_colorMediumPurple2 = 0x9f79ee, - b2_colorMediumPurple3 = 0x8968cd, - b2_colorMediumPurple4 = 0x5d478b, - b2_colorThistle1 = 0xffe1ff, - b2_colorThistle2 = 0xeed2ee, - b2_colorThistle3 = 0xcdb5cd, - b2_colorThistle4 = 0x8b7b8b, - b2_colorGray0 = 0x000000, - b2_colorGray1 = 0x030303, - b2_colorGray2 = 0x050505, - b2_colorGray3 = 0x080808, - b2_colorGray4 = 0x0a0a0a, - b2_colorGray5 = 0x0d0d0d, - b2_colorGray6 = 0x0f0f0f, - b2_colorGray7 = 0x121212, - b2_colorGray8 = 0x141414, - b2_colorGray9 = 0x171717, - b2_colorGray10 = 0x1a1a1a, - b2_colorGray11 = 0x1c1c1c, - b2_colorGray12 = 0x1f1f1f, - b2_colorGray13 = 0x212121, - b2_colorGray14 = 
0x242424, - b2_colorGray15 = 0x262626, - b2_colorGray16 = 0x292929, - b2_colorGray17 = 0x2b2b2b, + b2_colorDarkRed = 0x8b0000, + b2_colorDarkSalmon = 0xe9967a, + b2_colorDarkSeaGreen = 0x8fbc8f, + b2_colorDarkSeaGreen1 = 0xc1ffc1, + b2_colorDarkSeaGreen2 = 0xb4eeb4, + b2_colorDarkSeaGreen3 = 0x9bcd9b, + b2_colorDarkSeaGreen4 = 0x698b69, + b2_colorDarkSlateBlue = 0x483d8b, + b2_colorDarkSlateGray = 0x2f4f4f, + b2_colorDarkSlateGray1 = 0x97ffff, + b2_colorDarkSlateGray2 = 0x8deeee, + b2_colorDarkSlateGray3 = 0x79cdcd, + b2_colorDarkSlateGray4 = 0x528b8b, + b2_colorDarkTurquoise = 0x00ced1, + b2_colorDarkViolet = 0x9400d3, + b2_colorDeepPink = 0xff1493, + b2_colorDeepPink1 = 0xff1493, + b2_colorDeepPink2 = 0xee1289, + b2_colorDeepPink3 = 0xcd1076, + b2_colorDeepPink4 = 0x8b0a50, + b2_colorDeepSkyBlue = 0x00bfff, + b2_colorDeepSkyBlue1 = 0x00bfff, + b2_colorDeepSkyBlue2 = 0x00b2ee, + b2_colorDeepSkyBlue3 = 0x009acd, + b2_colorDeepSkyBlue4 = 0x00688b, + b2_colorDimGray = 0x696969, + b2_colorDodgerBlue = 0x1e90ff, + b2_colorDodgerBlue1 = 0x1e90ff, + b2_colorDodgerBlue2 = 0x1c86ee, + b2_colorDodgerBlue3 = 0x1874cd, + b2_colorDodgerBlue4 = 0x104e8b, + b2_colorFirebrick = 0xb22222, + b2_colorFirebrick1 = 0xff3030, + b2_colorFirebrick2 = 0xee2c2c, + b2_colorFirebrick3 = 0xcd2626, + b2_colorFirebrick4 = 0x8b1a1a, + b2_colorFloralWhite = 0xfffaf0, + b2_colorForestGreen = 0x228b22, + b2_colorFuchsia = 0xff00ff, + b2_colorGainsboro = 0xdcdcdc, + b2_colorGhostWhite = 0xf8f8ff, + b2_colorGold = 0xffd700, + b2_colorGold1 = 0xffd700, + b2_colorGold2 = 0xeec900, + b2_colorGold3 = 0xcdad00, + b2_colorGold4 = 0x8b7500, + b2_colorGoldenrod = 0xdaa520, + b2_colorGoldenrod1 = 0xffc125, + b2_colorGoldenrod2 = 0xeeb422, + b2_colorGoldenrod3 = 0xcd9b1d, + b2_colorGoldenrod4 = 0x8b6914, + b2_colorGray = 0xbebebe, + b2_colorGray0 = 0x000000, + b2_colorGray1 = 0x030303, + b2_colorGray10 = 0x1a1a1a, + b2_colorGray100 = 0xffffff, + b2_colorGray11 = 0x1c1c1c, + b2_colorGray12 = 0x1f1f1f, + 
b2_colorGray13 = 0x212121, + b2_colorGray14 = 0x242424, + b2_colorGray15 = 0x262626, + b2_colorGray16 = 0x292929, + b2_colorGray17 = 0x2b2b2b, b2_colorGray18 = 0x2e2e2e, b2_colorGray19 = 0x303030, + b2_colorGray2 = 0x050505, b2_colorGray20 = 0x333333, b2_colorGray21 = 0x363636, b2_colorGray22 = 0x383838, @@ -494,6 +186,7 @@ enum b2HexColor b2_colorGray27 = 0x454545, b2_colorGray28 = 0x474747, b2_colorGray29 = 0x4a4a4a, + b2_colorGray3 = 0x080808, b2_colorGray30 = 0x4d4d4d, b2_colorGray31 = 0x4f4f4f, b2_colorGray32 = 0x525252, @@ -504,6 +197,7 @@ enum b2HexColor b2_colorGray37 = 0x5e5e5e, b2_colorGray38 = 0x616161, b2_colorGray39 = 0x636363, + b2_colorGray4 = 0x0a0a0a, b2_colorGray40 = 0x666666, b2_colorGray41 = 0x696969, b2_colorGray42 = 0x6b6b6b, @@ -514,6 +208,7 @@ enum b2HexColor b2_colorGray47 = 0x787878, b2_colorGray48 = 0x7a7a7a, b2_colorGray49 = 0x7d7d7d, + b2_colorGray5 = 0x0d0d0d, b2_colorGray50 = 0x7f7f7f, b2_colorGray51 = 0x828282, b2_colorGray52 = 0x858585, @@ -524,6 +219,7 @@ enum b2HexColor b2_colorGray57 = 0x919191, b2_colorGray58 = 0x949494, b2_colorGray59 = 0x969696, + b2_colorGray6 = 0x0f0f0f, b2_colorGray60 = 0x999999, b2_colorGray61 = 0x9c9c9c, b2_colorGray62 = 0x9e9e9e, @@ -534,6 +230,7 @@ enum b2HexColor b2_colorGray67 = 0xababab, b2_colorGray68 = 0xadadad, b2_colorGray69 = 0xb0b0b0, + b2_colorGray7 = 0x121212, b2_colorGray70 = 0xb3b3b3, b2_colorGray71 = 0xb5b5b5, b2_colorGray72 = 0xb8b8b8, @@ -544,6 +241,7 @@ enum b2HexColor b2_colorGray77 = 0xc4c4c4, b2_colorGray78 = 0xc7c7c7, b2_colorGray79 = 0xc9c9c9, + b2_colorGray8 = 0x141414, b2_colorGray80 = 0xcccccc, b2_colorGray81 = 0xcfcfcf, b2_colorGray82 = 0xd1d1d1, @@ -554,6 +252,7 @@ enum b2HexColor b2_colorGray87 = 0xdedede, b2_colorGray88 = 0xe0e0e0, b2_colorGray89 = 0xe3e3e3, + b2_colorGray9 = 0x171717, b2_colorGray90 = 0xe5e5e5, b2_colorGray91 = 0xe8e8e8, b2_colorGray92 = 0xebebeb, @@ -564,19 +263,320 @@ enum b2HexColor b2_colorGray97 = 0xf7f7f7, b2_colorGray98 = 0xfafafa, b2_colorGray99 = 
0xfcfcfc, - b2_colorGray100 = 0xffffff, - b2_colorDarkGray = 0xa9a9a9, - b2_colorDarkBlue = 0x00008b, - b2_colorDarkCyan = 0x008b8b, - b2_colorDarkMagenta = 0x8b008b, - b2_colorDarkRed = 0x8b0000, - b2_colorLightGreen = 0x90ee90, - b2_colorCrimson = 0xdc143c, + b2_colorGreen = 0x00ff00, + b2_colorGreen1 = 0x00ff00, + b2_colorGreen2 = 0x00ee00, + b2_colorGreen3 = 0x00cd00, + b2_colorGreen4 = 0x008b00, + b2_colorGreenYellow = 0xadff2f, + b2_colorHoneydew = 0xf0fff0, + b2_colorHoneydew1 = 0xf0fff0, + b2_colorHoneydew2 = 0xe0eee0, + b2_colorHoneydew3 = 0xc1cdc1, + b2_colorHoneydew4 = 0x838b83, + b2_colorHotPink = 0xff69b4, + b2_colorHotPink1 = 0xff6eb4, + b2_colorHotPink2 = 0xee6aa7, + b2_colorHotPink3 = 0xcd6090, + b2_colorHotPink4 = 0x8b3a62, + b2_colorIndianRed = 0xcd5c5c, + b2_colorIndianRed1 = 0xff6a6a, + b2_colorIndianRed2 = 0xee6363, + b2_colorIndianRed3 = 0xcd5555, + b2_colorIndianRed4 = 0x8b3a3a, b2_colorIndigo = 0x4b0082, + b2_colorIvory = 0xfffff0, + b2_colorIvory1 = 0xfffff0, + b2_colorIvory2 = 0xeeeee0, + b2_colorIvory3 = 0xcdcdc1, + b2_colorIvory4 = 0x8b8b83, + b2_colorKhaki = 0xf0e68c, + b2_colorKhaki1 = 0xfff68f, + b2_colorKhaki2 = 0xeee685, + b2_colorKhaki3 = 0xcdc673, + b2_colorKhaki4 = 0x8b864e, + b2_colorLavender = 0xe6e6fa, + b2_colorLavenderBlush = 0xfff0f5, + b2_colorLavenderBlush1 = 0xfff0f5, + b2_colorLavenderBlush2 = 0xeee0e5, + b2_colorLavenderBlush3 = 0xcdc1c5, + b2_colorLavenderBlush4 = 0x8b8386, + b2_colorLawnGreen = 0x7cfc00, + b2_colorLemonChiffon = 0xfffacd, + b2_colorLemonChiffon1 = 0xfffacd, + b2_colorLemonChiffon2 = 0xeee9bf, + b2_colorLemonChiffon3 = 0xcdc9a5, + b2_colorLemonChiffon4 = 0x8b8970, + b2_colorLightBlue = 0xadd8e6, + b2_colorLightBlue1 = 0xbfefff, + b2_colorLightBlue2 = 0xb2dfee, + b2_colorLightBlue3 = 0x9ac0cd, + b2_colorLightBlue4 = 0x68838b, + b2_colorLightCoral = 0xf08080, + b2_colorLightCyan = 0xe0ffff, + b2_colorLightCyan1 = 0xe0ffff, + b2_colorLightCyan2 = 0xd1eeee, + b2_colorLightCyan3 = 0xb4cdcd, + 
b2_colorLightCyan4 = 0x7a8b8b, + b2_colorLightGoldenrod = 0xeedd82, + b2_colorLightGoldenrod1 = 0xffec8b, + b2_colorLightGoldenrod2 = 0xeedc82, + b2_colorLightGoldenrod3 = 0xcdbe70, + b2_colorLightGoldenrod4 = 0x8b814c, + b2_colorLightGoldenrodYellow = 0xfafad2, + b2_colorLightGray = 0xd3d3d3, + b2_colorLightGreen = 0x90ee90, + b2_colorLightPink = 0xffb6c1, + b2_colorLightPink1 = 0xffaeb9, + b2_colorLightPink2 = 0xeea2ad, + b2_colorLightPink3 = 0xcd8c95, + b2_colorLightPink4 = 0x8b5f65, + b2_colorLightSalmon = 0xffa07a, + b2_colorLightSalmon1 = 0xffa07a, + b2_colorLightSalmon2 = 0xee9572, + b2_colorLightSalmon3 = 0xcd8162, + b2_colorLightSalmon4 = 0x8b5742, + b2_colorLightSeaGreen = 0x20b2aa, + b2_colorLightSkyBlue = 0x87cefa, + b2_colorLightSkyBlue1 = 0xb0e2ff, + b2_colorLightSkyBlue2 = 0xa4d3ee, + b2_colorLightSkyBlue3 = 0x8db6cd, + b2_colorLightSkyBlue4 = 0x607b8b, + b2_colorLightSlateBlue = 0x8470ff, + b2_colorLightSlateGray = 0x778899, + b2_colorLightSteelBlue = 0xb0c4de, + b2_colorLightSteelBlue1 = 0xcae1ff, + b2_colorLightSteelBlue2 = 0xbcd2ee, + b2_colorLightSteelBlue3 = 0xa2b5cd, + b2_colorLightSteelBlue4 = 0x6e7b8b, + b2_colorLightYellow = 0xffffe0, + b2_colorLightYellow1 = 0xffffe0, + b2_colorLightYellow2 = 0xeeeed1, + b2_colorLightYellow3 = 0xcdcdb4, + b2_colorLightYellow4 = 0x8b8b7a, + b2_colorLime = 0x00ff00, + b2_colorLimeGreen = 0x32cd32, + b2_colorLinen = 0xfaf0e6, + b2_colorMagenta = 0xff00ff, + b2_colorMagenta1 = 0xff00ff, + b2_colorMagenta2 = 0xee00ee, + b2_colorMagenta3 = 0xcd00cd, + b2_colorMagenta4 = 0x8b008b, + b2_colorMaroon = 0xb03060, + b2_colorMaroon1 = 0xff34b3, + b2_colorMaroon2 = 0xee30a7, + b2_colorMaroon3 = 0xcd2990, + b2_colorMaroon4 = 0x8b1c62, + b2_colorMediumAquamarine = 0x66cdaa, + b2_colorMediumBlue = 0x0000cd, + b2_colorMediumOrchid = 0xba55d3, + b2_colorMediumOrchid1 = 0xe066ff, + b2_colorMediumOrchid2 = 0xd15fee, + b2_colorMediumOrchid3 = 0xb452cd, + b2_colorMediumOrchid4 = 0x7a378b, + b2_colorMediumPurple = 0x9370db, + 
b2_colorMediumPurple1 = 0xab82ff, + b2_colorMediumPurple2 = 0x9f79ee, + b2_colorMediumPurple3 = 0x8968cd, + b2_colorMediumPurple4 = 0x5d478b, + b2_colorMediumSeaGreen = 0x3cb371, + b2_colorMediumSlateBlue = 0x7b68ee, + b2_colorMediumSpringGreen = 0x00fa9a, + b2_colorMediumTurquoise = 0x48d1cc, + b2_colorMediumVioletRed = 0xc71585, + b2_colorMidnightBlue = 0x191970, + b2_colorMintCream = 0xf5fffa, + b2_colorMistyRose = 0xffe4e1, + b2_colorMistyRose1 = 0xffe4e1, + b2_colorMistyRose2 = 0xeed5d2, + b2_colorMistyRose3 = 0xcdb7b5, + b2_colorMistyRose4 = 0x8b7d7b, + b2_colorMoccasin = 0xffe4b5, + b2_colorNavajoWhite = 0xffdead, + b2_colorNavajoWhite1 = 0xffdead, + b2_colorNavajoWhite2 = 0xeecfa1, + b2_colorNavajoWhite3 = 0xcdb38b, + b2_colorNavajoWhite4 = 0x8b795e, + b2_colorNavy = 0x000080, + b2_colorNavyBlue = 0x000080, + b2_colorOldLace = 0xfdf5e6, b2_colorOlive = 0x808000, + b2_colorOliveDrab = 0x6b8e23, + b2_colorOliveDrab1 = 0xc0ff3e, + b2_colorOliveDrab2 = 0xb3ee3a, + b2_colorOliveDrab3 = 0x9acd32, + b2_colorOliveDrab4 = 0x698b22, + b2_colorOrange = 0xffa500, + b2_colorOrange1 = 0xffa500, + b2_colorOrange2 = 0xee9a00, + b2_colorOrange3 = 0xcd8500, + b2_colorOrange4 = 0x8b5a00, + b2_colorOrangeRed = 0xff4500, + b2_colorOrangeRed1 = 0xff4500, + b2_colorOrangeRed2 = 0xee4000, + b2_colorOrangeRed3 = 0xcd3700, + b2_colorOrangeRed4 = 0x8b2500, + b2_colorOrchid = 0xda70d6, + b2_colorOrchid1 = 0xff83fa, + b2_colorOrchid2 = 0xee7ae9, + b2_colorOrchid3 = 0xcd69c9, + b2_colorOrchid4 = 0x8b4789, + b2_colorPaleGoldenrod = 0xeee8aa, + b2_colorPaleGreen = 0x98fb98, + b2_colorPaleGreen1 = 0x9aff9a, + b2_colorPaleGreen2 = 0x90ee90, + b2_colorPaleGreen3 = 0x7ccd7c, + b2_colorPaleGreen4 = 0x548b54, + b2_colorPaleTurquoise = 0xafeeee, + b2_colorPaleTurquoise1 = 0xbbffff, + b2_colorPaleTurquoise2 = 0xaeeeee, + b2_colorPaleTurquoise3 = 0x96cdcd, + b2_colorPaleTurquoise4 = 0x668b8b, + b2_colorPaleVioletRed = 0xdb7093, + b2_colorPaleVioletRed1 = 0xff82ab, + b2_colorPaleVioletRed2 = 
0xee799f, + b2_colorPaleVioletRed3 = 0xcd6889, + b2_colorPaleVioletRed4 = 0x8b475d, + b2_colorPapayaWhip = 0xffefd5, + b2_colorPeachPuff = 0xffdab9, + b2_colorPeachPuff1 = 0xffdab9, + b2_colorPeachPuff2 = 0xeecbad, + b2_colorPeachPuff3 = 0xcdaf95, + b2_colorPeachPuff4 = 0x8b7765, + b2_colorPeru = 0xcd853f, + b2_colorPink = 0xffc0cb, + b2_colorPink1 = 0xffb5c5, + b2_colorPink2 = 0xeea9b8, + b2_colorPink3 = 0xcd919e, + b2_colorPink4 = 0x8b636c, + b2_colorPlum = 0xdda0dd, + b2_colorPlum1 = 0xffbbff, + b2_colorPlum2 = 0xeeaeee, + b2_colorPlum3 = 0xcd96cd, + b2_colorPlum4 = 0x8b668b, + b2_colorPowderBlue = 0xb0e0e6, + b2_colorPurple = 0xa020f0, + b2_colorPurple1 = 0x9b30ff, + b2_colorPurple2 = 0x912cee, + b2_colorPurple3 = 0x7d26cd, + b2_colorPurple4 = 0x551a8b, b2_colorRebeccaPurple = 0x663399, + b2_colorRed = 0xff0000, + b2_colorRed1 = 0xff0000, + b2_colorRed2 = 0xee0000, + b2_colorRed3 = 0xcd0000, + b2_colorRed4 = 0x8b0000, + b2_colorRosyBrown = 0xbc8f8f, + b2_colorRosyBrown1 = 0xffc1c1, + b2_colorRosyBrown2 = 0xeeb4b4, + b2_colorRosyBrown3 = 0xcd9b9b, + b2_colorRosyBrown4 = 0x8b6969, + b2_colorRoyalBlue = 0x4169e1, + b2_colorRoyalBlue1 = 0x4876ff, + b2_colorRoyalBlue2 = 0x436eee, + b2_colorRoyalBlue3 = 0x3a5fcd, + b2_colorRoyalBlue4 = 0x27408b, + b2_colorSaddleBrown = 0x8b4513, + b2_colorSalmon = 0xfa8072, + b2_colorSalmon1 = 0xff8c69, + b2_colorSalmon2 = 0xee8262, + b2_colorSalmon3 = 0xcd7054, + b2_colorSalmon4 = 0x8b4c39, + b2_colorSandyBrown = 0xf4a460, + b2_colorSeaGreen = 0x2e8b57, + b2_colorSeaGreen1 = 0x54ff9f, + b2_colorSeaGreen2 = 0x4eee94, + b2_colorSeaGreen3 = 0x43cd80, + b2_colorSeaGreen4 = 0x2e8b57, + b2_colorSeashell = 0xfff5ee, + b2_colorSeashell1 = 0xfff5ee, + b2_colorSeashell2 = 0xeee5de, + b2_colorSeashell3 = 0xcdc5bf, + b2_colorSeashell4 = 0x8b8682, + b2_colorSienna = 0xa0522d, + b2_colorSienna1 = 0xff8247, + b2_colorSienna2 = 0xee7942, + b2_colorSienna3 = 0xcd6839, + b2_colorSienna4 = 0x8b4726, b2_colorSilver = 0xc0c0c0, - b2_colorTeal = 0x008080 
+ b2_colorSkyBlue = 0x87ceeb, + b2_colorSkyBlue1 = 0x87ceff, + b2_colorSkyBlue2 = 0x7ec0ee, + b2_colorSkyBlue3 = 0x6ca6cd, + b2_colorSkyBlue4 = 0x4a708b, + b2_colorSlateBlue = 0x6a5acd, + b2_colorSlateBlue1 = 0x836fff, + b2_colorSlateBlue2 = 0x7a67ee, + b2_colorSlateBlue3 = 0x6959cd, + b2_colorSlateBlue4 = 0x473c8b, + b2_colorSlateGray = 0x708090, + b2_colorSlateGray1 = 0xc6e2ff, + b2_colorSlateGray2 = 0xb9d3ee, + b2_colorSlateGray3 = 0x9fb6cd, + b2_colorSlateGray4 = 0x6c7b8b, + b2_colorSnow = 0xfffafa, + b2_colorSnow1 = 0xfffafa, + b2_colorSnow2 = 0xeee9e9, + b2_colorSnow3 = 0xcdc9c9, + b2_colorSnow4 = 0x8b8989, + b2_colorSpringGreen = 0x00ff7f, + b2_colorSpringGreen1 = 0x00ff7f, + b2_colorSpringGreen2 = 0x00ee76, + b2_colorSpringGreen3 = 0x00cd66, + b2_colorSpringGreen4 = 0x008b45, + b2_colorSteelBlue = 0x4682b4, + b2_colorSteelBlue1 = 0x63b8ff, + b2_colorSteelBlue2 = 0x5cacee, + b2_colorSteelBlue3 = 0x4f94cd, + b2_colorSteelBlue4 = 0x36648b, + b2_colorTan = 0xd2b48c, + b2_colorTan1 = 0xffa54f, + b2_colorTan2 = 0xee9a49, + b2_colorTan3 = 0xcd853f, + b2_colorTan4 = 0x8b5a2b, + b2_colorTeal = 0x008080, + b2_colorThistle = 0xd8bfd8, + b2_colorThistle1 = 0xffe1ff, + b2_colorThistle2 = 0xeed2ee, + b2_colorThistle3 = 0xcdb5cd, + b2_colorThistle4 = 0x8b7b8b, + b2_colorTomato = 0xff6347, + b2_colorTomato1 = 0xff6347, + b2_colorTomato2 = 0xee5c42, + b2_colorTomato3 = 0xcd4f39, + b2_colorTomato4 = 0x8b3626, + b2_colorTurquoise = 0x40e0d0, + b2_colorTurquoise1 = 0x00f5ff, + b2_colorTurquoise2 = 0x00e5ee, + b2_colorTurquoise3 = 0x00c5cd, + b2_colorTurquoise4 = 0x00868b, + b2_colorViolet = 0xee82ee, + b2_colorVioletRed = 0xd02090, + b2_colorVioletRed1 = 0xff3e96, + b2_colorVioletRed2 = 0xee3a8c, + b2_colorVioletRed3 = 0xcd3278, + b2_colorVioletRed4 = 0x8b2252, + b2_colorWebGray = 0x808080, + b2_colorWebGreen = 0x008000, + b2_colorWebMaroon = 0x800000, + b2_colorWebPurple = 0x800080, + b2_colorWheat = 0xf5deb3, + b2_colorWheat1 = 0xffe7ba, + b2_colorWheat2 = 0xeed8ae, + 
b2_colorWheat3 = 0xcdba96, + b2_colorWheat4 = 0x8b7e66, + b2_colorWhite = 0xffffff, + b2_colorWhiteSmoke = 0xf5f5f5, + b2_colorX11Gray = 0xbebebe, + b2_colorX11Green = 0x00ff00, + b2_colorX11Maroon = 0xb03060, + b2_colorX11Purple = 0xa020f0, + b2_colorYellow = 0xffff00, + b2_colorYellow1 = 0xffff00, + b2_colorYellow2 = 0xeeee00, + b2_colorYellow3 = 0xcdcd00, + b2_colorYellow4 = 0x8b8b00, + b2_colorYellowGreen = 0x9acd32, }; #ifdef __cplusplus diff --git a/include/box2d/constants.h b/include/box2d/constants.h index 0f3d2b1f..cfe95311 100644 --- a/include/box2d/constants.h +++ b/include/box2d/constants.h @@ -12,12 +12,14 @@ extern "C" /// Constants used by box2d. /// box2d uses meters-kilograms-seconds (MKS) units. Angles are always in radians unless /// degrees are indicated. -/// Some values can be overridden with a define and some values can be modified at runtime. +/// Some values can be overridden by using a compiler definition. /// Other values cannot be modified without causing stability and/or performance problems. /// box2d bases all length units on meters, but you may need different units for your game. -/// You can adjust this value to use different units, normally at application startup. -extern float b2_lengthUnitsPerMeter; +/// You can override this value to use different units. +#ifndef b2_lengthUnitsPerMeter +#define b2_lengthUnitsPerMeter 1.0f +#endif #define b2_pi 3.14159265359f @@ -50,16 +52,6 @@ extern float b2_lengthUnitsPerMeter; #define b2_maxWorlds 32 #endif -/// The maximum linear position correction used when solving constraints. This helps to -/// prevent overshoot. Meters. -/// @warning modifying this can have a significant impact on stability -#define b2_maxLinearCorrection (0.2f * b2_lengthUnitsPerMeter) - -/// The maximum angular position correction used when solving constraints. This helps to -/// prevent overshoot. 
-/// @warning modifying this can have a significant impact on stability -#define b2_maxAngularCorrection (8.0f / 180.0f * b2_pi) - /// The maximum linear translation of a body per step. This limit is very large and is used /// to prevent numerical problems. You shouldn't need to adjust this. Meters. /// @warning modifying this can have a significant impact on stability @@ -72,25 +64,20 @@ extern float b2_lengthUnitsPerMeter; #define b2_maxRotation (0.5f * b2_pi) #define b2_maxRotationSquared (b2_maxRotation * b2_maxRotation) -/// TODO_ERIN make dynamic based on speed? -/// @warning modifying this can have a significant impact on stability +/// @warning modifying this can have a significant impact on performance and stability #define b2_speculativeDistance (4.0f * b2_linearSlop) -/// This scale factor controls how fast overlap is resolved. Ideally this would be 1 so -/// that overlap is removed in one time step. However using values close to 1 often lead -/// to overshoot. -/// @warning modifying this can have a significant impact on stability -#define b2_baumgarte 0.2f - -/// The time that a body must be still before it will go to sleep. -extern float b2_timeToSleep; +/// The time that a body must be still before it will go to sleep. In seconds. +#ifndef b2_timeToSleep +#define b2_timeToSleep 0.5f +#endif -/// A body cannot sleep if its linear velocity is above this tolerance. +/// A body cannot sleep if its linear velocity is above this tolerance. Meters per second. #ifndef b2_linearSleepTolerance #define b2_linearSleepTolerance (0.01f * b2_lengthUnitsPerMeter) #endif -/// A body cannot sleep if its angular velocity is above this tolerance. +/// A body cannot sleep if its angular velocity is above this tolerance. Radians per second. #ifndef b2_angularSleepTolerance #define b2_angularSleepTolerance (2.0f / 180.0f * b2_pi) #endif @@ -102,6 +89,9 @@ extern float b2_timeToSleep; /// Maximum parallel workers. Used to size some static arrays. 
#define b2_maxWorkers 64 +/// Solver graph coloring +#define b2_graphColorCount 12 + /// Version numbering scheme. /// See http://en.wikipedia.org/wiki/Software_versioning typedef struct b2Version diff --git a/include/box2d/debug_draw.h b/include/box2d/debug_draw.h index 2c54aa95..5b8b8755 100644 --- a/include/box2d/debug_draw.h +++ b/include/box2d/debug_draw.h @@ -39,9 +39,12 @@ typedef struct b2DebugDraw /// Draw a point. void (*DrawPoint)(b2Vec2 p, float size, b2Color color, void* context); + /// Draw a string. + void (*DrawString)(b2Vec2 p, const char* s, void* context); + bool drawShapes; bool drawJoints; bool drawAABBs; - bool drawCOMs; + bool drawMass; void* context; } b2DebugDraw; diff --git a/include/box2d/joint_types.h b/include/box2d/joint_types.h index cb9fa245..4f8bc87c 100644 --- a/include/box2d/joint_types.h +++ b/include/box2d/joint_types.h @@ -115,3 +115,49 @@ static inline struct b2RevoluteJointDef b2DefaultRevoluteJointDef(void) def.collideConnected = false; return def; } + +typedef struct b2WeldJointDef +{ + /// The first attached body. + b2BodyId bodyIdA; + + /// The second attached body. + b2BodyId bodyIdB; + + /// The local anchor point relative to bodyA's origin. + b2Vec2 localAnchorA; + + /// The local anchor point relative to bodyB's origin. + b2Vec2 localAnchorB; + + /// The bodyB angle minus bodyA angle in the reference state (radians). + /// This defines the zero angle for the joint limit. + float referenceAngle; + + /// Stiffness expressed as hertz (oscillations per second). Use zero for maximum stiffness. + float linearHertz; + float angularHertz; + + /// Damping ratio, non-dimensional. Use 1 for critical damping. + float linearDampingRatio; + float angularDampingRatio; + + /// Set this flag to true if the attached bodies should collide. 
+ bool collideConnected; +} b2WeldJointDef; + +static inline struct b2WeldJointDef b2DefaultWeldJointDef(void) +{ + b2WeldJointDef def = {0}; + def.bodyIdA = b2_nullBodyId; + def.bodyIdB = b2_nullBodyId; + def.localAnchorA = B2_LITERAL(b2Vec2){0.0f, 0.0f}; + def.localAnchorB = B2_LITERAL(b2Vec2){0.0f, 0.0f}; + def.referenceAngle = 0.0f; + def.linearHertz = 0.0f; + def.angularHertz = 0.0f; + def.linearDampingRatio = 1.0f; + def.angularDampingRatio = 1.0f; + def.collideConnected = false; + return def; +} diff --git a/include/box2d/manifold.h b/include/box2d/manifold.h index 71c4149a..5f05db76 100644 --- a/include/box2d/manifold.h +++ b/include/box2d/manifold.h @@ -6,7 +6,6 @@ #include "box2d/types.h" #define b2_nullFeature UCHAR_MAX -#define b2_maxManifoldPoints 2 typedef struct b2Circle b2Circle; typedef struct b2Capsule b2Capsule; @@ -25,6 +24,9 @@ typedef struct b2ManifoldPoint /// world coordinates of contact point b2Vec2 point; + /// Body anchors used by solver + b2Vec2 anchorA, anchorB; + /// the separation of the contact point, negative if penetrating float separation; @@ -41,10 +43,10 @@ typedef struct b2ManifoldPoint bool persisted; } b2ManifoldPoint; -/// Conact manifold convex shapes. +/// Contact manifold convex shapes. 
typedef struct b2Manifold { - b2ManifoldPoint points[b2_maxManifoldPoints]; + b2ManifoldPoint points[2]; b2Vec2 normal; int32_t pointCount; } b2Manifold; diff --git a/include/box2d/math.h b/include/box2d/math.h index 315fefa0..a287765e 100644 --- a/include/box2d/math.h +++ b/include/box2d/math.h @@ -18,9 +18,11 @@ extern "C" #define B2_CLAMP(A, B, C) B2_MIN(B2_MAX(A, B), C) static const b2Vec2 b2Vec2_zero = {0.0f, 0.0f}; +static const b2Vec3 b2Vec3_zero = {0.0f, 0.0f, 0.0f}; static const b2Rot b2Rot_identity = {0.0f, 1.0f}; static const b2Transform b2Transform_identity = {{0.0f, 0.0f}, {0.0f, 1.0f}}; static const b2Mat22 b2Mat22_zero = {{0.0f, 0.0f}, {0.0f, 0.0f}}; +static const b2Mat33 b2Mat33_zero = {{0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f}}; bool b2IsValid(float a); bool b2IsValidVec2(b2Vec2 v); @@ -43,6 +45,19 @@ static inline float b2Cross(b2Vec2 a, b2Vec2 b) return a.x * b.y - a.y * b.x; } +/// Perform the dot product on two 3-vectors. +static inline float b2Dot3(b2Vec3 a, b2Vec3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +/// Perform the cross product on two 3-vectors. +static inline b2Vec3 b2Cross3(b2Vec3 a, b2Vec3 b) +{ + return B2_LITERAL(b2Vec3){a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x}; +} + + /// Perform the cross product on a vector and a scalar. In 2D this produces /// a vector. static inline b2Vec2 b2CrossVS(b2Vec2 v, float s) @@ -305,8 +320,7 @@ static inline b2Mat22 b2GetInverse22(b2Mat22 A) return B; } -/// Solve A * x = b, where b is a column vector. This is more efficient -/// than computing the inverse in one-shot cases. +/// Solve A * x = b, where b is a column vector. static inline b2Vec2 b2Solve22(b2Mat22 A, b2Vec2 b) { float a11 = A.cx.x, a12 = A.cy.x, a21 = A.cx.y, a22 = A.cy.y; @@ -319,6 +333,21 @@ static inline b2Vec2 b2Solve22(b2Mat22 A, b2Vec2 b) return x; } +/// Solve A * x = b, where b is a column vector. 
+static inline b2Vec3 b2Solve33(b2Mat33 A, b2Vec3 b) +{ + float det = b2Dot3(A.cx, b2Cross3(A.cy, A.cz)); + if (det != 0.0f) + { + det = 1.0f / det; + } + b2Vec3 x; + x.x = det * b2Dot3(b, b2Cross3(A.cy, A.cz)); + x.y = det * b2Dot3(A.cx, b2Cross3(b, A.cz)); + x.z = det * b2Dot3(A.cx, b2Cross3(A.cy, b)); + return x; +} + #ifdef __cplusplus } #endif diff --git a/include/box2d/timer.h b/include/box2d/timer.h index f848d271..e477590f 100644 --- a/include/box2d/timer.h +++ b/include/box2d/timer.h @@ -32,6 +32,7 @@ typedef struct b2Statistics int32_t stackCapacity; int32_t stackUsed; int32_t byteCount; + int32_t colorCounts[b2_graphColorCount + 1]; } b2Statistics; /// Timer for profiling. This has platform specific code and may diff --git a/include/box2d/types.h b/include/box2d/types.h index a77aa0ea..7f20406e 100644 --- a/include/box2d/types.h +++ b/include/box2d/types.h @@ -27,6 +27,12 @@ typedef struct b2Vec2 float x, y; } b2Vec2; +/// 3D vector +typedef struct b2Vec3 +{ + float x, y, z; +} b2Vec3; + /// 2D rotation typedef struct b2Rot { @@ -48,6 +54,13 @@ typedef struct b2Mat22 b2Vec2 cx, cy; } b2Mat22; +/// A 3-by-3 Matrix +typedef struct b2Mat33 +{ + /// columns + b2Vec3 cx, cy, cz; +} b2Mat33; + /// Axis-aligned bounding box typedef struct b2AABB { @@ -99,6 +112,12 @@ typedef struct b2WorldDef /// speed have restitution applied (will bounce). float restitutionThreshold; + /// This parameter controls how fast overlap is resolved and has units of meters per second + float maxPushoutVelocity; + + /// This parameter affects the stiffness of contacts. Cycles per second. 
+ float contactHertz; + /// Can bodies go to sleep to improve performance bool enableSleep; @@ -239,6 +258,8 @@ static inline b2WorldDef b2DefaultWorldDef(void) b2WorldDef def = {0}; def.gravity = B2_LITERAL(b2Vec2){0.0f, -10.0f}; def.restitutionThreshold = 1.0f * b2_lengthUnitsPerMeter; + def.maxPushoutVelocity = 3.0f * b2_lengthUnitsPerMeter; + def.contactHertz = 30.0f; def.enableSleep = true; def.bodyCapacity = 8; def.shapeCapacity = 8; diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 109e4335..324c1f2c 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -65,11 +65,11 @@ set(BOX2D_SAMPLES collection/benchmark_barrel.cpp collection/benchmark_create_destroy.cpp - collection/benchmark_joint_grid.cpp + collection/sample_joints.cpp collection/benchmark_many_tumblers.cpp collection/benchmark_pyramid.cpp collection/benchmark_tumbler.cpp - + collection/behavior.cpp collection/sample_continuous1.cpp collection/sample_distance.cpp collection/sample_dynamic_tree.cpp diff --git a/samples/collection/behavior.cpp b/samples/collection/behavior.cpp new file mode 100644 index 00000000..2b89fdf1 --- /dev/null +++ b/samples/collection/behavior.cpp @@ -0,0 +1,312 @@ +// SPDX-FileCopyrightText: 2022 Erin Catto +// SPDX-License-Identifier: MIT + +#include "sample.h" + +#include "box2d/box2d.h" +#include "box2d/geometry.h" +#include "box2d/hull.h" +#include "box2d/joint_util.h" + +#include +#include + +// Pyramid with heavy box on top +class HighMassRatio1 : public Sample +{ + public: + HighMassRatio1(const Settings& settings) + : Sample(settings) + { + float extent = 1.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 66.0f * extent; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.friction = 0.5f; + + b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + 
bodyDef.type = b2_dynamicBody; + + b2Polygon box = b2MakeBox(extent, extent); + +#if 0 + //b2Circle circle = {{0.0f, 0.0f}, extent}; + int count = 2; + for (int i = 0; i < count; ++i) + { + bodyDef.position = {0.0f, (2.0f * i + 1.0f) * 1.0f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.density = i == count - 1 ? 300.0f : 1.0f; + //b2Body_CreateCircle(bodyId, &shapeDef, &circle); + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } +#else + for (int j = 0; j < 3; ++j) + { + int count = 10; + float offset = -20.0f * extent + 2.0f * (count + 1.0f) * extent * j; + float y = extent; + while (count > 0) + { + for (int i = 0; i < count; ++i) + { + float coeff = i - 0.5f * count; + + float yy = count == 1 ? y + 0.0f : y; + bodyDef.position = {2.0f * coeff * extent + offset, yy}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.density = count == 1 ? (j + 1.0f) * 100.0f : 1.0f; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } + + --count; + y += 2.0f * extent; + } + } +#endif + } + + static Sample* Create(const Settings& settings) + { + return new HighMassRatio1(settings); + } +}; + +static int sampleIndex1 = RegisterSample("Behavior", "HighMassRatio1", HighMassRatio1::Create); + +// Big box on small boxes +class HighMassRatio2 : public Sample +{ + public: + HighMassRatio2(const Settings& settings) + : Sample(settings) + { + float extent = 1.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 66.0f * extent; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + b2Segment segment = {{-0.5f * 2.0f * groundWidth, 0.0f}, {0.5f * 2.0f * groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + bodyDef.type = b2_dynamicBody; + + b2Vec2 points[3] = {{-0.5f * extent, 0.0f}, {0.5f * extent, 0.0f}, {0.0f, 1.0f * extent}}; + b2Hull hull = b2ComputeHull(points, 3); + b2Polygon smallTriangle = 
b2MakePolygon(&hull, 0.0f); + b2Polygon smallBox = b2MakeBox(0.5f * extent, 0.5f * extent); + b2Polygon bigBox = b2MakeBox(10.0f * extent, 10.0f * extent); + + { + bodyDef.position = {-9.0f * extent, 0.5f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); + } + + { + bodyDef.position = {9.0f * extent, 0.5f * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &smallBox); + } + + { + bodyDef.position = {0.0f, (10.0f + 16.0f) * extent}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + b2Body_CreatePolygon(bodyId, &shapeDef, &bigBox); + } + } + + static Sample* Create(const Settings& settings) + { + return new HighMassRatio2(settings); + } +}; + +static int sampleIndex2 = RegisterSample("Behavior", "HighMassRatio2", HighMassRatio2::Create); + +class Friction : public Sample +{ + public: + Friction(const Settings& settings) + : Sample(settings) + { + + { + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.friction = 0.2f; + + b2Segment segment = {{-40.0f, 0.0f}, {40.0f, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + b2Polygon box = b2MakeOffsetBox(13.0f, 0.25f, {-4.0f, 22.0f}, -0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(0.25f, 1.0f, {10.5f, 19.0f}, 0.0f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(13.0f, 0.25f, {4.0f, 14.0f}, 0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(0.25f, 1.0f, {-10.5f, 11.0f}, 0.0f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + + box = b2MakeOffsetBox(13.0f, 0.25f, {-4.0f, 6.0f}, -0.25f); + b2Body_CreatePolygon(groundId, &shapeDef, &box); + } + + { + b2Polygon box = b2MakeBox(0.5f, 0.5f); + + b2ShapeDef shapeDef = b2DefaultShapeDef(); + 
shapeDef.density = 25.0f; + + float friction[5] = {0.75f, 0.5f, 0.35f, 0.1f, 0.0f}; + + for (int i = 0; i < 5; ++i) + { + b2BodyDef bodyDef = b2DefaultBodyDef(); + bodyDef.type = b2_dynamicBody; + bodyDef.position = {-15.0f + 4.0f * i, 28.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + shapeDef.friction = friction[i]; + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + } + } + } + + static Sample* Create(const Settings& settings) + { + return new Friction(settings); + } +}; + +static int sampleIndex3 = RegisterSample("Behavior", "Friction", Friction::Create); + +class OverlapRecovery : public Sample +{ + public: + OverlapRecovery(const Settings& settings) + : Sample(settings) + { + m_bodyIds = nullptr; + m_bodyCount = 0; + m_baseCount = 4; + m_overlap = 0.5f; + m_extent = 0.3f; + m_pushout = 3.0f; + m_hertz = 30.0f; + + b2BodyDef bodyDef = b2DefaultBodyDef(); + b2BodyId groundId = b2World_CreateBody(m_worldId, &bodyDef); + + float groundWidth = 40.0f; + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + b2Segment segment = {{-groundWidth, 0.0f}, {groundWidth, 0.0f}}; + b2Body_CreateSegment(groundId, &shapeDef, &segment); + + CreateScene(); + } + + ~OverlapRecovery() override + { + free(m_bodyIds); + } + + void CreateScene() + { + for (int32_t i = 0; i < m_bodyCount; ++i) + { + b2World_DestroyBody(m_bodyIds[i]); + } + + b2World_SetMaximumPushoutVelocity(m_worldId, m_pushout); + b2World_SetContactHertz(m_worldId, m_hertz); + + b2BodyDef bodyDef = b2DefaultBodyDef(); + bodyDef.type = b2_dynamicBody; + + b2Polygon box = b2MakeBox(m_extent, m_extent); + b2ShapeDef shapeDef = b2DefaultShapeDef(); + shapeDef.density = 1.0f; + + m_bodyCount = m_baseCount * (m_baseCount + 1) / 2; + m_bodyIds = (b2BodyId*)realloc(m_bodyIds, m_bodyCount * sizeof(b2BodyId)); + + int32_t bodyIndex = 0; + float fraction = 1.0f - m_overlap; + float y = m_extent; + for (int32_t i = 0; i < m_baseCount; ++i) + { + float x = fraction * m_extent * (i - 
m_baseCount); + for (int32_t j = i; j < m_baseCount; ++j) + { + bodyDef.position = {x, y}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bodyDef); + + b2Body_CreatePolygon(bodyId, &shapeDef, &box); + + m_bodyIds[bodyIndex++] = bodyId; + + x += 2.0f * fraction * m_extent; + } + + y += 2.0f * fraction * m_extent; + } + + assert(bodyIndex == m_bodyCount); + } + + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + ImGui::SetNextWindowSize(ImVec2(240.0f, 230.0f)); + ImGui::Begin("Stacks", nullptr, ImGuiWindowFlags_NoResize); + + bool changed = false; + changed = changed || ImGui::SliderFloat("Extent", &m_extent, 0.1f, 1.0f, "%.1f"); + changed = changed || ImGui::SliderInt("Base Count", &m_baseCount, 1, 10); + changed = changed || ImGui::SliderFloat("Overlap", &m_overlap, 0.0f, 1.0f, "%.1f"); + changed = changed || ImGui::SliderFloat("Pushout", &m_pushout, 0.0f, 10.0f, "%.1f"); + changed = changed || ImGui::SliderFloat("Hertz", &m_hertz, 0.0f, 120.0f, "%.1f"); + changed = changed || ImGui::Button("Reset Scene"); + + if (changed) + { + CreateScene(); + } + + ImGui::End(); + } + + static Sample* Create(const Settings& settings) + { + return new OverlapRecovery(settings); + } + + b2BodyId* m_bodyIds; + int32_t m_bodyCount; + int32_t m_baseCount; + float m_overlap; + float m_extent; + float m_pushout; + float m_hertz; +}; + +static int sampleIndex4 = RegisterSample("Behavior", "Overlap Recovery", OverlapRecovery::Create); diff --git a/samples/collection/benchmark_barrel.cpp b/samples/collection/benchmark_barrel.cpp index ddfd58b5..bf010fd6 100644 --- a/samples/collection/benchmark_barrel.cpp +++ b/samples/collection/benchmark_barrel.cpp @@ -69,7 +69,8 @@ class BenchmarkBarrel : public Sample } } - m_columnCount = g_sampleDebug ? 4 : e_maxColumns; + m_columnCount = g_sampleDebug ? 
10 : e_maxColumns; + float rad = 0.5f; float shift = rad * 2.0f; @@ -88,7 +89,7 @@ class BenchmarkBarrel : public Sample b2Circle circle = {0}; circle.radius = rad; - m_rowCount = g_sampleDebug ? 8 : e_maxRows; + m_rowCount = g_sampleDebug ? 40 : e_maxRows; int32_t index = 0; diff --git a/samples/collection/benchmark_joint_grid.cpp b/samples/collection/benchmark_joint_grid.cpp deleted file mode 100644 index 331db2f7..00000000 --- a/samples/collection/benchmark_joint_grid.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-FileCopyrightText: 2022 Erin Catto -// SPDX-License-Identifier: MIT - -#include "box2d/box2d.h" -#include "box2d/geometry.h" -#include "sample.h" - -// TODO_ERIN test more joint types -// TODO_ERIN try to stabilize revolute -class BenchmarkJointGrid : public Sample -{ -public: - BenchmarkJointGrid(const Settings& settings) - : Sample(settings) - { - constexpr float rad = 0.4f; - constexpr int32_t numi = g_sampleDebug ? 10 : 100; - constexpr int32_t numk = g_sampleDebug ? 10 : 100; - constexpr float shift = 1.0f; - - // Allocate to avoid huge stack usage - b2BodyId* bodies = static_cast(malloc(numi * numk * sizeof(b2BodyId))); - int32_t index = 0; - - b2ShapeDef sd = b2DefaultShapeDef(); - sd.density = 1.0f; - sd.filter.maskBits = 0; - - b2Circle circle = {0}; - circle.radius = rad; - - b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); - - for (int32_t k = 0; k < numk; ++k) - { - for (int32_t i = 0; i < numi; ++i) - { - float fk = (float)k; - float fi = (float)i; - - b2BodyDef bd = b2DefaultBodyDef(); - if (k >= numk / 2 - 3 && k <= numk / 2 + 3 && i == 0) - { - bd.type = b2_staticBody; - } - else - { - bd.type = b2_dynamicBody; - } - - bd.position = {fk * shift, -fi * shift}; - - b2BodyId body = b2World_CreateBody(m_worldId, &bd); - - b2Body_CreateCircle(body, &sd, &circle); - - if (i > 0) - { - jd.bodyIdA = bodies[index - 1]; - jd.bodyIdB = body; - jd.localAnchorA = {0.0f, -0.5f * shift}; - jd.localAnchorB = {0.0f, 0.5f * shift}; - 
b2World_CreateRevoluteJoint(m_worldId, &jd); - } - - if (k > 0) - { - jd.bodyIdA = bodies[index - numi]; - jd.bodyIdB = body; - jd.localAnchorA = {0.5f * shift, 0.0f}; - jd.localAnchorB = {-0.5f * shift, 0.0f}; - b2World_CreateRevoluteJoint(m_worldId, &jd); - } - - bodies[index++] = body; - } - } - - free(bodies); - } - - static Sample* Create(const Settings& settings) - { - return new BenchmarkJointGrid(settings); - } -}; - -static int sampleIndex = RegisterSample("Benchmark", "Joint Grid", BenchmarkJointGrid::Create); diff --git a/samples/collection/benchmark_many_tumblers.cpp b/samples/collection/benchmark_many_tumblers.cpp index 05b922c4..c8f89143 100644 --- a/samples/collection/benchmark_many_tumblers.cpp +++ b/samples/collection/benchmark_many_tumblers.cpp @@ -9,6 +9,7 @@ #include #include +// TODO_ERIN make these kinematic class BenchmarkManyTumblers : public Sample { public: @@ -18,8 +19,8 @@ class BenchmarkManyTumblers : public Sample b2BodyDef bd = b2DefaultBodyDef(); m_groundId = b2World_CreateBody(m_worldId, &bd); - m_rowCount = g_sampleDebug ? 1 : 19; - m_columnCount = g_sampleDebug ? 1 : 19; + m_rowCount = g_sampleDebug ? 2 : 19; + m_columnCount = g_sampleDebug ? 2 : 19; m_tumblerIds = nullptr; m_jointIds = nullptr; @@ -30,7 +31,7 @@ class BenchmarkManyTumblers : public Sample m_bodyCount = 0; m_bodyIndex = 0; - m_motorSpeed = 0.0f; + m_motorSpeed = 25.0f; m_shapeType = 0; CreateScene(); @@ -48,7 +49,6 @@ class BenchmarkManyTumblers : public Sample { b2BodyDef bd = b2DefaultBodyDef(); bd.type = b2_dynamicBody; - bd.enableSleep = false; bd.position = {position.x, position.y}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); m_tumblerIds[index] = bodyId; @@ -122,7 +122,7 @@ class BenchmarkManyTumblers : public Sample free(m_bodyIds); - int32_t bodiesPerTumbler = g_sampleDebug ? 1 : 50; + int32_t bodiesPerTumbler = g_sampleDebug ? 
8 : 50; m_bodyCount = bodiesPerTumbler * m_tumblerCount; m_bodyIds = static_cast(malloc(m_bodyCount * sizeof(b2BodyId))); @@ -154,6 +154,7 @@ class BenchmarkManyTumblers : public Sample for (int i = 0; i < m_tumblerCount; ++i) { b2RevoluteJoint_SetMotorSpeed(m_jointIds[i], (b2_pi / 180.0f) * m_motorSpeed); + b2Body_Wake(m_tumblerIds[i]); } } @@ -168,6 +169,7 @@ class BenchmarkManyTumblers : public Sample { b2ShapeDef sd = b2DefaultShapeDef(); sd.density = 1.0f; + //sd.restitution = 0.5f; b2Circle circle = {{0.0f, 0.0f}, 0.125f}; b2Polygon polygon = b2MakeBox(0.125f, 0.125f); diff --git a/samples/collection/benchmark_pyramid.cpp b/samples/collection/benchmark_pyramid.cpp index b6c4c04a..3b53cb7e 100644 --- a/samples/collection/benchmark_pyramid.cpp +++ b/samples/collection/benchmark_pyramid.cpp @@ -8,11 +8,6 @@ #include #include -BOX2D_API int32_t b2_awakeContactCount; - -BOX2D_API int b2_collideMinRange; -BOX2D_API int b2_islandMinRange; - class BenchmarkPyramid : public Sample { public: @@ -22,8 +17,8 @@ class BenchmarkPyramid : public Sample m_extent = 0.5f; m_round = 0.0f; m_baseCount = 10; - m_rowCount = g_sampleDebug ? 1 : 16; - m_columnCount = g_sampleDebug ? 4 : 16; + m_rowCount = g_sampleDebug ? 4 : 14; + m_columnCount = g_sampleDebug ? 
4 : 13; m_groundId = b2_nullBodyId; m_bodyIds = nullptr; m_bodyCount = 0; @@ -41,7 +36,7 @@ class BenchmarkPyramid : public Sample CreateScene(); } - ~BenchmarkPyramid() + ~BenchmarkPyramid() override { free(m_bodyIds); } @@ -57,13 +52,16 @@ class BenchmarkPyramid : public Sample float h = m_extent - m_round; b2Polygon cuboid = b2MakeRoundedBox(h, h, m_round); + float shift = 1.0f * h; + for (int32_t i = 0; i < m_baseCount; ++i) { - float y = (2.0f * i + 1.0f) * m_extent + baseY; + float y = (2.0f * i + 1.0f) * shift + baseY; for (int32_t j = i; j < m_baseCount; ++j) { - float x = (i + 1.0f) * m_extent + 2.0f * (j - i) * m_extent + centerX; + float x = (i + 1.0f) * shift + 2.0f * (j - i) * shift + centerX - 0.5f; + bodyDef.position = {x, y}; assert(m_bodyIndex < m_bodyCount); @@ -104,7 +102,8 @@ class BenchmarkPyramid : public Sample for (int32_t i = 0; i < m_rowCount; ++i) { - b2Segment segment = {{-0.5f * groundWidth, groundY}, {0.5f * groundWidth, groundY}}; + //b2Segment segment = {{-0.5f * groundWidth, groundY}, {0.5f * groundWidth, groundY}}; + b2Segment segment = {{-0.5f * 2.0f * groundWidth, groundY}, {0.5f * 2.0f * groundWidth, groundY}}; b2Body_CreateSegment(m_groundId, &shapeDef, &segment); groundY += groundDeltaY; } @@ -140,9 +139,6 @@ class BenchmarkPyramid : public Sample changed = changed || ImGui::SliderFloat("Round", &m_round, 0.0f, 0.4f, "%.1f"); changed = changed || ImGui::Button("Reset Scene"); - ImGui::SliderInt("Collide Min", &b2_collideMinRange, 1, 200); - ImGui::SliderInt("Island Min", &b2_islandMinRange, 1, 10); - if (changed) { CreateScene(); diff --git a/samples/collection/benchmark_tumbler.cpp b/samples/collection/benchmark_tumbler.cpp index cb47a91f..3f9ebc20 100644 --- a/samples/collection/benchmark_tumbler.cpp +++ b/samples/collection/benchmark_tumbler.cpp @@ -4,6 +4,7 @@ #include "box2d/box2d.h" #include "box2d/geometry.h" #include "sample.h" +#include "settings.h" #include #include @@ -57,14 +58,14 @@ class BenchmarkTumbler : public 
Sample m_jointId = b2World_CreateRevoluteJoint(m_worldId, &jd); } - m_maxCount = g_sampleDebug ? 500 : 2000; + m_maxCount = g_sampleDebug ? 300 : 2000; m_count = 0; } void UpdateUI() override { ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); - ImGui::SetNextWindowSize(ImVec2(240.0f, 230.0f)); + ImGui::SetNextWindowSize(ImVec2(240.0f, 80.0f)); ImGui::Begin("Tumbler", nullptr, ImGuiWindowFlags_NoResize); if (ImGui::SliderFloat("Speed", &m_motorSpeed, 0.0f, 100.0f, "%.f")) @@ -77,22 +78,26 @@ class BenchmarkTumbler : public Sample void Step(Settings& settings) override { - Sample::Step(settings); - - for (int32_t i = 0; i < 10 && m_count < m_maxCount; ++i) + if (settings.m_pause == false || settings.m_singleStep == true) { - b2BodyDef bd = b2DefaultBodyDef(); - bd.type = b2_dynamicBody; - bd.position = {0.25f * i, 10.0f}; - b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); - - b2ShapeDef sd = b2DefaultShapeDef(); - sd.density = 1.0f; - - b2Polygon polygon = b2MakeBox(0.125f, 0.125f); - b2Body_CreatePolygon(bodyId, &sd, &polygon); - ++m_count; + float a = 0.125f; + for (int32_t i = 0; i < 5 && m_count < m_maxCount; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {5.0f * a + 2.0f * a * i, 10.0f + 2.0f * a * (m_stepCount % 5)}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 1.0f; + + b2Polygon polygon = b2MakeBox(0.125f, 0.125f); + b2Body_CreatePolygon(bodyId, &sd, &polygon); + ++m_count; + } } + + Sample::Step(settings); } static Sample* Create(const Settings& settings) diff --git a/samples/collection/sample_joints.cpp b/samples/collection/sample_joints.cpp new file mode 100644 index 00000000..3f69bf07 --- /dev/null +++ b/samples/collection/sample_joints.cpp @@ -0,0 +1,450 @@ +// SPDX-FileCopyrightText: 2022 Erin Catto +// SPDX-License-Identifier: MIT + +#include "sample.h" + +#include "box2d/box2d.h" +#include "box2d/geometry.h" +#include 
"box2d/hull.h" + +// #include +#include + +class BenchmarkJointGrid : public Sample +{ + public: + BenchmarkJointGrid(const Settings& settings) + : Sample(settings) + { + constexpr float rad = 0.4f; + constexpr int32_t numi = g_sampleDebug ? 10 : 100; + constexpr int32_t numk = g_sampleDebug ? 10 : 100; + constexpr float shift = 1.0f; + + // Allocate to avoid huge stack usage + b2BodyId* bodies = static_cast(malloc(numi * numk * sizeof(b2BodyId))); + int32_t index = 0; + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 1.0f; + sd.filter.maskBits = 0; + + b2Circle circle = {0}; + circle.radius = rad; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + for (int32_t k = 0; k < numk; ++k) + { + for (int32_t i = 0; i < numi; ++i) + { + float fk = (float)k; + float fi = (float)i; + + b2BodyDef bd = b2DefaultBodyDef(); + if (k >= numk / 2 - 3 && k <= numk / 2 + 3 && i == 0) + { + bd.type = b2_staticBody; + } + else + { + bd.type = b2_dynamicBody; + } + + bd.position = {fk * shift, -fi * shift}; + + b2BodyId body = b2World_CreateBody(m_worldId, &bd); + + b2Body_CreateCircle(body, &sd, &circle); + + if (i > 0) + { + jd.bodyIdA = bodies[index - 1]; + jd.bodyIdB = body; + jd.localAnchorA = {0.0f, -0.5f * shift}; + jd.localAnchorB = {0.0f, 0.5f * shift}; + b2World_CreateRevoluteJoint(m_worldId, &jd); + } + + if (k > 0) + { + jd.bodyIdA = bodies[index - numi]; + jd.bodyIdB = body; + jd.localAnchorA = {0.5f * shift, 0.0f}; + jd.localAnchorB = {-0.5f * shift, 0.0f}; + b2World_CreateRevoluteJoint(m_worldId, &jd); + } + + bodies[index++] = body; + } + } + + free(bodies); + } + + static Sample* Create(const Settings& settings) + { + return new BenchmarkJointGrid(settings); + } +}; + +static int sampleJointGridIndex = RegisterSample("Joints", "Joint Grid", BenchmarkJointGrid::Create); + +// A suspension bridge +class Bridge : public Sample +{ + public: + enum + { + e_count = 80 + }; + + Bridge(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = 
b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + groundId = b2World_CreateBody(m_worldId, &bd); + } + + { + b2Polygon box = b2MakeBox(0.5f, 0.125f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + int32_t jointIndex = 0; + m_maxMotorTorque = 0.0f; + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {-34.5f + 1.0f * i, 20.0f}; + // bd.linearDamping = 0.1f; + // bd.angularDamping = 0.1f; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {-35.0f + 1.0f * i, 20.0f}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + b2Vec2 pivot = {-35.0f + 1.0f * e_count, 20.0f}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = groundId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + assert(jointIndex == e_count + 1); + } + + for (int32_t i = 0; i < 2; ++i) + { + b2Vec2 vertices[3] = {{-0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 1.5f}}; + + b2Hull hull = b2ComputeHull(vertices, 3); + b2Polygon triangle = b2MakePolygon(&hull, 0.0f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {-8.0f + 8.0f * i, 22.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &triangle); + } + + for (int32_t i = 0; i < 3; ++i) + 
{ + b2Circle circle = {{0.0f, 0.0f}, 0.5f}; + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {-6.0f + 6.0f * i, 25.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); + } + } + + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + + // Automatic window size + ImGui::Begin("Options", nullptr, ImGuiWindowFlags_AlwaysAutoResize); + + // Slider takes half the window + ImGui::PushItemWidth(ImGui::GetWindowWidth() * 0.5f); + bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 10000.0f, "%2.f"); + if (updateFriction) + { + for (int32_t i = 0; i <= e_count; ++i) + { + b2RevoluteJoint_SetMaxMotorTorque(m_jointIds[i], m_maxMotorTorque); + } + } + + ImGui::End(); + } + + static Sample* Create(const Settings& settings) + { + return new Bridge(settings); + } + + b2JointId m_jointIds[e_count + 1]; + float m_maxMotorTorque; +}; + +static int sampleBridgeIndex = RegisterSample("Joints", "Bridge", Bridge::Create); + +class BallAndChain : public Sample +{ + public: + enum + { + e_count = 30 + }; + + BallAndChain(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + groundId = b2World_CreateBody(m_worldId, &bd); + } + + m_maxMotorTorque = 0.0f; + +#if 1 + { + float hx = 0.5f; + b2Capsule capsule = {{-hx, 0.0f}, {hx, 0.0f}, 0.125f}; + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + int32_t jointIndex = 0; + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * i) * hx, e_count * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCapsule(bodyId, &sd, &capsule); + + 
b2Vec2 pivot = {(2.0f * i) * hx, e_count * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + b2Circle circle = {{0.0f, 0.0f}, 4.0f}; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * e_count) * hx + circle.radius - hx, e_count * hx}; + //bd.linearDamping = 0.1f; + //bd.angularDamping = 0.1f; + + //bd.linearVelocity = {100.0f, -100.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); + + b2Vec2 pivot = {(2.0f * e_count) * hx, e_count * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = true; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + assert(jointIndex == e_count + 1); + } +#else + { + float hx = 0.5f; + b2Polygon box = b2MakeBox(0.125f, hx); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + sd.filter.categoryBits = 1; + sd.filter.maskBits = 0; + + b2RevoluteJointDef jd = b2DefaultRevoluteJointDef(); + + int32_t jointIndex = 0; + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {0.0, -(1.0f + 2.0f * i) * hx}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {0.0f, -(2.0f * i) * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = 
false; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + b2Circle circle = {{0.0f, 0.0f}, 20.0f}; + + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {0.0f, -(1.0f + 2.0f * e_count) * hx - circle.radius + hx}; + // bd.linearDamping = 0.1f; + // bd.angularDamping = 0.1f; + + // bd.linearVelocity = {100.0f, -100.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreateCircle(bodyId, &sd, &circle); + + b2Vec2 pivot = {0.0f, -(2.0f * e_count) * hx}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + jd.enableMotor = false; + jd.maxMotorTorque = m_maxMotorTorque; + m_jointIds[jointIndex++] = b2World_CreateRevoluteJoint(m_worldId, &jd); + assert(jointIndex == e_count + 1); + } +#endif + } + + void UpdateUI() override + { + ImGui::SetNextWindowPos(ImVec2(10.0f, 300.0f), ImGuiCond_Once); + ImGui::SetNextWindowSize(ImVec2(300.0f, 60.0f)); + ImGui::Begin("Options", nullptr, ImGuiWindowFlags_NoResize); + + bool updateFriction = ImGui::SliderFloat("Joint Friction", &m_maxMotorTorque, 0.0f, 10000.0f, "%2.f"); + if (updateFriction) + { + for (int32_t i = 0; i <= e_count; ++i) + { + b2RevoluteJoint_SetMaxMotorTorque(m_jointIds[i], m_maxMotorTorque); + } + } + + ImGui::End(); + } + + static Sample* Create(const Settings& settings) + { + return new BallAndChain(settings); + } + + b2JointId m_jointIds[e_count + 1]; + float m_maxMotorTorque; +}; + +static int sampleBallAndChainIndex = RegisterSample("Joints", "BallAndChain", BallAndChain::Create); + +class Cantilever : public Sample +{ + public: + enum + { + e_count = 8 + }; + + Cantilever(const Settings& settings) + : Sample(settings) + { + b2BodyId groundId = b2_nullBodyId; + { + b2BodyDef bd = b2DefaultBodyDef(); + groundId = b2World_CreateBody(m_worldId, &bd); + } 
+ + { + float hx = 0.5f; + b2Polygon box = b2MakeBox(hx, 0.125f); + + b2ShapeDef sd = b2DefaultShapeDef(); + sd.density = 20.0f; + + b2WeldJointDef jd = b2DefaultWeldJointDef(); + + b2BodyId prevBodyId = groundId; + for (int32_t i = 0; i < e_count; ++i) + { + b2BodyDef bd = b2DefaultBodyDef(); + bd.type = b2_dynamicBody; + bd.position = {(1.0f + 2.0f * i) * hx, 0.0f}; + b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); + b2Body_CreatePolygon(bodyId, &sd, &box); + + b2Vec2 pivot = {(2.0f * i) * hx, 0.0f}; + jd.bodyIdA = prevBodyId; + jd.bodyIdB = bodyId; + jd.localAnchorA = b2Body_GetLocalPoint(jd.bodyIdA, pivot); + jd.localAnchorB = b2Body_GetLocalPoint(jd.bodyIdB, pivot); + // jd.linearHertz = 5.0f; + b2World_CreateWeldJoint(m_worldId, &jd); + + prevBodyId = bodyId; + } + + m_tipId = prevBodyId; + } + } + + void Step(Settings& settings) override + { + Sample::Step(settings); + + b2Vec2 tipPosition = b2Body_GetPosition(m_tipId); + g_draw.DrawString(5, m_textLine, "tip-y = %.2f", tipPosition.y); + m_textLine += m_textIncrement; + } + + static Sample* Create(const Settings& settings) + { + return new Cantilever(settings); + } + + b2BodyId m_tipId; +}; + +static int sampleCantileverIndex = RegisterSample("Joints", "Cantilever", Cantilever::Create); diff --git a/samples/collection/sample_vertical_stack.cpp b/samples/collection/sample_vertical_stack.cpp index d339dde3..b97758e8 100644 --- a/samples/collection/sample_vertical_stack.cpp +++ b/samples/collection/sample_vertical_stack.cpp @@ -14,8 +14,8 @@ class VerticalStack : public Sample enum { - e_maxColumns = 500, - e_maxRows = 100, + e_maxColumns = 50, + e_maxRows = 30, e_maxBullets = 20 }; @@ -55,9 +55,9 @@ class VerticalStack : public Sample m_bullets[i] = b2_nullBodyId; } - m_shapeType = e_boxShape; - m_rowCount = g_sampleDebug ? 2 : 50; - m_columnCount = g_sampleDebug ? 1 : 200; + m_shapeType = e_circleShape; + m_rowCount = 1; + m_columnCount = g_sampleDebug ? 
1 : e_maxColumns; m_bulletCount = 1; m_bulletType = e_circleShape; @@ -78,8 +78,8 @@ class VerticalStack : public Sample b2Circle circle = {0}; circle.radius = 0.5f; - //b2Polygon box = b2MakeBox(0.5f, 0.5f); - b2Polygon box = b2MakeRoundedBox(0.45f, 0.45f, 0.05f); + b2Polygon box = b2MakeBox(0.5f, 0.5f); + //b2Polygon box = b2MakeRoundedBox(0.45f, 0.45f, 0.05f); b2ShapeDef sd = b2DefaultShapeDef(); sd.density = 1.0f; @@ -93,7 +93,7 @@ class VerticalStack : public Sample } else { - offset = 0.0f; // 0.01f; + offset = 0.01f; } float dx = 3.0f; @@ -111,8 +111,8 @@ class VerticalStack : public Sample int32_t n = j * m_rowCount + i; float shift = (i % 2 == 0 ? -offset : offset); - //bd.position = {x + shift, 0.505f + 1.01f * i}; - bd.position = {x + shift, 2.0f + 1.51f * i}; + bd.position = {x + shift, 0.5f + 1.0f * i}; + //bd.position = {x + shift, 1.0f + 1.51f * i}; b2BodyId bodyId = b2World_CreateBody(m_worldId, &bd); m_bodies[n] = bodyId; diff --git a/samples/draw.cpp b/samples/draw.cpp index 42bce19a..5d5b1140 100644 --- a/samples/draw.cpp +++ b/samples/draw.cpp @@ -860,7 +860,11 @@ void DrawPointFcn(b2Vec2 p, float size, b2Color color, void* context) static_cast(context)->DrawPoint(p, size, color); } -// +void DrawStringFcn(b2Vec2 p, const char* s, void* context) +{ + static_cast(context)->DrawString(p, s); +} + Draw::Draw() { m_showUI = true; @@ -871,7 +875,6 @@ Draw::Draw() m_debugDraw = {}; } -// Draw::~Draw() { assert(m_points == nullptr); @@ -879,7 +882,6 @@ Draw::~Draw() assert(m_triangles == nullptr); } -// void Draw::Create() { m_points = static_cast(malloc(sizeof(GLRenderPoints))); @@ -901,6 +903,7 @@ void Draw::Create() DrawSegmentFcn, DrawTransformFcn, DrawPointFcn, + DrawStringFcn, true, true, false, @@ -908,7 +911,6 @@ void Draw::Create() this}; } -// void Draw::Destroy() { m_points->Destroy(); @@ -928,7 +930,6 @@ void Draw::Destroy() m_roundedTriangles = nullptr; } -// void Draw::DrawPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color 
color) { b2Vec2 p1 = vertices[vertexCount - 1]; @@ -941,7 +942,6 @@ void Draw::DrawPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color colo } } -// void Draw::DrawSolidPolygon(const b2Vec2* vertices, int32_t vertexCount, b2Color color) { b2Color fillColor = {0.5f * color.r, 0.5f * color.g, 0.5f * color.b, 0.5f}; @@ -1038,7 +1038,6 @@ void Draw::DrawRoundedPolygon(const b2Vec2* vertices, int32_t count, float radiu } } -// void Draw::DrawCircle(b2Vec2 center, float radius, b2Color color) { const float k_segments = 32.0f; @@ -1061,7 +1060,6 @@ void Draw::DrawCircle(b2Vec2 center, float radius, b2Color color) } } -// void Draw::DrawSolidCircle(b2Vec2 center, float radius, b2Vec2 axis, b2Color color) { b2Color fillColor = {0.5f * color.r, 0.5f * color.g, 0.5f * color.b, 0.5f}; @@ -1166,7 +1164,6 @@ void Draw::DrawCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) m_lines->Vertex(p2, color); } -// void Draw::DrawSolidCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) { float length; @@ -1267,14 +1264,12 @@ void Draw::DrawSolidCapsule(b2Vec2 p1, b2Vec2 p2, float radius, b2Color color) m_lines->Vertex(p2, color); } -// void Draw::DrawSegment(b2Vec2 p1, b2Vec2 p2, b2Color color) { m_lines->Vertex(p1, color); m_lines->Vertex(p2, color); } -// void Draw::DrawTransform(b2Transform xf) { const float k_axisScale = 0.4f; @@ -1291,13 +1286,11 @@ void Draw::DrawTransform(b2Transform xf) m_lines->Vertex(p2, green); } -// void Draw::DrawPoint(b2Vec2 p, float size, b2Color color) { m_points->Vertex(p, color, size); } -// void Draw::DrawString(int x, int y, const char* string, ...) { // if (m_showUI == false) @@ -1316,7 +1309,6 @@ void Draw::DrawString(int x, int y, const char* string, ...) va_end(arg); } -// void Draw::DrawString(b2Vec2 pw, const char* string, ...) { b2Vec2 ps = g_camera.ConvertWorldToScreen(pw); @@ -1327,12 +1319,11 @@ void Draw::DrawString(b2Vec2 pw, const char* string, ...) 
ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoInputs | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoScrollbar); ImGui::SetCursorPos(ImVec2(ps.x, ps.y)); - ImGui::TextColoredV(ImColor(230, 153, 153, 255), string, arg); + ImGui::TextColoredV(ImColor(230, 230, 230, 255), string, arg); ImGui::End(); va_end(arg); } -// void Draw::DrawAABB(b2AABB aabb, b2Color c) { b2Vec2 p1 = aabb.lowerBound; @@ -1353,7 +1344,6 @@ void Draw::DrawAABB(b2AABB aabb, b2Color c) m_lines->Vertex(p1, c); } -// void Draw::Flush() { m_roundedTriangles->Flush(); diff --git a/samples/main.cpp b/samples/main.cpp index 819867ad..6dab3954 100644 --- a/samples/main.cpp +++ b/samples/main.cpp @@ -53,14 +53,14 @@ static b2Vec2 s_clickPointWS = b2Vec2_zero; static float s_windowScale = 1.0f; static float s_framebufferScale = 1.0f; -void* AllocFcn(int32_t size) +void* AllocFcn(uint32_t size) { - size_t size16 = ((size - 1) | 0xF) + 1; - assert((size16 & 0xF) == 0); + size_t size32 = ((size - 1) | 0x1F) + 1; + assert((size32 & 0x1F) == 0); #if defined(_WIN64) - void* ptr = _aligned_malloc(size16, 16); + void* ptr = _aligned_malloc(size32, 32); #else - void* ptr = aligned_alloc(16, size16); + void* ptr = aligned_alloc(32, size32); #endif return ptr; } @@ -410,7 +410,7 @@ static void UpdateUI() ImGui::Checkbox("Contact Normals", &s_settings.m_drawContactNormals); ImGui::Checkbox("Contact Impulses", &s_settings.m_drawContactImpulse); ImGui::Checkbox("Friction Impulses", &s_settings.m_drawFrictionImpulse); - ImGui::Checkbox("Center of Masses", &s_settings.m_drawCOMs); + ImGui::Checkbox("Center of Masses", &s_settings.m_drawMass); ImGui::Checkbox("Statistics", &s_settings.m_drawStats); ImGui::Checkbox("Profile", &s_settings.m_drawProfile); @@ -565,7 +565,7 @@ int main(int, char**) // MSAA glfwWindowHint(GLFW_SAMPLES, 4); - sprintf(buffer, "Box2D Version %d.%d.%d c", b2_version.major, b2_version.minor, b2_version.revision); + sprintf(buffer, "Box2D Version %d.%d.%d Graph Color", b2_version.major, 
b2_version.minor, b2_version.revision); if (GLFWmonitor* primaryMonitor = glfwGetPrimaryMonitor()) { @@ -694,7 +694,7 @@ int main(int, char**) UpdateUI(); - // ImGui::ShowDemoWindow(); + //ImGui::ShowDemoWindow(); // if (g_draw.m_showUI) { diff --git a/samples/sample.cpp b/samples/sample.cpp index 5472f8d4..cf618596 100644 --- a/samples/sample.cpp +++ b/samples/sample.cpp @@ -16,10 +16,15 @@ #include #include -bool PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, void* context) +bool PreSolveFcn(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color, void* context) { Sample* sample = static_cast(context); - return sample->PreSolve(shapeIdA, shapeIdB, manifold); + if (sample->m_collectContacts) + { + return sample->PreSolve(shapeIdA, shapeIdB, manifold, color); + } + + return true; } static void* EnqueueTask(b2TaskCallback* task, int32_t itemCount, int32_t minRange, void* taskContext, void* userContext) @@ -62,8 +67,7 @@ Sample::Sample(const Settings& settings) { b2Vec2 gravity = {0.0f, -10.0f}; - // TODO_ERIN want core count, not including hyper-threads which don't work well for physics - uint32_t maxThreads = enki::GetNumHardwareThreads() / 2; + uint32_t maxThreads = B2_MIN(8, enki::GetNumHardwareThreads()); m_scheduler.Initialize(maxThreads); m_taskCount = 0; @@ -72,17 +76,21 @@ Sample::Sample(const Settings& settings) worldDef.enqueueTask = &EnqueueTask; worldDef.finishTask = &FinishTask; worldDef.finishAllTasks = &FinishAllTasks; - worldDef.bodyCapacity = 1024; - worldDef.contactCapacity = 4 * 1024; worldDef.userTaskContext = this; - worldDef.stackAllocatorCapacity = 20 * 1024; worldDef.enableSleep = settings.m_enableSleep; + // These are not ideal, but useful for testing Box2D + worldDef.bodyCapacity = 2; + worldDef.contactCapacity = 2; + worldDef.stackAllocatorCapacity = 0; + m_worldId = b2CreateWorld(&worldDef); m_textLine = 30; m_textIncrement = 18; m_mouseJointId = b2_nullJointId; m_pointCount = 0; + 
m_collectContacts = + settings.m_drawContactPoints || settings.m_drawContactNormals || settings.m_drawContactImpulse || settings.m_drawFrictionImpulse; // m_destructionListener.test = this; // m_world->SetDestructionListener(&m_destructionListener); @@ -166,7 +174,7 @@ void Sample::MouseDown(b2Vec2 p, int button, int mod) float dampingRatio = 0.7f; float mass = b2Body_GetMass(queryContext.bodyId); - b2MouseJointDef jd; + b2MouseJointDef jd = b2DefaultMouseJointDef(); jd.bodyIdA = m_groundBodyId; jd.bodyIdB = queryContext.bodyId; jd.target = p; @@ -194,6 +202,8 @@ void Sample::MouseMove(b2Vec2 p) if (B2_NON_NULL(m_mouseJointId)) { b2MouseJoint_SetTarget(m_mouseJointId, p); + b2BodyId bodyIdB = b2Joint_GetBodyB(m_mouseJointId); + b2Body_Wake(bodyIdB); } } @@ -226,14 +236,18 @@ void Sample::Step(Settings& settings) g_draw.m_debugDraw.drawShapes = settings.m_drawShapes; g_draw.m_debugDraw.drawJoints = settings.m_drawJoints; g_draw.m_debugDraw.drawAABBs = settings.m_drawAABBs; - g_draw.m_debugDraw.drawCOMs = settings.m_drawCOMs; + g_draw.m_debugDraw.drawMass = settings.m_drawMass; - b2World_EnableSleeping(m_worldId, settings.m_enableSleep); + m_collectContacts = + settings.m_drawContactPoints || settings.m_drawContactNormals || settings.m_drawContactImpulse || settings.m_drawFrictionImpulse; - // m_world->SetWarmStarting(settings.m_enableWarmStarting); - // m_world->SetContinuousPhysics(settings.m_enableContinuous); + b2World_EnableSleeping(m_worldId, settings.m_enableSleep); + b2World_EnableWarmStarting(m_worldId, settings.m_enableWarmStarting); - m_pointCount = 0; + if (timeStep > 0.0f) + { + m_pointCount = 0; + } for (int32_t i = 0; i < 1; ++i) { @@ -257,6 +271,22 @@ void Sample::Step(Settings& settings) g_draw.DrawString(5, m_textLine, "proxies/height = %d/%d", s.proxyCount, s.treeHeight); m_textLine += m_textIncrement; + int32_t totalCount = 0; + char buffer[256] = {0}; + int32_t offset = sprintf_s(buffer, 256, "colors: "); + for (int32_t i = 0; i < 
b2_graphColorCount; ++i) + { + offset += sprintf_s(buffer + offset, 256 - offset, "%d/", s.colorCounts[i]); + totalCount += s.colorCounts[i]; + } + totalCount += s.colorCounts[b2_graphColorCount]; + sprintf_s(buffer + offset, 256 - offset, "(%d)[%d]", s.colorCounts[b2_graphColorCount], totalCount); + g_draw.DrawString(5, m_textLine, buffer); + m_textLine += m_textIncrement; + + g_draw.DrawString(5, m_textLine, "tree: proxies/height = %d/%d", s.proxyCount, s.treeHeight); + m_textLine += m_textIncrement; + g_draw.DrawString(5, m_textLine, "stack allocator capacity/used = %d/%d", s.stackCapacity, s.stackUsed); m_textLine += m_textIncrement; @@ -336,11 +366,22 @@ void Sample::Step(Settings& settings) b2Color addColor = {0.3f, 0.95f, 0.3f, 1.0f}; b2Color persistColor = {0.3f, 0.3f, 0.95f, 1.0f}; + b2HexColor colors[b2_graphColorCount + 1] = {b2_colorRed, b2_colorOrange, b2_colorYellow, b2_colorGreen, b2_colorCyan, + b2_colorBlue, b2_colorViolet, b2_colorPink, b2_colorChocolate, b2_colorGoldenrod, + b2_colorCoral, b2_colorAqua, b2_colorBlack}; + for (int32_t i = 0; i < m_pointCount; ++i) { ContactPoint* point = m_points + i; - if (point->separation > b2_linearSlop) + if (0 <= point->color && point->color <= b2_graphColorCount) + { + // graph color + float pointSize = point->color == b2_graphColorCount ? 
7.5f : 5.0f; + g_draw.DrawPoint(point->position, pointSize, b2MakeColor(colors[point->color], 1.0f)); + // g_draw.DrawString(point->position, "%d", point->color); + } + else if (point->separation > b2_linearSlop) { // Speculative g_draw.DrawPoint(point->position, 5.0f, speculativeColor); @@ -388,7 +429,7 @@ void Sample::ShiftOrigin(b2Vec2 newOrigin) } // Thread-safe callback -bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold) +bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color) { long startCount = m_pointCount.fetch_add(manifold->pointCount); if (startCount >= k_maxContactPoints) @@ -411,6 +452,7 @@ bool Sample::PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifo cp->normalImpulse = manifold->points[j].normalImpulse; cp->tangentImpulse = manifold->points[j].tangentImpulse; cp->persisted = manifold->points[j].persisted; + cp->color = color; ++j; } diff --git a/samples/sample.h b/samples/sample.h index 9f90ca8f..befb3598 100644 --- a/samples/sample.h +++ b/samples/sample.h @@ -70,6 +70,8 @@ struct ContactPoint float normalImpulse; float tangentImpulse; float separation; + int32_t constraintIndex; + int32_t color; }; class SampleTask : public enki::ITaskSet @@ -112,7 +114,7 @@ class Sample void ResetProfile(); void ShiftOrigin(b2Vec2 newOrigin); - bool PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold); + bool PreSolve(b2ShapeId shapeIdA, b2ShapeId shapeIdB, b2Manifold* manifold, int32_t color); friend class DestructionListener; friend class BoundaryListener; @@ -133,6 +135,7 @@ class Sample int32_t m_textIncrement; b2Profile m_maxProfile; b2Profile m_totalProfile; + bool m_collectContacts; }; typedef Sample* SampleCreateFcn(const Settings& settings); diff --git a/samples/settings.cpp b/samples/settings.cpp index 237b46da..42d4fcbb 100644 --- a/samples/settings.cpp +++ b/samples/settings.cpp @@ -51,7 +51,7 @@ void Settings::Save() fprintf(file, " 
\"drawContactNormals\": %s,\n", m_drawContactNormals ? "true" : "false"); fprintf(file, " \"drawContactImpulse\": %s,\n", m_drawContactImpulse ? "true" : "false"); fprintf(file, " \"drawFrictionImpulse\": %s,\n", m_drawFrictionImpulse ? "true" : "false"); - fprintf(file, " \"drawCOMs\": %s,\n", m_drawCOMs ? "true" : "false"); + fprintf(file, " \"drawMass\": %s,\n", m_drawMass ? "true" : "false"); fprintf(file, " \"drawStats\": %s,\n", m_drawStats ? "true" : "false"); fprintf(file, " \"drawProfile\": %s,\n", m_drawProfile ? "true" : "false"); fprintf(file, " \"enableWarmStarting\": %s,\n", m_enableWarmStarting ? "true" : "false"); diff --git a/samples/settings.h b/samples/settings.h index ae1e4100..404cec40 100644 --- a/samples/settings.h +++ b/samples/settings.h @@ -9,8 +9,8 @@ struct Settings void Load(); int m_sampleIndex = 0; - int m_windowWidth = 1280; - int m_windowHeight = 720; + int m_windowWidth = 1920; + int m_windowHeight = 1080; float m_hertz = 60.0f; int m_velocityIterations = 8; int m_positionIterations = 3; @@ -21,7 +21,7 @@ struct Settings bool m_drawContactNormals = false; bool m_drawContactImpulse = false; bool m_drawFrictionImpulse = false; - bool m_drawCOMs = false; + bool m_drawMass = false; bool m_drawStats = false; bool m_drawProfile = false; bool m_enableWarmStarting = true; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4f553c88..c4eb1009 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,8 @@ set(BOX2D_SOURCE_FILES distance.c dynamic_tree.c geometry.c + graph.c + graph.h hull.c island.c island.h @@ -43,6 +45,7 @@ set(BOX2D_SOURCE_FILES table.h timer.c types.c + weld_joint.c world.c world.h ) @@ -68,7 +71,12 @@ set(BOX2D_API_FILES ../include/box2d/types.h ) -add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES}) +if (MSVC) + add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES} box2d.natvis) +else() + add_library(box2d ${BOX2D_SOURCE_FILES} ${BOX2D_API_FILES}) +endif() + 
target_include_directories(box2d PUBLIC $ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} @@ -80,16 +88,17 @@ message(STATUS "CMake system name: ${CMAKE_SYSTEM_NAME}") if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") + # target_compile_options(box2d PRIVATE /W4 /WX) target_compile_options(box2d PRIVATE /W4 /WX /experimental:c11atomics) - # target_compile_options(box2d PRIVATE /arch:AVX2 /fp:fast /fp:except-) + target_compile_options(box2d PRIVATE /arch:AVX) # target_compile_options(box2d PRIVATE /arch:SSE2 /fp:except-) # target_compile_definitions(box2d PUBLIC "$<$:B2_ENABLE_ASSERT>") elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") message(STATUS "CMake using Clang-CL") - target_compile_options(box2d PRIVATE /W4 /WX /std:c17) + target_compile_options(box2d PRIVATE /W4 /WX /std:c17 /arch:AVX) endif() else() - target_compile_options(box2d PRIVATE -Wall -Wextra -Wpedantic -Werror) + target_compile_options(box2d PRIVATE -Wall -Wextra -Wpedantic -Werror -mavx) endif() find_library(MATH_LIBRARY m) diff --git a/src/allocate.c b/src/allocate.c index 0dc50cf8..f1840766 100644 --- a/src/allocate.c +++ b/src/allocate.c @@ -34,7 +34,7 @@ static b2AllocFcn* b2_allocFcn = NULL; static b2FreeFcn* b2_freeFcn = NULL; -static _Atomic int32_t b2_byteCount; +static _Atomic uint32_t b2_byteCount; void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn) { @@ -42,7 +42,7 @@ void b2SetAllocator(b2AllocFcn* allocFcn, b2FreeFcn* freeFcn) b2_freeFcn = freeFcn; } -void* b2Alloc(int32_t size) +void* b2Alloc(uint32_t size) { atomic_fetch_add_explicit(&b2_byteCount, size, memory_order_relaxed); @@ -50,21 +50,27 @@ void* b2Alloc(int32_t size) { void* ptr = b2_allocFcn(size); b2TracyCAlloc(ptr, size); + + B2_ASSERT(((uintptr_t)ptr & 0x1F) == 0); + return ptr; } - size_t size16 = ((size - 1) | 0xF) + 1; + uint32_t size32 = ((size - 1) | 0x1F) + 1; #ifdef B2_PLATFORM_WINDOWS - void* ptr = _aligned_malloc(size16, 16); + void* ptr = _aligned_malloc(size32, 
32); #else - void* ptr = aligned_alloc(16, size16); + void* ptr = aligned_alloc(32, size32); #endif b2TracyCAlloc(ptr, size); + + B2_ASSERT(((uintptr_t)ptr & 0x1F) == 0); + return ptr; } -void b2Free(void* mem, int32_t size) +void b2Free(void* mem, uint32_t size) { if (mem == NULL) { @@ -89,7 +95,7 @@ void b2Free(void* mem, int32_t size) atomic_fetch_sub_explicit(&b2_byteCount, size, memory_order_relaxed); } -int32_t b2GetByteCount(void) +uint32_t b2GetByteCount(void) { return atomic_load_explicit(&b2_byteCount, memory_order_relaxed); } diff --git a/src/allocate.h b/src/allocate.h index a9b381e7..5133251c 100644 --- a/src/allocate.h +++ b/src/allocate.h @@ -4,5 +4,5 @@ #pragma once #include -void* b2Alloc(int32_t size); -void b2Free(void* mem, int32_t size); +void* b2Alloc(uint32_t size); +void b2Free(void* mem, uint32_t size); diff --git a/src/bitset.c b/src/bitset.c index 38ed0251..ddb87847 100644 --- a/src/bitset.c +++ b/src/bitset.c @@ -14,7 +14,7 @@ b2BitSet b2CreateBitSet(uint32_t bitCapacity) bitSet.wordCapacity = (bitCapacity + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); bitSet.wordCount = 0; bitSet.bits = b2Alloc(bitSet.wordCapacity * sizeof(uint64_t)); - + memset(bitSet.bits, 0, bitSet.wordCapacity * sizeof(uint64_t)); return bitSet; } @@ -40,7 +40,24 @@ void b2SetBitCountAndClear(b2BitSet* bitSet, uint32_t bitCount) memset(bitSet->bits, 0, bitSet->wordCount * sizeof(uint64_t)); } -void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB) +void b2GrowBitSet(b2BitSet* bitSet, uint32_t wordCount) +{ + B2_ASSERT(wordCount > bitSet->wordCount); + if (wordCount > bitSet->wordCapacity) + { + uint32_t oldCapacity = bitSet->wordCapacity; + bitSet->wordCapacity = wordCount + wordCount / 2; + uint64_t* newBits = b2Alloc(bitSet->wordCapacity * sizeof(uint64_t)); + memset(newBits, 0, bitSet->wordCapacity * sizeof(uint64_t)); + memcpy(newBits, bitSet->bits, bitSet->wordCount * sizeof(uint64_t)); + b2Free(bitSet->bits, oldCapacity * sizeof(uint64_t)); + 
bitSet->bits = newBits; + } + + bitSet->wordCount = wordCount; +} + +void b2InPlaceUnion(b2BitSet* restrict setA, const b2BitSet* restrict setB) { B2_ASSERT(setA->wordCount == setB->wordCount); uint32_t wordCount = setA->wordCount; diff --git a/src/bitset.h b/src/bitset.h index 3d240eeb..36e83991 100644 --- a/src/bitset.h +++ b/src/bitset.h @@ -20,12 +20,44 @@ b2BitSet b2CreateBitSet(uint32_t bitCapacity); void b2DestroyBitSet(b2BitSet* bitSet); void b2SetBitCountAndClear(b2BitSet* bitset, uint32_t bitCount); void b2InPlaceUnion(b2BitSet* setA, const b2BitSet* setB); +void b2GrowBitSet(b2BitSet* set, uint32_t wordCount); static inline void b2SetBit(b2BitSet* bitSet, uint32_t bitIndex) { uint32_t wordIndex = bitIndex / 64; + // TODO_ERIN support growing B2_ASSERT(wordIndex < bitSet->wordCount); - bitSet->bits[wordIndex] |= ((uint64_t)1) << (bitIndex % 64); + bitSet->bits[wordIndex] |= ((uint64_t)1 << bitIndex % 64); +} + +static inline void b2SetBitGrow(b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + b2GrowBitSet(bitSet, wordIndex + 1); + } + bitSet->bits[wordIndex] |= ((uint64_t)1 << bitIndex % 64); +} + +static inline void b2ClearBit(b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + return; + } + bitSet->bits[wordIndex] &= ~((uint64_t)1 << bitIndex % 64); +} + +static inline bool b2GetBit(const b2BitSet* bitSet, uint32_t bitIndex) +{ + uint32_t wordIndex = bitIndex / 64; + if (wordIndex >= bitSet->wordCount) + { + return false; + } + return (bitSet->bits[wordIndex] & ((uint64_t)1 << bitIndex % 64)) != 0; } #if defined(_MSC_VER) && !defined(__clang__) diff --git a/src/body.c b/src/body.c index 25b0d10b..681d50d3 100644 --- a/src/body.c +++ b/src/body.c @@ -9,6 +9,7 @@ #include "body.h" #include "contact.h" #include "core.h" +#include "graph.h" #include "island.h" #include "joint.h" #include "world.h" @@ -46,6 +47,8 @@ 
b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->localCenter = b2Vec2_zero; b->linearVelocity = def->linearVelocity; b->angularVelocity = def->angularVelocity; + b->deltaPosition = b2Vec2_zero; + b->deltaAngle = 0.0f; b->force = b2Vec2_zero; b->torque = 0.0f; b->shapeList = B2_NULL_INDEX; @@ -64,7 +67,6 @@ b2BodyId b2World_CreateBody(b2WorldId worldId, const b2BodyDef* def) b->sleepTime = 0.0f; b->userData = def->userData; b->world = worldId.index; - b->islandIndex = 0; b->enableSleep = def->enableSleep; b->fixedRotation = def->fixedRotation; b->isEnabled = def->isEnabled; @@ -127,6 +129,12 @@ void b2World_DestroyBody(b2BodyId bodyId) int32_t twinIndex = twinKey & 1; b2Contact* contact = world->contacts + contactIndex; + + if (contact->colorIndex != B2_NULL_INDEX) + { + b2RemoveContactFromGraph(world, contact); + } + b2ContactEdge* twin = contact->edges + twinIndex; // Remove contact from other body's doubly linked list @@ -228,23 +236,8 @@ void b2World_DestroyBody(b2BodyId bodyId) B2_ASSERT(island->contactCount == 0); B2_ASSERT(island->jointCount == 0); - // Remove from awake islands array - if (island->awakeIndex != B2_NULL_INDEX) - { - int32_t islandCount = b2Array(world->awakeIslandArray).count; - B2_ASSERT(islandCount > 0); - b2Array_RemoveSwap(world->awakeIslandArray, island->awakeIndex); - if (island->awakeIndex < islandCount - 1) - { - // Fix awake index on swapped island - int32_t swappedIslandIndex = world->awakeIslandArray[island->awakeIndex]; - world->islands[swappedIslandIndex].awakeIndex = island->awakeIndex; - } - } - // Free the island b2DestroyIsland(island); - b2FreeObject(&world->islandPool, &island->object); islandDestroyed = true; } } @@ -256,7 +249,7 @@ void b2World_DestroyBody(b2BodyId bodyId) if (islandDestroyed == false) { b2WakeIsland(island); - b2ValidateIsland(island); + b2ValidateIsland(island, true); } } @@ -678,7 +671,7 @@ bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB) int32_t 
otherEdgeIndex = edgeIndex ^ 1; b2Joint* joint = world->joints + jointIndex; - if (joint->edges[otherEdgeIndex].bodyIndex == otherBodyIndex) + if (joint->collideConnected == false && joint->edges[otherEdgeIndex].bodyIndex == otherBodyIndex) { return false; } diff --git a/src/body.h b/src/body.h index 15449cb8..55b6821b 100644 --- a/src/body.h +++ b/src/body.h @@ -36,6 +36,10 @@ typedef struct b2Body b2Vec2 linearVelocity; float angularVelocity; + // These are the change in position/angle that accumulate across constraint substeps + b2Vec2 deltaPosition; + float deltaAngle; + b2Vec2 force; float torque; @@ -79,6 +83,22 @@ typedef struct b2Body bool enlargeAABB; } b2Body; +// TODO_ERIN every non-static body gets a solver body. No solver bodies for static bodies to avoid cross thread sharing and the cache misses they bring. +// Keep two solver body arrays: awake and sleeping +// 12 + 12 + 8 = 32 bytes +typedef struct b2SolverBody +{ + b2Vec2 linearVelocity; // 8 + float angularVelocity; // 4 + + // These are the change in position/angle that accumulate across constraint substeps + b2Vec2 deltaPosition; // 8 + float deltaAngle; // 4 + + float invMass; // 4 + float invI; // 4 +} b2SolverBody; + bool b2ShouldBodiesCollide(b2World* world, b2Body* bodyA, b2Body* bodyB); b2ShapeId b2Body_CreatePolygon(b2BodyId bodyId, const b2ShapeDef* def, const b2Polygon* polygon); diff --git a/src/box2d.natvis b/src/box2d.natvis new file mode 100644 index 00000000..89e4f92b --- /dev/null +++ b/src/box2d.natvis @@ -0,0 +1,27 @@ + + + + [{m128_f32[0]}, {m128_f32[1]}, {m128_f32[2]}, {m128_f32[3]}] + + m128_f32[0] + m128_f32[1] + m128_f32[2] + m128_f32[3] + (void*)this + + + + [{m256_f32[0]}, {m256_f32[1]}, {m256_f32[2]}, {m256_f32[3]}, {m256_f32[4]}, {m256_f32[5]}, {m256_f32[6]}, {m256_f32[7]}] + + m256_f32[0] + m256_f32[1] + m256_f32[2] + m256_f32[3] + m256_f32[4] + m256_f32[5] + m256_f32[6] + m256_f32[7] + (void*)this + + + diff --git a/src/broad_phase.c b/src/broad_phase.c index 
fbea52f9..3065bd7c 100644 --- a/src/broad_phase.c +++ b/src/broad_phase.c @@ -75,7 +75,7 @@ void b2DestroyBroadPhase(b2BroadPhase* bp) static inline void b2UnBufferMove(b2BroadPhase* bp, int32_t proxyKey) { - bool found = b2RemoveKey(&bp->moveSet, proxyKey); + bool found = b2RemoveKey(&bp->moveSet, proxyKey + 1); if (found) { @@ -177,8 +177,8 @@ static bool b2PairQueryCallback(int32_t proxyId, int32_t shapeIndex, void* conte return true; } - bool moved = b2ContainsKey(&bp->moveSet, proxyKey); - if (moved && proxyKey > queryContext->queryProxyKey) + bool moved = b2ContainsKey(&bp->moveSet, proxyKey + 1); + if (moved && proxyKey < queryContext->queryProxyKey) { // Both proxies are moving. Avoid duplicate pairs. return true; @@ -234,6 +234,7 @@ static bool b2PairQueryCallback(int32_t proxyId, int32_t shapeIndex, void* conte return true; } + // TODO_ERIN per thread to eliminate atomic? int pairIndex = atomic_fetch_add(&bp->movePairIndex, 1); b2MovePair* pair; @@ -282,7 +283,7 @@ void b2FindPairsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, continue; } - int32_t proxyType = B2_PROXY_TYPE(proxyKey); + b2BodyType proxyType = B2_PROXY_TYPE(proxyKey); int32_t proxyId = B2_PROXY_ID(proxyKey); queryContext.queryProxyKey = proxyKey; @@ -296,12 +297,12 @@ void b2FindPairsTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, // Query trees if (proxyType == b2_dynamicBody) { - queryContext.queryTreeType = b2_dynamicBody; - b2DynamicTree_Query(bp->trees + b2_dynamicBody, fatAABB, b2PairQueryCallback, &queryContext); - queryContext.queryTreeType = b2_kinematicBody; - b2DynamicTree_Query(bp->trees + b2_kinematicBody, fatAABB, b2PairQueryCallback, &queryContext); queryContext.queryTreeType = b2_staticBody; b2DynamicTree_Query(bp->trees + b2_staticBody, fatAABB, b2PairQueryCallback, &queryContext); + queryContext.queryTreeType = b2_kinematicBody; + b2DynamicTree_Query(bp->trees + b2_kinematicBody, fatAABB, b2PairQueryCallback, &queryContext); + 
queryContext.queryTreeType = b2_dynamicBody; + b2DynamicTree_Query(bp->trees + b2_dynamicBody, fatAABB, b2PairQueryCallback, &queryContext); } else if (proxyType == b2_kinematicBody) { diff --git a/src/broad_phase.h b/src/broad_phase.h index e000eb58..c9c0a760 100644 --- a/src/broad_phase.h +++ b/src/broad_phase.h @@ -65,7 +65,8 @@ void b2ValidateNoEnlarged(const b2BroadPhase* bp); // Warning: this must be called in deterministic order static inline void b2BufferMove(b2BroadPhase* bp, int32_t proxyKey) { - bool alreadyAdded = b2AddKey(&bp->moveSet, proxyKey); + // Adding 1 because 0 is the sentinel + bool alreadyAdded = b2AddKey(&bp->moveSet, proxyKey + 1); if (alreadyAdded == false) { b2Array_Push(bp->moveArray, proxyKey); diff --git a/src/contact.c b/src/contact.c index 1413d179..ae928121 100644 --- a/src/contact.c +++ b/src/contact.c @@ -197,6 +197,9 @@ void b2CreateContact(b2World* world, b2Shape* shapeA, b2Shape* shapeB) contact->islandIndex = B2_NULL_INDEX; contact->islandPrev = B2_NULL_INDEX; contact->islandNext = B2_NULL_INDEX; + contact->colorSubIndex = B2_NULL_INDEX; + contact->colorIndex = B2_NULL_INDEX; + contact->isMarked = false; b2Body* bodyA = world->bodies + shapeA->bodyIndex; b2Body* bodyB = world->bodies + shapeB->bodyIndex; @@ -268,6 +271,11 @@ void b2DestroyContact(b2World* world, b2Contact* contact) b2Body* bodyA = world->bodies + edgeA->bodyIndex; b2Body* bodyB = world->bodies + edgeB->bodyIndex; + if (contact->colorIndex != B2_NULL_INDEX) + { + b2RemoveContactFromGraph(world, contact); + } + // if (contactListener && contact->IsTouching()) //{ // contactListener->EndContact(contact); @@ -411,6 +419,8 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body for (int32_t i = 0; i < contact->manifold.pointCount; ++i) { b2ManifoldPoint* mp2 = contact->manifold.points + i; + mp2->anchorA = b2Sub(mp2->point, bodyA->position); + mp2->anchorB = b2Sub(mp2->point, bodyB->position); mp2->normalImpulse = 0.0f; 
mp2->tangentImpulse = 0.0f; mp2->persisted = false; @@ -428,18 +438,13 @@ void b2UpdateContact(b2World* world, b2Contact* contact, b2Shape* shapeA, b2Body break; } } - - // For debugging ids - // if (mp2->persisted == false && contact->manifold.pointCount == oldManifold.pointCount) - //{ - // i += 0; - //} } if (touching && world->preSolveFcn) { // TODO_ERIN this call assumes thread safety - bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, world->preSolveContext); + int32_t colorIndex = contact->colorIndex; + bool collide = world->preSolveFcn(shapeIdA, shapeIdB, &contact->manifold, colorIndex, world->preSolveContext); if (collide == false) { // disable contact diff --git a/src/contact.h b/src/contact.h index 0b2018d4..26dcd498 100644 --- a/src/contact.h +++ b/src/contact.h @@ -50,8 +50,6 @@ enum b2ContactFlags // This contact stopped touching b2_contactStoppedTouching = 0x00000080, - - b2_contactIslandFlag = 0x0100 }; /// The class manages contact between two shapes. A contact exists for each overlapping @@ -63,8 +61,11 @@ typedef struct b2Contact uint32_t flags; - // This is too hot and has been moved to a separate array - //int32_t awakeIndex; + // The color of this constraint in the graph coloring + int32_t colorIndex; + + // Index of contact within color + int32_t colorSubIndex; b2ContactEdge edges[2]; @@ -85,6 +86,8 @@ typedef struct b2Contact // For conveyor belts float tangentSpeed; + + bool isMarked; } b2Contact; void b2InitializeContactRegisters(void); diff --git a/src/contact_solver.c b/src/contact_solver.c index f0c439c5..6cf23db6 100644 --- a/src/contact_solver.c +++ b/src/contact_solver.c @@ -7,904 +7,1056 @@ #include "body.h" #include "contact.h" #include "core.h" -#include "stack_allocator.h" +#include "graph.h" #include "world.h" -// Solver debugging is normally disabled because the block solver sometimes has to deal with a poorly conditioned -// effective mass matrix. 
-#define B2_DEBUG_SOLVER 0 +#include -typedef struct b2VelocityConstraintPoint -{ - b2Vec2 rA; - b2Vec2 rB; - float normalImpulse; - float tangentImpulse; - float normalMass; - float tangentMass; - float velocityBias; - float relativeVelocity; -} b2VelocityConstraintPoint; - -typedef struct b2ContactVelocityConstraint -{ - b2Contact* contact; - b2VelocityConstraintPoint points[2]; - b2Vec2 normal; - b2Mat22 normalMass; - b2Mat22 K; - float friction; - float restitution; - float tangentSpeed; - int32_t pointCount; -} b2ContactVelocityConstraint; - -typedef struct b2ContactPositionConstraint -{ - b2Contact* contact; - b2Vec2 localAnchorsA[2]; - b2Vec2 localAnchorsB[2]; - float separations[2]; - float lambdas[2]; - b2Vec2 normal; - int32_t pointCount; -} b2ContactPositionConstraint; - -b2ContactSolver* b2CreateContactSolver(b2ContactSolverDef* def) -{ - b2StackAllocator* alloc = def->world->stackAllocator; - - b2ContactSolver* solver = b2AllocateStackItem(alloc, sizeof(b2ContactSolver), "contact solver"); - solver->context = def->context; - solver->contactList = def->contactList; - solver->contactCount = def->contactCount; - - // These are allocated conservatively because some island contacts may not have contact points - solver->positionConstraints = - b2AllocateStackItem(alloc, solver->contactCount * sizeof(b2ContactPositionConstraint), "position constraints"); - solver->velocityConstraints = - b2AllocateStackItem(alloc, solver->contactCount * sizeof(b2ContactVelocityConstraint), "velocity constraints"); - - solver->world = def->world; - solver->constraintCount = 0; - return solver; -} +// Soft constraints with constraint error substepping. Includes a bias removal stage to help remove excess energy. 
+// http://mmacklin.com/smallsteps.pdf +// https://box2d.org/files/ErinCatto_SoftConstraints_GDC2011.pdf -void b2ContactSolver_Initialize(b2ContactSolver* solver) +void b2PrepareOverflowContacts(b2SolverTaskContext* context) { - b2World* world = solver->world; + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); + + b2World* world = context->world; + b2Graph* graph = context->graph; b2Contact* contacts = world->contacts; - const b2StepContext* context = solver->context; - b2Body* bodies = world->bodies; + const int32_t* bodyMap = context->bodyToSolverMap; + b2SolverBody* solverBodies = context->solverBodies; + + b2ContactConstraint* constraints = graph->overflow.contactConstraints; + int32_t* contactIndices = graph->overflow.contactArray; + int32_t contactCount = b2Array(graph->overflow.contactArray).count; + + // This is a dummy body to represent a static body because static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = world->contactHertz; - // Initialize position independent portions of the constraints. 
- int32_t constraintCount = 0; - int32_t contactIndex = solver->contactList; - while (contactIndex != B2_NULL_INDEX) + float h = context->timeStep; + bool enableWarmStarting = world->enableWarmStarting; + + for (int32_t i = 0; i < contactCount; ++i) { - b2Contact* contact = contacts + contactIndex; - contactIndex = contact->islandNext; + b2Contact* contact = contacts + contactIndices[i]; const b2Manifold* manifold = &contact->manifold; int32_t pointCount = manifold->pointCount; - if (pointCount == 0) + B2_ASSERT(0 < pointCount && pointCount <= 2); + + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; + + b2ContactConstraint* constraint = constraints + i; + constraint->contact = contact; + constraint->indexA = indexA; + constraint->indexB = indexB; + constraint->normal = manifold->normal; + constraint->friction = contact->friction; + constraint->restitution = contact->restitution; + constraint->pointCount = pointCount; + + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 
2.0f * contactHertz : contactHertz; + b2Vec2 vA = solverBodyA->linearVelocity; + float wA = solverBodyA->angularVelocity; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; + + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float c = h * omega * (2.0f * zeta + h * omega); + constraint->impulseCoefficient = 1.0f / (1.0f + c); + constraint->massCoefficient = c * constraint->impulseCoefficient; + constraint->biasCoefficient = omega / (2.0f * zeta + h * omega); + + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(constraint->normal); + + for (int32_t j = 0; j < pointCount; ++j) { - continue; - } + const b2ManifoldPoint* mp = manifold->points + j; + b2ContactConstraintPoint* cp = constraint->points + j; - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - - b2ContactVelocityConstraint* vc = solver->velocityConstraints + constraintCount; - vc->contact = contact; - vc->normal = manifold->normal; - vc->friction = contact->friction; - vc->restitution = contact->restitution; - vc->tangentSpeed = contact->tangentSpeed; - vc->pointCount = pointCount; - vc->K = b2Mat22_zero; - vc->normalMass = b2Mat22_zero; - - b2ContactPositionConstraint* pc = solver->positionConstraints + constraintCount; - pc->contact = contact; - pc->normal = manifold->normal; - pc->pointCount = pointCount; + cp->normalImpulse = mp->normalImpulse; + cp->tangentImpulse = mp->tangentImpulse; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; + cp->rA = mp->anchorA; + cp->rB = mp->anchorB; - b2Rot qA = bodyA->transform.q; - b2Vec2 cA = bodyA->position; - 
b2Rot qB = bodyB->transform.q; - b2Vec2 cB = bodyB->position; + float rnA = b2Cross(cp->rA, normal); + float rnB = b2Cross(cp->rB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; - // TODO_ERIN testing - // qA = b2MakeRot(bodyA->angle); - // qB = b2MakeRot(bodyB->angle); + float rtA = b2Cross(cp->rA, tangent); + float rtB = b2Cross(cp->rB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + cp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + cp->separation = mp->separation; + cp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; - for (int32_t j = 0; j < pointCount; ++j) - { - const b2ManifoldPoint* cp = manifold->points + j; - b2VelocityConstraintPoint* vcp = vc->points + j; + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + cp->relativeVelocity = b2Dot(normal, b2Sub(vrB, vrA)); - if (context->warmStarting) + // Warm start + if (enableWarmStarting) { - vcp->normalImpulse = context->dtRatio * cp->normalImpulse; - vcp->tangentImpulse = context->dtRatio * cp->tangentImpulse; - } - else - { - vcp->normalImpulse = 0.0f; - vcp->tangentImpulse = 0.0f; + b2Vec2 P = b2Add(b2MulSV(cp->normalImpulse, normal), b2MulSV(cp->tangentImpulse, tangent)); + wA -= iA * b2Cross(cp->rA, P); + vA = b2MulAdd(vA, -mA, P); + wB += iB * b2Cross(cp->rB, P); + vB = b2MulAdd(vB, mB, P); } + } - vcp->rA = b2Sub(cp->point, cA); - vcp->rB = b2Sub(cp->point, cB); - - float rnA = b2Cross(vcp->rA, vc->normal); - float rnB = b2Cross(vcp->rB, vc->normal); + solverBodyA->linearVelocity = vA; + solverBodyA->angularVelocity = wA; + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; + } - float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + b2TracyCZoneEnd(prepare_contact); +} - 
vcp->normalMass = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; +void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias) +{ + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - b2Vec2 tangent = b2CrossVS(vc->normal, 1.0f); + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; + float inv_dt = context->invTimeStep; + const float pushout = context->world->maximumPushoutVelocity; - float rtA = b2Cross(vcp->rA, tangent); - float rtB = b2Cross(vcp->rB, tangent); + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; - float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + for (int32_t i = 0; i < count; ++i) + { + b2ContactConstraint* constraint = constraints + i; - vcp->tangentMass = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + b2Vec2 dpA = bodyA->deltaPosition; + float daA = bodyA->deltaAngle; + float mA = bodyA->invMass; + float iA = bodyA->invI; - // Velocity bias for speculative collision - vcp->velocityBias = -B2_MAX(0.0f, cp->separation * context->inv_dt); + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? 
&dummyBody : bodies + constraint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + b2Vec2 dpB = bodyB->deltaPosition; + float daB = bodyB->deltaAngle; + float mB = bodyB->invMass; + float iB = bodyB->invI; - // Relative velocity - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - vcp->relativeVelocity = b2Dot(vc->normal, b2Sub(vrB, vrA)); + b2Vec2 normal = constraint->normal; + b2Vec2 tangent = b2RightPerp(normal); + float friction = constraint->friction; + float biasCoefficient = constraint->biasCoefficient; + float massCoefficient = constraint->massCoefficient; + float impulseCoefficient = constraint->impulseCoefficient; - pc->localAnchorsA[j] = b2InvRotateVector(qA, vcp->rA); - pc->localAnchorsB[j] = b2InvRotateVector(qB, vcp->rB); - pc->separations[j] = cp->separation; - pc->lambdas[j] = 0.0f; - } + int32_t pointCount = constraint->pointCount; - // If we have two points, then prepare the block solver. - if (vc->pointCount == 2) + for (int32_t j = 0; j < pointCount; ++j) { - b2VelocityConstraintPoint* vcp1 = vc->points + 0; - b2VelocityConstraintPoint* vcp2 = vc->points + 1; - - float rn1A = b2Cross(vcp1->rA, vc->normal); - float rn1B = b2Cross(vcp1->rB, vc->normal); - float rn2A = b2Cross(vcp2->rA, vc->normal); - float rn2B = b2Cross(vcp2->rB, vc->normal); + b2ContactConstraintPoint* cp = constraint->points + j; - float k11 = mA + mB + iA * rn1A * rn1A + iB * rn1B * rn1B; - float k22 = mA + mB + iA * rn2A * rn2A + iB * rn2B * rn2B; - float k12 = mA + mB + iA * rn1A * rn2A + iB * rn1B * rn2B; + // Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - // Ensure a reasonable condition number. 
- const float k_maxConditionNumber = 1000.0f; - if (k11 * k11 < k_maxConditionNumber * (k11 * k22 - k12 * k12)) + // Compute change in separation (small angle approximation of sin(angle) == angle) + b2Vec2 prB = b2Add(dpB, b2CrossSV(daB, cp->rB)); + b2Vec2 prA = b2Add(dpA, b2CrossSV(daA, cp->rA)); + float ds = b2Dot(b2Sub(prB, prA), normal); + float s = cp->separation + ds; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (s > 0.0f) { - // K is safe to invert. - vc->K.cx = (b2Vec2){k11, k12}; - vc->K.cy = (b2Vec2){k12, k22}; - vc->normalMass = b2GetInverse22(vc->K); + // TODO_ERIN what time to use? + // Speculative (inverse of full time step) + bias = s * inv_dt; } - else + else if (useBias) { - // The constraints are redundant, just use one. - // TODO_ERIN use deepest? - vc->pointCount = 1; + bias = B2_MAX(biasCoefficient * s, -pushout); + // bias = cp->biasCoefficient * s; + massScale = massCoefficient; + impulseScale = impulseCoefficient; } - } - constraintCount += 1; - } + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; + // float impulse = -cp->normalMass * (vn + bias + cp->gamma * cp->normalImpulse); - solver->constraintCount = constraintCount; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; - // Warm start - if (context->warmStarting) - { - for (int32_t i = 0; i < constraintCount; ++i) + // Apply contact impulse + b2Vec2 P = b2MulSV(impulse, normal); + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); + + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); + } + + for (int32_t j = 0; j < pointCount; ++j) { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; + b2ContactConstraintPoint* cp = constraint->points + j; - const b2Contact* contact = vc->contact; + // 
Relative velocity at contact + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); + b2Vec2 dv = b2Sub(vrB, vrA); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; + // Compute tangent force + float vt = b2Dot(dv, tangent); + float lambda = cp->tangentMass * (-vt); - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + // Clamp the accumulated force + float maxFriction = friction * cp->normalImpulse; + float newImpulse = B2_CLAMP(cp->tangentImpulse + lambda, -maxFriction, maxFriction); + lambda = newImpulse - cp->tangentImpulse; + cp->tangentImpulse = newImpulse; - b2Vec2 normal = vc->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); + // Apply contact impulse + b2Vec2 P = b2MulSV(lambda, tangent); - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; - b2Vec2 P = b2Add(b2MulSV(vcp->normalImpulse, normal), b2MulSV(vcp->tangentImpulse, tangent)); - wA -= iA * b2Cross(vcp->rA, P); - vA = b2MulAdd(vA, -mA, P); - wB += iB * b2Cross(vcp->rB, P); - vB = b2MulAdd(vB, mB, P); - } + vA = b2MulSub(vA, mA, P); + wA -= iA * b2Cross(cp->rA, P); - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + vB = b2MulAdd(vB, mB, P); + wB += iB * b2Cross(cp->rB, P); } + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; } + + b2TracyCZoneEnd(solve_contact); } -void b2ContactSolver_SolveVelocityConstraints(b2ContactSolver* solver) +void b2ApplyOverflowRestitution(b2SolverTaskContext* context) { - int32_t count 
= solver->constraintCount; + b2TracyCZoneNC(overflow_resitution, "Overflow Restitution", b2_colorViolet, true); - b2World* world = solver->world; - b2Body* bodies = world->bodies; + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; + float threshold = context->world->restitutionThreshold; + + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; for (int32_t i = 0; i < count; ++i) { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; - - const b2Contact* contact = vc->contact; + b2ContactConstraint* constraint = constraints + i; - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + float restitution = constraint->restitution; + if (restitution == 0.0f) + { + continue; + } + b2SolverBody* bodyA = constraint->indexA == B2_NULL_INDEX ? &dummyBody : bodies + constraint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; float mA = bodyA->invMass; float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; + b2SolverBody* bodyB = constraint->indexB == B2_NULL_INDEX ? 
&dummyBody : bodies + constraint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + float mB = bodyB->invMass; + float iB = bodyB->invI; - b2Vec2 normal = vc->normal; - b2Vec2 tangent = b2CrossVS(normal, 1.0f); - float friction = vc->friction; - - B2_ASSERT(pointCount == 1 || pointCount == 2); + b2Vec2 normal = constraint->normal; + int32_t pointCount = constraint->pointCount; - // Solve tangent constraints first because non-penetration is more important - // than friction. for (int32_t j = 0; j < pointCount; ++j) { - b2VelocityConstraintPoint* vcp = vc->points + j; + b2ContactConstraintPoint* cp = constraint->points + j; + + // if the normal impulse is zero then there was no collision + if (cp->relativeVelocity > -threshold || cp->normalImpulse == 0.0f) + { + continue; + } // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); + b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, cp->rB)); + b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, cp->rA)); b2Vec2 dv = b2Sub(vrB, vrA); - // Compute tangent force - float vt = b2Dot(dv, tangent) - vc->tangentSpeed; - float lambda = vcp->tangentMass * (-vt); + // Compute normal impulse + float vn = b2Dot(dv, normal); + float impulse = -cp->normalMass * (vn + restitution * cp->relativeVelocity); - // Clamp the accumulated force - float maxFriction = friction * vcp->normalImpulse; - float newImpulse = B2_CLAMP(vcp->tangentImpulse + lambda, -maxFriction, maxFriction); - lambda = newImpulse - vcp->tangentImpulse; - vcp->tangentImpulse = newImpulse; + // Clamp the accumulated impulse + float newImpulse = B2_MAX(cp->normalImpulse + impulse, 0.0f); + impulse = newImpulse - cp->normalImpulse; + cp->normalImpulse = newImpulse; // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, tangent); - + b2Vec2 P = b2MulSV(impulse, normal); vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + wA -= iA * b2Cross(cp->rA, P); vB = b2MulAdd(vB, mB, P); - wB += 
iB * b2Cross(vcp->rB, P); + wB += iB * b2Cross(cp->rB, P); } - // Solve normal constraints - if (pointCount == 1) - { - for (int32_t j = 0; j < pointCount; ++j) - { - b2VelocityConstraintPoint* vcp = vc->points + j; + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; + } - // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + b2TracyCZoneEnd(overflow_resitution); +} - // Compute normal impulse - float vn = b2Dot(dv, normal); - float lambda = -vcp->normalMass * (vn - vcp->velocityBias); +void b2StoreOverflowImpulses(b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); - // Clamp the accumulated impulse - float newImpulse = B2_MAX(vcp->normalImpulse + lambda, 0.0f); - lambda = newImpulse - vcp->normalImpulse; - vcp->normalImpulse = newImpulse; + b2ContactConstraint* constraints = context->graph->overflow.contactConstraints; + int32_t count = b2Array(context->graph->overflow.contactArray).count; - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + for (int32_t i = 0; i < count; ++i) + { + b2ContactConstraint* constraint = constraints + i; + b2Contact* contact = constraint->contact; + b2Manifold* manifold = &contact->manifold; + int32_t pointCount = manifold->pointCount; - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(vcp->rB, P); - } - } - else + for (int32_t j = 0; j < pointCount; ++j) { - // Block solver developed in collaboration with Dirk Gregorius (back in 01/07 on Box2D_Lite). - // Build the mini LCP for this contact patch - // - // vn = A * x + b, vn >= 0, x >= 0 and vn_i * x_i = 0 with i = 1..2 - // - // A = J * W * JT and J = ( -n, -r1 x n, n, r2 x n ) - // b = vn0 - velocityBias - // - // The system is solved using the "Total enumeration method" (s. Murty). 
The complementary constraint vn_i * - // x_i implies that we must have in any solution either vn_i = 0 or x_i = 0. So for the 2D contact problem - // the cases vn1 = 0 and vn2 = 0, x1 = 0 and x2 = 0, x1 = 0 and vn2 = 0, x2 = 0 and vn1 = 0 need to be - // tested. The first valid solution that satisfies the problem is chosen. - // - // In order to account of the accumulated impulse 'a' (because of the iterative nature of the solver which - // only requires that the accumulated impulse is clamped and not the incremental impulse) we change the - // impulse variable (x_i). - // - // Substitute: - // - // x = a + d - // - // a := old total impulse - // x := new total impulse - // d := incremental impulse - // - // For the current iteration we extend the formula for the incremental impulse - // to compute the new total impulse: - // - // vn = A * d + b - // = A * (x - a) + b - // = A * x + b - A * a - // = A * x + b' - // b' = b - A * a; - - b2VelocityConstraintPoint* cp1 = vc->points + 0; - b2VelocityConstraintPoint* cp2 = vc->points + 1; - - b2Vec2 a = {cp1->normalImpulse, cp2->normalImpulse}; - B2_ASSERT(a.x >= 0.0f && a.y >= 0.0f); - - // Relative velocity at contact - b2Vec2 vrA, vrB; - vrA = b2Add(vA, b2CrossSV(wA, cp1->rA)); - vrB = b2Add(vB, b2CrossSV(wB, cp1->rB)); - b2Vec2 dv1 = b2Sub(vrB, vrA); - vrA = b2Add(vA, b2CrossSV(wA, cp2->rA)); - vrB = b2Add(vB, b2CrossSV(wB, cp2->rB)); - b2Vec2 dv2 = b2Sub(vrB, vrA); - - // Compute normal velocity - float vn1 = b2Dot(dv1, normal); - float vn2 = b2Dot(dv2, normal); + manifold->points[j].normalImpulse = constraint->points[j].normalImpulse; + manifold->points[j].tangentImpulse = constraint->points[j].tangentImpulse; + } + } - b2Vec2 b = {vn1 - cp1->velocityBias, vn2 - cp2->velocityBias}; + b2TracyCZoneEnd(store_impulses); +} - // Compute b' - b = b2Sub(b, b2MulMV(vc->K, a)); +// SIMD WIP +#define add(a, b) _mm256_add_ps((a), (b)) +#define sub(a, b) _mm256_sub_ps((a), (b)) +#define mul(a, b) _mm256_mul_ps((a), (b)) 
+#define muladd(a, b, c) _mm256_add_ps((a), _mm256_mul_ps((b), (c))) +#define mulsub(a, b, c) _mm256_sub_ps((a), _mm256_mul_ps((b), (c))) - const float k_errorTol = 1e-3f; - B2_MAYBE_UNUSED(k_errorTol); +static inline __m256 b2CrossW(b2Vec2W a, b2Vec2W b) +{ + return sub(mul(a.X, b.Y), mul(a.Y, b.X)); +} - for (;;) - { - // - // Case 1: vn = 0 - // - // 0 = A * x + b' - // - // Solve for x: - // - // x = - inv(A) * b' - // - b2Vec2 x = b2Neg(b2MulMV(vc->normalMass, b)); - - if (x.x >= 0.0f && x.y >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = b2Sub(x, a); - - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); - - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); - - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; - -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv1 = vB + b2Cross(wB, cp1->rB) - vA - b2Cross(wA, cp1->rA); - dv2 = vB + b2Cross(wB, cp2->rB) - vA - b2Cross(wA, cp2->rA); - - // Compute normal velocity - vn1 = b2Dot(dv1, normal); - vn2 = b2Dot(dv2, normal); - - B2_ASSERT(b2Abs(vn1 - cp1->velocityBias) < k_errorTol); - B2_ASSERT(b2Abs(vn2 - cp2->velocityBias) < k_errorTol); -#endif - break; - } +typedef struct b2SimdBody +{ + b2Vec2W v; + __m256 w; + b2Vec2W dp; + __m256 da; + __m256 invM, invI; +} b2SimdBody; + +// This is a load and 8x8 transpose +static b2SimdBody b2GatherBodies(const b2SolverBody* restrict bodies, int32_t* restrict indices) +{ + _Static_assert(sizeof(b2SolverBody) == 32, "b2SolverBody not 32 bytes"); + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 zero = _mm256_setzero_ps(); + __m256 b0 = indices[0] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[0])); + __m256 b1 = indices[1] == B2_NULL_INDEX ? 
zero : _mm256_load_ps((float*)(bodies + indices[1])); + __m256 b2 = indices[2] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[2])); + __m256 b3 = indices[3] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[3])); + __m256 b4 = indices[4] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[4])); + __m256 b5 = indices[5] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[5])); + __m256 b6 = indices[6] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[6])); + __m256 b7 = indices[7] == B2_NULL_INDEX ? zero : _mm256_load_ps((float*)(bodies + indices[7])); + + __m256 t0 = _mm256_unpacklo_ps(b0, b1); + __m256 t1 = _mm256_unpackhi_ps(b0, b1); + __m256 t2 = _mm256_unpacklo_ps(b2, b3); + __m256 t3 = _mm256_unpackhi_ps(b2, b3); + __m256 t4 = _mm256_unpacklo_ps(b4, b5); + __m256 t5 = _mm256_unpackhi_ps(b4, b5); + __m256 t6 = _mm256_unpacklo_ps(b6, b7); + __m256 t7 = _mm256_unpackhi_ps(b6, b7); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + b2SimdBody simdBody; + simdBody.v.X = _mm256_permute2f128_ps(tt0, tt4, 0x20); + simdBody.v.Y = _mm256_permute2f128_ps(tt1, tt5, 0x20); + simdBody.w = _mm256_permute2f128_ps(tt2, tt6, 0x20); + simdBody.dp.X = _mm256_permute2f128_ps(tt3, tt7, 0x20); + simdBody.dp.Y = _mm256_permute2f128_ps(tt0, tt4, 0x31); + simdBody.da = _mm256_permute2f128_ps(tt1, tt5, 0x31); + simdBody.invM = _mm256_permute2f128_ps(tt2, tt6, 0x31); + simdBody.invI = _mm256_permute2f128_ps(tt3, tt7, 0x31); + 
+ return simdBody; +} - // - // Case 2: vn1 = 0 and x2 = 0 - // - // 0 = a11 * x1 + a12 * 0 + b1' - // vn2 = a21 * x1 + a22 * 0 + b2' - // - x.x = -cp1->normalMass * b.x; - x.y = 0.0f; - vn1 = 0.0f; - vn2 = vc->K.cx.y * x.x + b.y; - if (x.x >= 0.0f && vn2 >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = b2Sub(x, a); +// This writes everything back to the solver bodies but only the velocities change +static void b2ScatterBodies(b2SolverBody* restrict bodies, int32_t* restrict indices, const b2SimdBody* restrict simdBody) +{ + _Static_assert(sizeof(b2SolverBody) == 32, "b2SolverBody not 32 bytes"); + B2_ASSERT(((uintptr_t)bodies & 0x1F) == 0); + __m256 t0 = _mm256_unpacklo_ps(simdBody->v.X, simdBody->v.Y); + __m256 t1 = _mm256_unpackhi_ps(simdBody->v.X, simdBody->v.Y); + __m256 t2 = _mm256_unpacklo_ps(simdBody->w, simdBody->dp.X); + __m256 t3 = _mm256_unpackhi_ps(simdBody->w, simdBody->dp.X); + __m256 t4 = _mm256_unpacklo_ps(simdBody->dp.Y, simdBody->da); + __m256 t5 = _mm256_unpackhi_ps(simdBody->dp.Y, simdBody->da); + __m256 t6 = _mm256_unpacklo_ps(simdBody->invM, simdBody->invI); + __m256 t7 = _mm256_unpackhi_ps(simdBody->invM, simdBody->invI); + __m256 tt0 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt1 = _mm256_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt2 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt3 = _mm256_shuffle_ps(t1, t3, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt4 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt5 = _mm256_shuffle_ps(t4, t6, _MM_SHUFFLE(3, 2, 3, 2)); + __m256 tt6 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(1, 0, 1, 0)); + __m256 tt7 = _mm256_shuffle_ps(t5, t7, _MM_SHUFFLE(3, 2, 3, 2)); + + // I don't use any dummy body in the body array because this will lead to multithreaded sharing and the + // associated cache flushing. 
+ if (indices[0] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[0]), _mm256_permute2f128_ps(tt0, tt4, 0x20)); + if (indices[1] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[1]), _mm256_permute2f128_ps(tt1, tt5, 0x20)); + if (indices[2] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[2]), _mm256_permute2f128_ps(tt2, tt6, 0x20)); + if (indices[3] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[3]), _mm256_permute2f128_ps(tt3, tt7, 0x20)); + if (indices[4] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[4]), _mm256_permute2f128_ps(tt0, tt4, 0x31)); + if (indices[5] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[5]), _mm256_permute2f128_ps(tt1, tt5, 0x31)); + if (indices[6] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[6]), _mm256_permute2f128_ps(tt2, tt6, 0x31)); + if (indices[7] != B2_NULL_INDEX) + _mm256_store_ps((float*)(bodies + indices[7]), _mm256_permute2f128_ps(tt3, tt7, 0x31)); +} - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); +void b2PrepareContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(prepare_contact, "Prepare Contact", b2_colorYellow, true); - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); + b2World* world = context->world; + b2Contact* contacts = world->contacts; + const int32_t* bodyMap = context->bodyToSolverMap; + b2SolverBody* solverBodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->constraintAVXs; + const int32_t* contactIndices = context->contactIndices; + + // This is a dummy body to represent a static body since static bodies don't have a solver body. 
+ b2SolverBody dummyBody = {0}; + + // 30 is a bit soft, 60 oscillates too much + // const float contactHertz = 45.0f; + // const float contactHertz = B2_MAX(15.0f, stepContext->inv_dt * stepContext->velocityIterations / 8.0f); + const float contactHertz = world->contactHertz; + + float h = context->timeStep; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraintAVX* constraint = constraints + i; - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); + for (int32_t j = 0; j < 8; ++j) + { + int32_t contactIndex = contactIndices[8 * i + j]; - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; + if (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = contacts + contactIndex; -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv1 = vB + b2Cross(wB, cp1->rB) - vA - b2Cross(wA, cp1->rA); + const b2Manifold* manifold = &contact->manifold; + int32_t indexA = bodyMap[contact->edges[0].bodyIndex]; + int32_t indexB = bodyMap[contact->edges[1].bodyIndex]; - // Compute normal velocity - vn1 = b2Dot(dv1, normal); + constraint->indexA[j] = indexA; + constraint->indexB[j] = indexB; - B2_ASSERT(b2Abs(vn1 - cp1->velocityBias) < k_errorTol); -#endif - break; - } + b2SolverBody* solverBodyA = indexA == B2_NULL_INDEX ? &dummyBody : solverBodies + indexA; + b2SolverBody* solverBodyB = indexB == B2_NULL_INDEX ? &dummyBody : solverBodies + indexB; + float mA = solverBodyA->invMass; + float iA = solverBodyA->invI; + float mB = solverBodyB->invMass; + float iB = solverBodyB->invI; - // - // Case 3: vn2 = 0 and x1 = 0 - // - // vn1 = a11 * 0 + a12 * x2 + b1' - // 0 = a21 * 0 + a22 * x2 + b2' - // - x.x = 0.0f; - x.y = -cp2->normalMass * b.y; - vn1 = vc->K.cy.x * x.y + b.x; - vn2 = 0.0f; - - if (x.y >= 0.0f && vn1 >= 0.0f) - { - // Resubstitute for the incremental impulse - b2Vec2 d = b2Sub(x, a); + float hertz = (indexA == B2_NULL_INDEX || indexB == B2_NULL_INDEX) ? 
2.0f * contactHertz : contactHertz; - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); + // Stiffer for static contacts to avoid bodies getting pushed through the ground + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float d = (2.0f * zeta + h * omega); + float c = h * omega * d; + float impulseCoefficient = 1.0f / (1.0f + c); - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); + ((float*)&constraint->friction)[j] = contact->friction; + ((float*)&constraint->restitution)[j] = contact->restitution; + ((float*)&constraint->impulseCoefficient)[j] = impulseCoefficient; + ((float*)&constraint->massCoefficient)[j] = c * impulseCoefficient; + ((float*)&constraint->biasCoefficient)[j] = omega / d; - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); + b2Vec2 normal = manifold->normal; + ((float*)&constraint->normal.X)[j] = normal.x; + ((float*)&constraint->normal.Y)[j] = normal.y; - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; + b2Vec2 tangent = b2RightPerp(normal); -#if B2_DEBUG_SOLVER == 1 - // Postconditions - dv2 = vB + b2Cross(wB, cp2->rB) - vA - b2Cross(wA, cp2->rA); + { + const b2ManifoldPoint* mp = manifold->points + 0; + ((float*)&constraint->separation1)[j] = mp->separation; + ((float*)&constraint->normalImpulse1)[j] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse1)[j] = mp->tangentImpulse; + + ((float*)&constraint->rA1.X)[j] = mp->anchorA.x; + ((float*)&constraint->rA1.Y)[j] = mp->anchorA.y; + ((float*)&constraint->rB1.X)[j] = mp->anchorB.x; + ((float*)&constraint->rB1.Y)[j] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass1)[j] = kNormal > 0.0f ? 
1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass1)[j] = kTangent > 0.0f ? 1.0f / kTangent : 0.0f; + + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(solverBodyA->linearVelocity, b2CrossSV(solverBodyA->angularVelocity, mp->anchorA)); + b2Vec2 vrB = b2Add(solverBodyB->linearVelocity, b2CrossSV(solverBodyB->angularVelocity, mp->anchorB)); + ((float*)&constraint->relativeVelocity1)[j] = b2Dot(normal, b2Sub(vrB, vrA)); + } - // Compute normal velocity - vn2 = b2Dot(dv2, normal); + int32_t pointCount = manifold->pointCount; + B2_ASSERT(0 < pointCount && pointCount <= 2); - B2_ASSERT(b2Abs(vn2 - cp2->velocityBias) < k_errorTol); -#endif - break; + if (pointCount == 2) + { + const b2ManifoldPoint* mp = manifold->points + 1; + ((float*)&constraint->separation2)[j] = mp->separation; + ((float*)&constraint->normalImpulse2)[j] = mp->normalImpulse; + ((float*)&constraint->tangentImpulse2)[j] = mp->tangentImpulse; + + ((float*)&constraint->rA2.X)[j] = mp->anchorA.x; + ((float*)&constraint->rA2.Y)[j] = mp->anchorA.y; + ((float*)&constraint->rB2.X)[j] = mp->anchorB.x; + ((float*)&constraint->rB2.Y)[j] = mp->anchorB.y; + + float rnA = b2Cross(mp->anchorA, normal); + float rnB = b2Cross(mp->anchorB, normal); + float kNormal = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + ((float*)&constraint->normalMass2)[j] = kNormal > 0.0f ? 1.0f / kNormal : 0.0f; + + float rtA = b2Cross(mp->anchorA, tangent); + float rtB = b2Cross(mp->anchorB, tangent); + float kTangent = mA + mB + iA * rtA * rtA + iB * rtB * rtB; + ((float*)&constraint->tangentMass2)[j] = kTangent > 0.0f ? 
1.0f / kTangent : 0.0f; + + // Save relative velocity for restitution + b2Vec2 vrA = b2Add(solverBodyA->linearVelocity, b2CrossSV(solverBodyA->angularVelocity, mp->anchorA)); + b2Vec2 vrB = b2Add(solverBodyB->linearVelocity, b2CrossSV(solverBodyB->angularVelocity, mp->anchorB)); + ((float*)&constraint->relativeVelocity2)[j] = b2Dot(normal, b2Sub(vrB, vrA)); } - - // - // Case 4: x1 = 0 and x2 = 0 - // - // vn1 = b1 - // vn2 = b2; - x.x = 0.0f; - x.y = 0.0f; - vn1 = b.x; - vn2 = b.y; - - if (vn1 >= 0.0f && vn2 >= 0.0f) + else { - // Resubstitute for the incremental impulse - b2Vec2 d = b2Sub(x, a); + // dummy data that has no effect + ((float*)&constraint->separation2)[j] = 0.0f; + ((float*)&constraint->normalImpulse2)[j] = 0.0f; + ((float*)&constraint->tangentImpulse2)[j] = 0.0f; + ((float*)&constraint->rA2.X)[j] = 0.0f; + ((float*)&constraint->rA2.Y)[j] = 0.0f; + ((float*)&constraint->rB2.X)[j] = 0.0f; + ((float*)&constraint->rB2.Y)[j] = 0.0f; + ((float*)&constraint->normalMass2)[j] = 0.0f; + ((float*)&constraint->tangentMass2)[j] = 0.0f; + ((float*)&constraint->relativeVelocity2)[j] = 0.0f; + } + } + else + { + // remainder + constraint->indexA[j] = B2_NULL_INDEX; + constraint->indexB[j] = B2_NULL_INDEX; + ((float*)&constraint->friction)[j] = 0.0f; + ((float*)&constraint->restitution)[j] = 0.0f; + ((float*)&constraint->impulseCoefficient)[j] = 0.0f; + ((float*)&constraint->massCoefficient)[j] = 0.0f; + ((float*)&constraint->biasCoefficient)[j] = 0.0f; + ((float*)&constraint->normal.X)[j] = 0.0f; + ((float*)&constraint->normal.Y)[j] = 0.0f; + + ((float*)&constraint->separation1)[j] = 0.0f; + ((float*)&constraint->normalImpulse1)[j] = 0.0f; + ((float*)&constraint->tangentImpulse1)[j] = 0.0f; + ((float*)&constraint->rA1.X)[j] = 0.0f; + ((float*)&constraint->rA1.Y)[j] = 0.0f; + ((float*)&constraint->rB1.X)[j] = 0.0f; + ((float*)&constraint->rB1.Y)[j] = 0.0f; + ((float*)&constraint->normalMass1)[j] = 0.0f; + ((float*)&constraint->tangentMass1)[j] = 0.0f; + 
((float*)&constraint->relativeVelocity1)[j] = 0.0f; + + ((float*)&constraint->separation2)[j] = 0.0f; + ((float*)&constraint->normalImpulse2)[j] = 0.0f; + ((float*)&constraint->tangentImpulse2)[j] = 0.0f; + ((float*)&constraint->rA2.X)[j] = 0.0f; + ((float*)&constraint->rA2.Y)[j] = 0.0f; + ((float*)&constraint->rB2.X)[j] = 0.0f; + ((float*)&constraint->rB2.Y)[j] = 0.0f; + ((float*)&constraint->normalMass2)[j] = 0.0f; + ((float*)&constraint->tangentMass2)[j] = 0.0f; + ((float*)&constraint->relativeVelocity2)[j] = 0.0f; + } + } + } - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); + b2TracyCZoneEnd(prepare_contact); +} - vA = b2MulSub(vA, mA, b2Add(P1, P2)); - wA -= iA * (b2Cross(cp1->rA, P1) + b2Cross(cp2->rA, P2)); +void b2WarmStartContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +{ + b2TracyCZoneNC(warm_start_contact, "Warm Start", b2_colorGreen1, true); - vB = b2MulAdd(vB, mB, b2Add(P1, P2)); - wB += iB * (b2Cross(cp1->rB, P1) + b2Cross(cp2->rB, P2)); + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; - // Accumulate - cp1->normalImpulse = x.x; - cp2->normalImpulse = x.y; + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraintAVX* c = constraints + i; + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - break; - } + __m256 tangentX = c->normal.Y; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); - // No solution, give up. This is hit sometimes, but it doesn't seem to matter. 
- break; - } + { + b2Vec2W P; + P.X = add(mul(c->normalImpulse1, c->normal.X), mul(c->tangentImpulse1, tangentX)); + P.Y = add(mul(c->normalImpulse1, c->normal.Y), mul(c->tangentImpulse1, tangentY)); + bA.w = mulsub(bA.w, bA.invI, b2CrossW(c->rA1, P)); + bA.v.X = mulsub(bA.v.X, bA.invM, P.X); + bA.v.Y = mulsub(bA.v.Y, bA.invM, P.Y); + bB.w = muladd(bB.w, bB.invI, b2CrossW(c->rB1, P)); + bB.v.X = muladd(bB.v.X, bB.invM, P.X); + bB.v.Y = muladd(bB.v.Y, bB.invM, P.Y); } - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + { + b2Vec2W P; + P.X = add(mul(c->normalImpulse2, c->normal.X), mul(c->tangentImpulse2, tangentX)); + P.Y = add(mul(c->normalImpulse2, c->normal.Y), mul(c->tangentImpulse2, tangentY)); + bA.w = mulsub(bA.w, bA.invI, b2CrossW(c->rA2, P)); + bA.v.X = mulsub(bA.v.X, bA.invM, P.X); + bA.v.Y = mulsub(bA.v.Y, bA.invM, P.Y); + bB.w = muladd(bB.w, bB.invI, b2CrossW(c->rB2, P)); + bB.v.X = muladd(bB.v.X, bB.invM, P.X); + bB.v.Y = muladd(bB.v.Y, bB.invM, P.Y); + } + + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); } + + b2TracyCZoneEnd(warm_start_contact); } -void b2ContactSolver_ApplyRestitution(b2ContactSolver* solver) +void b2SolveContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias) { - int32_t count = solver->constraintCount; - float threshold = solver->context->restitutionThreshold; + b2TracyCZoneNC(solve_contact, "Solve Contact", b2_colorAliceBlue, true); - b2World* world = solver->world; - b2Body* bodies = world->bodies; + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; + float inv_dt = context->invTimeStep; + const float pushout = context->world->maximumPushoutVelocity; - for (int32_t i = 0; i < count; ++i) + for (int32_t i = startIndex; i < endIndex; ++i) { - b2ContactVelocityConstraint* vc 
= solver->velocityConstraints + i; - const b2Contact* contact = vc->contact; + b2ContactConstraintAVX* c = constraints + i; - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - if (vc->restitution == 0.0f) + __m256 biasCoeff, massCoeff, impulseCoeff; + if (useBias) { - continue; + biasCoeff = c->biasCoefficient; + massCoeff = c->massCoefficient; + impulseCoeff = c->impulseCoefficient; + } + else + { + biasCoeff = _mm256_setzero_ps(); + massCoeff = _mm256_set1_ps(1.0f); + impulseCoeff = _mm256_setzero_ps(); } - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB = bodyB->invMass; - float iB = bodyB->invI; - int32_t pointCount = vc->pointCount; - - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Vec2 normal = vc->normal; + __m256 invDtMul = _mm256_set1_ps(inv_dt); + __m256 minBiasVel = _mm256_set1_ps(-pushout); - for (int32_t j = 0; j < pointCount; ++j) + // first point non-penetration constraint { - b2VelocityConstraintPoint* vcp = vc->points + j; + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB1.Y)), sub(bA.dp.X, mul(bA.da, c->rA1.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB1.X)), add(bA.dp.Y, mul(bA.da, c->rA1.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); - // if the normal impulse is zero then there was no collision - if (vcp->relativeVelocity > -threshold || vcp->normalImpulse == 0.0f) - { - continue; - } + __m256 s = add(c->separation1, ds); + + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + 
__m256 bias = _mm256_blendv_ps(softBias, specBias, test); // Relative velocity at contact - b2Vec2 vrB = b2Add(vB, b2CrossSV(wB, vcp->rB)); - b2Vec2 vrA = b2Add(vA, b2CrossSV(wA, vcp->rA)); - b2Vec2 dv = b2Sub(vrB, vrA); + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); // Compute normal impulse - float vn = b2Dot(dv, normal); - float lambda = -vcp->normalMass * (vn + vc->restitution * vcp->relativeVelocity); - - // Apply contact impulse - b2Vec2 P = b2MulSV(lambda, normal); - vA = b2MulSub(vA, mA, P); - wA -= iA * b2Cross(vcp->rA, P); + __m256 negImpulse = add(mul(c->normalMass1, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse1)); + // float impulse = -cp->normalMass * massScale * (vn + bias) - impulseScale * cp->normalImpulse; - vB = b2MulAdd(vB, mB, P); - wB += iB * b2Cross(vcp->rB, P); - } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; - } -} + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse1); + c->normalImpulse1 = newImpulse; -void b2ContactSolver_StoreImpulses(b2ContactSolver* solver) -{ - int32_t count = solver->constraintCount; + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - for (int32_t i = 0; i < count; ++i) - { - b2ContactVelocityConstraint* vc = solver->velocityConstraints + i; - b2Contact* contact = vc->contact; + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - b2Manifold* manifold = &contact->manifold; + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, 
mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); + } - for (int32_t j = 0; j < vc->pointCount; ++j) + // second point non-penetration constraint { - manifold->points[j].normalImpulse = vc->points[j].normalImpulse; - manifold->points[j].tangentImpulse = vc->points[j].tangentImpulse; - } - } -} + // Compute change in separation (small angle approximation of sin(angle) == angle) + __m256 prx = sub(sub(bB.dp.X, mul(bB.da, c->rB2.Y)), sub(bA.dp.X, mul(bA.da, c->rA2.Y))); + __m256 pry = sub(add(bB.dp.Y, mul(bB.da, c->rB2.X)), add(bA.dp.Y, mul(bA.da, c->rA2.X))); + __m256 ds = add(mul(prx, c->normal.X), mul(pry, c->normal.Y)); -bool b2ContactSolver_SolvePositionConstraintsBlock(b2ContactSolver* solver) -{ - float minSeparation = 0.0f; - int32_t count = solver->constraintCount; - float slop = b2_linearSlop; + __m256 s = add(c->separation2, ds); - b2World* world = solver->world; - b2Body* bodies = world->bodies; + __m256 test = _mm256_cmp_ps(s, _mm256_setzero_ps(), _CMP_GT_OQ); + __m256 specBias = mul(s, invDtMul); + __m256 softBias = _mm256_max_ps(mul(biasCoeff, s), minBiasVel); + __m256 bias = _mm256_blendv_ps(softBias, specBias, test); - for (int32_t i = 0; i < count; ++i) - { - b2ContactPositionConstraint* pc = solver->positionConstraints + i; - const b2Contact* contact = pc->contact; + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); - int32_t indexA = contact->edges[0].bodyIndex; - int32_t indexB = contact->edges[1].bodyIndex; - b2Body* bodyA = bodies + indexA; - b2Body* bodyB = bodies + indexB; + // Compute normal impulse + __m256 negImpulse = add(mul(c->normalMass2, mul(massCoeff, add(vn, bias))), mul(impulseCoeff, c->normalImpulse2)); - float mA = bodyA->invMass; - float iA = bodyA->invI; - float mB 
= bodyB->invMass; - float iB = bodyB->invI; + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse2); + c->normalImpulse2 = newImpulse; - int32_t pointCount = pc->pointCount; + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - b2Vec2 normal = pc->normal; + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } - if (pointCount == 2) + __m256 tangentX = c->normal.Y; + __m256 tangentY = sub(_mm256_setzero_ps(), c->normal.X); + // float friction = constraint->friction; + + // first point friction constraint { - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); - b2Vec2 rA1 = b2RotateVector(qA, pc->localAnchorsA[0]); - b2Vec2 rB1 = b2RotateVector(qB, pc->localAnchorsB[0]); - b2Vec2 rA2 = b2RotateVector(qA, pc->localAnchorsA[1]); - b2Vec2 rB2 = b2RotateVector(qB, pc->localAnchorsB[1]); + // Compute tangent force + __m256 negImpulse = mul(c->tangentMass1, vt); - // Current separation - b2Vec2 d1 = b2Sub(b2Add(cB, rB1), b2Add(cA, rA1)); - float separation1 = b2Dot(d1, normal) + pc->separations[0]; + // Clamp the accumulated force + __m256 maxFriction = mul(c->friction, c->normalImpulse1); + __m256 newImpulse = sub(c->tangentImpulse1, 
negImpulse); + newImpulse = _mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse1); + c->tangentImpulse1 = newImpulse; - b2Vec2 d2 = b2Sub(b2Add(cB, rB2), b2Add(cA, rA2)); - float separation2 = b2Dot(d2, normal) + pc->separations[1]; + // Apply contact impulse + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); - // Track max constraint error. - minSeparation = B2_MIN(minSeparation, separation1); - minSeparation = B2_MIN(minSeparation, separation2); + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); - float C1 = B2_CLAMP(b2_baumgarte * (separation1 + slop), -b2_maxLinearCorrection, 0.0f); - float C2 = B2_CLAMP(b2_baumgarte * (separation2 + slop), -b2_maxLinearCorrection, 0.0f); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); + } - b2Vec2 b = {C1, C2}; + // second point friction constraint + { + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vt = add(mul(dvx, tangentX), mul(dvy, tangentY)); - float rn1A = b2Cross(rA1, normal); - float rn1B = b2Cross(rB1, normal); - float rn2A = b2Cross(rA2, normal); - float rn2B = b2Cross(rB2, normal); + // Compute tangent force + __m256 negImpulse = mul(c->tangentMass2, vt); - float k11 = mA + mB + iA * rn1A * rn1A + iB * rn1B * rn1B; - float k22 = mA + mB + iA * rn2A * rn2A + iB * rn2B * rn2B; - float k12 = mA + mB + iA * rn1A * rn2A + iB * rn1B * rn2B; + // Clamp the accumulated force + __m256 maxFriction = mul(c->friction, c->normalImpulse2); + __m256 newImpulse = sub(c->tangentImpulse2, negImpulse); + newImpulse = 
_mm256_max_ps(sub(_mm256_setzero_ps(), maxFriction), _mm256_min_ps(newImpulse, maxFriction)); + __m256 impulse = sub(newImpulse, c->tangentImpulse2); + c->tangentImpulse2 = newImpulse; - b2Mat22 K, invK; + // Apply contact impulse + __m256 Px = mul(impulse, tangentX); + __m256 Py = mul(impulse, tangentY); - // Ensure a reasonable condition number. - const float k_maxConditionNumber = 10000.0f; - if (k11 * k11 < k_maxConditionNumber * (k11 * k22 - k12 * k12)) - { - // K is safe to invert. - K.cx = (b2Vec2){k11, k12}; - K.cy = (b2Vec2){k12, k22}; - invK = b2GetInverse22(K); - } - else - { - // The constraints are redundant, however one may be deeper than the other. - // This can happen when a capsule is deeply embedded in a box. - goto manifold_degenerate; - } + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - const float k_errorTol = 1e-3f; - B2_MAYBE_UNUSED(k_errorTol); + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } - for (;;) - { - // - // Case 1: vn = 0 - // - // 0 = A * x + b' - // - // Solve for x: - // - // x = - inv(A) * b' - // - b2Vec2 x = b2Neg(b2MulMV(invK, b)); - - if (x.x >= 0.0f && x.y >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = x; + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); + } - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); + b2TracyCZoneEnd(solve_contact); +} - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); +void b2ApplyRestitutionSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex) +{ + b2TracyCZoneNC(restitution, "Restitution", b2_colorDodgerBlue, true); - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + 
b2Cross(rB2, P2)); - break; - } + b2SolverBody* bodies = context->solverBodies; + b2ContactConstraintAVX* constraints = context->graph->colors[colorIndex].contactConstraintAVXs; + b2FloatW threshold = _mm256_set1_ps(context->world->restitutionThreshold); + b2FloatW zero = _mm256_setzero_ps(); - // - // Case 2: vn1 = 0 and x2 = 0 - // - // 0 = a11 * x1 + a12 * 0 + b1' - // vn2 = a21 * x1 + a22 * 0 + b2' - // - x.x = -b.x / k11; - x.y = 0.0f; - float vn2 = K.cx.y * x.x + b.y; - if (x.x >= 0.0f && vn2 >= 0.0f) - { - // Get the incremental impulse - b2Vec2 d = x; + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2ContactConstraintAVX* c = constraints + i; - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); + b2SimdBody bA = b2GatherBodies(bodies, c->indexA); + b2SimdBody bB = b2GatherBodies(bodies, c->indexB); - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); + // first point non-penetration constraint + { + // Set effective mass to zero if restitution should not be applied + __m256 test1 = _mm256_cmp_ps(add(c->relativeVelocity1, threshold), zero, _CMP_GT_OQ); + __m256 test2 = _mm256_cmp_ps(c->normalImpulse1, zero, _CMP_EQ_OQ); + __m256 test = _mm256_or_ps(test1, test2); + __m256 mass = _mm256_blendv_ps(c->normalMass1, zero, test); - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + b2Cross(rB2, P2)); - break; - } + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB1.Y)), sub(bA.v.X, mul(bA.w, c->rA1.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB1.X)), add(bA.v.Y, mul(bA.w, c->rA1.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); - // - // Case 3: vn2 = 0 and x1 = 0 - // - // vn1 = a11 * 0 + a12 * x2 + b1' - // 0 = a21 * 0 + a22 * x2 + b2' - // - x.x = 0.0f; - x.y = -b.y / k22; - float vn1 = K.cy.x * x.y + b.x; - if (x.y >= 0.0f && vn1 >= 0.0f) - { - // Resubstitute for the incremental impulse - 
b2Vec2 d = x; + // Compute normal impulse + __m256 negImpulse = mul(mass, add(vn, mul(c->restitution, c->relativeVelocity1))); - // Apply incremental impulse - b2Vec2 P1 = b2MulSV(d.x, normal); - b2Vec2 P2 = b2MulSV(d.y, normal); + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse1, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse1); + c->normalImpulse1 = newImpulse; - cA = b2MulSub(cA, mA, b2Add(P1, P2)); - aA -= iA * (b2Cross(rA1, P1) + b2Cross(rA2, P2)); + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - cB = b2MulAdd(cB, mB, b2Add(P1, P2)); - aB += iB * (b2Cross(rB1, P1) + b2Cross(rB2, P2)); - break; - } - break; - } + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA1.X, Py), mul(c->rA1.Y, Px)))); + + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB1.X, Py), mul(c->rB1.Y, Px)))); } - else + + // second point non-penetration constraint { - manifold_degenerate: - for (int32_t j = 0; j < pointCount; ++j) - { - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); + // Set effective mass to zero if restitution should not be applied + __m256 test1 = _mm256_cmp_ps(add(c->relativeVelocity2, threshold), zero, _CMP_GT_OQ); + __m256 test2 = _mm256_cmp_ps(c->normalImpulse2, zero, _CMP_EQ_OQ); + __m256 test = _mm256_or_ps(test1, test2); + __m256 mass = _mm256_blendv_ps(zero, c->normalMass2, test); - b2Vec2 rA = b2RotateVector(qA, pc->localAnchorsA[j]); - b2Vec2 rB = b2RotateVector(qB, pc->localAnchorsB[j]); + // Relative velocity at contact + __m256 dvx = sub(sub(bB.v.X, mul(bB.w, c->rB2.Y)), sub(bA.v.X, mul(bA.w, c->rA2.Y))); + __m256 dvy = sub(add(bB.v.Y, mul(bB.w, c->rB2.X)), add(bA.v.Y, mul(bA.w, c->rA2.X))); + __m256 vn = add(mul(dvx, c->normal.X), mul(dvy, c->normal.Y)); - // 
Current separation - b2Vec2 d = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - float separation = b2Dot(d, normal) + pc->separations[j]; + // Compute normal impulse + __m256 negImpulse = mul(mass, add(vn, mul(c->restitution, c->relativeVelocity2))); - // Track max constraint error. - minSeparation = B2_MIN(minSeparation, separation); + // Clamp the accumulated impulse + __m256 newImpulse = _mm256_max_ps(sub(c->normalImpulse2, negImpulse), _mm256_setzero_ps()); + __m256 impulse = sub(newImpulse, c->normalImpulse2); + c->normalImpulse2 = newImpulse; - // Prevent large corrections. Need to maintain a small overlap to avoid overshoot. - // This improves stacking stability significantly. - float C = B2_CLAMP(b2_baumgarte * (separation + slop), -b2_maxLinearCorrection, 0.0f); + // Apply contact impulse + __m256 Px = mul(impulse, c->normal.X); + __m256 Py = mul(impulse, c->normal.Y); - // Compute the effective mass. - float rnA = b2Cross(rA, normal); - float rnB = b2Cross(rB, normal); - float K = mA + mB + iA * rnA * rnA + iB * rnB * rnB; + bA.v.X = sub(bA.v.X, mul(bA.invM, Px)); + bA.v.Y = sub(bA.v.Y, mul(bA.invM, Py)); + bA.w = sub(bA.w, mul(bA.invI, sub(mul(c->rA2.X, Py), mul(c->rA2.Y, Px)))); - // Compute normal impulse - float impulse = K > 0.0f ? 
-C / K : 0.0f; + bB.v.X = add(bB.v.X, mul(bB.invM, Px)); + bB.v.Y = add(bB.v.Y, mul(bB.invM, Py)); + bB.w = add(bB.w, mul(bB.invI, sub(mul(c->rB2.X, Py), mul(c->rB2.Y, Px)))); + } - b2Vec2 P = b2MulSV(impulse, normal); + b2ScatterBodies(bodies, c->indexA, &bA); + b2ScatterBodies(bodies, c->indexB, &bB); + } - cA = b2MulSub(cA, mA, P); - aA -= iA * b2Cross(rA, P); + b2TracyCZoneEnd(restitution); +} - cB = b2MulAdd(cB, mB, P); - aB += iB * b2Cross(rB, P); - } - } +void b2StoreImpulsesSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(store_impulses, "Store", b2_colorFirebrick, true); + + b2Contact* contacts = context->world->contacts; + const b2ContactConstraintAVX* constraints = context->constraintAVXs; + const int32_t* indices = context->contactIndices; - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; + b2Manifold dummy = {0}; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + const b2ContactConstraintAVX* c = constraints + i; + const float* normalImpulse1 = (float*)&c->normalImpulse1; + const float* normalImpulse2 = (float*)&c->normalImpulse2; + const float* tangentImpulse1 = (float*)&c->tangentImpulse1; + const float* tangentImpulse2 = (float*)&c->tangentImpulse2; + + const int32_t* base = indices + 8 * i; + int32_t index0 = base[0]; + int32_t index1 = base[1]; + int32_t index2 = base[2]; + int32_t index3 = base[3]; + int32_t index4 = base[4]; + int32_t index5 = base[5]; + int32_t index6 = base[6]; + int32_t index7 = base[7]; + + b2Manifold* m0 = index0 == B2_NULL_INDEX ? &dummy : &contacts[index0].manifold; + b2Manifold* m1 = index1 == B2_NULL_INDEX ? &dummy : &contacts[index1].manifold; + b2Manifold* m2 = index2 == B2_NULL_INDEX ? &dummy : &contacts[index2].manifold; + b2Manifold* m3 = index3 == B2_NULL_INDEX ? &dummy : &contacts[index3].manifold; + b2Manifold* m4 = index4 == B2_NULL_INDEX ? &dummy : &contacts[index4].manifold; + b2Manifold* m5 = index5 == B2_NULL_INDEX ? 
&dummy : &contacts[index5].manifold; + b2Manifold* m6 = index6 == B2_NULL_INDEX ? &dummy : &contacts[index6].manifold; + b2Manifold* m7 = index7 == B2_NULL_INDEX ? &dummy : &contacts[index7].manifold; + + m0->points[0].normalImpulse = normalImpulse1[0]; + m0->points[0].tangentImpulse = tangentImpulse1[0]; + m0->points[1].normalImpulse = normalImpulse2[0]; + m0->points[1].tangentImpulse = tangentImpulse2[0]; + + m1->points[0].normalImpulse = normalImpulse1[1]; + m1->points[0].tangentImpulse = tangentImpulse1[1]; + m1->points[1].normalImpulse = normalImpulse2[1]; + m1->points[1].tangentImpulse = tangentImpulse2[1]; + + m2->points[0].normalImpulse = normalImpulse1[2]; + m2->points[0].tangentImpulse = tangentImpulse1[2]; + m2->points[1].normalImpulse = normalImpulse2[2]; + m2->points[1].tangentImpulse = tangentImpulse2[2]; + + m3->points[0].normalImpulse = normalImpulse1[3]; + m3->points[0].tangentImpulse = tangentImpulse1[3]; + m3->points[1].normalImpulse = normalImpulse2[3]; + m3->points[1].tangentImpulse = tangentImpulse2[3]; + + m4->points[0].normalImpulse = normalImpulse1[4]; + m4->points[0].tangentImpulse = tangentImpulse1[4]; + m4->points[1].normalImpulse = normalImpulse2[4]; + m4->points[1].tangentImpulse = tangentImpulse2[4]; + + m5->points[0].normalImpulse = normalImpulse1[5]; + m5->points[0].tangentImpulse = tangentImpulse1[5]; + m5->points[1].normalImpulse = normalImpulse2[5]; + m5->points[1].tangentImpulse = tangentImpulse2[5]; + + m6->points[0].normalImpulse = normalImpulse1[6]; + m6->points[0].tangentImpulse = tangentImpulse1[6]; + m6->points[1].normalImpulse = normalImpulse2[6]; + m6->points[1].tangentImpulse = tangentImpulse2[6]; + + m7->points[0].normalImpulse = normalImpulse1[7]; + m7->points[0].tangentImpulse = tangentImpulse1[7]; + m7->points[1].normalImpulse = normalImpulse2[7]; + m7->points[1].tangentImpulse = tangentImpulse2[7]; } - // We can't expect minSpeparation >= -b2_linearSlop because we don't - // push the separation above 
-b2_linearSlop. - return minSeparation >= -3.0f * b2_linearSlop; + b2TracyCZoneEnd(store_impulses); } diff --git a/src/contact_solver.h b/src/contact_solver.h index 916008ac..22c2b0a6 100644 --- a/src/contact_solver.h +++ b/src/contact_solver.h @@ -4,40 +4,76 @@ #pragma once #include "solver_data.h" -#include "stack_allocator.h" -#include "box2d/callbacks.h" +#include -typedef struct b2ContactSolverDef +typedef struct b2Contact b2Contact; + +typedef struct b2ContactConstraintPoint +{ + b2Vec2 rA, rB; + float separation; + float relativeVelocity; + float normalImpulse; + float tangentImpulse; + float normalMass; + float tangentMass; +} b2ContactConstraintPoint; + +typedef struct b2ContactConstraint { - const b2StepContext* context; - struct b2World* world; - int32_t contactList; - int32_t contactCount; -} b2ContactSolverDef; + b2Contact* contact; + int32_t indexA; + int32_t indexB; + b2ContactConstraintPoint points[2]; + b2Vec2 normal; + float friction; + float restitution; + float massCoefficient; + float biasCoefficient; + float impulseCoefficient; + int32_t pointCount; +} b2ContactConstraint; + +// Wide float +typedef __m256 b2FloatW; -typedef struct b2ContactSolver +// Wide vec2 +typedef struct b2Vec2W { - const b2StepContext* context; - struct b2World* world; - struct b2ContactPositionConstraint* positionConstraints; - struct b2ContactVelocityConstraint* velocityConstraints; - int32_t contactList; - int32_t contactCount; - int32_t constraintCount; -} b2ContactSolver; - -b2ContactSolver* b2CreateContactSolver(b2ContactSolverDef* def); - -static inline void b2DestroyContactSolver(b2ContactSolver* solver, b2StackAllocator* alloc) + b2FloatW X, Y; +} b2Vec2W; + +typedef struct b2ContactConstraintSIMD { - b2FreeStackItem(alloc, solver->velocityConstraints); - b2FreeStackItem(alloc, solver->positionConstraints); - b2FreeStackItem(alloc, solver); -} - -void b2ContactSolver_Initialize(b2ContactSolver* solver); -void 
b2ContactSolver_SolveVelocityConstraints(b2ContactSolver* solver); -void b2ContactSolver_ApplyRestitution(b2ContactSolver* solver); -void b2ContactSolver_StoreImpulses(b2ContactSolver* solver); -bool b2ContactSolver_SolvePositionConstraintsBlock(b2ContactSolver* solver); + int32_t indexA[8]; + int32_t indexB[8]; + + b2Vec2W normal; + __m256 friction; + __m256 restitution; + b2Vec2W rA1, rB1; + b2Vec2W rA2, rB2; + __m256 separation1, separation2; + __m256 relativeVelocity1, relativeVelocity2; + __m256 normalImpulse1, normalImpulse2; + __m256 tangentImpulse1, tangentImpulse2; + __m256 normalMass1, tangentMass1; + __m256 normalMass2, tangentMass2; + __m256 massCoefficient; + __m256 biasCoefficient; + __m256 impulseCoefficient; +} b2ContactConstraintAVX; + +// Scalar +void b2PrepareOverflowContacts(b2SolverTaskContext* context); +void b2SolveOverflowContacts(b2SolverTaskContext* context, bool useBias); +void b2ApplyOverflowRestitution(b2SolverTaskContext* context); +void b2StoreOverflowImpulses(b2SolverTaskContext* context); + +// AVX versions +void b2PrepareContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); +void b2WarmStartContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2SolveContactsSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex, bool useBias); +void b2ApplyRestitutionSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context, int32_t colorIndex); +void b2StoreImpulsesSIMD(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context); diff --git a/src/core.h b/src/core.h index 6c45c8bd..f4b10183 100644 --- a/src/core.h +++ b/src/core.h @@ -75,7 +75,7 @@ #define B2_ASSERT(...) 
((void)0) #endif -#if defined(_DEBUG) || 0 +#if defined(_DEBUG) #define B2_VALIDATE 1 #else #define B2_VALIDATE 0 diff --git a/src/dynamic_tree.c b/src/dynamic_tree.c index c57ff3a6..f9c117d6 100644 --- a/src/dynamic_tree.c +++ b/src/dynamic_tree.c @@ -13,6 +13,8 @@ #include #include +#define b2_treeStackSize 1024 + // TODO_ERIN // - try incrementally sorting internal nodes by height for better cache efficiency during depth first traversal. @@ -1138,8 +1140,6 @@ void b2DynamicTree_ShiftOrigin(b2DynamicTree* tree, b2Vec2 newOrigin) } } -#define b2_treeStackSize 256 - void b2DynamicTree_QueryFiltered(const b2DynamicTree* tree, b2AABB aabb, uint32_t maskBits, b2TreeQueryCallbackFcn* callback, void* context) { int32_t stack[b2_treeStackSize]; @@ -1169,10 +1169,8 @@ void b2DynamicTree_QueryFiltered(const b2DynamicTree* tree, b2AABB aabb, uint32_ } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? - - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { stack[stackCount++] = node->child1; stack[stackCount++] = node->child2; @@ -1211,10 +1209,8 @@ void b2DynamicTree_Query(const b2DynamicTree* tree, b2AABB aabb, b2TreeQueryCall } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? - - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { stack[stackCount++] = node->child1; stack[stackCount++] = node->child2; @@ -1310,10 +1306,8 @@ void b2DynamicTree_RayCast(const b2DynamicTree* tree, const b2RayCastInput* inpu } else { - B2_ASSERT(stackCount <= b2_treeStackSize - 2); - // TODO log this? 
- - if (stackCount <= b2_treeStackSize - 2) + B2_ASSERT(stackCount < b2_treeStackSize - 1); + if (stackCount < b2_treeStackSize - 1) { // TODO_ERIN just put one node on the stack, continue on a child node // TODO_ERIN test ordering children by nearest to ray origin diff --git a/src/graph.c b/src/graph.c new file mode 100644 index 00000000..a5c493bc --- /dev/null +++ b/src/graph.c @@ -0,0 +1,1483 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "graph.h" + +#include "allocate.h" +#include "array.h" +#include "body.h" +#include "contact.h" +#include "contact_solver.h" +#include "core.h" +#include "joint.h" +#include "shape.h" +#include "solver_data.h" +#include "stack_allocator.h" +#include "world.h" + +#include "box2d/aabb.h" + +#include +#include +#include +#include + +// Kinematic bodies have to be treated like dynamic bodies in graph coloring. Unlike static bodies, we cannot use a dummy solver body for +// kinematic bodies. We cannot access a kinematic body from multiple threads efficiently because the SIMD solver body scatter would write to +// the same kinematic body from multiple threads. Even if these writes don't modify the body, they will cause horrible cache stalls. To make +// this feasible I would need a way to block these writes. 
+ +extern bool b2_parallel; + +typedef struct b2WorkerContext +{ + b2SolverTaskContext* context; + int32_t workerIndex; + void* userTask; +} b2WorkerContext; + +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity) +{ + memset(graph, 0, sizeof(b2Graph)); + + bodyCapacity = B2_MAX(bodyCapacity, 8); + contactCapacity = B2_MAX(contactCapacity, 8); + jointCapacity = B2_MAX(jointCapacity, 8); + + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + color->bodySet = b2CreateBitSet(bodyCapacity); + b2SetBitCountAndClear(&color->bodySet, bodyCapacity); + + color->contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + color->jointArray = b2CreateArray(sizeof(int32_t), jointCapacity); + + color->contactConstraints = NULL; + color->contactConstraintAVXs = NULL; + } + + graph->overflow.contactArray = b2CreateArray(sizeof(int32_t), contactCapacity); + graph->overflow.jointArray = b2CreateArray(sizeof(int32_t), jointCapacity); + graph->overflow.contactConstraints = NULL; +} + +void b2DestroyGraph(b2Graph* graph) +{ + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + b2DestroyBitSet(&color->bodySet); + b2DestroyArray(color->contactArray, sizeof(int32_t)); + b2DestroyArray(color->jointArray, sizeof(int32_t)); + } + + b2DestroyArray(graph->overflow.contactArray, sizeof(int32_t)); + b2DestroyArray(graph->overflow.jointArray, sizeof(int32_t)); +} + +void b2AddContactToGraph(b2World* world, b2Contact* contact) +{ + B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex == B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + int32_t bodyIndexA = contact->edges[0].bodyIndex; + int32_t bodyIndexB = contact->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + for 
(int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA) || b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexB); + + contact->colorSubIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + else if (typeA == b2_dynamicBody) + { + // Static contacts never in color 0 + for (int32_t i = 1; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + + contact->colorSubIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + else if (typeB == b2_dynamicBody) + { + // Static contacts never in color 0 + for (int32_t i = 1; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexB); + + contact->colorSubIndex = b2Array(color->contactArray).count; + b2Array_Push(color->contactArray, contact->object.index); + contact->colorIndex = i; + break; + } + } + + // Overflow + if (contact->colorIndex == B2_NULL_INDEX) + { + contact->colorSubIndex = b2Array(graph->overflow.contactArray).count; + b2Array_Push(graph->overflow.contactArray, contact->object.index); + contact->colorIndex = b2_overflowIndex; + } +} + +void b2RemoveContactFromGraph(b2World* world, b2Contact* contact) +{ + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex != B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + // Overflow + if (contact->colorIndex == b2_overflowIndex) + { + int32_t colorSubIndex = contact->colorSubIndex; + 
b2Array_RemoveSwap(graph->overflow.contactArray, colorSubIndex); + if (colorSubIndex < b2Array(graph->overflow.contactArray).count) + { + // Fix index on swapped contact + int32_t swappedIndex = graph->overflow.contactArray[colorSubIndex]; + B2_ASSERT(world->contacts[swappedIndex].colorIndex == b2_overflowIndex); + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; + } + + contact->colorIndex = B2_NULL_INDEX; + contact->colorSubIndex = B2_NULL_INDEX; + + return; + } + + B2_ASSERT(0 <= contact->colorIndex && contact->colorIndex < b2_graphColorCount); + int32_t bodyIndexA = contact->edges[0].bodyIndex; + int32_t bodyIndexB = contact->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + b2ClearBit(&color->bodySet, bodyIndexB); + } + else if (typeA == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + } + else if (typeB == b2_dynamicBody) + { + 
b2GraphColor* color = graph->colors + contact->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = contact->colorSubIndex; + b2Array_RemoveSwap(color->contactArray, colorSubIndex); + if (colorSubIndex < b2Array(color->contactArray).count) + { + // Fix index on swapped contact + int32_t swappedIndex = color->contactArray[colorSubIndex]; + world->contacts[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexB); + } + + contact->colorIndex = B2_NULL_INDEX; + contact->colorSubIndex = B2_NULL_INDEX; +} + +void b2AddJointToGraph(b2World* world, b2Joint* joint) +{ + B2_ASSERT(joint->colorIndex == B2_NULL_INDEX); + B2_ASSERT(joint->colorSubIndex == B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + int32_t bodyIndexA = joint->edges[0].bodyIndex; + int32_t bodyIndexB = joint->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA) || b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + b2SetBitGrow(&color->bodySet, bodyIndexB); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + else if (typeA == b2_dynamicBody) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexA)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexA); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + else if (typeB == b2_dynamicBody) + { + for (int32_t i = 0; i < 
b2_graphColorCount; ++i) + { + b2GraphColor* color = graph->colors + i; + if (b2GetBit(&color->bodySet, bodyIndexB)) + { + continue; + } + + b2SetBitGrow(&color->bodySet, bodyIndexB); + + joint->colorSubIndex = b2Array(color->jointArray).count; + b2Array_Push(color->jointArray, joint->object.index); + joint->colorIndex = i; + break; + } + } + + // TODO_ERIN handle joint overflow + + B2_ASSERT(joint->colorIndex != B2_NULL_INDEX && joint->colorSubIndex != B2_NULL_INDEX); +} + +void b2RemoveJointFromGraph(b2World* world, b2Joint* joint) +{ + B2_ASSERT(joint->colorIndex != B2_NULL_INDEX); + B2_ASSERT(joint->colorSubIndex != B2_NULL_INDEX); + + b2Graph* graph = &world->graph; + + B2_ASSERT(0 <= joint->colorIndex && joint->colorIndex < b2_graphColorCount); + int32_t bodyIndexA = joint->edges[0].bodyIndex; + int32_t bodyIndexB = joint->edges[1].bodyIndex; + + b2BodyType typeA = world->bodies[bodyIndexA].type; + b2BodyType typeB = world->bodies[bodyIndexB].type; + + if (typeA == b2_dynamicBody && typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA) && b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + b2ClearBit(&color->bodySet, bodyIndexB); + } + else if (typeA == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexA)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = 
color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexA); + } + else if (typeB == b2_dynamicBody) + { + b2GraphColor* color = graph->colors + joint->colorIndex; + B2_ASSERT(b2GetBit(&color->bodySet, bodyIndexB)); + + int32_t colorSubIndex = joint->colorSubIndex; + b2Array_RemoveSwap(color->jointArray, colorSubIndex); + if (colorSubIndex < b2Array(color->jointArray).count) + { + // Fix index on swapped joint + int32_t swappedIndex = color->jointArray[colorSubIndex]; + world->joints[swappedIndex].colorSubIndex = colorSubIndex; + } + + b2ClearBit(&color->bodySet, bodyIndexB); + } + + joint->colorIndex = B2_NULL_INDEX; + joint->colorSubIndex = B2_NULL_INDEX; +} + +static void b2IntegrateVelocitiesTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(integrate_velocity, "IntVel", b2_colorDeepPink, true); + + b2Vec2 gravity = context->world->gravity; + b2Body** bodies = context->awakeBodies; + b2SolverBody* solverBodies = context->solverBodies; + int32_t* bodyToSolverMap = context->bodyToSolverMap; + + float h = context->timeStep; + + // Integrate velocities and apply damping. Initialize the body state. + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2Body* body = bodies[i]; + //_m_prefetch(bodies[i + 1]); + + // create body map used to prepare constraints + B2_ASSERT(body->object.index < context->world->bodyPool.capacity); + bodyToSolverMap[body->object.index] = i; + + float invMass = body->invMass; + float invI = body->invI; + + b2Vec2 v = body->linearVelocity; + float w = body->angularVelocity; + + // Integrate velocities + v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(body->force, body->gravityScale * body->mass, gravity))); + w = w + h * invI * body->torque; + + // Apply damping. 
+ // ODE: dv/dt + c * v = 0 + // Solution: v(t) = v0 * exp(-c * t) + // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) + // v2 = exp(-c * dt) * v1 + // Pade approximation: + // v2 = v1 * 1 / (1 + c * dt) + v = b2MulSV(1.0f / (1.0f + h * body->linearDamping), v); + w *= 1.0f / (1.0f + h * body->angularDamping); + + b2SolverBody* solverBody = solverBodies + i; + solverBody->linearVelocity = v; + solverBody->angularVelocity = w; + + solverBody->deltaAngle = 0.0f; + solverBody->deltaPosition = b2Vec2_zero; + + solverBody->invMass = invMass; + solverBody->invI = invI; + } + + b2TracyCZoneEnd(integrate_velocity); +} + +static void b2PrepareJointsTask(b2SolverTaskContext* context) +{ + b2World* world = context->world; + b2Joint* joints = world->joints; + int32_t jointCapacity = world->jointPool.capacity; + b2StepContext* stepContext = context->stepContext; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2PrepareJoint(joint, stepContext); + } +} + +static void b2SolveJointsTask(b2SolverTaskContext* context, bool useBias) +{ + b2World* world = context->world; + b2Joint* joints = world->joints; + int32_t jointCapacity = world->jointPool.capacity; + b2StepContext* stepContext = context->stepContext; + + for (int32_t i = 0; i < jointCapacity; ++i) + { + b2Joint* joint = joints + i; + if (b2ObjectValid(&joint->object) == false) + { + continue; + } + + b2SolveJointVelocity(joint, stepContext, useBias); + } +} + +static void b2IntegratePositionsTask(int32_t startIndex, int32_t endIndex, b2SolverTaskContext* context) +{ + b2TracyCZoneNC(integrate_positions, "IntPos", b2_colorDarkSeaGreen, true); + + b2SolverBody* bodies = context->solverBodies; + float h = context->subStep; + + B2_ASSERT(startIndex <= endIndex); + + for (int32_t i = startIndex; i < endIndex; ++i) + { + b2SolverBody* body = bodies + i; + body->deltaAngle += h 
* body->angularVelocity; + body->deltaPosition = b2MulAdd(body->deltaPosition, h, body->linearVelocity); + } + + b2TracyCZoneEnd(integrate_positions); +} + +static void b2FinalizeBodiesTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) +{ + b2TracyCZoneNC(finalize_bodies, "FinalizeBodies", b2_colorViolet, true); + + b2SolverTaskContext* context = taskContext; + b2World* world = context->world; + bool enableSleep = world->enableSleep; + b2Body* bodies = world->bodies; + const b2SolverBody* solverBodies = context->solverBodies; + b2Contact* contacts = world->contacts; + const int32_t* solverToBodyMap = context->solverToBodyMap; + const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; + float timeStep = context->timeStep; + + b2BitSet* awakeContactBitSet = &world->taskContextArray[threadIndex].awakeContactBitSet; + b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; + b2BitSet* awakeIslandBitSet = &world->taskContextArray[threadIndex].awakeIslandBitSet; + bool enableContinuous = world->enableContinuous; + + B2_ASSERT(startIndex <= endIndex); + B2_ASSERT(startIndex <= world->bodyPool.capacity); + B2_ASSERT(endIndex <= world->bodyPool.capacity); + + // Update sleep + const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; + const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; + + for (int32_t i = startIndex; i < endIndex; ++i) + { + const b2SolverBody* solverBody = solverBodies + i; + + int32_t bodyIndex = solverToBodyMap[i]; + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); + + b2Vec2 v = solverBody->linearVelocity; + float w = solverBody->angularVelocity; + + body->linearVelocity = v; + body->angularVelocity = w; + + body->position = b2Add(body->position, solverBody->deltaPosition); + body->angle += solverBody->deltaAngle; + + body->transform.q = b2MakeRot(body->angle); + body->transform.p = b2Sub(body->position, 
b2RotateVector(body->transform.q, body->localCenter)); + + body->force = b2Vec2_zero; + body->torque = 0.0f; + body->isFast = false; + + if (enableSleep == false || body->enableSleep == false || w * w > angTolSqr || b2Dot(v, v) > linTolSqr) + { + body->sleepTime = 0.0f; + + const float saftetyFactor = 0.5f; + if (enableContinuous && (b2Length(v) + B2_ABS(w) * body->maxExtent) * timeStep > saftetyFactor * body->minExtent) + { + // Store in fast array for the continuous collision stage + int fastIndex = atomic_fetch_add(&world->fastBodyCount, 1); + world->fastBodies[fastIndex] = bodyIndex; + body->isFast = true; + } + else + { + // Body is safe to advance + body->position0 = body->position; + body->angle0 = body->angle; + } + } + else + { + // Body is safe to advance + body->position0 = body->position; + body->angle0 = body->angle; + body->sleepTime += timeStep; + } + + // Any single body in an island can keep it awake + if (body->sleepTime < b2_timeToSleep) + { + B2_ASSERT(0 <= body->islandIndex && body->islandIndex < world->islandPool.capacity); + b2SetBit(awakeIslandBitSet, body->islandIndex); + } + + // Update shapes AABBs + bool isFast = body->isFast; + int32_t shapeIndex = body->shapeList; + while (shapeIndex != B2_NULL_INDEX) + { + b2Shape* shape = world->shapes + shapeIndex; + + B2_ASSERT(shape->isFast == false); + + if (isFast) + { + // The AABB is updated after continuous collision. + // Add to moved shapes regardless of AABB changes. 
+ shape->isFast = true; + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + else + { + shape->aabb = b2Shape_ComputeAABB(shape, body->transform); + + if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) + { + shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); + shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); + + // Bit-set to keep the move array sorted + b2SetBit(shapeBitSet, shapeIndex); + } + } + + shapeIndex = shape->nextShapeIndex; + } + + // Wake contacts + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; + + // Bit set to prevent duplicates + b2SetBit(awakeContactBitSet, contactIndex); + contactKey = contact->edges[edgeIndex].nextKey; + } + } + + b2TracyCZoneEnd(finalize_bodies); +} + +static void b2ExecuteBlock(b2SolverStage* stage, b2SolverTaskContext* context, int32_t startIndex, int32_t endIndex) +{ + b2SolverStageType type = stage->type; + + switch (type) + { + case b2_stageIntegrateVelocities: + b2IntegrateVelocitiesTask(startIndex, endIndex, context); + break; + + case b2_stagePrepareContacts: + b2PrepareContactsSIMD(startIndex, endIndex, context); + break; + + case b2_stageWarmStartContacts: + b2WarmStartContactsSIMD(startIndex, endIndex, context, stage->colorIndex); + break; + + case b2_stagePrepareJoints: + break; + + case b2_stageSolveJoints: + break; + + case b2_stageSolveContacts: + b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, true); + break; + + case b2_stageIntegratePositions: + b2IntegratePositionsTask(startIndex, endIndex, context); + break; + + case b2_stageCalmJoints: + break; + + case b2_stageCalmContacts: + b2SolveContactsSIMD(startIndex, endIndex, context, stage->colorIndex, false); + break; + + case b2_stageRestitution: + b2ApplyRestitutionSIMD(startIndex, endIndex, 
context, stage->colorIndex); + break; + + case b2_stageStoreImpulses: + b2StoreImpulsesSIMD(startIndex, endIndex, context); + break; + } +} + +static inline int32_t GetWorkerStartIndex(int32_t workerIndex, int32_t blockCount, int32_t workerCount) +{ + if (blockCount <= workerCount) + { + return workerIndex < blockCount ? workerIndex : B2_NULL_INDEX; + } + + int32_t blocksPerWorker = blockCount / workerCount; + int32_t remainder = blockCount - blocksPerWorker * workerCount; + return blocksPerWorker * workerIndex + B2_MIN(remainder, workerIndex); +} + +static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, int previousSyncIndex, int syncIndex, int32_t workerIndex) +{ + int32_t completedCount = 0; + b2SolverBlock* blocks = stage->blocks; + int32_t blockCount = stage->blockCount; + + int32_t expectedSyncIndex = previousSyncIndex; + + int32_t startIndex = GetWorkerStartIndex(workerIndex, blockCount, context->workerCount); + if (startIndex == B2_NULL_INDEX) + { + return; + } + + B2_ASSERT(0 <= startIndex && startIndex < blockCount); + + int32_t blockIndex = startIndex; + + // Caution: this can change expectedSyncIndex + while (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == true) + { + B2_ASSERT(stage->type != b2_stagePrepareContacts || syncIndex < 2); + + B2_ASSERT(completedCount < blockCount); + + b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex); + + completedCount += 1; + blockIndex += 1; + if (blockIndex >= blockCount) + { + // Keep looking for work + blockIndex = 0; + } + + expectedSyncIndex = previousSyncIndex; + } + + // Search backwards for blocks + blockIndex = startIndex - 1; + while (true) + { + if (blockIndex < 0) + { + blockIndex = blockCount - 1; + } + + expectedSyncIndex = previousSyncIndex; + + // Caution: this can change expectedSyncIndex + if (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == 
// Execute as many blocks of a stage as this worker can claim.
// A block is claimed by atomically swapping its syncIndex from previousSyncIndex to syncIndex.
// The worker first walks forward from its start block (wrapping to 0), then walks backward
// from the block before its start block (wrapping to the last block). Each walk stops at the
// first block that could not be claimed. The number of blocks executed is added to the
// stage completion count.
// Returns without touching the completion count when the worker has no start block.
static void b2ExecuteStage(b2SolverStage* stage, b2SolverTaskContext* context, int previousSyncIndex, int syncIndex, int32_t workerIndex)
{
	int32_t completedCount = 0;
	b2SolverBlock* blocks = stage->blocks;
	int32_t blockCount = stage->blockCount;

	int32_t expectedSyncIndex = previousSyncIndex;

	int32_t startIndex = GetWorkerStartIndex(workerIndex, blockCount, context->workerCount);
	if (startIndex == B2_NULL_INDEX)
	{
		// More workers than blocks: nothing assigned to this worker
		return;
	}

	B2_ASSERT(0 <= startIndex && startIndex < blockCount);

	int32_t blockIndex = startIndex;

	// Caution: this can change expectedSyncIndex
	// (on failure the compare-exchange writes the observed value into expectedSyncIndex)
	while (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == true)
	{
		B2_ASSERT(stage->type != b2_stagePrepareContacts || syncIndex < 2);

		B2_ASSERT(completedCount < blockCount);

		b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex);

		completedCount += 1;
		blockIndex += 1;
		if (blockIndex >= blockCount)
		{
			// Keep looking for work
			blockIndex = 0;
		}

		// Reset the expected value for the next claim attempt
		expectedSyncIndex = previousSyncIndex;
	}

	// Search backwards for blocks
	blockIndex = startIndex - 1;
	while (true)
	{
		if (blockIndex < 0)
		{
			// Wrap around to the last block
			blockIndex = blockCount - 1;
		}

		expectedSyncIndex = previousSyncIndex;

		// Caution: this can change expectedSyncIndex
		if (atomic_compare_exchange_strong(&blocks[blockIndex].syncIndex, &expectedSyncIndex, syncIndex) == false)
		{
			break;
		}

		b2ExecuteBlock(stage, context, blocks[blockIndex].startIndex, blocks[blockIndex].endIndex);
		completedCount += 1;
		blockIndex -= 1;
	}

	// Publish the number of blocks this worker finished
	(void)atomic_fetch_add(&stage->completionCount, completedCount);
}

// Run one stage from the main thread (worker index 0).
// - No blocks: nothing happens.
// - One block: executed directly, without publishing sync bits.
// - Otherwise: publish syncBits to the context, take part in the stage as worker 0,
//   then spin until the stage completion count equals the block count and reset the count to zero.
// The sync index is read from the upper 16 bits of syncBits.
static void b2ExecuteMainStage(b2SolverStage* stage, b2SolverTaskContext* context, uint32_t syncBits)
{
	int32_t blockCount = stage->blockCount;
	if (blockCount == 0)
	{
		return;
	}

	if (blockCount == 1)
	{
		b2ExecuteBlock(stage, context, stage->blocks[0].startIndex, stage->blocks[0].endIndex);
	}
	else
	{
		// Writing the sync bits is what the worker loop in b2SolverTask spins on
		atomic_store(&context->syncBits, syncBits);

		int syncIndex = (syncBits >> 16) & 0xFFFF;
		B2_ASSERT(syncIndex > 0);
		int previousSyncIndex = syncIndex - 1;

		b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, 0);

		// Wait for every block of the stage to be reported complete
		while (atomic_load(&stage->completionCount) != blockCount)
		{
			_mm_pause();
		}

		// Stages are re-used, so reset the counter for the next use
		atomic_store(&stage->completionCount, 0);
	}
}
// Solver task entry point. One task is launched per worker context.
// The worker with index 0 acts as the main thread: it walks the stage list in order, publishes
// sync bits for each multi-block stage and also executes blocks itself. Every other worker
// spins on the sync bits and executes blocks of whatever stage is published, until the
// UINT_MAX sentinel is stored.
// startIndex, endIndex and threadIndexDontUse are ignored; taskContext is a b2WorkerContext*.
// This should not use the thread index because thread 0 can be called twice by enkiTS.
void b2SolverTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndexDontUse, void* taskContext)
{
	B2_MAYBE_UNUSED(startIndex);
	B2_MAYBE_UNUSED(endIndex);
	B2_MAYBE_UNUSED(threadIndexDontUse);

	b2WorkerContext* workerContext = taskContext;
	int32_t workerIndex = workerContext->workerIndex;
	b2SolverTaskContext* context = workerContext->context;
	int32_t activeColorCount = context->activeColorCount;
	b2SolverStage* stages = context->stages;

	if (workerIndex == 0)
	{
		// Main thread synchronizes the workers and does work itself.
		//
		// Stages are re-used for loops so that I don't need more stages for large iteration counts.
		// The sync indices grow monotonically for the body/graph/constraint groupings because they share solver blocks.
		// The stage index and sync indices are combined in to sync bits for atomic synchronization.
		// The workers need to compute the previous sync index for a given stage so that CAS works correctly. This
		// setup makes this easy to do.

		/*
		b2_stageIntegrateVelocities = 0,
		b2_stagePrepareContacts,
		b2_stageWarmStartContacts,
		b2_stagePrepareJoints,
		b2_stageSolveJoints,
		b2_stageSolveContacts,
		b2_stageIntegratePositions,
		b2_stageCalmJoints,
		b2_stageCalmContacts,
		b2_stageRestitution,
		b2_stageStoreImpulses
		*/

		// Sync bits layout: sync index in the upper 16 bits, stage index in the lower 16 bits
		int32_t bodySyncIndex = 1;
		int32_t stageIndex = 0;
		uint32_t syncBits = (bodySyncIndex << 16) | stageIndex;
		B2_ASSERT(stages[stageIndex].type == b2_stageIntegrateVelocities);
		b2ExecuteMainStage(stages + stageIndex, context, syncBits);
		stageIndex += 1;
		bodySyncIndex += 1;

		uint32_t constraintSyncIndex = 1;
		syncBits = (constraintSyncIndex << 16) | stageIndex;
		B2_ASSERT(stages[stageIndex].type == b2_stagePrepareContacts);
		b2ExecuteMainStage(stages + stageIndex, context, syncBits);
		stageIndex += 1;
		constraintSyncIndex += 1;

		// One warm start stage per active color, all with the same graph sync index
		int32_t graphSyncIndex = 1;
		for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex)
		{
			syncBits = (graphSyncIndex << 16) | stageIndex;
			B2_ASSERT(stages[stageIndex].type == b2_stageWarmStartContacts);
			b2ExecuteMainStage(stages + stageIndex, context, syncBits);
			stageIndex += 1;
		}
		graphSyncIndex += 1;

		// TODO_ERIN single threaded
		B2_ASSERT(stages[stageIndex].type == b2_stagePrepareJoints);
		b2PrepareJointsTask(context);
		stageIndex += 1;

		b2PrepareOverflowContacts(context);

		int32_t velocityIterations = context->velocityIterations;
		for (int32_t i = 0; i < velocityIterations; ++i)
		{
			// stage index restarted each iteration
			int32_t iterStageIndex = stageIndex;

			// TODO_ERIN single threaded
			B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveJoints);
			b2SolveJointsTask(context, true);
			iterStageIndex += 1;

			b2SolveOverflowContacts(context, true);

			for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex)
			{
				syncBits = (graphSyncIndex << 16) | iterStageIndex;
				B2_ASSERT(stages[iterStageIndex].type == b2_stageSolveContacts);
				b2ExecuteMainStage(stages + iterStageIndex, context, syncBits);
				iterStageIndex += 1;
			}
			graphSyncIndex += 1;

			B2_ASSERT(stages[iterStageIndex].type == b2_stageIntegratePositions);
			syncBits = (bodySyncIndex << 16) | iterStageIndex;
			b2ExecuteMainStage(stages + iterStageIndex, context, syncBits);
			bodySyncIndex += 1;
		}

		// Skip past: solve joints, solve contacts (per color), integrate positions
		stageIndex += 1 + activeColorCount + 1;

		int32_t calmIterations = context->calmIterations;
		for (int32_t i = 0; i < calmIterations; ++i)
		{
			// stage index restarted each iteration
			int32_t iterStageIndex = stageIndex;

			B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmJoints);
			b2SolveJointsTask(context, false);
			iterStageIndex += 1;

			b2SolveOverflowContacts(context, false);

			for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex)
			{
				syncBits = (graphSyncIndex << 16) | iterStageIndex;
				B2_ASSERT(stages[iterStageIndex].type == b2_stageCalmContacts);
				b2ExecuteMainStage(stages + iterStageIndex, context, syncBits);
				iterStageIndex += 1;
			}
			graphSyncIndex += 1;
		}

		// Skip past: calm joints, calm contacts (per color)
		stageIndex += 1 + activeColorCount;

		// Restitution
		{
			b2ApplyOverflowRestitution(context);

			// this pass runs once; start from the first restitution stage
			int32_t iterStageIndex = stageIndex;
			for (int32_t colorIndex = 0; colorIndex < activeColorCount; ++colorIndex)
			{
				syncBits = (graphSyncIndex << 16) | iterStageIndex;
				B2_ASSERT(stages[iterStageIndex].type == b2_stageRestitution);
				b2ExecuteMainStage(stages + iterStageIndex, context, syncBits);
				iterStageIndex += 1;
			}
			graphSyncIndex += 1;
		}

		// Skip past: restitution (per color)
		stageIndex += activeColorCount;

		b2StoreOverflowImpulses(context);

		syncBits = (constraintSyncIndex << 16) | stageIndex;
		B2_ASSERT(stages[stageIndex].type == b2_stageStoreImpulses);
		b2ExecuteMainStage(stages + stageIndex, context, syncBits);

		// Signal workers to finish
		atomic_store(&context->syncBits, UINT_MAX);

		B2_ASSERT(stageIndex + 1 == context->stageCount);
		return;
	}

	// Worker
	uint32_t lastSyncBits = 0;

	while (true)
	{
		// Spin until the main thread changes the sync bits
		uint32_t syncBits = atomic_load(&context->syncBits);
		while (syncBits == lastSyncBits)
		{
			_mm_pause();
			syncBits = atomic_load(&context->syncBits);
		}

		if (syncBits == UINT_MAX)
		{
			// sentinel hit
			break;
		}

		// Decode: stage index in the lower 16 bits, sync index in the upper 16 bits
		int32_t stageIndex = syncBits & 0xFFFF;
		B2_ASSERT(stageIndex < context->stageCount);

		int32_t syncIndex = (syncBits >> 16) & 0xFFFF;
		B2_ASSERT(syncIndex > 0);

		int32_t previousSyncIndex = syncIndex - 1;

		b2SolverStage* stage = stages + stageIndex;
		b2ExecuteStage(stage, context, previousSyncIndex, syncIndex, workerIndex);

		lastSyncBits = syncBits;
	}
}

// TODO_ERIN this comment is out of date
// Threading:
// 1. build array of awake bodies, maybe copy to contiguous array
// 2. parallel-for integrate velocities
// 3. parallel prepare constraints by color
// Loop sub-steps:
// 4. parallel solve constraints by color
// 5. parallel-for update position deltas (and positions on last iter)
// End Loop
// Loop bias-removal:
// 6. parallel solve constraints by color
// End loop
// 7. parallel-for store impulses
// 8.
parallel-for update aabbs, build proxy update set, build awake contact set +void b2SolveGraph(b2World* world, b2StepContext* stepContext) +{ + b2TracyCZoneNC(prepare_stages, "Prepare Stages", b2_colorDarkOrange, true); + + b2Graph* graph = &world->graph; + b2GraphColor* colors = graph->colors; + + // Count awake bodies + int32_t awakeIslandCount = b2Array(world->awakeIslandArray).count; + int32_t awakeBodyCount = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + awakeBodyCount += island->bodyCount; + } + + // Prepare world to receive fast bodies from body finalization + // TODO_ERIN scope problem + world->fastBodyCount = 0; + world->fastBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "fast bodies"); + + if (awakeBodyCount == 0) + { + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + graph->occupancy[i] = b2Array(colors[i].contactArray).count; + } + graph->occupancy[b2_overflowIndex] = b2Array(graph->overflow.contactArray).count; + + return; + } + + // Reserve space for awake bodies + b2Body* bodies = world->bodies; + b2Body** awakeBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2Body*), "awake bodies"); + b2SolverBody* solverBodies = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(b2SolverBody), "solver bodies"); + + // Map from solver body to body + // TODO_ERIN have body directly reference solver body for user access + int32_t* solverToBodyMap = b2AllocateStackItem(world->stackAllocator, awakeBodyCount * sizeof(int32_t), "solver body map"); + + // Map from world body to solver body + // TODO_ERIN eliminate this? 
+ int32_t bodyCapacity = world->bodyPool.capacity; + int32_t* bodyToSolverMap = b2AllocateStackItem(world->stackAllocator, bodyCapacity * sizeof(int32_t), "body map"); + memset(bodyToSolverMap, 0xFF, bodyCapacity * sizeof(int32_t)); + + // Build array of awake bodies + // Also search for an awake island to split + int32_t splitIslandIndex = B2_NULL_INDEX; + int32_t maxRemovedContacts = 0; + int32_t splitIslandBodyCount = 0; + int32_t index = 0; + for (int32_t i = 0; i < awakeIslandCount; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + b2Island* island = world->islands + islandIndex; + + if (island->constraintRemoveCount > maxRemovedContacts) + { + maxRemovedContacts = island->constraintRemoveCount; + splitIslandIndex = islandIndex; + splitIslandBodyCount = island->bodyCount; + } + + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = bodies + bodyIndex; + B2_ASSERT(b2ObjectValid(&body->object)); + B2_ASSERT(body->object.index == bodyIndex); + + awakeBodies[index] = body; + + B2_ASSERT(0 < bodyIndex && bodyIndex < bodyCapacity); + bodyToSolverMap[bodyIndex] = index; + solverToBodyMap[index] = bodyIndex; + + // cache miss + bodyIndex = body->islandNext; + + index += 1; + } + } + B2_ASSERT(index == awakeBodyCount); + + int32_t workerCount = world->workerCount; + const int32_t blocksPerWorker = 6; + + // Configure blocks for tasks that parallel-for bodies + int32_t bodyBlockSize = 1 << 5; + int32_t bodyBlockCount = ((awakeBodyCount - 1) >> 5) + 1; + if (awakeBodyCount > blocksPerWorker * bodyBlockSize * workerCount) + { + bodyBlockSize = awakeBodyCount / (blocksPerWorker * workerCount); + bodyBlockCount = blocksPerWorker * workerCount; + } + + int32_t activeColorIndices[b2_graphColorCount]; + int32_t colorConstraintCounts[b2_graphColorCount]; + int32_t colorBlockSize[b2_graphColorCount]; + int32_t colorBlockCounts[b2_graphColorCount]; + + int32_t activeColorCount = 0; + int32_t graphBlockCount = 0; + int32_t 
constraintCount = 0; + + int32_t c = 0; + for (int32_t i = 0; i < b2_graphColorCount; ++i) + { + int32_t count = b2Array(colors[i].contactArray).count; + graph->occupancy[i] = count; + + if (count > 0) + { + int32_t avxCount = ((count - 1) >> 3) + 1; + activeColorIndices[c] = i; + colorConstraintCounts[c] = avxCount; + + int32_t blockSize = 4; + int32_t blockCount = ((avxCount - 1) >> 2) + 1; + + colorBlockSize[c] = blockSize; + colorBlockCounts[c] = blockCount; + graphBlockCount += blockCount; + constraintCount += avxCount; + c += 1; + } + } + activeColorCount = c; + + b2ContactConstraintAVX* constraints = + b2AllocateStackItem(world->stackAllocator, constraintCount * sizeof(b2ContactConstraintAVX), "contact constraint"); + + int32_t* contactIndices = b2AllocateStackItem(world->stackAllocator, 8 * constraintCount * sizeof(int32_t), "contact indices"); + int32_t overflowContactCount = b2Array(graph->overflow.contactArray).count; + graph->occupancy[b2_overflowIndex] = overflowContactCount; + graph->overflow.contactConstraints = + b2AllocateStackItem(world->stackAllocator, overflowContactCount * sizeof(b2ContactConstraint), "overflow contact constraint"); + + int32_t base = 0; + for (int32_t i = 0; i < activeColorCount; ++i) + { + int32_t j = activeColorIndices[i]; + b2GraphColor* color = colors + j; + + color->contactConstraintAVXs = constraints + base; + + int32_t colorContactCount = b2Array(color->contactArray).count; + for (int32_t k = 0; k < colorContactCount; ++k) + { + contactIndices[8 * base + k] = color->contactArray[k]; + } + + // remainder + int32_t colorConstraintCount = colorConstraintCounts[i]; + for (int32_t k = colorContactCount; k < 8 * colorConstraintCount; ++k) + { + contactIndices[8 * base + k] = B2_NULL_INDEX; + } + + base += colorConstraintCount; + } + + int32_t storeBlockSize = 4; + int32_t storeBlockCount = constraintCount > 0 ? 
((constraintCount - 1) >> 2) + 1 : 0; + if (constraintCount > blocksPerWorker * storeBlockSize * workerCount) + { + storeBlockSize = constraintCount / (blocksPerWorker * workerCount); + storeBlockCount = blocksPerWorker * workerCount; + } + + /* + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageFinalizePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageRestitution, + b2_stageStoreImpulses + */ + + // TODO_ERIN joint tasks + int32_t stageCount = 0; + + // b2_stageIntegrateVelocities + stageCount += 1; + // b2_stagePrepareContacts + stageCount += 1; + // b2_stageWarmStartContacts + stageCount += activeColorCount; + // b2_stagePrepareJoints + stageCount += 1; + // b2_stageSolveJoints, b2_stageSolveContacts, b2_stageIntegratePositions + stageCount += 1 + activeColorCount + 1; + // b2_stageCalmJoints, b2_stageCalmContacts + stageCount += 1 + activeColorCount; + // b2_stageRestitution + stageCount += activeColorCount; + // b2_stageStoreImpulses + stageCount += 1; + + b2SolverStage* stages = b2AllocateStackItem(world->stackAllocator, stageCount * sizeof(b2SolverStage), "stages"); + b2SolverBlock* bodyBlocks = b2AllocateStackItem(world->stackAllocator, bodyBlockCount * sizeof(b2SolverBlock), "body blocks"); + b2SolverBlock* graphBlocks = b2AllocateStackItem(world->stackAllocator, graphBlockCount * sizeof(b2SolverBlock), "graph blocks"); + b2SolverBlock* storeBlocks = b2AllocateStackItem(world->stackAllocator, storeBlockCount * sizeof(b2SolverBlock), "store blocks"); + + // TODO_ERIN cannot do this in parallel with FinalizeBodies + // Split an awake island. This modifies: + // - stack allocator + // - awake island array + // - island pool + // - island indices on bodies, contacts, and joints + // I'm squeezing this task in here because it may be expensive and this + // is a safe place to put it. 
+ world->splitIslandIndex = splitIslandIndex; + void* splitIslandTask = NULL; + if (splitIslandIndex != B2_NULL_INDEX) + { + if (b2_parallel) + { + splitIslandTask = world->enqueueTaskFcn(&b2SplitIslandTask, 1, 1, world, world->userTaskContext); + } + else + { + b2SplitIslandTask(0, 1, 0, world); + world->splitIslandIndex = B2_NULL_INDEX; + } + } + + for (int32_t i = 0; i < bodyBlockCount; ++i) + { + b2SolverBlock* block = bodyBlocks + i; + block->startIndex = i * bodyBlockSize; + block->endIndex = block->startIndex + bodyBlockSize; + block->syncIndex = 0; + } + bodyBlocks[bodyBlockCount - 1].endIndex = awakeBodyCount; + + b2SolverBlock* colorBlocks[b2_graphColorCount]; + b2SolverBlock* baseGraphBlock = graphBlocks; + + for (int32_t i = 0; i < activeColorCount; ++i) + { + int32_t blockCount = colorBlockCounts[i]; + int32_t blockSize = colorBlockSize[i]; + for (int32_t j = 0; j < blockCount; ++j) + { + b2SolverBlock* block = baseGraphBlock + j; + block->startIndex = j * blockSize; + block->endIndex = block->startIndex + blockSize; + atomic_store(&block->syncIndex, 0); + } + baseGraphBlock[blockCount - 1].endIndex = colorConstraintCounts[i]; + + colorBlocks[i] = baseGraphBlock; + baseGraphBlock += blockCount; + } + + for (int32_t i = 0; i < storeBlockCount; ++i) + { + b2SolverBlock* block = storeBlocks + i; + block->startIndex = i * storeBlockSize; + block->endIndex = block->startIndex + storeBlockSize; + block->syncIndex = 0; + } + + if (storeBlockCount > 0) + { + storeBlocks[storeBlockCount - 1].endIndex = constraintCount; + } + + b2SolverStage* stage = stages; + + // Integrate velocities + stage->type = b2_stageIntegrateVelocities; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Prepare constraints + stage->type = b2_stagePrepareContacts; + stage->blocks = storeBlocks; + stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + 
stage += 1; + + // Warm start contacts + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageWarmStartContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Prepare joints + stage->type = b2_stagePrepareJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Solve joints + stage->type = b2_stageSolveJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Solve constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageSolveContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Integrate positions + stage->type = b2_stageIntegratePositions; + stage->blocks = bodyBlocks; + stage->blockCount = bodyBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm joints + stage->type = b2_stageCalmJoints; + stage->blocks = NULL; + stage->blockCount = 0; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + // Calm constraints + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageCalmContacts; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Restitution + for (int32_t i = 0; i < activeColorCount; ++i) + { + stage->type = b2_stageRestitution; + stage->blocks = colorBlocks[i]; + stage->blockCount = colorBlockCounts[i]; + stage->colorIndex = activeColorIndices[i]; + stage->completionCount = 0; + stage += 1; + } + + // Store impulses + stage->type = b2_stageStoreImpulses; + stage->blocks = storeBlocks; + 
stage->blockCount = storeBlockCount; + stage->colorIndex = -1; + stage->completionCount = 0; + stage += 1; + + B2_ASSERT((int32_t)(stage - stages) == stageCount); + + // TODO_ERIN + B2_ASSERT(workerCount <= 16); + b2WorkerContext workerContext[16]; + + int32_t velIters = B2_MAX(1, stepContext->velocityIterations); + + stepContext->solverBodies = solverBodies; + stepContext->solverToBodyMap = solverToBodyMap; + stepContext->bodyToSolverMap = bodyToSolverMap; + + b2SolverTaskContext context; + context.world = world; + context.graph = graph; + context.awakeBodies = awakeBodies; + context.solverBodies = solverBodies; + context.bodyToSolverMap = bodyToSolverMap; + context.solverToBodyMap = solverToBodyMap; + context.stepContext = stepContext; + context.constraints = NULL; + context.constraintAVXs = constraints; + context.contactIndices = contactIndices; + context.activeColorCount = activeColorCount; + context.velocityIterations = velIters; + context.calmIterations = stepContext->positionIterations; + context.workerCount = workerCount; + context.stageCount = stageCount; + context.stages = stages; + context.timeStep = stepContext->dt; + context.invTimeStep = stepContext->inv_dt; + context.subStep = context.timeStep / velIters; + context.invSubStep = velIters * stepContext->inv_dt; + context.syncBits = 0; + + b2TracyCZoneEnd(prepare_stages); + + // Must use worker index because thread 0 can be assigned multiple tasks by enkiTS + if (b2_parallel) + { + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + workerContext[i].userTask = world->enqueueTaskFcn(b2SolverTask, 1, 1, workerContext + i, world->userTaskContext); + } + } + else + { + // This relies on work stealing + for (int32_t i = 0; i < workerCount; ++i) + { + workerContext[i].context = &context; + workerContext[i].workerIndex = i; + workerContext[i].userTask = NULL; + b2SolverTask(0, 1, 0, workerContext + i); + } + } + + // Finish split + if 
(splitIslandTask != NULL) + { + world->finishTaskFcn(splitIslandTask, world->userTaskContext); + world->splitIslandIndex = B2_NULL_INDEX; + } + + // Finish solve + if (b2_parallel) + { + for (int32_t i = 0; i < workerCount; ++i) + { + world->finishTaskFcn(workerContext[i].userTask, world->userTaskContext); + } + } + + // Prepare contact, shape, and island bit sets used in body finalization. + int32_t contactCapacity = world->contactPool.capacity; + int32_t shapeCapacity = world->shapePool.capacity; + int32_t islandCapacity = world->islandPool.capacity + splitIslandBodyCount; + for (uint32_t i = 0; i < world->workerCount; ++i) + { + b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); + b2SetBitCountAndClear(&world->taskContextArray[i].awakeIslandBitSet, islandCapacity); + } + + // Finalize bodies. Must happen after the constraint solver and after island splitting. + void* finalizeBodiesTask = NULL; + if (b2_parallel) + { + finalizeBodiesTask = world->enqueueTaskFcn(b2FinalizeBodiesTask, awakeBodyCount, 16, &context, world->userTaskContext); + world->finishTaskFcn(finalizeBodiesTask, world->userTaskContext); + } + else + { + b2FinalizeBodiesTask(0, awakeBodyCount, 0, &context); + } + + b2FreeStackItem(world->stackAllocator, storeBlocks); + b2FreeStackItem(world->stackAllocator, graphBlocks); + b2FreeStackItem(world->stackAllocator, bodyBlocks); + b2FreeStackItem(world->stackAllocator, stages); + b2FreeStackItem(world->stackAllocator, graph->overflow.contactConstraints); + b2FreeStackItem(world->stackAllocator, contactIndices); + b2FreeStackItem(world->stackAllocator, constraints); + b2FreeStackItem(world->stackAllocator, bodyToSolverMap); + b2FreeStackItem(world->stackAllocator, solverToBodyMap); + b2FreeStackItem(world->stackAllocator, solverBodies); + b2FreeStackItem(world->stackAllocator, awakeBodies); +} diff --git a/src/graph.h b/src/graph.h new 
// SPDX-FileCopyrightText: 2023 Erin Catto
// SPDX-License-Identifier: MIT

#pragma once

#include "array.h"
#include "bitset.h"
#include "box2d/constants.h"

typedef struct b2Contact b2Contact;
typedef struct b2ContactConstraint b2ContactConstraint;
// NOTE(review): the AVX name aliases a struct tagged SIMD; the definition lives elsewhere
typedef struct b2ContactConstraintSIMD b2ContactConstraintAVX;
typedef struct b2Joint b2Joint;
typedef struct b2StepContext b2StepContext;
typedef struct b2World b2World;

// Slot used for the overflow set in per-color arrays that have b2_graphColorCount + 1 entries
#define b2_overflowIndex b2_graphColorCount

// One color of the constraint graph.
typedef struct b2GraphColor
{
	// presumably marks the bodies already used by this color; verify against graph.c
	b2BitSet bodySet;

	// contact indices assigned to this color (b2Array, read with b2Array(...).count)
	int32_t* contactArray;

	// joint counterpart of contactArray
	int32_t* jointArray;

	// transient
	b2ContactConstraint* contactConstraints;

	// TODO_ERIN these could be split up by worker so that workers get a contiguous array of constraints across colors
	// transient: b2SolveGraph points this at the color's slice of the step's wide constraint array
	b2ContactConstraintAVX* contactConstraintAVXs;
} b2GraphColor;

// This holds constraints that cannot fit the graph color limit. This happens when a single dynamic body
// is touching many other bodies.
+typedef struct +{ + int32_t* contactArray; + int32_t* jointArray; + b2ContactConstraint* contactConstraints; +} b2GraphOverflow; + +typedef struct b2Graph +{ + b2GraphColor colors[b2_graphColorCount]; + int32_t colorCount; + + // debug info + int32_t occupancy[b2_graphColorCount + 1]; + + b2GraphOverflow overflow; +} b2Graph; + +void b2CreateGraph(b2Graph* graph, int32_t bodyCapacity, int32_t contactCapacity, int32_t jointCapacity); +void b2DestroyGraph(b2Graph* graph); + +void b2AddContactToGraph(b2World* world, b2Contact* contact); +void b2RemoveContactFromGraph(b2World* world, b2Contact* contact); + +void b2AddJointToGraph(b2World* world, b2Joint* joint); +void b2RemoveJointFromGraph(b2World* world, b2Joint* joint); + +void b2SolveGraph(b2World* world, b2StepContext* stepContext); diff --git a/src/island.c b/src/island.c index cad6e0cd..141a673f 100644 --- a/src/island.c +++ b/src/island.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Position Correction Notes @@ -140,14 +139,26 @@ void b2CreateIsland(b2Island* island) island->parentIsland = B2_NULL_INDEX; island->awakeIndex = B2_NULL_INDEX; island->constraintRemoveCount = 0; - island->maySplit = false; - island->stepContext = NULL; - island->contactSolver = NULL; } void b2DestroyIsland(b2Island* island) { - B2_MAYBE_UNUSED(island); + // Remove from awake islands array + if (island->awakeIndex != B2_NULL_INDEX) + { + b2World* world = island->world; + int32_t islandCount = b2Array(world->awakeIslandArray).count; + B2_ASSERT(islandCount > 0); + b2Array_RemoveSwap(world->awakeIslandArray, island->awakeIndex); + if (island->awakeIndex < islandCount - 1) + { + // Fix awake index on swapped island + int32_t swappedIslandIndex = world->awakeIslandArray[island->awakeIndex]; + world->islands[swappedIslandIndex].awakeIndex = island->awakeIndex; + } + } + + b2FreeObject(&island->world->islandPool, &island->object); } static void b2AddContactToIsland(b2World* world, b2Island* island, b2Contact* contact) @@ 
-172,7 +183,7 @@ static void b2AddContactToIsland(b2World* world, b2Island* island, b2Contact* co island->contactCount += 1; contact->islandIndex = island->object.index; - b2ValidateIsland(island); + b2ValidateIsland(island, false); } void b2WakeIsland(b2Island* island) @@ -181,12 +192,45 @@ void b2WakeIsland(b2Island* island) if (island->awakeIndex != B2_NULL_INDEX) { + // already awake B2_ASSERT(world->awakeIslandArray[island->awakeIndex] == island->object.index); return; } + int32_t islandIndex = island->object.index; island->awakeIndex = b2Array(world->awakeIslandArray).count; - b2Array_Push(world->awakeIslandArray, island->object.index); + b2Array_Push(world->awakeIslandArray, islandIndex); + + // Reset sleep timers on bodies + // TODO_ERIN make this parallel somehow? + int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = world->bodies + bodyIndex; + B2_ASSERT(body->islandIndex == islandIndex); + body->sleepTime = 0.0f; + bodyIndex = body->islandNext; + } + + // Add constraints to graph + int32_t contactIndex = island->headContact; + while (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = world->contacts + contactIndex; + B2_ASSERT(contact->islandIndex == islandIndex); + b2AddContactToGraph(world, contact); + contactIndex = contact->islandNext; + } + + int32_t jointIndex = island->headJoint; + while (jointIndex != B2_NULL_INDEX) + { + b2Joint* joint = world->joints + jointIndex; + B2_ASSERT(joint->islandIndex == islandIndex); + // TODO_JOINT_GRAPH + // b2AddJointToGraph(world, joint); + jointIndex = joint->islandNext; + } } // https://en.wikipedia.org/wiki/Disjoint-set_data_structure @@ -311,6 +355,8 @@ void b2UnlinkContact(b2World* world, b2Contact* contact) contact->islandIndex = B2_NULL_INDEX; contact->islandPrev = B2_NULL_INDEX; contact->islandNext = B2_NULL_INDEX; + + b2ValidateIsland(island, false); } static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) @@ -335,7 +381,7 @@ 
static void b2AddJointToIsland(b2World* world, b2Island* island, b2Joint* joint) island->jointCount += 1; joint->islandIndex = island->object.index; - b2ValidateIsland(island); + b2ValidateIsland(island, false); } void b2LinkJoint(b2World* world, b2Joint* joint) @@ -456,6 +502,8 @@ void b2UnlinkJoint(b2World* world, b2Joint* joint) joint->islandIndex = B2_NULL_INDEX; joint->islandPrev = B2_NULL_INDEX; joint->islandNext = B2_NULL_INDEX; + + b2ValidateIsland(island, false); } // Merge an island into its root island. @@ -567,7 +615,7 @@ static int32_t b2MergeIsland(b2Island* island) // Merging a dirty islands means that splitting may still be needed rootIsland->constraintRemoveCount += island->constraintRemoveCount; - b2ValidateIsland(rootIsland); + b2ValidateIsland(rootIsland, true); return rootIsland->bodyCount; } @@ -623,18 +671,7 @@ void b2MergeAwakeIslands(b2World* world) int32_t mergedBodyCount = b2MergeIsland(island); maxBodyCount = B2_MAX(maxBodyCount, mergedBodyCount); - int32_t count = b2Array(world->awakeIslandArray).count; - int32_t awakeIndex = island->awakeIndex; - b2Array_RemoveSwap(world->awakeIslandArray, awakeIndex); - if (awakeIndex < count - 1) - { - // Fix awake index on swapped island - int32_t swappedIslandIndex = world->awakeIslandArray[awakeIndex]; - world->islands[swappedIslandIndex].awakeIndex = awakeIndex; - } - b2DestroyIsland(island); - b2FreeObject(&world->islandPool, &island->object); } // Step 3: ensure island pool has sufficient space to split the largest island @@ -642,114 +679,31 @@ void b2MergeAwakeIslands(b2World* world) world->islands = (b2Island*)world->islandPool.memory; } -static int b2CompareIslands(const void* A, const void* B) -{ - const b2Island* islandA = *(const b2Island**)A; - const b2Island* islandB = *(const b2Island**)B; - return islandB->bodyCount - islandA->bodyCount; -} - #define B2_CONTACT_REMOVE_THRESHOLD 1 -// Sort islands so that the largest islands are solved first to avoid -// long tails in the island 
parallel-for loop. -void b2SortIslands(b2World* world, b2Island** islands, int32_t count) -{ - // Sort descending order (largest island first) - qsort(islands, count, sizeof(b2Island*), b2CompareIslands); - - // Look for an island to split. Large islands have priority. - world->splitIslandIndex = B2_NULL_INDEX; - for (int32_t i = 0; i < count; ++i) - { - if (islands[i]->constraintRemoveCount >= B2_CONTACT_REMOVE_THRESHOLD) - { - // This and only this island may split this time step - islands[i]->maySplit = true; - world->splitIslandIndex = islands[i]->object.index; - break; - } - } -} - -void b2PrepareIsland(b2Island* island, b2StepContext* stepContext) -{ - island->stepContext = stepContext; - - b2ContactSolverDef contactSolverDef; - contactSolverDef.context = island->stepContext; - contactSolverDef.world = island->world; - contactSolverDef.contactList = island->headContact; - contactSolverDef.contactCount = island->contactCount; - island->contactSolver = b2CreateContactSolver(&contactSolverDef); -} - -#if 0 -if (island->bodyCount > 16) +// Split an island because some contacts and/or joints have been removed. +// This is called during the constraint solve while islands are not being touched. This uses DFS and touches a lot of memory, +// so it can be quite slow. +// Note: contacts/joints connected to static bodies must belong to an island but don't affect island connectivity +// Note: static bodies are never in an island +// Note: this task interacts with some allocators without locks under the assumption that no other tasks +// are interacting with these data structures. 
+void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context) { - int32_t k = 4; - b2Vec2 clusterCenters[4] = { 0 }; - int32_t clusterCounts[4] = { 0 }; - int32_t m = island->bodyCount / k; + b2TracyCZoneNC(split, "Split Island", b2_colorHoneydew2, true); - // seed cluster positions - for (int32_t i = 0; i < k; ++i) - { - int32_t j = (i * m) % island->bodyCount; - clusterCenters[i] = island->bodies[j]->position; - } + B2_MAYBE_UNUSED(startIndex); + B2_MAYBE_UNUSED(endIndex); + B2_MAYBE_UNUSED(threadIndex); - for (int32_t i = 0; i < island->bodyCount; ++i) - { - b2Body* b = island->bodies[i]; - b2Vec2 p = b->position; - float bestDist = b2DistanceSquared(clusterCenters[0], p); - b->cluster = 0; + b2World* world = context; - for (int32_t j = 1; j < k; ++j) - { - float dist = b2DistanceSquared(clusterCenters[j], p); - if (dist < bestDist) - { - bestDist = dist; - b->cluster = j; - } - } - } + B2_ASSERT(world->splitIslandIndex != B2_NULL_INDEX); - int32_t maxIter = 4; - for (int32_t iter = 0; iter < maxIter; ++iter) - { - // reset clusters - for (int32_t i = 0; i < k; ++i) - { - clusterCenters[i] = b2Vec2_zero; - clusterCounts[i] = 0; - } + b2Island* baseIsland = world->islands + world->splitIslandIndex; - // computer new clusters - for (int32_t i = 0; i < island->bodyCount; ++i) - { - b2Body* b = island->bodies[i]; - int32_t j = b->cluster; - clusterCenters[j] = b2Add(clusterCenters[j], b->position); - clusterCounts[j] += 1; - } - } -} -#endif + b2ValidateIsland(baseIsland, true); -// Split an island because some contacts and/or joints have been removed -// Note: contacts/joints connecting to static bodies must belong to an island but don't affect island connectivity -// Note: static bodies are never in an island -// TODO_ERIN I think this can be done during collision -static void b2SplitIsland(b2Island* baseIsland) -{ - b2TracyCZoneNC(split, "Split Island", b2_colorHoneydew2, true); - - b2ValidateIsland(baseIsland); - - b2World* 
world = baseIsland->world; int32_t bodyCount = baseIsland->bodyCount; b2Body* bodies = world->bodies; @@ -758,7 +712,7 @@ static void b2SplitIsland(b2Island* baseIsland) b2StackAllocator* alloc = world->stackAllocator; - // No lock is needed because only one island can split per time step. + // No lock is needed because I ensure these are not used while this task is active. int32_t* stack = b2AllocateStackItem(alloc, bodyCount * sizeof(int32_t), "island stack"); int32_t* bodyIndices = b2AllocateStackItem(alloc, bodyCount * sizeof(int32_t), "body indices"); @@ -784,7 +738,7 @@ static void b2SplitIsland(b2Island* baseIsland) while (nextContact != B2_NULL_INDEX) { b2Contact* contact = contacts + nextContact; - contact->flags &= ~b2_contactIslandFlag; + contact->isMarked = false; nextContact = contact->islandNext; } @@ -797,6 +751,10 @@ static void b2SplitIsland(b2Island* baseIsland) nextJoint = joint->islandNext; } + // Done with the base split island. + b2DestroyIsland(baseIsland); + baseIsland = NULL; + // Each island is found as a depth first search starting from a seed body for (int32_t i = 0; i < bodyCount; ++i) { @@ -867,7 +825,7 @@ static void b2SplitIsland(b2Island* baseIsland) contactKey = contact->edges[edgeIndex].nextKey; // Has this contact already been added to this island? - if (contact->flags & b2_contactIslandFlag) + if (contact->isMarked) { continue; } @@ -884,7 +842,7 @@ static void b2SplitIsland(b2Island* baseIsland) continue; } - contact->flags |= b2_contactIslandFlag; + contact->isMarked = true; int32_t otherEdgeIndex = edgeIndex ^ 1; int32_t otherBodyIndex = contact->edges[otherEdgeIndex].bodyIndex; @@ -974,8 +932,13 @@ static void b2SplitIsland(b2Island* baseIsland) } } - b2ValidateIsland(island); - b2Array_Push(world->splitIslandArray, island->object.index); + // For consistency, this island must be added to the awake island array. 
This should + // be safe because no other task is accessing this and the solver has already gathered + // all awake bodies. + island->awakeIndex = b2Array(world->awakeIslandArray).count; + b2Array_Push(world->awakeIslandArray, islandIndex); + + b2ValidateIsland(island, true); } b2FreeStackItem(alloc, bodyIndices); @@ -984,413 +947,21 @@ static void b2SplitIsland(b2Island* baseIsland) b2TracyCZoneEnd(split); } -// This must be thread safe -void b2SolveIsland(b2Island* island, uint32_t threadIndex) -{ - b2World* world = island->world; - b2Body* bodies = world->bodies; - b2StepContext* context = island->stepContext; - b2Joint* joints = world->joints; - - b2Vec2 gravity = world->gravity; - - float h = context->dt; - - // Integrate velocities and apply damping. Initialize the body state. - int32_t bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - float invMass = b->invMass; - float invI = b->invI; - - if (b->type == b2_dynamicBody) - { - b2Vec2 v = b->linearVelocity; - float w = b->angularVelocity; - - // Integrate velocities - v = b2Add(v, b2MulSV(h * invMass, b2MulAdd(b->force, b->gravityScale * b->mass, gravity))); - w = w + h * invI * b->torque; - - // Apply damping. 
- // ODE: dv/dt + c * v = 0 - // Solution: v(t) = v0 * exp(-c * t) - // Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt) - // v2 = exp(-c * dt) * v1 - // Pade approximation: - // v2 = v1 * 1 / (1 + c * dt) - v = b2MulSV(1.0f / (1.0f + h * b->linearDamping), v); - w *= 1.0f / (1.0f + h * b->angularDamping); - - b->linearVelocity = v; - b->angularVelocity = w; - } - - bodyIndex = b->islandNext; - } - - // Solver data - b2ContactSolver_Initialize(island->contactSolver); - - int32_t jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - b2InitVelocityConstraints(joint, context); - jointIndex = joint->islandNext; - } - - b2TracyCZoneNC(velc, "Velocity Constraints", b2_colorCadetBlue, true); - // Solve velocity constraints - for (int32_t i = 0; i < context->velocityIterations; ++i) - { - jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - b2SolveVelocityConstraints(joint, context); - jointIndex = joint->islandNext; - } - - b2ContactSolver_SolveVelocityConstraints(island->contactSolver); - } - b2TracyCZoneEnd(velc); - - // Special handling for restitution - b2ContactSolver_ApplyRestitution(island->contactSolver); - - // Store impulses for warm starting - b2ContactSolver_StoreImpulses(island->contactSolver); - - // Integrate positions - bool enableContinuous = world->enableContinuous; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - b2Vec2 c = b->position; - float a = b->angle; - b2Vec2 v = b->linearVelocity; - float w = b->angularVelocity; - - // Clamp large velocities - b2Vec2 translation = b2MulSV(h, v); - if (b2Dot(translation, translation) > b2_maxTranslationSquared) - { - float ratio = b2_maxTranslation / b2Length(translation); - v = b2MulSV(ratio, v); - } - - float rotation = h * w; - if (rotation * rotation > 
b2_maxRotationSquared) - { - float ratio = b2_maxRotation / B2_ABS(rotation); - w *= ratio; - } - - // Integrate - c = b2MulAdd(c, h, v); - a += h * w; - - b->position = c; - b->angle = a; - b->linearVelocity = v; - b->angularVelocity = w; - - const float saftetyFactor = 0.5f; - if (enableContinuous && (b2Length(v) + B2_ABS(w) * b->maxExtent) * h > saftetyFactor * b->minExtent) - { - // Store in fast array for the continuous collision stage - int fastIndex = atomic_fetch_add(&world->fastBodyCount, 1); - world->fastBodies[fastIndex] = bodyIndex; - b->isFast = true; - } - else - { - // Body is safe to advance - b->isFast = false; - b->position0 = b->position; - b->angle0 = b->angle; - } - - bodyIndex = b->islandNext; - } - - b2TracyCZoneNC(posc, "Position Constraints", b2_colorBurlywood, true); - - // Solve position constraints - bool positionSolved = false; - for (int32_t i = 0; i < context->positionIterations; ++i) - { - bool contactsOkay = b2ContactSolver_SolvePositionConstraintsBlock(island->contactSolver); - - bool jointsOkay = true; - jointIndex = island->headJoint; - while (jointIndex != B2_NULL_INDEX) - { - b2Joint* joint = joints + jointIndex; - - bool jointOkay = b2SolvePositionConstraints(joint, context); - jointsOkay = jointsOkay && jointOkay; - - jointIndex = joint->islandNext; - } - - if (contactsOkay && jointsOkay) - { - // Exit early if the position errors are small. 
- positionSolved = true; - break; - } - } - - b2TracyCZoneEnd(posc); - - b2TracyCZoneNC(sleep, "Sleep", b2_colorSalmon2, true); - - // Update transform - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - body->transform.q = b2MakeRot(body->angle); - body->transform.p = b2Sub(body->position, b2RotateVector(body->transform.q, body->localCenter)); - bodyIndex = body->islandNext; - } - - // Update sleep - bool isIslandAwake = true; - - // Don't allow an island that will be split to fall asleep just yet - if (world->enableSleep && island->maySplit == false) - { - float minSleepTime = FLT_MAX; - - const float linTolSqr = b2_linearSleepTolerance * b2_linearSleepTolerance; - const float angTolSqr = b2_angularSleepTolerance * b2_angularSleepTolerance; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - - if (b->enableSleep == false || b->angularVelocity * b->angularVelocity > angTolSqr || - b2Dot(b->linearVelocity, b->linearVelocity) > linTolSqr) - { - b->sleepTime = 0.0f; - minSleepTime = 0.0f; - } - else - { - b->sleepTime += h; - minSleepTime = B2_MIN(minSleepTime, b->sleepTime); - } - - bodyIndex = b->islandNext; - } - - if (minSleepTime >= b2_timeToSleep && positionSolved) - { - isIslandAwake = false; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* b = bodies + bodyIndex; - B2_ASSERT(b->isFast == false); - - b->sleepTime = 0.0f; - b->linearVelocity = b2Vec2_zero; - b->angularVelocity = 0.0f; - b->force = b2Vec2_zero; - b->torque = 0.0f; - - bodyIndex = b->islandNext; - } - } - } - - if (isIslandAwake == false) - { - // This signals that this island should not be added to awake island array - island->awakeIndex = B2_NULL_INDEX; - } - else - { - b2Contact* contacts = world->contacts; - const b2Vec2 aabbMargin = {b2_aabbMargin, b2_aabbMargin}; - b2BitSet* awakeContactBitSet = 
&world->taskContextArray[threadIndex].awakeContactBitSet; - b2BitSet* shapeBitSet = &world->taskContextArray[threadIndex].shapeBitSet; - - bodyIndex = island->headBody; - while (bodyIndex != B2_NULL_INDEX) - { - b2Body* body = bodies + bodyIndex; - - body->force = b2Vec2_zero; - body->torque = 0.0f; - - bool isFast = body->isFast; - - // Update shapes AABBs - int32_t shapeIndex = body->shapeList; - while (shapeIndex != B2_NULL_INDEX) - { - b2Shape* shape = world->shapes + shapeIndex; - - B2_ASSERT(shape->isFast == false); - - if (isFast) - { - // The AABB is updated after continuous collision. - // Add to moved shapes regardless of AABB changes. - shape->isFast = true; - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - else - { - shape->aabb = b2Shape_ComputeAABB(shape, body->transform); - - if (b2AABB_Contains(shape->fatAABB, shape->aabb) == false) - { - shape->fatAABB.lowerBound = b2Sub(shape->aabb.lowerBound, aabbMargin); - shape->fatAABB.upperBound = b2Add(shape->aabb.upperBound, aabbMargin); - - // Bit-set to keep the move array sorted - b2SetBit(shapeBitSet, shapeIndex); - } - } - - shapeIndex = shape->nextShapeIndex; - } - - // Prepare awake contacts. May include contacts that are not touching - // so they may not be island contacts. 
- int32_t contactKey = body->contactList; - while (contactKey != B2_NULL_INDEX) - { - int32_t contactIndex = contactKey >> 1; - int32_t edgeIndex = contactKey & 1; - b2Contact* contact = contacts + contactIndex; - - // Bit set to prevent duplicates - b2SetBit(awakeContactBitSet, contactIndex); - contactKey = contact->edges[edgeIndex].nextKey; - } - - bodyIndex = body->islandNext; - } - } - - if (island->maySplit) - { - b2SplitIsland(island); - } - - b2TracyCZoneEnd(sleep); -} - -// Single threaded work -void b2CompleteIsland(b2Island* island) -{ - b2World* world = island->world; - -#if 0 - // Report impulses - b2PostSolveFcn* postSolveFcn = world->postSolveFcn; - if (postSolveFcn != NULL) - { - b2Contact* contacts = world->contacts; - int16_t worldIndex = world->index; - const b2Shape* shapes = world->shapes; - - int32_t contactIndex = island->headContact; - while (contactIndex != B2_NULL_INDEX) - { - const b2Contact* contact = contacts + contactIndex; - - const b2Shape* shapeA = shapes + contact->shapeIndexA; - const b2Shape* shapeB = shapes + contact->shapeIndexB; - - b2ShapeId idA = {shapeA->object.index, worldIndex, shapeA->object.revision}; - b2ShapeId idB = {shapeB->object.index, worldIndex, shapeB->object.revision}; - postSolveFcn(idA, idB, &contact->manifold, world->postSolveContext); - } - } -#endif - - // Destroy in reverse order - b2DestroyContactSolver(island->contactSolver, world->stackAllocator); - island->contactSolver = NULL; - - // Wake island - if (island->awakeIndex != B2_NULL_INDEX) - { - island->awakeIndex = B2_NULL_INDEX; - b2WakeIsland(island); - } -} - -// This island was just split. Handle any remaining single threaded cleanup. -void b2CompleteBaseSplitIsland(b2Island* island) -{ - b2DestroyContactSolver(island->contactSolver, island->world->stackAllocator); - island->contactSolver = NULL; -} - -// This island was just created through splitting. Handle single thread work. 
-void b2CompleteSplitIsland(b2Island* island) -{ -// Report impulses -#if 0 - b2World* world = island->world; - b2PostSolveFcn* postSolveFcn = island->world->postSolveFcn; - if (postSolveFcn != NULL) - { - b2Contact* contacts = world->contacts; - int16_t worldIndex = world->index; - const b2Shape* shapes = world->shapes; - - int32_t contactIndex = island->headContact; - while (contactIndex != B2_NULL_INDEX) - { - const b2Contact* contact = contacts + contactIndex; - - const b2Shape* shapeA = shapes + contact->shapeIndexA; - const b2Shape* shapeB = shapes + contact->shapeIndexB; - - b2ShapeId idA = {shapeA->object.index, worldIndex, shapeA->object.revision}; - b2ShapeId idB = {shapeB->object.index, worldIndex, shapeB->object.revision}; - postSolveFcn(idA, idB, &contact->manifold, world->postSolveContext); - } - } -#endif - - // Split islands are kept awake as part of the splitting process. They can - // fall asleep the next time step. - island->awakeIndex = B2_NULL_INDEX; - b2WakeIsland(island); -} - #if B2_VALIDATE -void b2ValidateIsland(b2Island* island) +void b2ValidateIsland(b2Island* island, bool checkSleep) { b2World* world = island->world; int32_t islandIndex = island->object.index; B2_ASSERT(island->object.index == island->object.next); + bool isAwake = false; if (island->awakeIndex != B2_NULL_INDEX) { b2Array_Check(world->awakeIslandArray, island->awakeIndex); B2_ASSERT(world->awakeIslandArray[island->awakeIndex] == islandIndex); + isAwake = true; } B2_ASSERT(island->headBody != B2_NULL_INDEX); @@ -1442,6 +1013,25 @@ void b2ValidateIsland(b2Island* island) B2_ASSERT(contact->islandIndex == islandIndex); count += 1; + if (checkSleep) + { + if (isAwake) + { + B2_ASSERT(contact->colorIndex != B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex != B2_NULL_INDEX); + + //int32_t awakeIndex = world->contactAwakeIndexArray[contactIndex]; + //B2_ASSERT(0 <= awakeIndex && awakeIndex < b2Array(world->awakeContactArray).count); + 
//B2_ASSERT(world->awakeContactArray[awakeIndex] == contactIndex); + } + else + { + B2_ASSERT(contact->colorIndex == B2_NULL_INDEX); + B2_ASSERT(contact->colorSubIndex == B2_NULL_INDEX); + //B2_ASSERT(world->contactAwakeIndexArray[contactIndex] == B2_NULL_INDEX); + } + } + if (count == island->contactCount) { B2_ASSERT(contactIndex == island->tailContact); @@ -1494,9 +1084,10 @@ void b2ValidateIsland(b2Island* island) #else -void b2ValidateIsland(b2Island* island) +void b2ValidateIsland(b2Island* island, bool checkSleep) { B2_MAYBE_UNUSED(island); + B2_MAYBE_UNUSED(checkSleep); } #endif diff --git a/src/island.h b/src/island.h index 6cf31330..6208f4e2 100644 --- a/src/island.h +++ b/src/island.h @@ -52,14 +52,6 @@ typedef struct b2Island // Keeps track of how many contacts have been removed from this island. int32_t constraintRemoveCount; - - // This island has been chosen to be split up into smaller islands because a sufficient - // number of contacts have been removed. - bool maySplit; - - // Transient solver data - b2StepContext* stepContext; - struct b2ContactSolver* contactSolver; } b2Island; void b2CreateIsland(b2Island* island); @@ -80,14 +72,8 @@ void b2LinkJoint(b2World* world, b2Joint* joint); void b2UnlinkJoint(b2World* world, b2Joint* joint); void b2MergeAwakeIslands(b2World* world); -void b2SortIslands(b2World* world, b2Island** islands, int32_t count); - -void b2PrepareIsland(b2Island* island, b2StepContext* stepContext); - -void b2SolveIsland(b2Island* island, uint32_t threadIndex); -void b2CompleteIsland(b2Island* island); -void b2CompleteBaseSplitIsland(b2Island* island); +void b2SplitIslandTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context); void b2CompleteSplitIsland(b2Island* island); -void b2ValidateIsland(b2Island* island); +void b2ValidateIsland(b2Island* island, bool checkSleep); diff --git a/src/joint.c b/src/joint.c index 22afbd54..7509d8b8 100644 --- a/src/joint.c +++ b/src/joint.c @@ -116,6 +116,8 @@ static 
b2Joint* b2CreateJoint(b2World* world, b2Body* bodyA, b2Body* bodyB) joint->islandIndex = B2_NULL_INDEX; joint->islandPrev = B2_NULL_INDEX; joint->islandNext = B2_NULL_INDEX; + joint->colorIndex = B2_NULL_INDEX; + joint->colorSubIndex = B2_NULL_INDEX; joint->isMarked = false; @@ -123,6 +125,12 @@ static b2Joint* b2CreateJoint(b2World* world, b2Body* bodyA, b2Body* bodyB) { // Add edge to island graph b2LinkJoint(world, joint); + + if (b2IsBodyAwake(world, bodyA) || b2IsBodyAwake(world, bodyB)) + { + // TODO_JOINT_GRAPH + //b2AddJointToGraph(world, joint); + } } return joint; @@ -181,9 +189,9 @@ b2JointId b2World_CreateMouseJoint(b2WorldId worldId, const b2MouseJointDef* def b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); joint->type = b2_mouseJoint; - joint->localAnchorA = b2InvTransformPoint(bodyA->transform, def->target); joint->localAnchorB = b2InvTransformPoint(bodyB->transform, def->target); + joint->collideConnected = true; b2MouseJoint empty = {0}; joint->mouseJoint = empty; @@ -217,9 +225,9 @@ b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDe b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); joint->type = b2_revoluteJoint; - joint->localAnchorA = def->localAnchorA; joint->localAnchorB = def->localAnchorB; + joint->collideConnected = def->collideConnected; b2RevoluteJoint empty = {0}; joint->revoluteJoint = empty; @@ -249,6 +257,50 @@ b2JointId b2World_CreateRevoluteJoint(b2WorldId worldId, const b2RevoluteJointDe return jointId; } +b2JointId b2World_CreateWeldJoint(b2WorldId worldId, const b2WeldJointDef* def) +{ + b2World* world = b2GetWorldFromId(worldId); + + B2_ASSERT(world->locked == false); + + if (world->locked) + { + return b2_nullJointId; + } + + B2_ASSERT(b2IsBodyIdValid(world, def->bodyIdA)); + B2_ASSERT(b2IsBodyIdValid(world, def->bodyIdB)); + + b2Body* bodyA = world->bodies + def->bodyIdA.index; + b2Body* bodyB = world->bodies + def->bodyIdB.index; + + b2Joint* joint = b2CreateJoint(world, bodyA, bodyB); + 
+ joint->type = b2_weldJoint; + joint->localAnchorA = def->localAnchorA; + joint->localAnchorB = def->localAnchorB; + joint->collideConnected = def->collideConnected; + + b2WeldJoint empty = {0}; + joint->weldJoint = empty; + joint->weldJoint.referenceAngle = def->referenceAngle; + joint->weldJoint.linearHertz = def->linearHertz; + joint->weldJoint.linearDampingRatio = def->linearDampingRatio; + joint->weldJoint.angularHertz = def->angularHertz; + joint->weldJoint.angularDampingRatio = def->angularDampingRatio; + joint->weldJoint.impulse = b2Vec3_zero; + + // If the joint prevents collisions, then destroy all contacts between attached bodies + if (def->collideConnected == false) + { + b2DestroyContactsBetweenBodies(world, bodyA, bodyB); + } + + b2JointId jointId = {joint->object.index, world->index, joint->object.revision}; + + return jointId; +} + void b2World_DestroyJoint(b2JointId jointId) { b2World* world = b2GetWorldFromIndex(jointId.world); @@ -320,42 +372,72 @@ void b2World_DestroyJoint(b2JointId jointId) b2UnlinkJoint(world, joint); + // TODO_JOINT_GRAPH + // b2RemoveJointFromGraph(joint); + b2FreeObject(&world->jointPool, &joint->object); } -extern void b2InitializeMouse(b2Joint* base, b2StepContext* data); -extern void b2InitializeRevolute(b2Joint* base, b2StepContext* data); - -void b2InitVelocityConstraints(b2Joint* joint, b2StepContext* data) +b2BodyId b2Joint_GetBodyA(b2JointId jointId) { - switch (joint->type) + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + + if (world->locked) { - case b2_mouseJoint: - b2InitializeMouse(joint, data); - break; + return b2_nullBodyId; + } - case b2_revoluteJoint: - b2InitializeRevolute(joint, data); - break; + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); - default: - B2_ASSERT(false); + b2Joint* joint = world->joints + jointId.index; + int32_t bodyIndex = joint->edges[0].bodyIndex; + + B2_ASSERT(0 <= bodyIndex && bodyIndex < 
world->bodyPool.capacity); + b2Body* body = world->bodies + bodyIndex; + b2BodyId bodyId = {bodyIndex, jointId.world, body->object.revision}; + return bodyId; +} + +b2BodyId b2Joint_GetBodyB(b2JointId jointId) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + + if (world->locked) + { + return b2_nullBodyId; } + + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); + + b2Joint* joint = world->joints + jointId.index; + int32_t bodyIndex = joint->edges[1].bodyIndex; + + B2_ASSERT(0 <= bodyIndex && bodyIndex < world->bodyPool.capacity); + b2Body* body = world->bodies + bodyIndex; + b2BodyId bodyId = {bodyIndex, jointId.world, body->object.revision}; + return bodyId; } -extern void b2SolveMouseVelocity(b2Joint* base, b2StepContext* data); -extern void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* data); +extern void b2PrepareMouse(b2Joint* base, b2StepContext* context); +extern void b2PrepareRevolute(b2Joint* base, b2StepContext* context); +extern void b2PrepareWeld(b2Joint* base, b2StepContext* context); -void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data) +void b2PrepareJoint(b2Joint* joint, b2StepContext* context) { switch (joint->type) { case b2_mouseJoint: - b2SolveMouseVelocity(joint, data); + b2PrepareMouse(joint, context); break; case b2_revoluteJoint: - b2SolveRevoluteVelocity(joint, data); + b2PrepareRevolute(joint, context); + break; + + case b2_weldJoint: + b2PrepareWeld(joint, context); break; default: @@ -363,18 +445,31 @@ void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data) } } -extern bool b2SolveRevolutePosition(b2Joint* base, b2StepContext* data); +extern void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context); +extern void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap); +extern void b2SolveWeldVelocity(b2Joint* base, b2StepContext* context, bool removeOverlap); -// This returns true if the 
position errors are within tolerance. -bool b2SolvePositionConstraints(b2Joint* joint, b2StepContext* data) +void b2SolveJointVelocity(b2Joint* joint, b2StepContext* context, bool removeOverlap) { switch (joint->type) { + case b2_mouseJoint: + if (removeOverlap) + { + b2SolveMouseVelocity(joint, context); + } + break; + case b2_revoluteJoint: - return b2SolveRevolutePosition(joint, data); + b2SolveRevoluteVelocity(joint, context, removeOverlap); + break; + + case b2_weldJoint: + b2SolveWeldVelocity(joint, context, removeOverlap); + break; default: - return true; + B2_ASSERT(false); } } diff --git a/src/joint.h b/src/joint.h index d9025c86..d21c5514 100644 --- a/src/joint.h +++ b/src/joint.h @@ -1,11 +1,10 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT -#include "box2d/id.h" -#include "box2d/types.h" - #include "pool.h" +#include "box2d/types.h" + #include typedef struct b2DebugDraw b2DebugDraw; @@ -22,7 +21,7 @@ typedef enum b2JointType b2_mouseJoint, b2_gearJoint, b2_wheelJoint, - b2_weldJoint, + b2_weldJoint, b2_frictionJoint, b2_motorJoint } b2JointType; @@ -52,10 +51,10 @@ typedef struct b2MouseJoint float gamma; // Solver temp + int32_t indexB; + b2Vec2 positionB; b2Vec2 rB; b2Vec2 localCenterB; - float invMassB; - float invIB; b2Mat22 mass; b2Vec2 C; } b2MouseJoint; @@ -76,33 +75,66 @@ typedef struct b2RevoluteJoint float upperAngle; // Solver temp - b2Vec2 rA; - b2Vec2 rB; + int32_t indexA; + int32_t indexB; + b2Vec2 positionA; + b2Vec2 positionB; + float angleA; + float angleB; b2Vec2 localCenterA; b2Vec2 localCenterB; - float invMassA; - float invMassB; - float invIA; - float invIB; - b2Mat22 K; + float biasCoefficient; + float massCoefficient; + float impulseCoefficient; float angle; float axialMass; } b2RevoluteJoint; +typedef struct b2WeldJoint +{ + // Solver shared + float referenceAngle; + float linearHertz; + float linearDampingRatio; + float angularHertz; + float angularDampingRatio; + float linearBiasCoefficient; + 
float linearMassCoefficient; + float linearImpulseCoefficient; + float angularBiasCoefficient; + float angularMassCoefficient; + float angularImpulseCoefficient; + b2Vec3 impulse; + + // Solver temp + int32_t indexA; + int32_t indexB; + b2Vec2 positionA; + b2Vec2 positionB; + float angleA; + float angleB; + b2Vec2 localCenterA; + b2Vec2 localCenterB; +} b2WeldJoint; + /// The base joint class. Joints are used to constraint two bodies together in /// various fashions. Some joints also feature limits and motors. typedef struct b2Joint { b2Object object; - b2JointType type; - b2JointEdge edges[2]; int32_t islandIndex; int32_t islandPrev; int32_t islandNext; + // The color of this constraint in the graph coloring + int32_t colorIndex; + + // Index of joint within color + int32_t colorSubIndex; + b2Vec2 localAnchorA; b2Vec2 localAnchorB; @@ -110,16 +142,13 @@ typedef struct b2Joint { b2MouseJoint mouseJoint; b2RevoluteJoint revoluteJoint; + b2WeldJoint weldJoint; }; bool isMarked; bool collideConnected; } b2Joint; -void b2InitVelocityConstraints(b2Joint* joint, b2StepContext* data); -void b2SolveVelocityConstraints(b2Joint* joint, b2StepContext* data); - -// This returns true if the position errors are within tolerance. 
-bool b2SolvePositionConstraints(b2Joint* joint, b2StepContext* data); - +void b2PrepareJoint(b2Joint* joint, b2StepContext* context); +void b2SolveJointVelocity(b2Joint* joint, b2StepContext* context, bool removeOverlap); void b2DrawJoint(b2DebugDraw* draw, b2World* world, b2Joint* joint); diff --git a/src/math.c b/src/math.c index 84f9f303..ff156385 100644 --- a/src/math.c +++ b/src/math.c @@ -8,9 +8,6 @@ #include -float b2_lengthUnitsPerMeter = 1.0f; -float b2_timeToSleep = 0.5f; - b2Version b2_version = { 3, 0, 0 }; bool b2IsValid(float a) diff --git a/src/mouse_joint.c b/src/mouse_joint.c index 31d7c2ab..2142c07c 100644 --- a/src/mouse_joint.c +++ b/src/mouse_joint.c @@ -33,7 +33,7 @@ void b2MouseJoint_SetTarget(b2JointId jointId, b2Vec2 target) base->mouseJoint.targetA = target; } -void b2InitializeMouse(b2Joint* base, b2StepContext* context) +void b2PrepareMouse(b2Joint* base, b2StepContext* context) { B2_ASSERT(base->type == b2_mouseJoint); @@ -44,20 +44,23 @@ void b2InitializeMouse(b2Joint* base, b2StepContext* context) B2_ASSERT(bodyB->object.index == bodyB->object.next); b2MouseJoint* joint = &base->mouseJoint; + joint->indexB = context->bodyToSolverMap[indexB]; joint->localCenterB = bodyB->localCenter; - joint->invMassB = bodyB->invMass; - joint->invIB = bodyB->invI; b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; + b2Rot qB = bodyB->transform.q; + + b2SolverBody* solverBodyB = context->solverBodies + joint->indexB; + b2Vec2 vB = solverBodyB->linearVelocity; + float wB = solverBodyB->angularVelocity; - b2Rot qB = b2MakeRot(aB); + float mB = bodyB->invMass; + float iB = bodyB->invI; float d = joint->damping; float k = joint->stiffness; + // TODO_ERIN convert to bias/mass/impulse scales // magic formulas // gamma has units of inverse mass. // beta has units of inverse time. 
@@ -76,10 +79,10 @@ void b2InitializeMouse(b2Joint* base, b2StepContext* context) // = [1/m1+1/m2 0 ] + invI1 * [r1.y*r1.y -r1.x*r1.y] + invI2 * [r1.y*r1.y -r1.x*r1.y] // [ 0 1/m1+1/m2] [-r1.x*r1.y r1.x*r1.x] [-r1.x*r1.y r1.x*r1.x] b2Mat22 K; - K.cx.x = joint->invMassB + joint->invIB * joint->rB.y * joint->rB.y + joint->gamma; - K.cx.y = -joint->invIB * joint->rB.x * joint->rB.y; + K.cx.x = mB + iB * joint->rB.y * joint->rB.y + joint->gamma; + K.cx.y = -iB * joint->rB.x * joint->rB.y; K.cy.x = K.cx.y; - K.cy.y = joint->invMassB + joint->invIB * joint->rB.x * joint->rB.x + joint->gamma; + K.cy.y = mB + iB * joint->rB.x * joint->rB.x + joint->gamma; joint->mass = b2GetInverse22(K); @@ -89,33 +92,33 @@ void b2InitializeMouse(b2Joint* base, b2StepContext* context) // Cheat with some damping wB *= B2_MAX(0.0f, 1.0f - 0.02f * (60.0f * h)); - if (context->warmStarting) + if (context->enableWarmStarting) { joint->impulse = b2MulSV(context->dtRatio, joint->impulse); - vB = b2MulAdd(vB, joint->invMassB, joint->impulse); - wB += joint->invIB * b2Cross(joint->rB, joint->impulse); + vB = b2MulAdd(vB, mB, joint->impulse); + wB += iB * b2Cross(joint->rB, joint->impulse); } else { joint->impulse = b2Vec2_zero; } - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; + solverBodyB->linearVelocity = vB; + solverBodyB->angularVelocity = wB; } void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context) { b2MouseJoint* joint = &base->mouseJoint; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + b2SolverBody* bodyB = context->solverBodies + joint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; - // Cdot = v + cross(w, r) - b2Vec2 Cdot = b2Add(vB, b2CrossSV(wB, joint->rB)); - b2Vec2 SoftCdot = b2Add(Cdot, b2MulAdd(joint->C, joint->gamma, joint->impulse)); - b2Vec2 impulse = b2Neg(b2MulMV(joint->mass, SoftCdot)); + // dv = v + cross(w, r) + b2Vec2 dv = b2Add(vB, b2CrossSV(wB, joint->rB)); + b2Vec2 Cdot = b2Add(dv, b2MulAdd(joint->C, 
joint->gamma, joint->impulse)); + b2Vec2 impulse = b2Neg(b2MulMV(joint->mass, Cdot)); b2Vec2 oldImpulse = joint->impulse; joint->impulse = b2Add(joint->impulse, impulse); @@ -126,8 +129,8 @@ void b2SolveMouseVelocity(b2Joint* base, b2StepContext* context) } impulse = b2Sub(joint->impulse, oldImpulse); - vB = b2MulAdd(vB, joint->invMassB, impulse); - wB += joint->invIB * b2Cross(joint->rB, impulse); + vB = b2MulAdd(vB, bodyB->invMass, impulse); + wB += bodyB->invI * b2Cross(joint->rB, impulse); bodyB->linearVelocity = vB; bodyB->angularVelocity = wB; diff --git a/src/pool.c b/src/pool.c index 3f79a12e..fe450604 100644 --- a/src/pool.c +++ b/src/pool.c @@ -5,6 +5,8 @@ #include "allocate.h" #include "core.h" +#include "math.h" +#include "box2d/math.h" #include "box2d/types.h" @@ -122,8 +124,8 @@ b2Object* b2AllocObject(b2Pool* pool) else { int32_t oldCapacity = pool->capacity; - int32_t newCapacity = oldCapacity + oldCapacity / 2; - newCapacity = newCapacity > 2 ? newCapacity : 2; + int32_t addedCapacity = B2_MAX(2, oldCapacity / 2); + int32_t newCapacity = B2_MAX(2, oldCapacity + addedCapacity); pool->capacity = newCapacity; char* newMemory = (char*)b2Alloc(pool->capacity * pool->objectSize); memcpy(newMemory, pool->memory, oldCapacity * pool->objectSize); @@ -135,6 +137,7 @@ b2Object* b2AllocObject(b2Pool* pool) newObject->revision = 0; newObject->next = newObject->index; + // This assumes added capacity >= 2 pool->freeList = oldCapacity + 1; for (int32_t i = oldCapacity + 1; i < newCapacity - 1; ++i) { diff --git a/src/revolute_joint.c b/src/revolute_joint.c index f5c2a4dc..002f0b1f 100644 --- a/src/revolute_joint.c +++ b/src/revolute_joint.c @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT +#define _CRT_SECURE_NO_WARNINGS + #include "body.h" #include "core.h" #include "joint.h" @@ -9,6 +11,8 @@ #include "box2d/debug_draw.h" +#include + // Point-to-point constraint // C = p2 - p1 // Cdot = v2 - v1 @@ -22,57 +26,37 @@ // J = 
[0 0 -1 0 0 1] // K = invI1 + invI2 -void b2InitializeRevolute(b2Joint* base, b2StepContext* context) +void b2PrepareRevolute(b2Joint* base, b2StepContext* context) { B2_ASSERT(base->type == b2_revoluteJoint); int32_t indexA = base->edges[0].bodyIndex; int32_t indexB = base->edges[1].bodyIndex; - B2_ASSERT(0 <= indexA && indexA < context->bodyCapacity); - B2_ASSERT(0 <= indexB && indexB < context->bodyCapacity); - b2Body* bodyA = context->bodies + indexA; b2Body* bodyB = context->bodies + indexB; - B2_ASSERT(bodyA->object.index == bodyA->object.next); - B2_ASSERT(bodyB->object.index == bodyB->object.next); + B2_ASSERT(b2ObjectValid(&bodyA->object)); + B2_ASSERT(b2ObjectValid(&bodyB->object)); b2RevoluteJoint* joint = &base->revoluteJoint; - joint->localCenterA = bodyA->localCenter; - joint->invMassA = bodyA->invMass; - joint->invIA = bodyA->invI; + joint->indexA = context->bodyToSolverMap[indexA]; + joint->indexB = context->bodyToSolverMap[indexB]; + joint->localCenterA = bodyA->localCenter; joint->localCenterB = bodyB->localCenter; - joint->invMassB = bodyB->invMass; - joint->invIB = bodyB->invI; - - float aA = bodyA->angle; - b2Vec2 vA = bodyA->linearVelocity; - float wA = bodyA->angularVelocity; - - float aB = bodyB->angle; - b2Vec2 vB = bodyB->linearVelocity; - float wB = bodyB->angularVelocity; - - b2Rot qA = b2MakeRot(aA); - b2Rot qB = b2MakeRot(aB); + joint->positionA = bodyA->position; + joint->positionB = bodyB->position; + joint->angleA = bodyA->angle; + joint->angleB = bodyB->angle; - joint->rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); - joint->rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; - // J = [-I -r1_skew I r2_skew] - // r_skew = [-ry; rx] + // Note: must warm start solver bodies + b2SolverBody* solverBodyA = joint->indexA == B2_NULL_INDEX ? 
&dummyBody : context->solverBodies + joint->indexA; + float iA = solverBodyA->invI; - // Matlab - // K = [ mA+r1y^2*iA+mB+r2y^2*iB, -r1y*iA*r1x-r2y*iB*r2x] - // [ -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB] - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; - - joint->K.cx.x = mA + mB + joint->rA.y * joint->rA.y * iA + joint->rB.y * joint->rB.y * iB; - joint->K.cy.x = -joint->rA.y * joint->rA.x * iA - joint->rB.y * joint->rB.x * iB; - joint->K.cx.y = joint->K.cy.x; - joint->K.cy.y = mA + mB + joint->rA.x * joint->rA.x * iA + joint->rB.x * joint->rB.x * iB; + b2SolverBody* solverBodyB = joint->indexB == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexB; + float iB = solverBodyB->invI; joint->axialMass = iA + iB; bool fixedRotation; @@ -86,7 +70,18 @@ void b2InitializeRevolute(b2Joint* base, b2StepContext* context) fixedRotation = true; } - joint->angle = aB - aA - joint->referenceAngle; + // hertz = 1/4 * substep Hz + const float hertz = 0.25f * context->velocityIterations * context->inv_dt; + const float zeta = 1.0f; + float omega = 2.0f * b2_pi * hertz; + float h = context->dt; + + joint->biasCoefficient = omega / (2.0f * zeta + h * omega); + float c = h * omega * (2.0f * zeta + h * omega); + joint->impulseCoefficient = 1.0f / (1.0f + c); + joint->massCoefficient = c * joint->impulseCoefficient; + + joint->angle = bodyB->angle - bodyA->angle - joint->referenceAngle; if (joint->enableLimit == false || fixedRotation) { joint->lowerImpulse = 0.0f; @@ -98,24 +93,21 @@ void b2InitializeRevolute(b2Joint* base, b2StepContext* context) joint->motorImpulse = 0.0f; } - if (context->warmStarting) + if (context->enableWarmStarting) { float dtRatio = context->dtRatio; - // Scale impulses to support a variable time step. - joint->impulse = b2MulSV(dtRatio, joint->impulse); + // Soft step works best when bilateral constraints have no warm starting. 
+ joint->impulse = b2Vec2_zero; joint->motorImpulse *= dtRatio; joint->lowerImpulse *= dtRatio; joint->upperImpulse *= dtRatio; + // TODO_ERIN is warm starting axial stuff useful? float axialImpulse = joint->motorImpulse + joint->lowerImpulse - joint->upperImpulse; - b2Vec2 P = {joint->impulse.x, joint->impulse.y}; - - vA = b2MulSub(vA, mA, P); - wA -= iA * (b2Cross(joint->rA, P) + axialImpulse); - vB = b2MulAdd(vB, mB, P); - wB += iB * (b2Cross(joint->rB, P) + axialImpulse); + solverBodyA->angularVelocity -= iA * axialImpulse; + solverBodyB->angularVelocity += iB * axialImpulse; } else { @@ -124,29 +116,33 @@ void b2InitializeRevolute(b2Joint* base, b2StepContext* context) joint->lowerImpulse = 0.0f; joint->upperImpulse = 0.0f; } - - bodyA->linearVelocity = vA; - bodyA->angularVelocity = wA; - bodyB->linearVelocity = vB; - bodyB->angularVelocity = wB; } -void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) +void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context, bool useBias) { B2_ASSERT(base->type == b2_revoluteJoint); b2RevoluteJoint* joint = &base->revoluteJoint; - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; + // This is a dummy body to represent a static body since static bodies don't have a solver body. + b2SolverBody dummyBody = {0}; + b2SolverBody* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexA; b2Vec2 vA = bodyA->linearVelocity; float wA = bodyA->angularVelocity; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = joint->indexB == B2_NULL_INDEX ? 
&dummyBody : context->solverBodies + joint->indexB; b2Vec2 vB = bodyB->linearVelocity; float wB = bodyB->angularVelocity; + float mB = bodyB->invMass; + float iB = bodyB->invI; - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; + const b2Vec2 cA = b2Add(joint->positionA, bodyA->deltaPosition); + const float aA = joint->angleA + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); + const float aB = joint->angleB + bodyB->deltaAngle; bool fixedRotation = (iA + iB == 0.0f); @@ -166,11 +162,28 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) if (joint->enableLimit && fixedRotation == false) { + float jointAngle = aB - aA - joint->referenceAngle; + // Lower limit { - float C = joint->angle - joint->lowerAngle; + float C = jointAngle - joint->lowerAngle; + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (C > 0.0f) + { + // speculation + bias = C * context->inv_dt; + } + else if (useBias) + { + bias = joint->biasCoefficient * C; + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + float Cdot = wB - wA; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->lowerImpulse; float oldImpulse = joint->lowerImpulse; joint->lowerImpulse = B2_MAX(joint->lowerImpulse + impulse, 0.0f); impulse = joint->lowerImpulse - oldImpulse; @@ -183,9 +196,24 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) // Note: signs are flipped to keep C positive when the constraint is satisfied. // This also keeps the impulse positive when the limit is active. 
{ - float C = joint->upperAngle - joint->angle; + float C = joint->upperAngle - jointAngle; + + float bias = 0.0f; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (C > 0.0f) + { + bias = C * context->inv_dt; + } + else if (useBias) + { + bias = joint->biasCoefficient * C; + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + float Cdot = wA - wB; - float impulse = -joint->axialMass * (Cdot + B2_MAX(C, 0.0f) * context->inv_dt); + float impulse = -joint->axialMass * massScale * (Cdot + bias) - impulseScale * joint->upperImpulse; float oldImpulse = joint->upperImpulse; joint->upperImpulse = B2_MAX(joint->upperImpulse + impulse, 0.0f); impulse = joint->upperImpulse - oldImpulse; @@ -197,17 +225,51 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) // Solve point-to-point constraint { - b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, joint->rB)), b2Add(vA, b2CrossSV(wA, joint->rA))); - b2Vec2 impulse = b2Solve22(joint->K, b2Neg(Cdot)); + // J = [-I -r1_skew I r2_skew] + // r_skew = [-ry; rx] + + // Matlab + // K = [ mA+r1y^2*iA+mB+r2y^2*iB, -r1y*iA*r1x-r2y*iB*r2x] + // [ -r1y*iA*r1x-r2y*iB*r2x, mA+r1x^2*iA+mB+r2x^2*iB] + + // TODO_ERIN approximate the separation similar to contacts. Test if updating K makes a difference.
+ b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); + b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + + b2Mat22 K; + K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; + K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; + K.cx.y = K.cy.x; + K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; + + b2Vec2 Cdot = b2Sub(b2Add(vB, b2CrossSV(wB, rB)), b2Add(vA, b2CrossSV(wA, rA))); + + b2Vec2 bias = b2Vec2_zero; + float massScale = 1.0f; + float impulseScale = 0.0f; + if (useBias) + { + b2Vec2 separation = b2Add(b2Sub(rB, rA), b2Sub(cB, cA)); + bias = b2MulSV(joint->biasCoefficient, separation); + massScale = joint->massCoefficient; + impulseScale = joint->impulseCoefficient; + } + + b2Vec2 b = b2Solve22(K, b2Add(Cdot, bias)); + b2Vec2 impulse; + impulse.x = -massScale * b.x - impulseScale * joint->impulse.x; + impulse.y = -massScale * b.y - impulseScale * joint->impulse.y; joint->impulse.x += impulse.x; joint->impulse.y += impulse.y; vA = b2MulSub(vA, mA, impulse); - wA -= iA * b2Cross(joint->rA, impulse); + wA -= iA * b2Cross(rA, impulse); vB = b2MulAdd(vB, mB, impulse); - wB += iB * b2Cross(joint->rB, impulse); + wB += iB * b2Cross(rB, impulse); } bodyA->linearVelocity = vA; @@ -216,92 +278,43 @@ void b2SolveRevoluteVelocity(b2Joint* base, b2StepContext* context) bodyB->angularVelocity = wB; } -bool b2SolveRevolutePosition(b2Joint* base, b2StepContext* context) +void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit) { - B2_ASSERT(base->type == b2_revoluteJoint); - - b2RevoluteJoint* joint = &base->revoluteJoint; - - b2Body* bodyA = context->bodies + base->edges[0].bodyIndex; - b2Body* bodyB = context->bodies + base->edges[1].bodyIndex; - - b2Vec2 cA = bodyA->position; - float aA = bodyA->angle; - b2Vec2 cB = bodyB->position; - float aB = bodyB->angle; - - b2Rot qA = b2MakeRot(aA), qB = b2MakeRot(aB); - - float angularError = 0.0f; 
- float positionError = 0.0f; - - bool fixedRotation = (joint->invIA + joint->invIB == 0.0f); - - // Solve angular limit constraint - if (joint->enableLimit && fixedRotation == false) + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + if (world->locked) { - float angle = aB - aA - joint->referenceAngle; - float C = 0.0f; - - if (B2_ABS(joint->upperAngle - joint->lowerAngle) < 2.0f * b2_angularSlop) - { - // Prevent large angular corrections - C = B2_CLAMP(angle - joint->lowerAngle, -b2_maxAngularCorrection, b2_maxAngularCorrection); - } - else if (angle <= joint->lowerAngle) - { - // Prevent large angular corrections and allow some slop. - C = B2_CLAMP(angle - joint->lowerAngle + b2_angularSlop, -b2_maxAngularCorrection, 0.0f); - } - else if (angle >= joint->upperAngle) - { - // Prevent large angular corrections and allow some slop. - C = B2_CLAMP(angle - joint->upperAngle - b2_angularSlop, 0.0f, b2_maxAngularCorrection); - } - - float limitImpulse = -joint->axialMass * C; - aA -= joint->invIA * limitImpulse; - aB += joint->invIB * limitImpulse; - angularError = B2_ABS(C); + return; } - // Solve point-to-point constraint. 
- { - qA = b2MakeRot(aA); - qB = b2MakeRot(aB); - b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); - b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); - - b2Vec2 C = b2Sub(b2Add(cB, rB), b2Add(cA, rA)); - positionError = b2Length(C); - - float mA = joint->invMassA, mB = joint->invMassB; - float iA = joint->invIA, iB = joint->invIB; - - b2Mat22 K; - K.cx.x = mA + mB + iA * rA.y * rA.y + iB * rB.y * rB.y; - K.cx.y = -iA * rA.x * rA.y - iB * rB.x * rB.y; - K.cy.x = K.cx.y; - K.cy.y = mA + mB + iA * rA.x * rA.x + iB * rB.x * rB.x; - - b2Vec2 impulse = b2Solve22(K, b2Neg(C)); + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); - cA = b2MulSub(cA, mA, impulse); - aA -= iA * b2Cross(rA, impulse); + b2Joint* joint = world->joints + jointId.index; + B2_ASSERT(joint->object.index == joint->object.next); + B2_ASSERT(joint->object.revision == jointId.revision); + B2_ASSERT(joint->type == b2_revoluteJoint); + joint->revoluteJoint.enableLimit = enableLimit; +} - cB = b2MulAdd(cB, mB, impulse); - aB += iB * b2Cross(rB, impulse); +void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor) +{ + b2World* world = b2GetWorldFromIndex(jointId.world); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; } - bodyA->position = cA; - bodyA->angle = aA; - bodyB->position = cB; - bodyB->angle = aB; + B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); - return positionError <= b2_linearSlop && angularError <= b2_angularSlop; + b2Joint* joint = world->joints + jointId.index; + B2_ASSERT(joint->object.index == joint->object.next); + B2_ASSERT(joint->object.revision == jointId.revision); + B2_ASSERT(joint->type == b2_revoluteJoint); + joint->revoluteJoint.enableMotor = enableMotor; } -void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit) +void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed) { b2World* world = 
b2GetWorldFromIndex(jointId.world); B2_ASSERT(world->locked == false); @@ -316,16 +329,16 @@ void b2RevoluteJoint_EnableLimit(b2JointId jointId, bool enableLimit) B2_ASSERT(joint->object.index == joint->object.next); B2_ASSERT(joint->object.revision == jointId.revision); B2_ASSERT(joint->type == b2_revoluteJoint); - joint->revoluteJoint.enableLimit = enableLimit; + joint->revoluteJoint.motorSpeed = motorSpeed; } -void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor) +float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep) { b2World* world = b2GetWorldFromIndex(jointId.world); B2_ASSERT(world->locked == false); if (world->locked) { - return; + return 0.0f; } B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); @@ -334,10 +347,10 @@ void b2RevoluteJoint_EnableMotor(b2JointId jointId, bool enableMotor) B2_ASSERT(joint->object.index == joint->object.next); B2_ASSERT(joint->object.revision == jointId.revision); B2_ASSERT(joint->type == b2_revoluteJoint); - joint->revoluteJoint.enableMotor = enableMotor; + return inverseTimeStep * joint->revoluteJoint.motorImpulse; } -void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed) +void b2RevoluteJoint_SetMaxMotorTorque(b2JointId jointId, float torque) { b2World* world = b2GetWorldFromIndex(jointId.world); B2_ASSERT(world->locked == false); @@ -352,16 +365,16 @@ void b2RevoluteJoint_SetMotorSpeed(b2JointId jointId, float motorSpeed) B2_ASSERT(joint->object.index == joint->object.next); B2_ASSERT(joint->object.revision == jointId.revision); B2_ASSERT(joint->type == b2_revoluteJoint); - joint->revoluteJoint.motorSpeed = motorSpeed; + joint->revoluteJoint.maxMotorTorque = torque; } -float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep) +b2Vec2 b2RevoluteJoint_GetConstraintForce(b2JointId jointId) { b2World* world = b2GetWorldFromIndex(jointId.world); B2_ASSERT(world->locked == false); if (world->locked) { - return 0.0f; + return 
b2Vec2_zero; } B2_ASSERT(0 <= jointId.index && jointId.index < world->jointPool.capacity); @@ -370,7 +383,7 @@ float b2RevoluteJoint_GetMotorTorque(b2JointId jointId, float inverseTimeStep) B2_ASSERT(joint->object.index == joint->object.next); B2_ASSERT(joint->object.revision == jointId.revision); B2_ASSERT(joint->type == b2_revoluteJoint); - return inverseTimeStep * joint->revoluteJoint.motorImpulse; + return joint->revoluteJoint.impulse; } #if 0 @@ -439,4 +452,8 @@ void b2DrawRevolute(b2DebugDraw* draw, b2Joint* base, b2Body* bodyA, b2Body* bod draw->DrawSegment(xfA.p, pA, color, draw->context); draw->DrawSegment(pA, pB, color, draw->context); draw->DrawSegment(xfB.p, pB, color, draw->context); + + //char buffer[32]; + //sprintf(buffer, "%.1f", b2Length(joint->impulse)); + //draw->DrawString(pA, buffer, draw->context); } diff --git a/src/solver_data.h b/src/solver_data.h index b09797d2..7d7deaad 100644 --- a/src/solver_data.h +++ b/src/solver_data.h @@ -14,6 +14,7 @@ typedef struct b2StepContext // inverse time step (0 if dt == 0). 
float inv_dt; + // TODO_ERIN eliminate support for variable time step // ratio between current and previous time step (dt * inv_dt0) float dtRatio; @@ -25,9 +26,85 @@ typedef struct b2StepContext float restitutionThreshold; - // From b2World::bodies for convenience + // TODO_ERIN for joints struct b2Body* bodies; int32_t bodyCapacity; - bool warmStarting; + // Map from world body pool index to solver body + const int32_t* bodyToSolverMap; + + // Map from solver body to world body + const int32_t* solverToBodyMap; + + struct b2SolverBody* solverBodies; + int32_t solverBodyCount; + + bool enableWarmStarting; } b2StepContext; + +typedef enum b2SolverStageType +{ + b2_stageIntegrateVelocities = 0, + b2_stagePrepareContacts, + b2_stageWarmStartContacts, + b2_stagePrepareJoints, + b2_stageSolveJoints, + b2_stageSolveContacts, + b2_stageIntegratePositions, + b2_stageCalmJoints, + b2_stageCalmContacts, + b2_stageRestitution, + b2_stageStoreImpulses +} b2SolverStageType; + +// Each block of work has a sync index that gets incremented when a worker claims the block. This ensures only a single worker claims a +// block, yet lets work be distributed dynamically across multiple workers (work stealing). This also reduces contention on a single block +// index atomic. For non-iterative stages the sync index is simply set to one. For iterative stages (solver iteration) the same block of +// work is executed once per iteration and the atomic sync index is shared across iterations, so it increases monotonically. +typedef struct b2SolverBlock +{ + int32_t startIndex; + int32_t endIndex; + _Atomic int syncIndex; +} b2SolverBlock; + +// Each stage must be completed before going to the next stage. +// Non-iterative stages use a stage instance once while iterative stages re-use the same instance each iteration. 
+typedef struct b2SolverStage +{ + b2SolverStageType type; + b2SolverBlock* blocks; + int32_t blockCount; + int32_t colorIndex; + _Atomic int completionCount; +} b2SolverStage; + +typedef struct b2SolverTaskContext +{ + struct b2World* world; + struct b2Graph* graph; + struct b2Body** awakeBodies; + struct b2SolverBody* solverBodies; + int32_t* bodyToSolverMap; + int32_t* solverToBodyMap; + int32_t* contactIndices; + + b2StepContext* stepContext; + struct b2ContactConstraint* constraints; + struct b2ContactConstraintSIMD* constraintAVXs; + int32_t activeColorCount; + int32_t velocityIterations; + int32_t calmIterations; + int32_t workerCount; + + float timeStep; + float invTimeStep; + float subStep; + float invSubStep; + + b2SolverStage* stages; + int32_t stageCount; + + // sync index (16-bits) | stage type (16-bits) + _Atomic unsigned int syncBits; +} b2SolverTaskContext; diff --git a/src/stack_allocator.c b/src/stack_allocator.c index 55bc2766..0e0e3a3f 100644 --- a/src/stack_allocator.c +++ b/src/stack_allocator.c @@ -56,30 +56,35 @@ void b2DestroyStackAllocator(b2StackAllocator* allocator) void* b2AllocateStackItem(b2StackAllocator* alloc, int32_t size, const char* name) { + int32_t size32 = ((size - 1) | 0x1F) + 1; + b2StackEntry entry; - entry.size = size; + entry.size = size32; entry.name = name; - if (alloc->index + size > alloc->capacity) + if (alloc->index + size32 > alloc->capacity) { // fall back to the heap (undesirable) - entry.data = (char*)b2Alloc(size); + entry.data = (char*)b2Alloc(size32); entry.usedMalloc = true; + + B2_ASSERT(((uintptr_t)entry.data & 0x1F) == 0); } else { entry.data = alloc->data + alloc->index; entry.usedMalloc = false; - alloc->index += size; + alloc->index += size32; + + B2_ASSERT(((uintptr_t)entry.data & 0x1F) == 0); } - alloc->allocation += size; + alloc->allocation += size32; if (alloc->allocation > alloc->maxAllocation) { alloc->maxAllocation = alloc->allocation; } b2Array_Push(alloc->entries, entry); - return 
entry.data; } diff --git a/src/table.c b/src/table.c index 00316c00..a1a16251 100644 --- a/src/table.c +++ b/src/table.c @@ -156,6 +156,8 @@ static void b2GrowTable(b2Set* set) bool b2ContainsKey(const b2Set* set, uint64_t key) { + // key of zero is a sentinel + B2_ASSERT(key != 0); uint32_t hash = b2KeyHash(key); int32_t index = b2FindSlot(set, key, hash); return set->items[index].key == key; @@ -163,7 +165,12 @@ bool b2ContainsKey(const b2Set* set, uint64_t key) bool b2AddKey(b2Set* set, uint64_t key) { + // key of zero is a sentinel + B2_ASSERT(key != 0); + uint32_t hash = b2KeyHash(key); + B2_ASSERT(hash != 0); + int32_t index = b2FindSlot(set, key, hash); if (set->items[index].hash != 0) { diff --git a/src/table.h b/src/table.h index dc492e68..16074c2b 100644 --- a/src/table.h +++ b/src/table.h @@ -26,7 +26,7 @@ void b2DestroySet(b2Set* set); void b2ClearSet(b2Set* set); - // Returns true if key was already in set +// Returns true if key was already in set bool b2AddKey(b2Set* set, uint64_t key); // Returns true if the key was found diff --git a/src/weld_joint.c b/src/weld_joint.c new file mode 100644 index 00000000..85c00edd --- /dev/null +++ b/src/weld_joint.c @@ -0,0 +1,199 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "body.h" +#include "core.h" +#include "joint.h" +#include "solver_data.h" +#include "world.h" + +#include "box2d/debug_draw.h" + +// Point-to-point constraint +// C = p2 - p1 +// Cdot = v2 - v1 +// = v2 + cross(w2, r2) - v1 - cross(w1, r1) +// J = [-I -r1_skew I r2_skew ] +// Identity used: +// w k % (rx i + ry j) = w * (-ry i + rx j) + +// Angle constraint +// C = angle2 - angle1 - referenceAngle +// Cdot = w2 - w1 +// J = [0 0 -1 0 0 1] +// K = invI1 + invI2 + +void b2PrepareWeld(b2Joint* base, b2StepContext* context) +{ + B2_ASSERT(base->type == b2_weldJoint); + + int32_t indexA = base->edges[0].bodyIndex; + int32_t indexB = base->edges[1].bodyIndex; + B2_ASSERT(0 <= indexA && indexA < 
context->bodyCapacity); + B2_ASSERT(0 <= indexB && indexB < context->bodyCapacity); + + b2Body* bodyA = context->bodies + indexA; + b2Body* bodyB = context->bodies + indexB; + B2_ASSERT(bodyA->object.index == bodyA->object.next); + B2_ASSERT(bodyB->object.index == bodyB->object.next); + + b2WeldJoint* joint = &base->weldJoint; + joint->indexA = context->bodyToSolverMap[indexA]; + joint->indexB = context->bodyToSolverMap[indexB]; + joint->localCenterA = bodyA->localCenter; + joint->localCenterB = bodyB->localCenter; + joint->positionA = bodyA->position; + joint->positionB = bodyB->position; + joint->angleA = bodyA->angle; + joint->angleB = bodyB->angle; + + const float h = context->dt; + + float linearHertz = joint->linearHertz; + if (linearHertz == 0.0f) + { + linearHertz = 0.25f * context->velocityIterations * context->inv_dt; + } + + { + const float zeta = joint->linearDampingRatio; + const float omega = 2.0f * b2_pi * linearHertz; + joint->linearBiasCoefficient = omega / (2.0f * zeta + h * omega); + float a = h * omega * (2.0f * zeta + h * omega); + joint->linearImpulseCoefficient = 1.0f / (1.0f + a); + joint->linearMassCoefficient = a * joint->linearImpulseCoefficient; + } + + float angularHertz = joint->angularHertz; + if (angularHertz == 0.0f) + { + angularHertz = 0.25f * context->velocityIterations * context->inv_dt; + } + + { + const float zeta = joint->angularDampingRatio; + const float omega = 2.0f * b2_pi * angularHertz; + joint->angularBiasCoefficient = omega / (2.0f * zeta + h * omega); + float a = h * omega * (2.0f * zeta + h * omega); + joint->angularImpulseCoefficient = 1.0f / (1.0f + a); + joint->angularMassCoefficient = a * joint->angularImpulseCoefficient; + } + + joint->impulse = b2Vec3_zero; +} + +void b2SolveWeldVelocity(b2Joint* base, const b2StepContext* context, bool useBias) +{ + B2_ASSERT(base->type == b2_weldJoint); + + b2WeldJoint* joint = &base->weldJoint; + + // This is a dummy body to represent a static body since static bodies don't 
have a solver body. + b2SolverBody dummyBody = {0}; + + b2SolverBody* bodyA = joint->indexA == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexA; + b2Vec2 vA = bodyA->linearVelocity; + float wA = bodyA->angularVelocity; + float mA = bodyA->invMass; + float iA = bodyA->invI; + + b2SolverBody* bodyB = joint->indexB == B2_NULL_INDEX ? &dummyBody : context->solverBodies + joint->indexB; + b2Vec2 vB = bodyB->linearVelocity; + float wB = bodyB->angularVelocity; + float mB = bodyB->invMass; + float iB = bodyB->invI; + + const b2Vec2 cA = b2Add(joint->positionA, bodyA->deltaPosition); + const float aA = joint->angleA + bodyA->deltaAngle; + const b2Vec2 cB = b2Add(joint->positionB, bodyB->deltaPosition); + const float aB = joint->angleB + bodyB->deltaAngle; + + b2Rot qA = b2MakeRot(aA); + b2Rot qB = b2MakeRot(aB); + + b2Vec2 rA = b2RotateVector(qA, b2Sub(base->localAnchorA, joint->localCenterA)); + b2Vec2 rB = b2RotateVector(qB, b2Sub(base->localAnchorB, joint->localCenterB)); + + // TODO_ERIN handle fixed rotation + //bool fixedRotation = (iA + iB == 0.0f); + + b2Mat33 K; + K.cx.x = mA + mB + rA.y * rA.y * iA + rB.y * rB.y * iB; + K.cy.x = -rA.y * rA.x * iA - rB.y * rB.x * iB; + K.cz.x = -rA.y * iA - rB.y * iB; + K.cx.y = K.cy.x; + K.cy.y = mA + mB + rA.x * rA.x * iA + rB.x * rB.x * iB; + K.cz.y = rA.x * iA + rB.x * iB; + K.cx.z = K.cz.x; + K.cy.z = K.cz.y; + K.cz.z = iA + iB; + + b2Vec2 Cdot1 = b2Add(b2Sub(vB, vA), b2Sub(b2CrossSV(wB, rB), b2CrossSV(wA, rA))); + float Cdot2 = wB - wA; + + float linearBiasScale = 0.0f; + float linearMassScale = 1.0f; + float linearImpulseScale = 0.0f; + float angularBiasScale = 0.0f; + float angularMassScale = 1.0f; + float angularImpulseScale = 0.0f; + if (useBias) + { + linearBiasScale = joint->linearBiasCoefficient; + linearMassScale = joint->linearMassCoefficient; + linearImpulseScale = joint->linearImpulseCoefficient; + angularBiasScale = joint->angularBiasCoefficient; + angularMassScale = 
joint->angularMassCoefficient; + angularImpulseScale = joint->angularImpulseCoefficient; + } + + b2Vec2 C1 = b2Add(b2Sub(cB, cA), b2Sub(rB, rA)); + float C2 = aB - aA - joint->referenceAngle; + + b2Vec3 Cdot; + Cdot.x = Cdot1.x + linearBiasScale * C1.x; + Cdot.y = Cdot1.y + linearBiasScale * C1.y; + Cdot.z = Cdot2 + angularBiasScale * C2; + + b2Vec3 b = b2Solve33(K, Cdot); + b2Vec3 impulse; + impulse.x = -linearMassScale * b.x - linearImpulseScale * joint->impulse.x; + impulse.y = -linearMassScale * b.y - linearImpulseScale * joint->impulse.y; + impulse.z = -angularMassScale * b.z - angularImpulseScale * joint->impulse.z; + + joint->impulse.x += impulse.x; + joint->impulse.y += impulse.y; + joint->impulse.z += impulse.z; + + b2Vec2 P = {impulse.x, impulse.y}; + + vA = b2MulSub(vA, mA, P); + wA -= iA * (b2Cross(rA, P) + impulse.z); + + vB = b2MulAdd(vB, mB, P); + wB += iB * (b2Cross(rB, P) + impulse.z); + + bodyA->linearVelocity = vA; + bodyA->angularVelocity = wA; + bodyB->linearVelocity = vB; + bodyB->angularVelocity = wB; +} + +#if 0 +void b2WeldJoint::Dump() +{ + int32 indexA = m_bodyA->m_islandIndex; + int32 indexB = m_bodyB->m_islandIndex; + + b2Dump(" b2WeldJointDef jd;\n"); + b2Dump(" jd.bodyA = bodies[%d];\n", indexA); + b2Dump(" jd.bodyB = bodies[%d];\n", indexB); + b2Dump(" jd.collideConnected = bool(%d);\n", m_collideConnected); + b2Dump(" jd.localAnchorA.Set(%.9g, %.9g);\n", m_localAnchorA.x, m_localAnchorA.y); + b2Dump(" jd.localAnchorB.Set(%.9g, %.9g);\n", m_localAnchorB.x, m_localAnchorB.y); + b2Dump(" jd.referenceAngle = %.9g;\n", m_referenceAngle); + b2Dump(" jd.stiffness = %.9g;\n", m_stiffness); + b2Dump(" jd.damping = %.9g;\n", m_damping); + b2Dump(" joints[%d] = m_world->CreateJoint(&jd);\n", m_index); +} +#endif diff --git a/src/world.c b/src/world.c index d21451e4..983124f5 100644 --- a/src/world.c +++ b/src/world.c @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: 2023 Erin Catto // SPDX-License-Identifier: MIT +#define _CRT_SECURE_NO_WARNINGS + 
#include "world.h" #include "allocate.h" @@ -11,6 +13,7 @@ #include "broad_phase.h" #include "contact.h" #include "core.h" +#include "graph.h" #include "island.h" #include "joint.h" #include "pool.h" @@ -25,12 +28,12 @@ #include "box2d/distance.h" #include "box2d/timer.h" +#include <float.h> +#include <stdio.h> #include <string.h> b2World b2_worlds[b2_maxWorlds]; bool b2_parallel = true; -int b2_collideMinRange = 64; -int b2_islandMinRange = 1; b2World* b2GetWorldFromId(b2WorldId id) { @@ -96,6 +99,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->stackAllocator = b2CreateStackAllocator(def->stackAllocatorCapacity); b2CreateBroadPhase(&world->broadPhase); + b2CreateGraph(&world->graph, def->bodyCapacity, def->contactCapacity, def->jointCapacity); // pools world->bodyPool = b2CreatePool(sizeof(b2Body), B2_MAX(def->bodyCapacity, 1)); @@ -114,12 +118,10 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->islands = (b2Island*)world->islandPool.memory; world->awakeIslandArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->bodyCapacity, 1)); - world->splitIslandArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->bodyCapacity, 1)); world->awakeContactArray = b2CreateArray(sizeof(int32_t), B2_MAX(def->contactCapacity, 1)); world->contactAwakeIndexArray = b2CreateArray(sizeof(int32_t), world->contactPool.capacity); - world->splitIslandIndex = B2_NULL_INDEX; world->stepId = 0; // Globals start at 0. It should be fine for this to roll over. 
@@ -127,12 +129,16 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->gravity = def->gravity; world->restitutionThreshold = def->restitutionThreshold; + world->maximumPushoutVelocity = def->maxPushoutVelocity; + world->contactHertz = def->contactHertz; world->inv_dt0 = 0.0f; world->enableSleep = true; world->locked = false; - world->warmStarting = true; + world->enableWarmStarting = true; world->enableContinuous = true; world->profile = b2_emptyProfile; + world->userTreeTask = NULL; + world->splitIslandIndex = B2_NULL_INDEX; id.revision = world->revision; @@ -159,6 +165,7 @@ b2WorldId b2CreateWorld(const b2WorldDef* def) world->taskContextArray[i].contactStateBitSet = b2CreateBitSet(def->contactCapacity); world->taskContextArray[i].awakeContactBitSet = b2CreateBitSet(def->contactCapacity); world->taskContextArray[i].shapeBitSet = b2CreateBitSet(def->shapeCapacity); + world->taskContextArray[i].awakeIslandBitSet = b2CreateBitSet(256); } return id; @@ -173,6 +180,7 @@ void b2DestroyWorld(b2WorldId id) b2DestroyBitSet(&world->taskContextArray[i].contactStateBitSet); b2DestroyBitSet(&world->taskContextArray[i].awakeContactBitSet); b2DestroyBitSet(&world->taskContextArray[i].shapeBitSet); + b2DestroyBitSet(&world->taskContextArray[i].awakeIslandBitSet); } b2DestroyArray(world->taskContextArray, sizeof(b2TaskContext)); @@ -181,18 +189,6 @@ void b2DestroyWorld(b2WorldId id) b2DestroyArray(world->awakeIslandArray, sizeof(int32_t)); b2DestroyArray(world->contactAwakeIndexArray, sizeof(int32_t)); - b2DestroyArray(world->splitIslandArray, sizeof(int32_t)); - - b2Island* islands = world->islands; - int32_t islandCapacity = world->islandPool.capacity; - for (int32_t i = 0; i < islandCapacity; ++i) - { - b2Island* island = islands + i; - if (b2ObjectValid(&island->object) == true) - { - b2DestroyIsland(island); - } - } b2DestroyPool(&world->islandPool); b2DestroyPool(&world->jointPool); @@ -200,6 +196,7 @@ void b2DestroyWorld(b2WorldId id) b2DestroyPool(&world->shapePool); 
b2DestroyPool(&world->bodyPool); + b2DestroyGraph(&world->graph); b2DestroyBroadPhase(&world->broadPhase); b2DestroyBlockAllocator(world->blockAllocator); @@ -208,7 +205,6 @@ void b2DestroyWorld(b2WorldId id) memset(world, 0, sizeof(b2World)); } -// Locked version static void b2CollideTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* context) { b2TracyCZoneNC(collide_task, "Collide Task", b2_colorDodgerBlue1, true); @@ -299,13 +295,15 @@ static void b2UpdateTreesTask(int32_t startIndex, int32_t endIndex, uint32_t thr b2TracyCZoneEnd(tree_task); } +// Narrow-phase collision static void b2Collide(b2World* world) { B2_ASSERT(world->workerCount > 0); b2TracyCZoneNC(collide, "Collide", b2_colorDarkOrchid, true); - // Rebuild the collision tree for dynamic and kinematic bodies to keep their query performance good. + // Tasks that can be done in parallel with the narrow-phase + // - rebuild the collision tree for dynamic and kinematic bodies to keep their query performance good if (b2_parallel) { world->userTreeTask = world->enqueueTaskFcn(&b2UpdateTreesTask, 1, 1, world, world->userTaskContext); @@ -320,6 +318,7 @@ static void b2Collide(b2World* world) if (awakeContactCount == 0) { + b2TracyCZoneEnd(collide); return; } @@ -331,7 +330,7 @@ static void b2Collide(b2World* world) if (b2_parallel) { // Task should take at least 40us on a 4GHz CPU (10K cycles) - int32_t minRange = b2_collideMinRange; + int32_t minRange = 64; void* userCollideTask = world->enqueueTaskFcn(&b2CollideTask, awakeContactCount, minRange, world, world->userTaskContext); world->finishTaskFcn(userCollideTask, world->userTaskContext); } @@ -375,13 +374,15 @@ static void b2Collide(b2World* world) { B2_ASSERT(contact->islandIndex == B2_NULL_INDEX); b2LinkContact(world, contact); + b2AddContactToGraph(world, contact); contact->flags &= ~b2_contactStartedTouching; } else { B2_ASSERT(contact->flags & b2_contactStoppedTouching); 
b2UnlinkContact(world, contact); + b2RemoveContactFromGraph(world, contact); contact->flags &= ~b2_contactStoppedTouching; } @@ -390,32 +395,11 @@ static void b2Collide(b2World* world) } } - // TODO_ERIN clear awake contact array here? - b2TracyCZoneEnd(contact_state); b2TracyCZoneEnd(collide); } -static void b2IslandParallelForTask(int32_t startIndex, int32_t endIndex, uint32_t threadIndex, void* taskContext) -{ - b2TracyCZoneNC(island_task, "Island Task", b2_colorYellow, true); - - b2World* world = taskContext; - - B2_ASSERT(startIndex <= endIndex); - B2_ASSERT(startIndex <= b2Array(world->awakeIslandArray).count); - B2_ASSERT(endIndex <= b2Array(world->awakeIslandArray).count); - - for (int32_t i = startIndex; i < endIndex; ++i) - { - int32_t index = world->awakeIslandArray[i]; - b2SolveIsland(world->islands + index, threadIndex); - } - - b2TracyCZoneEnd(island_task); -} - struct b2ContinuousContext { b2World* world; @@ -619,125 +603,132 @@ static void b2ContinuousParallelForTask(int32_t startIndex, int32_t endIndex, ui b2TracyCZoneEnd(continuous_task); } -// Solve with union-find islands +// Solve with graph coloring static void b2Solve(b2World* world, b2StepContext* context) { b2TracyCZoneNC(solve, "Solve", b2_colorMistyRose, true); - b2TracyCZoneNC(prepare_islands, "Prepare Islands", b2_colorDarkSalmon, true); b2Timer timer = b2CreateTimer(); - b2Array_Clear(world->splitIslandArray); world->stepId += 1; - // Prepare contact and shape bit-sets - int32_t contactCapacity = world->contactPool.capacity; - int32_t shapeCapacity = world->shapePool.capacity; - for (uint32_t i = 0; i < world->workerCount; ++i) - { - b2SetBitCountAndClear(&world->taskContextArray[i].awakeContactBitSet, contactCapacity); - b2SetBitCountAndClear(&world->taskContextArray[i].shapeBitSet, shapeCapacity); - } - b2MergeAwakeIslands(world); - // Careful, this is modified by island merging - int32_t count = b2Array(world->awakeIslandArray).count; - - 
int32_t fastBodyCapacity = 0; - b2Island** islands = b2AllocateStackItem(world->stackAllocator, count * sizeof(b2Island*), "island array"); - for (int32_t i = 0; i < count; ++i) - { - b2Island* island = world->islands + world->awakeIslandArray[i]; - B2_ASSERT(island->awakeIndex == i); - islands[i] = island; - fastBodyCapacity += island->bodyCount; - } + world->profile.buildIslands = b2GetMillisecondsAndReset(&timer); - world->fastBodyCapacity = fastBodyCapacity; - world->fastBodyCount = 0; - world->fastBodies = b2AllocateStackItem(world->stackAllocator, fastBodyCapacity * sizeof(int32_t), "fast bodies"); + b2TracyCZoneNC(graph_solver, "Graph", b2_colorSeaGreen, true); - // Sort islands to improve task distribution - b2SortIslands(world, islands, count); + // Solve constraints using graph coloring + b2SolveGraph(world, context); - // Now create the island solvers - for (int32_t i = 0; i < count; ++i) - { - b2PrepareIsland(islands[i], context); - } + b2TracyCZoneEnd(graph_solver); - b2TracyCZoneEnd(prepare_islands); + world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); - world->profile.buildIslands = b2GetMillisecondsAndReset(&timer); + b2TracyCZoneNC(awake_islands, "Awake Islands", b2_colorGainsboro, true); - b2TracyCZoneNC(island_solver, "Island Solver", b2_colorSeaGreen, true); + // TODO_ERIN this code is related to body finalization b2SolveGraph. Reorganize? - if (b2_parallel) + // Prepare awake contact bit set so that putting islands to sleep can clear bits + // for the associated contacts. 
+ b2BitSet* awakeContactBitSet = &world->taskContextArray[0].awakeContactBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) { - int32_t minRange = b2_islandMinRange; - void* userIslandTask = world->enqueueTaskFcn(&b2IslandParallelForTask, count, minRange, world, world->userTaskContext); - world->finishTaskFcn(userIslandTask, world->userTaskContext); + b2InPlaceUnion(awakeContactBitSet, &world->taskContextArray[i].awakeContactBitSet); + } - // Finish the user tree task that was queued early in the time step - if (world->userTreeTask != NULL) + { + b2BitSet* awakeIslandBitSet = &world->taskContextArray[0].awakeIslandBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) { - world->finishTaskFcn(world->userTreeTask, world->userTaskContext); + b2InPlaceUnion(awakeIslandBitSet, &world->taskContextArray[i].awakeIslandBitSet); } - world->userTreeTask = NULL; - } - else - { - b2IslandParallelForTask(0, count, 0, world); - } + b2Body* bodies = world->bodies; + b2Contact* contacts = world->contacts; + b2Joint* joints = world->joints; + b2Island* islands = world->islands; - b2ValidateNoEnlarged(&world->broadPhase); + int32_t count = b2Array(world->awakeIslandArray).count; + for (int32_t i = 0; i < count; ++i) + { + int32_t islandIndex = world->awakeIslandArray[i]; + if (b2GetBit(awakeIslandBitSet, islandIndex) == true) + { + continue; + } - b2TracyCZoneEnd(island_solver); + // Put island to sleep + b2Island* island = islands + islandIndex; + island->awakeIndex = B2_NULL_INDEX; - world->profile.solveIslands = b2GetMillisecondsAndReset(&timer); + // Put contacts to sleep. Remember only touching contacts are in the island. + // So a body may have more contacts than those in the island. + // This is expensive on the main thread, but this only happens when an island goes + // to sleep. 
+ int32_t bodyIndex = island->headBody; + while (bodyIndex != B2_NULL_INDEX) + { + b2Body* body = bodies + bodyIndex; + int32_t contactKey = body->contactList; + while (contactKey != B2_NULL_INDEX) + { + int32_t contactIndex = contactKey >> 1; + int32_t edgeIndex = contactKey & 1; + b2Contact* contact = contacts + contactIndex; - b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); + // IMPORTANT: clear awake contact bit + b2ClearBit(awakeContactBitSet, contactIndex); - b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); + contactKey = contact->edges[edgeIndex].nextKey; + } - // Enlarge broad-phase proxies and build move array - { - b2BroadPhase* broadPhase = &world->broadPhase; + bodyIndex = body->islandNext; + } - // Gather bits for all shapes that have enlarged AABBs - b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); + // Remove edges from graph + int32_t contactIndex = island->headContact; + while (contactIndex != B2_NULL_INDEX) + { + b2Contact* contact = contacts + contactIndex; + b2RemoveContactFromGraph(world, contact); + + contactIndex = contact->islandNext; + } + + int32_t jointIndex = island->headJoint; + while (jointIndex != B2_NULL_INDEX) + { + b2Joint* joint = joints + jointIndex; + // TODO_JOINT_GRAPH + // b2RemoveJointFromGraph(world, joint); + jointIndex = joint->islandNext; + } } - // Apply shape AABB changes to broadphase. This also create the move array which must be - // ordered to ensure determinism. - b2Shape* shapes = world->shapes; + // Clear awake island array + b2Array_Clear(world->awakeIslandArray); + + // Use bitSet to build awake island array. No need to add edges. 
uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; + uint32_t wordCount = awakeIslandBitSet->wordCount; + uint64_t* bits = awakeIslandBitSet->bits; + int32_t awakeIndex = 0; for (uint32_t k = 0; k < wordCount; ++k) { word = bits[k]; while (word != 0) { uint32_t ctz = b2CTZ(word); - uint32_t shapeIndex = 64 * k + ctz; + uint32_t islandIndex = 64 * k + ctz; - b2Shape* shape = shapes + shapeIndex; - B2_ASSERT(b2ObjectValid(&shape->object)); - if (shape->isFast == false) - { - b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); - } - else - { - // Shape is fast. It's aabb will be enlarged in continuous collision. - b2BufferMove(broadPhase, shape->proxyKey); - } + B2_ASSERT(b2ObjectValid(&islands[islandIndex].object)); + + b2Array_Push(world->awakeIslandArray, islandIndex); + + // Reference index. This tells the island and bodies they are awake. + islands[islandIndex].awakeIndex = awakeIndex; + awakeIndex += 1; // Clear the smallest set bit word = word & (word - 1); @@ -745,18 +736,25 @@ static void b2Solve(b2World* world, b2StepContext* context) } } - b2TracyCZoneEnd(enlarge_proxies); +#if B2_VALIDATE + for (int32_t i = 0; i < world->islandPool.capacity; ++i) + { + b2Island* island = world->islands + i; + if (b2ObjectValid(&island->object) == false) + { + continue; + } + + b2ValidateIsland(island, true); + } +#endif + + b2TracyCZoneEnd(awake_islands); b2TracyCZoneNC(awake_contacts, "Awake Contacts", b2_colorYellowGreen, true); // Build awake contact array { - b2BitSet* bitSet = &world->taskContextArray[0].awakeContactBitSet; - for (uint32_t i = 1; i < world->workerCount; ++i) - { - b2InPlaceUnion(bitSet, &world->taskContextArray[i].awakeContactBitSet); - } - b2Array_Clear(world->awakeContactArray); int32_t* contactAwakeIndexArray = world->contactAwakeIndexArray; @@ -765,8 +763,8 @@ static void b2Solve(b2World* world, b2StepContext* context) // The order of the awake contact array doesn't matter, but I don't 
want duplicates. It is possible // that body A or body B or both bodies wake the contact. uint64_t word; - uint32_t wordCount = bitSet->wordCount; - uint64_t* bits = bitSet->bits; + uint32_t wordCount = awakeContactBitSet->wordCount; + uint64_t* bits = awakeContactBitSet->bits; for (uint32_t k = 0; k < wordCount; ++k) { word = bits[k]; @@ -791,45 +789,68 @@ static void b2Solve(b2World* world, b2StepContext* context) b2TracyCZoneEnd(awake_contacts); - b2TracyCZoneNC(complete_island, "Complete Island", b2_colorBlueViolet, true); - - // Complete islands (reverse order for stack allocator) - // This rebuilds the awake island array and awake contact array - b2Array_Clear(world->awakeIslandArray); - - for (int32_t i = count - 1; i >= 0; --i) + // Finish the user tree task that was queued early in the time step. This must be done before touching the broadphase. + if (b2_parallel) { - b2Island* island = islands[i]; - if (island->object.index == world->splitIslandIndex) - { - b2CompleteBaseSplitIsland(island); - } - else + if (world->userTreeTask != NULL) { - b2CompleteIsland(island); + world->finishTaskFcn(world->userTreeTask, world->userTaskContext); + world->userTreeTask = NULL; } } - // Handle islands created from splitting - if (world->splitIslandIndex != B2_NULL_INDEX) + b2ValidateNoEnlarged(&world->broadPhase); + + b2TracyCZoneNC(broad_phase, "Broadphase", b2_colorPurple, true); + + b2TracyCZoneNC(enlarge_proxies, "Enlarge Proxies", b2_colorDarkTurquoise, true); + + // Enlarge broad-phase proxies and build move array { - b2Island* baseIsland = world->islands + world->splitIslandIndex; - int32_t splitCount = b2Array(world->splitIslandArray).count; - for (int32_t i = 0; i < splitCount; ++i) + b2BroadPhase* broadPhase = &world->broadPhase; + + // Gather bits for all shapes that have enlarged AABBs + b2BitSet* bitSet = &world->taskContextArray[0].shapeBitSet; + for (uint32_t i = 1; i < world->workerCount; ++i) { - int32_t index = world->splitIslandArray[i]; - b2Island* 
splitIsland = world->islands + index; - b2CompleteSplitIsland(splitIsland); + b2InPlaceUnion(bitSet, &world->taskContextArray[i].shapeBitSet); } - // Done with the base split island. - b2DestroyIsland(baseIsland); - b2FreeObject(&world->islandPool, &baseIsland->object); + // Apply shape AABB changes to broadphase. This also creates the move array which must be + // ordered to ensure determinism. + b2Shape* shapes = world->shapes; + uint64_t word; + uint32_t wordCount = bitSet->wordCount; + uint64_t* bits = bitSet->bits; + for (uint32_t k = 0; k < wordCount; ++k) + { + word = bits[k]; + while (word != 0) + { + uint32_t ctz = b2CTZ(word); + uint32_t shapeIndex = 64 * k + ctz; + + b2Shape* shape = shapes + shapeIndex; + B2_ASSERT(b2ObjectValid(&shape->object)); + if (shape->isFast == false) + { + b2BroadPhase_EnlargeProxy(broadPhase, shape->proxyKey, shape->fatAABB); + } + else + { + // Shape is fast. Its AABB will be enlarged in continuous collision. + b2BufferMove(broadPhase, shape->proxyKey); + } + + // Clear the smallest set bit + word = word & (word - 1); + } + } } - b2ValidateBroadphase(&world->broadPhase); + b2TracyCZoneEnd(enlarge_proxies); - b2TracyCZoneEnd(complete_island); + b2ValidateBroadphase(&world->broadPhase); world->profile.broadphase = b2GetMilliseconds(&timer); @@ -903,8 +924,6 @@ static void b2Solve(b2World* world, b2StepContext* context) world->profile.continuous = b2GetMilliseconds(&timer); - b2FreeStackItem(world->stackAllocator, islands); - b2TracyCZoneEnd(solve); } @@ -954,7 +973,7 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t velocityIterations, context.dtRatio = world->inv_dt0 * timeStep; context.restitutionThreshold = world->restitutionThreshold; - context.warmStarting = world->warmStarting; + context.enableWarmStarting = world->enableWarmStarting; context.bodies = world->bodies; context.bodyCapacity = world->bodyPool.capacity; @@ -982,16 +1001,17 @@ void b2World_Step(b2WorldId worldId, float timeStep, int32_t 
velocityIterations, world->profile.step = b2GetMilliseconds(&stepTimer); - B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); - - // Ensure stack is large enough - b2GrowStack(world->stackAllocator); - if (b2_parallel) { + // This finishes tree rebuild and split island tasks world->finishAllTasksFcn(world->userTaskContext); } + B2_ASSERT(b2GetStackAllocation(world->stackAllocator) == 0); + + // Ensure stack is large enough + b2GrowStack(world->stackAllocator); + b2TracyCZoneEnd(world_step); } @@ -1166,6 +1186,10 @@ void b2World_Draw(b2WorldId worldId, b2DebugDraw* draw) continue; } + char buffer[32]; + snprintf(buffer, sizeof(buffer), "%d", b->object.index); + draw->DrawString(b->position, buffer, draw->context); + int32_t shapeIndex = b->shapeList; while (shapeIndex != B2_NULL_INDEX) { @@ -1200,15 +1224,28 @@ void b2World_Draw(b2WorldId worldId, b2DebugDraw* draw) //} } - // if (flags & b2Draw::e_centerOfMassBit) - //{ - // for (b2Body* b = m_bodyList; b; b = b->GetNext()) - // { - // b2Transform xf = b->GetTransform(); - // xf.p = b->GetWorldCenter(); - // m_debugDraw->DrawTransform(xf); - // } - // } + if (draw->drawMass) + { + b2Vec2 offset = {0.1f, 0.1f}; + b2Body* bodies = world->bodies; + int32_t bodyCapacity = world->bodyPool.capacity; + for (int32_t i = 0; i < bodyCapacity; ++i) + { + b2Body* body = bodies + i; + if (b2ObjectValid(&body->object) == false) + { + continue; + } + + draw->DrawTransform(body->transform, draw->context); + + b2Vec2 p = b2TransformPoint(body->transform, offset); + + char buffer[32]; + snprintf(buffer, sizeof(buffer), "%.1f", body->mass); + draw->DrawString(p, buffer, draw->context); + } + } } void b2World_EnableSleeping(b2WorldId worldId, bool flag) @@ -1242,7 +1279,19 @@ void b2World_EnableSleeping(b2WorldId worldId, bool flag) } } -void b2World_EnableContinuo(b2WorldId worldId, bool flag) +void b2World_EnableWarmStarting(b2WorldId worldId, bool flag) +{ + b2World* world = b2GetWorldFromId(worldId); + 
B2_ASSERT(world->locked == false); + if 
(world->locked) + { + return; + } + + world->enableWarmStarting = flag; +} + +void b2World_EnableContinuous(b2WorldId worldId, bool flag) { b2World* world = b2GetWorldFromId(worldId); B2_ASSERT(world->locked == false); @@ -1254,6 +1303,42 @@ void b2World_EnableContinuo(b2WorldId worldId, bool flag) world->enableContinuous = flag; } +void b2World_SetRestitutionThreshold(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->restitutionThreshold = B2_CLAMP(value, 0.0f, FLT_MAX); +} + +void b2World_SetMaximumPushoutVelocity(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->maximumPushoutVelocity = B2_CLAMP(value, 0.0f, FLT_MAX); +} + +void b2World_SetContactHertz(b2WorldId worldId, float value) +{ + b2World* world = b2GetWorldFromId(worldId); + B2_ASSERT(world->locked == false); + if (world->locked) + { + return; + } + + world->contactHertz = B2_CLAMP(value, 0.0f, FLT_MAX); +} + b2Profile b2World_GetProfile(b2WorldId worldId) { b2World* world = b2GetWorldFromId(worldId); @@ -1275,6 +1360,10 @@ b2Statistics b2World_GetStatistics(b2WorldId worldId) s.stackCapacity = b2GetStackCapacity(world->stackAllocator); s.stackUsed = b2GetMaxStackAllocation(world->stackAllocator); s.byteCount = b2GetByteCount(); + for (int32_t i = 0; i <= b2_graphColorCount; ++i) + { + s.colorCounts[i] = world->graph.occupancy[i]; + } return s; } diff --git a/src/world.h b/src/world.h index 09e79151..81d6d5db 100644 --- a/src/world.h +++ b/src/world.h @@ -6,11 +6,14 @@ #include "bitset.h" #include "broad_phase.h" #include "island.h" +#include "graph.h" #include "pool.h" #include "box2d/callbacks.h" #include "box2d/timer.h" +#define B2_GRAPH_COLOR 1 + typedef struct b2Contact b2Contact; // Per thread task storage @@ -26,6 +29,8 @@ typedef struct b2TaskContext // Used to 
sort shapes that have enlarged AABBs b2BitSet shapeBitSet; + // Used to wake islands + b2BitSet awakeIslandBitSet; } b2TaskContext; /// The world class manages all physics entities, dynamic simulation, @@ -39,6 +44,7 @@ typedef struct b2World struct b2StackAllocator* stackAllocator; b2BroadPhase broadPhase; + b2Graph graph; b2Pool bodyPool; b2Pool contactPool; @@ -68,15 +74,8 @@ typedef struct b2World // Hot data split from b2Contact int32_t* contactAwakeIndexArray; - // This transient array holds islands created from splitting a larger island. - int32_t* splitIslandArray; - - // Transient index of the island being split this time step. May be B2_NULL_INDEX. - int32_t splitIslandIndex; - // Array of fast bodies that need continuous collision handling int32_t* fastBodies; - int32_t fastBodyCapacity; _Atomic int fastBodyCount; // Id that is incremented every time step @@ -84,6 +83,8 @@ typedef struct b2World b2Vec2 gravity; float restitutionThreshold; + float maximumPushoutVelocity; + float contactHertz; // This is used to compute the time step ratio to support a variable time step. 
float inv_dt0; @@ -106,9 +107,11 @@ typedef struct b2World void* userTreeTask; + int32_t splitIslandIndex; + bool enableSleep; bool locked; - bool warmStarting; + bool enableWarmStarting; bool enableContinuous; } b2World; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5f073571..948339b1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2,6 +2,7 @@ set(BOX2D_TESTS main.c + test_bitset.c test_collision.c test_determinism.c test_distance.c diff --git a/test/main.c b/test/main.c index 29b29cd2..48044774 100644 --- a/test/main.c +++ b/test/main.c @@ -18,6 +18,7 @@ //} #endif +extern int BitSetTest(); extern int MathTest(); extern int CollisionTest(); extern int DeterminismTest(); @@ -47,6 +48,7 @@ int main(void) RUN_TEST(HelloWorld); RUN_TEST(ShapeTest); RUN_TEST(TableTest); + RUN_TEST(BitSetTest); printf("======================================\n"); printf("All Box2D tests passed!\n"); diff --git a/test/test_bitset.c b/test/test_bitset.c new file mode 100644 index 00000000..fdec9a0a --- /dev/null +++ b/test/test_bitset.c @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: 2023 Erin Catto +// SPDX-License-Identifier: MIT + +#include "test_macros.h" +#include "bitset.h" +#include "box2d/timer.h" + +#define COUNT 169 + +int BitSetTest() +{ + b2BitSet bitSet = b2CreateBitSet(COUNT); + + b2SetBitCountAndClear(&bitSet, COUNT); + bool values[COUNT] = {false}; + + int32_t i1 = 0, i2 = 1; + b2SetBit(&bitSet, i1); + values[i1] = true; + + while (i2 < COUNT) + { + b2SetBit(&bitSet, i2); + values[i2] = true; + int32_t next = i1 + i2; + i1 = i2; + i2 = next; + } + + for (int32_t i = 0; i < COUNT; ++i) + { + bool value = b2GetBit(&bitSet, i); + ENSURE(value == values[i]); + } + + b2DestroyBitSet(&bitSet); + + return 0; +} diff --git a/test/test_determinism.c b/test/test_determinism.c index 80c787e5..2eb26787 100644 --- a/test/test_determinism.c +++ b/test/test_determinism.c @@ -116,6 +116,8 @@ void TiltedStacks(int testIndex, int workerCount) 
worldDef.finishAllTasks = FinishAllTasks; worldDef.workerCount = workerCount; worldDef.enableSleep = false; + worldDef.bodyCapacity = 1024; + worldDef.contactCapacity = 4 * 1024; b2WorldId worldId = b2CreateWorld(&worldDef);