Skip to content

Commit

Permalink
Custom SSE2 and Neon (#772)
Browse files Browse the repository at this point in the history
- hand written SSE2, Neon, and scalar SIMD
- improved performance of SSE2 (x64) and Neon (ARM)
- removed SIMDE library
- SSE2 is the default on x64
  • Loading branch information
erincatto authored Aug 19, 2024
1 parent 2b880d1 commit 0e333ff
Show file tree
Hide file tree
Showing 63 changed files with 1,355 additions and 42,204 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ IncludeCategories:

IndentExternBlock: NoIndent
IndentCaseLabels: true
IndentPPDirectives: BeforeHash
#IndentPPDirectives: None
IndentAccessModifiers: false
AccessModifierOffset: -4

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ jobs:

- name: Configure CMake
# some problem with simde
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBOX2D_SANITIZE=ON -DBUILD_SHARED_LIBS=OFF
# run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBOX2D_SAMPLES=OFF -DBUILD_SHARED_LIBS=OFF

- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
Expand Down
16 changes: 12 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
cmake_minimum_required(VERSION 3.22)
include(FetchContent)
include(CMakeDependentOption)

project(box2d
VERSION 3.0.0
VERSION 3.0.1
DESCRIPTION "A 2D physics engine for games"
HOMEPAGE_URL "https://box2d.org"
LANGUAGES C CXX
Expand Down Expand Up @@ -34,10 +35,13 @@ if (MSVC OR APPLE)
endif()
endif()

option(BOX2D_ENABLE_SIMD "Enable SIMD math (faster)" ON)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
option(BOX2D_AVX2 "Enable AVX2 (faster)" ON)
cmake_dependent_option(BOX2D_AVX2 "Enable AVX2" OFF "BOX2D_ENABLE_SIMD" OFF)
endif()


if(PROJECT_IS_TOP_LEVEL)
# Needed for samples.exe to find box2d.dll
# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/bin")
Expand All @@ -52,8 +56,6 @@ set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set(CMAKE_VERBOSE_MAKEFILE ON)

# The Box2D library uses simde https://github.com/simd-everywhere/simde
add_subdirectory(extern/simde)
add_subdirectory(src)

# This hides samples, test, and doxygen from apps that use box2d via FetchContent
Expand Down Expand Up @@ -95,6 +97,7 @@ if(PROJECT_IS_TOP_LEVEL)
if(NOT BUILD_SHARED_LIBS AND BOX2D_UNIT_TESTS)
message(STATUS "Adding Box2D unit tests")
add_subdirectory(test)
set_target_properties(test PROPERTIES XCODE_GENERATE_SCHEME TRUE)
else()
message(STATUS "Skipping Box2D unit tests")
endif()
Expand All @@ -107,10 +110,15 @@ if(PROJECT_IS_TOP_LEVEL)
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT samples)
set_property(TARGET samples PROPERTY VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}")
endif()

set_target_properties(samples PROPERTIES
XCODE_GENERATE_SCHEME TRUE
XCODE_SCHEME_WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}")
endif()

if(BOX2D_BENCHMARKS)
add_subdirectory(benchmark)
set_target_properties(benchmark PROPERTIES XCODE_GENERATE_SCHEME TRUE)
endif()

if(BOX2D_DOCS)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Box2D is a 2D physics engine for games.
- cmake -G Xcode ..
- open box2d.xcodeproj
- Select the samples scheme
- Edit the scheme to set a custom working directory, make this be in box2d/samples
- Edit the scheme to set a custom working directory to the box2d directory
- You can now build and run the samples

## Compatibility
Expand Down
2 changes: 1 addition & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ if(MSVC)
# target_compile_options(benchmark PRIVATE /experimental:c11atomics)
endif()

target_link_libraries(benchmark PRIVATE box2d enkiTS simde)
target_link_libraries(benchmark PRIVATE box2d enkiTS)
16 changes: 8 additions & 8 deletions benchmark/amd7950x/joint_grid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,331.343
2,638.04
3,932.731
4,1200.15
5,1480.23
6,1718.79
7,1930.12
8,2133.65
1,333.121
2,638.057
3,928.95
4,1205.85
5,1479.54
6,1699.99
7,1974.84
8,2043.64
16 changes: 8 additions & 8 deletions benchmark/amd7950x/large_pyramid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,325.705
2,616.127
3,886.575
4,1118.85
5,1331.22
6,1498.6
7,1685.28
8,1728.1
1,336.895
2,602.665
3,878.207
4,1117.02
5,1304.79
6,1482.92
7,1663.97
8,1661.13
16 changes: 8 additions & 8 deletions benchmark/amd7950x/many_pyramids.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,82.8619
2,160.906
3,236.027
4,300.688
5,368.315
6,429.822
7,498.81
8,549.271
1,84.8025
2,163.264
3,234.388
4,305.216
5,369.85
6,434.45
7,497.573
8,525.427
16 changes: 8 additions & 8 deletions benchmark/amd7950x/smash.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,173.898
2,277.19
3,357.566
4,430.528
5,483.446
6,525.652
7,566.859
8,598.553
1,174.051
2,276.742
3,352.751
4,421.773
5,479.049
6,522.318
7,556.193
8,586.672
16 changes: 8 additions & 8 deletions benchmark/amd7950x/tumbler.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,373.066
2,581.852
3,764.444
4,902.898
5,1044.99
6,1143.44
7,1229.87
8,1299.61
1,376.3
2,576.749
3,737.749
4,883.315
5,1024.69
6,1120.48
7,1197.2
8,1212.85
9 changes: 9 additions & 0 deletions benchmark/amd7950x_float/joint_grid.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
threads,fps
1,362.12
2,685.873
3,998.169
4,1274.09
5,1590.5
6,1841.48
7,2036.83
8,2152.76
9 changes: 9 additions & 0 deletions benchmark/amd7950x_float/large_pyramid.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
threads,fps
1,148.238
2,279.403
3,407.797
4,524.174
5,635.423
6,716.434
7,799.394
8,880.242
9 changes: 9 additions & 0 deletions benchmark/amd7950x_float/many_pyramids.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
threads,fps
1,38.1845
2,73.9263
3,108.337
4,139.456
5,171.725
6,198.861
7,229.515
8,253.222
9 changes: 9 additions & 0 deletions benchmark/amd7950x_float/smash.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
threads,fps
1,130.637
2,210.938
3,275.828
4,341.204
5,386.281
6,426.426
7,452.909
8,467.611
9 changes: 9 additions & 0 deletions benchmark/amd7950x_float/tumbler.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
threads,fps
1,247.067
2,403.606
3,523.705
4,629.426
5,734.511
6,800.338
7,857.235
8,898.919
16 changes: 8 additions & 8 deletions benchmark/amd7950x_sse2/joint_grid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,357.551
2,691.193
3,1010.45
4,1317.42
5,1590.65
6,1858.78
7,2074.2
8,2261.67
1,360.077
2,687.48
3,998.479
4,1261.45
5,1581.53
6,1825.64
7,2067.46
8,2216.48
16 changes: 8 additions & 8 deletions benchmark/amd7950x_sse2/large_pyramid.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,186.185
2,351.045
3,511.316
4,636.035
5,765.404
6,875.296
7,991.353
8,961.402
1,288.876
2,527.399
3,769.81
4,982.428
5,1151.91
6,1323.49
7,1474.09
8,1552.6
16 changes: 8 additions & 8 deletions benchmark/amd7950x_sse2/many_pyramids.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,48.5561
2,92.6231
3,137.175
4,176.644
5,214.941
6,253.39
7,288.631
8,312.527
1,75.3333
2,141.977
3,205.225
4,266.523
5,330.244
6,380.809
7,433.287
8,482.241
16 changes: 8 additions & 8 deletions benchmark/amd7950x_sse2/smash.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,142.532
2,228.987
3,299.951
4,364.679
5,413.564
6,453.351
7,489.239
8,519.379
1,165.538
2,263.517
3,338.066
4,405.629
5,461.45
6,506.119
7,540.182
8,563.682
16 changes: 8 additions & 8 deletions benchmark/amd7950x_sse2/tumbler.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
threads,fps
1,276.905
2,453.522
3,592.946
4,702.383
5,826.52
6,919.179
7,1009.05
8,1062.61
1,326.657
2,521.743
3,671.396
4,805.81
5,928.274
6,1019.45
7,1082.76
8,1109.95
Loading

0 comments on commit 0e333ff

Please sign in to comment.