From 08630f5c04f15fda7243650ef104338db52e1be0 Mon Sep 17 00:00:00 2001 From: Exverge Date: Mon, 1 Jul 2024 19:16:51 -0400 Subject: [PATCH 01/28] CI: Add macOS arm64 build --- .github/workflows/build.yml | 99 ++++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a2342c27c..14b16f674 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -204,7 +204,7 @@ jobs: name: cemu-bin-windows-x64 path: ./bin/Cemu.exe - build-macos: + build-macos-intel: runs-on: macos-12 steps: - name: "Checkout repo" @@ -239,12 +239,7 @@ jobs: - name: "Install system dependencies" run: | brew update - brew install llvm@15 ninja nasm molten-vk automake libtool - - - name: "Setup cmake" - uses: jwlawson/actions-setup-cmake@v2 - with: - cmake-version: '3.29.0' + brew install llvm@15 ninja nasm molten-vk automake libtool cmake - name: "Bootstrap vcpkg" run: | @@ -298,3 +293,93 @@ jobs: with: name: cemu-bin-macos-x64 path: ./bin/Cemu.dmg + + build-macos-arm64: + runs-on: macos-14 + steps: + - name: "Checkout repo" + uses: actions/checkout@v4 + with: + submodules: "recursive" + + - name: "Fetch full history for vcpkg submodule" + run: | + cd dependencies/vcpkg + git fetch --unshallow + + - name: Setup release mode parameters (for deploy) + if: ${{ inputs.deploymode == 'release' }} + run: | + echo "BUILD_MODE=release" >> $GITHUB_ENV + echo "BUILD_FLAGS=" >> $GITHUB_ENV + echo "Build mode is release" + - name: Setup debug mode parameters (for continous build) + if: ${{ inputs.deploymode != 'release' }} + run: | + echo "BUILD_MODE=debug" >> $GITHUB_ENV + echo "BUILD_FLAGS=" >> $GITHUB_ENV + echo "Build mode is debug" + + - name: Setup version for experimental + if: ${{ inputs.experimentalversion != '' }} + run: | + echo "[INFO] Experimental version ${{ inputs.experimentalversion }}" + echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEXPERIMENTAL_VERSION=${{ inputs.experimentalversion }}" >> $GITHUB_ENV + + - name: "Install system dependencies" + run: | + brew update + brew install llvm@15 ninja nasm molten-vk automake libtool cmake + + - name: "Bootstrap vcpkg" + run: | + bash ./dependencies/vcpkg/bootstrap-vcpkg.sh + + - name: 'Setup NuGet Credentials for vcpkg' + shell: 'bash' + run: | + mono `./dependencies/vcpkg/vcpkg fetch nuget | tail -n 1` \ + sources add \ + -source "https://nuget.pkg.github.com/${{ github.repository_owner }}/index.json" \ + -storepasswordincleartext \ + -name "GitHub" \ + -username "${{ github.repository_owner }}" \ + -password "${{ secrets.GITHUB_TOKEN }}" + mono `./dependencies/vcpkg/vcpkg fetch nuget | tail -n 1` \ + setapikey "${{ secrets.GITHUB_TOKEN }}" \ + -source "https://nuget.pkg.github.com/${{ github.repository_owner }}/index.json" + + - name: "cmake" + run: | + mkdir build + cd build + cmake .. ${{ env.BUILD_FLAGS }} \ + -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \ + -DMACOS_BUNDLE=ON \ + -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@15/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@15/bin/clang++ \ + -G Ninja + + - name: "Build Cemu" + run: | + cmake --build build + + - name: Prepare artifact + if: ${{ inputs.deploymode == 'release' }} + run: | + mkdir bin/Cemu_app + mv bin/Cemu_release.app bin/Cemu_app/Cemu.app + mv bin/Cemu_app/Cemu.app/Contents/MacOS/Cemu_release bin/Cemu_app/Cemu.app/Contents/MacOS/Cemu + sed -i '' 's/Cemu_release/Cemu/g' bin/Cemu_app/Cemu.app/Contents/Info.plist + chmod a+x bin/Cemu_app/Cemu.app/Contents/MacOS/{Cemu,update.sh} + ln -s /Applications bin/Cemu_app/Applications + hdiutil create ./bin/tmp.dmg -ov -volname "Cemu" -fs HFS+ -srcfolder "./bin/Cemu_app" + hdiutil convert ./bin/tmp.dmg -format UDZO -o bin/Cemu.dmg + rm bin/tmp.dmg + + - name: Upload artifact + uses: actions/upload-artifact@v4 + if: ${{ inputs.deploymode == 'release' }} + with: + name: cemu-bin-macos-arm64 + path: ./bin/Cemu.dmg From d2a9c317d6f8cd66f76213b42ce93248c1b788fe Mon Sep 17 00:00:00 2001 From: Exverge Date: Mon, 1 Jul 2024 17:46:22 -0400 Subject: [PATCH 02/28] ih264d: Modify to compile with AppleClang & for M1 --- dependencies/ih264d/CMakeLists.txt | 2 +- .../armv8/ih264_intra_pred_chroma_av8.s | 19 ++++-- .../armv8/ih264_intra_pred_luma_16x16_av8.s | 11 +++- .../armv8/ih264_intra_pred_luma_8x8_av8.s | 11 +++- .../ih264d/common/ih264_deblk_edge_filters.h | 45 +++++++------- .../ih264d/common/ih264_inter_pred_filters.h | 32 ++++++---- .../ih264d/common/ih264_intra_pred_filters.h | 59 ++++++++++--------- dependencies/ih264d/common/ih264_padding.h | 15 +++-- .../common/ih264_trans_quant_itrans_iquant.h | 30 ++++++---- .../ih264d/common/ih264_weighted_pred.h | 18 ++++-- 10 files changed, 149 insertions(+), 93 deletions(-) diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt index d97d6ddab..626424f58 100644 --- a/dependencies/ih264d/CMakeLists.txt +++ b/dependencies/ih264d/CMakeLists.txt @@ -140,7 +140,7 @@ target_sources(ih264d PRIVATE "decoder/x86/ih264d_function_selector_sse42.c" "decoder/x86/ih264d_function_selector_ssse3.c" ) -elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") enable_language( C CXX ASM ) set(LIBAVCDEC_ARM_INCLUDES "common/armv8" "decoder/arm") include_directories("common/" "decoder/" ${LIBAVCDEC_ARM_INCLUDES}) diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s index 39c02560f..fa4c07c5b 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s @@ -53,10 +53,13 @@ .text .p2align 2 .include "ih264_neon_macros.s" - +#ifdef __APPLE__ +.extern _ih264_gai1_intrapred_chroma_plane_coeffs1 +.extern _ih264_gai1_intrapred_chroma_plane_coeffs2 +#else .extern ih264_gai1_intrapred_chroma_plane_coeffs1 .extern ih264_gai1_intrapred_chroma_plane_coeffs2 - +#endif ///** @@ -429,8 +432,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: rev64 v7.4h, v2.4h ld1 {v3.2s}, [x10] sub x5, x3, #8 +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1] +#endif usubl v10.8h, v5.8b, v1.8b ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3 mov v8.d[1], v9.d[0] @@ -484,10 +492,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: zip1 v1.8h, v0.8h, v2.8h zip2 v2.8h, v0.8h, v2.8h mov v0.16b, v1.16b - +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2] - +#endif ld1 {v8.2s, v9.2s}, [x12] mov v8.d[1], v9.d[0] mov v10.16b, v8.16b diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s index fa19c1213..4d45e5af2 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -53,7 +53,11 @@ .text .p2align 2 .include "ih264_neon_macros.s" +#ifdef __APPLE__ +.extern _ih264_gai1_intrapred_luma_plane_coeffs +#else .extern ih264_gai1_intrapred_luma_plane_coeffs +#endif @@ -431,10 +435,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8: mov x10, x1 //top_left mov x4, #-1 ld1 {v2.2s}, [x1], x8 - +#ifdef __APPLE__ + adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE + ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF] +#else adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] - +#endif ld1 {v0.2s}, [x1] rev64 v2.8b, v2.8b ld1 {v6.2s, v7.2s}, [x7] diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s index 273aa81b8..efc30c03d 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s @@ -56,8 +56,11 @@ .text .p2align 2 .include "ih264_neon_macros.s" - +#ifdef __APPLE__ +.extern _ih264_gai1_intrapred_luma_8x8_horz_u +#else .extern ih264_gai1_intrapred_luma_8x8_horz_u +#endif @@ -1029,9 +1032,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8: mov v3.d[0], v2.d[1] ext v4.16b, v2.16b , v2.16b , #1 mov v5.d[0], v4.d[1] - +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u] +#endif uaddl v20.8h, v0.8b, v2.8b uaddl v22.8h, v1.8b, v3.8b uaddl v24.8h, v2.8b, v4.8b diff --git a/dependencies/ih264d/common/ih264_deblk_edge_filters.h b/dependencies/ih264d/common/ih264_deblk_edge_filters.h index 4079dd2cc..9b3752079 100644 --- a/dependencies/ih264d/common/ih264_deblk_edge_filters.h +++ b/dependencies/ih264d/common/ih264_deblk_edge_filters.h @@ -40,6 +40,11 @@ /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif typedef void ih264_deblk_edge_bslt4_ft(UWORD8 *pu1_src, WORD32 src_strd, @@ -133,34 +138,34 @@ ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_a9; ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_a9; /*AV8*/ -ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_luma_vert_bs4_mbaff_av8; +ih264_deblk_edge_bs4_ft av8(ih264_deblk_luma_horz_bs4_av8); +ih264_deblk_edge_bs4_ft av8(ih264_deblk_luma_vert_bs4_av8); +ih264_deblk_edge_bs4_ft av8(ih264_deblk_luma_vert_bs4_mbaff_av8); -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_horz_bs4_bp_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_bp_av8; -ih264_deblk_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_bp_av8; +ih264_deblk_edge_bs4_ft av8(ih264_deblk_chroma_horz_bs4_bp_av8); +ih264_deblk_edge_bs4_ft av8(ih264_deblk_chroma_vert_bs4_bp_av8); +ih264_deblk_edge_bs4_ft av8(ih264_deblk_chroma_vert_bs4_mbaff_bp_av8); -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_horz_bslt4_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_luma_vert_bslt4_mbaff_av8; +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_luma_horz_bslt4_av8); +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_luma_vert_bslt4_av8); +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_luma_vert_bslt4_mbaff_av8); -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_bp_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_bp_av8; -ih264_deblk_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8; +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_chroma_horz_bslt4_bp_av8); +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_chroma_vert_bslt4_bp_av8); +ih264_deblk_edge_bslt4_ft av8(ih264_deblk_chroma_vert_bslt4_mbaff_bp_av8); -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_vert_bs4_mbaff_av8; -ih264_deblk_chroma_edge_bs4_ft ih264_deblk_chroma_horz_bs4_mbaff_av8; +ih264_deblk_chroma_edge_bs4_ft av8(ih264_deblk_chroma_vert_bs4_av8); +ih264_deblk_chroma_edge_bs4_ft av8(ih264_deblk_chroma_horz_bs4_av8); +ih264_deblk_chroma_edge_bs4_ft av8(ih264_deblk_chroma_vert_bs4_mbaff_av8); +ih264_deblk_chroma_edge_bs4_ft av8(ih264_deblk_chroma_horz_bs4_mbaff_av8); -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_vert_bslt4_mbaff_av8; -ih264_deblk_chroma_edge_bslt4_ft ih264_deblk_chroma_horz_bslt4_mbaff_av8; +ih264_deblk_chroma_edge_bslt4_ft av8(ih264_deblk_chroma_vert_bslt4_av8); +ih264_deblk_chroma_edge_bslt4_ft av8(ih264_deblk_chroma_horz_bslt4_av8); +ih264_deblk_chroma_edge_bslt4_ft av8(ih264_deblk_chroma_vert_bslt4_mbaff_av8); +ih264_deblk_chroma_edge_bslt4_ft av8(ih264_deblk_chroma_horz_bslt4_mbaff_av8); /*SSE3*/ ih264_deblk_edge_bs4_ft ih264_deblk_luma_horz_bs4_ssse3; diff --git a/dependencies/ih264d/common/ih264_inter_pred_filters.h b/dependencies/ih264d/common/ih264_inter_pred_filters.h index c439ab842..3cc9b722c 100644 --- a/dependencies/ih264d/common/ih264_inter_pred_filters.h +++ b/dependencies/ih264d/common/ih264_inter_pred_filters.h @@ -100,6 +100,12 @@ extern const WORD32 ih264_g_six_tap[3];/* coefficients for 6 tap filtering*/ /* Extern Function Declarations */ /*****************************************************************************/ +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif + typedef void ih264_inter_pred_luma_ft(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -186,31 +192,31 @@ ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_a9q; ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_a9q; /* AV8 NEON Declarations */ -ih264_inter_pred_luma_ft ih264_inter_pred_luma_copy_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_copy_av8); -ih264_interleave_copy_ft ih264_interleave_copy_av8; +ih264_interleave_copy_ft av8(ih264_interleave_copy_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_vert_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_hpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_hpel_vert_hpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_vert_qpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_vert_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_qpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_qpel_vert_qpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_qpel_vert_hpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_qpel_vert_hpel_av8); -ih264_inter_pred_luma_ft ih264_inter_pred_luma_horz_hpel_vert_qpel_av8; +ih264_inter_pred_luma_ft av8(ih264_inter_pred_luma_horz_hpel_vert_qpel_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_av8; +ih264_inter_pred_chroma_ft av8(ih264_inter_pred_chroma_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dx_zero_av8; +ih264_inter_pred_chroma_ft av8(ih264_inter_pred_chroma_dx_zero_av8); -ih264_inter_pred_chroma_ft ih264_inter_pred_chroma_dy_zero_av8; +ih264_inter_pred_chroma_ft av8(ih264_inter_pred_chroma_dy_zero_av8); /* SSSE3 Intrinsic Declarations */ diff --git a/dependencies/ih264d/common/ih264_intra_pred_filters.h b/dependencies/ih264d/common/ih264_intra_pred_filters.h index caf6b3355..99857201c 100644 --- a/dependencies/ih264d/common/ih264_intra_pred_filters.h +++ b/dependencies/ih264d/common/ih264_intra_pred_filters.h @@ -46,6 +46,12 @@ #define FILT121(a,b,c) ((a + (b<<1) + c + 2)>>2) /*! Filter (1,1) i.e (a + b) / 2 */ #define FILT11(a,b) ((a + b + 1)>>1) + +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif /*****************************************************************************/ /* Global Variables */ /*****************************************************************************/ @@ -60,7 +66,6 @@ extern const WORD8 ih264_gai1_intrapred_luma_8x8_horz_u[]; /* Extern Function Declarations */ /*****************************************************************************/ - typedef void ih264_intra_pred_ref_filtering_ft(UWORD8 *pu1_left, UWORD8 *pu1_topleft, UWORD8 *pu1_top, @@ -270,62 +275,62 @@ ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_ssse3; /* AV8 Definition */ /* Luma 4x4 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_dc_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dl_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_diag_dl_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_diag_dr_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_diag_dr_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_r_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_vert_r_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_d_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_horz_d_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_vert_l_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_vert_l_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_4x4_mode_horz_u_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_4x4_mode_horz_u_av8); /* Luma 8x8 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_dc_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dl_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_diag_dl_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_diag_dr_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_diag_dr_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_r_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_vert_r_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_d_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_horz_d_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_vert_l_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_vert_l_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_8x8_mode_horz_u_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_8x8_mode_horz_u_av8); /* Luma 16x16 Intra pred filters */ -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_vert_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_16x16_mode_vert_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_horz_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_16x16_mode_horz_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_dc_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_16x16_mode_dc_av8); -ih264_intra_pred_luma_ft ih264_intra_pred_luma_16x16_mode_plane_av8; +ih264_intra_pred_luma_ft av8(ih264_intra_pred_luma_16x16_mode_plane_av8); /* Chroma 8x8 Intra pred filters */ -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_dc_av8; +ih264_intra_pred_chroma_ft av8(ih264_intra_pred_chroma_8x8_mode_dc_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_horz_av8; +ih264_intra_pred_chroma_ft av8(ih264_intra_pred_chroma_8x8_mode_horz_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_vert_av8; +ih264_intra_pred_chroma_ft av8(ih264_intra_pred_chroma_8x8_mode_vert_av8); -ih264_intra_pred_chroma_ft ih264_intra_pred_chroma_8x8_mode_plane_av8; +ih264_intra_pred_chroma_ft av8(ih264_intra_pred_chroma_8x8_mode_plane_av8); #endif /* IH264_INTRA_PRED_FILTERS_H_ */ diff --git a/dependencies/ih264d/common/ih264_padding.h b/dependencies/ih264d/common/ih264_padding.h index e4e18fbea..824575487 100644 --- a/dependencies/ih264d/common/ih264_padding.h +++ b/dependencies/ih264d/common/ih264_padding.h @@ -40,6 +40,11 @@ /*****************************************************************************/ /* Function Declarations */ /*****************************************************************************/ +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif typedef void ih264_pad(UWORD8 *, WORD32, WORD32, WORD32); @@ -59,11 +64,11 @@ ih264_pad ih264_pad_right_luma_a9q; ih264_pad ih264_pad_right_chroma_a9q; /* AV8 function declarations */ -ih264_pad ih264_pad_top_av8; -ih264_pad ih264_pad_left_luma_av8; -ih264_pad ih264_pad_left_chroma_av8; -ih264_pad ih264_pad_right_luma_av8; -ih264_pad ih264_pad_right_chroma_av8; +ih264_pad av8(ih264_pad_top_av8); +ih264_pad av8(ih264_pad_left_luma_av8); +ih264_pad av8(ih264_pad_left_chroma_av8); +ih264_pad av8(ih264_pad_right_luma_av8); +ih264_pad av8(ih264_pad_right_chroma_av8); ih264_pad ih264_pad_left_luma_ssse3; diff --git a/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h b/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h index 83551aadd..8e87cc5a3 100644 --- a/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h +++ b/dependencies/ih264d/common/ih264_trans_quant_itrans_iquant.h @@ -39,7 +39,11 @@ /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ - +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif typedef void ih264_resi_trans_dctrans_quant_ft(UWORD8*pu1_src, UWORD8 *pu1_pred, @@ -199,18 +203,18 @@ ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_a9; ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_a9; /*Av8 Declarations*/ -ih264_resi_trans_quant_ft ih264_resi_trans_quant_4x4_av8; -ih264_resi_trans_quant_ft ih264_resi_trans_quant_chroma_4x4_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_dc_av8; -ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_8x8_dc_av8; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_av8; -ih264_iquant_itrans_recon_chroma_ft ih264_iquant_itrans_recon_chroma_4x4_dc_av8; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_4x4_av8; -ih264_ihadamard_scaling_ft ih264_ihadamard_scaling_2x2_uv_av8; -ih264_hadamard_quant_ft ih264_hadamard_quant_4x4_av8; -ih264_hadamard_quant_ft ih264_hadamard_quant_2x2_uv_av8; +ih264_resi_trans_quant_ft av8(ih264_resi_trans_quant_4x4_av8); +ih264_resi_trans_quant_ft av8(ih264_resi_trans_quant_chroma_4x4_av8); +ih264_iquant_itrans_recon_ft av8(ih264_iquant_itrans_recon_4x4_av8); +ih264_iquant_itrans_recon_ft av8(ih264_iquant_itrans_recon_8x8_av8); +ih264_iquant_itrans_recon_ft av8(ih264_iquant_itrans_recon_4x4_dc_av8); +ih264_iquant_itrans_recon_ft av8(ih264_iquant_itrans_recon_8x8_dc_av8); +ih264_iquant_itrans_recon_chroma_ft av8(ih264_iquant_itrans_recon_chroma_4x4_av8); +ih264_iquant_itrans_recon_chroma_ft av8(ih264_iquant_itrans_recon_chroma_4x4_dc_av8); +ih264_ihadamard_scaling_ft av8(ih264_ihadamard_scaling_4x4_av8); +ih264_ihadamard_scaling_ft av8(ih264_ihadamard_scaling_2x2_uv_av8); +ih264_hadamard_quant_ft av8(ih264_hadamard_quant_4x4_av8); +ih264_hadamard_quant_ft av8(ih264_hadamard_quant_2x2_uv_av8); /*SSSE3 Declarations*/ ih264_iquant_itrans_recon_ft ih264_iquant_itrans_recon_4x4_ssse3; diff --git a/dependencies/ih264d/common/ih264_weighted_pred.h b/dependencies/ih264d/common/ih264_weighted_pred.h index f9b93b0fc..6696e02a0 100644 --- a/dependencies/ih264d/common/ih264_weighted_pred.h +++ b/dependencies/ih264d/common/ih264_weighted_pred.h @@ -68,6 +68,12 @@ /*****************************************************************************/ /* Extern Function Declarations */ /*****************************************************************************/ +#ifdef __APPLE__ +#define av8(name) name __asm__(#name) +#else +#define av8(name) name +#endif + typedef void ih264_default_weighted_pred_ft(UWORD8 *puc_src1, UWORD8 *puc_src2, UWORD8 *puc_dst, @@ -132,17 +138,17 @@ ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_a9q; /* AV8 NEON Declarations */ -ih264_default_weighted_pred_ft ih264_default_weighted_pred_luma_av8; +ih264_default_weighted_pred_ft av8(ih264_default_weighted_pred_luma_av8); -ih264_default_weighted_pred_ft ih264_default_weighted_pred_chroma_av8; +ih264_default_weighted_pred_ft av8(ih264_default_weighted_pred_chroma_av8); -ih264_weighted_pred_ft ih264_weighted_pred_luma_av8; +ih264_weighted_pred_ft av8(ih264_weighted_pred_luma_av8); -ih264_weighted_pred_ft ih264_weighted_pred_chroma_av8; +ih264_weighted_pred_ft av8(ih264_weighted_pred_chroma_av8); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_luma_av8; +ih264_weighted_bi_pred_ft av8(ih264_weighted_bi_pred_luma_av8); -ih264_weighted_bi_pred_ft ih264_weighted_bi_pred_chroma_av8; +ih264_weighted_bi_pred_ft av8(ih264_weighted_bi_pred_chroma_av8); /* SSE42 Intrinsic Declarations */ From 5a3f86481e9cc07069e8c490c6c978a0ddb46d70 Mon Sep 17 00:00:00 2001 From: Exverge Date: Fri, 5 Jul 2024 23:41:39 -0400 Subject: [PATCH 03/28] When compiling for Apple Silicon, CMAKE_SYSTEM_PROCESSOR is set to arm64 rather than aarch64 cmake: Fix compiling for Apple Silicon --- CMakeLists.txt | 3 ++- src/CMakeLists.txt | 10 +++++++--- src/asm/CMakeLists.txt | 3 ++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b5f38819..5b682eabb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,8 @@ cmake_minimum_required(VERSION 3.21.1) +include(CMakeDependentOption) option(ENABLE_VCPKG "Enable the vcpkg package manager" ON) -option(MACOS_BUNDLE "The executable when built on macOS will be created as an application bundle" OFF) +cmake_dependent_option(MACOS_BUNDLE "The executable when built on macOS will be created as an application bundle" ON "APPLE" OFF) set(EXPERIMENTAL_VERSION "" CACHE STRING "") # used by CI script to set experimental version if (EXPERIMENTAL_VERSION) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7d64d91bf..7609fa789 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,12 +101,16 @@ if (MACOS_BUNDLE) COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory "${CMAKE_SOURCE_DIR}/bin/${folder}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/SharedSupport/${folder}") endforeach(folder) + execute_process(COMMAND brew --prefix + OUTPUT_VARIABLE HOMEBREW_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE) + add_custom_command (TARGET CemuBin POST_BUILD - COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib" - COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib" + COMMAND ${CMAKE_COMMAND} ARGS -E copy "${HOMEBREW_PREFIX}/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib" + COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib" COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh" COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}" - COMMAND bash -c "install_name_tool -change /Users/runner/work/Cemu/Cemu/build/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib @executable_path/../Frameworks/libusb-1.0.0.dylib ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}") + COMMAND bash -c "install_name_tool -change ${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib @executable_path/../Frameworks/libusb-1.0.0.dylib ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}") endif() set_target_properties(CemuBin PROPERTIES diff --git a/src/asm/CMakeLists.txt b/src/asm/CMakeLists.txt index 5d9f84c28..68ac9e70a 100644 --- a/src/asm/CMakeLists.txt +++ b/src/asm/CMakeLists.txt @@ -40,8 +40,9 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") endif() -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)|(arm64)(ARM64)") add_library(CemuAsm stub.cpp) else() message(STATUS "CemuAsm - Unsupported arch: ${CMAKE_SYSTEM_PROCESSOR}") + add_library(CemuAsm stub.cpp) endif() From dc1dc4491c925c55391dd53b1b2222607efbeead Mon Sep 17 00:00:00 2001 From: Exverge Date: Tue, 2 Jul 2024 20:09:16 -0400 Subject: [PATCH 04/28] MMU: Align pre-core memory space for 16kib page size I've changed the range to accommodate for the Project Zero bug stated however I'm not sure if causes any other issues or if this is used, however it seems to work fine. Please correct me if true. --- src/Cafe/HW/MMU/MMU.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/MMU/MMU.cpp b/src/Cafe/HW/MMU/MMU.cpp index 04ee8877e..b80d639e7 100644 --- a/src/Cafe/HW/MMU/MMU.cpp +++ b/src/Cafe/HW/MMU/MMU.cpp @@ -91,6 +91,7 @@ void MMURange::mapMem() if (MemMapper::AllocateMemory(memory_base + baseAddress, size, MemMapper::PAGE_PERMISSION::P_RW, true) == nullptr) { std::string errorMsg = fmt::format("Unable to allocate {} memory", name); + cemuLog_log(LogType::Force, "Unable to allocate {} memory; error {}", name, errno); wxMessageBox(errorMsg.c_str(), "Error", wxOK | wxCENTRE | wxICON_ERROR); #if BOOST_OS_WINDOWS ExitProcess(-1); @@ -122,7 +123,7 @@ MMURange mmuRange_SHARED_AREA { 0xF8000000, 0x02000000, MMU_MEM_AREA_ID::SHARE MMURange mmuRange_CORE0_LC { 0xFFC00000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC0, "CORE0_LC" }; // locked L2 cache of core 0 MMURange mmuRange_CORE1_LC { 0xFFC40000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC1, "CORE1_LC" }; // locked L2 cache of core 1 MMURange mmuRange_CORE2_LC { 0xFFC80000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC2, "CORE2_LC" }; // locked L2 cache of core 2 -MMURange mmuRange_HIGHMEM { 0xFFFFF000, 0x00001000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; // per-core memory? Used by coreinit and PPC kernel to store core context specific data (like current thread ptr). We dont use it but Project Zero has a bug where it writes a byte at 0xfffffffe thus this memory range needs to be writable +MMURange mmuRange_HIGHMEM { 0xFFFFC000, 0x00004000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; // per-core memory? Used by coreinit and PPC kernel to store core context specific data (like current thread ptr). We dont use it but Project Zero has a bug where it writes a byte at 0xfffffffe thus this memory range needs to be writable void memory_init() { From 6786bcaaf5c87a60aeb6cbc48edc9015b003ee7e Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 4 Jul 2024 18:14:09 -0400 Subject: [PATCH 05/28] Implement _mm_mfence for aarch64 --- src/Common/precompiled.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index 790a001a8..2abb0fe02 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -291,7 +291,8 @@ inline uint64 __rdtsc() inline void _mm_mfence() { - + asm volatile("" ::: "memory"); + std::atomic_thread_fence(std::memory_order_seq_cst); } inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result) From 72af6e8ae4d202e681167b78ed0f9b1b60ca574f Mon Sep 17 00:00:00 2001 From: Exverge Date: Sat, 6 Jul 2024 15:18:22 -0400 Subject: [PATCH 06/28] Hardcode frequency on Apple Silicon On Apple Silicon, PPCTimer estimates a terribily inaccurate RSTSC frequency and results in games (specifically tested Color Splash & MK8) run extremely fast especially in the title screens which unsurpisingly doesn't work that well. The value hardcoded is the same frequency as on Rosetta. Admittedly this probably isn't the best solution however it is accurate and it works. --- src/util/highresolutiontimer/HighResolutionTimer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/highresolutiontimer/HighResolutionTimer.cpp b/src/util/highresolutiontimer/HighResolutionTimer.cpp index 67ffa3492..de144b321 100644 --- a/src/util/highresolutiontimer/HighResolutionTimer.cpp +++ b/src/util/highresolutiontimer/HighResolutionTimer.cpp @@ -27,6 +27,8 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 { LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (uint64)(freq.QuadPart); +#elif BOOST_OS_MACOS && defined(__arm64__) + return 1000000000; #else timespec pc; clock_getres(CLOCK_MONOTONIC_RAW, &pc); From 011be25597e36606b3e44986d7fef768f0bc2a57 Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 10 Jul 2024 16:28:33 -0400 Subject: [PATCH 07/28] coreinit: Split pointer before passing to FiberThreadEntry --- src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp | 12 +++++++++++- src/util/Fiber/FiberUnix.cpp | 5 +++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp index 2f3808b73..653b59b1d 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp @@ -25,7 +25,11 @@ void nnNfp_update(); namespace coreinit { +#ifdef __arm64__ + void __OSFiberThreadEntry(uint32, uint32); +#else void __OSFiberThreadEntry(void* thread); +#endif void __OSAddReadyThreadToRunQueue(OSThread_t* thread); void __OSRemoveThreadFromRunQueues(OSThread_t* thread); }; @@ -49,7 +53,7 @@ namespace coreinit struct OSHostThread { - OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this) + OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this) { } @@ -1304,8 +1308,14 @@ namespace coreinit __OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance); } +#ifdef __arm64__ + void __OSFiberThreadEntry(uint32 _high, uint32 _low) + { + uint64 _thread = (uint64) _high << 32 | _low; +#else void __OSFiberThreadEntry(void* _thread) { +#endif OSHostThread* hostThread = (OSHostThread*)_thread; #if defined(ARCH_X86_64) diff --git a/src/util/Fiber/FiberUnix.cpp b/src/util/Fiber/FiberUnix.cpp index 0d5270693..36430449e 100644 --- a/src/util/Fiber/FiberUnix.cpp +++ b/src/util/Fiber/FiberUnix.cpp @@ -15,7 +15,12 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri ctx->uc_stack.ss_sp = m_stackPtr; ctx->uc_stack.ss_size = stackSize; ctx->uc_link = &ctx[0]; +#ifdef __arm64__ + // https://www.man7.org/linux/man-pages/man3/makecontext.3.html#NOTES + makecontext(ctx, (void(*)())FiberEntryPoint, 2, (uint64) userParam >> 32, userParam); +#else makecontext(ctx, (void(*)())FiberEntryPoint, 1, userParam); +#endif this->m_implData = (void*)ctx; } From e81a592b8936cb5e80b0cab2d2d99255d8c54720 Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 10 Jul 2024 16:45:43 -0400 Subject: [PATCH 08/28] gui: Fix missing item on macOS "A MenuItem ID of Zero does not work under Mac" --- src/gui/MainWindow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index 7a4f3174c..66f9808bd 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -2226,7 +2226,7 @@ void MainWindow::RecreateMenu() debugLoggingMenu->AppendSeparator(); wxMenu* logCosModulesMenu = new wxMenu(); - logCosModulesMenu->AppendCheckItem(0, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); + logCosModulesMenu->AppendCheckItem(1, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); logCosModulesMenu->AppendSeparator(); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile)); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync)); From f4b59c967ea7bd6cbe2f7ce9cc41088fff29f37a Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 11 Jul 2024 11:48:51 -0400 Subject: [PATCH 09/28] Latte: Disable blending integer formats Despite being disabled in InitBlendState, this still causes errors on MoltenVk, so just skip it altogether Seemingly fixes Cemu-project/Cemu#396 (there's a multitude of errors there in the comments, specifically referring to the issue), however I don't own BOTW and can't confirm --- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp index ce582b9ac..5cd29941a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp @@ -988,7 +988,8 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show pipelineInfo.pDynamicState = &dynamicState; pipelineInfo.pRasterizationState = &rasterizer; pipelineInfo.pMultisampleState = &multisampling; - pipelineInfo.pColorBlendState = &colorBlending; + if (!_IsVkIntegerFormat(m_renderPassObj->GetColorFormat(0))) + pipelineInfo.pColorBlendState = &colorBlending; pipelineInfo.layout = m_pipeline_layout; pipelineInfo.renderPass = m_renderPassObj->m_renderPass; pipelineInfo.pDepthStencilState = &depthStencilState; From b2330b36ef536fd911e567c791d80e33eb45993b Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 11 Jul 2024 12:33:41 -0400 Subject: [PATCH 10/28] Use interpreter on arm64 by default --- src/config/ActiveSettings.cpp | 6 ++++++ src/config/LaunchSettings.h | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/config/ActiveSettings.cpp b/src/config/ActiveSettings.cpp index 07e6f16d3..c1d8c2852 100644 --- a/src/config/ActiveSettings.cpp +++ b/src/config/ActiveSettings.cpp @@ -67,12 +67,18 @@ CPUMode ActiveSettings::GetCPUMode() auto mode = g_current_game_profile->GetCPUMode().value_or(CPUMode::Auto); if (mode == CPUMode::Auto) + { +#ifdef __aarch64__ + mode = CPUMode::SinglecoreInterpreter; + } +#else { if (GetPhysicalCoreCount() >= 4) mode = CPUMode::MulticoreRecompiler; else mode = CPUMode::SinglecoreRecompiler; } +#endif else if (mode == CPUMode::DualcoreRecompiler) // dualcore is disabled now mode = CPUMode::MulticoreRecompiler; diff --git a/src/config/LaunchSettings.h b/src/config/LaunchSettings.h index b0f673a13..4ff70296f 100644 --- a/src/config/LaunchSettings.h +++ b/src/config/LaunchSettings.h @@ -39,7 +39,6 @@ class LaunchSettings inline static bool s_enable_gdbstub = false; inline static bool s_nsight_mode = false; - inline static bool s_force_interpreter = false; inline static std::optional s_persistent_id{}; From f24fc3ed3307e2dede3458ef1bacb978add2fcf4 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 11 Jul 2024 14:38:34 -0400 Subject: [PATCH 11/28] MMU: Make HIGHMEM address conditional --- src/Cafe/HW/MMU/MMU.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/MMU/MMU.cpp b/src/Cafe/HW/MMU/MMU.cpp index b80d639e7..ce15600ad 100644 --- a/src/Cafe/HW/MMU/MMU.cpp +++ b/src/Cafe/HW/MMU/MMU.cpp @@ -123,7 +123,12 @@ MMURange mmuRange_SHARED_AREA { 0xF8000000, 0x02000000, MMU_MEM_AREA_ID::SHARE MMURange mmuRange_CORE0_LC { 0xFFC00000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC0, "CORE0_LC" }; // locked L2 cache of core 0 MMURange mmuRange_CORE1_LC { 0xFFC40000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC1, "CORE1_LC" }; // locked L2 cache of core 1 MMURange mmuRange_CORE2_LC { 0xFFC80000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC2, "CORE2_LC" }; // locked L2 cache of core 2 -MMURange mmuRange_HIGHMEM { 0xFFFFC000, 0x00004000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; // per-core memory? Used by coreinit and PPC kernel to store core context specific data (like current thread ptr). We dont use it but Project Zero has a bug where it writes a byte at 0xfffffffe thus this memory range needs to be writable +#if !defined(__arm64__) || !defined(__APPLE__) +MMURange mmuRange_HIGHMEM { 0xFFFFF000, 0x00001000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; // per-core memory? Used by coreinit and PPC kernel to store core context specific data (like current thread ptr). We dont use it but Project Zero has a bug where it writes a byte at 0xfffffffe thus this memory range needs to be writable +#else +// Apple Silicon uses a 16kb pagesize, it can't allocate higher than this +MMURange mmuRange_HIGHMEM { 0xFFFFC000, 0x00004000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; +#endif void memory_init() { From e6e65aff9aeb3215020d68ab47667671fbb9aa57 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 11 Jul 2024 22:32:19 -0400 Subject: [PATCH 12/28] gx2: Use atomic pointers for GX2WriteGatherPipeState --- src/Cafe/OS/libs/gx2/GX2_Command.cpp | 22 +++++++++++----------- src/Cafe/OS/libs/gx2/GX2_Command.h | 10 +++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Cafe/OS/libs/gx2/GX2_Command.cpp b/src/Cafe/OS/libs/gx2/GX2_Command.cpp index ec96a4ffc..0779cbb15 100644 --- a/src/Cafe/OS/libs/gx2/GX2_Command.cpp +++ b/src/Cafe/OS/libs/gx2/GX2_Command.cpp @@ -17,28 +17,28 @@ GX2WriteGatherPipeState gx2WriteGatherPipe = { 0 }; void gx2WriteGather_submitU32AsBE(uint32 v) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - *(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = _swapEndianU32(v); - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4; + *(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = _swapEndianU32(v); + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4; } void gx2WriteGather_submitU32AsLE(uint32 v) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - *(uint32*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) = v; - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4; + *(uint32*)(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load()) = v; + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4; } void gx2WriteGather_submitU32AsLEArray(uint32* v, uint32 numValues) { uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance()); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == NULL) return; - memcpy_dwords((*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]), v, numValues); - (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]) += 4 * numValues; + memcpy_dwords(gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load(), v, numValues); + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] += 4 * numValues; } namespace GX2 @@ -121,7 +121,7 @@ namespace GX2 if (sGX2MainCoreIndex == coreIndex) gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = &gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]; else - gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL; + *gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] = NULL; // return size of (written) display list return currentWriteSize; } @@ -217,7 +217,7 @@ namespace GX2 cemu_assert_debug(coreIndex == sGX2MainCoreIndex); coreIndex = sGX2MainCoreIndex; // always submit to main queue which is owned by GX2 main core (TCLSubmitToRing does not need this workaround) - uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]); + uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex].load()); cmdStream[0] = pm4HeaderType3(IT_INDIRECT_BUFFER_PRIV, 3); cmdStream[1] = memory_virtualToPhysical(MEMPTR(addr).GetMPTR()); cmdStream[2] = 0; diff --git a/src/Cafe/OS/libs/gx2/GX2_Command.h b/src/Cafe/OS/libs/gx2/GX2_Command.h index 51c049289..fb9bb65e5 100644 --- a/src/Cafe/OS/libs/gx2/GX2_Command.h +++ b/src/Cafe/OS/libs/gx2/GX2_Command.h @@ -6,9 +6,9 @@ struct GX2WriteGatherPipeState { uint8* gxRingBuffer; // each core has it's own write gatherer and display list state (writing) - uint8* writeGatherPtrGxBuffer[Espresso::CORE_COUNT]; - uint8** writeGatherPtrWrite[Espresso::CORE_COUNT]; - uint8* writeGatherPtrDisplayList[Espresso::CORE_COUNT]; + std::atomic writeGatherPtrGxBuffer[Espresso::CORE_COUNT]; + std::atomic* writeGatherPtrWrite[Espresso::CORE_COUNT]; + std::atomic writeGatherPtrDisplayList[Espresso::CORE_COUNT]; MPTR displayListStart[Espresso::CORE_COUNT]; uint32 displayListMaxSize[Espresso::CORE_COUNT]; }; @@ -75,10 +75,10 @@ template inline void gx2WriteGather_submit(Targs... args) { uint32 coreIndex = PPCInterpreter_getCurrentCoreIndex(); - if (gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr) + if (*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex] == nullptr) return; - uint32be* writePtr = (uint32be*)(*gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]); + uint32be* writePtr = (uint32be*)gx2WriteGatherPipe.writeGatherPtrWrite[coreIndex]->load(); gx2WriteGather_submit_(coreIndex, writePtr, std::forward(args)...); } From 10bed1abc827bd553ab1479c60d9cf1777860a25 Mon Sep 17 00:00:00 2001 From: Exverge Date: Tue, 16 Jul 2024 22:18:00 -0400 Subject: [PATCH 13/28] Latte: decode indices with NEON on aarch64 --- src/Cafe/HW/Latte/Core/LatteIndices.cpp | 126 ++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 6e1d74559..0f813897f 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -5,6 +5,8 @@ #if defined(ARCH_X86_64) && defined(__GNUC__) #include +#elif defined(__aarch64__) +#include #endif struct @@ -480,6 +482,114 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMax = std::max(indexMax, _maxIndex); indexMin = std::min(indexMin, _minIndex); } +#elif defined(__aarch64__) + +void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint16* indicesU16BE = (const uint16*)indexDataInput; + uint16* indexOutput = (uint16*)indexDataOutput; + sint32 count8 = count >> 3; + sint32 countRemaining = count & 7; + + if (count8) + { + uint16x8_t mMin = vdupq_n_u16(0xFFFF); + uint16x8_t mMax = vdupq_n_u16(0x0000); + uint16x8_t mTemp; + uint16x8_t* mRawIndices = (uint16x8_t*) indicesU16BE; + indicesU16BE += count8 * 8; + uint16x8_t* mOutputIndices = (uint16x8_t*) indexOutput; + indexOutput += count8 * 8; + + while (count8--) + { + mTemp = vld1q_u16((uint16*)mRawIndices); + mRawIndices++; + mTemp = vrev16q_u8(mTemp); + mMin = vminq_u16(mMin, mTemp); + mMax = vmaxq_u16(mMax, mTemp); + vst1q_u16((uint16*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint16* mMaxU16 = (uint16*)&mMax; + uint16* mMinU16 = (uint16*)&mMin; + + for (int i = 0; i < 8; ++i) { + indexMax = std::max(indexMax, (uint32)mMaxU16[i]); + indexMin = std::min(indexMin, (uint32)mMinU16[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint16 idx = _swapEndianU16(*indicesU16BE); + *indexOutput = idx; + indexOutput++; + indicesU16BE++; + _maxIndex = std::max(_maxIndex, (uint32)idx); + _minIndex = std::min(_minIndex, (uint32)idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + +void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint32* indicesU32BE = (const uint32*)indexDataInput; + uint32* indexOutput = (uint32*)indexDataOutput; + sint32 count8 = count >> 2; + sint32 countRemaining = count & 3; + + if (count8) + { + uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF); + uint32x4_t mMax = vdupq_n_u32(0x00000000); + uint32x4_t mTemp; + uint32x4_t* mRawIndices = (uint32x4_t*) indicesU32BE; + indicesU32BE += count8 * 4; + uint32x4_t* mOutputIndices = (uint32x4_t*) indexOutput; + indexOutput += count8 * 4; + + while (count8--) + { + mTemp = vld1q_u32((uint32*)mRawIndices); + mRawIndices++; + mTemp = vrev32q_u8(mTemp); + mMin = vminq_u32(mMin, mTemp); + mMax = vmaxq_u32(mMax, mTemp); + vst1q_u32((uint32*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint32* mMaxU32 = (uint32*)&mMax; + uint32* mMinU32 = (uint32*)&mMin; + + for (int i = 0; i < 4; ++i) { + indexMax = std::max(indexMax, mMaxU32[i]); + indexMin = std::min(indexMin, mMinU32[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint32 idx = _swapEndianU32(*indicesU32BE); + *indexOutput = idx; + indexOutput++; + indicesU32BE++; + _maxIndex = std::max(_maxIndex, idx); + _minIndex = std::min(_minIndex, idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + #endif template @@ -663,27 +773,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 { if (indexType == LatteIndexType::U16_BE) { - #if defined(ARCH_X86_64) +#if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3) LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #else +#elif defined(__aarch64__) + LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #endif +#endif } else if (indexType == LatteIndexType::U32_BE) { - #if defined(ARCH_X86_64) +#if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #else +#elif defined(__aarch64__) + LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); - #endif +#endif } else if (indexType == LatteIndexType::U16_LE) { From 804aa4f345b0f31854b0195a10d62a1a54cbef60 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 18 Jul 2024 22:59:56 -0400 Subject: [PATCH 14/28] ih264d: Process arguments correctly on Apple Silicon Apple seemed to not have offsets for arguments on the stack Either that or the offsets were just wrong, I'll test on a Linux VM and remove the conditonal if this still happens --- .../common/armv8/ih264_weighted_bi_pred_av8.s | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s index 475f690ec..96629b2b8 100644 --- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s @@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8: sxtw x4, w4 sxtw x5, w5 stp x19, x20, [sp, #-16]! +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 ldr w9, [sp, #88] //Load ofst1 in w9 - add w6, w6, #1 //w6 = log_WD + 1 - neg w10, w6 //w10 = -(log_WD + 1) - dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) ldr w10, [sp, #96] //Load ofst2 in w10 ldr w11, [sp, #104] //Load ht in w11 ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wd in w12 + ldr w9, [sp, #84] //Load ht in w11 + ldr w10, [sp, #88] //Load offst2 in w10 + ldr w11, [sp, #92] //Load offst1 in w9 + ldr w12, [sp, #96] //Load offst1 in w9 +#endif + add w6, w6, #1 //w6 = log_WD + 1 + neg w10, w6 //w10 = -(log_WD + 1) + dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) add w9, w9, #1 //w9 = ofst1 + 1 add w9, w9, w10 //w9 = ofst1 + ofst2 + 1 mov v2.s[0], w7 @@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8: sxtw x5, w5 stp x19, x20, [sp, #-16]! - +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 + ldr w9, [sp, #88] //Load ofst1 in w9 + ldr w10, [sp, #96] //Load ofst2 in w10 + ldr w11, [sp, #104] //Load ht in w11 + ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wd in w12 + ldr w9, [sp, #84] //Load ht in w11 + ldr w10, [sp, #88] //Load offst2 in w10 + ldr w11, [sp, #92] //Load offst1 in w9 + ldr w12, [sp, #96] //Load offst1 in w9 +#endif dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit) dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit) add w6, w6, #1 //w6 = log_WD + 1 - ldr w9, [sp, #88] //Load ofst1 in w9 - ldr w10, [sp, #96] //Load ofst2 in w10 neg w20, w6 //w20 = -(log_WD + 1) dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit) - ldr w11, [sp, #104] //Load ht in x11 - ldr w12, [sp, #112] //Load wd in x12 dup v20.8h, w9 //0ffset1 dup v21.8h, w10 //0ffset2 srhadd v6.8b, v20.8b, v21.8b From 005e78b35ea9f32ee6337e2512746600cf93c055 Mon Sep 17 00:00:00 2001 From: Exverge Date: Sat, 20 Jul 2024 16:21:15 -0400 Subject: [PATCH 15/28] Set macOS frequency constant for Intel --- src/util/highresolutiontimer/HighResolutionTimer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/highresolutiontimer/HighResolutionTimer.cpp b/src/util/highresolutiontimer/HighResolutionTimer.cpp index de144b321..bb4a40ab3 100644 --- a/src/util/highresolutiontimer/HighResolutionTimer.cpp +++ b/src/util/highresolutiontimer/HighResolutionTimer.cpp @@ -27,7 +27,7 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 { LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (uint64)(freq.QuadPart); -#elif BOOST_OS_MACOS && defined(__arm64__) +#elif BOOST_OS_MACOS return 1000000000; #else timespec pc; From 4baad7cd37106641645a3e0e11bb1abb8238a711 Mon Sep 17 00:00:00 2001 From: Exverge Date: Sat, 20 Jul 2024 20:46:52 -0400 Subject: [PATCH 16/28] CI: Use AppleClang for arm64 For some reason, compiling with LLVM causes a segfault when any popup opens. This is specific to LLVM and works fine with the default compiler. --- .github/workflows/build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 14b16f674..afd54fda7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -329,7 +329,7 @@ jobs: - name: "Install system dependencies" run: | brew update - brew install llvm@15 ninja nasm molten-vk automake libtool cmake + brew install ninja nasm molten-vk automake libtool cmake - name: "Bootstrap vcpkg" run: | @@ -356,8 +356,6 @@ jobs: cmake .. ${{ env.BUILD_FLAGS }} \ -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \ -DMACOS_BUNDLE=ON \ - -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm@15/bin/clang \ - -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm@15/bin/clang++ \ -G Ninja - name: "Build Cemu" From 7037337659c2eaa2bd75a0f8e6bc4d9045014bcb Mon Sep 17 00:00:00 2001 From: Exverge Date: Sun, 21 Jul 2024 15:44:54 -0400 Subject: [PATCH 17/28] gui: don't use 1 as itemid --- src/gui/MainWindow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index 66f9808bd..73ae21b8a 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -2226,7 +2226,7 @@ void MainWindow::RecreateMenu() debugLoggingMenu->AppendSeparator(); wxMenu* logCosModulesMenu = new wxMenu(); - logCosModulesMenu->AppendCheckItem(1, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); + logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 - 1, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); logCosModulesMenu->AppendSeparator(); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile)); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync)); From 0760d3ef44ee2e885c5b93cc68638bfefa5486a7 Mon Sep 17 00:00:00 2001 From: Exverge Date: Mon, 22 Jul 2024 21:25:53 -0400 Subject: [PATCH 18/28] Update BUILD.md --- BUILD.md | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/BUILD.md b/BUILD.md index 3ff2254f7..6807a5b1a 100644 --- a/BUILD.md +++ b/BUILD.md @@ -16,7 +16,6 @@ - [Compiling Errors](#compiling-errors) - [Building Errors](#building-errors) - [macOS](#macos) - - [On Apple Silicon Macs, Rosetta 2 and the x86_64 version of Homebrew must be used](#on-apple-silicon-macs-rosetta-2-and-the-x86_64-version-of-homebrew-must-be-used) - [Installing brew](#installing-brew) - [Installing Dependencies](#installing-dependencies) - [Build Cemu using CMake and Clang](#build-cemu-using-cmake-and-clang) @@ -142,21 +141,15 @@ If you are getting a different error than any of the errors listed above, you ma ## macOS To compile Cemu, a recent enough compiler and STL with C++20 support is required! LLVM 13 and -below, built in LLVM, and Xcode LLVM don't support the C++20 feature set required. The OpenGL graphics -API isn't support on macOS, Vulkan must be used. Additionally Vulkan must be used through the -Molten-VK compatibility layer +lower don't support the C++20 feature set required. Cemu doesn't support using OpenGL +on macOS, Vulkan must be used via the Molten-Vk compatibility layer. -### On Apple Silicon Macs, Rosetta 2 and the x86_64 version of Homebrew must be used - -You can skip this section if you have an Intel Mac. Every time you compile, you need to perform steps 2. - -1. `softwareupdate --install-rosetta` # Install Rosetta 2 if you don't have it. This only has to be done once -2. `arch -x86_64 zsh` # run an x64 shell +Note that if you are compiling on/for Intel, you must replace all references of `/opt/homebrew` to `/usr/local`. ### Installing brew 1. `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"` -2. `eval "$(/usr/local/Homebrew/bin/brew shellenv)"` # set x86_64 brew env +3. `eval "$(/opt/homebrew/bin/brew shellenv)"` # set brew env ### Installing Dependencies @@ -165,12 +158,12 @@ You can skip this section if you have an Intel Mac. Every time you compile, you ### Build Cemu using CMake and Clang 1. `git clone --recursive https://github.com/cemu-project/Cemu` 2. `cd Cemu` -3. `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ -G Ninja` +3. `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ -G Ninja` 4. `cmake --build build` 5. You should now have a Cemu executable file in the /bin folder, which you can run using `./bin/Cemu_release`. #### Troubleshooting steps -- If step 3 gives you an error about not being able to find ninja, try appending `-DCMAKE_MAKE_PROGRAM=/usr/local/bin/ninja` to the command and running it again. +- If step 3 gives you an error about not being able to find ninja, try appending `-DCMAKE_MAKE_PROGRAM=/opt/homebrew/bin/ninja` to the command and running it again. ## Updating Cemu and source code 1. To update your Cemu local repository, use the command `git pull --recurse-submodules` (run this command on the Cemu root). From d25f8da4235aac3e204442ddab7f7779e0b1a7c0 Mon Sep 17 00:00:00 2001 From: Exverge Date: Tue, 23 Jul 2024 12:47:06 -0400 Subject: [PATCH 19/28] ih264: fix comment formatting --- .../common/armv8/ih264_weighted_bi_pred_av8.s | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s index 96629b2b8..b8f53454f 100644 --- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s @@ -149,11 +149,11 @@ ih264_weighted_bi_pred_luma_av8: ldr w11, [sp, #104] //Load ht in w11 ldr w12, [sp, #112] //Load wd in w12 #else - ldr w8, [sp, #80] //Load wd in w12 - ldr w9, [sp, #84] //Load ht in w11 + ldr w8, [sp, #80] //Load wd in w8 + ldr w9, [sp, #84] //Load ht in w9 ldr w10, [sp, #88] //Load offst2 in w10 - ldr w11, [sp, #92] //Load offst1 in w9 - ldr w12, [sp, #96] //Load offst1 in w9 + ldr w11, [sp, #92] //Load offst1 in w11 + ldr w12, [sp, #96] //Load offst1 in w12 #endif add w6, w6, #1 //w6 = log_WD + 1 neg w10, w6 //w10 = -(log_WD + 1) @@ -439,11 +439,11 @@ ih264_weighted_bi_pred_chroma_av8: ldr w11, [sp, #104] //Load ht in w11 ldr w12, [sp, #112] //Load wd in w12 #else - ldr w8, [sp, #80] //Load wd in w12 - ldr w9, [sp, #84] //Load ht in w11 + ldr w8, [sp, #80] //Load wd in w8 + ldr w9, [sp, #84] //Load ht in w9 ldr w10, [sp, #88] //Load offst2 in w10 - ldr w11, [sp, #92] //Load offst1 in w9 - ldr w12, [sp, #96] //Load offst1 in w9 + ldr w11, [sp, #92] //Load offst1 in w11 + ldr w12, [sp, #96] //Load offst1 in w12 #endif dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit) dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit) From a393ee8fbbe6c5e4d1699907405748f15ee1decb Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 7 Aug 2024 20:11:42 -0400 Subject: [PATCH 20/28] Fix build errors --- .github/workflows/build.yml | 3 ++- src/config/ActiveSettings.cpp | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6b72bacb8..e6fe42591 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -347,7 +347,8 @@ jobs: git checkout bf097edc74ec3b6dfafdcd5a38d3ce14b11952d6 ./fetchDependencies --macos make macos - make install + sudo make install + cp /usr/local/lib/libMoltenVK.dylib /opt/homebrew/lib/ - name: "Bootstrap vcpkg" run: | diff --git a/src/config/ActiveSettings.cpp b/src/config/ActiveSettings.cpp index 0d8e9b9f7..f7748cf33 100644 --- a/src/config/ActiveSettings.cpp +++ b/src/config/ActiveSettings.cpp @@ -76,15 +76,13 @@ CPUMode ActiveSettings::GetCPUMode() { #ifdef __aarch64__ mode = CPUMode::SinglecoreInterpreter; - } #else - { if (GetPhysicalCoreCount() >= 4) mode = CPUMode::MulticoreRecompiler; else mode = CPUMode::SinglecoreRecompiler; - } #endif + } else if (mode == CPUMode::DualcoreRecompiler) // dualcore is disabled now mode = CPUMode::MulticoreRecompiler; From 4700a1900948a69a2721839b0f96f21e403a79cc Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 7 Aug 2024 18:03:37 -0400 Subject: [PATCH 21/28] Revert alignment of HIGHMEM Revert "MMU: Make HIGHMEM address conditional" This reverts commit f24fc3ed3307e2dede3458ef1bacb978add2fcf4. Revert "MMU: Align pre-core memory space for 16kib page size" This reverts commit dc1dc4491c925c55391dd53b1b2222607efbeead. --- src/Cafe/HW/MMU/MMU.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Cafe/HW/MMU/MMU.cpp b/src/Cafe/HW/MMU/MMU.cpp index ce15600ad..04ee8877e 100644 --- a/src/Cafe/HW/MMU/MMU.cpp +++ b/src/Cafe/HW/MMU/MMU.cpp @@ -91,7 +91,6 @@ void MMURange::mapMem() if (MemMapper::AllocateMemory(memory_base + baseAddress, size, MemMapper::PAGE_PERMISSION::P_RW, true) == nullptr) { std::string errorMsg = fmt::format("Unable to allocate {} memory", name); - cemuLog_log(LogType::Force, "Unable to allocate {} memory; error {}", name, errno); wxMessageBox(errorMsg.c_str(), "Error", wxOK | wxCENTRE | wxICON_ERROR); #if BOOST_OS_WINDOWS ExitProcess(-1); @@ -123,12 +122,7 @@ MMURange mmuRange_SHARED_AREA { 0xF8000000, 0x02000000, MMU_MEM_AREA_ID::SHARE MMURange mmuRange_CORE0_LC { 0xFFC00000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC0, "CORE0_LC" }; // locked L2 cache of core 0 MMURange mmuRange_CORE1_LC { 0xFFC40000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC1, "CORE1_LC" }; // locked L2 cache of core 1 MMURange mmuRange_CORE2_LC { 0xFFC80000, 0x00005000, MMU_MEM_AREA_ID::CPU_LC2, "CORE2_LC" }; // locked L2 cache of core 2 -#if !defined(__arm64__) || !defined(__APPLE__) MMURange mmuRange_HIGHMEM { 0xFFFFF000, 0x00001000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; // per-core memory? Used by coreinit and PPC kernel to store core context specific data (like current thread ptr). We dont use it but Project Zero has a bug where it writes a byte at 0xfffffffe thus this memory range needs to be writable -#else -// Apple Silicon uses a 16kb pagesize, it can't allocate higher than this -MMURange mmuRange_HIGHMEM { 0xFFFFC000, 0x00004000, MMU_MEM_AREA_ID::CPU_PER_CORE, "PER-CORE" }; -#endif void memory_init() { From 3795fc07fdfbf5a169f0b59c999e9def05cfd155 Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 7 Aug 2024 18:19:22 -0400 Subject: [PATCH 22/28] MemMapper: Support alternative pagesizes --- src/util/MemMapper/MemMapperUnix.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/util/MemMapper/MemMapperUnix.cpp b/src/util/MemMapper/MemMapperUnix.cpp index 0ade291d4..6ac4a4b40 100644 --- a/src/util/MemMapper/MemMapperUnix.cpp +++ b/src/util/MemMapper/MemMapperUnix.cpp @@ -42,6 +42,12 @@ namespace MemMapper void* AllocateMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags, bool fromReservation) { + uint64 page_size = sysconf(_SC_PAGESIZE); + if ((uint64) baseAddr % page_size != 0) + { + baseAddr = (void*) ((uint64)baseAddr & ~(page_size - 1)); + } + void* r; if(fromReservation) { From 118d1c669a2f68292fc37330e23320dcbe658c4a Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 7 Aug 2024 20:14:23 -0400 Subject: [PATCH 23/28] Update BUILD.md --- BUILD.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BUILD.md b/BUILD.md index f79326ba0..50dc8a012 100644 --- a/BUILD.md +++ b/BUILD.md @@ -149,7 +149,7 @@ Note that if you are compiling on/for Intel, you must replace all references of ### Installing brew 1. `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"` -3. `eval "$(/opt/homebrew/bin/brew shellenv)"` # set brew env +2. `eval "$(/opt/homebrew/bin/brew shellenv)"` # set brew env ### Installing Dependencies From 6b6a538b3a02d85d8f0b9c74115199532653125f Mon Sep 17 00:00:00 2001 From: Exverge Date: Wed, 7 Aug 2024 20:36:39 -0400 Subject: [PATCH 24/28] Update build.yml --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e6fe42591..47360b309 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -348,7 +348,6 @@ jobs: ./fetchDependencies --macos make macos sudo make install - cp /usr/local/lib/libMoltenVK.dylib /opt/homebrew/lib/ - name: "Bootstrap vcpkg" run: | From d73c5895f20a4bd9cc90f21c267aa87e0f06b236 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 8 Aug 2024 12:18:40 -0400 Subject: [PATCH 25/28] MemMapper: don't change address on non-reserved mappings --- src/util/MemMapper/MemMapperUnix.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/util/MemMapper/MemMapperUnix.cpp b/src/util/MemMapper/MemMapperUnix.cpp index 6ac4a4b40..630d71997 100644 --- a/src/util/MemMapper/MemMapperUnix.cpp +++ b/src/util/MemMapper/MemMapperUnix.cpp @@ -42,16 +42,15 @@ namespace MemMapper void* AllocateMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags, bool fromReservation) { - uint64 page_size = sysconf(_SC_PAGESIZE); - if ((uint64) baseAddr % page_size != 0) - { - baseAddr = (void*) ((uint64)baseAddr & ~(page_size - 1)); - } void* r; if(fromReservation) { - if( mprotect(baseAddr, size, GetProt(permissionFlags)) == 0 ) + uint64 page_size = sysconf(_SC_PAGESIZE); + void* page = baseAddr; + if ( (uint64) baseAddr % page_size != 0 ) + page = (void*) ((uint64)baseAddr & ~(page_size - 1)); + if( mprotect(page, size, GetProt(permissionFlags)) == 0 ) r = baseAddr; else r = nullptr; From b30a6b0b98a627c3c32f6a8a7633a69385842b76 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 8 Aug 2024 12:20:01 -0400 Subject: [PATCH 26/28] Add suggested changes Co-authored-by: Exzap <13877693+Exzap@users.noreply.github.com> --- src/gui/MainWindow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index 0d7337e47..11ff437ab 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -138,6 +138,7 @@ enum MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, // debug->logging + MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE = 21499, MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500, MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO = 21599, // debug->dump @@ -2199,7 +2200,7 @@ void MainWindow::RecreateMenu() debugLoggingMenu->AppendSeparator(); wxMenu* logCosModulesMenu = new wxMenu(); - logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 - 1, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); + logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); logCosModulesMenu->AppendSeparator(); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile)); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync)); From 86b477ae122815599903f232cdf67eef7e82e3ec Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 8 Aug 2024 12:23:22 -0400 Subject: [PATCH 27/28] Revert "Update build.yml" This reverts commit 6b6a538b3a02d85d8f0b9c74115199532653125f. [skip ci] --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 47360b309..d461d0579 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -348,6 +348,7 @@ jobs: ./fetchDependencies --macos make macos sudo make install + cp /usr/local/lib/libMoltenVK.dylib /opt/homebrew/lib - name: "Bootstrap vcpkg" run: | From ee7c08e147ac001b26242200db7210fcaa640937 Mon Sep 17 00:00:00 2001 From: Exverge Date: Thu, 8 Aug 2024 14:54:38 -0400 Subject: [PATCH 28/28] Latte : Temporarily disable blending for integer format color attachments --- .../Renderer/Vulkan/VulkanPipelineCompiler.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp index 5cd29941a..e3d91c97b 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp @@ -988,8 +988,6 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show pipelineInfo.pDynamicState = &dynamicState; pipelineInfo.pRasterizationState = &rasterizer; pipelineInfo.pMultisampleState = &multisampling; - if (!_IsVkIntegerFormat(m_renderPassObj->GetColorFormat(0))) - pipelineInfo.pColorBlendState = &colorBlending; pipelineInfo.layout = m_pipeline_layout; pipelineInfo.renderPass = m_renderPassObj->m_renderPass; pipelineInfo.pDepthStencilState = &depthStencilState; @@ -998,6 +996,17 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show pipelineInfo.flags = 0; if (!forceCompile) pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; +#ifdef __APPLE__ + for (int i = 0; i < Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS; ++i) + { + if (_IsVkIntegerFormat(m_renderPassObj->GetColorFormat(i))) + break; + if (i == Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS - 1) + pipelineInfo.pColorBlendState = &colorBlending; + } +#else + pipelineInfo.pColorBlendState = &colorBlending; +#endif VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo; VkPipelineCreationFeedbackEXT creationFeedback;