Skip to content

Commit

Permalink
gpu_neon: fix wrong mask bit for fills
Browse files Browse the repository at this point in the history
Fixes #344
  • Loading branch information
notaz committed Aug 16, 2024
1 parent 3382c20 commit 89a8e88
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 49 deletions.
6 changes: 2 additions & 4 deletions plugins/gpu_neon/psx_gpu/psx_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -4810,8 +4810,7 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
u32 r = color & 0xFF;
u32 g = (color >> 8) & 0xFF;
u32 b = (color >> 16) & 0xFF;
u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
psx_gpu->mask_msb;
u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
u32 color_32bpp = color_16bpp | (color_16bpp << 16);

u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
Expand Down Expand Up @@ -4863,8 +4862,7 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
u32 r = color & 0xFF;
u32 g = (color >> 8) & 0xFF;
u32 b = (color >> 16) & 0xFF;
u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
psx_gpu->mask_msb;
u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
u32 color_32bpp = color_16bpp | (color_16bpp << 16);

u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
Expand Down
46 changes: 1 addition & 45 deletions plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
Original file line number Diff line number Diff line change
Expand Up @@ -4386,51 +4386,6 @@ function(warmup)

#undef vram_ptr
#undef color
#undef width
#undef height
#undef pitch

/*
 * Register aliases used by render_block_fill_body below.
 * r0-r3 carry vram_ptr, color, width and height on entry
 * (presumably the first four arguments per the ARM procedure call
 * standard - confirm against the C prototype).
 */
#define vram_ptr r0
#define color r1
#define width r2
#define height r3

/*
 * pitch deliberately aliases the same register as color (r1): the routine
 * must consume color before it writes pitch.
 */
#define pitch r1

/* Per-row countdown of remaining pixels; r12 is used as scratch. */
#define num_width r12

#undef colors_a
#undef colors_b

/* Two quad registers holding the replicated fill color (2 x 16 bytes). */
#define colors_a q0
#define colors_b q1

.align 3

/*
 * render_block_fill_body: fill a rectangle with a single 16-bit color.
 *
 * In:    vram_ptr (r0) = destination address of the first pixel; the store
 *                        below uses a :256 alignment hint, so it must be
 *                        32-byte aligned
 *        color    (r1) = fill value; only the low 16 bits are replicated
 *        width    (r2) = pixels per row; the inner loop subtracts 16 and
 *                        exits only on exactly zero, so this must be a
 *                        non-zero multiple of 16
 *        height   (r3) = number of rows; must be non-zero for the same reason
 * Out:   none
 * Clobb: r0-r3, r12, q0, q1, flags
 *
 * The constant 2048 is presumably the row stride in bytes (the C caller
 * indexes rows as y * 1024 - TODO confirm the element size there).
 */
function(render_block_fill_body)
/* Replicate the 16-bit color into all eight lanes of q0. */
vdup.u16 colors_a, color
/* color (r1) is dead from here on; r1 is reused as pitch. */
mov pitch, #2048

vmov colors_b, colors_a
/* pitch = 2048 - width * 2: bytes to skip from the end of one row's */
/* filled span to the start of the next row's span. */
sub pitch, pitch, width, lsl #1

mov num_width, width

/* Label 0 is the target of both the inner (column) and outer (row) branch. */
0:
/* Store 32 bytes (16 pixels) and post-increment vram_ptr by 32. */
vst1.u32 { colors_a, colors_b }, [vram_ptr, :256]!

subs num_width, num_width, #16
bne 0b

/* End of row: advance to the next row and reload the column counter. */
add vram_ptr, vram_ptr, pitch
mov num_width, width

subs height, height, #1
bne 0b

bx lr


#undef x
#undef y
#undef width
Expand Down Expand Up @@ -4523,6 +4478,7 @@ function(render_block_fill_body)
#define texels_wide_high d15
#define texels_wide q7

.align 3

setup_sprite_flush_blocks:
vpush { q1 - q5 }
Expand Down

0 comments on commit 89a8e88

Please sign in to comment.