From ca22b8ec4ae5aa36c7ab4826523b978aaa6c3939 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 20 Jul 2023 14:29:38 +0300 Subject: [PATCH] gpu: Use spans for memory access --- include/PICA/gpu.hpp | 22 ++++++------- include/memory.hpp | 8 ++++- include/renderer_gl/textures.hpp | 4 +-- src/core/PICA/gpu.cpp | 49 ++++++++++------------------ src/core/PICA/regs.cpp | 25 +++++++------- src/core/renderer_gl/renderer_gl.cpp | 4 +-- src/core/renderer_gl/textures.cpp | 6 ++-- 7 files changed, 55 insertions(+), 63 deletions(-) diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index d4e54358e..a223cbf78 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include "PICA/dynapica/shader_rec.hpp" #include "PICA/float_types.hpp" @@ -64,9 +65,9 @@ class GPU { std::array fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted // Command processor pointers for GPU command lists - u32* cmdBuffStart = nullptr; - u32* cmdBuffEnd = nullptr; - u32* cmdBuffCurr = nullptr; + std::span cmdBuff{}; + u32 cmdBuffEnd{}; + u32 cmdBuffCurr{}; std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); @@ -127,19 +128,18 @@ class GPU { } } - // Get a pointer of type T* to the data starting from physical address paddr + // Get a span of type T to the data starting from physical address paddr template - T* getPointerPhys(u32 paddr) { - if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) { + std::span getPointerPhys(u32 paddr, u32 size) { + if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) { u8* fcram = mem.getFCRAM(); u32 index = paddr - PhysicalAddrs::FCRAM; - - return (T*)&fcram[index]; - } else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) { + return std::span{(T*)&fcram[index], size / sizeof(T)}; + } else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) { u32 index = paddr - PhysicalAddrs::VRAM; - return (T*)&vram[index]; + return std::span{(T*)&vram[index], size / sizeof(T)}; } else [[unlikely]] { Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr); } } -}; \ No newline at end of file +}; diff --git a/include/memory.hpp b/include/memory.hpp index 6f33d8956..cd7a4241b 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "crypto/aes_engine.hpp" #include "helpers.hpp" #include "handles.hpp" @@ -168,6 +169,11 @@ class Memory { u32 getLinearHeapVaddr(); u8* getFCRAM() { return fcram; } + template + std::span getReadPointer(u32 address, u32 size) { + return std::span{reinterpret_cast(getReadPointer(address), size / sizeof(T))}; + } + // Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader) u32 totalSysFCRAM() { return FCRAM_SIZE - FCRAM_APPLICATION_SIZE; @@ -248,4 +254,4 @@ class Memory { void setVRAM(u8* pointer) { vram = pointer; } bool allocateMainThreadStack(u32 size); -}; \ No newline at end of file +}; diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp index 5469a59f6..981f6eebe 100644 --- a/include/renderer_gl/textures.hpp +++ b/include/renderer_gl/textures.hpp @@ -40,7 +40,7 @@ struct Texture { void allocate(); void setNewConfig(u32 newConfig); - void decodeTexture(const void* data); + void decodeTexture(std::span data); void free(); u64 sizeInBytes(); @@ -61,4 +61,4 @@ struct Texture { // TODO: Make hasAlpha a template parameter u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data); u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); -}; \ No newline at end of file +}; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 15c99c42a..da8bb5266 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -131,12 +131,12 @@ void GPU::drawArrays() { vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg]; } else { if (shortIndex) { - auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is very unsafe + auto ptr = getPointerPhys(indexBufferPointer, 2); + vertexIndex = ptr[0]; // TODO: This is very unsafe indexBufferPointer += 2; } else { - auto ptr = getPointerPhys(indexBufferPointer); - vertexIndex = *ptr; // TODO: This is also very unsafe + auto ptr = getPointerPhys(indexBufferPointer, 1); + vertexIndex = ptr[0]; // TODO: This is also very unsafe indexBufferPointer += 1; } } @@ -194,47 +194,32 @@ void GPU::drawArrays() { vec4f& attribute = currentAttributes[attrCount]; uint component; // Current component + const auto get_attrib = [&](T param) { + auto ptr = getPointerPhys(attrAddress, size * sizeof(T)); + for (component = 0; component < size; component++) { + const float val = static_cast(ptr[component]); + attribute[component] = f24::fromFloat32(val); + } + attrAddress += size * sizeof(T); + }; + switch (attribType) { case 0: { // Signed byte - s8* ptr = getPointerPhys(attrAddress); - for (component = 0; component < size; component++) { - float val = static_cast(*ptr++); - attribute[component] = f24::fromFloat32(val); - } - attrAddress += size * sizeof(s8); + get_attrib(s8{}); break; } - case 1: { // Unsigned byte - u8* ptr = getPointerPhys(attrAddress); - for (component = 0; component < size; component++) { - float val = static_cast(*ptr++); - attribute[component] = f24::fromFloat32(val); - } - attrAddress += size * sizeof(u8); + get_attrib(u8{}); break; } - case 2: { // Short - s16* ptr = getPointerPhys(attrAddress); - for (component = 0; component < size; component++) { - float val = static_cast(*ptr++); - attribute[component] = f24::fromFloat32(val); - } - attrAddress += size * sizeof(s16); + get_attrib(s16{}); break; } - case 3: { // Float - float* ptr = getPointerPhys(attrAddress); - for (component = 0; component < size; component++) { - float val = *ptr++; - attribute[component] = f24::fromFloat32(val); - } - attrAddress += size * sizeof(float); + get_attrib(float{}); break; } - default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType); } diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index d245f8af2..aaa169960 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3; // Set command buffer state to execute the new buffer - cmdBuffStart = getPointerPhys(addr); - cmdBuffCurr = cmdBuffStart; - cmdBuffEnd = cmdBuffStart + (size / sizeof(u32)); + cmdBuff = getPointerPhys(addr, size); + cmdBuffCurr = 0; + cmdBuffEnd = cmdBuff.size(); } break; } @@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } void GPU::startCommandList(u32 addr, u32 size) { - cmdBuffStart = static_cast(mem.getReadPointer(addr)); - if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list"); + cmdBuff = mem.getReadPointer(addr, size); + if (!cmdBuff.data()) + Helpers::panic("Couldn't get buffer for command list"); // TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB - cmdBuffCurr = cmdBuffStart; - cmdBuffEnd = cmdBuffStart + (size / sizeof(u32)); + cmdBuffCurr = 0; + cmdBuffEnd = cmdBuff.size(); // LUT for converting the parameter mask to an actual 32-bit mask // The parameter mask is 4 bits long, each bit corresponding to one byte of the mask @@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) { // The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time // So to check if it is aligned, we get the number of words it's been incremented by // If that number is an odd value then the buffer is not aligned, otherwise it is - if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) { + if (cmdBuffCurr % 2 != 0) { cmdBuffCurr++; } // The first word of a command is the command parameter and the second one is the header - u32 param1 = *cmdBuffCurr++; - u32 header = *cmdBuffCurr++; + const u32 param1 = cmdBuff[cmdBuffCurr++]; + const u32 header = cmdBuff[cmdBuffCurr++]; u32 id = header & 0xffff; u32 paramMaskIndex = getBits<16, 4>(header); @@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) { writeInternalReg(id, param1, mask); for (u32 i = 0; i < paramCount; i++) { id += idIncrement; - u32 param = *cmdBuffCurr++; + u32 param = cmdBuff[cmdBuffCurr++]; writeInternalReg(id, param, mask); } } -} \ No newline at end of file +} diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 94639f517..13db68efb 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { if (buffer.has_value()) { return buffer.value().get().texture; } else { - const void* textureData = gpu.getPointerPhys(tex.location); // Get pointer to the texture data in 3DS memory + std::span textureData = gpu.getPointerPhys(tex.location, tex.sizeInBytes()); // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) { } stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0); -} \ No newline at end of file +} diff --git a/src/core/renderer_gl/textures.cpp b/src/core/renderer_gl/textures.cpp index 819bf783c..411c8de0b 100644 --- a/src/core/renderer_gl/textures.cpp +++ b/src/core/renderer_gl/textures.cpp @@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) { } } -void Texture::decodeTexture(const void* data) { +void Texture::decodeTexture(std::span data) { std::vector decoded; decoded.reserve(u64(size.u()) * u64(size.v())); // Decode texels line by line for (u32 v = 0; v < size.v(); v++) { for (u32 u = 0; u < size.u(); u++) { - u32 colour = decodeTexel(u, v, format, data); + u32 colour = decodeTexel(u, v, format, data.data()); decoded.push_back(colour); } } texture.bind(); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data()); -} \ No newline at end of file +}