Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gpu: Use spans for memory access #126

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions include/PICA/gpu.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <array>
#include <span>

#include "PICA/dynapica/shader_rec.hpp"
#include "PICA/float_types.hpp"
Expand Down Expand Up @@ -64,9 +65,9 @@ class GPU {
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted

// Command processor pointers for GPU command lists
u32* cmdBuffStart = nullptr;
u32* cmdBuffEnd = nullptr;
u32* cmdBuffCurr = nullptr;
std::span<u32> cmdBuff{};
u32 cmdBuffEnd{};
u32 cmdBuffCurr{};

std::unique_ptr<Renderer> renderer;
PICA::Vertex getImmediateModeVertex();
Expand Down Expand Up @@ -127,19 +128,18 @@ class GPU {
}
}

// Get a pointer of type T* to the data starting from physical address paddr
// Get a span of T covering `size` bytes of memory starting at physical address paddr
template <typename T>
T* getPointerPhys(u32 paddr) {
if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
std::span<T> getPointerPhys(u32 paddr, u32 size) {
if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) {
u8* fcram = mem.getFCRAM();
u32 index = paddr - PhysicalAddrs::FCRAM;

return (T*)&fcram[index];
} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
return std::span{(T*)&fcram[index], size / sizeof(T)};
} else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) {
u32 index = paddr - PhysicalAddrs::VRAM;
return (T*)&vram[index];
return std::span{(T*)&vram[index], size / sizeof(T)};
} else [[unlikely]] {
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
}
}
};
};
8 changes: 7 additions & 1 deletion include/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <fstream>
#include <optional>
#include <vector>
#include <span>
#include "crypto/aes_engine.hpp"
#include "helpers.hpp"
#include "handles.hpp"
Expand Down Expand Up @@ -168,6 +169,11 @@ class Memory {
u32 getLinearHeapVaddr();
u8* getFCRAM() { return fcram; }

// Typed view over guest memory: returns a span of T covering `size` bytes starting at `address`.
// `size` is a byte count; trailing bytes that do not make up a whole T are left out of the span.
// If the untyped getReadPointer(address) overload returns null, an empty span is returned,
// so callers can keep detecting failure through data() == nullptr.
template <class T>
std::span<T> getReadPointer(u32 address, u32 size) {
// Cast only the pointer. The element count is the span's second constructor argument,
// not part of the cast operand.
T* const base = static_cast<T*>(getReadPointer(address));
if (base == nullptr) {
return {};
}

return std::span<T>{base, size / sizeof(T)};
}

// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
u32 totalSysFCRAM() {
return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
Expand Down Expand Up @@ -248,4 +254,4 @@ class Memory {

void setVRAM(u8* pointer) { vram = pointer; }
bool allocateMainThreadStack(u32 size);
};
};
4 changes: 2 additions & 2 deletions include/renderer_gl/textures.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct Texture {

void allocate();
void setNewConfig(u32 newConfig);
void decodeTexture(const void* data);
void decodeTexture(std::span<const u8> data);
void free();
u64 sizeInBytes();

Expand All @@ -61,4 +61,4 @@ struct Texture {
// TODO: Make hasAlpha a template parameter
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
};
49 changes: 17 additions & 32 deletions src/core/PICA/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,12 @@ void GPU::drawArrays() {
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
} else {
if (shortIndex) {
auto ptr = getPointerPhys<u16>(indexBufferPointer);
vertexIndex = *ptr; // TODO: This is very unsafe
auto ptr = getPointerPhys<u16>(indexBufferPointer, 2);
vertexIndex = ptr[0]; // TODO: This is very unsafe
indexBufferPointer += 2;
} else {
auto ptr = getPointerPhys<u8>(indexBufferPointer);
vertexIndex = *ptr; // TODO: This is also very unsafe
auto ptr = getPointerPhys<u8>(indexBufferPointer, 1);
vertexIndex = ptr[0]; // TODO: This is also very unsafe
indexBufferPointer += 1;
}
}
Expand Down Expand Up @@ -194,47 +194,32 @@ void GPU::drawArrays() {
vec4f& attribute = currentAttributes[attrCount];
uint component; // Current component

const auto get_attrib = [&]<typename T>(T param) {
auto ptr = getPointerPhys<T>(attrAddress, size * sizeof(T));
for (component = 0; component < size; component++) {
const float val = static_cast<float>(ptr[component]);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(T);
};

switch (attribType) {
case 0: { // Signed byte
s8* ptr = getPointerPhys<s8>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(s8);
get_attrib(s8{});
break;
}

case 1: { // Unsigned byte
u8* ptr = getPointerPhys<u8>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(u8);
get_attrib(u8{});
break;
}

case 2: { // Short
s16* ptr = getPointerPhys<s16>(attrAddress);
for (component = 0; component < size; component++) {
float val = static_cast<float>(*ptr++);
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(s16);
get_attrib(s16{});
break;
}

case 3: { // Float
float* ptr = getPointerPhys<float>(attrAddress);
for (component = 0; component < size; component++) {
float val = *ptr++;
attribute[component] = f24::fromFloat32(val);
}
attrAddress += size * sizeof(float);
get_attrib(float{});
break;
}

default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType);
}

Expand Down
25 changes: 13 additions & 12 deletions src/core/PICA/regs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;

// Set command buffer state to execute the new buffer
cmdBuffStart = getPointerPhys<u32>(addr);
cmdBuffCurr = cmdBuffStart;
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
cmdBuff = getPointerPhys<u32>(addr, size);
cmdBuffCurr = 0;
cmdBuffEnd = cmdBuff.size();
}
break;
}
Expand Down Expand Up @@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
}

void GPU::startCommandList(u32 addr, u32 size) {
cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
cmdBuff = mem.getReadPointer<u32>(addr, size);
if (!cmdBuff.data())
Helpers::panic("Couldn't get buffer for command list");
// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB

cmdBuffCurr = cmdBuffStart;
cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
cmdBuffCurr = 0;
cmdBuffEnd = cmdBuff.size();

// LUT for converting the parameter mask to an actual 32-bit mask
// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
Expand All @@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
// So to check if it is aligned, we get the number of words it's been incremented by
// If that number is an odd value then the buffer is not aligned, otherwise it is
if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
if (cmdBuffCurr % 2 != 0) {
cmdBuffCurr++;
}

// The first word of a command is the command parameter and the second one is the header
u32 param1 = *cmdBuffCurr++;
u32 header = *cmdBuffCurr++;
const u32 param1 = cmdBuff[cmdBuffCurr++];
const u32 header = cmdBuff[cmdBuffCurr++];

u32 id = header & 0xffff;
u32 paramMaskIndex = getBits<16, 4>(header);
Expand All @@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
writeInternalReg(id, param1, mask);
for (u32 i = 0; i < paramCount; i++) {
id += idIncrement;
u32 param = *cmdBuffCurr++;
u32 param = cmdBuff[cmdBuffCurr++];
writeInternalReg(id, param, mask);
}
}
}
}
4 changes: 2 additions & 2 deletions src/core/renderer_gl/renderer_gl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
if (buffer.has_value()) {
return buffer.value().get().texture;
} else {
const void* textureData = gpu.getPointerPhys<void*>(tex.location); // Get pointer to the texture data in 3DS memory
std::span<u8> textureData = gpu.getPointerPhys<u8>(tex.location, tex.sizeInBytes()); // Get pointer to the texture data in 3DS memory
Texture& newTex = textureCache.add(tex);
newTex.decodeTexture(textureData);

Expand Down Expand Up @@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) {
}

stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
}
}
6 changes: 3 additions & 3 deletions src/core/renderer_gl/textures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
}
}

void Texture::decodeTexture(const void* data) {
void Texture::decodeTexture(std::span<const u8> data) {
std::vector<u32> decoded;
decoded.reserve(u64(size.u()) * u64(size.v()));

// Decode texels line by line
for (u32 v = 0; v < size.v(); v++) {
for (u32 u = 0; u < size.u(); u++) {
u32 colour = decodeTexel(u, v, format, data);
u32 colour = decodeTexel(u, v, format, data.data());
decoded.push_back(colour);
}
}

texture.bind();
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
}
}
Loading