From ca22b8ec4ae5aa36c7ab4826523b978aaa6c3939 Mon Sep 17 00:00:00 2001
From: GPUCode <geoster3d@gmail.com>
Date: Thu, 20 Jul 2023 14:29:38 +0300
Subject: [PATCH] gpu: Use spans for memory access

---
 include/PICA/gpu.hpp                 | 22 ++++++-------
 include/memory.hpp                   |  8 ++++-
 include/renderer_gl/textures.hpp     |  4 +--
 src/core/PICA/gpu.cpp                | 49 ++++++++++------------------
 src/core/PICA/regs.cpp               | 25 +++++++-------
 src/core/renderer_gl/renderer_gl.cpp |  4 +--
 src/core/renderer_gl/textures.cpp    |  6 ++--
 7 files changed, 55 insertions(+), 63 deletions(-)
diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp
index d4e54358e..a223cbf78 100644
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@@ -1,5 +1,6 @@
 #pragma once
 #include <array>
+#include <span>
 
 #include "PICA/dynapica/shader_rec.hpp"
 #include "PICA/float_types.hpp"
@@ -64,9 +65,9 @@ class GPU {
 	std::array<u32, 3> fixedAttrBuff;  // Buffer to hold fixed attributes in until they get submitted
 
 	// Command processor pointers for GPU command lists
-	u32* cmdBuffStart = nullptr;
-	u32* cmdBuffEnd = nullptr;
-	u32* cmdBuffCurr = nullptr;
+	std::span<u32> cmdBuff{};
+	u32 cmdBuffEnd{};
+	u32 cmdBuffCurr{};
 
 	std::unique_ptr<Renderer> renderer;
 	PICA::Vertex getImmediateModeVertex();
@@ -127,19 +128,18 @@ class GPU {
 		}
 	}
 
-	// Get a pointer of type T* to the data starting from physical address paddr
+	// Get a span of T covering size bytes (size / sizeof(T) elements) from physical address paddr; panics unless the range lies in FCRAM or VRAM
 	template <typename T>
-	T* getPointerPhys(u32 paddr) {
-		if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
+	std::span<T> getPointerPhys(u32 paddr, u32 size) {
+		if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd && size <= PhysicalAddrs::FCRAMEnd - paddr) {  // Subtract rather than add so paddr + size cannot wrap around u32
 			u8* fcram = mem.getFCRAM();
 			u32 index = paddr - PhysicalAddrs::FCRAM;
-
-			return (T*)&fcram[index];
-		} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd) {
+			return std::span<T>{reinterpret_cast<T*>(&fcram[index]), size / sizeof(T)};
+		} else if (paddr >= PhysicalAddrs::VRAM && paddr <= PhysicalAddrs::VRAMEnd && size <= PhysicalAddrs::VRAMEnd - paddr) {  // Same wrap-safe range check for VRAM
 			u32 index = paddr - PhysicalAddrs::VRAM;
-			return (T*)&vram[index];
+			return std::span<T>{reinterpret_cast<T*>(&vram[index]), size / sizeof(T)};
 		} else [[unlikely]] {
 			Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
 		}
 	}
-};
\ No newline at end of file
+};
diff --git a/include/memory.hpp b/include/memory.hpp
index 6f33d8956..cd7a4241b 100644
--- a/include/memory.hpp
+++ b/include/memory.hpp
@@ -5,6 +5,7 @@
 #include <fstream>
 #include <optional>
 #include <vector>
+#include <span>
 #include "crypto/aes_engine.hpp"
 #include "helpers.hpp"
 #include "handles.hpp"
@@ -168,6 +169,11 @@ class Memory {
 	u32 getLinearHeapVaddr();
 	u8* getFCRAM() { return fcram; }
 
+	template <class T>
+	std::span<T> getReadPointer(u32 address, u32 size) {  // Typed view of size bytes (size / sizeof(T) elements) starting at address
+		return std::span<T>{reinterpret_cast<T*>(getReadPointer(address)), size / sizeof(T)};
+	}
+
 	// Total amount of OS-only FCRAM available (Can vary depending on how much FCRAM the app requests via the cart exheader)
 	u32 totalSysFCRAM() {
 		return FCRAM_SIZE - FCRAM_APPLICATION_SIZE;
@@ -248,4 +254,4 @@ class Memory {
 
 	void setVRAM(u8* pointer) { vram = pointer; }
 	bool allocateMainThreadStack(u32 size);
-};
\ No newline at end of file
+};
diff --git a/include/renderer_gl/textures.hpp b/include/renderer_gl/textures.hpp
index 5469a59f6..981f6eebe 100644
--- a/include/renderer_gl/textures.hpp
+++ b/include/renderer_gl/textures.hpp
@@ -40,7 +40,7 @@ struct Texture {
 
     void allocate();
     void setNewConfig(u32 newConfig);
-    void decodeTexture(const void* data);
+    void decodeTexture(std::span<const u8> data);
     void free();
     u64 sizeInBytes();
 
@@ -61,4 +61,4 @@ struct Texture {
     // TODO: Make hasAlpha a template parameter
     u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data);
     u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
-};
\ No newline at end of file
+};
diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index 15c99c42a..da8bb5266 100644
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@@ -131,12 +131,12 @@ void GPU::drawArrays() {
 			vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
 		} else {
 			if (shortIndex) {
-				auto ptr = getPointerPhys<u16>(indexBufferPointer);
-				vertexIndex = *ptr;  // TODO: This is very unsafe
+				auto ptr = getPointerPhys<u16>(indexBufferPointer, 2);
+				vertexIndex = ptr[0];  // TODO: This is very unsafe
 				indexBufferPointer += 2;
 			} else {
-				auto ptr = getPointerPhys<u8>(indexBufferPointer);
-				vertexIndex = *ptr;  // TODO: This is also very unsafe
+				auto ptr = getPointerPhys<u8>(indexBufferPointer, 1);
+				vertexIndex = ptr[0];  // TODO: This is also very unsafe
 				indexBufferPointer += 1;
 			}
 		}
@@ -194,47 +194,32 @@ void GPU::drawArrays() {
 					vec4f& attribute = currentAttributes[attrCount];
 					uint component;  // Current component
 
+					const auto get_attrib = [&]<typename T>(T param) {  // Reads `size` components of type T at attrAddress into `attribute`; `param` is only used to select T
+						auto ptr = getPointerPhys<T>(attrAddress, size * sizeof(T));
+						for (component = 0; component < size; component++) {  // Uses the captured outer `component` as the loop counter
+							const float val = static_cast<float>(ptr[component]);
+							attribute[component] = f24::fromFloat32(val);
+						}
+						attrAddress += size * sizeof(T);  // Advance past the components that were just read
+					};
+
 					switch (attribType) {
 						case 0: {  // Signed byte
-							s8* ptr = getPointerPhys<s8>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(s8);
+							get_attrib(s8{});
 							break;
 						}
-
 						case 1: {  // Unsigned byte
-							u8* ptr = getPointerPhys<u8>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(u8);
+							get_attrib(u8{});
 							break;
 						}
-
 						case 2: {  // Short
-							s16* ptr = getPointerPhys<s16>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = static_cast<float>(*ptr++);
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(s16);
+							get_attrib(s16{});
 							break;
 						}
-
 						case 3: {  // Float
-							float* ptr = getPointerPhys<float>(attrAddress);
-							for (component = 0; component < size; component++) {
-								float val = *ptr++;
-								attribute[component] = f24::fromFloat32(val);
-							}
-							attrAddress += size * sizeof(float);
+							get_attrib(float{});
 							break;
 						}
-
 						default: Helpers::panic("[PICA] Unimplemented attribute type %d", attribType);
 					}
 
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index d245f8af2..aaa169960 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -305,9 +305,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 				u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
 
 				// Set command buffer state to execute the new buffer
-				cmdBuffStart = getPointerPhys<u32>(addr);
-				cmdBuffCurr = cmdBuffStart;
-				cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
+				cmdBuff = getPointerPhys<u32>(addr, size);
+				cmdBuffCurr = 0;
+				cmdBuffEnd = cmdBuff.size();
 			}
 			break;
 		}
@@ -336,12 +336,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 }
 
 void GPU::startCommandList(u32 addr, u32 size) {
-	cmdBuffStart = static_cast<u32*>(mem.getReadPointer(addr));
-	if (!cmdBuffStart) Helpers::panic("Couldn't get buffer for command list");
+	cmdBuff = mem.getReadPointer<u32>(addr, size);
+	if (!cmdBuff.data())
+		Helpers::panic("Couldn't get buffer for command list");
 	// TODO: This is very memory unsafe. We get a pointer to FCRAM and just keep writing without checking if we're gonna go OoB
 
-	cmdBuffCurr = cmdBuffStart;
-	cmdBuffEnd = cmdBuffStart + (size / sizeof(u32));
+	cmdBuffCurr = 0;
+	cmdBuffEnd = cmdBuff.size();
 
 	// LUT for converting the parameter mask to an actual 32-bit mask
 	// The parameter mask is 4 bits long, each bit corresponding to one byte of the mask
@@ -357,13 +358,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
 		// The curr pointer starts out doubleword-aligned and is increased by 4 bytes each time
 		// So to check if it is aligned, we get the number of words it's been incremented by
 		// If that number is an odd value then the buffer is not aligned, otherwise it is
-		if ((cmdBuffCurr - cmdBuffStart) % 2 != 0) {
+		if (cmdBuffCurr % 2 != 0) {
 			cmdBuffCurr++;
 		}
 
 		// The first word of a command is the command parameter and the second one is the header
-		u32 param1 = *cmdBuffCurr++;
-		u32 header = *cmdBuffCurr++;
+		const u32 param1 = cmdBuff[cmdBuffCurr++];
+		const u32 header = cmdBuff[cmdBuffCurr++];
 
 		u32 id = header & 0xffff;
 		u32 paramMaskIndex = getBits<16, 4>(header);
@@ -380,8 +381,8 @@ void GPU::startCommandList(u32 addr, u32 size) {
 		writeInternalReg(id, param1, mask);
 		for (u32 i = 0; i < paramCount; i++) {
 			id += idIncrement;
-			u32 param = *cmdBuffCurr++;
+			u32 param = cmdBuff[cmdBuffCurr++];
 			writeInternalReg(id, param, mask);
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index 94639f517..13db68efb 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -448,7 +448,7 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
 	if (buffer.has_value()) {
 		return buffer.value().get().texture;
 	} else {
-		const void* textureData = gpu.getPointerPhys<void*>(tex.location);  // Get pointer to the texture data in 3DS memory
+		std::span<u8> textureData = gpu.getPointerPhys<u8>(tex.location, tex.sizeInBytes());  // Get pointer to the texture data in 3DS memory
 		Texture& newTex = textureCache.add(tex);
 		newTex.decodeTexture(textureData);
 
@@ -515,4 +515,4 @@ void RendererGL::screenshot(const std::string& name) {
 	}
 
 	stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
-}
\ No newline at end of file
+}
diff --git a/src/core/renderer_gl/textures.cpp b/src/core/renderer_gl/textures.cpp
index 819bf783c..411c8de0b 100644
--- a/src/core/renderer_gl/textures.cpp
+++ b/src/core/renderer_gl/textures.cpp
@@ -258,18 +258,18 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
     }
 }
 
-void Texture::decodeTexture(const void* data) {
+void Texture::decodeTexture(std::span<const u8> data) {  // Decodes every texel to a u32 colour and uploads the result with glTexSubImage2D
     std::vector<u32> decoded;
     decoded.reserve(u64(size.u()) * u64(size.v()));
 
     // Decode texels line by line
     for (u32 v = 0; v < size.v(); v++) {
         for (u32 u = 0; u < size.u(); u++) {
-            u32 colour = decodeTexel(u, v, format, data);
+            u32 colour = decodeTexel(u, v, format, data.data());  // decodeTexel takes a raw pointer, so the span's length is not checked here
             decoded.push_back(colour);
         }
     }
 
     texture.bind();
     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
-}
\ No newline at end of file
+}