diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bf367e..ee5e445 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,3 +90,4 @@ add_subdirectory ("sdl-master") add_subdirectory ("physfs-main") add_subdirectory ("RT") add_subdirectory ("d1") +add_subdirectory ("sgv-archiver") diff --git a/RT/Core/CMakeLists.txt b/RT/Core/CMakeLists.txt index b9ed5cb..e83f8bf 100644 --- a/RT/Core/CMakeLists.txt +++ b/RT/Core/CMakeLists.txt @@ -9,7 +9,8 @@ add_library(RT_CORE INTERFACE "Config.h" "String.h" "Hash.h" -"FileIO.h") +"FileIO.h" +"Vault.h") target_sources(RT_CORE INTERFACE "${CMAKE_CURRENT_LIST_DIR}/Config.cpp" @@ -18,4 +19,5 @@ target_sources(RT_CORE INTERFACE "${CMAKE_CURRENT_LIST_DIR}/Common.c" "${CMAKE_CURRENT_LIST_DIR}/String.c" "${CMAKE_CURRENT_LIST_DIR}/VirtualMemory.c" +"${CMAKE_CURRENT_LIST_DIR}/Vault.cpp" ) \ No newline at end of file diff --git a/RT/Core/Config.cpp b/RT/Core/Config.cpp index 75b320f..d6526ea 100644 --- a/RT/Core/Config.cpp +++ b/RT/Core/Config.cpp @@ -8,6 +8,7 @@ #include "Core/Arena.h" #include "Core/MemoryScope.hpp" #include "Core/String.h" +#include "Core/Vault.h" static inline void ConfigError(RT_Config *cfg, char *error) { @@ -24,6 +25,27 @@ void RT_InitializeConfig(RT_Config *cfg, RT_Arena *arena) cfg->last_modified_time = RT_GetHighResTime().value; } +bool RT_DeserializeConfigFromVault(RT_Config* cfg, const char* file_name) +{ + + RT_String file_name_string = RT_StringFromCString(file_name); + RT_String file_buffer; + file_buffer.bytes = nullptr; + file_buffer.count = 0; + + if (RT_GetFileFromVaults(file_name_string, file_buffer)) + { + // got the config file from the vault + + // parse it + RT_DeserializeConfigFromString(cfg, file_buffer); + + return true; + } + + return false; +} + bool RT_DeserializeConfigFromFile(RT_Config *cfg, const char *file_name) { FILE *f = fopen(file_name, "rb"); @@ -432,3 +454,8 @@ bool RT_SerializeConfigToFile(RT_Config *cfg, char *file_name) return result; } + +bool RT_ConfigFileExistsInVaults(const 
RT_String* file_name) +{ + return RT_FileExistsInVaults(file_name); +} diff --git a/RT/Core/Config.h b/RT/Core/Config.h index ebde5b8..b02dd97 100644 --- a/RT/Core/Config.h +++ b/RT/Core/Config.h @@ -35,6 +35,7 @@ typedef struct RT_Config } RT_Config; RT_API void RT_InitializeConfig(RT_Config *cfg, RT_Arena *arena); +RT_API bool RT_DeserializeConfigFromVault(RT_Config* cfg, const char* file_name); RT_API bool RT_DeserializeConfigFromFile(RT_Config *cfg, const char *file_name); RT_API void RT_DeserializeConfigFromString(RT_Config *cfg, RT_String string); RT_API bool RT_ConfigReadString(RT_Config *cfg, RT_String key, RT_String *value); @@ -50,6 +51,7 @@ RT_API void RT_ConfigWriteVec3(RT_Config *cfg, RT_String key, RT_Vec3 value); RT_API bool RT_ConfigEraseKey(RT_Config *cfg, RT_String key); // Returns true if the key existed RT_API RT_String RT_SerializeConfigToString(RT_Arena *arena, RT_Config *cfg); RT_API bool RT_SerializeConfigToFile(RT_Config *cfg, char *file_name); +RT_API bool RT_ConfigFileExistsInVaults(const RT_String *file_name); // Prefer the above functions for easy interpretation of values RT_API RT_ConfigKeyValue *RT_ConfigFindKeyValue(RT_Config *cfg, RT_String key); diff --git a/RT/Core/String.h b/RT/Core/String.h index 3b83819..3cd7166 100644 --- a/RT/Core/String.h +++ b/RT/Core/String.h @@ -96,7 +96,7 @@ static inline RT_String RT_CopyString(RT_Arena *arena, RT_String string) static inline const char *RT_CopyStringNullTerm(RT_Arena *arena, RT_String string) { char *result = (char *)RT_ArenaCopyArray(arena, string.bytes, string.count + 1); - result[string.count + 1] = 0; + result[string.count] = 0; return result; } diff --git a/RT/Core/Vault.cpp b/RT/Core/Vault.cpp new file mode 100644 index 0000000..56590ad --- /dev/null +++ b/RT/Core/Vault.cpp @@ -0,0 +1,388 @@ + +#include +#include + +bool vault_data_cached = false; +RT_VaultNode* vault_data = nullptr; + +RT_API void RT_CacheVaultsInfo() +{ + // cache the vault data/indexes + vault_data_cached = 
true; // just need to keep track if we attempted to cache the vault data (so it doesn't repeatedly try to cache the vaults in the event it fails or there are none) + + // Get list of vaults + RT_StringNode* vault_list = RT_GetListOfVaults(); + + // loop through the list and build the vault data + RT_StringNode* cur_node = vault_list; + RT_VaultNode* cur_vault_node = nullptr; // vault_data; + + // build the first vault list node + if (cur_node != nullptr) + { + RT_Vault vault; + if (RT_LoadVaultInfo(cur_node->string, vault)) + { + // successfully loaded vault data, put it into list. + vault_data = new RT_VaultNode(); + const char* temp_vault_name = RT_CopyStringNullTerm(&g_thread_arena, cur_node->string); + vault_data->vault_name = (char*)malloc(strlen(temp_vault_name) + 1); + strcpy(vault_data->vault_name, temp_vault_name); + vault_data->vault_data = vault; + vault_data->next = nullptr; + cur_vault_node = vault_data; + } + + cur_node = cur_node->next; + } + + // build remaining vault list nodes + while (cur_node != nullptr) + { + RT_Vault vault; + if (RT_LoadVaultInfo(cur_node->string, vault)) + { + // successfully loaded vault data, put it into list. 
+ cur_vault_node->next = new RT_VaultNode(); + const char* temp_vault_name = RT_CopyStringNullTerm(&g_thread_arena, cur_node->string); + cur_vault_node->next->vault_name = (char*)malloc(strlen(temp_vault_name) + 1); + strcpy(cur_vault_node->next->vault_name, temp_vault_name); + cur_vault_node->next->vault_data = vault; + cur_vault_node->next->next = nullptr; + cur_vault_node = cur_vault_node->next; + } + + cur_node = cur_node->next; + } + + return; +} + +RT_API bool RT_FileExistsInVaults(const RT_String* file_name) +{ + bool found = false; + + // Check to see if the vault data has not been cached + if (!vault_data_cached) + { + RT_CacheVaultsInfo(); + } + + // Loop over list of vaults and search each one for the file + if (vault_data_cached && vault_data != nullptr) + { + RT_VaultNode* cur_vault = vault_data; + + while (cur_vault != nullptr && !found) + { + found = RT_FileExistsInVault(cur_vault, file_name); + cur_vault = cur_vault->next; + } + + + } + + return found; +} + +RT_API bool RT_FileExistsInVault(const RT_VaultNode* vault, const RT_String* file_name) +{ + bool found = false; + bool empty = false; + + if (vault != nullptr) + { + // open the vault + + FILE* f2 = fopen(vault->vault_name, "rb"); + + if (f2) + { + // query index to find if file present + const char* file_name_cstr = RT_CopyStringNullTerm(&g_thread_arena, *file_name); + + uint32_t file_name_hash = RT_VaultHash( + file_name_cstr, + vault->vault_data.header.index_hash_function, + vault->vault_data.header.index_hash_function_modifier, + vault->vault_data.header.index_size); + + + + while (!empty && !found) + { + uint32_t headerSize = 19; // manually set the header size as sizeof(sgv_header) will return 20 due to padding; + fseek(f2, headerSize + (file_name_hash * sizeof(sgv_index_entry)), SEEK_SET); + char index_entry[264]; + fread(&index_entry, sizeof(char), 264, f2); + + // check to see if the index entry is an empty string. if so the file is not in the vault. 
+ if (index_entry[0] == 0) + empty = true; + + else if (strcmp(file_name_cstr, index_entry) == 0) + { + + found = true; // we've found the file in the index + + } + else + { + // we've hit an entry in the index, but it wasn't what we were looking for. move the index to the next entry and try again + file_name_hash = (file_name_hash + 1) % vault->vault_data.header.index_size; + } + } + + // close vault + fclose(f2); + } + + } + + + return found; +} + +RT_StringNode* RT_GetListOfVaults() +{ + char* vaults_file = RT_ArenaPrintF(&g_thread_arena, "vaults"); + + RT_StringNode* vault_list = nullptr; + + FILE* f = fopen(vaults_file, "rb"); + + if (!f) + { + return vault_list; // return empty list + } + + defer{ fclose(f); }; + + // read entire file into memory + fseek(f, 0, SEEK_END); + size_t file_size = ftell(f); + fseek(f, 0, SEEK_SET); + + RT_String file; + file.bytes = (char*)RT_ArenaAllocNoZero(&g_thread_arena, file_size + 1, 16); + file.count = file_size; + + // Null terminate for good measure + file.bytes[file.count] = 0; + + size_t bytes_read = fread(file.bytes, 1, file_size, f); + if (bytes_read != file_size) + { + return vault_list; + } + + // split the string to lines (each line should be a vault entry) + while (file.count > 0) + { + RT_String line; + file = RT_StringSplitLine(file, &line); + + line = RT_StringTrim(line); + + // verify that the vault exists + const char* vault_file = RT_CopyStringNullTerm(&g_thread_arena,line); + + FILE* f2 = fopen(vault_file, "rb"); + + if (f2) + { + // we found a vault file + + // add it to the end of the vault list + + if (vault_list == nullptr) + { + vault_list = RT_ArenaAllocStruct(&g_thread_arena, RT_StringNode); + vault_list->string = RT_CopyString(&g_thread_arena, line); + vault_list->next = nullptr; + } + else + { + RT_StringNode* vault_list_node = vault_list; + + while (vault_list_node->next != nullptr) + vault_list_node = vault_list_node->next; + + vault_list_node->next = RT_ArenaAllocStruct(&g_thread_arena, 
RT_StringNode); + vault_list_node->next->string = RT_CopyString(&g_thread_arena, line); + vault_list_node->next->next = nullptr; + } + + fclose(f2); + } + } + + return vault_list; +} + + +bool RT_GetFileFromVaults(const RT_String file_name, RT_String& buffer) +{ + // Check to see if the vault data has not been cached + if (!vault_data_cached) + { + RT_CacheVaultsInfo(); + } + + // Loop over list of vaults and search each one for the file + if(vault_data_cached && vault_data != nullptr) + { + RT_VaultNode* cur_vault = vault_data; + + while (cur_vault != nullptr && buffer.bytes == nullptr) + { + RT_GetFileFromVault(cur_vault, file_name, buffer); + cur_vault = cur_vault->next; + } + + if (buffer.bytes != nullptr && buffer.count != 0) + return true; + } + + return false; +} + +bool RT_GetFileFromVault(const RT_VaultNode* vault, const RT_String file_name, RT_String& buffer) +{ + + if (vault != nullptr) + { + // open the vault + + FILE* f2 = fopen(vault->vault_name, "rb"); + + if (f2) + { + // query index to find if file present + const char* file_name_cstr = RT_CopyStringNullTerm(&g_thread_arena, file_name); + + uint32_t file_name_hash = RT_VaultHash( + file_name_cstr, + vault->vault_data.header.index_hash_function, + vault->vault_data.header.index_hash_function_modifier, + vault->vault_data.header.index_size); + + bool found = false; + bool empty = false; + + while (!empty && !found) + { + uint32_t headerSize = 19; // manually set the header size as sizeof(sgv_header) will return 20 due to padding; + fseek(f2, headerSize + (file_name_hash * sizeof(sgv_index_entry)), SEEK_SET); + char index_entry[264]; + fread(&index_entry, sizeof(char), 264, f2); + + // check to see if the index entry is an empty string. if so the file is not in the vault. 
+ if (index_entry[0] == 0) + empty = true; + + else if (strcmp(file_name_cstr, index_entry) == 0) + { + + found = true; // we've found the file in the index + + // extract the file here + + uint32_t file_pos = 0; + uint32_t file_len = 0; + + memcpy(&file_pos, &index_entry[256], 4); + memcpy(&file_len, &index_entry[260], 4); + + buffer.bytes = (char*)RT_ArenaAllocNoZero(&g_thread_arena, (size_t)file_len + 1, 16); + _fseeki64(f2, file_pos, SEEK_SET); + + fread(buffer.bytes, sizeof(char), file_len, f2); + + buffer.count = file_len; + + // Null terminate for good measure + buffer.bytes[buffer.count] = 0; + + } + else + { + // we've hit an entry in the index, but it wasn't what we were looking for. move the index to the next entry and try again + file_name_hash = (file_name_hash + 1) % vault->vault_data.header.index_size; + } + } + + // close vault + fclose(f2); + } + + } + + + return false; + +} + +bool RT_LoadVaultInfo(const RT_String& vault_name, RT_Vault& vault) +{ + const char* vault_with_path = RT_CopyStringNullTerm(&g_thread_arena, vault_name); + + FILE* f = fopen(vault_with_path, "rb"); + + if (!f) + { + return false; + } + + defer{ fclose(f); }; + + // start reading file. + + fread(&vault.header.magic, sizeof(char), 4, f); + + const char sgv_magic[] = { 'S','G','V','!' }; + if (memcmp(vault.header.magic, sgv_magic, 4) != 0) + { + // file is not an sgv + return false; + } + + fread(&vault.header.version_major, sizeof(char), 1, f); + fread(&vault.header.version_minor, sizeof(char), 1, f); + + // check if version of file is 1.0. The only version supported currently. + if (vault.header.version_major != 1 || vault.header.version_minor != 0) + { + // unknown version of sgv + return false; + } + + // file is a valid sgv 1.0 file. load the rest of the header. 
+ fread(&vault.header.index_entries, sizeof(uint32_t), 1, f); + fread(&vault.header.index_size, sizeof(uint32_t), 1, f); + fread(&vault.header.index_hash_function, sizeof(char), 1, f); + fread(&vault.header.index_hash_function_modifier, sizeof(uint32_t), 1, f); + + return true; +} + +uint32_t RT_VaultHash(const char* string_to_hash, const char hash_function, const uint32_t modifier, const uint32_t hash_map_size) +{ + if (string_to_hash == nullptr) + return 0; + + uint32_t hash = 0; + + if (hash_function == 1) + { + uint32_t char_index = 0; + while(string_to_hash[char_index] != 0) + { + + hash = (hash * modifier + string_to_hash[char_index]) % hash_map_size; + char_index++; + } + } + + return hash; +} \ No newline at end of file diff --git a/RT/Core/Vault.h b/RT/Core/Vault.h new file mode 100644 index 0000000..80a1e27 --- /dev/null +++ b/RT/Core/Vault.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include + +#include "Common.h" + +struct sgv_index_entry { + + char key[256]; + uint32_t loc; + uint32_t length; + +}; + +struct sgv_header { + char magic[4]; // SGV! 
+ char version_major; + char version_minor; + uint32_t index_entries; + uint32_t index_size; + char index_hash_function; // 0 = No Hash + // 1 = Multiplication 1 : hash = ( hash + modifier * value ) % index_size; + uint32_t index_hash_function_modifier; + + +}; + + +typedef struct RT_Vault +{ + sgv_header header; + //sgv_index_entry* entries; // NOTE: here in case caching of the entire index is needed +} RT_Vault; + +typedef struct RT_VaultNode +{ + char* vault_name; + RT_Vault vault_data; + RT_VaultNode* next; +} RT_VaultNode; + +RT_API void RT_CacheVaultsInfo(); +RT_API RT_StringNode* RT_GetListOfVaults(); +RT_API bool RT_FileExistsInVaults(const RT_String *file_name); +RT_API bool RT_FileExistsInVault(const RT_VaultNode* vault, const RT_String* file_name); +RT_API bool RT_GetFileFromVaults(const RT_String file_name, RT_String& buffer); +RT_API bool RT_GetFileFromVault(const RT_VaultNode* vault, const RT_String file_name, RT_String& buffer); +RT_API bool RT_LoadVaultInfo(const RT_String& vault_name, RT_Vault& vault); +RT_API uint32_t RT_VaultHash(const char* string_to_hash, const char hash_function, const uint32_t modifier, const uint32_t hash_map_size); \ No newline at end of file diff --git a/RT/RTmaterials.c b/RT/RTmaterials.c index 2d62732..2d5aad5 100644 --- a/RT/RTmaterials.c +++ b/RT/RTmaterials.c @@ -96,7 +96,16 @@ static void RT_ParseMaterialDefinitionFile(int bm_index, RT_Material *material, RT_Config cfg; RT_InitializeConfig(&cfg, &g_thread_arena); - if (RT_DeserializeConfigFromFile(&cfg, material_file)) + // first try to load the material definition from the vault files + bool loaded = RT_DeserializeConfigFromVault(&cfg, material_file); + + // if fails, try from file. 
+ if (!loaded) + { + loaded = RT_DeserializeConfigFromFile(&cfg, material_file); + } + + if (loaded) { RT_String string; @@ -167,15 +176,22 @@ static void RT_VerifyMaterialTexturesFromPaths(uint16_t bm_index, RT_MaterialPat RT_ArenaMemoryScope(&g_thread_arena) { char* dds_file = RT_ArenaPrintF(&g_thread_arena, "assets/textures/%s.dds", paths->textures[i]); + + RT_String file_name_string = RT_StringFromCString(dds_file); + bool found = RT_ConfigFileExistsInVaults(&file_name_string); FILE* f = fopen(dds_file, "r"); - if (f) + if (found || f) { - fclose(f); paths->textures[i]; paths_exist->textures[i] = true; } + + if (f) + { + fclose(f); + } } if (!paths_exist->textures[i]) // dds file not found try png @@ -184,14 +200,22 @@ static void RT_VerifyMaterialTexturesFromPaths(uint16_t bm_index, RT_MaterialPat { char* file = RT_ArenaPrintF(&g_thread_arena, "assets/textures/%s.png", paths->textures[i]); + RT_String file_name_string = RT_StringFromCString(file); + bool found = RT_ConfigFileExistsInVaults(&file_name_string); + FILE* f = fopen(file, "r"); - if (f) + if (found || f) { - fclose(f); + paths->textures[i]; paths_exist->textures[i] = true; } + + if (f) + { + fclose(f); + } } } } diff --git a/RT/Renderer/Backend/DX12/src/ImageReadWrite.cpp b/RT/Renderer/Backend/DX12/src/ImageReadWrite.cpp index 305f766..61981dd 100644 --- a/RT/Renderer/Backend/DX12/src/ImageReadWrite.cpp +++ b/RT/Renderer/Backend/DX12/src/ImageReadWrite.cpp @@ -3,6 +3,7 @@ #include "Core/Arena.h" #include "Core/String.h" #include "Core/FileIO.h" +#include "Core/Vault.h" // TODO(daniel): These external libraries are in a weird place... RT/Renderer/Backend/DX12? This has nothing to do with DX12!!!!!!!! 
@@ -49,8 +50,24 @@ RT_Image RT_LoadImageFromDisk(RT_Arena *arena, const char *path_c, int required_ } else { - int w, h, channel_count; - result.pixels = stbi_load(path_c, &w, &h, &channel_count, required_channel_count); + + int w=0, h=0, channel_count=0; + + RT_String file_buffer; + file_buffer.bytes = nullptr; + file_buffer.count = 0; + + // first try to load from vault + if (RT_GetFileFromVaults(path, file_buffer)) + { + result.pixels = stbi_load_from_memory((unsigned char*)(file_buffer.bytes),file_buffer.count,&w,&h, &channel_count, required_channel_count); + } + + // try to load from disk + if (!result.pixels) + { + result.pixels = stbi_load(path_c, &w, &h, &channel_count, required_channel_count); + } if (result.pixels) { @@ -308,11 +325,33 @@ RT_Image RT_LoadDDSFromDisk(RT_Arena *arena, RT_String path) // Don't want to spam when attempting to load DDS image first // fprintf(stderr, "[RT_LoadDDSFromDisk]: Attempting to load image: '%.*s'\n", RT_ExpandString(path)); + + //attempt to load from vaults first here; + bool loaded = false; + RT_String memory; - if (RT_ReadEntireFile(arena, path, &memory)) + memory.bytes = nullptr; + memory.count = 0; + + if (RT_GetFileFromVaults(path, memory)) + { + + loaded = true; + } + + + else if (RT_ReadEntireFile(arena, path, &memory)) + { + + loaded = true; + } + + if (loaded) { result = RT_LoadDDSFromMemory(memory); } + RT_ArenaReset(&g_thread_arena); + return result; } \ No newline at end of file diff --git a/sgv-archiver/CMakeLists.txt b/sgv-archiver/CMakeLists.txt new file mode 100644 index 0000000..3c8d201 --- /dev/null +++ b/sgv-archiver/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required (VERSION 3.8) + +project(sgv-archiver) + +set (CMAKE_CXX_STANDARD 17) + +set(CMAKE_C_COMPILER_ID, "MSVC") +set(CMAKE_CXX_COMPILER_ID, "MSVC") + +message("Build the SFV Archiver") + +add_executable(sgv-archiver) + +target_sources(sgv-archiver PRIVATE + + + "sgv-header.h" + "sgv-index-entry.h" + "sgv-utils.h" + "main.cpp" + 
"sgv-utils.cpp" + + + ) \ No newline at end of file diff --git a/sgv-archiver/main.cpp b/sgv-archiver/main.cpp new file mode 100644 index 0000000..c9c68d1 --- /dev/null +++ b/sgv-archiver/main.cpp @@ -0,0 +1,363 @@ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sgv-header.h" +#include "sgv-index-entry.h" +#include "sgv-utils.h" + +int main(int argc, char* argv[]) +{ + + long long headerSize = 19; // manually set the header size of sgv as sizeof(header) will return 20 due to padding; + + char* arg_input = nullptr; + char* arg_output = nullptr; + + int index_size_multiplier = 2; + int passes = 25; + + uint32_t prime_list[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997 , 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1289, 1291, 1297, 1301, 1303, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1487, 1493, 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, 1619, 
1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, 1747, 1753, 1759, 1777, 1783, 1787, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1973, 1979, 1987, 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 2339, 2341, 2357, 2371, 2377, 2381, 2383, 2387, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531, 2539, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2687, 2689, 2693, 2699, 2707, 2711, 2719, 2729, 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2969, 2971, 2999, 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, 3087, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3257, 3259, 3271, 3299, 3307, 3313, 3319, 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, 3471, 3473, 3487, 3491, 3499, 3511, 3517, 3527, 3539, 3541, 3547, 3557, 3571, 3581, 3583, 3593, 3607, 3617, 3623, 3631, 3637, 3643, 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 3733, 3739, 3749, 3761, 3767, 3769, 3779, 3787, 3793, 3797, 3803, 3821, 3827, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3893, 3907, 3911, 3917, 3923, 3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4027, 4049, 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139 }; + + bool vault_has_been_split = false; // tracks if an vault has been split so a number can be appended to vault name. 
+ + for (auto arg_index = 1; arg_index < argc; arg_index++) // start at 1 because arg 0 is the executable path + { + if (strcmp("-h", argv[arg_index]) == 0 || strcmp("--help", argv[arg_index]) == 0) + { + // help argument detected + // print help screen and exit + printHelp(); + + return 0; + } + else if (strcmp("-i", argv[arg_index]) == 0) + { + // next argument should be the input path + arg_index++; + if(arg_index < argc) + arg_input = argv[arg_index]; + } + else if (strcmp("-o", argv[arg_index]) == 0) + { + // next argument should be the output file + arg_index++; + if (arg_index < argc) + arg_output = argv[arg_index]; + } + else if (strcmp("-p", argv[arg_index]) == 0) + { + // next argument should be the number of passes + arg_index++; + if (arg_index < argc) + { + if (atoi(argv[arg_index]) != 0) + { + passes = atoi(argv[arg_index]); + passes = std::min(passes, 500); + } + } + } + else + printf("Unknown Argument: %s\n", argv[arg_index]); + } + + printf("\tInput: %s\n", arg_input); + printf("\tOutput: %s\n", arg_output); + + if( arg_input == nullptr || arg_output == nullptr) + { + printf("One or more required arguments are missing. Exiting\n"); + return 1; + } + + // Validate the input path + if (!std::filesystem::exists(arg_input)) + { + printf("Input path does not exist. Exiting\n"); + return 1; + + } + + if (!std::filesystem::is_directory(arg_input)) + { + printf("Input path is not a directory. Exiting\n"); + return 1; + } + + // Validate the output path + std::filesystem::path output_path = std::filesystem::path(arg_output); + std::filesystem::path output_path_extension = output_path.extension(); + std::filesystem::path output_path_wo_extension = output_path.replace_extension(""); + std::filesystem::path output_path_parent = output_path.parent_path(); + + printf("Output parent directory: %s\n", output_path_parent.generic_string().c_str()); + + if (!std::filesystem::exists(output_path_parent)) + { + printf("Output file path does not exist. 
Exiting\n"); + return 1; + } + + // now create the list of files that need to be added to this archive + std::stack directories; + std::vector files; + + directories.push(arg_input); + + while (!directories.empty()) + { + std::filesystem::path cur_dir = directories.top(); + directories.pop(); + + for (auto const& dir_entry : std::filesystem::directory_iterator(cur_dir) ) + { + if (std::filesystem::is_regular_file( dir_entry ) ) + { + //printf("File: %s \n", std::filesystem::relative( dir_entry.path(), std::filesystem::path(arg_input)).generic_string().c_str()); + files.push_back( dir_entry ); + } + else if (std::filesystem::is_directory( dir_entry )) + { + //printf("Directory: %s \n", std::filesystem::relative( dir_entry.path(), std::filesystem::path(arg_input)).generic_string().c_str()); + directories.push( dir_entry ); + } + } + } + + printf("Total files to add to archives: %zd\n", files.size()); + + // iterate over the list of files to add them to archives splitting them every 4GB. + constexpr uint32_t max_file_size = std::numeric_limits::max(); + + int archive_count = 0; + + while (!files.empty()) + { + printf("Remaining files to be archived: %zd\n", files.size()); + + std::vector files_for_current_archive; + uint32_t current_data_size = 0; + bool current_archive_filled = false; + + while (!(files.empty() || current_archive_filled)) + { + + + // get the last element from the files list + std::filesystem::path& current_file = files.back(); + + // get its file size. 
+ uintmax_t current_file_size = std::filesystem::file_size(current_file); + + // check if there is enough room to fit it into the current file archive + uintmax_t current_archive_size = sizeof(sgv_header); + current_archive_size += sizeof(sgv_index_entry) * files_for_current_archive.size() * index_size_multiplier; + current_archive_size += current_data_size; + + if (current_archive_size + (sizeof(sgv_index_entry) * index_size_multiplier) + current_file_size < max_file_size) + { + // there is room to fit the new file into the archive + current_data_size += current_file_size; + files_for_current_archive.push_back(current_file); + files.pop_back(); + } + else + { + // there is not enough room to fit the new file into the archive + printf("Current Archive Full, Starting new..."); + current_archive_filled = true; + vault_has_been_split = true; + } + } + + // we have the files for the archive... process them into an archive. + + // determine the best hash modifier value to use for these index entries that maximize the spread of entries across the array to reduce collisions + uint32_t modifier_min = 1; + uint32_t modifier_max = 1024; + + uint32_t modifier_selected = 0; + double modifier_selected_spread_score = 1000000000.0;// std::numeric_limits::max(); + + // loop through the series hash modifier values and test build the index to test for how evenly spread the entries are. The one with the most even spread is selected. 
+ for (uint32_t modifier_current_index = 0; modifier_current_index < passes; modifier_current_index++) + { + printf("Optimizing Hash: Pass# %zd/%zd \r", modifier_current_index + 1, passes); + fflush(stdout); + + uint32_t modifier_current = prime_list[modifier_current_index]; + + // allocate space for the current test index + auto index_size = files_for_current_archive.size() * index_size_multiplier; + sgv_index_entry* index_entries = new sgv_index_entry[index_size](); + + for (auto file_index = 0; file_index < files_for_current_archive.size(); file_index++) + { + std::filesystem::path current_path = files_for_current_archive[file_index]; + std::string file_string = std::filesystem::relative(current_path, std::filesystem::path(arg_input)).generic_string(); + + uint32_t file_string_hash = hashString(file_string, 1, index_size, modifier_current); + + // starting with the hashed index, find a place in the index for this file. + bool found_place = false; + + while (!found_place) + { + if (index_entries[file_string_hash].key[0] == 0) + { + // we've found an empty index, put the data here. + found_place = true; + strcpy( index_entries[file_string_hash].key, file_string.c_str()); + } + else + { + // index slot is already used + + // move index to next slot + file_string_hash = ( file_string_hash + 1 ) % index_size; + } + } + } + + // files are all placed in the index + + // see if this index is better than the current best index + // calculateSpread returns a lower score for more event spread, so we want the lowest score to be chosen. 
+ double modifier_current_spread_score = calculateSpread(index_entries,index_size); + + if (modifier_current_spread_score < modifier_selected_spread_score) + { + modifier_selected = modifier_current; + modifier_selected_spread_score = modifier_current_spread_score; + } + + + delete[] index_entries; + } + + printf("Selected Hash Modifier: %d, Spread Score: %f\n", modifier_selected, modifier_selected_spread_score); + + // create the actual archive now + printf("Writing Vault\n"); + uint32_t total_file_size = 0; + std::filesystem::path final_output_path; + + + if(vault_has_been_split) + final_output_path = output_path_wo_extension.string() + std::to_string(archive_count) + output_path_extension.string(); + else + final_output_path = output_path_wo_extension.string() + output_path_extension.string(); + + std::ofstream sgv_fs(final_output_path, std::ios::out | std::ios::binary); + + if (!sgv_fs) + { + printf("Error opening file for writing: %s\n", final_output_path.generic_string().c_str()); + return 1; + } + + // create the header + sgv_header header; + header.magic[0] = 'S'; + header.magic[1] = 'G'; + header.magic[2] = 'V'; + header.magic[3] = '!'; + header.version_major = 1; + header.version_minor = 0; + header.index_entries = files_for_current_archive.size(); + header.index_size = header.index_entries * index_size_multiplier; // increase number of index entries for faster seeking. + header.index_hash_function = 1; // set hash_function to 1 (multiplication hash 1), NOTE may allow other values in future. 
+ header.index_hash_function_modifier = modifier_selected; + + sgv_fs.write((char*)&header.magic, sizeof(char) * 4); + sgv_fs.write((char*)&header.version_major, sizeof(char)); + sgv_fs.write((char*)&header.version_minor, sizeof(char)); + sgv_fs.write((char*)&header.index_entries, sizeof(uint32_t)); + sgv_fs.write((char*)&header.index_size, sizeof(uint32_t)); + sgv_fs.write((char*)&header.index_hash_function, sizeof(char)); + sgv_fs.write((char*)&header.index_hash_function_modifier, sizeof(uint32_t)); + + // build the archive index. + auto index_size = files_for_current_archive.size() * index_size_multiplier; + sgv_index_entry* index_entries = new sgv_index_entry[index_size](); + + // write the empty index to file for now (we'll go back and rewrite it once we have the real data).. just need to move the write position. + sgv_fs.write((char*)index_entries, sizeof(sgv_index_entry) * index_size); + + // go through all the files to archive and both write them to archive file and record its info into the index. + for (auto file_index = 0; file_index < files_for_current_archive.size(); file_index++) + { + std::filesystem::path current_path = files_for_current_archive[file_index]; + std::string file_string = std::filesystem::relative(current_path, std::filesystem::path(arg_input)).generic_string(); + auto current_file_size = std::filesystem::file_size(current_path); + + uint32_t file_string_hash = hashString(file_string, 1, index_size, modifier_selected); + + // starting with the hashed index, find a place in the index for this file. + bool found_place = false; + + while (!found_place) + { + if (index_entries[file_string_hash].key[0] == 0) + { + // we've found an empty index, put the data here. 
+ found_place = true; + strcpy(index_entries[file_string_hash].key, file_string.c_str()); + index_entries[file_string_hash].loc = sgv_fs.tellp(); + index_entries[file_string_hash].length = current_file_size; + + } + else + { + // index slot is already used + // move index to next slot + file_string_hash = (file_string_hash + 1) % index_size; + } + } + + // read the file and copy its contents to the archive + std::ifstream sourceFile(current_path, std::ios::in | std::ios::binary); + if (!sourceFile.is_open()) { + std::cerr << "Error opening source file!" << std::endl; + return 1; + } + + // Buffer to hold data while reading and writing + const size_t bufferSize = 8192; + char buffer[bufferSize]; + + // Read from the source file and write to the destination file + while (sourceFile.read(buffer, bufferSize)) { + sgv_fs.write(buffer, sourceFile.gcount()); // Write the number of bytes read + } + + // Write any remaining bytes + sgv_fs.write(buffer, sourceFile.gcount()); + + // Close the files + sourceFile.close(); + + } + + // seek back to the index location and write the final hashed index. + sgv_fs.seekp(headerSize); + sgv_fs.write((char*)index_entries, sizeof(sgv_index_entry)* index_size); + + + sgv_fs.close(); + + archive_count++; + + } + + return 0; + +} \ No newline at end of file diff --git a/sgv-archiver/sgv-header.h b/sgv-archiver/sgv-header.h new file mode 100644 index 0000000..8485e0d --- /dev/null +++ b/sgv-archiver/sgv-header.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +struct sgv_header{ + + char magic[4]; // SGV! 
/// @brief One slot of the vault's open-addressed index table.
///
/// The archiver writes an array of these slots to the vault file byte-for-byte
/// (`sizeof(sgv_index_entry) * index_size`), so the field order, field widths
/// and overall size are part of the on-disk format and must not change.
struct sgv_index_entry
{
    char key[256];    ///< NUL-terminated path of the file relative to the input directory;
                      ///< key[0] == 0 marks an unused slot.
    uint32_t loc;     ///< Write position in the vault file at which the file's contents begin.
    uint32_t length;  ///< Size of the stored file in bytes.
};

// The struct is dumped to disk as raw memory: fail the build if a compiler or
// platform ever lays it out differently than the format expects.
static_assert(sizeof(sgv_index_entry) == 256 + 2 * sizeof(uint32_t),
              "sgv_index_entry must be exactly 264 bytes to match the SGV on-disk index layout");
/// @brief Hashes a string to a slot number in an index of `size` slots.
///
/// @param string         Text to hash (the archiver passes the file's relative path).
/// @param hash_function  0 = no hashing (always slot 0);
///                       1 = multiplicative hash: hash = (hash * modifier + character) % size.
///                       Any other value also yields 0.
/// @param size           Number of slots in the index. A zero-sized index has no
///                       valid slot, so 0 is returned instead of dividing by zero.
/// @param modifier       Multiplier applied to the running hash for every character.
/// @return The slot number, always less than `size` when `size` is non-zero.
uint32_t hashString(const std::string& string, char hash_function, uint32_t size, uint32_t modifier)
{
    // Only hash function 1 produces a non-trivial hash, and `% size` below
    // would be a division by zero for an empty index.
    if (hash_function != 1 || size == 0)
        return 0;

    uint32_t string_hash = 0;

    // The running value is computed in uint32_t arithmetic on purpose: the
    // multiplication may wrap, and that wrap is part of the hash definition.
    // NOTE(review): characters are added as plain `char`, so bytes >= 0x80
    // contribute differently where `char` is signed vs. unsigned - confirm the
    // vault reader hashes keys the same way.
    for (const char character : string)
    {
        string_hash = (string_hash * modifier + character) % size;
    }

    return string_hash;
}
+double calculateSpread(const sgv_index_entry* index_entries, size_t index_size) { + + std::vector filled_indices; + + // Collect indices of filled entries + for (int i = 0; i < index_size; ++i) { + + if (index_entries[i].key[0] != 0) { // Assuming 0 indicates an empty entry + + filled_indices.push_back(i); + //printf("#"); + } + //else + //printf(" "); + } + //printf("\n"); + + if (filled_indices.size() < 2) + { + return 0.0; // Not enough entries to calculate spread + } + + // Calculate distances between consecutive filled entries + + std::vector distances; + if (filled_indices[0] > 0) + { + distances.push_back(filled_indices[0]); // Distance from start of array to first filled entry + } + + for (int i = 1; i < filled_indices.size(); ++i) + { + distances.push_back(filled_indices[i] - filled_indices[i - 1] - 1); // Subtract 1 to count only empty spaces + } + + if (filled_indices.back() < index_size - 1) + { + distances.push_back(index_size - 1 - filled_indices.back()); // Distance from last filled entry to end of array + } + + // Calculate the mean distance + double meanDistance = std::accumulate(distances.begin(), distances.end(), 0.0) / distances.size(); + + // Calculate the standard deviation of distances + double variance = 0.0; + for (int distance : distances) + { + variance += std::pow(distance - meanDistance, 2); + } + + variance /= distances.size(); + double stdDeviation = std::sqrt(variance); + + return stdDeviation; +} \ No newline at end of file diff --git a/sgv-archiver/sgv-utils.h b/sgv-archiver/sgv-utils.h new file mode 100644 index 0000000..0de0970 --- /dev/null +++ b/sgv-archiver/sgv-utils.h @@ -0,0 +1,8 @@ +#pragma once + +#include +#include "sgv-index-entry.h" + +void printHelp(); +uint32_t hashString(const std::string& string, char hash_function, uint32_t size, uint32_t modifier); +double calculateSpread(const sgv_index_entry* index_entries, size_t index_size); \ No newline at end of file