Skip to content

Commit

Permalink
Experimental multithreaded loading
Browse files Browse the repository at this point in the history
  • Loading branch information
PieKing1215 committed Sep 8, 2024
1 parent edeffa6 commit 99955cb
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 6 deletions.
6 changes: 4 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ use tweaks::editor_camera_speed::EditorCameraSpeedTweak;
use tweaks::editor_placement::EditorPlacementTweak;
use tweaks::editor_show_hidden::ShowHiddenComponents;
use tweaks::fullscreen::FullscreenTweak;
use tweaks::loading::LoadingTweak;
use tweaks::fast_loading_animations::FastLoadingAnimationsTweak;
use tweaks::map_lag::MapLagTweak;
use tweaks::multithreaded_loading::MultithreadedLoadingTweak;
use tweaks::{Tweak, TweakWrapper};
use windows::Win32::Foundation::HINSTANCE;
use windows::Win32::System::SystemServices::DLL_PROCESS_ATTACH;
Expand Down Expand Up @@ -81,7 +82,8 @@ impl MainHud {
this.add_tweak::<EditorPlacementTweak>(&process.region);
this.add_tweak::<ShowHiddenComponents>(&process.region);
this.add_tweak::<MapLagTweak>(&process.region);
this.add_tweak::<LoadingTweak>(&process.region);
this.add_tweak::<FastLoadingAnimationsTweak>(&process.region);
this.add_tweak::<MultithreadedLoadingTweak>(&process.region);
this.add_tweak::<FullscreenTweak>(&process.region);
this.add_tweak::<DevModeTweak>(&process.region);
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use super::{Defaults, InjectAt, Tweak};
const FAST_MENU_FADE_DEFAULTS: Defaults<bool> = Defaults::new(true, false);
const SKIP_LOAD_FINISH_DEFAULTS: Defaults<bool> = Defaults::new(true, false);

pub struct LoadingTweak;
pub struct FastLoadingAnimationsTweak;

impl Tweak for LoadingTweak {
impl Tweak for FastLoadingAnimationsTweak {
#[allow(clippy::too_many_lines)]
fn new(builder: &mut super::TweakBuilder) -> anyhow::Result<Self>
where
Expand Down
19 changes: 18 additions & 1 deletion src/tweaks/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ use memory_rs::internal::{
memory_region::MemoryRegion,
};
use num_traits::ToBytes;
use retour::GenericDetour;
use settings::{slider::SliderBuilder, toggle::ToggleBuilder, SettingUntyped};

pub mod dev_mode;
pub mod editor_camera_speed;
pub mod editor_placement;
pub mod editor_show_hidden;
pub mod fullscreen;
pub mod loading;
pub mod fast_loading_animations;
pub mod map_lag;
pub mod settings;
pub mod multithreaded_loading;

pub trait Tweak {
fn new(builder: &mut TweakBuilder) -> anyhow::Result<Self>
Expand Down Expand Up @@ -257,3 +259,18 @@ impl<N: ToBytes> NumberInjection<N> {
self.injection.remove_injection();
}
}

/// Minimal enable/disable interface over a detour, independent of the hooked
/// function's concrete signature.
///
/// `Toggle` stores values of this trait as `Box<dyn DetourUntyped + Send + Sync>`,
/// so detours for differently-typed functions can live in one list.
pub trait DetourUntyped {
/// Activates the detour.
///
/// # Errors
/// Returns an error if the underlying detour could not be enabled.
fn enable(&mut self) -> anyhow::Result<()>;
/// Deactivates the detour.
///
/// # Errors
/// Returns an error if the underlying detour could not be disabled.
fn disable(&mut self) -> anyhow::Result<()>;
}

impl<T: retour::Function> DetourUntyped for GenericDetour<T> {
fn enable(&mut self) -> anyhow::Result<()> {
unsafe { Ok(GenericDetour::enable(self)?) }
}

fn disable(&mut self) -> anyhow::Result<()> {
unsafe { Ok(GenericDetour::disable(self)?) }
}
}
147 changes: 147 additions & 0 deletions src/tweaks/multithreaded_loading.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use std::thread::JoinHandle;

use anyhow::Context;
use memory_rs::generate_aob_pattern;
use retour::GenericDetour;

use crate::tweaks::MemoryRegionExt;

use super::{Defaults, Tweak};

// Signature used to hook and call the game's `load_rom` function.
// NOTE(review): parameter meanings are unknown here; only the arity and the
// `fastcall` ABI are relied upon.
type LoadRomFn = extern "fastcall" fn(*mut (), *mut (), usize, *mut ());
// Signature used to hook and call the game's `load_save` function.
// NOTE(review): parameter meanings are unknown here as well.
type LoadSaveFn = extern "fastcall" fn(*mut (), *mut (), *mut (), *mut (), *mut ());

// Default values for the "Multithreaded Loading (experimental)" toggle.
// NOTE(review): presumably both `false` arguments mean "off by default" —
// confirm which default each `Defaults::new` argument represents.
const MULTITHREADED_LOADING_DEFAULTS: Defaults<bool> = Defaults::new(false, false);

// Trampoline to the original `load_rom`; written in `MultithreadedLoadingTweak::new`
// and read by the `load_rom` hook.
static mut LOAD_ROM_FN: Option<LoadRomFn> = None;
// Trampoline to the original `load_save`; written in `MultithreadedLoadingTweak::new`
// and read by the `load_save` hook.
static mut LOAD_SAVE_FN: Option<LoadSaveFn> = None;
// Handle of the in-flight `load_rom` worker thread: stored by the `load_rom`
// hook, then taken and joined by the `load_save` hook.
static mut LOAD_ROM_THREAD: Option<JoinHandle<()>> = None;

/// Experimental tweak that runs the game's `load_rom` call on a separate
/// thread. Carries no state of its own; the hooks are installed by its
/// `Tweak::new` implementation.
pub struct MultithreadedLoadingTweak;

impl Tweak for MultithreadedLoadingTweak {
    /// Finds the game's `load_rom` and `load_save` functions by byte pattern,
    /// detours both, and registers the "Multithreaded Loading (experimental)"
    /// toggle that owns the two detours.
    ///
    /// The `load_rom` hook runs the original function on a worker thread; the
    /// `load_save` hook runs the original `load_save` and then joins that
    /// worker so loading cannot complete out of order.
    ///
    /// # Errors
    /// Fails if either byte pattern cannot be found, if a detour cannot be
    /// created or enabled, or if the toggle cannot be registered.
    #[allow(clippy::too_many_lines)]
    fn new(builder: &mut super::TweakBuilder) -> anyhow::Result<Self>
    where
        Self: Sized,
    {
        builder.set_category(Some("Performance"));

        // --- multithreaded loading

        // move load_rom call to another thread
        // kind of a wildly unsafe change but I haven't had any issues with it so far
        // note that subdividing this further (ie. load_audio on another thread) DID lead to issues when exiting a world
        let load_rom_detour = unsafe {
            extern "fastcall" fn hook(
                param_1: *mut (),
                param_2: *mut (),
                param_3: usize,
                param_4: *mut (),
            ) {
                // NOTE: this function is called directly by the game. A panic
                // cannot unwind out of an `extern "fastcall"` function, so
                // every failure path here must avoid panicking where a
                // sensible fallback exists.
                unsafe {
                    // SAFETY: `LOAD_ROM_FN` is assigned in `new` before this
                    // detour is enabled, so it is `Some` whenever the hook runs.
                    let load_rom: LoadRomFn = LOAD_ROM_FN.unwrap_unchecked();

                    // If an earlier load is still in flight (no `load_save`
                    // ran in between), wait for it rather than dropping its
                    // handle, which would detach the thread and let two
                    // `load_rom` calls run concurrently.
                    if let Some(pending) = LOAD_ROM_THREAD.take() {
                        pending.join().expect("load_rom thread panicked");
                    }

                    // Raw pointers are not `Send`; carry them across the
                    // thread boundary as plain addresses.
                    let addr_1 = param_1 as usize;
                    let addr_2 = param_2 as usize;
                    let addr_4 = param_4 as usize;

                    let spawned = std::thread::Builder::new()
                        .name("load_rom".to_owned())
                        .spawn(move || {
                            load_rom(addr_1 as _, addr_2 as _, param_3, addr_4 as _);
                        });

                    match spawned {
                        Ok(thread) => LOAD_ROM_THREAD = Some(thread),
                        // The OS refused to create a thread: load on the
                        // calling thread, exactly as the unhooked game would,
                        // instead of taking the whole process down.
                        Err(_) => load_rom(param_1, param_2, param_3, param_4),
                    }
                }
            }

            #[rustfmt::skip]
            let load_rom_fn_addr = builder.region.scan_aob_single(&generate_aob_pattern![
                _, 0x89, 0x5c, _, 0x10,                  // MOV qword ptr [RSP + 0x10],RBX
                _, 0x89, 0x4c, _, 0x20,                  // MOV qword ptr [RSP + 0x20],R9
                _,                                       // PUSH _
                _,                                       // PUSH _
                _,                                       // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, 0x8d, 0x6c, _, 0xd9,                  // LEA RBP,[RSP + -0x27]
                0x48, 0x81, 0xec, 0xa0, 0x00, 0x00, 0x00 // SUB RSP,0xa0
            ]).context("Error finding load_rom fn addr")?;

            // The scanned address is treated as the entry point of a function
            // with the `LoadRomFn` signature; that is assumed, not verified.
            let det = GenericDetour::new(
                std::mem::transmute::<usize, LoadRomFn>(load_rom_fn_addr),
                hook,
            )
            .context("Failed to detour load_rom fn")?;

            // Publish the trampoline before enabling so the hook never
            // observes `None`.
            LOAD_ROM_FN = Some(std::mem::transmute::<&(), LoadRomFn>(det.trampoline()));

            det.enable().context("Failed to enable load_rom detour")?;

            det
        };

        // join load_rom thread later on in loading to make sure we don't finish out of order
        let load_save_detour = unsafe {
            extern "fastcall" fn hook(
                param_1: *mut (),
                param_2: *mut (),
                param_3: *mut (),
                param_4: *mut (),
                param_5: *mut (),
            ) {
                unsafe {
                    // SAFETY: `LOAD_SAVE_FN` is assigned in `new` before this
                    // detour is enabled, so it is `Some` whenever the hook runs.
                    let load_save: LoadSaveFn = LOAD_SAVE_FN.unwrap_unchecked();

                    // Run `load_save` first so it overlaps with the worker,
                    // then wait for the worker before returning to the game.
                    load_save(param_1, param_2, param_3, param_4, param_5);
                    if let Some(handle) = LOAD_ROM_THREAD.take() {
                        // A failed join means `load_rom` never completed.
                        // Carrying on would leave loading half-done, so
                        // stopping here (as the original `unwrap` did) is
                        // intentional.
                        handle.join().expect("load_rom thread panicked");
                    }
                }
            }

            #[rustfmt::skip]
            let load_save_fn_addr = builder.region.scan_aob_single(&generate_aob_pattern![
                _, 0x89, 0x5c, _, 0x18,                  // MOV qword ptr [RSP + 0x18],RBX
                _, 0x89, 0x54, _, 0x10,                  // MOV qword ptr [RSP + 0x10],RDX
                _,                                       // PUSH _
                _,                                       // PUSH _
                _,                                       // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, _,                                    // PUSH _
                _, 0x8d, 0x6c, _, 0xe1,                  // LEA RBP,[RSP + -0x1f]
                0x48, 0x81, 0xec, 0xd0, 0x00, 0x00, 0x00 // SUB RSP,0xd0
            ]).context("Error finding load_save fn addr")?;

            // The scanned address is treated as the entry point of a function
            // with the `LoadSaveFn` signature; that is assumed, not verified.
            let det = GenericDetour::new(
                std::mem::transmute::<usize, LoadSaveFn>(load_save_fn_addr),
                hook,
            )
            .context("Failed to detour load_save fn")?;

            // Publish the trampoline before enabling so the hook never
            // observes `None`.
            LOAD_SAVE_FN = Some(std::mem::transmute::<&(), LoadSaveFn>(det.trampoline()));

            det.enable().context("Failed to enable load_save detour")?;

            det
        };

        // Hand both detours to the toggle, which enables or disables them
        // whenever its value is applied.
        builder
            .toggle("Multithreaded Loading (experimental)", MULTITHREADED_LOADING_DEFAULTS)
            .tooltip("EXPERIMENTAL!\nSplits asset loading into a separate thread, reducing world load time by ~40%.\nI haven't had any issues using this but I wouldn't be suprised if there are unknown edge cases.")
            .detour(load_rom_detour, false)
            .detour(load_save_detour, false)
            .build()?;

        Ok(Self)
    }
}
27 changes: 26 additions & 1 deletion src/tweaks/settings/toggle.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use memory_rs::internal::injections::{Inject, Injection};

use crate::tweaks::{Defaults, TweakBuilder};
use crate::tweaks::{Defaults, DetourUntyped, TweakBuilder};

use super::{Setting, SettingImpl};

Expand All @@ -25,6 +25,7 @@ impl<'b, 'r> ToggleBuilder<'b, 'r> {
tooltip: String::new(),
label: label.into(),
injections: vec![],
detours: vec![],
},
}
}
Expand All @@ -45,6 +46,12 @@ impl<'b, 'r> ToggleBuilder<'b, 'r> {
self
}

/// Attaches a detour to this toggle.
///
/// The detour is boxed and stored together with `invert`. When the toggle's
/// value is applied, a detour with `invert == false` is enabled for `true`
/// and disabled for `false`; with `invert == true` the mapping is reversed.
#[must_use]
pub fn detour(
    mut self,
    detour: impl DetourUntyped + Send + Sync + 'static,
    invert: bool,
) -> Self {
    let boxed: Box<dyn DetourUntyped + Send + Sync> = Box::new(detour);
    self.toggle.detours.push((boxed, invert));
    self
}

pub fn build(self) -> anyhow::Result<()> {
self.tweak_builder
.add_setting(Setting::new(self.toggle, self.defaults))
Expand All @@ -55,6 +62,7 @@ pub struct Toggle {
label: String,
tooltip: String,
injections: Vec<(Injection, bool)>,
detours: Vec<(Box<dyn DetourUntyped + Send + Sync>, bool)>,
}

impl SettingImpl<bool> for Toggle {
Expand All @@ -76,6 +84,23 @@ impl SettingImpl<bool> for Toggle {
}
}

// A detour is active exactly when the toggle value differs from its
// `invert` flag: (true, false) and (false, true) enable it, while
// (true, true) and (false, false) disable it.
for (hook, inverted) in &mut self.detours {
    if value != *inverted {
        hook.enable()?;
    } else {
        hook.disable()?;
    }
}

Ok(())
}

Expand Down

0 comments on commit 99955cb

Please sign in to comment.