diff --git a/examples/rp2040/multicore/Cargo.lock b/examples/rp2040/multicore/Cargo.lock index d82faea..66f93f4 100644 --- a/examples/rp2040/multicore/Cargo.lock +++ b/examples/rp2040/multicore/Cargo.lock @@ -187,6 +187,7 @@ version = "0.1.0" dependencies = [ "cortex-m", "cortex-m-rt", + "embedded-hal", "lilos", "panic-halt", "pin-project-lite", @@ -194,7 +195,6 @@ dependencies = [ "rp-pico", "rp2040-boot2 0.2.1", "rp2040-hal", - "rp2040-pac 0.3.0", ] [[package]] @@ -266,6 +266,9 @@ name = "portable-atomic" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +dependencies = [ + "critical-section", +] [[package]] name = "proc-macro2" @@ -340,7 +343,7 @@ dependencies = [ "pio", "rand_core", "rp2040-hal-macros", - "rp2040-pac 0.5.0", + "rp2040-pac", "usb-device", "vcell", "void", @@ -358,17 +361,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "rp2040-pac" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13a6106d5db01c7171a39c1f7696780912db9b42fe7ac722db60069c8904ea7c" -dependencies = [ - "cortex-m", - "cortex-m-rt", - "vcell", -] - [[package]] name = "rp2040-pac" version = "0.5.0" diff --git a/examples/rp2040/multicore/Cargo.toml b/examples/rp2040/multicore/Cargo.toml index b596d1c..ad72406 100644 --- a/examples/rp2040/multicore/Cargo.toml +++ b/examples/rp2040/multicore/Cargo.toml @@ -18,14 +18,14 @@ default-target = "thumbv6m-none-eabi" [dependencies] cortex-m = "0.7.4" cortex-m-rt = "0.7.1" -lilos = { path = "../../../os", default-features = false, features = ["timer", "systick"] } +lilos = { path = "../../../os", default-features = false, features = ["timer", "2core"] } panic-halt = "0.2.0" -rp2040-pac = {version = "0.3", features = ["rt"]} rp2040-boot2 = "0.2" rp2040-hal = { version = "0.9.1", features = ["critical-section-impl"] } -portable-atomic = { version = "1.6.0", default-features = false, 
features = ["unsafe-assume-single-core"] } +portable-atomic = { version = "1.6.0", default-features = false, features = ["critical-section"] } rp-pico = "0.8.0" pin-project-lite = "0.2.13" +embedded-hal = "0.2.7" [[bin]] name = "lilos-example-rp2040-multicore" diff --git a/examples/rp2040/multicore/src/main.rs b/examples/rp2040/multicore/src/main.rs index ab44035..d7cdbcd 100644 --- a/examples/rp2040/multicore/src/main.rs +++ b/examples/rp2040/multicore/src/main.rs @@ -24,8 +24,23 @@ extern crate panic_halt; pub mod fifo; +use fifo::AsyncFifo; -use rp2040_hal as hal; +use rp_pico as bsp; + +use bsp::{hal, hal::pac}; +use hal::fugit::ExtU64; +use hal::multicore::{Multicore, Stack}; +use hal::Clock; + +use core::pin::{pin, Pin}; +use cortex_m::peripheral::syst::SystClkSource; +use cortex_m_rt::exception; +use lilos::list::List; + +use embedded_hal::digital::v2::ToggleableOutputPin; + +type Instant = hal::fugit::Instant; // For RP2040, we need to include a bootloader. The general Cargo build process // doesn't have great support for this, so we included it as a binary constant. @@ -33,51 +48,215 @@ use rp2040_hal as hal; #[used] static BOOT: [u8; 256] = rp2040_boot2::BOOT_LOADER_W25Q080; -// How often our blinky task wakes up (1/2 our blink frequency). -const PERIOD: lilos::time::Millis = lilos::time::Millis(500); +static mut CORE1_STACK: Stack<4096> = Stack::new(); fn cpu_core_id() -> u16 { hal::Sio::core() as u16 } -#[cortex_m_rt::entry] +fn tick() -> Instant { + let timer = unsafe { &*pac::TIMER::ptr() }; + Instant::from_ticks(loop { + let e = timer.timerawh.read().bits(); + let t = timer.timerawl.read().bits(); + let e2 = timer.timerawh.read().bits(); + if e == e2 { + break ((e as u64) << 32) | (t as u64); + } + }) +} + +/// We mostly just need to not enter an infinite loop, which is what the +/// `cortex_m_rt` does in `DefaultHandler`. But turning systick off until it's +/// needed can save some energy, especially if the reload value is small. 
+#[exception] +fn SysTick() { + // Disable the SysTick interrupt, we enable it again when necessary + // Safety: We are in the SysTick interrupt handler, having been woken up by + // it, so shouldn't receive another systick interrupt here. + unsafe { + let syst = &*cortex_m::peripheral::SYST::PTR; + const SYST_CSR_TICKINT: u32 = 1 << 1; + syst.csr.modify(|v| v & !SYST_CSR_TICKINT); + } +} + +fn make_idle_task<'a>( + core: &'a mut cortex_m::Peripherals, + timer_list: Pin<&'a List>, + cycles_per_us: u32, +) -> impl FnMut() + 'a { + // Make it so that `wfe` waits for masked interrupts as well as events -- + // the problem is that the idle-task is called with interrupts disabled (to + // not have an interrupt fire before we call the idle task but after we + // check that we should sleep -- for `wfi` it would just wake up). + // See + // https://www.embedded.com/the-definitive-guide-to-arm-cortex-m0-m0-wake-up-operation/ + const SEVONPEND: u32 = 1 << 4; + unsafe { + core.SCB.scr.modify(|scr| scr | SEVONPEND); + } + + // 24-bit timer + let max_sleep_us = ((1 << 24) - 1) / cycles_per_us; + core.SYST.set_clock_source(SystClkSource::Core); + + move || { + match timer_list.peek() { + Some(wake_at) => { + let now = tick(); + if wake_at > now { + let wake_in_us = u64::min( + max_sleep_us as u64, + (wake_at - now).to_micros(), + ); + let wake_in_ticks = wake_in_us as u32 * cycles_per_us; + // Setting zero to the reload register disables systick -- + // `wake_in_ticks` is non-zero due to `wake_at > now` + core.SYST.set_reload(wake_in_ticks); + core.SYST.clear_current(); + core.SYST.enable_interrupt(); + core.SYST.enable_counter(); + // We use `SEV` to signal from the other core that we can + // send more data. See also the comment above on SEVONPEND + cortex_m::asm::wfe(); + } else { + // We just missed a timer, don't idle + } + } + None => { + // We use `SEV` to signal from the other core that we can send + // more data.
See also the comment above on SEVONPEND + cortex_m::asm::wfe(); + } + } + } +} + +struct Timer<'a> { + timer_list: Pin<&'a List>, +} + +impl<'a> lilos::time::Timer for Timer<'a> { + type Instant = Instant; + fn timer_list(&self) -> Pin<&'a List> { + self.timer_list + } + + fn now(&self) -> Self::Instant { + tick() + } +} + +#[bsp::entry] fn main() -> ! { // Check out peripherals from the runtime. - let mut cp = cortex_m::Peripherals::take().unwrap(); - let p = rp2040_pac::Peripherals::take().unwrap(); - - // Configure our output pin, GPIO 25. Begin by bringing IO BANK0 out of - // reset. - p.RESETS.reset.modify(|_, w| w.io_bank0().clear_bit()); - while !p.RESETS.reset_done.read().io_bank0().bit() {} - - // Set GPIO25 to be controlled by SIO. - p.IO_BANK0.gpio[25].gpio_ctrl.write(|w| w.funcsel().sio()); - // Now have SIO configure GPIO25 as an output. - p.SIO.gpio_oe_set.write(|w| unsafe { w.bits(1 << 25) }); - - // Create a task to blink the LED. You could also write this as an `async - // fn` but we've inlined it as an `async` block for simplicity. - let blink = core::pin::pin!(async { - // PeriodicGate is a `lilos` tool for implementing low-jitter periodic - // actions. It opens once per PERIOD. - let mut gate = lilos::time::PeriodicGate::from(PERIOD); - - // Loop forever, blinking things. Note that this borrows the device - // peripherals `p` from the enclosing stack frame. 
+ let core = pac::CorePeripherals::take().unwrap(); + let mut pac = pac::Peripherals::take().unwrap(); + let mut watchdog = hal::Watchdog::new(pac.WATCHDOG); + let clocks = hal::clocks::init_clocks_and_plls( + bsp::XOSC_CRYSTAL_FREQ, + pac.XOSC, + pac.CLOCKS, + pac.PLL_SYS, + pac.PLL_USB, + &mut pac.RESETS, + &mut watchdog, + ) + .ok() + .unwrap(); + let sys_clk = clocks.system_clock.freq(); + + // Make it so that `wfe` waits for masked interrupts as well as events -- + // the problem is that the idle-task is called with interrupts disabled (to + // not have an interrupt fire before we call the idle task but after we + // check that we should sleep -- for `wfi` it would just wake up). + // See + // https://www.embedded.com/the-definitive-guide-to-arm-cortex-m0-m0-wake-up-operation/ + const SEVONPEND: u32 = 1 << 4; + unsafe { + core.SCB.scr.modify(|scr| scr | SEVONPEND); + } + + let mut sio = hal::Sio::new(pac.SIO); + let pins = hal::gpio::Pins::new( + pac.IO_BANK0, + pac.PADS_BANK0, + sio.gpio_bank0, + &mut pac.RESETS, + ); + + let mut led = pins.gpio25.into_push_pull_output(); + + let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo); + let cores = mc.cores(); + let core1 = &mut cores[1]; + let _task = core1.spawn(unsafe { &mut CORE1_STACK.mem }, move || { + // Because both cores' peripherals are mapped to the same address, this + // is not necessary, but serves as a reminder that core 1 has its own + // core peripherals. + // See also https://github.com/rust-embedded/cortex-m/issues/149 + let mut core = unsafe { pac::CorePeripherals::steal() }; + let pac = unsafe { pac::Peripherals::steal() }; + let mut sio = hal::Sio::new(pac.SIO); + + lilos::create_list!(timer_list, Instant::from_ticks(0)); + let timer_list = timer_list.as_ref(); + let timer = Timer { timer_list }; + let idle_task = make_idle_task(&mut core, timer_list, sys_clk.to_MHz()); + + fifo::reset_read_fifo(&mut sio.fifo); + + // Create a task to blink the LED.
You could also write this as an `async + // fn` but we've inlined it as an `async` block for simplicity. + let blink = pin!(async { + // Loop forever, blinking things. Note that this borrows `sio`, `timer` + // and `led` from the enclosing stack frame. + loop { + let delay = sio.fifo.read_async().await as u64; + lilos::time::sleep_for(&timer, delay.millis()).await; + led.toggle().unwrap(); + } + }); + + lilos::exec::run_tasks_with_idle( + &mut [blink], // <-- array of tasks + lilos::exec::ALL_TASKS, // <-- which to start initially + &timer, + 1, + idle_task, + ) + }); + + let compute_delay = pin!(async { + /// How much we adjust the LED period every cycle + const INC: i32 = 2; + /// The minimum LED toggle interval we allow for. + const MIN: i32 = 0; + /// The maximum LED toggle interval we allow for. Keep it reasonably short so it's easy to see. + const MAX: i32 = 100; loop { - p.SIO.gpio_out_xor.write(|w| unsafe { w.bits(1 << 25) }); - gate.next_time(&lilos::time::SysTickTimer).await; + for period in (MIN..MAX).step_by(INC as usize) { + sio.fifo.write_async(period as u32).await; + } + for period in (MIN..MAX).step_by(INC as usize).rev() { + sio.fifo.write_async(period as u32).await; + } } }); - // Configure the systick timer for 1kHz ticks at the default ROSC speed of - // _roughly_ 6 MHz. - lilos::time::initialize_sys_tick(&mut cp.SYST, 6_000_000); + lilos::create_list!(timer_list, Instant::from_ticks(0)); + let timer_list = timer_list.as_ref(); + let timer = Timer { timer_list }; + // Set up and run the scheduler with a single task. - lilos::exec::run_tasks( - &mut [blink], // <-- array of tasks + lilos::exec::run_tasks_with_idle( + &mut [compute_delay], // <-- array of tasks lilos::exec::ALL_TASKS, // <-- which to start initially - &lilos::time::SysTickTimer, + &timer, + 0, + // We use `SEV` to signal from the other core that we can send more + // data.
See also the comment above on SEVONPEND + cortex_m::asm::wfe, ) } diff --git a/os/Cargo.toml b/os/Cargo.toml index d5d81b8..e81e504 100644 --- a/os/Cargo.toml +++ b/os/Cargo.toml @@ -21,6 +21,7 @@ mutex = [] spsc = [] timer = [] systick = [] +2core = [] [dependencies] cfg-if = "1.0.0" diff --git a/os/src/exec.rs b/os/src/exec.rs index 3b59621..1980cf7 100644 --- a/os/src/exec.rs +++ b/os/src/exec.rs @@ -149,6 +149,9 @@ use crate::util::Captures; /// atomically checks and clears this at each iteration. static WAKE_BITS: AtomicUsize = AtomicUsize::new(0); +/// Wake bits used by a previous/concurrent invocation of run_tasks +static WAKE_BITS_USED: AtomicUsize = AtomicUsize::new(0); + /// Computes the wake bit mask for the task with the given index, which is /// equivalent to `1 << (index % USIZE_BITS)`. const fn wake_mask_for_index(index: usize) -> usize { @@ -176,6 +179,7 @@ static VTABLE: RawWakerVTable = RawWakerVTable::new( /// Technically, this will wake any task `n` where `n % 32 == index % 32`. fn waker_for_task(index: usize) -> Waker { let mask = wake_mask_for_index(index); + // Safety: Waker::from_raw is unsafe because bad things happen if the // combination of this particular pointer and the functions in the vtable // don't meet the Waker contract or are incompatible. In our case, our @@ -359,6 +363,7 @@ pub fn run_tasks<#[cfg(feature = "timer")] T: Timer>( futures: &mut [Pin<&mut dyn Future>], initial_mask: usize, #[cfg(feature = "timer")] timer: &T, + #[cfg(feature = "2core")] core: u8, ) -> ! 
{ // Safety: we're passing Interrupts::Masked, the always-safe option unsafe { @@ -367,6 +372,8 @@ pub fn run_tasks<#[cfg(feature = "timer")] T: Timer>( initial_mask, #[cfg(feature = "timer")] timer, + #[cfg(feature = "2core")] + core, Interrupts::Masked, || { cortex_m::asm::wfi(); @@ -399,6 +406,7 @@ pub fn run_tasks_with_idle<#[cfg(feature = "timer")] T: Timer>( futures: &mut [Pin<&mut dyn Future>], initial_mask: usize, #[cfg(feature = "timer")] timer: &T, + #[cfg(feature = "2core")] core: u8, idle_hook: impl FnMut(), ) -> ! { // Safety: we're passing Interrupts::Masked, the always-safe option @@ -408,6 +416,8 @@ pub fn run_tasks_with_idle<#[cfg(feature = "timer")] T: Timer>( initial_mask, #[cfg(feature = "timer")] timer, + #[cfg(feature = "2core")] + core, Interrupts::Masked, idle_hook, ) @@ -438,6 +448,7 @@ pub unsafe fn run_tasks_with_preemption<#[cfg(feature = "timer")] T: Timer>( futures: &mut [Pin<&mut dyn Future>], initial_mask: usize, #[cfg(feature = "timer")] timer: &T, + #[cfg(feature = "2core")] core: u8, interrupts: Interrupts, ) -> ! { // Safety: this is safe if our own contract is upheld. @@ -447,6 +458,8 @@ pub unsafe fn run_tasks_with_preemption<#[cfg(feature = "timer")] T: Timer>( initial_mask, #[cfg(feature = "timer")] timer, + #[cfg(feature = "2core")] + core, interrupts, cortex_m::asm::wfi, ) @@ -482,6 +495,7 @@ pub unsafe fn run_tasks_with_preemption_and_idle< futures: &mut [Pin<&mut dyn Future>], initial_mask: usize, #[cfg(feature = "timer")] timer: &T, + #[cfg(feature = "2core")] core: u8, interrupts: Interrupts, mut idle_hook: impl FnMut(), ) -> ! { @@ -499,15 +513,25 @@ pub unsafe fn run_tasks_with_preemption_and_idle< // be wrong. let futures_ptr: *mut [Pin<*mut dyn Future>] = futures_ptr as _; // Stash the task future array in a known location. 
+ #[cfg(not(feature = "2core"))] unsafe { TASK_FUTURES = Some(futures_ptr); } + + #[cfg(feature = "2core")] + unsafe { + TASK_FUTURES[core as usize] = Some(futures_ptr); + } } - WAKE_BITS.store(initial_mask, Ordering::SeqCst); + let this_count = futures.len(); + let prev_count = WAKE_BITS_USED.fetch_add(this_count, Ordering::SeqCst); + let this_mask = ((1usize << this_count) - 1).rotate_left(prev_count as u32); + + WAKE_BITS.fetch_or(initial_mask & this_mask, Ordering::SeqCst); // Initialize the timer list. - #[cfg(all(feature = "timer", feature = "systick"))] + #[cfg(all(feature = "timer", feature = "systick", not(feature = "2core")))] crate::time::SysTickTimer.init(); #[cfg(feature = "timer")] @@ -520,13 +544,14 @@ pub unsafe fn run_tasks_with_preemption_and_idle< tl.wake_less_than(timer.now()); } - // Capture and reset wake bits, then process any 1s. + // Capture and reset wake bits (for the current executor only), + // then process any 1s. // TODO: this loop visits every future testing for 1 bits; it would // almost certainly be faster to visit the futures corresponding to // 1 bits instead. I have avoided this for now because of the // increased complexity. - let mask = WAKE_BITS.swap(0, Ordering::SeqCst); - for (i, f) in futures.iter_mut().enumerate() { + let mask = WAKE_BITS.fetch_and(!this_mask, Ordering::SeqCst); + for (f, i) in futures.iter_mut().zip(prev_count..) { if mask & wake_mask_for_index(i) != 0 { poll_task(i, f.as_mut()); } @@ -534,7 +559,7 @@ pub unsafe fn run_tasks_with_preemption_and_idle< // If none of the futures woke each other, we're relying on an // interrupt to set bits -- so we can sleep waiting for it. 
- if WAKE_BITS.load(Ordering::SeqCst) == 0 { + if WAKE_BITS.load(Ordering::SeqCst) & this_mask == 0 { idle_hook(); } @@ -560,7 +585,16 @@ pub unsafe fn run_tasks_with_preemption_and_idle< /// Note that the `#[used]` annotation is load-bearing here -- without it the /// compiler will happily throw the variable away, confusing the debugger. #[used] -static mut TASK_FUTURES: Option<*mut [Pin<*mut dyn Future>]> = None; +#[cfg(not(feature = "2core"))] +static mut TASK_FUTURES: Option< + *mut [Pin<*mut dyn Future>], +> = None; + +#[used] +#[cfg(feature = "2core")] +static mut TASK_FUTURES: [Option< + *mut [Pin<*mut dyn Future>], +>; 2] = [None; 2]; /// Constant that can be passed to `run_tasks` and `wake_tasks_by_mask` to mean /// "all tasks."