From f86dab5ff3e9c35e9e68b798b5c4faa08d390085 Mon Sep 17 00:00:00 2001 From: Per Lindgren Date: Fri, 21 Jan 2022 21:49:45 +0100 Subject: [PATCH] Added support for SRP based scheduling for armv6m --- CHANGELOG.md | 1 + ci/expected/complex.run | 47 ++++++++ examples/complex.rs | 132 ++++++++++++++++++++++ macros/src/codegen.rs | 2 +- macros/src/codegen/assertions.rs | 32 +++++- macros/src/codegen/shared_resources.rs | 33 ++++++ macros/src/codegen/util.rs | 1 + src/export.rs | 148 ++++++++++++++++++++++--- ui/v6m-interrupt-not-enough.rs_no | 54 +++++++++ 9 files changed, 434 insertions(+), 16 deletions(-) create mode 100644 ci/expected/complex.run create mode 100644 examples/complex.rs create mode 100644 ui/v6m-interrupt-not-enough.rs_no diff --git a/CHANGELOG.md b/CHANGELOG.md index 094275735b82..f05aeeaf714b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ For each category, *Added*, *Changed*, *Fixed* add new entries at the top! - Rework branch structure, release/vVERSION - Cargo clippy in CI - Use rust-cache Github Action +- Support for NVIC based SRP based scheduling for armv6m. - CI changelog entry enforcer - `examples/periodic-at.rs`, an example of a periodic timer without accumulated drift. - `examples/periodic-at2.rs`, an example of a periodic process with two tasks, with offset timing. 
diff --git a/ci/expected/complex.run b/ci/expected/complex.run new file mode 100644 index 000000000000..5df884dabd25 --- /dev/null +++ b/ci/expected/complex.run @@ -0,0 +1,47 @@ +init +idle p0 started +t2 p4 called 1 time +enter lock s4 0 +t3 p4 exit +idle enter lock s3 0 +idle pend t0 +idle pend t1 +idle pend t2 +t2 p4 called 2 times +enter lock s4 1 +t3 p4 exit +idle still in lock s3 0 +t1 p3 called 1 time +t1 enter lock s4 2 +t1 pend t0 +t1 pend t2 +t1 still in lock s4 2 +t2 p4 called 3 times +enter lock s4 2 +t3 p4 exit +t1 p3 exit +t0 p2 called 1 time +t0 p2 exit + +back in idle +enter lock s2 0 +idle pend t0 +idle pend t1 +t1 p3 called 2 times +t1 enter lock s4 3 +t1 pend t0 +t1 pend t2 +t1 still in lock s4 3 +t2 p4 called 4 times +enter lock s4 3 +t3 p4 exit +t1 p3 exit +idle pend t2 +t2 p4 called 5 times +enter lock s4 4 +t3 p4 exit +idle still in lock s2 0 +t0 p2 called 2 times +t0 p2 exit + +idle exit diff --git a/examples/complex.rs b/examples/complex.rs new file mode 100644 index 000000000000..e5cf6dbea30c --- /dev/null +++ b/examples/complex.rs @@ -0,0 +1,132 @@ +//! examples/complex.rs + +#![deny(unsafe_code)] +#![deny(warnings)] +#![no_main] +#![no_std] + +use panic_semihosting as _; + +#[rtic::app(device = lm3s6965)] +mod app { + + use cortex_m_semihosting::{debug, hprintln}; + use lm3s6965::Interrupt; + + #[shared] + struct Shared { + s2: u32, // shared with ceiling 2 + s3: u32, // shared with ceiling 3 + s4: u32, // shared with ceiling 4 + } + + #[local] + struct Local {} + + #[init] + fn init(_: init::Context) -> (Shared, Local, init::Monotonics) { + hprintln!("init").unwrap(); + + ( + Shared { + s2: 0, + s3: 0, + s4: 0, + }, + Local {}, + init::Monotonics(), + ) + } + + #[idle(shared = [s2, s3])] + fn idle(mut cx: idle::Context) -> ! 
{ + hprintln!("idle p0 started").ok(); + rtic::pend(Interrupt::GPIOC); + cx.shared.s3.lock(|s| { + hprintln!("idle enter lock s3 {}", s).ok(); + hprintln!("idle pend t0").ok(); + rtic::pend(Interrupt::GPIOA); // t0 p2, with shared ceiling 3 + hprintln!("idle pend t1").ok(); + rtic::pend(Interrupt::GPIOB); // t1 p3, with shared ceiling 3 + hprintln!("idle pend t2").ok(); + rtic::pend(Interrupt::GPIOC); // t2 p4, no sharing + hprintln!("idle still in lock s3 {}", s).ok(); + }); + hprintln!("\nback in idle").ok(); + + cx.shared.s2.lock(|s| { + hprintln!("enter lock s2 {}", s).ok(); + hprintln!("idle pend t0").ok(); + rtic::pend(Interrupt::GPIOA); // t0 p2, with shared ceiling 2 + hprintln!("idle pend t1").ok(); + rtic::pend(Interrupt::GPIOB); // t1 p3, no sharing + hprintln!("idle pend t2").ok(); + rtic::pend(Interrupt::GPIOC); // t2 p4, no sharing + hprintln!("idle still in lock s2 {}", s).ok(); + }); + hprintln!("\nidle exit").ok(); + + debug::exit(debug::EXIT_SUCCESS); // Exit QEMU simulator + + loop { + cortex_m::asm::nop(); + } + } + + #[task(binds = GPIOA, priority = 2, local = [times: u32 = 0], shared = [s2, s3])] + fn t0(cx: t0::Context) { + // Safe access to local `static mut` variable + *cx.local.times += 1; + + hprintln!( + "t0 p2 called {} time{}", + *cx.local.times, + if *cx.local.times > 1 { "s" } else { "" } + ) + .ok(); + hprintln!("t0 p2 exit").ok(); + } + + #[task(binds = GPIOB, priority = 3, local = [times: u32 = 0], shared = [s3, s4])] + fn t1(mut cx: t1::Context) { + // Safe access to local `static mut` variable + *cx.local.times += 1; + + hprintln!( + "t1 p3 called {} time{}", + *cx.local.times, + if *cx.local.times > 1 { "s" } else { "" } + ) + .ok(); + + cx.shared.s4.lock(|s| { + hprintln!("t1 enter lock s4 {}", s).ok(); + hprintln!("t1 pend t0").ok(); + rtic::pend(Interrupt::GPIOA); // t0 p2, with shared ceiling 2 + hprintln!("t1 pend t2").ok(); + rtic::pend(Interrupt::GPIOC); // t2 p4, no sharing + hprintln!("t1 still in lock s4 {}", s).ok(); 
+ }); + + hprintln!("t1 p3 exit").ok(); + } + + #[task(binds = GPIOC, priority = 4, local = [times: u32 = 0], shared = [s4])] + fn t2(mut cx: t2::Context) { + // Safe access to local `static mut` variable + *cx.local.times += 1; + + hprintln!( + "t2 p4 called {} time{}", + *cx.local.times, + if *cx.local.times > 1 { "s" } else { "" } + ) + .unwrap(); + + cx.shared.s4.lock(|s| { + hprintln!("enter lock s4 {}", s).ok(); + *s += 1; + }); + hprintln!("t3 p4 exit").ok(); + } +} diff --git a/macros/src/codegen.rs b/macros/src/codegen.rs index f5cae34a7276..01be1d578739 100644 --- a/macros/src/codegen.rs +++ b/macros/src/codegen.rs @@ -28,7 +28,7 @@ pub fn app(app: &App, analysis: &Analysis, extra: &Extra) -> TokenStream2 { let mut user = vec![]; // Generate the `main` function - let assertion_stmts = assertions::codegen(app, analysis); + let assertion_stmts = assertions::codegen(app, analysis, extra); let pre_init_stmts = pre_init::codegen(app, analysis, extra); diff --git a/macros/src/codegen/assertions.rs b/macros/src/codegen/assertions.rs index a8a4491bdf92..36ab03644504 100644 --- a/macros/src/codegen/assertions.rs +++ b/macros/src/codegen/assertions.rs @@ -1,11 +1,11 @@ use proc_macro2::TokenStream as TokenStream2; use quote::quote; -use crate::analyze::Analysis; +use crate::{analyze::Analysis, check::Extra, codegen::util}; use rtic_syntax::ast::App; /// Generates compile-time assertions that check that types implement the `Send` / `Sync` traits -pub fn codegen(app: &App, analysis: &Analysis) -> Vec { +pub fn codegen(app: &App, analysis: &Analysis, extra: &Extra) -> Vec { let mut stmts = vec![]; for ty in &analysis.send_types { @@ -21,5 +21,33 @@ pub fn codegen(app: &App, analysis: &Analysis) -> Vec { stmts.push(quote!(rtic::export::assert_monotonic::<#ty>();)); } + let device = &extra.device; + let arm_v6_checks: Vec<_> = app + .hardware_tasks + .iter() + .filter_map(|(_, task)| { + if !util::is_exception(&task.args.binds) { + let interrupt_name = &task.args.binds; 
+ Some(quote!(assert!((#device::Interrupt::#interrupt_name as u32) < 32);)) + } else { + None + } + }) + .collect(); + + let const_check = quote! { + const _CONST_CHECK: () = { + if rtic::export::is_armv6() { + #(#arm_v6_checks)* + } else { + // TODO: Add armv7 checks here + } + }; + + let _ = _CONST_CHECK; + }; + + stmts.push(const_check); + stmts } diff --git a/macros/src/codegen/shared_resources.rs b/macros/src/codegen/shared_resources.rs index 9e45cff97384..a016e4538d51 100644 --- a/macros/src/codegen/shared_resources.rs +++ b/macros/src/codegen/shared_resources.rs @@ -105,5 +105,38 @@ pub fn codegen( }) }; + // Computing mapping of used interrupts to masks + let interrupt_ids = analysis.interrupts.iter().map(|(p, (id, _))| (p, id)); + + use std::collections::HashMap; + let mut masks: HashMap = std::collections::HashMap::new(); + let device = &extra.device; + + for p in 0..3 { + masks.insert(p, quote!(0)); + } + + for (&priority, name) in interrupt_ids.chain(app.hardware_tasks.values().flat_map(|task| { + if !util::is_exception(&task.args.binds) { + Some((&task.args.priority, &task.args.binds)) + } else { + // TODO: exceptions not implemented + None + } + })) { + let name = quote!(#device::Interrupt::#name as u32); + if let Some(v) = masks.get_mut(&(priority - 1)) { + *v = quote!(#v | 1 << #name); + }; + } + + let mut mask_arr: Vec<(_, _)> = masks.iter().collect(); + mask_arr.sort_by_key(|(k, _v)| *k); + let mask_arr: Vec<_> = mask_arr.iter().map(|(_, v)| v).collect(); + + mod_app.push(quote!( + const MASKS: [u32; 3] = [#(#mask_arr),*]; + )); + (mod_app, mod_resources) } diff --git a/macros/src/codegen/util.rs b/macros/src/codegen/util.rs index 6a07732c3486..4a29754bc848 100644 --- a/macros/src/codegen/util.rs +++ b/macros/src/codegen/util.rs @@ -52,6 +52,7 @@ pub fn impl_mutex( #priority, CEILING, #device::NVIC_PRIO_BITS, + &MASKS, f, ) } diff --git a/src/export.rs b/src/export.rs index 838ae8435e95..ed51a9e9f718 100644 --- a/src/export.rs +++ b/src/export.rs 
@@ -102,6 +102,19 @@ impl Priority { } } +/// Const helper to check architecture +pub const fn is_armv6() -> bool { + #[cfg(not(armv6m))] + { + false + } + + #[cfg(armv6m)] + { + true + } +} + #[inline(always)] pub fn assert_send() where @@ -123,13 +136,40 @@ where { } -/// Lock the resource proxy by setting the BASEPRI -/// and running the closure with interrupt::free +/// Lock implementation using BASEPRI and global Critical Section (CS) /// /// # Safety /// -/// Writing to the BASEPRI -/// Dereferencing a raw pointer +/// The system ceiling is raised from current to ceiling +/// by either +/// - raising the BASEPRI to the ceiling value, or +/// - disabling all interrupts in case we want to +/// mask interrupts with maximum priority +/// +/// Dereferencing a raw pointer inside CS +/// +/// The priority.set/priority.get can safely be outside the CS +/// as being a context local cell (not affected by preemptions). +/// It is merely used in order to omit masking in case +/// current priority >= ceiling. +/// +/// Lock Efficiency: +/// Experiments validate (sub)-zero cost for CS implementation +/// (Sub)-zero as: +/// - Either zero OH (lock optimized out), or +/// - Amounting to an optimal assembly implementation +/// - The BASEPRI value is folded to a constant at compile time +/// - CS entry, single assembly instruction to write BASEPRI +/// - CS exit, single assembly instruction to write BASEPRI +/// - priority.set/get optimized out (their effect not) +/// - On par or better than any handwritten implementation of SRP +/// +/// Limitations: +/// The current implementation reads/writes BASEPRI once +/// even in some edge cases where this may be omitted. +/// Total OH per task is max 2 clock cycles, negligible in practice +/// but can in theory be fixed. 
+/// #[cfg(armv7m)] #[inline(always)] pub unsafe fn lock( @@ -137,6 +177,7 @@ pub unsafe fn lock( priority: &Priority, ceiling: u8, nvic_prio_bits: u8, + _mask: &[u32; 3], f: impl FnOnce(&mut T) -> R, ) -> R { let current = priority.get(); @@ -160,13 +201,50 @@ pub unsafe fn lock( } } -/// Lock the resource proxy by setting the PRIMASK -/// and running the closure with ``interrupt::free`` +/// Lock implementation using interrupt masking /// /// # Safety /// -/// Writing to the PRIMASK -/// Dereferencing a raw pointer +/// The system ceiling is raised from current to ceiling +/// by computing a 32 bit `mask` (1 bit per interrupt) +/// 1: ceiling >= priority > current +/// 0: else +/// +/// On CS entry, `clear_enable_mask(mask)` disables interrupts +/// On CS exit, `set_enable_mask(mask)` re-enables interrupts +/// +/// The priority.set/priority.get can safely be outside the CS +/// as being a context local cell (not affected by preemptions). +/// It is merely used in order to omit masking in case +/// current priority >= ceiling. 
+/// +/// Dereferencing a raw pointer is done safely inside the CS +/// +/// Lock Efficiency: +/// Early experiments validate (sub)-zero cost for CS implementation +/// (Sub)-zero as: +/// - Either zero OH (lock optimized out), or +/// - Amounting to an optimal assembly implementation +/// - if ceiling == (1 << nvic_prio_bits) +/// - we execute the closure in a global critical section (interrupt free) +/// - CS entry cost, single write to core register +/// - CS exit cost, single write to core register +/// else +/// - The `mask` value is folded to a constant at compile time +/// - CS entry, single write of the 32 bit `mask` to the `icer` register +/// - CS exit, single write of the 32 bit `mask` to the `iser` register +/// - priority.set/get optimized out (their effect not) +/// - On par or better than any hand written implementation of SRP +/// +/// Limitations: +/// Current implementation does not allow for tasks with shared resources +/// to be bound to exception handlers, as these cannot be masked in HW. 
+/// +/// Possible solutions: +/// - Mask exceptions by global critical sections (interrupt::free) +/// - Temporary lower exception priority +/// +/// These possible solutions are set goals for future work #[cfg(not(armv7m))] #[inline(always)] pub unsafe fn lock( @@ -174,20 +252,64 @@ pub unsafe fn lock( priority: &Priority, ceiling: u8, _nvic_prio_bits: u8, + masks: &[u32; 3], f: impl FnOnce(&mut T) -> R, ) -> R { let current = priority.get(); - if current < ceiling { - priority.set(u8::max_value()); - let r = interrupt::free(|_| f(&mut *ptr)); - priority.set(current); - r + if ceiling >= 4 { + // safe to manipulate outside critical section + priority.set(ceiling); + // execute closure under protection of raised system ceiling + let r = interrupt::free(|_| f(&mut *ptr)); + // safe to manipulate outside critical section + priority.set(current); + r + } else { + // safe to manipulate outside critical section + priority.set(ceiling); + let mask = compute_mask(current, ceiling, masks); + clear_enable_mask(mask); + + // execute closure under protection of raised system ceiling + let r = f(&mut *ptr); + + set_enable_mask(mask); + + // safe to manipulate outside critical section + priority.set(current); + r + } } else { + // execute closure without raising system ceiling f(&mut *ptr) } } +#[cfg(not(armv7m))] +#[inline(always)] +fn compute_mask(from_prio: u8, to_prio: u8, masks: &[u32; 3]) -> u32 { + let mut res = 0; + masks[from_prio as usize..to_prio as usize] + .iter() + .for_each(|m| res |= m); + res +} + +// enables interrupts +#[cfg(not(armv7m))] +#[inline(always)] +unsafe fn set_enable_mask(mask: u32) { + (*NVIC::ptr()).iser[0].write(mask) +} + +// disables interrupts +#[cfg(not(armv7m))] +#[inline(always)] +unsafe fn clear_enable_mask(mask: u32) { + (*NVIC::ptr()).icer[0].write(mask) +} + #[inline] #[must_use] pub fn logical2hw(logical: u8, nvic_prio_bits: u8) -> u8 { diff --git a/ui/v6m-interrupt-not-enough.rs_no b/ui/v6m-interrupt-not-enough.rs_no new file mode 
100644 index 000000000000..3fbf3cf7bf05 --- /dev/null +++ b/ui/v6m-interrupt-not-enough.rs_no @@ -0,0 +1,54 @@ +//! v6m-interrupt-not-enough.rs_no (not run atm) +//! +//! Expected behavior: +//! should pass +//! > cargo build --example m0_perf_err --target thumbv7m-none-eabi --release +//! +//! should fail +//! > cargo build --example m0_perf_err --target thumbv6m-none-eabi --release +//! Compiling cortex-m-rtic v1.0.0 (/home/pln/rust/rtic/cortex-m-rtic) +//! error[E0308]: mismatched types +//! --> examples/m0_perf_err.rs:25:1 +//! | +//! 25 | #[rtic::app(device = lm3s6965)] +//! | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ expected an array with a fixed size of 4 elements, found one with 5 elements +//! | +//! = note: this error originates in the attribute macro `rtic::app` (in Nightly builds, run with -Z macro-backtrace for more info) + +#![deny(unsafe_code)] +#![deny(warnings)] +#![no_main] +#![no_std] + +use panic_semihosting as _; + +#[rtic::app(device = lm3s6965)] +mod app { + + use cortex_m_semihosting::debug; + + #[shared] + struct Shared {} + + #[local] + struct Local {} + + #[init] + fn init(_: init::Context) -> (Shared, Local, init::Monotonics) { + (Shared {}, Local {}, init::Monotonics()) + } + + #[inline(never)] + #[idle] + fn idle(_cx: idle::Context) -> ! { + debug::exit(debug::EXIT_SUCCESS); // Exit QEMU simulator + + loop { + cortex_m::asm::nop(); + } + } + + // priority too high for v6m + #[task(binds = GPIOA, priority = 5)] + fn t0(_cx: t0::Context) {} +}