diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2b0ef6ed47079497d1b95b98b978c59d8c3fb6..e0ac539f3d06009f50e45c318c1b08ee093dfd73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 2021-02-28 + +- examples/bare2.rs, raw timer access. +- examples/bare3.rs, timing abstractions. +- examples/bare4.rs, a simple bare metal peripheral access API. +- examples/bare5.rs, write your own C-like peripheral access API. + ## 2021-02-26 - examples/bare1.rs, bare metal 101! diff --git a/examples/rtic_bare2.rs b/examples/rtic_bare2.rs new file mode 100644 index 0000000000000000000000000000000000000000..48fbb05ed22dc62b4fae75ef47161d19138ce23e --- /dev/null +++ b/examples/rtic_bare2.rs @@ -0,0 +1,139 @@ +//! bare2.rs +//! +//! Measuring execution time +//! +//! What it covers +//! - Generating documentation +//! - Using core peripherals +//! - Measuring time using the DWT + +#![no_main] +#![no_std] + +// use panic_halt as _; + +// use cortex_m::{iprintln, peripheral::DWT, Peripherals}; +// use cortex_m_rt::entry; + +use cortex_m::peripheral::DWT; +use cortex_m_semihosting::hprintln; +use panic_semihosting as _; +use stm32f4; + +#[rtic::app(device = stm32f4)] +const APP: () = { + #[init] + fn init(mut cx: init::Context) { + cx.core.DWT.enable_cycle_counter(); + + // Reading the cycle counter can be done without `owning` access to + // the DWT (since it has no side effect). + // + // Look in the docs: + // pub fn enable_cycle_counter(&mut self) + // pub fn get_cycle_count() -> u32 + // + // Notice the difference in the function signature! + + let start = DWT::get_cycle_count(); + wait(1_000_000); + let end = DWT::get_cycle_count(); + + // notice all printing outside of the section to measure! 
+ hprintln!("Start {:?}", start).ok(); + hprintln!("End {:?}", end).ok(); + hprintln!("Diff {:?}", end.wrapping_sub(start)).ok(); + + // wait(100); + } +}; + +// burns CPU cycles by just looping `i` times +#[inline(never)] +#[no_mangle] +fn wait(i: u32) { + for _ in 0..i { + // no operation (ensured not optimized out) + cortex_m::asm::nop(); + } +} + +// 0. Setup +// +// > cargo doc --open +// +// `cargo doc` will document your crate, and open the docs in your browser. +// If it does not auto-open, then copy paste the path shown in your browser. +// +// Notice, it will try to document all dependencies, you may have only one +// panic handler, so temporarily comment out all but one in `Cargo.toml`. +// +// In the docs, search (`S`) for DWT, and click `cortex_m::peripheral::DWT`. +// Read the API docs. +// +// 1. Build and run the application in vscode using (Cortex Debug). +// +// What is the output in the Adapter Output console? +// (Notice, it will take a while as we loop one million times at only 16 MHz.) +// +// ** your answer here ** +// +// Rebuild and run in (Cortex Release). +// +// ** your answer here ** +// +// Compute the ratio between debug/release optimized code +// (the speedup). +// +// ** your answer here ** +// +// commit your answers (bare2_1) +// +// 2. As seen there is a HUGE difference in between Debug and Release builds. +// In Debug builds, the compiler preserves all abstractions, so there will +// be a lot of calls and pointer indirections. +// +// In Release builds, the compiler strives to "smash" all abstractions into straight +// line code. +// +// This is what Rust "zero-cost abstractions" means, not zero execution time but rather, +// "as good as it possibly gets" (you pay no extra cost for using abstractions at run-time). +// +// In Release builds, the compiler is able to "specialize" the implementation +// of each function. +// +// Let us look in detail at the `wait` function: +// Place a breakpoint at line 54 (wait). 
 Restart the (Cortex Release) session and +// look at the generated code. +// +// > disass +// +// Dump generated assembly for the "wait" function. +// +// ** your answer here ** +// +// Under the ARM calling convention, r0.. is used as arguments. +// However in this case, we see that r0 is set by the assembly instructions, +// before the loop is entered. +// +// Lookup the two instructions `movw` and `movt` to figure out what happens here. +// +// Answer in your own words, how they assign r0 to 1000000. +// +// ** your answer here ** +// +// Commit your answers (bare2_2) +// +// 3. Now add a second call to `wait` (line 47). +// +// Recompile and run until the breakpoint. +// +// Dump the generated assembly for the "wait" function. +// +// ** your answer here ** +// +// Answer in your own words, why you believe the generated code differs? +// +// ** your answer here ** +// +// diff --git a/examples/rtic_bare3.rs b/examples/rtic_bare3.rs new file mode 100644 index 0000000000000000000000000000000000000000..74f6c52794dfaee1607eea64f7d1e7cd9e36f480 --- /dev/null +++ b/examples/rtic_bare3.rs @@ -0,0 +1,129 @@ +//! bare3.rs +//! +//! Measuring execution time +//! +//! What it covers +//! - Reading Rust documentation +//! - Timing abstractions and semantics +//! - Understanding Rust abstractions + +#![no_main] +#![no_std] + +use cortex_m_semihosting::hprintln; +use panic_semihosting as _; +use rtic::cyccnt::Instant; +use stm32f4; + +#[rtic::app(device = stm32f4)] +const APP: () = { + #[init] + fn init(mut cx: init::Context) { + cx.core.DWT.enable_cycle_counter(); + + let start = Instant::now(); + wait(1_000_000); + let end = Instant::now(); + + // notice all printing outside of the section to measure! 
+ hprintln!("Start {:?}", start).ok(); + hprintln!("End {:?}", end).ok(); + // hprintln!("Diff {:?}", (end - start) ).ok(); + } +}; + +// burns CPU cycles by just looping `i` times +#[inline(never)] +#[no_mangle] +fn wait(i: u32) { + for _ in 0..i { + // no operation (ensured not optimized out) + cortex_m::asm::nop(); + } +} + +// 0. Setup +// +// > cargo doc --open +// +// In the docs, search (`S`) for `Monotonic` and read the API docs. +// Also search for `Instant`, and `Duration`. +// +// Together these provide timing semantics. +// +// - `Monotonic` is a "trait" for a timer implementation. +// - `Instant` is a point in time. +// - `Duration` is a range in time. +// +// By default RTIC uses the `SysTick` and the `DWT` cycle counter +// to provide a `Monotonic` timer. +// +// 1. Build and run the application in vscode using (Cortex Release). +// +// What is the output in the Adapter Output console? +// +// ** your answer here ** +// +// As you see line 31 is commented out (we never print the difference). +// +// Now uncomment line 31, and try to run the program. You will see +// that it fails to compile, as `Duration` does not implement `Debug` +// (needed for formatting the printout.) +// +// This is on purpose as `Duration` is abstract (opaque). You need to +// turn it into a concrete value. Look at the documentation, to find out +// a way to turn it into clock cycles (which are printable). +// +// What is now the output in the Adapter Output console? +// +// ** your answer here ** +// +// Commit your answers (bare3_1) +// +// 2. Look at the `Instant` documentation. +// +// Alter the code so that you use `duration_since`, instead of manual subtraction. +// +// What is now the output in the Adapter Output console? +// +// ** your answer here ** +// +// Commit your answers (bare3_2) +// +// 3. Look at the `Instant` documentation. +// Now alter the code so that it uses `elapsed` instead. +// +// What is now the output in the Adapter Output console? 
+// +// ** your answer here ** +// +// Commit your answers (bare3_3) +// +// 4. Discussion. +// +// If you did implement the above exercises correctly you should get exactly the same +// result (in clock cycles) for all cases as you got in the bare2 exercise. +// (If not, go back and revise your code.) +// +// What this shows, is that we can step away from pure hardware accesses +// and deal with time in a more convenient and "abstract" fashion. +// +// `Instant` and `Duration` are associated with semantics (meaning). +// `Monotonic` is associated with the implementation. +// +// This is an example of separation of concerns! +// +// If you implement your application based on Instant and Duration, your code +// will be "portable" across all platforms (that implement Monotonic). +// +// The implementation of Monotonic is done only once for each platform, thus +// bugs related to low level timer access will occur only at one place, +// not scattered across thousands of manually written applications. +// +// However, as you have already seen, the current time abstraction (API) +// is rather "thin" (providing just a bare minimum of functionality). +// +// We are working to further generalize timing semantics, by building +// on a richer abstraction `https://docs.rs/embedded-time/0.10.1/embedded_time/`. +// +// Support for embedded time is projected for next RTIC release. diff --git a/examples/rtic_bare4.rs b/examples/rtic_bare4.rs new file mode 100644 index 0000000000000000000000000000000000000000..a04efb300af1cdebfe5477138e460e80aeba536b --- /dev/null +++ b/examples/rtic_bare4.rs @@ -0,0 +1,133 @@ +//! bare4.rs +//! +//! Access to Peripherals +//! +//! What it covers: +//! - Raw pointers +//! - Volatile read/write +//! - Busses and clocking +//! 
- GPIO (a primitive abstraction) + +#![no_std] +#![no_main] + +extern crate cortex_m; +extern crate panic_halt; +use stm32f4; + +// Peripheral addresses as constants +#[rustfmt::skip] +mod address { + pub const PERIPH_BASE: u32 = 0x40000000; + pub const AHB1PERIPH_BASE: u32 = PERIPH_BASE + 0x00020000; + pub const RCC_BASE: u32 = AHB1PERIPH_BASE + 0x3800; + pub const RCC_AHB1ENR: u32 = RCC_BASE + 0x30; + pub const GPIOA_BASE: u32 = AHB1PERIPH_BASE + 0x0000; + pub const GPIOA_MODER: u32 = GPIOA_BASE + 0x00; + pub const GPIOA_BSRR: u32 = GPIOA_BASE + 0x18; +} + +use address::*; + +// see the Reference Manual RM0368 (www.st.com/resource/en/reference_manual/dm00096844.pdf) +// rcc, chapter 6 +// gpio, chapter 8 + +#[inline(always)] +fn read_u32(addr: u32) -> u32 { + unsafe { core::ptr::read_volatile(addr as *const _) } + // core::ptr::read_volatile(addr as *const _) +} + +#[inline(always)] +fn write_u32(addr: u32, val: u32) { + unsafe { + core::ptr::write_volatile(addr as *mut _, val); + } +} + +fn wait(i: u32) { + for _ in 0..i { + cortex_m::asm::nop(); // no operation (cannot be optimized out) + } +} + +#[rtic::app(device = stm32f4)] +const APP: () = { + #[init] + fn init(_cx: init::Context) { + // power on GPIOA + let r = read_u32(RCC_AHB1ENR); // read + write_u32(RCC_AHB1ENR, r | 1); // set enable + + // configure PA5 as output + let r = read_u32(GPIOA_MODER) & !(0b11 << (5 * 2)); // read and mask + write_u32(GPIOA_MODER, r | 0b01 << (5 * 2)); // set output mode + + // and alter the data output through the BSRR register + // this is more efficient as the read register is not needed. + + loop { + // set PA5 high + write_u32(GPIOA_BSRR, 1 << 5); // set bit, output high (turn on led) + wait(10_000); + + // set PA5 low + write_u32(GPIOA_BSRR, 1 << (5 + 16)); // clear bit, output low (turn off led) + wait(10_000); + } + } +}; + +// 0. Build and run the application (Cortex Debug). +// +// 1. Did you enjoy the blinking? 
+// +// ** your answer here ** +// +// Now lookup the data-sheets, and read each section referred to, +// 6.3.11, 8.4.1, 8.4.7 +// +// Document each low level access *code* by the appropriate section in the +// data sheet. +// +// Commit your answers (bare4_1) +// +// 2. Comment out line 38 and uncomment line 39 (essentially omitting the `unsafe`) +// +// //unsafe { core::ptr::read_volatile(addr as *const _) } +// core::ptr::read_volatile(addr as *const _) +// +// What was the error message and explain why. +// +// ** your answer here ** +// +// Digging a bit deeper, why do you think `read_volatile` is declared `unsafe`. +// (https://doc.rust-lang.org/core/ptr/fn.read_volatile.html, for some food for thought ) +// +// ** your answer here ** +// +// Commit your answers (bare4_2) +// +// 3. Volatile read/writes are explicit *volatile operations* in Rust, while in C they +// are declared at type level (i.e., access to variables declared volatile amounts to +// volatile reads/and writes). +// +// Both C and Rust (even more) allow code optimization to re-order operations, as long +// as data dependencies are preserved. +// +// Why is it important that ordering of volatile operations is ensured by the compiler? +// +// ** your answer here ** +// +// Give an example in the above code, where reordering might make things go horribly wrong +// (hint, accessing a peripheral not being powered...) +// +// ** your answer here ** +// +// Without the non-reordering property of `write_volatile/read_volatile` could that happen in theory +// (argue from the point of data dependencies). +// +// ** your answer here ** +// +// Commit your answers (bare4_3) diff --git a/examples/rtic_bare5.rs b/examples/rtic_bare5.rs new file mode 100644 index 0000000000000000000000000000000000000000..3fddcb3cf19f6ff0dea7203633ba039d327e6144 --- /dev/null +++ b/examples/rtic_bare5.rs @@ -0,0 +1,250 @@ +//! bare5.rs +//! +//! C Like Peripheral API +//! +//! What it covers: +//! - abstractions in Rust +//! 
- structs and implementations + +#![no_std] +#![no_main] + +extern crate cortex_m; +extern crate panic_semihosting; + +// C like API... +mod stm32f40x { + #![allow(dead_code)] + use core::{cell, ptr}; + + #[rustfmt::skip] + mod address { + pub const PERIPH_BASE: u32 = 0x40000000; + pub const AHB1PERIPH_BASE: u32 = PERIPH_BASE + 0x00020000; + pub const RCC_BASE: u32 = AHB1PERIPH_BASE + 0x3800; + pub const GPIOA_BASE: u32 = AHB1PERIPH_BASE + 0x0000; + } + use address::*; + + pub struct VolatileCell<T> { + pub value: cell::UnsafeCell<T>, + } + + impl<T> VolatileCell<T> { + #[inline(always)] + pub fn read(&self) -> T + where + T: Copy, + { + unsafe { ptr::read_volatile(self.value.get()) } + } + + #[inline(always)] + pub fn write(&self, value: T) + where + T: Copy, + { + unsafe { ptr::write_volatile(self.value.get(), value) } + } + } + + // modify (reads, modifies a field, and writes the volatile cell) + // + // parameters: + // offset (field offset) + // width (field width) + // value (new value that the field should take) + // + impl VolatileCell<u32> { + #[inline(always)] + pub fn modify(&self, offset: u8, width: u8, value: u32) { + // your code here + } + } + + #[repr(C)] + #[allow(non_snake_case)] + #[rustfmt::skip] + pub struct RCC { + pub CR: VolatileCell<u32>, // < RCC clock control register, Address offset: 0x00 + pub PLLCFGR: VolatileCell<u32>, // < RCC PLL configuration register, Address offset: 0x04 + pub CFGR: VolatileCell<u32>, // < RCC clock configuration register, Address offset: 0x08 + pub CIR: VolatileCell<u32>, // < RCC clock interrupt register, Address offset: 0x0C + pub AHB1RSTR: VolatileCell<u32>, // < RCC AHB1 peripheral reset register, Address offset: 0x10 + pub AHB2RSTR: VolatileCell<u32>, // < RCC AHB2 peripheral reset register, Address offset: 0x14 + pub AHB3RSTR: VolatileCell<u32>, // < RCC AHB3 peripheral reset register, Address offset: 0x18 + pub RESERVED0: VolatileCell<u32>, // < Reserved, 0x1C + pub APB1RSTR: VolatileCell<u32>, // < RCC 
APB1 peripheral reset register, Address offset: 0x20 + pub APB2RSTR: VolatileCell<u32>, // < RCC APB2 peripheral reset register, Address offset: 0x24 + pub RESERVED1: [VolatileCell<u32>; 2], // < Reserved, 0x28-0x2C + pub AHB1ENR: VolatileCell<u32>, // < RCC AHB1 peripheral clock register, Address offset: 0x30 + pub AHB2ENR: VolatileCell<u32>, // < RCC AHB2 peripheral clock register, Address offset: 0x34 + pub AHB3ENR: VolatileCell<u32>, // < RCC AHB3 peripheral clock register, Address offset: 0x38 + pub RESERVED2: VolatileCell<u32>, // < Reserved, 0x3C + pub APB1ENR: VolatileCell<u32>, // < RCC APB1 peripheral clock enable register, Address offset: 0x40 + pub APB2ENR: VolatileCell<u32>, // < RCC APB2 peripheral clock enable register, Address offset: 0x44 + pub RESERVED3: [VolatileCell<u32>; 2], // < Reserved, 0x48-0x4C + pub AHB1LPENR: VolatileCell<u32>, // < RCC AHB1 peripheral clock enable in low power mode register, Address offset: 0x50 + pub AHB2LPENR: VolatileCell<u32>, // < RCC AHB2 peripheral clock enable in low power mode register, Address offset: 0x54 + pub AHB3LPENR: VolatileCell<u32>, // < RCC AHB3 peripheral clock enable in low power mode register, Address offset: 0x58 + pub RESERVED4: VolatileCell<u32>, // < Reserved, 0x5C + pub APB1LPENR: VolatileCell<u32>, // < RCC APB1 peripheral clock enable in low power mode register, Address offset: 0x60 + pub APB2LPENR: VolatileCell<u32>, // < RCC APB2 peripheral clock enable in low power mode register, Address offset: 0x64 + pub RESERVED5: [VolatileCell<u32>; 2], // < Reserved, 0x68-0x6C + pub BDCR: VolatileCell<u32>, // < RCC Backup domain control register, Address offset: 0x70 + pub CSR: VolatileCell<u32>, // < RCC clock control & status register, Address offset: 0x74 + pub RESERVED6: [VolatileCell<u32>; 2], // < Reserved, 0x78-0x7C + pub SSCGR: VolatileCell<u32>, // < RCC spread spectrum clock generation register, Address offset: 0x80 + pub PLLI2SCFGR: VolatileCell<u32>, // < RCC PLLI2S configuration 
register, Address offset: 0x84 + } + + impl RCC { + pub fn get() -> *mut RCC { + address::RCC_BASE as *mut RCC + } + } + + #[repr(C)] + #[allow(non_snake_case)] + #[rustfmt::skip] + pub struct GPIOA { + pub MODER: VolatileCell<u32>, // < GPIO port mode register, Address offset: 0x00 + pub OTYPER: VolatileCell<u32>, // < GPIO port output type register, Address offset: 0x04 + pub OSPEEDR: VolatileCell<u32>, // < GPIO port output speed register, Address offset: 0x08 + pub PUPDR: VolatileCell<u32>, // < GPIO port pull-up/pull-down register, Address offset: 0x0C + pub IDR: VolatileCell<u32>, // < GPIO port input data register, Address offset: 0x10 + pub ODR: VolatileCell<u32>, // < GPIO port output data register, Address offset: 0x14 + pub BSRRL: VolatileCell<u16>, // < GPIO port bit set/reset low register, Address offset: 0x18 + pub BSRRH: VolatileCell<u16>, // < GPIO port bit set/reset high register, Address offset: 0x1A + pub LCKR: VolatileCell<u32>, // < GPIO port configuration lock register, Address offset: 0x1C + pub AFR: [VolatileCell<u32>;2], // < GPIO alternate function registers, Address offset: 0x20-0x24 + } + + impl GPIOA { + pub fn get() -> *mut GPIOA { + GPIOA_BASE as *mut GPIOA + } + } +} +use stm32f40x::*; + +// see the Reference Manual RM0368 (www.st.com/resource/en/reference_manual/dm00096844.pdf) +// rcc, chapter 6 +// gpio, chapter 8 + +fn wait(i: u32) { + for _ in 0..i { + cortex_m::asm::nop(); // no operation (cannot be optimized out) + } +} + +// simple test of your `modify` +// +fn test_modify() { + let t: VolatileCell<u32> = VolatileCell { + value: core::cell::UnsafeCell::new(0), + }; + t.write(0); + assert!(t.read() == 0); + t.modify(3, 3, 0b10101); + // 10101 + // ..0111000 + // --------- + // 000101000 + assert!(t.read() == 0b101 << 3); + t.modify(4, 3, 0b10001); + // 000101000 + // 111 + // 001 + // 000011000 + assert!(t.read() == 0b011 << 3); + // + // add more tests here if you like +} + +#[rtic::app(device = stm32f4)] +const APP: () = { + 
#[init] + fn init(_cx: init::Context) { + let rcc = unsafe { &*RCC::get() }; // shared reference to RCC in memory (cells are interior-mutable) + let gpioa = unsafe { &*GPIOA::get() }; // shared reference to GPIOA in memory (cells are interior-mutable) + + // power on GPIOA + let r = rcc.AHB1ENR.read(); // read + rcc.AHB1ENR.write(r | 1 << (0)); // set enable + + // configure PA5 as output + let r = gpioa.MODER.read() & !(0b11 << (5 * 2)); // read and mask + gpioa.MODER.write(r | 0b01 << (5 * 2)); // set output mode + + loop { + // set PA5 high + gpioa.BSRRL.write(1 << 5); // set bit (BSRR bits 15:0), output high (turn on led) + + // alternatively to set the bit high we can + // read the value, or with PA5 (bit 5) and write back + // gpioa.ODR.write(gpioa.ODR.read() | (1 << 5)); + + wait(10_000); + + // set PA5 low + gpioa.BSRRH.write(1 << 5); // reset bit (BSRR bits 31:16), output low (turn off led) + + // alternatively to clear the bit we can + // read the value, mask out PA5 (bit 5) and write back + // gpioa.ODR.write(gpioa.ODR.read() & !(1 << 5)); + wait(10_000); + } + } +}; + +// 0. Build and run the application. +// +// > cargo build --example bare5 +// (or use the vscode) +// +// 1. C like API. +// Using C the .h files are used for defining interfaces, like function signatures (prototypes), +// structs and macros (but usually not the functions themselves). +// +// Here is a peripheral abstraction quite similar to what you would find in the .h files +// provided by ST (and other companies). Actually, the file presented here is mostly a +// cut/paste/replace of the stm32f40x.h, just Rustified. +// +// +// In the loop we access PA5 through bit set/clear operations. +// Comment out those operations and uncomment the ODR accesses. +// (They should have the same behavior, but are a bit less efficient.) +// +// Run and see that the program behaves the same. +// +// Commit your answers (bare5_1) +// +// 2. 
Extend the read/write API with a `modify` for u32, taking the +// - address (&mut u32), +// - field offset (in bits, u8), +// - field width (in bits, u8), +// - and value (u32). +// +// Implement and check that running `test_modify` gives you expected behavior. +// +// Change the code into using your new API. +// +// Run and see that the program behaves the same. +// +// Commit your answers (bare5_2) +// +// Discussion: +// As with arithmetic operations, default semantics differ in between +// debug/dev and release builds. +// In debug << rhs is checked, rhs must be less than 32 (for 32 bit datatypes). +// +// Notice, over-shifting (where bits are spilled) is always considered legal, +// it's just the shift amount that is checked. +// There are explicit unchecked versions available if so wanted. +// +// We are now approaching a more "safe" to use API. +// What if we could automatically generate that from Vendors specifications (SVD files)? +// Wouldn't that be great? +// +// ** your answer here **