Skip to content
Snippets Groups Projects
Commit 6845790a authored by Per Lindgren's avatar Per Lindgren
Browse files

rtt_timing

parent 4e580b2b
No related branches found
No related tags found
No related merge requests found
//! examples/rtt_simple.rs
#![deny(unsafe_code)]
#![deny(warnings)]
#![no_main]
#![no_std]
use panic_rtt_target as _;
use rtt_target::{rprintln, rtt_init_print};
use stm32f4;
#[rtic::app(device = stm32f4)]
const APP: () = {
#[init]
fn init(_cx: init::Context) {
rtt_init_print!();
rprintln!("init");
}
#[idle]
fn idle(_cx: idle::Context) -> ! {
rprintln!("idle");
loop {
continue;
}
}
};
// > cargo run rtt_simple
// Compiling app v0.1.0 (/home/pln/courses/e7020e/app)
// Finished dev [unoptimized + debuginfo] target(s) in 0.15s
// Running `probe-run --chip STM32F411RETx target/thumbv7em-none-eabi/debug/examples/rtt_simple`
// flashing program ..
// DONE
// resetting device
// init
// idle
//
// > [Ctrl-C]
// stack backtrace:
// 0: 0x0800031a - rtt_simple::idle
// 1: 0x0800036c - main
// 2: 0x080001da - Reset
//! examples/rtt_timing.rs
// #![deny(unsafe_code)]
#![deny(warnings)]
#![no_main]
#![no_std]
use cortex_m::{asm, peripheral::DWT};
use panic_rtt_target as _;
use rtt_target::{rprintln, rtt_init_print};
use stm32f4;
#[rtic::app(device = stm32f4)]
const APP: () = {
#[init]
fn init(mut cx: init::Context) {
rtt_init_print!();
rprintln!("init");
// Initialize (enable) the monotonic timer (CYCCNT)
cx.core.DCB.enable_trace();
cx.core.DWT.enable_cycle_counter();
rprintln!("start timed_loop");
let (start, end) = timed_loop();
rprintln!(
"start {}, end {}, diff {}",
start,
end,
end.wrapping_sub(start)
);
}
#[idle]
fn idle(_cx: idle::Context) -> ! {
rprintln!("idle");
loop {
continue;
}
}
};
// Forbid inlining and keeping name (symbol) readable.
#[inline(never)]
#[no_mangle]
fn timed_loop() -> (u32, u32) {
let start = DWT::get_cycle_count();
for _ in 0..10000 {
asm::nop();
}
let end = DWT::get_cycle_count();
(start, end)
}
// We can measure execution time using the built in cycle counter,
// a wrapping 32 bit timer.
//
// `DWT::get_cycle_count()` reads the current value.
//
// ------------------------------------------------------------------------
// Exercises:
//
// A.1) What is the cycle count for the loop?
// > cargo run --example rtt_timing
//
// [Your answer here]
//
// A.2) How many cycles per iteration?
//
// [Your answer here]
//
// ------------------------------------------------------------------------
// Now try a release (optimized build, see `Cargo.toml` for build options).
// B.1) What is the cycle count for the loop?
// > cargo run --example rtt_timing --release
//
// [Your answer here]
//
// B.2) How many cycles per iteration?
//
// [Your answer here]
//
// What is the speedup (A/B)?
//
// [Your answer here]
//
// Why do you think it differs that much?
//
// [Your answer here]
//
// ------------------------------------------------------------------------
// In the loop there is just a single assembly instruction (nop).
// However, the API for "inline assembly" is not yet stabilized, thus
// we have to call an "external" functions that implements the assembly.
//
// We can switch to the "nightly" channel (which allows experimental features).
//
// > rustup override set nightly
// info: using existing install for 'nightly-x86_64-unknown-linux-gnu'
// info: override toolchain for '/home/pln/courses/e7020e/app' set to 'nightly-x86_64-unknown-linux-gnu'
//
// nightly-x86_64-unknown-linux-gnu unchanged - rustc 1.49.0-nightly (cf9cf7c92 2020-11-10)
//
// > rustup target add thumbv7em-none-eabi
// (only needed first time)
//
// Now try a release (optimized build, see `Cargo.toml` for build options).
// C.1) What is the cycle count for the loop?
// > cargo run --example rtt_timing --release
//
// [Your answer here]
//
// C.2) How many cycles per iteration?
//
// [Your answer here]
//
// What is the speedup (A/C)?
//
// [Your answer here]
//
// ------------------------------------------------------------------------
// D) Now lets have a closer look at the generated assembly.
//
// > cargo objdump --example rtt_timing --release --features nightly -- --disassemble > rtt_timing.objdump
//
// Open the file in you editor and search for the `timed_loop`.
//
// [Assembly for function here]
//
// Locate the loop body, and verify that it makes sense
// based on the information from the technical documentation:
//
// https://developer.arm.com/documentation/ddi0439/b/Programmers-Model/Instruction-set-summary/Cortex-M4-instructions
//
//
// ------------------------------------------------------------------------
// E) Now we shall take detailed control over the debugging.
//
// Alter the .cargo/config
// uncomment:
// runner = "arm-none-eabi-gdb -q -x openocd.gdb"
// or (if under ubuntu)
// runner = "gdb-multiarch -q -x openocd.gdb"
//
// comment:
// # runner = "probe-run --chip STM32F411RETx"
//
// Remember to save the file!
//
// In a separate terminal start `openocd`.
// > openocd -f openocd.cfg
// And in the main terminal run:
// > cargo run --example rtt_timing --release --features nightly
// ...
// halted: PC: 0x0800019a
// 0x0800019a in cortex_m_rt::Reset () at /home/pln/.cargo/registry/src/github.com-1ecc6299db9ec823/cortex-m-rt-0.6.13/src/lib.rs:497
// 497 pub unsafe extern "C" fn Reset() -> ! {
//
// Now add a breakpoint at `timed_loop`.
// (gdb) break timed_loop
// Breakpoint 5 at 0x800023e: file examples/rtt_timing.rs, line 45.
//
// Now we can continue (first break will be at `init`)
// (gdb) continue
// Breakpoint 4, cortex_m::interrupt::disable () at /home/pln/.cargo/registry/src/github.com-1ecc6299db9ec823/cortex-m-0.6.4/src/interrupt.rs:13
// llvm_asm!("cpsid i" ::: "memory" : "volatile");
// (`init` in RTIC runs with interrupts disabled)
// (gdb) continue
// Breakpoint 5, core::ptr::read_volatile<u32> (src=<optimized out>) at /home/pln/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:1052
// unsafe { intrinsics::volatile_load(src) }
//
// Now we are inside the `timed_loop`.
// disassemble
// Dump of assembler code for function _ZN10rtt_timing10timed_loop17h4a445e5f592f3304E:
// 0x08000232 <+0>: movw r1, #4100 ; 0x1004
// 0x08000236 <+4>: movw r2, #10000 ; 0x2710
// 0x0800023a <+8>: movt r1, #57344 ; 0xe000
// => 0x0800023e <+12>: ldr r0, [r1, #0]
// 0x08000240 <+14>: subs r2, #1
// 0x08000242 <+16>: nop
// 0x08000244 <+18>: bne.n 0x8000240 <rtt_timing::timed_loop+14>
// 0x08000246 <+20>: ldr r1, [r1, #0]
// 0x08000248 <+22>: bx lr
//
// (breakpoints can be a bit "off" for release builds, here it
// put the breakpoint not at the function entry but rather at the
// first Rust line (the load done by `DWT::get_cycle_count()`)).
//
// Is there a function call to the DWT::get_cycle_count(),
// if not can you explain what has happened here?
// In particular, what is the content of r1?
//
// Confer to the documentation:
// https://developer.arm.com/documentation/ddi0439/b/Data-Watchpoint-and-Trace-Unit/DWT-Programmers-Model
//
// [Your answer here]
//
// Now check your answer by dumping the registers
// (gdb) info registers
//
// [Register dump here]
//
// We can now set a breakpoint exactly at the `nop`.
//
// (gdb) break *0x8000240
//
// And continue execution to the breakpoint:
// (gdb) continue
//
// (gdb) disassemble
// 0x08000232 <+0>: movw r1, #4100 ; 0x1004
// 0x08000236 <+4>: movw r2, #10000 ; 0x2710
// 0x0800023a <+8>: movt r1, #57344 ; 0xe000
// 0x0800023e <+12>: ldr r0, [r1, #0]
// 0x08000240 <+14>: subs r2, #1
// => 0x08000242 <+16>: nop
// 0x08000244 <+18>: bne.n 0x8000240 <rtt_timing::timed_loop+14>
// 0x08000246 <+20>: ldr r1, [r1, #0]
// 0x08000248 <+22>: bx lr
//
// You can inspect the memory directly.
// What is the current value of the cycle counter?
//
// (gdb) x 0xe0001004
//
// [Your answer here]
//
// Now, let's execute one iteration:
// (gdb) continue
//
// What is now the current value of the cycle counter?
//
// [Your answer here]
//
// By how much does the cycle counter increase for each iteration?
//
// [Your answer here]
//
// ------------------------------------------------------------------------
// F) Reseting the cycle counter
// You can restart the program by a "soft reset".
// (gdb) monitor reset init
// "monitor" the command to be sent to `openocd`
// "reset init" will reset the MCU and run a the "init" hook
// in the currently loaded `openocd` configuration.
//
// Now repeat the procedure from E) and check the value of
// the cycle counter.
//
// You will find that its not starting over but continuing.
// This may be a bit confusing (we like deterministic behavior).
//
// Look in the documentation for `cortex_m`
// https://docs.rs/cortex-m/0.6.4/cortex_m/peripheral/dwt/struct.RegisterBlock.html
//
// This is a "raw" API to the underlying hardware.
// Under the hood, it uses `volatile_register`
// https://docs.rs/volatile-register/0.2.0/volatile_register/struct.RW.html
//
// for which write is marked "unsafe".
//
// Figure out the way to access the register through:
// `cx. cx.core.DWT.cyccnt.write(0)`.
// And add that to the code just before calling `timed_loop`.
//
// (Remember to comment out #![deny(unsafe_code)])
//
// Now run re-run the procedure from E)
//
// What is the initial value of the cycle counter
// (when hitting the `timed_loop` breakpoint)?
//
// [Your answer here]
//
// ------------------------------------------------------------------------
// F) Finally some statics
// For embedded it is often/typically crucial to reduce overhead,
// - code footprint (flash memory)
// - memory footprint (sram memory)
// - execution OH (speed/power consumption)
//
// We have already looked execution overhead and can conclude
// Rust to do an excellent job.
//
// The ram requirement for the application amounts only to
// to the buffers (allocated for RTT), and the stack
// for the functions (which is in this case 0 for the `timed_loop`).
//
// Let's have a look at the code footprint (flash).
//
// > cargo bloat -n 100 --example rtt_timing --release --features nightly
// ...
// 0.0% 0.4% 24B [Unknown] timed_loop
// ...
//
// That is, the timed loop takes only 24 bytes of memory!
//
// > cargo size --example rtt_timing --release --features nightly
// Finished release [optimized + debuginfo] target(s) in 0.03s
// text data bss dec hex filename
// 6876 0 1084 7960 1f18 rtt_timing
//
// And the code overall less than 8k of flash.
//
// Your assignment now is to get this down, by identifying the
// memory hogs. You can remove the tracing, replace the panic
// handler by `panic_halt`, but besides that it should still
// measure time (debugging in gdb of `timed_loop` must still work).
//
// ------------------------------------------------------------------------
// Summary:
// What are the learning outcomes.
//
// - Rust generates really bad code in debug build.
// - Rust generates really, really, really good code in release build.
// Zero-cost abstractions are for real.
//
// - RTIC is extremely light weight (zero-cost in release build).
// - RTIC applications are easy to
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment