diff --git a/.vscode/launch.json b/.vscode/launch.json
index 97e02cde76807c35b3d1e1fd863550edc0d54d1e..c967eb44c8796c4008828c32a81e8f3b5a71b878 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -36,7 +36,7 @@
             "request": "launch",
             "servertype": "openocd",
             "name": "itm internal (debug)",
-            "preLaunchTask": "cargo build --examples",
+            "preLaunchTask": "cargo build --example",
             "executable": "./target/thumbv7em-none-eabihf/debug/examples/${fileBasenameNoExtension}",
             "configFiles": [
                 "interface/stlink.cfg",
@@ -72,7 +72,7 @@
             "request": "launch",
             "servertype": "openocd",
             "name": "itm fifo (debug)",
-            "preLaunchTask": "cargo build --examples",
+            "preLaunchTask": "cargo build --example",
             "executable": "./target/thumbv7em-none-eabihf/debug/examples/${fileBasenameNoExtension}",
             "configFiles": [
                 //"interface/stlink.cfg",
@@ -97,7 +97,7 @@
             "request": "launch",
             "servertype": "openocd",
             "name": "itm fifo (release)",
-            "preLaunchTask": "cargo build --examples --release",
+            "preLaunchTask": "cargo build --example --release",
             "executable": "./target/thumbv7em-none-eabihf/release/examples/${fileBasenameNoExtension}",
             "configFiles": [
                 //"interface/stlink.cfg",
@@ -122,7 +122,7 @@
             "request": "launch",
             "servertype": "openocd",
             "name": "itm fifo 64MHz (release)",
-            "preLaunchTask": "cargo build --examples --release",
+            "preLaunchTask": "cargo build --example --release",
             "executable": "./target/thumbv7em-none-eabihf/release/examples/${fileBasenameNoExtension}",
             "configFiles": [
                 "interface/stlink.cfg",
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
index 42f4b03ab8cafe8c01c41cd9e4c930c6196c94da..13860c24cd509bafc8fd02512f1a0a64170df201 100644
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -1,4 +1,7 @@
 {
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
     "tasks": [
         {
             "type": "shell",
@@ -26,8 +29,8 @@
         },
         {
             "type": "shell",
-            "label": "cargo build --examples",
-            "command": "cargo build --examples",
+            "label": "cargo build --example",
+            "command": "cargo build --example ${fileBasenameNoExtension}",
             "group": {
                 "kind": "build",
                 "isDefault": true
@@ -38,8 +41,8 @@
         },
         {
             "type": "shell",
-            "label": "cargo build --examples --release",
-            "command": "cargo build --examples --release",
+            "label": "cargo build --example --release",
+            "command": "cargo build --example ${fileBasenameNoExtension} --release",
             "group": {
                 "kind": "build",
                 "isDefault": true
diff --git a/Cargo.toml b/Cargo.toml
index 3a3532ef17d3fc7c78563661fc58d8b80f2c5aeb..bcbb4b6488a687db4d50445b8eacce3b2da2d42b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,11 +11,11 @@ edition = "2018"
 
 [dependencies]
 panic-halt              = "0.2"
-panic-semihosting       = "0.5"
+panic-semihosting       = "0.5" # comment out for `cargo doc`
+panic-itm               = "0.4.1" # comment out for `cargo doc`
 cortex-m-semihosting    = "0.3.5"
 aligned                 = "0.3.2"
 ufmt                    = "0.1.0"
-panic-itm               = "0.4.1"
 nb                      = "0.1.2"
 
 [dependencies.cortex-m]
diff --git a/examples/bare0.rs b/examples/bare0.rs
index dae25b59ab14769f44097786759aa6320356614c..ca2504495e43c384ac8268f1bbae57e87e5cb25a 100644
--- a/examples/bare0.rs
+++ b/examples/bare0.rs
@@ -15,12 +15,8 @@
 // no standard main, we declare main using [entry]
 #![no_main]
 
-// Minimal runtime / startup for Cortex-M microcontrollers
-//extern crate cortex_m_rt as rt;
 // Panic handler, for textual output using semihosting
-extern crate panic_semihosting;
-// Panic handler, infinite loop on panic
-// extern crate panic_halt;
+use panic_semihosting as _;
 
 // import entry point
 use cortex_m_rt::entry;
@@ -75,10 +71,10 @@ fn main() -> ! {
 
 // Here we assume you are using `vscode` with `cortex-debug`.
 //
-// 0. Compile/build the example in debug (dev) mode.
+// 0. Compile/build and run the example in debug (dev) mode.
 //
-//    > cargo build --example bare0
-//    (or use the vscode build task)
+//    > cargo run --example bare0
+//    (or use vscode)
 //
 // 1. Run the program in the debugger, let the program run for a while and
 //    then press pause.
diff --git a/examples/bare1.rs b/examples/bare1.rs
index 9230820fc240f64bf3cd695f789b8a4e4e42572d..07cacda6bb8c5488d35cda3cae6e2b7ba88979a3 100644
--- a/examples/bare1.rs
+++ b/examples/bare1.rs
@@ -3,14 +3,14 @@
 //! Inspecting the generated assembly
 //!
 //! What it covers
-//! - ITM tracing
+//! - Rust panic tracing using ITM
 //! - assembly calls and inline assembly
 //! - more on arithmetics
 
 #![no_main]
 #![no_std]
 
-extern crate panic_itm;
+use panic_itm as _;
 
 use cortex_m_rt::entry;
 
@@ -45,18 +45,18 @@ fn main() -> ! {
 //
 //    You may need/want to install additional components also.
 //    To that end look at the install section in the README.md.
-//    If you change toolchain, you may need to exit and re-start `vscode`.
+//    (If you change toolchain, you may need to exit and re-start `vscode`.)
 //
 // 1. Build and run the application
 //
-//    > cargo build --example bare1
-//    (or use the vscode build task)
+//    > cargo run --example bare1
+//    (or use the `itm fifo (debug)` or the `itm internal (debug)` launch configuration.)
 //
-//    Make sure you have followed the instructions for fifo `ITM` tracing.
-//    Debug using the `itm fifo (debug)` launch configuration.
+//    Make sure you have followed the instructions for fifo `ITM` tracing accordingly.
 //
 //    When debugging the application it should hit the `bkpt` instruction.
 //    What happens when you continue (second iteration of the loop)?
+//    (passing 3 breakpoints)
 //
 //    ** your answer here **
 //      Alot of items are added to the call stack of the program
@@ -107,7 +107,7 @@ fn main() -> ! {
 //    Rebuild `bare1.rs` in release (optimized mode).
 //
 //    > cargo build --example bare1 --release
-//    (or using the vscode build task)
+//    (or using vscode)
 //
 //    Compare the generated assembly for the loop
 //    between the dev (un-optimized) and release (optimized) build.
@@ -159,7 +159,32 @@ fn main() -> ! {
 //    Later we will demonstrate how we can get guarantees of panic free execution.
 //    This is very important to improve reliability.
 //
-// 4. *Optional
+// 4. Now comment out the `read_volatile`.
+//
+//    > cargo build --example bare1 --release
+//    (or using vscode)
+//
+//    Compare the generated assembly for the loop
+//    between the dev (un-optimized) and release (optimized) build.
+//
+//    What is the output of:
+//    > disassemble
+//
+//    ** your answer here **
+//
+//    How many instructions are in between the two `bkpt` instructions?
+//
+//    ** your answer here **
+//
+//    Where is the local variable stored?
+//    What happened, and why is Rust + LLVM allowed to do that?
+//
+//    ** your answer here **
+//
+//    commit your answers (bare1_4)
+//
+//
+// 5. *Optional
 //    You can pass additional flags to the Rust `rustc` compiler.
 //
 //    `-Z force-overflow-checks=off`
@@ -172,11 +197,11 @@ fn main() -> ! {
 //
 //    ** your answer here **
 //
-//    commit your answers (bare1_4)
+//    commit your answers (bare1_5)
 //
 //    Now restore the `.cargo/config` to its original state.
 //
-// 5. *Optional
+// 6. *Optional
 //    There is another way to conveniently use wrapping arithmetics
 //    without passing flags to the compiler.
 //
@@ -192,7 +217,7 @@ fn main() -> ! {
 //
 //    ** your answer here **
 //
-//    commit your answers (bare1_5)
+//    commit your answers (bare1_6)
 //
 //    Final discussion:
 //
diff --git a/examples/bare2.rs b/examples/bare2.rs
new file mode 100644
index 0000000000000000000000000000000000000000..ce6acb20fb9221507961a8586a4ba8211917db1c
--- /dev/null
+++ b/examples/bare2.rs
@@ -0,0 +1,101 @@
+//! bare2.rs
+//!
+//! Measuring execution time
+//!
+//! What it covers
+//! - Generating documentation
+//! - Using core peripherals
+//! - Measuring time using the DWT
+//! - ITM tracing using `iprintln`
+//! - Panic halt
+//!
+
+#![no_main]
+#![no_std]
+
+use panic_halt as _;
+
+use cortex_m::{iprintln, peripheral::DWT, Peripherals};
+use cortex_m_rt::entry;
+
+// Busy-wait helper: burns CPU cycles by running the loop body `i` times.
+#[inline(never)] // never inlined into the caller
+fn wait(i: u32) {
+    for _ in 0..i {
+        // a single `nop` per iteration (ensured not optimized out)
+        cortex_m::asm::nop();
+    }
+}
+
+#[entry]
+fn main() -> ! {
+    let mut p = Peripherals::take().unwrap();
+    let stim = &mut p.ITM.stim[0];
+    let mut dwt = p.DWT;
+
+    iprintln!(stim, "bare2");
+    dwt.enable_cycle_counter();
+
+    // Reading the cycle counter can be done without `owning` access
+    // to the DWT (since it has no side effect).
+    //
+    // Look in the docs:
+    // pub fn enable_cycle_counter(&mut self)
+    // pub fn get_cycle_count() -> u32
+    //
+    // Notice the difference in the function signature!
+
+    let start = DWT::get_cycle_count();
+    wait(1_000_000);
+    let end = DWT::get_cycle_count();
+
+    // notice all printing outside of the section to measure!
+    iprintln!(stim, "Start {:?}", start);
+    iprintln!(stim, "End {:?}", end);
+    // `u32` counter values: wrapping subtraction avoids an overflow panic should the counter wrap
+    iprintln!(stim, "Diff {:?}", end.wrapping_sub(start));
+
+    loop {}
+}
+
+// 0. Setup
+//    > cargo doc --open
+//
+//    This will document your crate, and open the docs in your browser.
+//    If it does not auto-open, then copy paste the path in your browser.
+//    (Notice, it will try to document all dependencies, you may have only
+//    one panic handler, so comment out all but one in `Cargo.toml`.)
+//
+//    In the docs, search (`S`) for DWT, and click `cortex_m::peripheral::DWT`.
+//    Read the API docs.
+//
+// 1. Build and run the application (debug build).
+//    Setup ITM tracing (see `bare1.rs`) and `openocd` (if not using vscode).
+//
+//    > cargo run --example bare2
+//    (or use the vscode build task)
+//
+//    What is the output in the ITM console?
+//
+//    ** your answer here **
+//
+//    Rebuild and run in release mode
+//
+//    > cargo run --example bare2 --release
+//
+//    ** your answer here **
+//
+//    Compute the ratio between debug/release optimized code
+//    (the speedup).
+//
+//    ** your answer here **
+//
+//    commit your answers (bare2_1)
+//
+// 2. *Optional
+//    Inspect the generated binaries, and try stepping through the code
+//    for both debug and release binaries. How do they differ?
+//
+//    ** your answer here **
+//
+//    commit your answers (bare2_2)
diff --git a/examples/bare3.rs b/examples/bare3.rs
new file mode 100644
index 0000000000000000000000000000000000000000..665b4da687d3ca0285abdf1ae5268b9ba0a4c340
--- /dev/null
+++ b/examples/bare3.rs
@@ -0,0 +1,148 @@
+//! bare3.rs
+//!
+//! String types in Rust
+//!
+//! What it covers:
+//! - Types, str, arrays ([u8; usize]), slices (&[u8])
+//! - Iteration, copy
+//! - Semihosting (tracing using `hprintln`)
+
+#![no_main]
+#![no_std]
+
+extern crate panic_halt;
+
+use cortex_m_rt::entry;
+use cortex_m_semihosting::{hprint, hprintln};
+
+// Entry point: prints `s` and its bytes over semihosting in a few different ways.
+#[entry]
+fn main() -> ! {
+    hprintln!("bare3").unwrap();
+    let s = "ABCD";
+    let bs = s.as_bytes();
+
+    hprintln!("s = {}", s).unwrap();
+    hprintln!("bs = {:?}", bs).unwrap();
+
+    hprintln!("iterate over slice").unwrap();
+    for c in bs {
+        hprint!("{},", c).unwrap();
+    }
+
+    hprintln!("iterate using (raw) indexing").unwrap();
+    for i in 0..bs.len() { // bound taken from the slice that is actually indexed
+        hprintln!("{},", bs[i]).unwrap();
+    }
+    hprintln!("").unwrap();
+
+    let a = [65u8; 4];
+    // let mut a = [0u8; 4];
+
+    hprintln!("").unwrap();
+    hprintln!("a = {}", core::str::from_utf8(&a).unwrap()).unwrap();
+
+    loop {
+        continue;
+    }
+}
+
+// 0. Build and run the application (debug build).
+//
+//    > cargo run --example bare3
+//    (or use the vscode build task)
+//
+// 1. What is the output in the `openocd` (Adapter Output) console?
+//
+//    ** your answer here **
+//
+//    What is the type of `s`?
+//
+//    ** your answer here **
+//
+//    What is the type of `bs`?
+//
+//    ** your answer here **
+//
+//    What is the type of `c`?
+//
+//    ** your answer here **
+//
+//    What is the type of `a`?
+//
+//    ** your answer here **
+//
+//    What is the type of `i`?
+//
+//    ** your answer here **
+//
+//    Commit your answers (bare3_1)
+//
+// 2. Make types of `s`, `bs`, `c`, `a`, `i` explicit.
+//
+//    Commit your answers (bare3_2)
+//
+// 3. Uncomment line `let mut a = [0u8; 4];`
+//
+//    Run the program, what happens and why?
+//
+//    ** your answer here **
+//
+//    Commit your answers (bare3_3)
+//
+// 4. Alter the program so that the data from `bs` is copied byte
+//    by byte into `a` using a loop and raw indexing.
+//
+//    Test that it works as intended.
+//
+//    Commit your answers (bare3_4)
+//
+// 5. Look for a way to make this copy done without a loop.
+//    https://doc.rust-lang.org/std/primitive.slice.html
+//
+//    Implement and test your solution.
+//
+//    Commit your answers (bare3_5)
+//
+// 6. Optional
+//    Rust is heavily influenced by functional languages.
+//    Figure out how you can use an iterator to work over both
+//    the `a` and `bs` to copy the content of `bs` to `a`.
+//
+//    You may use
+//    - `iter` (to turn a slice into an iterator)
+//    - `zip` (to merge two slices into an iterator)
+//    - a for loop to assign the elements
+//
+//    Commit your solution (bare3_6)
+//
+// 7. Optional
+//    Iter using `for_each` and a closure instead of the for loop.
+//
+//    Commit your solution (bare3_7)
+//
+// 8. Optional*
+//    Now benchmark your different solutions using the cycle accurate
+//    DWT based approach (in release mode).
+//
+//    Cycle count for `raw` indexing
+//
+//    ** your answer here **
+//
+//    Cycle count for the primitive slice approach.
+//
+//    ** your answer here **
+//
+//    Cycle count for the primitive slice approach.
+//
+//    ** your answer here **
+//
+//    Cycle count for the zip + for loop approach.
+//
+//    ** your answer here **
+//
+//    Cycle count for the zip + for_each approach.
+//
+//    What conclusions can you draw, does Rust give you zero-cost abstractions?
+//
+//    ** your answer here **
diff --git a/examples/bare4.rs b/examples/bare4.rs
new file mode 100644
index 0000000000000000000000000000000000000000..07ce483f31f97174ac046b5a43a6548af79a212d
--- /dev/null
+++ b/examples/bare4.rs
@@ -0,0 +1,134 @@
+//! bare4.rs
+//!
+//! Access to Peripherals
+//!
+//! What it covers:
+//! - Raw pointers
+//! - Volatile read/write
+//! - Busses and clocking
+//! - GPIO (a primitive abstraction)
+
+#![no_std]
+#![no_main]
+
+extern crate panic_halt;
+
+extern crate cortex_m;
+use cortex_m_rt::entry;
+
+// Peripheral addresses as constants (each built from its base address plus an offset)
+#[rustfmt::skip]
+mod address {
+    pub const PERIPH_BASE: u32      = 0x40000000;
+    pub const AHB1PERIPH_BASE: u32  = PERIPH_BASE + 0x00020000;
+    pub const RCC_BASE: u32         = AHB1PERIPH_BASE + 0x3800;
+    pub const RCC_AHB1ENR: u32      = RCC_BASE + 0x30;
+    pub const GPIOA_BASE: u32       = AHB1PERIPH_BASE + 0x0000;
+    pub const GPIOA_MODER: u32      = GPIOA_BASE + 0x00;
+    pub const GPIOA_BSRR: u32       = GPIOA_BASE + 0x18;
+}
+
+use address::*;
+
+// see the Reference Manual RM0368 (www.st.com/resource/en/reference_manual/dm00096844.pdf)
+// rcc,     chapter 6
+// gpio,    chapter 8
+
+#[inline(always)] // volatile read of the `u32` located at address `addr`
+fn read_u32(addr: u32) -> u32 {
+    unsafe { core::ptr::read_volatile(addr as *const _) }
+    //core::ptr::read_volatile(addr as *const _)
+}
+
+/// Performs a volatile write of `val` to the 32-bit word at address `addr`.
+#[inline(always)]
+fn write_u32(addr: u32, val: u32) {
+    let target = addr as *mut u32;
+    unsafe { core::ptr::write_volatile(target, val) }
+}
+
+fn wait(i: u32) { // crude delay: runs the loop body `i` times
+    for _ in 0..i {
+        cortex_m::asm::nop(); // one `nop` per iteration (cannot be optimized out)
+    }
+}
+
+#[entry]
+fn main() -> ! {
+    // power on GPIOA (read-modify-write of RCC_AHB1ENR)
+    let r = read_u32(RCC_AHB1ENR); // read the current value
+    write_u32(RCC_AHB1ENR, r | 1); // write it back with bit 0 set (enable)
+
+    // configure PA5 as output (2-bit field at bit offset 5 * 2 of GPIOA_MODER)
+    let r = read_u32(GPIOA_MODER) & !(0b11 << (5 * 2)); // read and clear the field
+    write_u32(GPIOA_MODER, r | 0b01 << (5 * 2)); // write the field value 0b01 (output mode)
+
+    // From here on the data output is altered through the BSRR register only;
+    // this is more efficient since no read of the register is needed.
+
+    loop {
+        // set PA5 high
+        write_u32(GPIOA_BSRR, 1 << 5); // set bit, output high (turn on led)
+        wait(10_000);
+
+        // set PA5 low
+        write_u32(GPIOA_BSRR, 1 << (5 + 16)); // clear bit, output low (turn off led)
+        wait(10_000);
+    }
+}
+
+// 0.  Build and run the application (debug build).
+//
+//    > cargo run --example bare4
+//    (or use the vscode)
+//
+// 1.  Did you enjoy the blinking?
+//
+//    ** your answer here **
+//
+//    Now lookup the data-sheets, and read each section referred,
+//    6.3.11, 8.4.1, 8.4.7
+//
+//    Document each low level access *code* by the appropriate section in the
+//    data sheet.
+//
+//    Commit your answers (bare4_1)
+//
+// 2. Comment out line 39 and uncomment line 40 (essentially omitting the `unsafe`)
+//
+//    //unsafe { core::ptr::read_volatile(addr as *const _) }
+//    core::ptr::read_volatile(addr as *const _)
+//
+//    What was the error message and explain why.
+//
+//    ** your answer here **
+//
+//    Digging a bit deeper, why do you think `read_volatile` is declared `unsafe`.
+//    (https://doc.rust-lang.org/core/ptr/fn.read_volatile.html, for some food for thought )
+//
+//    ** your answer here **
+//
+//    Commit your answers (bare4_2)
+//
+// 3. Volatile read/writes are explicit *volatile operations* in Rust, while in C they
+//    are declared at type level (i.e., access to variables declared volatile amounts to
+//    volatile reads and writes).
+//
+//    Both C and Rust (even more) allow code optimization to re-order operations, as long
+//    as data dependencies are preserved.
+//
+//    Why is it important that the ordering of volatile operations is ensured by the compiler?
+//
+//    ** your answer here **
+//
+//    Give an example in the above code, where reordering might make things go horribly wrong
+//    (hint, accessing a peripheral not being powered...)
+//
+//    ** your answer here **
+//
+//    Without the non-reordering property of `write_volatile/read_volatile` could that happen in theory
+//    (argue from the point of data dependencies).
+//
+//    ** your answer here **
+//
+//    Commit your answers (bare4_3)
diff --git a/examples/bare5.rs b/examples/bare5.rs
new file mode 100644
index 0000000000000000000000000000000000000000..90ce2066c5f374b5277ea923bcc474927c1ce524
--- /dev/null
+++ b/examples/bare5.rs
@@ -0,0 +1,251 @@
+//! bare5.rs
+//!
+//! C Like Peripheral API
+//!
+//! What it covers:
+//! - abstractions in Rust
+//! - structs and implementations
+
+#![no_std]
+#![no_main]
+
+extern crate panic_halt;
+
+extern crate cortex_m;
+use cortex_m_rt::entry;
+
+// C like API...
+mod stm32f40x {
+    // inner attribute: covers the whole module, most registers are never touched by the example
+    #![allow(dead_code)]
+    use core::{cell, ptr};
+
+    #[rustfmt::skip]
+    mod address {
+        pub const PERIPH_BASE: u32      = 0x40000000;
+        pub const AHB1PERIPH_BASE: u32  = PERIPH_BASE + 0x00020000;
+        pub const RCC_BASE: u32         = AHB1PERIPH_BASE + 0x3800;
+        pub const GPIOA_BASE: u32       = AHB1PERIPH_BASE + 0x0000;
+    }
+    use address::*;
+
+    /// A cell whose content is only accessed through volatile reads and writes.
+    pub struct VolatileCell<T> {
+        value: cell::UnsafeCell<T>,
+    }
+
+    impl<T: Copy> VolatileCell<T> {
+        /// Returns a copy of the stored value, obtained by a volatile read.
+        #[inline(always)]
+        pub fn read(&self) -> T {
+            unsafe { ptr::read_volatile(self.value.get()) } // pointer from our own `UnsafeCell`
+        }
+
+        /// Stores `value` by a volatile write (through `&self`, hence the `UnsafeCell`).
+        #[inline(always)]
+        pub fn write(&self, value: T) {
+            unsafe { ptr::write_volatile(self.value.get(), value) } // pointer from our own `UnsafeCell`
+        }
+    }
+
+    // modify (reads, modifies a field, and writes the volatile cell)
+    //
+    // parameters:
+    // offset (field offset)
+    // width  (field width)
+    // value  (new value that the field should take)
+    //
+    // impl VolatileCell<u32> {
+    //     #[inline(always)]
+    //     pub fn modify(&self, offset: u8, width: u8, value: u32) {
+    //         // your code here
+    //     }
+    // }
+
+    /// RCC register block; `#[repr(C)]` keeps the fields in declaration order (offsets as listed).
+    #[repr(C)]
+    #[allow(non_snake_case)]
+    #[rustfmt::skip]
+    pub struct RCC {
+        pub CR:         VolatileCell<u32>,      // < RCC clock control register,                                    Address offset: 0x00
+        pub PLLCFGR:    VolatileCell<u32>,      // < RCC PLL configuration register,                                Address offset: 0x04
+        pub CFGR:       VolatileCell<u32>,      // < RCC clock configuration register,                              Address offset: 0x08
+        pub CIR:        VolatileCell<u32>,      // < RCC clock interrupt register,                                  Address offset: 0x0C
+        pub AHB1RSTR:   VolatileCell<u32>,      // < RCC AHB1 peripheral reset register,                            Address offset: 0x10
+        pub AHB2RSTR:   VolatileCell<u32>,      // < RCC AHB2 peripheral reset register,                            Address offset: 0x14
+        pub AHB3RSTR:   VolatileCell<u32>,      // < RCC AHB3 peripheral reset register,                            Address offset: 0x18
+        pub RESERVED0:  VolatileCell<u32>,      // < Reserved, 0x1C
+        pub APB1RSTR:   VolatileCell<u32>,      // < RCC APB1 peripheral reset register,                            Address offset: 0x20
+        pub APB2RSTR:   VolatileCell<u32>,      // < RCC APB2 peripheral reset register,                            Address offset: 0x24
+        pub RESERVED1:  [VolatileCell<u32>; 2], // < Reserved, 0x28-0x2C
+        pub AHB1ENR:    VolatileCell<u32>,      // < RCC AHB1 peripheral clock register,                            Address offset: 0x30
+        pub AHB2ENR:    VolatileCell<u32>,      // < RCC AHB2 peripheral clock register,                            Address offset: 0x34
+        pub AHB3ENR:    VolatileCell<u32>,      // < RCC AHB3 peripheral clock register,                            Address offset: 0x38
+        pub RESERVED2:  VolatileCell<u32>,      // < Reserved, 0x3C
+        pub APB1ENR:    VolatileCell<u32>,      // < RCC APB1 peripheral clock enable register,                     Address offset: 0x40
+        pub APB2ENR:    VolatileCell<u32>,      // < RCC APB2 peripheral clock enable register,                     Address offset: 0x44
+        pub RESERVED3:  [VolatileCell<u32>; 2], // < Reserved, 0x48-0x4C
+        pub AHB1LPENR:  VolatileCell<u32>,      // < RCC AHB1 peripheral clock enable in low power mode register,   Address offset: 0x50
+        pub AHB2LPENR:  VolatileCell<u32>,      // < RCC AHB2 peripheral clock enable in low power mode register,   Address offset: 0x54
+        pub AHB3LPENR:  VolatileCell<u32>,      // < RCC AHB3 peripheral clock enable in low power mode register,   Address offset: 0x58
+        pub RESERVED4:  VolatileCell<u32>,      // < Reserved, 0x5C
+        pub APB1LPENR:  VolatileCell<u32>,      // < RCC APB1 peripheral clock enable in low power mode register,   Address offset: 0x60
+        pub APB2LPENR:  VolatileCell<u32>,      // < RCC APB2 peripheral clock enable in low power mode register,   Address offset: 0x64
+        pub RESERVED5:  [VolatileCell<u32>; 2], // < Reserved, 0x68-0x6C
+        pub BDCR:       VolatileCell<u32>,      // < RCC Backup domain control register,                            Address offset: 0x70
+        pub CSR:        VolatileCell<u32>,      // < RCC clock control & status register,                           Address offset: 0x74
+        pub RESERVED6:  [VolatileCell<u32>; 2], // < Reserved, 0x78-0x7C
+        pub SSCGR:      VolatileCell<u32>,      // < RCC spread spectrum clock generation register,                 Address offset: 0x80
+        pub PLLI2SCFGR: VolatileCell<u32>,      // < RCC PLLI2S configuration register,                             Address offset: 0x84
+    }
+
+    impl RCC {
+        pub fn get() -> *mut RCC {
+            RCC_BASE as *mut RCC // raw pointer to the register block at `RCC_BASE`
+        }
+    }
+
+    /// GPIOA register block; `#[repr(C)]` keeps the fields in declaration order (offsets as listed).
+    #[repr(C)]
+    #[allow(non_snake_case)]
+    #[rustfmt::skip]
+    pub struct GPIOA {
+        pub MODER:      VolatileCell<u32>,      // < GPIO port mode register,                                       Address offset: 0x00
+        pub OTYPER:     VolatileCell<u32>,      // < GPIO port output type register,                                Address offset: 0x04
+        pub OSPEEDR:    VolatileCell<u32>,      // < GPIO port output speed register,                               Address offset: 0x08
+        pub PUPDR:      VolatileCell<u32>,      // < GPIO port pull-up/pull-down register,                          Address offset: 0x0C
+        pub IDR:        VolatileCell<u32>,      // < GPIO port input data register,                                 Address offset: 0x10
+        pub ODR:        VolatileCell<u32>,      // < GPIO port output data register,                                Address offset: 0x14
+        pub BSRRL:      VolatileCell<u16>,      // < GPIO port bit set/reset low register,                          Address offset: 0x18
+        pub BSRRH:      VolatileCell<u16>,      // < GPIO port bit set/reset high register,                         Address offset: 0x1A
+        pub LCKR:       VolatileCell<u32>,      // < GPIO port configuration lock register,                         Address offset: 0x1C
+        pub AFR:        [VolatileCell<u32>;2],  // < GPIO alternate function registers,                             Address offset: 0x20-0x24
+    }
+
+    impl GPIOA {
+        pub fn get() -> *mut GPIOA {
+            GPIOA_BASE as *mut GPIOA // raw pointer to the register block at `GPIOA_BASE`
+        }
+    }
+}
+use stm32f40x::*;
+
+// see the Reference Manual RM0368 (www.st.com/resource/en/reference_manual/dm00096844.pdf)
+// rcc,     chapter 6
+// gpio,    chapter 8
+
+fn wait(i: u32) { // crude delay: runs the loop body `i` times
+    for _ in 0..i {
+        cortex_m::asm::nop(); // one `nop` per iteration (cannot be optimized out)
+    }
+}
+
+// simple test of Your `modify`
+//fn test() {
+// let t:VolatileCell<u32> = unsafe {  core::mem::uninitialized() };
+// t.write(0);
+// assert!(t.read() == 0);
+// t.modify(3, 3, 0b10101);
+// //
+// //     10101
+// //    ..0111000
+// //    ---------
+// //    000101000
+// assert!(t.read() == 0b101 << 3);
+// t.modify(4, 3, 0b10001);
+// //    000101000
+// //      111
+// //      001
+// //    000011000
+// assert!(t.read() == 0b011 << 3);
+
+// if << is used, your code will panic in dev (debug), but not in release mode
+// t.modify(32, 3, 1);
+//}
+
+// system startup (can be hidden from the user): obtains the peripherals and hands them to `idle`
+#[entry]
+fn main() -> ! {
+    let rcc = unsafe { &mut *RCC::get() }; // turn the raw RCC pointer into a mutable reference
+    let gpioa = unsafe { &mut *GPIOA::get() }; // turn the raw GPIOA pointer into a mutable reference
+
+    // test(); // uncomment to run test
+    idle(rcc, gpioa);
+    loop { // only reached should `idle` ever return
+        continue;
+    }
+}
+
+// user application: powers on GPIOA, makes PA5 an output and then toggles it forever
+fn idle(rcc: &mut RCC, gpioa: &mut GPIOA) {
+    // power on GPIOA
+    let r = rcc.AHB1ENR.read(); // read
+    rcc.AHB1ENR.write(r | 1 << (0)); // set enable
+
+    // configure PA5 as output
+    let r = gpioa.MODER.read() & !(0b11 << (5 * 2)); // read and mask
+    gpioa.MODER.write(r | 0b01 << (5 * 2)); // set output mode
+
+    loop {
+        // set PA5 high (BSRRL, offset 0x18, is the low half-word holding the *set* bits)
+        gpioa.BSRRL.write(1 << 5); // set bit, output high (turn on led)
+                                   // alternatively to set the bit high we can
+                                   // read the value, or with PA5 (bit 5) and write back
+                                   // gpioa.ODR.write(gpioa.ODR.read() | (1 << 5));
+
+        wait(10_000);
+
+        // set PA5 low (BSRRH, offset 0x1A, is the high half-word holding the *reset* bits)
+        gpioa.BSRRH.write(1 << 5); // clear bit, output low (turn off led)
+                                   // alternatively to clear the bit we can
+                                   // read the value, mask out PA5 (bit 5) and write back
+                                   // gpioa.ODR.write(gpioa.ODR.read() & !(1 << 5));
+        wait(10_000);
+    }
+}
+
+// 0. Build and run the application.
+//
+//    > cargo run --example bare5
+//    (or use the vscode)
+//
+// 1. C like API.
+//    Using C the .h files are used for defining interfaces, like function signatures (prototypes),
+//    structs and macros (but usually not the functions themselves).
+//
+//    Here is a peripheral abstraction quite similar to what you would find in the .h files
+//    provided by ST (and other companies). Actually, the file presented here is mostly a
+//    cut/paste/replace of the stm32f40x.h, just Rustified.
+//
+//    In this case we pass mutable pointers of the peripherals to the `idle` function.
+//
+//    In the loop we access PA5 through bit set/clear operations.
+//    Comment out those operations and uncomment the ODR accesses.
+//    (They should have the same behavior, but are a bit less efficient.)
+//
+//    Run and see that the program behaves the same.
+//
+//    Commit your answers (bare5_1)
+//
+// 2. Extend the read/write API with a `modify` for u32, taking the
+//    - address (&mut u32),
+//    - field offset (in bits, u8),
+//    - field width (in bits, u8),
+//    - and value (u32).
+//
+//    Implement and check that running `test` gives you expected behavior.
+//
+//    Change the code into using your new API.
+//
+//    Run and see that the program behaves the same.
+//
+//    Commit your answers (bare5_2)
+//
+//    Discussion:
+//    As with arithmetic operations, default semantics differ in between
+//    debug/dev and release builds. E.g., debug << rhs is checked, rhs must be less
+//    than 32 (for 32 bit datatypes).
+//
+//    Notice, over-shifting (where bits are spilled) is always considered legal,
+//    it's just the shift amount that is checked.
+//    There are explicit unchecked versions available if so wanted.