diff --git a/.vscode/launch.json b/.vscode/launch.json
index d78df7ad333dbd853e08541c68836227eaaf0831..3fcc60a51b3e484b81e25d720d43f125ab2da682 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -19,7 +19,7 @@
             "runToMain": true,
             "svdFile": "${workspaceRoot}/.vscode/STM32F401.svd",
             "configFiles": [
-                "interface/stlink-v2-1.cfg",
+                "interface/stlink.cfg",
                 "target/stm32f4x.cfg"
             ],
             "preRestartCommands": [
@@ -44,6 +44,41 @@
             "executable": "./target/thumbv7em-none-eabihf/debug/examples/${fileBasenameNoExtension}",
             "cpu": "cortex-m4",
         },
+        {
+            "type": "cortex-debug",
+            "request": "launch",
+            "name": "Cortex Debug 48Mhz",
+            "servertype": "openocd",
+            "cwd": "${workspaceRoot}",
+            "preLaunchTask": "cargo build --example",
+            "runToMain": true,
+            "svdFile": "${workspaceRoot}/.vscode/STM32F401.svd",
+            "configFiles": [
+                "interface/stlink.cfg",
+                "target/stm32f4x.cfg"
+            ],
+            "preRestartCommands": [
+                "load",
+            ],
+            "postLaunchCommands": [
+                "monitor arm semihosting enable"
+            ],
+            "swoConfig": {
+                "enabled": true,
+                "cpuFrequency": 48000000,
+                "swoFrequency": 2000000,
+                "source": "probe",
+                "decoders": [
+                    {
+                        "type": "console",
+                        "label": "ITM",
+                        "port": 0
+                    }
+                ]
+            },
+            "executable": "./target/thumbv7em-none-eabihf/debug/examples/${fileBasenameNoExtension}",
+            "cpu": "cortex-m4",
+        },
         {
             "type": "cortex-debug",
             "request": "launch",
@@ -54,7 +89,7 @@
             "runToMain": true,
             "svdFile": "${workspaceRoot}/.vscode/STM32F401.svd",
             "configFiles": [
-                "interface/stlink-v2-1.cfg",
+                "interface/stlink.cfg",
                 "target/stm32f4x.cfg"
             ],
             "preRestartCommands": [
@@ -78,6 +113,41 @@
             },
             "executable": "./target/thumbv7em-none-eabihf/release/examples/${fileBasenameNoExtension}",
             "cpu": "cortex-m4",
+        },
+        {
+            "type": "cortex-debug",
+            "request": "launch",
+            "name": "Cortex Release 48Mhz",
+            "servertype": "openocd",
+            "cwd": "${workspaceRoot}",
+            "preLaunchTask": "cargo build --example --release",
+            "runToMain": true,
+            "svdFile": "${workspaceRoot}/.vscode/STM32F401.svd",
+            "configFiles": [
+                "interface/stlink.cfg",
+                "target/stm32f4x.cfg"
+            ],
+            "preRestartCommands": [
+                "load",
+            ],
+            "postLaunchCommands": [
+                "monitor arm semihosting enable"
+            ],
+            "swoConfig": {
+                "enabled": true,
+                "cpuFrequency": 48000000,
+                "swoFrequency": 2000000,
+                "source": "probe",
+                "decoders": [
+                    {
+                        "type": "console",
+                        "label": "ITM",
+                        "port": 0
+                    }
+                ]
+            },
+            "executable": "./target/thumbv7em-none-eabihf/release/examples/${fileBasenameNoExtension}",
+            "cpu": "cortex-m4",
         }
     ]
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc5b8f8effec693a24d6ff0acb3d17e973b10cd0..39a885d619a8239d192d88b39ca330ee76d8001a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## 2021-03-18
+
+- examples/usb-mouse.rs, a very small example using an external HID library.
+  
 ## 2021-03-07
 
 - examples/rtic_bare7.rs, using embedded HAL.
diff --git a/Cargo.toml b/Cargo.toml
index 0029c8b9e10e46f747ed8d913deffd491e8fa30e..76ff65fca1fc0e15563c1daf41b133918e8e5cf7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ cortex-m = { version = "0.7.1", features = ["linker-plugin-lto"] }
 cortex-m-rt = "0.6.13"
 cortex-m-semihosting = "0.3.7"
 cortex-m-rtic = "0.5.5"
+# embedded-hal = { version = "0.2.4", features = ["unproven"] }
 embedded-hal = "0.2.4"
 usb-device = "0.2.7"
 
@@ -17,7 +18,7 @@ usb-device = "0.2.7"
 panic-halt = "0.2.0"
 
 # Uncomment for the itm panic examples.
-#panic-itm = "0.4.2"
+panic-itm = "0.4.2"
 
 # Uncomment for the rtt-timing examples.
 panic-rtt-target = { version = "0.1.1", features = ["cortex-m"] }
@@ -27,14 +28,13 @@ panic-semihosting = "0.5.6"
 
 # Tracing
 rtt-target = { version = "0.3.0", features = ["cortex-m"] }
+nb = "1.0.0"
+usbd-hid = "0.5.0"
 
 [dependencies.stm32f4]
 version = "0.13.0"
 features = ["stm32f411", "rt"]
 
-# Uncomment for the allocator example.
-# alloc-cortex-m = "0.4.0"
-
 
 [dependencies.stm32f4xx-hal]
 version = "0.8.3"
@@ -45,28 +45,18 @@ features = ["rt", "stm32f411", "usb_fs"]
 # path = "../stm32f4xx-hal"
 
 # this lets you use `cargo fix`!
-[[bin]]
-name = "app"
-test = false
-bench = false
+# [[bin]]
+# name = "app"
+# # test = false
+# bench = false
 
 [profile.dev]
 incremental = false
 codegen-units = 1
+overflow-checks = false  
 
 [profile.release]
 codegen-units = 1 # better optimizations
 debug = true      # symbols are nice and they don't increase the size on Flash
 lto = true        # better optimizations
 
-
-
-# [features]
-# nightly = ["cortex-m/inline-asm"]
-
-# # this lets you use `cargo fix`!
-# [[bin]]
-# name = "app"
-# test = false
-# bench = false
-
diff --git a/examples/itm_rtic_hello_48Mhz.rs b/examples/itm_rtic_hello_48Mhz.rs
new file mode 100644
index 0000000000000000000000000000000000000000..de89e49425061b8af8e75ee9c47f3c7b3946905b
--- /dev/null
+++ b/examples/itm_rtic_hello_48Mhz.rs
@@ -0,0 +1,27 @@
+// itm_rtic_hello_48Mhz
+//
+// Use the vscode 48Mhz launch profiles
+
+#![no_main]
+#![no_std]
+
+use cortex_m::iprintln;
+use panic_halt as _;
+
+use stm32f4xx_hal::prelude::*;
+
+#[rtic::app(device = stm32f4xx_hal::stm32, peripherals = true)]
+const APP: () = {
+    #[init]
+    fn init(cx: init::Context) {
+        // Clock setup: 48 MHz sysclk, with the PLL48 clock required.
+        let cfgr = cx.device.RCC.constrain().cfgr;
+        let _clocks = cfgr.sysclk(48.mhz()).require_pll48clk().freeze();
+
+        // Print eleven greetings (0 through 10) on ITM stimulus port 0.
+        let mut core = cx.core;
+        for count in 0..=10 {
+            iprintln!(&mut core.ITM.stim[0], "RTIC Hello, world!! {}", count);
+        }
+    }
+};
diff --git a/examples/rtt-pmw3389-sine.rs b/examples/rtt-pmw3389-sine.rs
new file mode 100644
index 0000000000000000000000000000000000000000..0aae89b68384e31fdc0a905ee0a5d7cd4b252739
--- /dev/null
+++ b/examples/rtt-pmw3389-sine.rs
@@ -0,0 +1,187 @@
+//! examples/rtt-pmw3389-sine.rs
+//! cargo run --example rtt-pmw3389-sine --release
+
+// #![deny(unsafe_code)]
+// #![deny(warnings)]
+#![no_main]
+#![no_std]
+
+// use core::f32::consts::PI;
+use cortex_m::{asm, peripheral::DWT};
+// use panic_halt as _;
+use panic_rtt_target as _;
+use rtic::cyccnt::{Instant, U32Ext as _};
+use rtt_target::{rprint, rprintln, rtt_init_print};
+
+use core::f32::consts::PI;
+use micromath::F32Ext;
+
+use stm32f4xx_hal::{bb, gpio::Speed, prelude::*, pwm, stm32};
+
+include!(concat!(env!("OUT_DIR"), "/sin_abs_const.rs"));
+
+// Drives TIM1 channels 1 and 2 as 8-bit PWM outputs; the `pwm_out` task
+// re-schedules itself every PERIOD cycles and updates both duty cycles.
+#[rtic::app(device = stm32f4xx_hal::stm32,  monotonic = rtic::cyccnt::CYCCNT, peripherals = true)]
+const APP: () = {
+    struct Resources {
+        // late resources
+        TIM1: stm32::TIM1,
+    }
+    #[init(schedule = [pwm_out])]
+    fn init(mut cx: init::Context) -> init::LateResources {
+        rtt_init_print!();
+        rprintln!("init");
+        let dp = cx.device;
+
+        // Initialize (enable) the monotonic timer (CYCCNT)
+        cx.core.DCB.enable_trace();
+        cx.core.DWT.enable_cycle_counter();
+
+        let rcc = dp.RCC.constrain();
+        // Set up the system clock: 96 MHz sysclk, 24 MHz pclk1
+        let clocks = rcc
+            .cfgr
+            // .use_hse(8.mhz())
+            // .sysclk(48.mhz())
+            .sysclk(96.mhz())
+            .pclk1(24.mhz())
+            .freeze();
+
+        let gpioa = dp.GPIOA.split();
+        // we set the pins to VeryHigh to get the sharpest waveform possible
+        // (rise and fall times should have similar characteristics)
+        let _channels = (
+            gpioa.pa8.into_alternate_af1().set_speed(Speed::VeryHigh),
+            gpioa.pa9.into_alternate_af1().set_speed(Speed::VeryHigh),
+        );
+
+        // Setup PWM RAW
+        let tim1 = dp.TIM1;
+        // Here we need unsafe as we are "stealing" the RCC peripheral.
+        // At this point it has been constrained and used to set the clocks.
+        let rcc = unsafe { &(*stm32::RCC::ptr()) };
+
+        rcc.apb2enr.modify(|_, w| w.tim1en().set_bit());
+        rcc.apb2rstr.modify(|_, w| w.tim1rst().set_bit());
+        rcc.apb2rstr.modify(|_, w| w.tim1rst().clear_bit());
+
+        // Set up channels 1 and 2 as pwm_mode1, with preload enabled
+        tim1.ccmr1_output()
+            .modify(|_, w| w.oc1pe().set_bit().oc1m().pwm_mode1());
+
+        tim1.ccmr1_output()
+            .modify(|_, w| w.oc2pe().set_bit().oc2m().pwm_mode1());
+
+        // The reference manual is a bit ambiguous about when enabling this bit is really
+        // necessary, but since we MUST enable the pre-load for the output channels then we
+        // might as well enable for the auto-reload too
+        tim1.cr1.modify(|_, w| w.arpe().set_bit());
+
+        let clk = clocks.pclk2().0 * if clocks.ppre2() == 1 { 1 } else { 2 };
+        // print the computed timer clock so it can be checked against the expected value
+        rprintln!("clk {}", clk);
+
+        // we want maximum performance, thus we set the prescaler to 0
+        let pre = 0;
+        rprintln!("pre {}", pre);
+        tim1.psc.write(|w| w.psc().bits(pre));
+
+        // we want 8 bits of resolution
+        // so our ARR = 2^8 - 1 = 256 - 1 = 255
+        let arr = 255;
+        rprintln!("arr {}", arr);
+        tim1.arr.write(|w| unsafe { w.bits(arr) });
+
+        // Trigger update event to load the registers
+        tim1.cr1.modify(|_, w| w.urs().set_bit());
+        tim1.egr.write(|w| w.ug().set_bit());
+        tim1.cr1.modify(|_, w| w.urs().clear_bit());
+
+        // Set main output enable of all Output Compare (OC) registers
+        tim1.bdtr.modify(|_, w| w.moe().set_bit());
+
+        // Set output enable for channels 1 and 2
+        tim1.ccer.write(|w| w.cc1e().set_bit().cc2e().set_bit());
+
+        // Setup the timer; `modify` (unlike `write`) keeps the ARPE bit set above
+        tim1.cr1.modify(|_, w| {
+            w.cms()
+                .bits(0b00) // edge aligned mode
+                .dir() // counter used as up-counter
+                .clear_bit()
+                .opm() // one pulse mode
+                .clear_bit()
+                .cen() // enable counter
+                .set_bit()
+        });
+
+        // Set the initial duty cycle of both channels (128 of 256)
+        tim1.ccr1.write(|w| unsafe { w.ccr().bits(128) });
+        tim1.ccr2.write(|w| unsafe { w.ccr().bits(128) });
+
+        // Set preload for the CCx
+        tim1.cr2.write(|w| w.ccpc().set_bit());
+
+        // Enable update events
+        tim1.dier.write(|w| w.uie().enabled());
+        tim1.sr.modify(|_, w| w.uif().clear());
+
+        // Set divider to 4, (48_000_000/256)/4
+        // tim1.rcr.modify(|_, w| unsafe { w.rep().bits(4) });
+
+        while tim1.sr.read().uif().is_clear() {
+            rprint!("-");
+        }
+        rprintln!("here");
+        tim1.sr.modify(|_, w| w.uif().clear());
+
+        // schedule the first pwm_out run and pass on late resources
+        cx.schedule.pwm_out(cx.start + PERIOD.cycles()).ok();
+        init::LateResources { TIM1: tim1 }
+    }
+
+    #[idle]
+    fn idle(_cx: idle::Context) -> ! {
+        rprintln!("idle");
+        // panic!("panic");
+        loop {
+            continue;
+        }
+    }
+
+    // Writes the duty cycles computed on the previous run, then computes the next ones
+    #[task(resources = [TIM1], schedule = [pwm_out])]
+    fn pwm_out(cx: pwm_out::Context) {
+        static mut INDEX: u16 = 0;
+        static mut LEFT: u16 = 0;
+        static mut RIGHT: u16 = 0;
+        static mut FLOAT: u16 = 0;
+
+        let tim1 = cx.resources.TIM1;
+
+        tim1.ccr1.write(|w| unsafe { w.ccr().bits(*LEFT) });
+        // tim1.ccr2.write(|w| unsafe { w.ccr().bits(*RIGHT) });
+        tim1.ccr2.write(|w| unsafe { w.ccr().bits(*FLOAT) });
+
+        *INDEX = (*INDEX).wrapping_add(10_000);
+        let f: f32 = (*INDEX as f32 * 2.0 * PI) / 65536.0;
+        *FLOAT = (128.0 + f.sin() * 128.0) as u16;
+        cx.schedule.pwm_out(cx.scheduled + PERIOD.cycles()).ok();
+        // NOTE(review): SINE_BUF must have 65536 entries for any u16 INDEX to be in bounds — confirm
+        *LEFT = SINE_BUF[*INDEX as usize] as u16;
+        *RIGHT = SINE_BUF[*INDEX as usize] as u16;
+
+        if cx.scheduled.elapsed() > 500.cycles() {
+            panic!("task overrun");
+        }
+    }
+
+    extern "C" {
+        fn EXTI0();
+    }
+};
+
+// We aim for a sampling rate of 48 kHz, assuming that the sound card used to
+// sample the generated signal has an appropriate input filter.
+const PERIOD: u32 = 2000; // 96_000_000 / 48_000;
diff --git a/examples/usb-mouse.rs b/examples/usb-mouse.rs
new file mode 100644
index 0000000000000000000000000000000000000000..8ce314810a04e6974f22238d765dc22fd15493d1
--- /dev/null
+++ b/examples/usb-mouse.rs
@@ -0,0 +1,110 @@
+// > cargo run --example usb-mouse
+// or
+// > cargo run --example usb-mouse --release
+
+#![no_main]
+#![no_std]
+
+use stm32f4xx_hal::{
+    gpio::{gpioc::PC13, Input, PullUp},
+    otg_fs::{UsbBus, UsbBusType, USB},
+    prelude::*,
+};
+use usb_device::{bus::UsbBusAllocator, prelude::*};
+use usbd_hid::{
+    descriptor::{generator_prelude::*, MouseReport},
+    hid_class::HIDClass,
+};
+
+use panic_rtt_target as _;
+use rtt_target::{rprintln, rtt_init_print};
+
+#[rtic::app(device = stm32f4xx_hal::stm32, peripherals = true)]
+const APP: () = {
+    struct Resources {
+        btn: PC13<Input<PullUp>>,
+        hid: HIDClass<'static, UsbBusType>,
+        usb_dev: UsbDevice<'static, UsbBus<USB>>,
+    }
+
+    #[init]
+    fn init(cx: init::Context) -> init::LateResources {
+        static mut EP_MEMORY: [u32; 1024] = [0; 1024];
+        static mut USB_BUS: Option<UsbBusAllocator<UsbBusType>> = None;
+
+        rtt_init_print!();
+        rprintln!("init");
+
+        // Clock setup: 48 MHz sysclk, with the PLL48 clock required.
+        let dp = cx.device;
+        let cfgr = dp.RCC.constrain().cfgr;
+        let _clocks = cfgr.sysclk(48.mhz()).require_pll48clk().freeze();
+
+        // PC13, configured as a pull-up input, is the `btn` resource.
+        let btn = dp.GPIOC.split().pc13.into_pull_up_input();
+
+        // PA11 and PA12 (alternate function 10) are handed to the USB peripheral.
+        let gpioa = dp.GPIOA.split();
+        let usb = USB {
+            usb_global: dp.OTG_FS_GLOBAL,
+            usb_device: dp.OTG_FS_DEVICE,
+            usb_pwrclk: dp.OTG_FS_PWRCLK,
+            pin_dm: gpioa.pa11.into_alternate_af10(),
+            pin_dp: gpioa.pa12.into_alternate_af10(),
+        };
+        *USB_BUS = Some(UsbBus::new(usb, EP_MEMORY));
+        let bus = USB_BUS.as_ref().unwrap();
+
+        // The HID class and the USB device both borrow the statically stored bus.
+        let hid = HIDClass::new(bus, MouseReport::desc(), 1);
+        let usb_dev = UsbDeviceBuilder::new(bus, UsbVidPid(0xc410, 0x0000))
+            .manufacturer("E70011E")
+            .product("Mouse")
+            .serial_number("1.0")
+            .device_class(0)
+            .build();
+
+        init::LateResources { btn, hid, usb_dev }
+    }
+
+    #[task(binds=OTG_FS, resources = [btn, hid, usb_dev])]
+    fn on_usb(cx: on_usb::Context) {
+        static mut COUNTER: u16 = 0;
+
+        // Pull the task's resources out of the context.
+        let (button, mouse, device) = (cx.resources.btn, cx.resources.hid, cx.resources.usb_dev);
+
+        // Horizontal movement: +10 when the counter is 100, -10 when it is 199, else 0.
+        let tick = *COUNTER;
+        let dx = if tick == 100 {
+            rprintln!("10");
+            10
+        } else if tick == 199 {
+            rprintln!("-10");
+            -10
+        } else {
+            0
+        };
+        let report = MouseReport {
+            x: dx,
+            y: 0,
+            buttons: button.is_low().unwrap().into(), // bool -> integer via `into`
+            wheel: 0,
+        };
+
+        // Advance the counter, wrapping back to 0 when it reaches 200.
+        *COUNTER = (tick + 1) % 200;
+
+        // Queue the report; the result is deliberately discarded.
+        mouse.push_input(&report).ok();
+
+        // Update the USB device state; the returned flag is not needed here.
+        let _ = device.poll(&mut [mouse]);
+    }
+
+    #[idle]
+    fn idle(_cx: idle::Context) -> ! {
+        rprintln!("idle");
+        loop {}
+    }
+};