diff --git a/.vscode/launch.json b/.vscode/launch.json
index 59ebbbd4ad003064d0d1b9ffb69ce4098e76ab45..d91619dd3e9a36730443b4d4c5beaf17dd4db6d4 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -69,5 +69,18 @@
             ],
             "cwd": "${workspaceRoot}"
         },
+        {
+            "type": "gdb",
+            "request": "attach",
+            "name": "serial-dma-tx",
+            "gdbpath": "/usr/bin/arm-none-eabi-gdb",
+            "executable": "./target/thumbv7em-none-eabihf/debug/examples/serial-dma-tx",
+            "target": ":3333",
+            "remote": true,
+            "autorun": [
+                "load"
+            ],
+            "cwd": "${workspaceRoot}"
+        },
     ]
 }
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index e817c9d6797852859bd32b751adeb9ce43456b8a..f02a11947b9912c9fd39d39a06a30f1fc92d011d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,11 @@ branch = "svd2rust0.12"
 default-features = false
 version = "0.2.2"
 
+[dependencies.cortex-m-debug]
+git = "https://gitlab.henriktjader.com/pln/cortex-m-debug.git"
+version = "0.1.2"
+branch = "cortex-m-4"
+
 [dev-dependencies.stm32f413]
 features = ["rt"]
 version = "0.2.0"
diff --git a/src/dma.rs b/src/dma.rs
index fdec5c916e4893d6824a116e9f70aeb373e8806c..1fda842245e680031c152fe8b9569cc2b18f1bb0 100644
--- a/src/dma.rs
+++ b/src/dma.rs
@@ -215,11 +215,53 @@ pub mod dma1 {
         }
     }
 
+    use cortex_m::asm;
+
     fn start_transfer_s6_c4(ndtr: u16, par: u32, m0: u32) {
         let dma = unsafe { &*DMA1::ptr() };
+        // Program DMA1 stream 6 for one memory-to-peripheral transfer of
+        // `ndtr` items from `m0` to the peripheral register at `par`.
+        // The leftover `asm::bkpt()` was dropped: BKPT with no debugger
+        // attached faults on Cortex-M, which would stop every caller here.
+        ipln!("ndtr {:04x}", ndtr);
+        ipln!("par  {:08x}", par);
+        ipln!("m0   {:08x}", m0);
+        // number of data items to transfer
         dma.s6ndtr.write(|w| unsafe { w.ndt().bits(ndtr) });
+        // peripheral address
         dma.s6par.write(|w| unsafe { w.bits(par) });
+        // memory 0 address
         dma.s6m0ar.write(|w| unsafe { w.bits(m0) });
+        // Stream configuration (the stream stays disabled during this write):
+        // chsel = 4: channel 4 (RM0368 9.3.3 Table 27)
+        // pl = 0b01: medium priority
+        // msize = psize = 0b00: 8-bit memory and peripheral accesses
+        // minc set, pinc clear: only the memory address increments
+        // circ clear: circular mode disabled
+        // dir = 1: memory to peripheral
+        // tcie clear: no transfer-complete interrupt
+        dma.s6cr.write(|w| unsafe {
+            w.chsel()
+                .bits(4)
+                .pl()
+                .bits(0b01)
+                .msize()
+                .bits(0b00)
+                .psize()
+                .bits(0b00)
+                .minc()
+                .set_bit()
+                .pinc()
+                .clear_bit()
+                .circ()
+                .clear_bit()
+                .dir()
+                .bits(1)
+                .tcie()
+                .clear_bit()
+                .en()
+                .clear_bit()
+        });
         dma.s6cr.modify(|_, w| w.en().set_bit());
     }
 
@@ -228,6 +270,8 @@ pub mod dma1 {
 
         fn split(self, ahb1: &mut AHB1) -> Streams {
             //ahb.ahb1enr().modify(|_, w| w.dma1en().set_bit());
+            // enable the DMA1 clock via the AHB1 enable register (dma1en)
+            ipln!("dma1en");
             ahb1.enr().modify(|_, w| w.dma1en().set_bit());
 
             // // reset the DMA control registers (stops all on-going transfers)
@@ -257,8 +301,13 @@ pub mod dma1 {
             // from or writing to a reserved address space". I think it's impossible
             // to get to that state with our type safe API and *safe* Rust.
             let dma = unsafe { &*DMA1::ptr() };
+            let mut nr = 0;
+            asm::bkpt();
 
-            while dma.hisr.read().tcif6().bit_is_clear() {}
+            while dma.hisr.read().tcif6().bit_is_clear() {
+                nr += 1;
+            }
+            asm::bkpt();
             dma.hifcr.write(|w| w.ctcif6().set_bit());
             dma.s2cr.modify(|_, w| w.en().clear_bit());
 
diff --git a/src/lib.rs b/src/lib.rs
index 52507e7c9c1fa8185a89eecf8158230f3751c6b2..8548e4fb98ad1590d2149a0a57825f523ff3d318 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -29,6 +29,10 @@ extern crate embedded_hal as hal;
 extern crate nb;
 pub extern crate stm32f413 as stm32f4x;
 
+// Convenient tracing over semihosting and ITM
+#[macro_use]
+extern crate cortex_m_debug;
+
 pub mod delay;
 pub mod flash;
 pub mod gpio;
diff --git a/src/serial.rs b/src/serial.rs
index 5fa5335115348d03fed4bdaa37c285322611a47e..08b14105b652d213318476bbe26f37b4535ab565 100644
--- a/src/serial.rs
+++ b/src/serial.rs
@@ -254,21 +254,14 @@ macro_rules! hal {
                 {
                     {
                         let buf :&[u8] = buffer.borrow();
+
+                        // arguments: ndtr (item count), par (peripheral address), m0 (memory address)
                         tx_stream.start_transfer(
                             u16(buf.len()).unwrap(),
                             unsafe { &(*$USARTX::ptr()).dr as *const _ as usize as u32 },
                             buf.as_ptr() as u32
                         );
 
-                        // stream.ndtr()
-                        //     .write(|w| unsafe { w.ndt().bits(u16(buffer1.len()).unwrap()) });
-                        // stream.par()
-                        //     .write(|w| unsafe { w.bits(&(*$USARTX::ptr()).dr as *const _ as usize as u32) });
-                        // stream.m0ar()
-                        //     .write(|w| unsafe { w.bits(buffer1.as_ptr() as u32) });
-                    // dma1.s6cr.modify(|_, w| w.en().set_bit());
-
-
 
                     //let buffer: &[u8] = buffer.borrow();
                     // chan.cmar().write(|w| unsafe {