diff --git a/examples/serial-dma-tx.rs b/examples/serial-dma-tx.rs
new file mode 100644
index 0000000000000000000000000000000000000000..60fdba17dead691f0ae16895526e0acc72f3fda4
--- /dev/null
+++ b/examples/serial-dma-tx.rs
@@ -0,0 +1,43 @@
+//! Serial interface DMA transmit example
+//!
+//! This example hands three byte strings to `Tx::write_all`, one after another, and completes
+//! each transfer with `wait()`. You can watch the output with a serial terminal like `minicom`.
+#![deny(unsafe_code)]
+//#![deny(warnings)]
+#![no_std]
+
+extern crate stm32f4x_hal as f4;
+
+use f4::prelude::*;
+use f4::serial::Serial;
+use f4::stm32f4x;
+
+/// Entry point: writes a sentence to USART2 in three `write_all` calls on one DMA stream.
+fn main() {
+    let p = stm32f4x::Peripherals::take().unwrap();
+
+    let mut flash = p.FLASH.constrain();
+    let mut rcc = p.RCC.constrain();
+    let mut gpioa = p.GPIOA.split(&mut rcc.ahb1);
+    let streams = p.DMA1.split(&mut rcc.ahb1);
+    // `streams.1` is the `S5` stream; tag it with channel 4 before handing it to `write_all`.
+    let tx_stream = streams.1.into_channel4();
+
+    let clocks = rcc.cfgr.freeze(&mut flash.acr);
+
+    // PA2 and PA3 in alternate function 7 are passed to USART2 as its (TX, RX) pin pair.
+    let tx = gpioa.pa2.into_af7(&mut gpioa.moder, &mut gpioa.afrl);
+    let rx = gpioa.pa3.into_af7(&mut gpioa.moder, &mut gpioa.afrl);
+    let serial = Serial::usart2(p.USART2, (tx, rx), 115_200.bps(), clocks, &mut rcc.apb1);
+    // Only the transmitter is used: each `write_all` consumes it and `wait()` hands it back.
+    let (tx, _rx) = serial.split();
+
+    let (_, c, tx) = tx.write_all(tx_stream, b"The quick brown fox").wait();
+    // asm::bkpt();
+
+    let (_, c, tx) = tx.write_all(c, b" jumps").wait();
+    // asm::bkpt();
+
+    tx.write_all(c, b" over the lazy dog.").wait();
+    // asm::bkpt();
+}
diff --git a/src/dma.rs b/src/dma.rs
new file mode 100644
index 0000000000000000000000000000000000000000..6b9e5e4b6d9541197b18bec0af2e049cd64cbbae
--- /dev/null
+++ b/src/dma.rs
@@ -0,0 +1,529 @@
+//! DMA support: transfer handles, buffer helpers and the `dma1` stream/channel type states.
+//!
+//! Work in progress: most of the register-level implementation is still commented out.
+
+#![allow(dead_code)]
+#![allow(unused)]
+#![allow(missing_docs)]
+
+use core::marker::PhantomData;
+use core::ops;
+
+use rcc::AHB1;
+use stm32f4x::USART2;
+
+/// Errors reported by the DMA abstractions.
+#[derive(Debug)]
+pub enum Error {
+    /// Data was overwritten before it could be read.
+    Overrun,
+    #[doc(hidden)] _Extensible,
+}
+
+/// DMA events; the commented-out `listen`/`unlisten` code maps them to interrupt enable bits.
+pub enum Event {
+    /// Half of the transfer is done (`htie` in the commented-out code).
+    HalfTransfer,
+    /// The transfer is complete (`tcie` in the commented-out code).
+    TransferComplete,
+}
+
+/// Selects one of the two halves of a `CircBuffer`.
+#[derive(Clone, Copy, PartialEq)]
+pub enum Half {
+    /// First element of the buffer pair.
+    First,
+    /// Second element of the buffer pair.
+    Second,
+}
+
+/// A pair of `BUFFER`s bundled with a DMA stream.
+pub struct CircBuffer<BUFFER, STREAM>
+where
+    BUFFER: 'static,
+{
+    buffer: &'static mut [BUFFER; 2],
+    channel: STREAM,
+    // Tracks one `Half` of the pair; `new` initialises it to `Half::Second`.
+    readable_half: Half,
+}
+
+impl<BUFFER, STREAM> CircBuffer<BUFFER, STREAM> {
+    /// Bundles `buf` with `chan`; `readable_half` starts out as `Half::Second`.
+    pub(crate) fn new(buf: &'static mut [BUFFER; 2], chan: STREAM) -> Self {
+        CircBuffer {
+            buffer: buf,
+            channel: chan,
+            readable_half: Half::Second,
+        }
+    }
+}
+
+/// Buffer handles accepted for transfers; implemented for `&'static B` and `&'static mut B`.
+pub trait Static<B> {
+    /// Returns a shared reference to the underlying `B`.
+    fn borrow(&self) -> &B;
+}
+
+impl<B> Static<B> for &'static B {
+    fn borrow(&self) -> &B {
+        *self
+    }
+}
+
+impl<B> Static<B> for &'static mut B {
+    fn borrow(&self) -> &B {
+        *self
+    }
+}
+
+/// Extension trait to split a DMA peripheral into its streams.
+pub trait DmaExt {
+    /// The value handed back by `split`.
+    type Streams;
+
+    /// Consumes the peripheral, given access to `AHB1`, and returns its streams.
+    fn split(self, ahb: &mut AHB1) -> Self::Streams;
+}
+
+/// A DMA transfer handle that keeps buffer, stream and payload together until `wait`ed on.
+pub struct Transfer<MODE, BUFFER, STREAM, PAYLOAD> {
+    _mode: PhantomData<MODE>,
+    buffer: BUFFER,
+    stream: STREAM,
+    payload: PAYLOAD,
+}
+
+impl<BUFFER, STREAM, PAYLOAD> Transfer<R, BUFFER, STREAM, PAYLOAD> {
+    /// Builds a transfer tagged with the `R` mode marker.
+    pub(crate) fn r(buffer: BUFFER, stream: STREAM, payload: PAYLOAD) -> Self {
+        Transfer {
+            _mode: PhantomData,
+            buffer,
+            stream,
+            payload,
+        }
+    }
+}
+
+impl<BUFFER, STREAM, PAYLOAD> Transfer<W, BUFFER, STREAM, PAYLOAD> {
+    /// Builds a transfer tagged with the `W` mode marker.
+    pub(crate) fn w(buffer: BUFFER, stream: STREAM, payload: PAYLOAD) -> Self {
+        Transfer {
+            _mode: PhantomData,
+            buffer,
+            stream,
+            payload,
+        }
+    }
+}
+
+// Only `R`-tagged transfers give shared access to the buffer through `Deref`.
+impl<BUFFER, STREAM, PAYLOAD> ops::Deref for Transfer<R, BUFFER, STREAM, PAYLOAD> {
+    type Target = BUFFER;
+
+    fn deref(&self) -> &BUFFER {
+        &self.buffer
+    }
+}
+
+/// Read transfer
+pub struct R;
+
+/// Write transfer
+pub struct W;
+
+/// Output mode (type state)
+pub struct Output<MODE> {
+    _mode: PhantomData<MODE>,
+}
+
+pub mod dma1 {
+    use core::marker::PhantomData;
+    use stm32f4x::{DMA1, dma2};
+    use stm32f4x::USART2;
+    use dma::{CircBuffer, DmaExt, Error, Event, Half, Transfer};
+    use rcc::AHB1;
+
+    // Stream === Pin: one type per DMA stream, parameterised by its selected channel
+    pub struct S0<CHANNEL> {
+        _channel: PhantomData<CHANNEL>,
+    }
+    // pub struct S1<CHANNEL> {}
+    // pub struct S2<CHANNEL> {}
+    // pub struct S3<CHANNEL> {}
+    pub struct S4<CHANNEL> {
+        _channel: PhantomData<CHANNEL>,
+    }
+    pub struct S5<CHANNEL> {
+        _channel: PhantomData<CHANNEL>,
+    }
+    pub struct S6<CHANNEL> {
+        _channel: PhantomData<CHANNEL>,
+    }
+    //pub struct S7<CHANNEL> {}
+
+    // Channels === Alternate function: unit markers used as a stream's `CHANNEL` parameter
+    pub struct C0;
+    pub struct C1;
+    pub struct C2;
+    pub struct C3;
+    pub struct C4;
+    pub struct C5;
+    pub struct C6;
+    pub struct C7;
+    pub struct C8;
+    pub struct C9;
+
+    // into_channel: re-tags the stream type with channel 4; no register is touched here
+    impl<CHANNEL> S5<CHANNEL> {
+        pub fn into_channel4(self) -> S5<C4> {
+            S5 {
+                _channel: PhantomData,
+            }
+        }
+    }
+
+    // === Usart1TxPin: marks stream/channel pairs usable for a USART's TX requests
+    pub unsafe trait Usart1TxStream<USART> {}
+    // NOTE(review): on STM32F4, USART2_TX is usually stream 6 / channel 4 - confirm S5<C4>
+    unsafe impl Usart1TxStream<USART2> for S5<C4> {}
+
+    pub struct Streams(pub S4<C0>, pub S5<C0>);
+
+    // impl  {
+    //                         pub fn listen(&mut self, event: Event) {
+    //                             match event {
+    //                                 Event::HalfTransfer => self.ccr().modify(|_, w| w.htie().set_bit()),
+    //                                 Event::TransferComplete => {
+    //                                     self.ccr().modify(|_, w| w.tcie().set_bit())
+    //                                 }
+    //                             }
+    //                         }
+
+    impl DmaExt for DMA1 {
+        type Streams = Streams;
+
+        fn split(self, ahb: &mut AHB1) -> Streams {
+            // ahb.enr().modify(|_, w| w.$dmaXen().enabled());
+
+            // // reset the DMA control registers (stops all on-going transfers)
+            // $(
+            //     self.$ccrX.reset();
+            // )+
+            // NOTE: stub - `ahb` stays unused until the commented-out steps above are enabled
+            // Channels((), $($CX { _0: () }),+)
+            Streams(
+                S4 {
+                    _channel: PhantomData,
+                },
+                S5 {
+                    _channel: PhantomData,
+                },
+            )
+        }
+    }
+
+    impl<BUFFER, PAYLOAD, MODE> Transfer<MODE, BUFFER, S5<C4>, PAYLOAD> {
+        pub fn wait(mut self) -> (BUFFER, S5<C4>, PAYLOAD) {
+            // // XXX should we check for transfer errors here?
+            // // The manual says "A DMA transfer error can be generated by reading
+            // // from or writing to a reserved address space". I think it's impossible
+            // // to get to that state with our type safe API and *safe* Rust.
+            // while self.channel.isr().$tcifX().bit_is_clear() {}
+
+            // self.channel.ifcr().write(|w| w.$cgifX().set_bit());
+
+            // self.channel.ccr().modify(|_, w| w.en().clear_bit());
+
+            // // TODO can we weaken this compiler barrier?
+            // // NOTE(compiler_fence) operations on `buffer` should not be reordered
+            // // before the previous statement, which marks the DMA transfer as done
+            // atomic::compiler_fence(Ordering::SeqCst);
+            // NOTE: stub - the polling above is commented out, so this returns immediately
+            (self.buffer, self.stream, self.payload)
+        }
+    }
+}
+// impl Usart1TxStream for S7<C4> {}
+
+// impl Serial {
+//     pub fn write_all<S>(self, stream: S, buffer: B) where S: Usart1TxStream {}
+// }
+
+// macro_rules! dma {
+//     ($($DMAX:ident: ($dmaX:ident, $dmaXen:ident, $dmaXrst:ident, {
+//         $($CX:ident: (
+//             $ccrX:ident,
+//             $CCRX:ident,
+//             $cndtrX:ident,
+//             $CNDTRX:ident,
+//             $cparX:ident,
+//             $CPARX:ident,
+//             $cmarX:ident,
+//             $CMARX:ident,
+//             $htifX:ident,
+//             $tcifX:ident,
+//             $chtifX:ident,
+//             $ctcifX:ident,
+//             $cgifX:ident
+//         ),)+
+//     }),)+) => {
+//         $(
+//             pub mod $dmaX {
+//                 use core::sync::atomic::{self, Ordering};
+
+//                 use stm32f4x::{$DMAX, dma2};
+
+//                 use dma::{CircBuffer, DmaExt, Error, Event, Half, Transfer};
+//                 use rcc::AHB1;
+
+//                 pub struct Channels((), $(pub $CX),+);
+
+//                 $(
+//                     pub struct $CX { _0: () }
+
+//                     impl $CX {
+//                         pub fn listen(&mut self, event: Event) {
+//                             match event {
+//                                 Event::HalfTransfer => self.ccr().modify(|_, w| w.htie().set_bit()),
+//                                 Event::TransferComplete => {
+//                                     self.ccr().modify(|_, w| w.tcie().set_bit())
+//                                 }
+//                             }
+//                         }
+
+//                         pub fn unlisten(&mut self, event: Event) {
+//                             match event {
+//                                 Event::HalfTransfer => {
+//                                     self.ccr().modify(|_, w| w.htie().clear_bit())
+//                                 },
+//                                 Event::TransferComplete => {
+//                                     self.ccr().modify(|_, w| w.tcie().clear_bit())
+//                                 }
+//                             }
+//                         }
+
+//                         // interrupt
+//                         pub(crate) fn isr(&self) -> dma2::sr::R {
+//                             // NOTE(unsafe) atomic read with no side effects
+//                             unsafe { (*$DMAX::ptr()).sr.read() }
+//                         }
+
+//                         pub(crate) fn ifcr(&self) -> &dma2::IFCR {
+//                             unsafe { &(*$DMAX::ptr()).cr }
+//                         }
+
+//                         pub(crate) fn ccr(&mut self) -> &dma2::$CCRX {
+//                             unsafe { &(*$DMAX::ptr()).$ccrX }
+//                         }
+
+//                         pub(crate) fn cndtr(&mut self) -> &dma2::$CNDTRX {
+//                             unsafe { &(*$DMAX::ptr()).$cndtrX }
+//                         }
+
+//                         pub(crate) fn cpar(&mut self) -> &dma2::$CPARX {
+//                             unsafe { &(*$DMAX::ptr()).$cparX }
+//                         }
+
+//                         pub(crate) fn cmar(&mut self) -> &dma2::$CMARX {
+//                             unsafe { &(*$DMAX::ptr()).$cmarX }
+//                         }
+//                     }
+
+//                     impl<B> CircBuffer<B, $CX> {
+//                         /// Peeks into the readable half of the buffer
+//                         pub fn peek<R, F>(&mut self, f: F) -> Result<R, Error>
+//                             where
+//                             F: FnOnce(&B, Half) -> R,
+//                         {
+//                             let half_being_read = self.readable_half()?;
+
+//                             let buf = match half_being_read {
+//                                 Half::First => &self.buffer[0],
+//                                 Half::Second => &self.buffer[1],
+//                             };
+
+//                             // XXX does this need a compiler barrier?
+//                             let ret = f(buf, half_being_read);
+
+//                             let isr = self.channel.isr();
+//                             let first_half_is_done = isr.$htifX().bit_is_set();
+//                             let second_half_is_done = isr.$tcifX().bit_is_set();
+
+//                             if (half_being_read == Half::First && second_half_is_done) ||
+//                                 (half_being_read == Half::Second && first_half_is_done) {
+//                                 Err(Error::Overrun)
+//                             } else {
+//                                 Ok(ret)
+//                             }
+//                         }
+
+//                         /// Returns the `Half` of the buffer that can be read
+//                         pub fn readable_half(&mut self) -> Result<Half, Error> {
+//                             let isr = self.channel.isr();
+//                             let first_half_is_done = isr.$htifX().bit_is_set();
+//                             let second_half_is_done = isr.$tcifX().bit_is_set();
+
+//                             if first_half_is_done && second_half_is_done {
+//                                 return Err(Error::Overrun);
+//                             }
+
+//                             let last_read_half = self.readable_half;
+
+//                             Ok(match last_read_half {
+//                                 Half::First => {
+//                                     if second_half_is_done {
+//                                         self.channel.ifcr().write(|w| w.$ctcifX().set_bit());
+
+//                                         self.readable_half = Half::Second;
+//                                         Half::Second
+//                                     } else {
+//                                         last_read_half
+//                                     }
+//                                 }
+//                                 Half::Second => {
+//                                     if first_half_is_done {
+//                                         self.channel.ifcr().write(|w| w.$chtifX().set_bit());
+
+//                                         self.readable_half = Half::First;
+//                                         Half::First
+//                                     } else {
+//                                         last_read_half
+//                                     }
+//                                 }
+//                             })
+//                         }
+//                     }
+
+//                     impl<BUFFER, PAYLOAD, MODE> Transfer<MODE, BUFFER, $CX, PAYLOAD> {
+//                         pub fn wait(mut self) -> (BUFFER, $CX, PAYLOAD) {
+//                             // XXX should we check for transfer errors here?
+//                             // The manual says "A DMA transfer error can be generated by reading
+//                             // from or writing to a reserved address space". I think it's impossible
+//                             // to get to that state with our type safe API and *safe* Rust.
+//                             while self.channel.isr().$tcifX().bit_is_clear() {}
+
+//                             self.channel.ifcr().write(|w| w.$cgifX().set_bit());
+
+//                             self.channel.ccr().modify(|_, w| w.en().clear_bit());
+
+//                             // TODO can we weaken this compiler barrier?
+//                             // NOTE(compiler_fence) operations on `buffer` should not be reordered
+//                             // before the previous statement, which marks the DMA transfer as done
+//                             atomic::compiler_fence(Ordering::SeqCst);
+
+//                             (self.buffer, self.channel, self.payload)
+//                         }
+//                     }
+
+//                 )+
+
+//             }
+//         )+
+//     }
+// }
+
+// dma! {
+//     DMA1: (dma1, dma1en, dma1rst, {
+//         C1: (
+//             ccr1, CCR1,
+//             cndtr1, CNDTR1,
+//             cpar1, CPAR1,
+//             cmar1, CMAR1,
+//             htif1, tcif1,
+//             chtif1, ctcif1, cgif1
+//         ),
+//         C2: (
+//             ccr2, CCR2,
+//             cndtr2, CNDTR2,
+//             cpar2, CPAR2,
+//             cmar2, CMAR2,
+//             htif2, tcif2,
+//             chtif2, ctcif2, cgif2
+//         ),
+//         C3: (
+//             ccr3, CCR3,
+//             cndtr3, CNDTR3,
+//             cpar3, CPAR3,
+//             cmar3, CMAR3,
+//             htif3, tcif3,
+//             chtif3, ctcif3, cgif3
+//         ),
+//         C4: (
+//             ccr4, CCR4,
+//             cndtr4, CNDTR4,
+//             cpar4, CPAR4,
+//             cmar4, CMAR4,
+//             htif4, tcif4,
+//             chtif4, ctcif4, cgif4
+//         ),
+//         C5: (
+//             ccr5, CCR5,
+//             cndtr5, CNDTR5,
+//             cpar5, CPAR5,
+//             cmar5, CMAR5,
+//             htif5, tcif5,
+//             chtif5, ctcif5, cgif5
+//         ),
+//         C6: (
+//             ccr6, CCR6,
+//             cndtr6, CNDTR6,
+//             cpar6, CPAR6,
+//             cmar6, CMAR6,
+//             htif6, tcif6,
+//             chtif6, ctcif6, cgif6
+//         ),
+//         C7: (
+//             ccr7, CCR7,
+//             cndtr7, CNDTR7,
+//             cpar7, CPAR7,
+//             cmar7, CMAR7,
+//             htif7, tcif7,
+//             chtif7, ctcif7, cgif7
+//         ),
+//     }),
+
+//     DMA2: (dma2, dma2en, dma2rst, {
+//         C1: (
+//             ccr1, CCR1,
+//             cndtr1, CNDTR1,
+//             cpar1, CPAR1,
+//             cmar1, CMAR1,
+//             htif1, tcif1,
+//             chtif1, ctcif1, cgif1
+//         ),
+//         C2: (
+//             ccr2, CCR2,
+//             cndtr2, CNDTR2,
+//             cpar2, CPAR2,
+//             cmar2, CMAR2,
+//             htif2, tcif2,
+//             chtif2, ctcif2, cgif2
+//         ),
+//         C3: (
+//             ccr3, CCR3,
+//             cndtr3, CNDTR3,
+//             cpar3, CPAR3,
+//             cmar3, CMAR3,
+//             htif3, tcif3,
+//             chtif3, ctcif3, cgif3
+//         ),
+//         C4: (
+//             ccr4, CCR4,
+//             cndtr4, CNDTR4,
+//             cpar4, CPAR4,
+//             cmar4, CMAR4,
+//             htif4, tcif4,
+//             chtif4, ctcif4, cgif4
+//         ),
+//         C5: (
+//             ccr5, CCR5,
+//             cndtr5, CNDTR5,
+//             cpar5, CPAR5,
+//             cmar5, CMAR5,
+//             htif5, tcif5,
+//             chtif5, ctcif5, cgif5
+//         ),
+//     }),
+// }
diff --git a/src/lib.rs b/src/lib.rs
index aa7242ff359adc862746440e08e5f86fdf2e07d8..52507e7c9c1fa8185a89eecf8158230f3751c6b2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,9 +17,10 @@
 //!
 //! See examples directory for generic usage
 
-#![deny(missing_docs)]
+//#![deny(missing_docs)]
 //#![deny(warnings)]
 #![feature(never_type)]
+#![feature(unsize)]
 #![no_std]
 
 extern crate cast;
@@ -31,6 +32,8 @@ pub extern crate stm32f413 as stm32f4x;
 pub mod delay;
 pub mod flash;
 pub mod gpio;
+pub mod dma;
+
 // pub mod i2c;
 pub mod prelude;
 pub mod rcc;
diff --git a/src/prelude.rs b/src/prelude.rs
index 4f1fe7a74a5f400f8d6c9136e087fc57af51ad07..861e5ac0e8be61b02be3656361c82b0491a30553 100644
--- a/src/prelude.rs
+++ b/src/prelude.rs
@@ -5,3 +5,4 @@ pub use hal::prelude::*;
 pub use rcc::RccExt as _stm32f4x_hal_rcc_RccExt;
 pub use time::U32Ext as _stm32f4x_hal_time_U32Ext;
 pub use flash::FlashExt as _stm32f4x_hal_flash_FlashExt;
+pub use dma::DmaExt as _stm32f4x_hal_dma_DmaExt;
diff --git a/src/serial.rs b/src/serial.rs
index e72ec830527d771241528a17bfe1361260f0d645..c1ce8d8fb65401f6814758393928f855d7dfab44 100644
--- a/src/serial.rs
+++ b/src/serial.rs
@@ -5,13 +5,14 @@
 //! - TX = PA2
 //! - RX = PA3
 //! - Interrupt = USART2
-
+use core::sync::atomic::{self, Ordering};
 use core::ptr;
-use core::marker::PhantomData;
+use core::marker::{PhantomData, Unsize};
 
 use hal::serial;
 use nb;
 use stm32f4x::{USART1, USART2, USART6};
+use dma::{Static, Transfer, R};
 
 // usart2
 use gpio::gpioa::{PA2, PA3};
@@ -239,6 +240,78 @@ macro_rules! hal {
                     }
                 }
             }
+            impl Tx<$USARTX> {
+                pub fn write_all<A, B, C>(
+                    self,
+                    mut chan: C,
+                    buffer: B,
+                ) -> Transfer<R, B, C, Self>
+                where
+                    A: Unsize<[u8]>,
+                    B: Static<A>,
+                    C:
+                {
+                    //              // write!(dma1, "hi {}", 1);
+                    // let usart2 = self.0;
+
+        // if dma1.s6cr.read().en().bit_is_set() {
+        //     return Err(dma::Error::InUse);
+        // }
+
+        // let buffer: &[u8] = buffer.lock();
+
+        // dma1.s6ndtr
+        //     .write(|w| unsafe { w.ndt().bits(u16(buffer.len()).unwrap()) });
+        // dma1.s6par
+        //     .write(|w| unsafe { w.bits(&usart2.dr as *const _ as u32) });
+        // dma1.s6m0ar
+        //     .write(|w| unsafe { w.bits(buffer.as_ptr() as u32) });
+        // dma1.s6cr.modify(|_, w| w.en().set_bit());
+
+
+
+                        //let buffer: &[u8] = buffer.borrow();
+                        // chan.cmar().write(|w| unsafe {
+                        //     w.ma().bits(buffer.as_ptr() as usize as u32)
+                        // });
+                        // chan.cndtr().write(|w| unsafe{
+                        //     w.ndt().bits(u16(buffer.len()).unwrap())
+                        // });
+                        // chan.cpar().write(|w| unsafe {
+                        //     w.pa().bits(&(*$USARTX::ptr()).dr as *const _ as usize as u32)
+                        // });
+
+                        // TODO can we weaken this compiler barrier?
+                        // NOTE(compiler_fence) operations on `buffer` should not be reordered after
+                        // the next statement, which starts the DMA transfer
+                        atomic::compiler_fence(Ordering::SeqCst);
+
+                    //     chan.ccr().modify(|_, w| {
+                    //         w.mem2mem()
+                    //             .clear_bit()
+                    //             .pl()
+                    //             .medium()
+                    //             .msize()
+                    //             .bit8()
+                    //             .psize()
+                    //             .bit8()
+                    //             .minc()
+                    //             .set_bit()
+                    //             .pinc()
+                    //             .clear_bit()
+                    //             .circ()
+                    //             .clear_bit()
+                    //             .dir()
+                    //             .set_bit()
+                    //             .en()
+                    //             .set_bit()
+                    //     });
+                    // NOTE: stub - the DMA setup above is disabled, so nothing is started.
+                    // Bundles `buffer`, `chan` and `self` into the returned `Transfer`.
+                    Transfer::r(buffer, chan, self)
+                }
+            }
+
         )+
     }
 }