diff --git a/examples/rtt_timing.rs b/examples/rtt_timing.rs
index ba33bee1af817613a203f1d78a14a3df3178f2cd..1437b5db735f884cb862852fbab4d6ddc2e1b99c 100644
--- a/examples/rtt_timing.rs
+++ b/examples/rtt_timing.rs
@@ -65,7 +65,7 @@ fn timed_loop() -> (u32, u32) {
 // A.3) Why do we need a wrapping subtraction?
 //
 // [Your answer here]
-// If end value has wrapped around then start > end. Thus we need to wrap around when we subtract so we get the correct difference value.
+// If the end value has wrapped around, then start > end. Thus we need a wrapping subtraction so that we still get the correct difference value.
 //
 // ------------------------------------------------------------------------
 // Now try a release (optimized build, see `Cargo.toml` for build options).
@@ -91,14 +91,7 @@ fn timed_loop() -> (u32, u32) {
 // Why do you think it differs that much?
 //
 // [Your answer here]
-// I think that the optimizer removes the for loop:
-//      for _ in 0..10000 {
-//          asm::nop();
-//      }
-// Thus the end variable is set right after the start variable. This makes the time difference very
-// small. But it is not near 10000 time faster because the function DWT::get_cycle_count() is much
-// slower at getting the cycle count then it is to run one iteration of the loop.
-//
+// The unoptimized version has to push and pop all the registers every time the nop function call is made. The optimized version doesn't have to do that, because it identifies that most of the registers are not used by the nop function. Thus it is ~68 times faster because there are 32 registers.
 //
 //
 // ------------------------------------------------------------------------
@@ -378,7 +371,6 @@ fn timed_loop() -> (u32, u32) {
 // > cargo size --example rtt_timing --release --features nightly
 //
 // [Your answer here]
-// 
 //  Compiling app v0.1.0 (/home/niklas/Desktop/D7020E/rtic_f4xx_nucleo)
 //   Finished release [optimized + debuginfo] target(s) in 0.63s
 //  text	   data	    bss	    dec	    hex	filename