diff --git a/examples/embassy/Cargo.toml b/examples/embassy/Cargo.toml
index 3a2a433..3d973e3 100644
--- a/examples/embassy/Cargo.toml
+++ b/examples/embassy/Cargo.toml
@@ -16,6 +16,9 @@ zynq7000-rt = { path = "../../zynq7000-rt" }
 zynq7000 = { path = "../../zynq7000" }
 zynq7000-hal = { path = "../../zynq7000-hal" }
 zynq7000-embassy = { path = "../../zynq7000-embassy" }
+static_cell = "2"
+critical-section = "1"
+heapless = "0.8"
 embedded-io = "0.6"
 embedded-hal = "1"
 fugit = "0.3"
@@ -23,6 +26,7 @@ log = "0.4"
 
 embassy-executor = { path = "/home/rmueller/Rust/embassy/embassy-executor", features = [
   "arch-cortex-ar",
-  "executor-thread"
+  "executor-thread",
+  "task-arena-size-65536"
 ]}
 embassy-time = { path = "/home/rmueller/Rust/embassy/embassy-time", version = "0.4" }
diff --git a/examples/embassy/src/bin/logger-non-blocking.rs b/examples/embassy/src/bin/logger-non-blocking.rs
new file mode 100644
index 0000000..aceb583
--- /dev/null
+++ b/examples/embassy/src/bin/logger-non-blocking.rs
@@ -0,0 +1,151 @@
+//! Example which uses the UART1 to send log messages.
+#![no_std]
+#![no_main]
+
+use core::panic::PanicInfo;
+use cortex_ar::asm::nop;
+use embassy_executor::Spawner;
+use embassy_time::{Duration, Ticker};
+use embedded_hal::digital::StatefulOutputPin;
+use embedded_io::Write;
+use log::{error, info};
+use zynq7000::PsPeripherals;
+use zynq7000_hal::{
+    BootMode,
+    clocks::Clocks,
+    gic::{GicConfigurator, GicInterruptHelper, Interrupt},
+    gpio::{Mio7, MioPin, MioPins, Output, PinState},
+    gtc::Gtc,
+    time::Hertz,
+    uart::{ClkConfigRaw, TxAsync, Uart, UartConfig, on_interrupt_tx},
+};
+
+use zynq7000_rt as _;
+
+// Define the clock frequency as a constant
+const PS_CLOCK_FREQUENCY: Hertz = Hertz::from_raw(33_333_300);
+
+/// Entry point (not called like a normal main function)
+#[unsafe(no_mangle)]
+pub extern "C" fn boot_core(cpu_id: u32) -> ! {
+    if cpu_id != 0 {
+        panic!("unexpected CPU ID {}", cpu_id);
+    }
+    main();
+}
+
+#[unsafe(export_name = "main")]
+#[embassy_executor::main]
+async fn main(spawner: Spawner) -> ! {
+    let dp = PsPeripherals::take().unwrap();
+    // Clock was already initialized by PS7 Init TCL script or FSBL, we just read it.
+    let clocks = Clocks::new_from_regs(PS_CLOCK_FREQUENCY).unwrap();
+    // Set up the global interrupt controller.
+    let mut gic = GicConfigurator::new_with_init(dp.gicc, dp.gicd);
+    gic.enable_all_interrupts();
+    gic.set_all_spi_interrupt_targets_cpu0();
+    gic.enable();
+    unsafe {
+        gic.enable_interrupts();
+    }
+    // Set up global timer counter and embassy time driver.
+    let gtc = Gtc::new(dp.gtc, clocks.arm_clocks());
+    zynq7000_embassy::init(clocks.arm_clocks(), gtc);
+
+    let mio_pins = MioPins::new(dp.gpio);
+
+    // Set up the UART, we are logging with it.
+    let uart_clk_config = ClkConfigRaw::new_autocalc_with_error(clocks.io_clocks(), 115200)
+        .unwrap()
+        .0;
+    let uart_tx = mio_pins.mio48.into_uart();
+    let uart_rx = mio_pins.mio49.into_uart();
+    let mut uart = Uart::new_with_mio(
+        dp.uart_1,
+        UartConfig::new_with_clk_config(uart_clk_config),
+        (uart_tx, uart_rx),
+    )
+    .unwrap();
+    uart.write_all(b"-- Zynq 7000 Logging example --\n\r")
+        .unwrap();
+    uart.flush().unwrap();
+    let (tx, _rx) = uart.split();
+    let mut logger = TxAsync::new(tx);
+
+    zynq7000_hal::log::rb::init(log::LevelFilter::Trace);
+
+    let boot_mode = BootMode::new();
+    info!("Boot mode: {:?}", boot_mode);
+
+    let led = mio_pins.mio7.into_output(PinState::Low);
+    spawner.spawn(led_task(led)).unwrap();
+    let mut log_buf: [u8; 2048] = [0; 2048];
+    let frame_queue = zynq7000_hal::log::rb::get_frame_queue();
+    loop {
+        let next_frame_len = frame_queue.receive().await;
+        zynq7000_hal::log::rb::read_next_frame(next_frame_len, &mut log_buf);
+        logger.write(&log_buf[0..next_frame_len]).await;
+    }
+}
+
+#[embassy_executor::task]
+async fn led_task(mut mio_led: MioPin<Mio7, Output>) {
+    let mut ticker = Ticker::every(Duration::from_millis(1000));
+    loop {
+        mio_led.toggle().unwrap();
+        info!("Toggling LED");
+        ticker.next().await;
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn _irq_handler() {
+    let mut gic_helper = GicInterruptHelper::new();
+    let irq_info = gic_helper.acknowledge_interrupt();
+    match irq_info.interrupt() {
+        Interrupt::Sgi(_) => (),
+        Interrupt::Ppi(ppi_interrupt) => {
+            if ppi_interrupt == zynq7000_hal::gic::PpiInterrupt::GlobalTimer {
+                unsafe {
+                    zynq7000_embassy::on_interrupt();
+                }
+            }
+        }
+        Interrupt::Spi(spi_interrupt) => {
+            if spi_interrupt == zynq7000_hal::gic::SpiInterrupt::Uart1 {
+                on_interrupt_tx(zynq7000_hal::uart::UartId::Uart1);
+            }
+        }
+        Interrupt::Invalid(_) => (),
+        Interrupt::Spurious => (),
+    }
+    gic_helper.end_of_interrupt(irq_info);
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn _abort_handler() {
+    loop {
+        nop();
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn _undefined_handler() {
+    loop {
+        nop();
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn _prefetch_handler() {
+    loop {
+        nop();
+    }
+}
+
+/// Panic handler
+#[panic_handler]
+fn panic(info: &PanicInfo) -> ! {
+    error!("Panic: {:?}", info);
+    loop {}
+}
diff --git a/examples/embassy/src/bin/pwm.rs b/examples/embassy/src/bin/pwm.rs
index fc5ee75..8eb741f 100644
--- a/examples/embassy/src/bin/pwm.rs
+++ b/examples/embassy/src/bin/pwm.rs
@@ -80,7 +80,13 @@ async fn main(_spawner: Spawner) -> ! {
     uart.write_all(b"-- Zynq 7000 Embassy Hello World --\n\r")
         .unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
diff --git a/examples/embassy/src/main.rs b/examples/embassy/src/main.rs
index 1692354..a70da10 100644
--- a/examples/embassy/src/main.rs
+++ b/examples/embassy/src/main.rs
@@ -69,7 +69,13 @@ async fn main(_spawner: Spawner) -> ! {
     uart.write_all(b"-- Zynq 7000 Embassy Hello World --\n\r")
         .unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
diff --git a/examples/simple/src/bin/gtc-ticks.rs b/examples/simple/src/bin/gtc-ticks.rs
index f075877..565d673 100644
--- a/examples/simple/src/bin/gtc-ticks.rs
+++ b/examples/simple/src/bin/gtc-ticks.rs
@@ -70,7 +70,13 @@ pub fn main() -> ! {
     uart.write_all(b"-- Zynq 7000 GTC Ticks example --\n\r")
         .unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let mut led = mio_pins.mio7.into_output(PinState::Low);
     loop {
diff --git a/examples/simple/src/bin/logger.rs b/examples/simple/src/bin/logger.rs
index 1859e23..0e9a460 100644
--- a/examples/simple/src/bin/logger.rs
+++ b/examples/simple/src/bin/logger.rs
@@ -70,7 +70,13 @@ pub fn main() -> ! {
     uart.write_all(b"-- Zynq 7000 Logging example --\n\r")
         .unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
diff --git a/examples/zedboard/Cargo.toml b/examples/zedboard/Cargo.toml
index c0031d6..16dd211 100644
--- a/examples/zedboard/Cargo.toml
+++ b/examples/zedboard/Cargo.toml
@@ -16,7 +16,7 @@ zynq7000-rt = { path = "../../zynq7000-rt" }
 zynq7000 = { path = "../../zynq7000" }
 zynq7000-hal = { path = "../../zynq7000-hal" }
 zynq7000-embassy = { path = "../../zynq7000-embassy" }
-l3gd20 = { git = "https://github.com/us-irs/l3gd20.git", branch = "add-i2c-if" }
+l3gd20 = { git = "https://github.com/us-irs/l3gd20.git", branch = "add-async-if" }
 embedded-io = "0.6"
 arbitrary-int = "1.3"
 embedded-io-async = "0.6"
diff --git a/examples/zedboard/src/bin/l3gd20h-i2c-mio.rs b/examples/zedboard/src/bin/l3gd20h-i2c-mio.rs
index 5437205..c6b3471 100644
--- a/examples/zedboard/src/bin/l3gd20h-i2c-mio.rs
+++ b/examples/zedboard/src/bin/l3gd20h-i2c-mio.rs
@@ -84,7 +84,13 @@ async fn main(_spawner: Spawner) -> ! {
     uart.write_all(b"-- Zynq 7000 Zedboard I2C L3GD20H example --\n\r")
         .unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
@@ -105,12 +111,11 @@ async fn main(_spawner: Spawner) -> ! {
     )
     .unwrap();
     let i2c = i2c::I2c::new_with_mio(dp.i2c_1, clk_config, (sck_pin, sda_pin)).unwrap();
-
-    let mut delay = Delay;
     let mut l3gd20 = l3gd20::i2c::L3gd20::new(i2c, l3gd20::i2c::I2cAddr::Sa0Low).unwrap();
     let who_am_i = l3gd20.who_am_i().unwrap();
     info!("L3GD20 WHO_AM_I: 0x{:02X}", who_am_i);
 
+    let mut delay = Delay;
     let mut ticker = Ticker::every(Duration::from_millis(400));
     let mut mio_led = gpio_pins.mio.mio7.into_output(PinState::Low);
 
diff --git a/examples/zedboard/src/bin/l3gd20h-spi-mio.rs b/examples/zedboard/src/bin/l3gd20h-spi-mio.rs
index 094483e..87ca3b5 100644
--- a/examples/zedboard/src/bin/l3gd20h-spi-mio.rs
+++ b/examples/zedboard/src/bin/l3gd20h-spi-mio.rs
@@ -22,11 +22,11 @@ use zynq7000_hal::{
     clocks::Clocks,
     configure_level_shifter,
     gic::{GicConfigurator, GicInterruptHelper, Interrupt},
-    gpio::{EmioPin, GpioPins, PinState},
+    gpio::{DynMioPin, EmioPin, GpioPins, PinState},
     gtc::Gtc,
-    spi::{self, SpiWithHwCs},
+    spi::{self, SpiAsync, SpiId, SpiWithHwCs, SpiWithHwCsAsync, on_interrupt},
     time::Hertz,
-    uart,
+    uart::{self, TxAsync, on_interrupt_tx},
 };
 
 use zynq7000::{PsPeripherals, slcr::LevelShifterConfig, spi::DelayControl};
@@ -36,6 +36,7 @@ use zynq7000_rt as _;
 const PS_CLOCK_FREQUENCY: Hertz = Hertz::from_raw(33_333_300);
 
 const DEBUG_SPI_CLK_CONFIG: bool = false;
+const BLOCKING: bool = false;
 
 /// Entry point (not called like a normal main function)
 #[unsafe(no_mangle)]
@@ -48,7 +49,7 @@ pub extern "C" fn boot_core(cpu_id: u32) -> ! {
 
 #[embassy_executor::main]
 #[unsafe(export_name = "main")]
-async fn main(_spawner: Spawner) -> ! {
+async fn main(spawner: Spawner) -> ! {
     // Enable PS-PL level shifters.
     configure_level_shifter(LevelShifterConfig::EnableAll);
     let dp = PsPeripherals::take().unwrap();
@@ -91,8 +92,7 @@ async fn main(_spawner: Spawner) -> ! {
     .unwrap();
     uart.write_all(b"-- Zynq 7000 Zedboard SPI L3GD20H example --\n\r")
         .unwrap();
-    // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    zynq7000_hal::log::rb::init(log::LevelFilter::Trace);
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
@@ -129,7 +129,7 @@ async fn main(_spawner: Spawner) -> ! {
     assert_eq!(mod_id, spi::MODULE_ID);
     assert!(spi.sclk() <= Hertz::from_raw(10_000_000));
     let min_delay = (spi.sclk().raw() * 5) / 1_000_000_000;
-    spi.configure_delays(
+    spi.inner().configure_delays(
         DelayControl::builder()
             .with_inter_word_cs_deassert(0)
             .with_between_cs_assertion(0)
@@ -138,15 +138,7 @@ async fn main(_spawner: Spawner) -> ! {
             .build(),
     );
 
-    let mut delay = Delay;
-    let spi_dev = SpiWithHwCs::new(spi, spi::ChipSelect::Slave0, delay.clone());
-    let mut l3gd20 = l3gd20::spi::L3gd20::new(spi_dev).unwrap();
-    let who_am_i = l3gd20.who_am_i().unwrap();
-    info!("L3GD20 WHO_AM_I: 0x{:02X}", who_am_i);
-
-    let mut ticker = Ticker::every(Duration::from_millis(400));
-    let mut mio_led = gpio_pins.mio.mio7.into_output(PinState::Low);
-
+    let mio_led = gpio_pins.mio.mio7.into_output(PinState::Low).downgrade();
     let mut emio_leds: [EmioPin; 8] = [
         gpio_pins.emio.take(0).unwrap(),
         gpio_pins.emio.take(1).unwrap(),
@@ -164,6 +156,39 @@ async fn main(_spawner: Spawner) -> ! {
             led.into_output(PinState::Low);
         }
     }
+    spawner.spawn(logger_task(uart)).unwrap();
+    if BLOCKING {
+        blocking_application(mio_led, emio_leds, spi).await;
+    } else {
+        non_blocking_application(mio_led, emio_leds, spi).await;
+    }
+}
+
+#[embassy_executor::task]
+pub async fn logger_task(uart: uart::Uart) {
+    let (tx, _) = uart.split();
+    let mut tx_async = TxAsync::new(tx);
+    let frame_queue = zynq7000_hal::log::rb::get_frame_queue();
+    let mut log_buf: [u8; 2048] = [0; 2048];
+    loop {
+        let next_frame_len = frame_queue.receive().await;
+        zynq7000_hal::log::rb::read_next_frame(next_frame_len, &mut log_buf);
+        tx_async.write(&log_buf[0..next_frame_len]).await;
+    }
+}
+
+pub async fn blocking_application(
+    mut mio_led: DynMioPin,
+    mut emio_leds: [EmioPin; 8],
+    spi: spi::Spi,
+) -> ! {
+    let mut delay = Delay;
+    let spi_dev = SpiWithHwCs::new(spi, spi::ChipSelect::Slave0, delay.clone());
+    let mut l3gd20 = l3gd20::spi::L3gd20::new(spi_dev).unwrap();
+    let who_am_i = l3gd20.who_am_i().unwrap();
+    info!("L3GD20 WHO_AM_I: 0x{:02X}", who_am_i);
+
+    let mut ticker = Ticker::every(Duration::from_millis(400));
 
     // Power up time for the sensor to get good readings.
     delay.delay_ms(50).await;
@@ -181,6 +206,38 @@ async fn main(_spawner: Spawner) -> ! {
     }
 }
 
+pub async fn non_blocking_application(
+    mut mio_led: DynMioPin,
+    mut emio_leds: [EmioPin; 8],
+    spi: spi::Spi,
+) -> ! {
+    let mut delay = Delay;
+    let spi_async = SpiAsync::new(spi);
+    let spi_dev = SpiWithHwCsAsync::new(spi_async, spi::ChipSelect::Slave0, delay.clone());
+    let mut l3gd20 = l3gd20::asynchronous::spi::L3gd20::new(spi_dev)
+        .await
+        .unwrap();
+    let who_am_i = l3gd20.who_am_i().await.unwrap();
+    info!("L3GD20 WHO_AM_I: 0x{:02X}", who_am_i);
+
+    let mut ticker = Ticker::every(Duration::from_millis(400));
+
+    // Power up time for the sensor to get good readings.
+    delay.delay_ms(50).await;
+    loop {
+        mio_led.toggle().unwrap();
+
+        let measurements = l3gd20.all().await.unwrap();
+        info!("L3GD20: {:?}", measurements);
+        info!("L3GD20 Temp: {:?}", measurements.temp_celcius());
+        for led in emio_leds.iter_mut() {
+            led.toggle().unwrap();
+        }
+
+        ticker.next().await; // Wait for the next cycle of the ticker
+    }
+}
+
 #[unsafe(no_mangle)]
 pub extern "C" fn _irq_handler() {
     let mut gic_helper = GicInterruptHelper::new();
@@ -194,7 +251,13 @@ pub extern "C" fn _irq_handler() {
                 }
             }
         }
-        Interrupt::Spi(_spi_interrupt) => (),
+        Interrupt::Spi(spi_interrupt) => {
+            if spi_interrupt == zynq7000_hal::gic::SpiInterrupt::Spi1 {
+                on_interrupt(SpiId::Spi1);
+            } else if spi_interrupt == zynq7000_hal::gic::SpiInterrupt::Uart1 {
+                on_interrupt_tx(zynq7000_hal::uart::UartId::Uart1);
+            }
+        }
         Interrupt::Invalid(_) => (),
         Interrupt::Spurious => (),
     }
diff --git a/examples/zedboard/src/bin/uart-blocking.rs b/examples/zedboard/src/bin/uart-blocking.rs
index 619d281..a874c6f 100644
--- a/examples/zedboard/src/bin/uart-blocking.rs
+++ b/examples/zedboard/src/bin/uart-blocking.rs
@@ -134,7 +134,13 @@ async fn main(_spawner: Spawner) -> ! {
     log_uart.write_all(INIT_STRING.as_bytes()).unwrap();
 
     // Safety: Co-operative multi-tasking is used.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(log_uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            log_uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     // UART0 routed through EMIO to PL pins.
     let mut uart_0 =
diff --git a/examples/zedboard/src/bin/uart-non-blocking.rs b/examples/zedboard/src/bin/uart-non-blocking.rs
index cedef90..4a2e983 100644
--- a/examples/zedboard/src/bin/uart-non-blocking.rs
+++ b/examples/zedboard/src/bin/uart-non-blocking.rs
@@ -212,7 +212,13 @@ async fn main(spawner: Spawner) -> ! {
     }
 
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(log_uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            log_uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     // Set up UART multiplexing before creating and configuring the UARTs.
     let mut uart_mux = UartMultiplexer::new([
diff --git a/examples/zedboard/src/main.rs b/examples/zedboard/src/main.rs
index 0662b02..9d43c7f 100644
--- a/examples/zedboard/src/main.rs
+++ b/examples/zedboard/src/main.rs
@@ -69,7 +69,13 @@ async fn main(_spawner: Spawner) -> ! {
     .unwrap();
     uart.write_all(INIT_STRING.as_bytes()).unwrap();
     // Safety: We are not multi-threaded yet.
-    unsafe { zynq7000_hal::log::init_unsafe_single_core(uart, log::LevelFilter::Trace, false) };
+    unsafe {
+        zynq7000_hal::log::uart_blocking::init_unsafe_single_core(
+            uart,
+            log::LevelFilter::Trace,
+            false,
+        )
+    };
 
     let boot_mode = BootMode::new();
     info!("Boot mode: {:?}", boot_mode);
diff --git a/zynq7000-hal/Cargo.toml b/zynq7000-hal/Cargo.toml
index 7a4b8ea..e3bb7a2 100644
--- a/zynq7000-hal/Cargo.toml
+++ b/zynq7000-hal/Cargo.toml
@@ -17,9 +17,13 @@ zynq7000 = { path = "../zynq7000" }
 arbitrary-int = "1.3"
 thiserror = { version = "2", default-features = false }
 num_enum = { version = "0.7", default-features = false }
+ringbuf = { version = "0.4.8", default-features = false }
 embedded-hal-nb = "1"
 embedded-io = "0.6"
 embedded-hal = "1"
+embedded-hal-async = "1"
+heapless = "0.8"
+static_cell = "2"
 delegate = "0.13"
 paste = "1"
 nb = "1"
diff --git a/zynq7000-hal/src/log.rs b/zynq7000-hal/src/log.rs
index be2c9f1..06e698a 100644
--- a/zynq7000-hal/src/log.rs
+++ b/zynq7000-hal/src/log.rs
@@ -1,130 +1,230 @@
-//! # Simple logging providers using the processing system [Uart]s
-use core::cell::{Cell, RefCell, UnsafeCell};
+//! # Simple logging providers.
 
-use cortex_ar::register::Cpsr;
-use critical_section::Mutex;
-use embedded_io::Write;
-use log::{LevelFilter, set_logger, set_max_level};
+/// Blocking UART loggers.
+pub mod uart_blocking {
+    use core::cell::{Cell, RefCell, UnsafeCell};
+    use embedded_io::Write as _;
 
-use crate::uart::Uart;
+    use cortex_ar::register::Cpsr;
+    use critical_section::Mutex;
+    use log::{LevelFilter, set_logger, set_max_level};
 
-pub struct UartLoggerBlocking(Mutex<RefCell<Option<Uart>>>);
+    use crate::uart::Uart;
 
-unsafe impl Send for UartLoggerBlocking {}
-unsafe impl Sync for UartLoggerBlocking {}
+    pub struct UartLoggerBlocking(Mutex<RefCell<Option<Uart>>>);
 
-static UART_LOGGER_BLOCKING: UartLoggerBlocking =
-    UartLoggerBlocking(Mutex::new(RefCell::new(None)));
+    unsafe impl Send for UartLoggerBlocking {}
+    unsafe impl Sync for UartLoggerBlocking {}
 
-/// Initialize the logger with a blocking UART instance.
-///
-/// This is a blocking logger which performs a write inside a critical section. This logger is
-/// thread-safe, but interrupts will be disabled while the logger is writing to the UART. It is
-/// strongly recommended to use an asychronous non-blocking logger instead if possible.
-pub fn init_blocking(uart: Uart, level: LevelFilter) {
-    // TODO: Impl debug for Uart
-    critical_section::with(|cs| {
-        let inner = UART_LOGGER_BLOCKING.0.borrow(cs);
-        inner.replace(Some(uart));
-    });
-    set_logger(&UART_LOGGER_BLOCKING).unwrap();
-    // Adjust as needed
-    set_max_level(level);
-}
+    static UART_LOGGER_BLOCKING: UartLoggerBlocking =
+        UartLoggerBlocking(Mutex::new(RefCell::new(None)));
 
-impl log::Log for UartLoggerBlocking {
-    fn enabled(&self, _metadata: &log::Metadata) -> bool {
-        true
+    /// Initialize the logger with a blocking UART instance.
+    ///
+    /// This is a blocking logger which performs a write inside a critical section. This logger is
+    /// thread-safe, but interrupts will be disabled while the logger is writing to the UART.
+    ///
+    /// For async applications, it is strongly recommended to use the asynchronous ring buffer
+    /// logger instead.
+    pub fn init_with_locks(uart: Uart, level: LevelFilter) {
+        // TODO: Impl debug for Uart
+        critical_section::with(|cs| {
+            let inner = UART_LOGGER_BLOCKING.0.borrow(cs);
+            inner.replace(Some(uart));
+        });
+        set_logger(&UART_LOGGER_BLOCKING).unwrap();
+        // Adjust as needed
+        set_max_level(level);
     }
 
-    fn log(&self, record: &log::Record) {
-        critical_section::with(|cs| {
-            let mut opt_logger = self.0.borrow(cs).borrow_mut();
-            if opt_logger.is_none() {
+    impl log::Log for UartLoggerBlocking {
+        fn enabled(&self, _metadata: &log::Metadata) -> bool {
+            true
+        }
+
+        fn log(&self, record: &log::Record) {
+            critical_section::with(|cs| {
+                let mut opt_logger = self.0.borrow(cs).borrow_mut();
+                if opt_logger.is_none() {
+                    return;
+                }
+                let logger = opt_logger.as_mut().unwrap();
+                writeln!(logger, "{} - {}\r", record.level(), record.args()).unwrap();
+            })
+        }
+
+        fn flush(&self) {}
+    }
+
+    pub struct UartLoggerUnsafeSingleThread {
+        skip_in_isr: Cell<bool>,
+        uart: UnsafeCell<Option<Uart>>,
+    }
+
+    unsafe impl Send for UartLoggerUnsafeSingleThread {}
+    unsafe impl Sync for UartLoggerUnsafeSingleThread {}
+
+    static UART_LOGGER_UNSAFE_SINGLE_THREAD: UartLoggerUnsafeSingleThread =
+        UartLoggerUnsafeSingleThread {
+            skip_in_isr: Cell::new(false),
+            uart: UnsafeCell::new(None),
+        };
+
+    /// Initialize the logger with a blocking UART instance.
+    ///
+    /// For async applications, it is strongly recommended to use the asynchronous ring buffer
+    /// logger instead.
+    ///
+    /// # Safety
+    ///
+    /// This is a blocking logger which performs a write WITHOUT a critical section. This logger is
+    /// NOT thread-safe. Users must ensure that this logger is not used inside a pre-emptive
+    /// multi-threading context and interrupt handlers.
+    pub unsafe fn create_unsafe_single_thread_logger(uart: Uart) -> UartLoggerUnsafeSingleThread {
+        UartLoggerUnsafeSingleThread {
+            skip_in_isr: Cell::new(false),
+            uart: UnsafeCell::new(Some(uart)),
+        }
+    }
+
+    /// Initialize the logger with a blocking UART instance which does not use locks.
+    ///
+    /// # Safety
+    ///
+    /// This is a blocking logger which performs a write WITHOUT a critical section. This logger is
+    /// NOT thread-safe, which might lead to garbled output. Log output in ISRs can optionally be
+    /// surpressed.
+    pub unsafe fn init_unsafe_single_core(uart: Uart, level: LevelFilter, skip_in_isr: bool) {
+        let opt_uart = unsafe { &mut *UART_LOGGER_UNSAFE_SINGLE_THREAD.uart.get() };
+        opt_uart.replace(uart);
+        UART_LOGGER_UNSAFE_SINGLE_THREAD
+            .skip_in_isr
+            .set(skip_in_isr);
+
+        set_logger(&UART_LOGGER_UNSAFE_SINGLE_THREAD).unwrap();
+        set_max_level(level); // Adjust as needed
+    }
+
+    impl log::Log for UartLoggerUnsafeSingleThread {
+        fn enabled(&self, _metadata: &log::Metadata) -> bool {
+            true
+        }
+
+        fn log(&self, record: &log::Record) {
+            if self.skip_in_isr.get() {
+                match Cpsr::read().mode().unwrap() {
+                    cortex_ar::register::cpsr::ProcessorMode::Fiq
+                    | cortex_ar::register::cpsr::ProcessorMode::Irq => {
+                        return;
+                    }
+                    _ => {}
+                }
+            }
+
+            let uart_mut = unsafe { &mut *self.uart.get() }.as_mut();
+            if uart_mut.is_none() {
                 return;
             }
-            let logger = opt_logger.as_mut().unwrap();
-            writeln!(logger, "{} - {}\r", record.level(), record.args()).unwrap();
+            writeln!(
+                uart_mut.unwrap(),
+                "{} - {}\r",
+                record.level(),
+                record.args()
+            )
+            .unwrap();
+        }
+
+        fn flush(&self) {}
+    }
+}
+
+/// Logger module which logs into a ring buffer to allow asynchronous logging handling.
+pub mod rb {
+    use core::cell::RefCell;
+    use core::fmt::Write as _;
+
+    use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
+    use log::{LevelFilter, set_logger, set_max_level};
+    use ringbuf::{
+        StaticRb,
+        traits::{Consumer, Producer},
+    };
+
+    /// Logger implementation which logs frames via a ring buffer and sends the frame sizes
+    /// as messages.
+    ///
+    /// The logger does not require allocation and reserved a generous amount of 4096 bytes for
+    /// both data buffer and ring buffer. This should be sufficient for most logging needs.
+    pub struct Logger {
+        frame_queue: embassy_sync::channel::Channel<CriticalSectionRawMutex, usize, 32>,
+        data_buf: critical_section::Mutex<RefCell<heapless::String<4096>>>,
+        ring_buf: critical_section::Mutex<RefCell<Option<StaticRb<u8, 4096>>>>,
+    }
+
+    unsafe impl Send for Logger {}
+    unsafe impl Sync for Logger {}
+
+    static LOGGER_RB: Logger = Logger {
+        frame_queue: embassy_sync::channel::Channel::new(),
+        data_buf: critical_section::Mutex::new(RefCell::new(heapless::String::new())),
+        ring_buf: critical_section::Mutex::new(RefCell::new(None)),
+    };
+
+    impl log::Log for Logger {
+        fn enabled(&self, _metadata: &log::Metadata) -> bool {
+            true
+        }
+
+        fn log(&self, record: &log::Record) {
+            critical_section::with(|cs| {
+                let ref_buf = self.data_buf.borrow(cs);
+                let mut buf = ref_buf.borrow_mut();
+                buf.clear();
+                let _ = writeln!(buf, "{} - {}\r", record.level(), record.args());
+                let rb_ref = self.ring_buf.borrow(cs);
+                let mut rb_opt = rb_ref.borrow_mut();
+                if rb_opt.is_none() {
+                    panic!("log call on uninitialized logger");
+                }
+                rb_opt.as_mut().unwrap().push_slice(buf.as_bytes());
+                let _ = self.frame_queue.try_send(buf.len());
+            });
+        }
+
+        fn flush(&self) {
+            while !self.frame_queue().is_empty() {}
+        }
+    }
+
+    impl Logger {
+        pub fn frame_queue(
+            &self,
+        ) -> &embassy_sync::channel::Channel<CriticalSectionRawMutex, usize, 32> {
+            &self.frame_queue
+        }
+    }
+
+    pub fn init(level: LevelFilter) {
+        critical_section::with(|cs| {
+            let rb = StaticRb::<u8, 4096>::default();
+            let rb_ref = LOGGER_RB.ring_buf.borrow(cs);
+            rb_ref.borrow_mut().replace(rb);
+        });
+        set_logger(&LOGGER_RB).unwrap();
+        set_max_level(level); // Adjust as needed
+    }
+
+    pub fn read_next_frame(frame_len: usize, buf: &mut [u8]) {
+        let read_len = core::cmp::min(frame_len, buf.len());
+        critical_section::with(|cs| {
+            let rb_ref = LOGGER_RB.ring_buf.borrow(cs);
+            let mut rb = rb_ref.borrow_mut();
+            rb.as_mut().unwrap().pop_slice(&mut buf[0..read_len]);
         })
     }
 
-    fn flush(&self) {}
-}
-
-pub struct UartLoggerUnsafeSingleThread {
-    skip_in_isr: Cell<bool>,
-    uart: UnsafeCell<Option<Uart>>,
-}
-
-unsafe impl Send for UartLoggerUnsafeSingleThread {}
-unsafe impl Sync for UartLoggerUnsafeSingleThread {}
-
-static UART_LOGGER_UNSAFE_SINGLE_THREAD: UartLoggerUnsafeSingleThread =
-    UartLoggerUnsafeSingleThread {
-        skip_in_isr: Cell::new(false),
-        uart: UnsafeCell::new(None),
-    };
-
-/// Initialize the logger with a blocking UART instance.
-///
-/// # Safety
-///
-/// This is a blocking logger which performs a write WITHOUT a critical section. This logger is
-/// NOT thread-safe. Users must ensure that this logger is not used inside a pre-emptive
-/// multi-threading context and interrupt handlers.
-pub unsafe fn create_unsafe_single_thread_logger(uart: Uart) -> UartLoggerUnsafeSingleThread {
-    UartLoggerUnsafeSingleThread {
-        skip_in_isr: Cell::new(false),
-        uart: UnsafeCell::new(Some(uart)),
+    pub fn get_frame_queue()
+    -> &'static embassy_sync::channel::Channel<CriticalSectionRawMutex, usize, 32> {
+        LOGGER_RB.frame_queue()
     }
 }
-
-/// Initialize the logger with a blocking UART instance which does not use locks.
-///
-/// # Safety
-///
-/// This is a blocking logger which performs a write WITHOUT a critical section. This logger is
-/// NOT thread-safe, which might lead to garbled output. Log output in ISRs can optionally be
-/// surpressed.
-pub unsafe fn init_unsafe_single_core(uart: Uart, level: LevelFilter, skip_in_isr: bool) {
-    let opt_uart = unsafe { &mut *UART_LOGGER_UNSAFE_SINGLE_THREAD.uart.get() };
-    opt_uart.replace(uart);
-    UART_LOGGER_UNSAFE_SINGLE_THREAD
-        .skip_in_isr
-        .set(skip_in_isr);
-
-    set_logger(&UART_LOGGER_UNSAFE_SINGLE_THREAD).unwrap();
-    set_max_level(level); // Adjust as needed
-}
-
-impl log::Log for UartLoggerUnsafeSingleThread {
-    fn enabled(&self, _metadata: &log::Metadata) -> bool {
-        true
-    }
-
-    fn log(&self, record: &log::Record) {
-        if !self.skip_in_isr.get() {
-            match Cpsr::read().mode().unwrap() {
-                cortex_ar::register::cpsr::ProcessorMode::Fiq
-                | cortex_ar::register::cpsr::ProcessorMode::Irq => {
-                    return;
-                }
-                _ => {}
-            }
-        }
-
-        let uart_mut = unsafe { &mut *self.uart.get() }.as_mut();
-        if uart_mut.is_none() {
-            return;
-        }
-        writeln!(
-            uart_mut.unwrap(),
-            "{} - {}\r",
-            record.level(),
-            record.args()
-        )
-        .unwrap();
-    }
-
-    fn flush(&self) {}
-}
diff --git a/zynq7000-hal/src/spi/asynch.rs b/zynq7000-hal/src/spi/asynch.rs
new file mode 100644
index 0000000..8c02897
--- /dev/null
+++ b/zynq7000-hal/src/spi/asynch.rs
@@ -0,0 +1,582 @@
+//! Asynchronous PS SPI driver.
+use core::{cell::RefCell, convert::Infallible, sync::atomic::AtomicBool};
+
+use critical_section::Mutex;
+use embassy_sync::waitqueue::AtomicWaker;
+use embedded_hal_async::spi::SpiBus;
+use raw_slice::{RawBufSlice, RawBufSliceMut};
+use zynq7000::spi::InterruptStatus;
+
+use super::{ChipSelect, FIFO_DEPTH, Spi, SpiId, SpiLowLevel};
+
+static WAKERS: [AtomicWaker; 2] = [const { AtomicWaker::new() }; 2];
+static TRANSFER_CONTEXTS: [Mutex<RefCell<TransferContext>>; 2] =
+    [const { Mutex::new(RefCell::new(TransferContext::new())) }; 2];
+// Completion flag. Kept outside of the context structure as an atomic to avoid
+// critical section.
+static DONE: [AtomicBool; 2] = [const { AtomicBool::new(false) }; 2];
+
+/// This is a generic interrupt handler to handle asynchronous SPI  operations for a given
+/// SPI peripheral.
+///
+/// The user has to call this once in the interrupt handler responsible for the SPI interrupts on
+/// the given SPI bank.
+pub fn on_interrupt(peripheral: SpiId) {
+    let mut spi = unsafe { SpiLowLevel::steal(peripheral) };
+    let idx = peripheral as usize;
+    let imr = spi.read_imr();
+    // IRQ is not related.
+    if !imr.tx_trig() && !imr.tx_full() && !imr.tx_underflow() && !imr.rx_ovr() && !imr.rx_full() {
+        return;
+    }
+    // Prevent spurious interrupts from messing with out logic here.
+    spi.disable_interrupts();
+    let isr = spi.read_isr();
+    spi.clear_interrupts();
+    let mut context = critical_section::with(|cs| {
+        let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+        *context_ref.borrow()
+    });
+    // No transfer active.
+    if context.transfer_type.is_none() {
+        return;
+    }
+    let transfer_type = context.transfer_type.unwrap();
+    match transfer_type {
+        TransferType::Read => on_interrupt_read(idx, &mut context, &mut spi, isr),
+        TransferType::Write => on_interrupt_write(idx, &mut context, &mut spi, isr),
+        TransferType::Transfer => on_interrupt_transfer(idx, &mut context, &mut spi, isr),
+        TransferType::TransferInPlace => {
+            on_interrupt_transfer_in_place(idx, &mut context, &mut spi, isr)
+        }
+    };
+}
+
+fn on_interrupt_read(
+    idx: usize,
+    context: &mut TransferContext,
+    spi: &mut SpiLowLevel,
+    mut isr: InterruptStatus,
+) {
+    let read_slice = unsafe { context.rx_slice.get_mut().unwrap() };
+    let transfer_len = read_slice.len();
+
+    // Read data from RX FIFO first.
+    let read_len = calculate_read_len(spi, isr, transfer_len, context.rx_progress);
+    (0..read_len).for_each(|_| {
+        read_slice[context.rx_progress] = spi.read_fifo_unchecked();
+        context.rx_progress += 1;
+    });
+
+    // The FIFO still needs to be pumped.
+    while context.tx_progress < read_slice.len() && !isr.tx_full() {
+        spi.write_fifo_unchecked(0);
+        context.tx_progress += 1;
+        isr = spi.read_isr();
+    }
+
+    isr_finish_handler(idx, spi, context, transfer_len)
+}
+
+fn on_interrupt_write(
+    idx: usize,
+    context: &mut TransferContext,
+    spi: &mut SpiLowLevel,
+    mut isr: InterruptStatus,
+) {
+    let write_slice = unsafe { context.tx_slice.get().unwrap() };
+    let transfer_len = write_slice.len();
+
+    // Read data from RX FIFO first.
+    let read_len = calculate_read_len(spi, isr, transfer_len, context.rx_progress);
+    (0..read_len).for_each(|_| {
+        spi.read_fifo_unchecked();
+        context.rx_progress += 1;
+    });
+
+    // Data still needs to be sent
+    while context.tx_progress < transfer_len && !isr.tx_full() {
+        spi.write_fifo_unchecked(write_slice[context.tx_progress]);
+        context.tx_progress += 1;
+        isr = spi.read_isr();
+    }
+
+    isr_finish_handler(idx, spi, context, transfer_len)
+}
+
+fn on_interrupt_transfer(
+    idx: usize,
+    context: &mut TransferContext,
+    spi: &mut SpiLowLevel,
+    mut isr: InterruptStatus,
+) {
+    let read_slice = unsafe { context.rx_slice.get_mut().unwrap() };
+    let read_len = read_slice.len();
+    let write_slice = unsafe { context.tx_slice.get().unwrap() };
+    let write_len = write_slice.len();
+    let transfer_len = core::cmp::max(read_len, write_len);
+
+    // Read data from RX FIFO first.
+    let read_len = calculate_read_len(spi, isr, transfer_len, context.rx_progress);
+    (0..read_len).for_each(|_| {
+        if context.rx_progress < read_len {
+            read_slice[context.rx_progress] = spi.read_fifo_unchecked();
+        } else {
+            spi.read_fifo_unchecked();
+        }
+        context.rx_progress += 1;
+    });
+
+    // Data still needs to be sent
+    while context.tx_progress < transfer_len && !isr.tx_full() {
+        if context.tx_progress < write_len {
+            spi.write_fifo_unchecked(write_slice[context.tx_progress]);
+        } else {
+            // Dummy write.
+            spi.write_fifo_unchecked(0);
+        }
+        context.tx_progress += 1;
+        isr = spi.read_isr();
+    }
+
+    isr_finish_handler(idx, spi, context, transfer_len)
+}
+
+fn on_interrupt_transfer_in_place(
+    idx: usize,
+    context: &mut TransferContext,
+    spi: &mut SpiLowLevel,
+    mut isr: InterruptStatus,
+) {
+    let transfer_slice = unsafe { context.rx_slice.get_mut().unwrap() };
+    let transfer_len = transfer_slice.len();
+    // Read data from RX FIFO first.
+    let read_len = calculate_read_len(spi, isr, transfer_len, context.rx_progress);
+    (0..read_len).for_each(|_| {
+        transfer_slice[context.rx_progress] = spi.read_fifo_unchecked();
+        context.rx_progress += 1;
+    });
+
+    // Data still needs to be sent
+    while context.tx_progress < transfer_len && !isr.tx_full() {
+        spi.write_fifo_unchecked(transfer_slice[context.tx_progress]);
+        context.tx_progress += 1;
+        isr = spi.read_isr();
+    }
+
+    isr_finish_handler(idx, spi, context, transfer_len)
+}
+
+fn calculate_read_len(
+    spi: &mut SpiLowLevel,
+    isr: InterruptStatus,
+    total_read_len: usize,
+    rx_progress: usize,
+) -> usize {
+    if isr.rx_full() {
+        core::cmp::min(FIFO_DEPTH, total_read_len - rx_progress)
+    } else if isr.rx_not_empty() {
+        let trigger = spi.read_rx_not_empty_threshold();
+        core::cmp::min(total_read_len - rx_progress, trigger as usize)
+    } else {
+        0
+    }
+}
+
+/// Generic handler after RX FIFO and TX FIFO were handled. Checks and handles finished
+/// and unfinished conditions.
+fn isr_finish_handler(
+    idx: usize,
+    spi: &mut SpiLowLevel,
+    context: &mut TransferContext,
+    transfer_len: usize,
+) {
+    // Transfer finish condition.
+    if context.rx_progress == context.tx_progress && context.rx_progress == transfer_len {
+        finish_transfer(idx, context, spi);
+        return;
+    }
+
+    unfinished_transfer(spi, transfer_len, context.rx_progress);
+
+    // If the transfer is done, the context structure was already written back.
+    // Write back updated context structure.
+    critical_section::with(|cs| {
+        let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+        *context_ref.borrow_mut() = *context;
+    });
+}
+
+fn finish_transfer(idx: usize, context: &mut TransferContext, spi: &mut SpiLowLevel) {
+    // Write back updated context structure.
+    critical_section::with(|cs| {
+        let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+        *context_ref.borrow_mut() = *context;
+    });
+    spi.set_rx_fifo_trigger(1).unwrap();
+    spi.set_tx_fifo_trigger(1).unwrap();
+    // Interrupts were already disabled and cleared.
+    DONE[idx].store(true, core::sync::atomic::Ordering::Relaxed);
+    WAKERS[idx].wake();
+}
+
+fn unfinished_transfer(spi: &mut SpiLowLevel, transfer_len: usize, rx_progress: usize) {
+    let new_trig_level = core::cmp::min(FIFO_DEPTH, transfer_len - rx_progress);
+    spi.set_rx_fifo_trigger(new_trig_level as u32).unwrap();
+    // Re-enable interrupts with the new RX FIFO trigger level.
+    spi.enable_interrupts();
+}
+
+#[derive(Debug, Clone, Copy)]
+pub enum TransferType {
+    Read,
+    Write,
+    Transfer,
+    TransferInPlace,
+}
+
+#[derive(Default, Debug, Copy, Clone)]
+pub struct TransferContext {
+    transfer_type: Option<TransferType>,
+    tx_progress: usize,
+    rx_progress: usize,
+    tx_slice: RawBufSlice,
+    rx_slice: RawBufSliceMut,
+}
+
+#[allow(clippy::new_without_default)]
+impl TransferContext {
+    pub const fn new() -> Self {
+        Self {
+            transfer_type: None,
+            tx_progress: 0,
+            rx_progress: 0,
+            tx_slice: RawBufSlice::new_nulled(),
+            rx_slice: RawBufSliceMut::new_nulled(),
+        }
+    }
+}
+
+pub struct SpiFuture {
+    id: super::SpiId,
+    spi: super::SpiLowLevel,
+    config: super::Config,
+    finished_regularly: core::cell::Cell<bool>,
+}
+
+impl SpiFuture {
+    fn new_for_read(spi: &mut Spi, spi_id: SpiId, words: &mut [u8]) -> Self {
+        if words.is_empty() {
+            panic!("words length unexpectedly 0");
+        }
+        let idx = spi_id as usize;
+        DONE[idx].store(false, core::sync::atomic::Ordering::Relaxed);
+        spi.inner.disable_interrupts();
+
+        let write_idx = core::cmp::min(super::FIFO_DEPTH, words.len());
+        // Send dummy bytes.
+        (0..write_idx).for_each(|_| {
+            spi.inner.write_fifo_unchecked(0);
+        });
+
+        Self::set_triggers(spi, write_idx, words.len());
+        // We assume that the slave select configuration was already performed, but we take
+        // care of issuing a start if necessary.
+        spi.issue_manual_start_for_manual_cfg();
+
+        critical_section::with(|cs| {
+            let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+            let mut context = context_ref.borrow_mut();
+            context.transfer_type = Some(TransferType::Read);
+            unsafe {
+                context.rx_slice.set(words);
+            }
+            context.tx_slice.set_null();
+            context.tx_progress = write_idx;
+            context.rx_progress = 0;
+            spi.inner.clear_interrupts();
+            spi.inner.enable_interrupts();
+            spi.inner.enable();
+        });
+        Self {
+            id: spi_id,
+            config: spi.config,
+            spi: unsafe { spi.inner.clone() },
+            finished_regularly: core::cell::Cell::new(false),
+        }
+    }
+
+    fn new_for_write(spi: &mut Spi, spi_id: SpiId, words: &[u8]) -> Self {
+        if words.is_empty() {
+            panic!("words length unexpectedly 0");
+        }
+        let (idx, write_idx) = Self::generic_init_transfer(spi, spi_id, words);
+        critical_section::with(|cs| {
+            let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+            let mut context = context_ref.borrow_mut();
+            context.transfer_type = Some(TransferType::Write);
+            unsafe {
+                context.tx_slice.set(words);
+            }
+            context.rx_slice.set_null();
+            context.tx_progress = write_idx;
+            context.rx_progress = 0;
+            spi.inner.clear_interrupts();
+            spi.inner.enable_interrupts();
+            spi.inner.enable();
+        });
+        Self {
+            id: spi_id,
+            config: spi.config,
+            spi: unsafe { spi.inner.clone() },
+            finished_regularly: core::cell::Cell::new(false),
+        }
+    }
+
+    fn new_for_transfer(spi: &mut Spi, spi_id: SpiId, read: &mut [u8], write: &[u8]) -> Self {
+        if read.is_empty() || write.is_empty() {
+            panic!("read or write buffer unexpectedly empty");
+        }
+        let (idx, write_idx) = Self::generic_init_transfer(spi, spi_id, write);
+        critical_section::with(|cs| {
+            let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+            let mut context = context_ref.borrow_mut();
+            context.transfer_type = Some(TransferType::Transfer);
+            unsafe {
+                context.tx_slice.set(write);
+                context.rx_slice.set(read);
+            }
+            context.tx_progress = write_idx;
+            context.rx_progress = 0;
+            spi.inner.clear_interrupts();
+            spi.inner.enable_interrupts();
+            spi.inner.enable();
+        });
+        Self {
+            id: spi_id,
+            config: spi.config,
+            spi: unsafe { spi.inner.clone() },
+            finished_regularly: core::cell::Cell::new(false),
+        }
+    }
+
+    fn new_for_transfer_in_place(spi: &mut Spi, spi_id: SpiId, words: &mut [u8]) -> Self {
+        if words.is_empty() {
+            panic!("read and write buffer unexpectedly empty");
+        }
+        let (idx, write_idx) = Self::generic_init_transfer(spi, spi_id, words);
+        critical_section::with(|cs| {
+            let context_ref = TRANSFER_CONTEXTS[idx].borrow(cs);
+            let mut context = context_ref.borrow_mut();
+            context.transfer_type = Some(TransferType::TransferInPlace);
+            unsafe {
+                context.rx_slice.set(words);
+            }
+            context.tx_slice.set_null();
+            context.tx_progress = write_idx;
+            context.rx_progress = 0;
+            spi.inner.clear_interrupts();
+            spi.inner.enable_interrupts();
+            spi.inner.enable();
+        });
+        Self {
+            id: spi_id,
+            config: spi.config,
+            spi: unsafe { spi.inner.clone() },
+            finished_regularly: core::cell::Cell::new(false),
+        }
+    }
+
+    fn generic_init_transfer(spi: &mut Spi, spi_id: SpiId, write: &[u8]) -> (usize, usize) {
+        let idx = spi_id as usize;
+        DONE[idx].store(false, core::sync::atomic::Ordering::Relaxed);
+        spi.inner.disable();
+        spi.inner.disable_interrupts();
+
+        let write_idx = core::cmp::min(super::FIFO_DEPTH, write.len());
+        (0..write_idx).for_each(|idx| {
+            spi.inner.write_fifo_unchecked(write[idx]);
+        });
+
+        Self::set_triggers(spi, write_idx, write.len());
+        // We assume that the slave select configuration was already performed, but we take
+        // care of issuing a start if necessary.
+        spi.issue_manual_start_for_manual_cfg();
+        (idx, write_idx)
+    }
+
+    fn set_triggers(spi: &mut Spi, write_idx: usize, write_len: usize) {
+        // This should never fail because it is never larger than the FIFO depth.
+        spi.inner.set_rx_fifo_trigger(write_idx as u32).unwrap();
+        // We want to re-fill the TX FIFO before it is completely empty if the full transfer size
+        // is larger than the FIFO depth. I am not sure whether the default value of 1 ensures
+        // this because the TMR says that this interrupt is triggered when the FIFO has less than
+        // threshold entries.
+        if write_len > super::FIFO_DEPTH {
+            spi.inner.set_tx_fifo_trigger(2).unwrap();
+        }
+    }
+}
+
+impl Future for SpiFuture {
+    type Output = ();
+
+    fn poll(
+        self: core::pin::Pin<&mut Self>,
+        cx: &mut core::task::Context<'_>,
+    ) -> core::task::Poll<Self::Output> {
+        WAKERS[self.id as usize].register(cx.waker());
+        if DONE[self.id as usize].swap(false, core::sync::atomic::Ordering::Relaxed) {
+            critical_section::with(|cs| {
+                let mut ctx = TRANSFER_CONTEXTS[self.id as usize].borrow(cs).borrow_mut();
+                *ctx = TransferContext::default();
+            });
+            self.finished_regularly.set(true);
+            return core::task::Poll::Ready(());
+        }
+        core::task::Poll::Pending
+    }
+}
+
+impl Drop for SpiFuture {
+    fn drop(&mut self) {
+        if !self.finished_regularly.get() {
+            self.spi.reset_and_reconfigure(self.config);
+        }
+    }
+}
+
+/// Asynchronous SPI driver.
+///
+/// This is the primary data structure used to perform non-blocking SPI operations.
+/// It implements the [embedded_hal_async::spi::SpiBus] as well.
+pub struct SpiAsync(pub Spi);
+
+impl SpiAsync {
+    pub fn new(spi: Spi) -> Self {
+        Self(spi)
+    }
+
+    async fn read(&mut self, words: &mut [u8]) {
+        if words.is_empty() {
+            return;
+        }
+        let id = self.0.inner.id;
+        let spi_fut = SpiFuture::new_for_read(&mut self.0, id, words);
+        spi_fut.await;
+    }
+
+    async fn write(&mut self, words: &[u8]) {
+        if words.is_empty() {
+            return;
+        }
+        let id = self.0.inner.id;
+        let spi_fut = SpiFuture::new_for_write(&mut self.0, id, words);
+        spi_fut.await;
+    }
+
+    async fn transfer(&mut self, read: &mut [u8], write: &[u8]) {
+        if read.is_empty() || write.is_empty() {
+            return;
+        }
+        let id = self.0.inner.id;
+        let spi_fut = SpiFuture::new_for_transfer(&mut self.0, id, read, write);
+        spi_fut.await;
+    }
+
+    async fn transfer_in_place(&mut self, words: &mut [u8]) {
+        if words.is_empty() {
+            return;
+        }
+        let id = self.0.inner.id;
+        let spi_fut = SpiFuture::new_for_transfer_in_place(&mut self.0, id, words);
+        spi_fut.await;
+    }
+}
+
+impl embedded_hal_async::spi::ErrorType for SpiAsync {
+    type Error = Infallible;
+}
+
+impl embedded_hal_async::spi::SpiBus for SpiAsync {
+    async fn read(&mut self, words: &mut [u8]) -> Result<(), Self::Error> {
+        self.read(words).await;
+        Ok(())
+    }
+
+    async fn write(&mut self, words: &[u8]) -> Result<(), Self::Error> {
+        self.write(words).await;
+        Ok(())
+    }
+
+    async fn transfer(&mut self, read: &mut [u8], write: &[u8]) -> Result<(), Self::Error> {
+        self.transfer(read, write).await;
+        Ok(())
+    }
+
+    async fn transfer_in_place(&mut self, words: &mut [u8]) -> Result<(), Self::Error> {
+        self.transfer_in_place(words).await;
+        Ok(())
+    }
+
+    async fn flush(&mut self) -> Result<(), Self::Error> {
+        Ok(())
+    }
+}
+
+/// This structure is a wrapper for [SpiAsync] which implements the
+/// [embedded_hal_async::spi::SpiDevice] trait as well.
+pub struct SpiWithHwCsAsync<Delay: embedded_hal_async::delay::DelayNs> {
+    pub spi: SpiAsync,
+    pub cs: ChipSelect,
+    pub delay: Delay,
+}
+
+impl<Delay: embedded_hal_async::delay::DelayNs> SpiWithHwCsAsync<Delay> {
+    pub fn new(spi: SpiAsync, cs: ChipSelect, delay: Delay) -> Self {
+        Self { spi, cs, delay }
+    }
+
+    pub fn release(self) -> SpiAsync {
+        self.spi
+    }
+}
+
+impl<Delay: embedded_hal_async::delay::DelayNs> embedded_hal_async::spi::ErrorType
+    for SpiWithHwCsAsync<Delay>
+{
+    type Error = Infallible;
+}
+
+impl<Delay: embedded_hal_async::delay::DelayNs> embedded_hal_async::spi::SpiDevice
+    for SpiWithHwCsAsync<Delay>
+{
+    async fn transaction(
+        &mut self,
+        operations: &mut [embedded_hal::spi::Operation<'_, u8>],
+    ) -> Result<(), Self::Error> {
+        self.spi.0.inner.select_hw_cs(self.cs);
+        for op in operations {
+            match op {
+                embedded_hal::spi::Operation::Read(items) => {
+                    self.spi.read(items).await;
+                }
+                embedded_hal::spi::Operation::Write(items) => {
+                    self.spi.write(items).await;
+                }
+                embedded_hal::spi::Operation::Transfer(read, write) => {
+                    self.spi.transfer(read, write).await;
+                }
+                embedded_hal::spi::Operation::TransferInPlace(items) => {
+                    self.spi.transfer_in_place(items).await;
+                }
+                embedded_hal::spi::Operation::DelayNs(delay) => {
+                    self.delay.delay_ns(*delay).await;
+                }
+            }
+        }
+        self.spi.flush().await?;
+        self.spi.0.inner.no_hw_cs();
+        Ok(())
+    }
+}
diff --git a/zynq7000-hal/src/spi.rs b/zynq7000-hal/src/spi/mod.rs
similarity index 80%
rename from zynq7000-hal/src/spi.rs
rename to zynq7000-hal/src/spi/mod.rs
index 7ea7781..4376b93 100644
--- a/zynq7000-hal/src/spi.rs
+++ b/zynq7000-hal/src/spi/mod.rs
@@ -1,3 +1,4 @@
+//! PS SPI HAL driver.
 use core::convert::Infallible;
 
 use crate::clocks::Clocks;
@@ -19,16 +20,20 @@ pub use embedded_hal::spi::Mode;
 use embedded_hal::spi::{MODE_0, MODE_1, MODE_2, MODE_3, SpiBus as _};
 use zynq7000::slcr::reset::DualRefAndClockReset;
 use zynq7000::spi::{
-    BaudDivSelect, DelayControl, FifoWrite, MmioSpi, SPI_0_BASE_ADDR, SPI_1_BASE_ADDR,
+    BaudDivSelect, DelayControl, FifoWrite, InterruptControl, InterruptMask, InterruptStatus,
+    MmioSpi, SPI_0_BASE_ADDR, SPI_1_BASE_ADDR,
 };
 
 pub const FIFO_DEPTH: usize = 128;
 pub const MODULE_ID: u32 = 0x90106;
 
+pub mod asynch;
+pub use asynch::*;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum SpiId {
-    Spi0,
-    Spi1,
+    Spi0 = 0,
+    Spi1 = 1,
 }
 
 pub trait PsSpi {
@@ -381,6 +386,7 @@ pub enum SlaveSelectConfig {
     AutoWithAutoStart = 0b00,
 }
 
+#[derive(Debug, Copy, Clone)]
 pub struct Config {
     baud_div: BaudDivSelect,
     init_mode: Mode,
@@ -403,9 +409,257 @@ impl Config {
     }
 }
 
+/// Thin re-usable low-level helper to interface with the SPI.
+pub struct SpiLowLevel {
+    id: SpiId,
+    regs: zynq7000::spi::MmioSpi<'static>,
+}
+
+impl SpiLowLevel {
+    /// Steal the SPI low level helper.
+    ///
+    /// # Safety
+    ///
+    /// This API can be used to potentially create a driver to the same peripheral structure
+    /// from multiple threads. The user must ensure that concurrent accesses are safe and do not
+    /// interfere with each other.
+    pub unsafe fn steal(id: SpiId) -> Self {
+        let regs = unsafe {
+            match id {
+                SpiId::Spi0 => zynq7000::spi::Spi::new_mmio_fixed_0(),
+                SpiId::Spi1 => zynq7000::spi::Spi::new_mmio_fixed_1(),
+            }
+        };
+        Self { id, regs }
+    }
+
+    /// Clone the SPI low level helper.
+    ///
+    /// # Safety
+    ///
+    /// This API can be used to potentially create a driver to the same peripheral structure
+    /// from multiple threads. The user must ensure that concurrent accesses are safe and do not
+    /// interfere with each other.
+    pub unsafe fn clone(&self) -> Self {
+        Self {
+            id: self.id,
+            regs: unsafe { self.regs.clone() },
+        }
+    }
+
+    pub fn id(&self) -> SpiId {
+        self.id
+    }
+
+    #[inline]
+    pub fn disable(&mut self) {
+        self.regs.write_enable(0);
+    }
+
+    #[inline]
+    pub fn enable(&mut self) {
+        self.regs.write_enable(1);
+    }
+
+    /// Select the peripheral chip select line.
+    ///
+    /// This needs to be done before starting a transfer to select the correct peripheral chip
+    /// select line.
+    ///
+    /// The decoder bits logic is is based
+    /// [on online documentation](https://www.realdigital.org/doc/3eb4f7a05e5003f2e0e6858a27a679bb?utm_source=chatgpt.com)
+    /// because the TRM does not specify how decoding really works. This also only works if
+    /// the external decoding was enabled via the [Config::enable_external_decoding] option.
+    #[inline]
+    pub fn select_hw_cs(&mut self, chip_select: ChipSelect) {
+        self.regs.modify_cr(|mut val| {
+            val.set_cs_raw(chip_select.raw_reg());
+            val
+        });
+    }
+
+    /// Re-configures the mode register.
+    #[inline]
+    pub fn configure_mode(&mut self, mode: Mode) {
+        let (cpol, cpha) = match mode {
+            MODE_0 => (false, false),
+            MODE_1 => (false, true),
+            MODE_2 => (true, false),
+            MODE_3 => (true, true),
+        };
+        self.regs.modify_cr(|mut val| {
+            val.set_cpha(cpha);
+            val.set_cpha(cpol);
+            val
+        });
+    }
+
+    /// Re-configures the delay control register.
+    #[inline]
+    pub fn configure_delays(&mut self, config: DelayControl) {
+        self.regs.write_delay_control(config)
+    }
+
+    /// Re-configures the SPI peripheral with the initial configuration.
+    pub fn reconfigure(&mut self, config: Config) {
+        self.regs.write_enable(0);
+        let (man_ss, man_start) = match config.ss_config {
+            SlaveSelectConfig::ManualWithManualStart => (true, true),
+            SlaveSelectConfig::ManualAutoStart => (true, false),
+            SlaveSelectConfig::AutoWithManualStart => (false, true),
+            SlaveSelectConfig::AutoWithAutoStart => (false, false),
+        };
+        let (cpol, cpha) = match config.init_mode {
+            MODE_0 => (false, false),
+            MODE_1 => (false, true),
+            MODE_2 => (true, false),
+            MODE_3 => (true, true),
+        };
+
+        self.regs.write_cr(
+            zynq7000::spi::Config::builder()
+                .with_modefail_gen_en(false)
+                .with_manual_start(false)
+                .with_manual_start_enable(man_start)
+                .with_manual_cs(man_ss)
+                .with_cs_raw(ChipSelect::None.raw_reg())
+                .with_peri_sel(config.with_ext_decoding)
+                .with_baud_rate_div(config.baud_div)
+                .with_cpha(cpha)
+                .with_cpol(cpol)
+                .with_master_ern(true)
+                .build(),
+        );
+        // Configures for polling mode by default: TX trigger by one will lead to the
+        // TX FIFO not full signal being set when the TX FIFO is emtpy.
+        self.regs.write_tx_trig(1);
+        // Same as TX.
+        self.regs.write_rx_trig(1);
+
+        self.regs.write_enable(1);
+    }
+
+    /// [Resets][reset] and [re-configures][Self::reconfigure] the SPI peripheral.
+    ///
+    /// This can also be used to reset the FIFOs and the state machine of the SPI.
+    pub fn reset_and_reconfigure(&mut self, config: Config) {
+        reset(self.id());
+        self.reconfigure(config);
+    }
+
+    /// No peripheral slave selection.
+    #[inline]
+    pub fn no_hw_cs(&mut self) {
+        self.select_hw_cs(ChipSelect::None);
+    }
+
+    #[inline(always)]
+    pub fn write_fifo_unchecked(&mut self, data: u8) {
+        self.regs
+            .write_txd(FifoWrite::new_with_raw_value(data as u32));
+    }
+
+    #[inline(always)]
+    pub fn read_fifo_unchecked(&mut self) -> u8 {
+        self.regs.read_rxd().data()
+    }
+
+    #[inline]
+    pub fn issue_manual_start(&mut self) {
+        self.regs.modify_cr(|mut val| {
+            val.set_manual_start(true);
+            val
+        });
+    }
+
+    #[inline]
+    pub fn read_isr(&self) -> InterruptStatus {
+        self.regs.read_isr()
+    }
+
+    #[inline]
+    pub fn read_imr(&self) -> InterruptMask {
+        self.regs.read_imr()
+    }
+
+    #[inline]
+    pub fn read_rx_not_empty_threshold(&self) -> u32 {
+        self.regs.read_rx_trig()
+    }
+
+    #[inline]
+    pub fn set_rx_fifo_trigger(&mut self, trigger: u32) -> Result<(), InvalidTriggerError> {
+        if trigger > FIFO_DEPTH as u32 {
+            return Err(InvalidTriggerError(trigger as usize));
+        }
+        self.regs.write_rx_trig(trigger.value());
+        Ok(())
+    }
+
+    #[inline]
+    pub fn set_tx_fifo_trigger(&mut self, trigger: u32) -> Result<(), InvalidTriggerError> {
+        if trigger > FIFO_DEPTH as u32 {
+            return Err(InvalidTriggerError(trigger as usize));
+        }
+        self.regs.write_tx_trig(trigger.value());
+        Ok(())
+    }
+
+    /// This disables all interrupts relevant for non-blocking interrupt driven SPI operation
+    /// in SPI master mode.
+    #[inline]
+    pub fn disable_interrupts(&mut self) {
+        self.regs.write_idr(
+            InterruptControl::builder()
+                .with_tx_underflow(true)
+                .with_rx_full(true)
+                .with_rx_not_empty(true)
+                .with_tx_full(false)
+                .with_tx_trig(true)
+                .with_mode_fault(false)
+                .with_rx_ovr(true)
+                .build(),
+        );
+    }
+
+    /// This enables all interrupts relevant for non-blocking interrupt driven SPI operation
+    /// in SPI master mode.
+    #[inline]
+    pub fn enable_interrupts(&mut self) {
+        self.regs.write_ier(
+            InterruptControl::builder()
+                .with_tx_underflow(true)
+                .with_rx_full(true)
+                .with_rx_not_empty(true)
+                .with_tx_full(false)
+                .with_tx_trig(true)
+                .with_mode_fault(false)
+                .with_rx_ovr(true)
+                .build(),
+        );
+    }
+
+    /// This clears all interrupts relevant for non-blocking interrupt driven SPI operation
+    /// in SPI master mode.
+    #[inline]
+    pub fn clear_interrupts(&mut self) {
+        self.regs.write_isr(
+            InterruptStatus::builder()
+                .with_tx_underflow(true)
+                .with_rx_full(true)
+                .with_rx_not_empty(true)
+                .with_tx_full(false)
+                .with_tx_not_full(true)
+                .with_mode_fault(false)
+                .with_rx_ovr(true)
+                .build(),
+        );
+    }
+}
+
 /// Blocking Driver for the PS SPI peripheral in master mode.
 pub struct Spi {
-    regs: zynq7000::spi::MmioSpi<'static>,
+    inner: SpiLowLevel,
     sclk: Hertz,
     config: Config,
     outstanding_rx: bool,
@@ -415,6 +669,10 @@ pub struct Spi {
 #[error("invalid SPI ID")]
 pub struct InvalidPsSpiError;
 
+#[derive(Debug, thiserror::Error)]
+#[error("invalid trigger value {0}")]
+pub struct InvalidTriggerError(pub usize);
+
 // TODO: Add and handle MUX config check.
 #[derive(Debug, thiserror::Error)]
 pub enum SpiConstructionError {
@@ -593,7 +851,7 @@ impl Spi {
 
     pub fn new_generic_unchecked(
         id: SpiId,
-        mut regs: MmioSpi<'static>,
+        regs: MmioSpi<'static>,
         clocks: &IoClocks,
         config: Config,
     ) -> Self {
@@ -602,48 +860,36 @@ impl Spi {
             SpiId::Spi1 => crate::PeripheralSelect::Spi1,
         };
         enable_amba_peripheral_clock(periph_sel);
-        reset(id);
-        regs.write_enable(0);
-        let (man_ss, man_start) = match config.ss_config {
-            SlaveSelectConfig::ManualWithManualStart => (true, true),
-            SlaveSelectConfig::ManualAutoStart => (true, false),
-            SlaveSelectConfig::AutoWithManualStart => (false, true),
-            SlaveSelectConfig::AutoWithAutoStart => (false, false),
-        };
-        let (cpol, cpha) = match config.init_mode {
-            MODE_0 => (false, false),
-            MODE_1 => (false, true),
-            MODE_2 => (true, false),
-            MODE_3 => (true, true),
-        };
-
-        regs.write_cr(
-            zynq7000::spi::Config::builder()
-                .with_modefail_gen_en(false)
-                .with_manual_start(false)
-                .with_manual_start_enable(man_start)
-                .with_manual_cs(man_ss)
-                .with_cs_raw(ChipSelect::None.raw_reg())
-                .with_peri_sel(config.with_ext_decoding)
-                .with_baud_rate_div(config.baud_div)
-                .with_cpha(cpha)
-                .with_cpol(cpol)
-                .with_master_ern(true)
-                .build(),
-        );
-        // Configures for polling mode by default: TX trigger by one will lead to the
-        // TX FIFO not full signal being set when the TX FIFO is emtpy.
-        regs.write_tx_trig(1);
-        // Same as TX.
-        regs.write_rx_trig(1);
-
-        regs.write_enable(1);
         let sclk = clocks.spi_clk() / config.baud_div.div_value() as u32;
-        Self {
-            regs,
+        let mut spi = Self {
+            inner: SpiLowLevel { regs, id },
             sclk,
             config,
             outstanding_rx: false,
+        };
+        spi.reset_and_reconfigure();
+        spi
+    }
+
+    /// Re-configures the SPI peripheral with the initial configuration.
+    pub fn reconfigure(&mut self) {
+        self.inner.reconfigure(self.config);
+    }
+
+    /// [Resets][reset] and [re-configures][Self::reconfigure] the SPI peripheral.
+    ///
+    /// This can also be used to reset the FIFOs and the state machine of the SPI.
+    pub fn reset_and_reconfigure(&mut self) {
+        reset(self.inner.id());
+        self.reconfigure();
+    }
+
+    #[inline]
+    pub fn issue_manual_start_for_manual_cfg(&mut self) {
+        if self.config.ss_config == SlaveSelectConfig::AutoWithManualStart
+            || self.config.ss_config == SlaveSelectConfig::ManualWithManualStart
+        {
+            self.inner.issue_manual_start();
         }
     }
 
@@ -653,88 +899,21 @@ impl Spi {
         self.sclk
     }
 
+    /// Retrieve inner low-level helper.
     #[inline]
-    pub fn regs(&mut self) -> &mut MmioSpi<'static> {
-        &mut self.regs
-    }
-
-    /// Select the peripheral chip select line.
-    ///
-    /// This needs to be done before starting a transfer to select the correct peripheral chip
-    /// select line.
-    ///
-    /// The decoder bits logic is is based
-    /// [on online documentation](https://www.realdigital.org/doc/3eb4f7a05e5003f2e0e6858a27a679bb?utm_source=chatgpt.com)
-    /// because the TRM does not specify how decoding really works. This also only works if
-    /// the external decoding was enabled via the [Config::enable_external_decoding] option.
-    #[inline]
-    pub fn select_hw_cs(&mut self, chip_select: ChipSelect) {
-        self.regs.modify_cr(|mut val| {
-            val.set_cs_raw(chip_select.raw_reg());
-            val
-        });
-    }
-
-    /// Re-configures the mode register.
-    #[inline]
-    pub fn configure_mode(&mut self, mode: Mode) {
-        let (cpol, cpha) = match mode {
-            MODE_0 => (false, false),
-            MODE_1 => (false, true),
-            MODE_2 => (true, false),
-            MODE_3 => (true, true),
-        };
-        self.regs.modify_cr(|mut val| {
-            val.set_cpha(cpha);
-            val.set_cpha(cpol);
-            val
-        });
-    }
-
-    /// Re-configures the delay control register.
-    #[inline]
-    pub fn configure_delays(&mut self, config: DelayControl) {
-        self.regs.write_delay_control(config)
-    }
-
-    /// No peripheral slave selection.
-    #[inline]
-    pub fn no_hw_cs(&mut self) {
-        self.select_hw_cs(ChipSelect::None);
-    }
-
-    #[inline(always)]
-    pub fn write_fifo_unchecked(&mut self, data: u8) {
-        self.regs
-            .write_txd(FifoWrite::new_with_raw_value(data as u32));
-    }
-
-    #[inline(always)]
-    pub fn read_fifo_unchecked(&mut self) -> u8 {
-        self.regs.read_rxd().data()
+    pub const fn inner(&mut self) -> &mut SpiLowLevel {
+        &mut self.inner
     }
 
     #[inline]
-    pub fn issue_manual_start(&mut self) {
-        self.regs.modify_cr(|mut val| {
-            val.set_manual_start(true);
-            val
-        });
-    }
-
-    #[inline]
-    pub fn issue_manual_start_for_manual_cfg(&mut self) {
-        if self.config.ss_config == SlaveSelectConfig::AutoWithManualStart
-            || self.config.ss_config == SlaveSelectConfig::ManualWithManualStart
-        {
-            self.issue_manual_start();
-        }
+    pub fn regs(&mut self) -> &mut MmioSpi<'static> {
+        &mut self.inner.regs
     }
 
     fn initial_fifo_fill(&mut self, words: &[u8]) -> usize {
         let write_len = core::cmp::min(FIFO_DEPTH, words.len());
         (0..write_len).for_each(|idx| {
-            self.write_fifo_unchecked(words[idx]);
+            self.inner.write_fifo_unchecked(words[idx]);
         });
         write_len
     }
@@ -744,7 +923,7 @@ impl Spi {
         // implementation for now.
         self.flush().unwrap();
         // Write this to 1 in any case to allow polling, defensive programming.
-        self.regs.write_rx_trig(1);
+        self.inner.regs.write_rx_trig(1);
 
         // Fill the FIFO with initial data.
         let written = self.initial_fifo_fill(words);
@@ -769,12 +948,12 @@ impl embedded_hal::spi::SpiBus for Spi {
         // implementation for now.
         self.flush()?;
         // Write this to 1 in any case to allow polling, defensive programming.
-        self.regs.write_rx_trig(1);
+        self.regs().write_rx_trig(1);
 
         let mut write_idx = core::cmp::min(FIFO_DEPTH, words.len());
         // Send dummy bytes.
         (0..write_idx).for_each(|_| {
-            self.write_fifo_unchecked(0);
+            self.inner.write_fifo_unchecked(0);
         });
 
         // We assume that the slave select configuration was already performed, but we take
@@ -783,14 +962,14 @@ impl embedded_hal::spi::SpiBus for Spi {
 
         let mut read_idx = 0;
         while read_idx < words.len() {
-            let status = self.regs.read_sr();
+            let status = self.regs().read_isr();
             if status.rx_not_empty() {
-                words[read_idx] = self.read_fifo_unchecked();
+                words[read_idx] = self.inner.read_fifo_unchecked();
                 read_idx += 1;
             }
             // Continue pumping the FIFO if necesary and possible.
             if write_idx < words.len() && !status.tx_full() {
-                self.write_fifo_unchecked(0);
+                self.inner.write_fifo_unchecked(0);
                 write_idx += 1;
             }
         }
@@ -806,21 +985,21 @@ impl embedded_hal::spi::SpiBus for Spi {
         let mut read_idx = 0;
 
         while written < words.len() {
-            let status = self.regs.read_sr();
+            let status = self.regs().read_isr();
             // We empty the FIFO to prevent it filling up completely, as long as we have to write
             // bytes
             if status.rx_not_empty() {
-                self.read_fifo_unchecked();
+                self.inner.read_fifo_unchecked();
                 read_idx += 1;
             }
             if !status.tx_full() {
-                self.write_fifo_unchecked(words[written]);
+                self.inner.write_fifo_unchecked(words[written]);
                 written += 1;
             }
         }
         // We exit once all bytes have been written, so some bytes to read might be outstanding.
         // We use the FIFO trigger mechanism to determine when we can read all the remaining bytes.
-        self.regs.write_rx_trig((words.len() - read_idx) as u32);
+        self.regs().write_rx_trig((words.len() - read_idx) as u32);
         self.outstanding_rx = true;
         Ok(())
     }
@@ -831,22 +1010,33 @@ impl embedded_hal::spi::SpiBus for Spi {
         }
         let mut write_idx = self.prepare_generic_blocking_transfer(write);
         let mut read_idx = 0;
+        let max_idx = core::cmp::max(write.len(), read.len());
 
-        let mut writes_finished = write_idx == write.len();
+        let mut writes_finished = write_idx == max_idx;
         let mut reads_finished = false;
         while !writes_finished || !reads_finished {
-            let status = self.regs.read_sr();
+            let status = self.regs().read_isr();
             if status.rx_not_empty() && !reads_finished {
-                read[read_idx] = self.read_fifo_unchecked();
+                if read_idx < read.len() {
+                    read[read_idx] = self.inner.read_fifo_unchecked();
+                } else {
+                    // Discard the data.
+                    self.inner.read_fifo_unchecked();
+                }
                 read_idx += 1;
             }
 
             if !status.tx_full() && !writes_finished {
-                self.write_fifo_unchecked(write[write_idx]);
+                if write_idx < write.len() {
+                    self.inner.write_fifo_unchecked(write[write_idx]);
+                } else {
+                    // Send dummy bytes.
+                    self.inner.write_fifo_unchecked(0);
+                }
                 write_idx += 1;
             }
-            writes_finished = write_idx == write.len();
-            reads_finished = read_idx == write.len();
+            writes_finished = write_idx == max_idx;
+            reads_finished = read_idx == max_idx;
         }
 
         Ok(())
@@ -862,14 +1052,14 @@ impl embedded_hal::spi::SpiBus for Spi {
         let mut writes_finished = write_idx == words.len();
         let mut reads_finished = false;
         while !writes_finished || !reads_finished {
-            let status = self.regs.read_sr();
+            let status = self.inner.read_isr();
             if status.rx_not_empty() && !reads_finished {
-                words[read_idx] = self.read_fifo_unchecked();
+                words[read_idx] = self.inner.read_fifo_unchecked();
                 read_idx += 1;
             }
 
             if !status.tx_full() && !writes_finished {
-                self.write_fifo_unchecked(words[write_idx]);
+                self.inner.write_fifo_unchecked(words[write_idx]);
                 write_idx += 1;
             }
             writes_finished = write_idx == words.len();
@@ -879,16 +1069,17 @@ impl embedded_hal::spi::SpiBus for Spi {
         Ok(())
     }
 
+    /// Blocking flush implementation.
     fn flush(&mut self) -> Result<(), Self::Error> {
         if !self.outstanding_rx {
             return Ok(());
         }
-        let rx_trig = self.regs.read_rx_trig();
-        while !self.regs.read_sr().rx_not_empty() {}
+        let rx_trig = self.inner.read_rx_not_empty_threshold();
+        while !self.inner.read_isr().rx_not_empty() {}
         (0..rx_trig).for_each(|_| {
-            self.regs.read_rxd();
+            self.inner.read_fifo_unchecked();
         });
-        self.regs.write_rx_trig(1);
+        self.inner.set_rx_fifo_trigger(1).unwrap();
         self.outstanding_rx = false;
         Ok(())
     }
@@ -919,7 +1110,7 @@ impl<Delay: DelayNs> embedded_hal::spi::SpiDevice for SpiWithHwCs<Delay> {
         &mut self,
         operations: &mut [embedded_hal::spi::Operation<'_, u8>],
     ) -> Result<(), Self::Error> {
-        self.spi.select_hw_cs(self.cs);
+        self.spi.inner.select_hw_cs(self.cs);
         for op in operations {
             match op {
                 embedded_hal::spi::Operation::Read(items) => {
@@ -940,7 +1131,7 @@ impl<Delay: DelayNs> embedded_hal::spi::SpiDevice for SpiWithHwCs<Delay> {
             }
         }
         self.spi.flush()?;
-        self.spi.no_hw_cs();
+        self.spi.inner.no_hw_cs();
         Ok(())
     }
 }
diff --git a/zynq7000-hal/src/uart/tx_async.rs b/zynq7000-hal/src/uart/tx_async.rs
index 9c0f970..f349e71 100644
--- a/zynq7000-hal/src/uart/tx_async.rs
+++ b/zynq7000-hal/src/uart/tx_async.rs
@@ -6,6 +6,14 @@ use raw_slice::RawBufSlice;
 
 use crate::uart::{FIFO_DEPTH, Tx, UartId};
 
+#[derive(Debug)]
+pub enum TransferType {
+    Read,
+    Write,
+    Transfer,
+    TransferInPlace,
+}
+
 static UART_TX_WAKERS: [AtomicWaker; 2] = [const { AtomicWaker::new() }; 2];
 static TX_CONTEXTS: [Mutex<RefCell<TxContext>>; 2] =
     [const { Mutex::new(RefCell::new(TxContext::new())) }; 2];
@@ -49,8 +57,8 @@ pub fn on_interrupt_tx(peripheral: UartId) {
         // Transfer is done.
         TX_DONE[idx].store(true, core::sync::atomic::Ordering::Relaxed);
         tx_with_irq.disable_interrupts();
-        UART_TX_WAKERS[idx].wake();
         tx_with_irq.clear_interrupts();
+        UART_TX_WAKERS[idx].wake();
         return;
     }
     // Safety: We documented that the user provided slice must outlive the future, so we convert
@@ -94,7 +102,7 @@ impl TxContext {
 }
 
 pub struct TxFuture {
-    uart_idx: UartId,
+    id: UartId,
 }
 
 impl TxFuture {
@@ -124,7 +132,7 @@ impl TxFuture {
         tx_with_irq.enable_interrupts();
 
         Self {
-            uart_idx: tx_with_irq.uart_idx(),
+            id: tx_with_irq.uart_idx(),
         }
     }
 }
@@ -136,10 +144,10 @@ impl Future for TxFuture {
         self: core::pin::Pin<&mut Self>,
         cx: &mut core::task::Context<'_>,
     ) -> core::task::Poll<Self::Output> {
-        UART_TX_WAKERS[self.uart_idx as usize].register(cx.waker());
-        if TX_DONE[self.uart_idx as usize].swap(false, core::sync::atomic::Ordering::Relaxed) {
+        UART_TX_WAKERS[self.id as usize].register(cx.waker());
+        if TX_DONE[self.id as usize].swap(false, core::sync::atomic::Ordering::Relaxed) {
             let progress = critical_section::with(|cs| {
-                let mut ctx = TX_CONTEXTS[self.uart_idx as usize].borrow(cs).borrow_mut();
+                let mut ctx = TX_CONTEXTS[self.id as usize].borrow(cs).borrow_mut();
                 ctx.slice.set_null();
                 ctx.progress
             });
@@ -151,7 +159,7 @@ impl Future for TxFuture {
 
 impl Drop for TxFuture {
     fn drop(&mut self) {
-        let mut tx = unsafe { Tx::steal(self.uart_idx) };
+        let mut tx = unsafe { Tx::steal(self.id) };
         tx.disable_interrupts();
     }
 }
diff --git a/zynq7000/src/spi.rs b/zynq7000/src/spi.rs
index 74087fa..bca9381 100644
--- a/zynq7000/src/spi.rs
+++ b/zynq7000/src/spi.rs
@@ -65,9 +65,9 @@ pub struct Config {
     master_ern: bool,
 }
 
-#[bitbybit::bitfield(u32)]
+#[bitbybit::bitfield(u32, default = 0x0)]
 #[derive(Debug)]
-pub struct Status {
+pub struct InterruptStatus {
     #[bit(6, rw)]
     tx_underflow: bool,
     #[bit(5, rw)]
@@ -77,7 +77,7 @@ pub struct Status {
     #[bit(3, rw)]
     tx_full: bool,
     #[bit(2, rw)]
-    tx_trig: bool,
+    tx_not_full: bool,
     #[bit(1, rw)]
     mode_fault: bool,
     /// Receiver overflow interrupt.
@@ -85,9 +85,9 @@ pub struct Status {
     rx_ovr: bool,
 }
 
-#[bitbybit::bitfield(u32)]
+#[bitbybit::bitfield(u32, default = 0x0)]
 #[derive(Debug)]
-pub struct InterruptRegWriteOnly {
+pub struct InterruptControl {
     #[bit(6, w)]
     tx_underflow: bool,
     #[bit(5, w)]
@@ -107,7 +107,7 @@ pub struct InterruptRegWriteOnly {
 
 #[bitbybit::bitfield(u32)]
 #[derive(Debug)]
-pub struct InterruptRegReadOnly {
+pub struct InterruptMask {
     #[bit(6, r)]
     tx_underflow: bool,
     #[bit(5, r)]
@@ -162,16 +162,17 @@ pub struct DelayControl {
 #[repr(C)]
 pub struct Spi {
     cr: Config,
-    sr: Status,
+    #[mmio(PureRead, Write)]
+    isr: InterruptStatus,
     /// Interrupt Enable Register.
     #[mmio(Write)]
-    ier: InterruptRegWriteOnly,
+    ier: InterruptControl,
     /// Interrupt Disable Register.
     #[mmio(Write)]
-    idr: InterruptRegWriteOnly,
+    idr: InterruptControl,
     /// Interrupt Mask Register.
     #[mmio(PureRead)]
-    imr: InterruptRegReadOnly,
+    imr: InterruptMask,
     enable: u32,
     delay_control: DelayControl,
     #[mmio(Write)]