forked from ROMEO/nexosim

First public commit

Serge Barral 2022-10-12 05:33:16 +02:00
commit 5c94ec6a65
34 changed files with 5893 additions and 0 deletions

.github/workflows/ci.yml

@ -0,0 +1,124 @@
name: CI
on:
pull_request:
push:
branches: [ main ]
# Uncomment before first release.
#env:
# RUSTFLAGS: -Dwarnings
jobs:
check:
name: Check
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
rust:
- stable
- 1.64.0
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
- name: Run cargo check
uses: actions-rs/cargo@v1
with:
command: check
args: --benches
test:
name: Test suite
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --release
loom-dry-run:
name: Loom dry run
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Dry-run cargo test (Loom)
uses: actions-rs/cargo@v1
with:
command: test
args: --no-run --tests
env:
RUSTFLAGS: --cfg asynchronix_loom
lints:
name: Lints
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: default
override: true
- name: Run cargo fmt
uses: actions-rs/cargo@v1
with:
command: fmt
args: --all -- --check
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
docs:
name: Docs
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Run cargo doc
uses: actions-rs/cargo@v1
with:
command: doc
args: --no-deps --document-private-items

.github/workflows/loom.yml

@ -0,0 +1,34 @@
name: Loom
on:
pull_request:
push:
branches: [ main ]
paths:
- 'asynchronix/src/runtime/executor/queue.rs'
- 'asynchronix/src/runtime/executor/queue/**'
- 'asynchronix/src/runtime/executor/task.rs'
- 'asynchronix/src/runtime/executor/task/**'
jobs:
loom:
name: Loom
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Run cargo test (Loom)
uses: actions-rs/cargo@v1
with:
command: test
args: --tests --release
env:
RUSTFLAGS: --cfg asynchronix_loom

.gitignore

@ -0,0 +1,2 @@
target
Cargo.lock

Cargo.toml

@ -0,0 +1,2 @@
[workspace]
members = ["asynchronix"]

LICENSE-APACHE

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

LICENSE-MIT

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2022 Serge Barral
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

@ -0,0 +1,96 @@
# Asynchronix
A high-performance asynchronous computation framework for system simulation.
## What is this?
> **Warning**: this page is currently aimed mostly at interested
> contributors, but resources for users will be added soon.
In a nutshell, Asynchronix is an effort to develop a framework for
discrete-event system simulation, with a particular focus on cyberphysical
systems. In this context, a system might be something as large as a spacecraft,
or as small as an IoT device.
Asynchronix draws from experience in the space industry but differs from
existing tools in a number of respects, including:
1) *open-source license*: it is distributed under the very permissive MIT and
Apache 2 licenses, with the intent to foster an ecosystem where models can be
easily exchanged without reliance on proprietary APIs,
2) *developer-friendly technology*: Rust's support for algebraic types and its
powerful type system make it ideal for the "cyber" part in cyberphysical,
i.e. for modelling digital devices with state machines,
3) *very fast*: by leveraging Rust's excellent support for multithreading and
async programming, simulation models can run efficiently in parallel with all
required synchronization being transparently handled by the simulator.
## General design
Asynchronix is an async compute framework for time-based discrete event
simulation.
From the perspective of simulation model implementers and users, it closely
resembles a flow-based programming framework: a model is essentially an isolated
entity with a fixed set of typed inputs and outputs, communicating with other
models and with the scheduler through message passing. Unlike in conventional
flow-based programming, however, request-response patterns are also possible.
Under the hood, Asynchronix' implementation is based on async Rust and the actor
model. All inputs are forwarded to a single "mailbox" (an async channel),
preserving the relative order of arrival of input messages.
Computations proceed at discrete times. When executed, models can post events
for the future, i.e. request the delayed activation of an input. Whenever the
computation at a given time completes, the scheduler selects the nearest future
time at which one or several events are scheduled, thus triggering another set
of computations.
This computational process makes it difficult to use general-purpose runtimes
such as Tokio, because the end of a set of computations is technically a
deadlock: the computation completes when all models have nothing left to do and
are blocked on an empty mailbox. Also, instead of managing a conventional
reactor, the runtime manages a priority queue containing the posted events. For
these reasons, it was necessary for Asynchronix to develop a fully custom
runtime.
Another crucial aspect of async compute is message-passing efficiency:
oftentimes the processing of an input is a simple action, making inter-thread
message-passing the bottleneck. This in turn calls for a very efficient
channel implementation, heavily optimized for the case of starved receivers
since models are most of the time waiting for an input to become available.
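To make the time-advancing loop described above concrete, here is a minimal,
self-contained toy sketch (plain `std`, not Asynchronix code): events sit in a
time-sorted priority queue, everything scheduled at the earliest time is
executed, and the simulated clock then jumps directly to the next scheduled
time.

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn main() {
    // Min-heap of (time, message) pairs standing in for scheduled inputs.
    let mut events = BinaryHeap::new();
    events.push(Reverse((30u64, "thruster off")));
    events.push(Reverse((10, "thruster on")));
    events.push(Reverse((10, "telemetry poll")));

    while let Some(Reverse((now, msg))) = events.pop() {
        // Run everything scheduled at the current simulated time...
        println!("t = {}: {}", now, msg);
        while events.peek().map_or(false, |Reverse((t, _))| *t == now) {
            let Reverse((_, msg)) = events.pop().unwrap();
            println!("t = {}: {}", now, msg);
        }
        // ...then the clock jumps directly to the next scheduled time.
    }
}
```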
## Current state
The simulator is rapidly approaching MVP completion and has achieved two major
milestones:
* completion of an extremely fast asynchronous multi-threaded channel,
demonstrated in the [Tachyonix][tachyonix] project; this channel is the
backbone of the actor model,
* completion of a custom `async` executor optimized for message-passing and
deadlock detection, which has demonstrated even better performance than Tokio
for message-passing; this executor is already in the main branch and can be
tested against other executors using the Tachyonix [benchmark].
Before it becomes usable, however, further work is required to implement the
priority queue, add model inputs and outputs, and adapt the channel.
[tachyonix]: https://github.com/asynchronics/tachyonix
[benchmark]: https://github.com/asynchronics/tachyonix/tree/main/bench
## License
This software is licensed under the [Apache License, Version 2.0](LICENSE-APACHE) or the
[MIT license](LICENSE-MIT), at your option.
## Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.

asynchronix/Cargo.toml

@ -0,0 +1,33 @@
[package]
name = "asynchronix"
authors = ["Serge Barral <serge.barral@asynchronics.com>"]
version = "0.1.0"
edition = "2021"
rust-version = "1.64"
license = "MIT OR Apache-2.0"
repository = "https://github.com/asynchronics/asynchronix"
readme = "../README.md"
description = """
A high-performance asynchronous compute framework for system simulation.
"""
categories = ["simulation", "aerospace", "science"]
keywords = ["simulation", "discrete-event", "systems", "cyberphysical", "real-time"]
[features]
# API-unstable public exports meant for external test/benchmarking; development only.
dev-hooks = []
# Logging of performance-related statistics; development only.
dev-logs = []
[dependencies]
parking = "2.0"
slab = "0.4"
cache-padded = "1.1"
num_cpus = "1.13"
[target.'cfg(asynchronix_loom)'.dependencies]
loom = "0.5"
[dev-dependencies]
futures-channel = "0.3"
futures-util = "0.3"


@ -0,0 +1,38 @@
//! Unstable, unofficial public API meant for external benchmarking and testing.
//!
//! Not for production use!
use std::future::Future;
use crate::runtime::executor;
/// A multi-threaded `async` executor.
#[derive(Debug)]
pub struct Executor(executor::Executor);
impl Executor {
/// Creates an executor that runs futures on a thread pool.
///
/// The maximum number of threads is set with the `pool_size` parameter.
pub fn new(pool_size: usize) -> Self {
Self(executor::Executor::new(pool_size))
}
/// Spawns a task whose output will never be retrieved.
///
/// This is mostly useful to avoid undue reference counting for futures that
/// return a `()` type.
pub fn spawn_and_forget<T>(&self, future: T)
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
self.0.spawn_and_forget(future);
}
/// Lets the executor run, blocking until all futures have completed or until
/// the executor deadlocks.
pub fn run(&mut self) {
self.0.run();
}
}
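A usage sketch of this hook, assuming the crate is built with the `dev-hooks`
feature so that the module is re-exported as `asynchronix::dev_hooks` (see
`lib.rs` below):
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use asynchronix::dev_hooks::Executor;

fn main() {
    // A thread pool with up to 4 worker threads.
    let mut executor = Executor::new(4);
    let counter = Arc::new(AtomicUsize::new(0));

    for _ in 0..100 {
        let counter = counter.clone();
        // The future's output is `()`, so no promise needs to be kept around.
        executor.spawn_and_forget(async move {
            counter.fetch_add(1, Ordering::Relaxed);
        });
    }

    // Blocks until all futures have completed or the executor deadlocks.
    executor.run();
    assert_eq!(counter.load(Ordering::Relaxed), 100);
}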

asynchronix/src/lib.rs

@ -0,0 +1,11 @@
//! Asynchronix: a high-performance asynchronous computation framework for
//! system simulation.
#![warn(missing_docs, missing_debug_implementations, unreachable_pub)]
mod loom_exports;
pub(crate) mod macros;
pub mod runtime;
#[cfg(feature = "dev-hooks")]
pub mod dev_hooks;


@ -0,0 +1,53 @@
#[cfg(asynchronix_loom)]
#[allow(unused_imports)]
pub(crate) mod sync {
pub(crate) mod atomic {
pub(crate) use loom::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicUsize, Ordering};
}
}
#[cfg(not(asynchronix_loom))]
#[allow(unused_imports)]
pub(crate) mod sync {
pub(crate) mod atomic {
pub(crate) use std::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicUsize, Ordering};
}
}
#[cfg(asynchronix_loom)]
pub(crate) mod cell {
pub(crate) use loom::cell::UnsafeCell;
}
#[cfg(not(asynchronix_loom))]
pub(crate) mod cell {
#[derive(Debug)]
pub(crate) struct UnsafeCell<T>(std::cell::UnsafeCell<T>);
#[allow(dead_code)]
impl<T> UnsafeCell<T> {
#[inline(always)]
pub(crate) fn new(data: T) -> UnsafeCell<T> {
UnsafeCell(std::cell::UnsafeCell::new(data))
}
#[inline(always)]
pub(crate) fn with<R>(&self, f: impl FnOnce(*const T) -> R) -> R {
f(self.0.get())
}
#[inline(always)]
pub(crate) fn with_mut<R>(&self, f: impl FnOnce(*mut T) -> R) -> R {
f(self.0.get())
}
}
}
#[allow(unused_macros)]
macro_rules! debug_or_loom_assert {
($($arg:tt)*) => (if cfg!(any(debug_assertions, asynchronix_loom)) { assert!($($arg)*); })
}
#[allow(unused_macros)]
macro_rules! debug_or_loom_assert_eq {
($($arg:tt)*) => (if cfg!(any(debug_assertions, asynchronix_loom)) { assert_eq!($($arg)*); })
}
#[allow(unused_imports)]
pub(crate) use debug_or_loom_assert;
#[allow(unused_imports)]
pub(crate) use debug_or_loom_assert_eq;
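A minimal in-crate sketch (hypothetical test, non-Loom build) of the
closure-based access pattern exposed by the `UnsafeCell` wrapper above:
#[cfg(all(test, not(asynchronix_loom)))]
mod cell_usage_sketch {
    use super::cell::UnsafeCell;

    #[test]
    fn closure_based_access() {
        let cell = UnsafeCell::new(41u32);
        // Shared (read) access hands out a `*const T`; the caller is
        // responsible for upholding the usual aliasing rules.
        let value = cell.with(|ptr| unsafe { *ptr });
        // Exclusive (write) access hands out a `*mut T`.
        cell.with_mut(|ptr| unsafe { *ptr = value + 1 });
        assert_eq!(cell.with(|ptr| unsafe { *ptr }), 42);
    }
}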


@ -0,0 +1 @@
pub(crate) mod scoped_local_key;


@ -0,0 +1,182 @@
use std::thread::LocalKey;
use std::cell::Cell;
use std::marker;
use std::ptr;
/// Declare a new thread-local storage scoped key of type `ScopedKey<T>`.
///
/// This is based on the `scoped-tls` crate, with slight modifications, such as
/// the use of the newly available `const` qualifier for TLS.
macro_rules! scoped_thread_local {
($(#[$attrs:meta])* $vis:vis static $name:ident: $ty:ty) => (
$(#[$attrs])*
$vis static $name: $crate::macros::scoped_local_key::ScopedLocalKey<$ty>
= $crate::macros::scoped_local_key::ScopedLocalKey {
inner: {
thread_local!(static FOO: ::std::cell::Cell<*const ()> = const {
std::cell::Cell::new(::std::ptr::null())
});
&FOO
},
_marker: ::std::marker::PhantomData,
};
)
}
pub(crate) use scoped_thread_local;
/// Type representing a thread local storage key corresponding to a reference
/// to the type parameter `T`.
pub(crate) struct ScopedLocalKey<T> {
pub(crate) inner: &'static LocalKey<Cell<*const ()>>,
pub(crate) _marker: marker::PhantomData<T>,
}
unsafe impl<T> Sync for ScopedLocalKey<T> {}
impl<T> ScopedLocalKey<T> {
/// Inserts a value into this scoped thread local storage slot for the
/// duration of a closure.
pub(crate) fn set<F, R>(&'static self, t: &T, f: F) -> R
where
F: FnOnce() -> R,
{
struct Reset {
key: &'static LocalKey<Cell<*const ()>>,
val: *const (),
}
impl Drop for Reset {
fn drop(&mut self) {
self.key.with(|c| c.set(self.val));
}
}
let prev = self.inner.with(|c| {
let prev = c.get();
c.set(t as *const _ as *const ());
prev
});
let _reset = Reset {
key: self.inner,
val: prev,
};
f()
}
/// Removes the value from this scoped thread local storage slot for the
/// duration of a closure.
pub(crate) fn unset<F, R>(&'static self, f: F) -> R
where
F: FnOnce() -> R,
{
struct Reset {
key: &'static LocalKey<Cell<*const ()>>,
val: *const (),
}
impl Drop for Reset {
fn drop(&mut self) {
self.key.with(|c| c.set(self.val));
}
}
let prev = self.inner.with(|c| {
let prev = c.get();
c.set(ptr::null());
prev
});
let _reset = Reset {
key: self.inner,
val: prev,
};
f()
}
/// If the value is set, evaluates a closure taking a reference to the value
/// as argument and returns the closure's output; returns `None` otherwise.
pub(crate) fn map<F, R>(&'static self, f: F) -> Option<R>
where
F: FnOnce(&T) -> R,
{
let val = self.inner.with(|c| c.get());
if val.is_null() {
None
} else {
Some(f(unsafe { &*(val as *const T) }))
}
}
}
#[cfg(all(test, not(asynchronix_loom)))]
mod tests {
use std::cell::Cell;
use std::sync::mpsc::{channel, Sender};
use std::thread;
scoped_thread_local!(static FOO: u32);
#[test]
fn scoped_local_key_smoke() {
scoped_thread_local!(static BAR: u32);
BAR.set(&1, || {
BAR.map(|_slot| {}).unwrap();
});
}
#[test]
fn scoped_local_key_set() {
scoped_thread_local!(static BAR: Cell<u32>);
BAR.set(&Cell::new(1), || {
BAR.map(|slot| {
assert_eq!(slot.get(), 1);
})
.unwrap();
});
}
#[test]
fn scoped_local_key_unset() {
scoped_thread_local!(static BAR: Cell<u32>);
BAR.set(&Cell::new(1), || {
BAR.unset(|| assert!(BAR.map(|_| {}).is_none()));
BAR.map(|slot| {
assert_eq!(slot.get(), 1);
})
.unwrap();
});
}
#[test]
fn scoped_local_key_panic_resets() {
struct Check(Sender<u32>);
impl Drop for Check {
fn drop(&mut self) {
FOO.map(|r| {
self.0.send(*r).unwrap();
})
.unwrap()
}
}
let (tx, rx) = channel();
let t = thread::spawn(|| {
FOO.set(&1, || {
let _r = Check(tx);
FOO.set(&2, || panic!());
});
});
assert_eq!(rx.recv().unwrap(), 1);
assert!(t.join().is_err());
}
}


@ -0,0 +1,3 @@
//! Executor and tasks.
pub(crate) mod executor;


@ -0,0 +1,466 @@
use std::future::Future;
use std::panic::{self, AssertUnwindSafe};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::thread::{self, JoinHandle};
use std::time::{Duration, Instant};
use parking::Parker;
use slab::Slab;
mod find_bit;
mod injector;
mod pool;
mod queue;
mod rng;
mod task;
mod worker;
#[cfg(all(test, not(asynchronix_loom)))]
mod tests;
use self::pool::{Pool, PoolState};
use self::rng::Rng;
use self::task::{CancelToken, Promise, Runnable};
use self::worker::Worker;
use crate::macros::scoped_local_key::scoped_thread_local;
type Bucket = injector::Bucket<Runnable, 128>;
type GlobalQueue = injector::Injector<Runnable, 128>;
type LocalQueue = queue::Worker<Runnable, queue::B256>;
type Stealer = queue::Stealer<Runnable, queue::B256>;
scoped_thread_local!(static LOCAL_WORKER: Worker);
scoped_thread_local!(static ACTIVE_TASKS: Mutex<Slab<CancelToken>>);
static NEXT_EXECUTOR_ID: AtomicUsize = AtomicUsize::new(0);
/// A multi-threaded `async` executor.
///
/// The executor is exclusively designed for message-passing computational
/// tasks. As such, it does not include an I/O reactor and does not consider
/// fairness as a goal in itself. While it does use fair local queues inasmuch
/// as these tend to perform better in message-passing applications, it uses an
/// unfair injection queue and a LIFO slot without attempting to mitigate the
/// effects of badly behaved code (e.g. futures that use spin-locks and hope for
/// the best by yielding to the executor with something like tokio's
/// `yield_now`).
///
/// Another way in which it differs from other `async` executors is that it
/// treats deadlocking as a normal occurrence. This is because in a
/// discrete-time simulator, the simulation of a system at a given time step
/// will make as much progress as possible until it technically reaches a
/// deadlock. Only then does the simulator advance the simulated time to that of
/// the next "event" extracted from a time-sorted priority queue and dispatch it
/// to enable further progress in the computation.
///
/// The design of the executor is largely influenced by the tokio and go
/// schedulers, both of which are optimized for message-passing applications. In
/// particular, it uses fast, fixed-size thread-local work-stealing queues with
/// a "fast" non-stealable slot in combination with a global injector queue. The
/// injector queue is used both to schedule new tasks and to absorb temporary
/// overflow in the local queues. The design of the injector queue is kept very
/// simple by taking advantage of the fact that the injector is not required to
/// be either LIFO or FIFO.
///
/// Probably the largest difference with tokio is the task system, which boasts
/// a higher throughput achieved by reducing the need for synchronization.
/// Another difference is that, at the moment, the complete subset of active
/// worker threads is stored in a single atomic variable. This makes it
/// possible, in particular, to rapidly identify free worker threads for stealing
/// operations. The downside of this approach is that the maximum number of
/// worker threads is limited to `usize::BITS`, but this is unlikely to
/// constitute a limitation since system simulation is not typically an
/// embarrassingly parallel problem.
#[derive(Debug)]
pub(crate) struct Executor {
pool: Arc<Pool>,
active_tasks: Arc<Mutex<Slab<CancelToken>>>,
parker: parking::Parker,
join_handles: Vec<JoinHandle<()>>,
}
impl Executor {
/// Creates an executor that runs futures on a thread pool.
///
/// The maximum number of threads is set with the `num_threads` parameter.
pub(crate) fn new(num_threads: usize) -> Self {
let (parker, unparker) = parking::pair();
let (local_data, shared_data): (Vec<_>, Vec<_>) = (0..num_threads)
.map(|_| {
let (parker, unparker) = parking::pair();
let local_queue = LocalQueue::new();
let stealer = local_queue.stealer();
((local_queue, parker), (stealer, unparker))
})
.unzip();
// Each executor instance has a unique ID inherited by tasks to ensure
// that tasks are scheduled on their parent executor.
let executor_id = NEXT_EXECUTOR_ID.fetch_add(1, Ordering::Relaxed);
assert!(
executor_id <= usize::MAX / 2,
"{} executors have been instantiated: this is most probably a bug.",
usize::MAX / 2
);
let pool = Arc::new(Pool::new(executor_id, unparker, shared_data.into_iter()));
let active_tasks = Arc::new(Mutex::new(Slab::new()));
// All workers must be marked as active _before_ spawning the threads to
// make sure that the count of active workers does not fall to zero
// before all workers are blocked on the signal barrier.
pool.set_all_workers_active();
// Spawn all worker threads.
let join_handles: Vec<_> = local_data
.into_iter()
.enumerate()
.map(|(id, (local_queue, worker_parker))| {
let thread_builder = thread::Builder::new().name(format!("Worker #{}", id));
thread_builder
.spawn({
let pool = pool.clone();
let active_tasks = active_tasks.clone();
move || {
let worker = Worker::new(local_queue, pool);
ACTIVE_TASKS.set(&active_tasks, || {
LOCAL_WORKER
.set(&worker, || run_local_worker(&worker, id, worker_parker))
});
}
})
.unwrap()
})
.collect();
// Wait until all workers are blocked on the signal barrier.
parker.park();
assert!(pool.is_idle());
Self {
pool,
active_tasks,
parker,
join_handles,
}
}
/// Spawns a task and returns a promise that can be polled to retrieve the
/// task's output.
pub(crate) fn spawn<T>(&self, future: T) -> Promise<T::Output>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// Book a slot to store the task cancellation token.
let mut active_tasks = self.active_tasks.lock().unwrap();
let task_entry = active_tasks.vacant_entry();
// Wrap the future so that it removes its cancel token from the
// executor's list when dropped.
let future = CancellableFuture::new(future, task_entry.key());
let (promise, runnable, cancel_token) =
task::spawn(future, schedule_task, self.pool.executor_id);
task_entry.insert(cancel_token);
self.pool.global_queue.insert_task(runnable);
self.pool.activate_worker();
promise
}
/// Spawns a task whose output will never be retrieved.
///
/// This is mostly useful to avoid undue reference counting for futures that
/// return a `()` type.
pub(crate) fn spawn_and_forget<T>(&self, future: T)
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// Book a slot to store the task cancellation token.
let mut active_tasks = self.active_tasks.lock().unwrap();
let task_entry = active_tasks.vacant_entry();
// Wrap the future so that it removes its cancel token from the
// executor's list when dropped.
let future = CancellableFuture::new(future, task_entry.key());
let (runnable, cancel_token) =
task::spawn_and_forget(future, schedule_task, self.pool.executor_id);
task_entry.insert(cancel_token);
self.pool.global_queue.insert_task(runnable);
self.pool.activate_worker();
}
/// Lets the executor run, blocking until all futures have completed or until
/// the executor deadlocks.
pub(crate) fn run(&mut self) {
loop {
if let Some(worker_panic) = self.pool.take_panic() {
panic::resume_unwind(worker_panic);
}
if self.pool.is_idle() {
return;
}
self.parker.park();
}
}
}
impl Drop for Executor {
fn drop(&mut self) {
// Force all threads to return.
self.pool.trigger_termination();
for join_handle in self.join_handles.drain(0..) {
join_handle.join().unwrap();
}
// Drop all tasks that have not completed.
//
// A local worker must be set because some tasks may schedule other
// tasks when dropped, which requires that a local worker be available.
let worker = Worker::new(LocalQueue::new(), self.pool.clone());
LOCAL_WORKER.set(&worker, || {
// Cancel all pending futures.
//
// `ACTIVE_TASKS` is explicitly unset to prevent
// `CancellableFuture::drop()` from trying to remove its own token
// from the list of active tasks as this would result in a reentrant
// lock. This is mainly to stay on the safe side: `ACTIVE_TASKS`
// should not be set on this thread anyway, unless for some reason
// the executor runs inside another executor.
ACTIVE_TASKS.unset(|| {
let mut tasks = self.active_tasks.lock().unwrap();
for task in tasks.drain() {
task.cancel();
}
// Some of the dropped tasks may have scheduled other tasks that
// were not yet cancelled, preventing them from being dropped
// upon cancellation. This is OK: the scheduled tasks will be
// dropped when the local and global queues are dropped, and
// they cannot re-schedule one another since all tasks were
// cancelled.
});
});
}
}
// A `Future` wrapper that removes its cancellation token from the executor's
// list of active tasks when dropped.
struct CancellableFuture<T: Future> {
inner: T,
cancellation_key: usize,
}
impl<T: Future> CancellableFuture<T> {
fn new(fut: T, cancellation_key: usize) -> Self {
Self {
inner: fut,
cancellation_key,
}
}
}
impl<T: Future> Future for CancellableFuture<T> {
type Output = T::Output;
#[inline(always)]
fn poll(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Self::Output> {
unsafe { self.map_unchecked_mut(|s| &mut s.inner).poll(cx) }
}
}
impl<T: Future> Drop for CancellableFuture<T> {
fn drop(&mut self) {
// Remove the task from the list of active tasks if the future is
// dropped on a worker thread. Otherwise do nothing and let the
// executor's drop handler do the cleanup.
let _ = ACTIVE_TASKS.map(|active_tasks| {
// Don't unwrap on `lock()` because this function can be called from
// a destructor and should not panic. In the worst case, the cancel
// token will be left in the list of active tasks, which prevents
// eager task deallocation but does not cause any other issue.
if let Ok(mut active_tasks) = active_tasks.lock() {
let _cancel_token = active_tasks.try_remove(self.cancellation_key);
}
});
}
}
// Schedules a `Runnable`.
fn schedule_task(task: Runnable, executor_id: usize) {
LOCAL_WORKER
.map(|worker| {
// Check that this task was indeed spawned on this executor.
assert_eq!(
executor_id, worker.pool.executor_id,
"Tasks must be awaken on the same executor they are spawned on"
);
// Store the task in the fast slot and retrieve the one that was
// formerly stored, if any.
let prev_task = match worker.fast_slot.replace(Some(task)) {
// If there already was a task in the slot, proceed so it can be
// moved to a task queue.
Some(t) => t,
// Otherwise return immediately: this task cannot be stolen so
// there is no point in activating a sibling worker.
None => return,
};
// Push the previous task to the local queue if possible or on the
// global queue otherwise.
if let Err(prev_task) = worker.local_queue.push(prev_task) {
// The local queue is full. Try to move half of it to the global
// queue; if this fails, just push one task to the global queue.
if let Ok(drain) = worker.local_queue.drain(|_| Bucket::capacity()) {
worker
.pool
.global_queue
.push_bucket(Bucket::from_iter(drain));
worker.local_queue.push(prev_task).unwrap();
} else {
worker.pool.global_queue.insert_task(prev_task);
}
}
// A task has been pushed to the local or global queue: try to
// activate another worker if no worker is currently searching for a
// task.
if worker.pool.searching_worker_count() == 0 {
worker.pool.activate_worker_relaxed();
}
})
.expect("Tasks may not be awaken outside executor threads");
}
/// Processes all incoming tasks on a worker thread until the `Terminate` signal
/// is received or until it panics.
fn run_local_worker(worker: &Worker, id: usize, parker: Parker) {
let result = panic::catch_unwind(AssertUnwindSafe(|| {
// Set how long to spin when searching for a task.
const MAX_SEARCH_DURATION: Duration = Duration::from_nanos(1000);
// Seed a thread RNG with the worker ID.
let rng = Rng::new(id as u64);
loop {
// Signal barrier: park until notified to continue or terminate.
if worker.pool.set_worker_inactive(id) == PoolState::Idle {
// If this worker was the last active worker, it is necessary to
// check again whether the global queue is not populated. This
// could happen if the executor thread pushed a task to the
// global queue but could not activate a new worker because all
// workers were already active at that moment.
if !worker.pool.global_queue.is_empty() {
worker.pool.set_worker_active(id);
} else {
worker.pool.executor_unparker.unpark();
parker.park();
}
} else {
parker.park();
}
if worker.pool.termination_is_triggered() {
return;
}
// We may spin for a little while: start counting.
let mut search_start = Instant::now();
// Process the tasks one by one.
loop {
// Check the global queue first.
if let Some(bucket) = worker.pool.global_queue.pop_bucket() {
let bucket_iter = bucket.into_iter();
// There is a _very_ remote possibility that, even though
// the local queue is empty, it has temporarily too little
// spare capacity for the bucket. This could happen because
// a concurrent steal operation could be preempted for all
// the time it took to pop and process the remaining tasks
// and hasn't released the stolen capacity yet.
//
// Unfortunately, we cannot just skip checking the global
// queue altogether when there isn't enough spare capacity
// in the local queue, as this could lead to a race: suppose
// that (1) this thread has earlier pushed tasks onto the
// global queue, and (2) the stealer has processed all
// stolen tasks before this thread sees the capacity
// restored and at the same time (3) the stealer does not
// yet see the tasks this thread pushed to the global queue;
// in such scenario, both this thread and the stealer thread
// may park and leave unprocessed tasks in the global queue.
//
// This is the only instance where spinning is used, as the
// probability of this happening is close to zero and the
// complexity of a signaling mechanism (condvar & friends)
// wouldn't carry its weight.
while worker.local_queue.spare_capacity() < bucket_iter.len() {}
// Since empty buckets are never pushed onto the global
// queue, we should now have at least one task to process.
worker.local_queue.extend(bucket_iter);
} else {
// The global queue is empty. Try to steal from active
// siblings.
let mut stealers = worker.pool.shuffled_stealers(Some(id), &rng);
if stealers.all(|stealer| {
stealer
.steal_and_pop(&worker.local_queue, |n| n - n / 2)
.map(|task| {
let prev_task = worker.fast_slot.replace(Some(task));
assert!(prev_task.is_none());
})
.is_err()
}) {
// Give up if unsuccessful for too long.
if (Instant::now() - search_start) > MAX_SEARCH_DURATION {
worker.pool.end_worker_search();
break;
}
// Re-try.
continue;
}
}
// Signal the end of the search so that another worker can be
// activated when a new task is scheduled.
worker.pool.end_worker_search();
// Pop tasks from the fast slot or the local queue.
while let Some(task) = worker.fast_slot.take().or_else(|| worker.local_queue.pop())
{
if worker.pool.termination_is_triggered() {
return;
}
task.run();
}
// Resume the search for tasks.
worker.pool.begin_worker_search();
search_start = Instant::now();
}
}
}));
// Propagate the panic, if any.
if let Err(panic) = result {
worker.pool.register_panic(panic);
worker.pool.trigger_termination();
worker.pool.executor_unparker.unpark();
}
}


@ -0,0 +1,190 @@
/// Find the position of the `Nᵗʰ` set bit starting the search from the least
/// significant bit.
///
/// A rank `N=1` specifies the first set bit starting from the LSB, a rank `N=2`
/// specifies the second set bit starting from the LSB, etc.
///
/// The rank is to be provided as a closure that takes as argument the total
/// number of set bits in the value (same as `value.count_ones()`). The rank
/// returned by the closure should therefore never be greater than the closure's
/// argument.
///
/// The returned position is 0-based. If the bit to be found is the LSB, or if
/// the provided rank is 0, the returned position is 0. If in turn the bit to be
/// found is the MSB, or if the specified rank is strictly greater than the
/// total number of bits set, the returned position is `usize::BITS - 1`.
///
/// It is recommended to check for zero values before calling this function
/// since the returned position is then meaningless regardless of the rank.
///
/// Implementation notes: the implementation is based on a tree-of-adders
/// algorithm followed by binary search, with overall theoretical complexity
/// `O(log(usize::BITS))`. In release mode the function is optimized to fully
/// branchless code with a pretty moderate cost of about 70 CPU cycles on x86-64
/// and fewer than 60 instructions on aarch64, independently of the inputs. The
/// use of the `popcnt` intrinsic was also investigated to compute sub-sums in
/// the binary search but was found to be slower than the tree-of-adders.
#[allow(clippy::assertions_on_constants)]
pub(crate) fn find_bit<F: FnOnce(usize) -> usize>(value: usize, rank_fn: F) -> usize {
const P: usize = usize::BITS.trailing_zeros() as usize; // P = log2(usize::BITS)
const M: [usize; P] = sum_masks();
const _: () = assert!(usize::BITS.is_power_of_two());
const _: () = assert!(P >= 2);
// Partial sub-sums in groups of adjacent 2^p bits.
let mut sum = [0; P + 1];
// The zero-order sub-sums (2^p == 1) simply reflect the original value.
sum[0] = value;
// Sub-sums for groups of 2 adjacent bits. The RHS is equivalent to
// `(sum[0] & M[0]) + ((sum[0] >> 1) & M[0]);`.
sum[1] = value - ((value >> 1) & M[0]);
// Sub-sums for groups of 4 adjacent bits.
sum[2] = (sum[1] & M[1]) + ((sum[1] >> 2) & M[1]);
// Sub-sums for groups of 8, 16 etc. adjacent bits.
//
// The below loop seems to be reliably unrolled in release mode, which in
// turn enables constant propagation and folding. To stay on the safe side,
// however, the sum masks `M[p]` are const-evaluated as they use integer
// division and would be otherwise very expensive should loop unrolling fail
// to kick in.
for p in 2..P {
// From p>=2, the mask can be applied to pairwise sums rather than to
// each operand separately as there is no risk that sub-sums will
// overflow on neighboring groups. The RHS is thus equivalent to
// `(sum[p] & M[p]) + ((sum[0] >> (1 << p)) & M[p]);`
sum[p + 1] = (sum[p] + (sum[p] >> (1 << p))) & M[p];
}
let mut rank = rank_fn(sum[P]);
// Find the bit using binary search.
//
// The below loop seems to be reliably unrolled in release mode so the whole
// function is effectively optimized to fully branchless code.
let mut shift = 0usize;
for p in (0..P).rev() {
// Low bits mask of width 2^p.
let sub_mask = (1 << (1 << p)) - 1;
// Bit sum of the lower half of the current subset.
let lower_sum = (sum[p] >> shift) & sub_mask;
// Update the rank and the shift if the bit lies in the upper half. The
// below is a branchless version of:
// ```
// if rank > lower_sum {
// rank -= lower_sum;
// shift += 1 << p;
// }
//```
let cmp_mask = ((lower_sum as isize - rank as isize) >> (isize::BITS - 1)) as usize;
rank -= lower_sum & cmp_mask;
shift += (1 << p) & cmp_mask;
}
shift
}
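// A minimal sketch illustrating the rank semantics documented above: in
// `0b0110_1010` the set bits sit at positions 1, 3, 5 and 6 (LSB = position
// 0), so the closure receives a popcount of 4 and requesting rank 3 returns
// position 5.
#[cfg(all(test, not(asynchronix_loom)))]
mod find_bit_example {
    use super::find_bit;

    #[test]
    fn third_set_bit_from_lsb() {
        let pos = find_bit(0b0110_1010usize, |popcount| {
            assert_eq!(popcount, 4);
            3 // request the third set bit, counting from the LSB
        });
        assert_eq!(pos, 5);
    }
}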
/// Generates masks for the tree-of-adder bit summing algorithm.
///
/// The masks are generated according to the pattern:
///
/// ```text
/// m[0] = 0b010101010101...010101010101;
/// m[1] = 0b001100110011...001100110011;
/// m[2] = 0b000011110000...111100001111;
/// ...
/// m[P-1] = 0b000000000000...111111111111;
/// ```
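///
/// For a 64-bit `usize`, for instance, these masks evaluate to:
///
/// ```text
/// m[0] = 0x5555_5555_5555_5555;
/// m[1] = 0x3333_3333_3333_3333;
/// m[2] = 0x0F0F_0F0F_0F0F_0F0F;
/// m[3] = 0x00FF_00FF_00FF_00FF;
/// m[4] = 0x0000_FFFF_0000_FFFF;
/// m[5] = 0x0000_0000_FFFF_FFFF;
/// ```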
#[allow(clippy::assertions_on_constants)]
const fn sum_masks() -> [usize; usize::BITS.trailing_zeros() as usize] {
const P: usize = usize::BITS.trailing_zeros() as usize; // P = log2(usize::BITS)
const _: () = assert!(
usize::BITS == 1 << P,
"sum masks are only supported for `usize` with a power-of-two bit width"
);
let mut m = [0usize; P];
let mut p = 0;
while p != P {
m[p] = !0 / (1 + (1 << (1 << p)));
p += 1;
}
m
}
#[cfg(all(test, not(asynchronix_loom), not(miri)))]
mod tests {
use super::super::rng;
use super::*;
// Fuzzing test.
#[test]
fn find_bit_fuzz() {
const SAMPLES: usize = 100_000;
#[inline(always)]
fn check(value: usize) {
let bitsum = value.count_ones() as usize;
for rank in 1..=bitsum {
let pos = find_bit(value, |s| {
assert_eq!(s, bitsum);
rank
});
// Check that the bit is indeed set.
assert!(
value & (1 << pos) != 0,
"input value: {:064b}\nrequested rank: {}\nreturned position: {}",
value,
rank,
pos
);
// Check that the bit is indeed of the requested rank.
assert_eq!(
rank,
(value & ((1 << pos) - 1)).count_ones() as usize + 1,
"input value: {:064b}\nrequested rank: {}\nreturned position: {}",
value,
rank,
pos
);
}
}
// Check behavior with a null input value.
let pos = find_bit(0, |s| {
assert_eq!(s, 0);
0
});
assert_eq!(pos, 0);
// Check behavior with other special values.
check(1);
check(1 << (usize::BITS - 1));
check(usize::MAX);
// Check behavior with random values.
let rng = rng::Rng::new(12345);
for _ in 0..SAMPLES {
// Generate a random usize from one or more random u64 ...for the
// day we get 128+ bit platforms :-)
let mut r = rng.gen() as usize;
let mut shift = 64;
while shift < usize::BITS {
r |= (rng.gen() as usize) << shift;
shift += 64;
}
check(r);
}
}
}


@ -0,0 +1,189 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::{mem, vec};
/// An unfair injector queue which stores batches of tasks in bounded-size
/// buckets.
///
/// This is a simple but effective unfair injector design which, despite being
/// based on a mutex-protected `Vec`, ensures low contention and low latency in
/// most realistic cases.
///
/// This is achieved by enabling the worker to push and pop batches of tasks
/// readily stored in buckets. Since only the handles to the buckets are moved
/// to and from the injector, pushing and popping a bucket is very fast and the
/// lock is therefore only held for a very short time.
///
/// Also, since tasks in a bucket are memory-contiguous, they can be efficiently
/// copied to and from worker queues. The use of buckets also keeps the size of
/// the injector queue small (its size is the number of buckets) so
/// re-allocation is rare and fast.
///
/// As an additional optimization, an `is_empty` atomic flag allows workers
/// seeking tasks to skip taking the lock if the queue is likely to be
/// empty.
///
/// The queue is not strictly LIFO. While buckets are indeed pushed and popped
/// in LIFO order, individual tasks are stored in a bucket at the front of the
/// queue and this bucket is only moved to the back of the queue when full.
#[derive(Debug)]
pub(crate) struct Injector<T, const BUCKET_CAPACITY: usize> {
inner: Mutex<Vec<Bucket<T, BUCKET_CAPACITY>>>,
is_empty: AtomicBool,
}
impl<T, const BUCKET_CAPACITY: usize> Injector<T, BUCKET_CAPACITY> {
/// Creates an empty injector queue.
///
/// # Panics
///
/// Panics if the capacity is 0.
pub(crate) const fn new() -> Self {
assert!(BUCKET_CAPACITY >= 1);
Self {
inner: Mutex::new(Vec::new()),
is_empty: AtomicBool::new(true),
}
}
/// Inserts a task.
///
/// The task is inserted in a bucket at the front of the queue. Once this
/// bucket is full, it is moved to the back of the queue.
pub(crate) fn insert_task(&self, task: T) {
let mut inner = self.inner.lock().unwrap();
// Try to push the task onto the first bucket if it has enough capacity left.
if let Some(bucket) = inner.first_mut() {
if let Err(task) = bucket.push(task) {
// The bucket is full: move it to the back of the vector and
// replace it with a newly created bucket that contains the
// task.
let mut new_bucket = Bucket::new();
let _ = new_bucket.push(task); // this cannot fail provided the capacity is >=1
let full_bucket = mem::replace(bucket, new_bucket);
inner.push(full_bucket);
}
return;
}
// The queue is empty: create a new bucket.
let mut new_bucket = Bucket::new();
let _ = new_bucket.push(task); // this cannot fail provided the capacity is >=1
inner.push(new_bucket);
// Ordering: this flag is only used as a hint so Relaxed ordering is
// enough.
self.is_empty.store(false, Ordering::Relaxed);
}
/// Appends a bucket to the back of the queue.
pub(crate) fn push_bucket(&self, bucket: Bucket<T, BUCKET_CAPACITY>) {
let mut inner = self.inner.lock().unwrap();
let was_empty = inner.is_empty();
inner.push(bucket);
// If the queue was empty before, update the flag.
if was_empty {
// Ordering: this flag is only used as a hint so Relaxed ordering is
// enough.
self.is_empty.store(false, Ordering::Relaxed);
}
}
/// Takes the bucket at the back of the queue, if any.
///
/// Note that this can spuriously return `None` even though the queue is
/// populated, unless a happens-before relationship exists between the
/// thread that populated the queue and the thread calling this method (this
/// is obviously the case if they are the same thread).
///
/// This is not an issue in practice because it cannot lead to executor
/// deadlock. Indeed, if the last task/bucket was inserted by a worker
/// thread, this worker thread will always see that the injector queue is
/// populated (unless the bucket was already popped) so it will never exit
/// before all tasks in the injector are processed. Likewise, if the last
/// task/bucket was inserted by the main executor thread before
/// `Executor::run()` is called, the synchronization established when the
/// executor unparks worker threads ensures that the task is visible to all
/// unparked workers.
pub(crate) fn pop_bucket(&self) -> Option<Bucket<T, BUCKET_CAPACITY>> {
// Ordering: this flag is only used as a hint so Relaxed ordering is
// enough.
if self.is_empty.load(Ordering::Relaxed) {
return None;
}
let mut inner = self.inner.lock().unwrap();
let bucket = inner.pop();
if inner.is_empty() {
// Ordering: this flag is only used as a hint so Relaxed ordering is
// enough.
self.is_empty.store(true, Ordering::Relaxed);
}
bucket
}
/// Checks whether the queue is empty.
///
/// Note that this can spuriously return `true` even though the queue is
/// populated, unless a happens-before relationship exists between the
/// thread that populated the queue and the thread calling this method (this
/// is obviously the case if they are the same thread).
pub(crate) fn is_empty(&self) -> bool {
self.is_empty.load(Ordering::Relaxed)
}
}
/// A collection of tasks with a bounded size.
///
/// This is just a very thin wrapper around a `Vec` that ensures that the
/// nominal capacity bound is never exceeded.
#[derive(Debug)]
pub(crate) struct Bucket<T, const CAPACITY: usize>(Vec<T>);
impl<T, const CAPACITY: usize> Bucket<T, CAPACITY> {
/// Creates a new bucket, allocating the full capacity upfront.
pub(crate) fn new() -> Self {
Self(Vec::with_capacity(CAPACITY))
}
/// Returns the bucket's nominal capacity.
pub(crate) const fn capacity() -> usize {
CAPACITY
}
/// Appends one task if capacity allows; otherwise returns the task in the
/// error.
pub(crate) fn push(&mut self, task: T) -> Result<(), T> {
if self.0.len() < CAPACITY {
self.0.push(task);
Ok(())
} else {
Err(task)
}
}
}
impl<T, const CAPACITY: usize> IntoIterator for Bucket<T, CAPACITY> {
type Item = T;
type IntoIter = vec::IntoIter<T>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl<T, const CAPACITY: usize> FromIterator<T> for Bucket<T, CAPACITY> {
fn from_iter<U: IntoIterator<Item = T>>(iter: U) -> Self {
Self(Vec::from_iter(iter.into_iter().take(CAPACITY)))
}
}
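A minimal in-crate sketch (hypothetical test) showing how the bucket-based
protocol above is meant to be used:
#[cfg(all(test, not(asynchronix_loom)))]
mod usage_sketch {
    use super::*;

    #[test]
    fn bucket_batching() {
        let injector: Injector<u32, 4> = Injector::new();

        // Individual tasks accumulate in the bucket at the front of the queue.
        injector.insert_task(1);
        injector.insert_task(2);

        // A worker draining its local queue can append a whole batch with a
        // single lock acquisition.
        injector.push_bucket(Bucket::from_iter(10u32..14));

        // Buckets are popped from the back, so the batch pushed last comes
        // out first.
        let batch = injector.pop_bucket().unwrap();
        assert_eq!(batch.into_iter().collect::<Vec<_>>(), vec![10, 11, 12, 13]);

        // The front bucket holding the individually inserted tasks remains.
        assert!(!injector.is_empty());
    }
}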


@ -0,0 +1,423 @@
use std::any::Any;
use std::sync::atomic::{self, AtomicBool, AtomicUsize, Ordering};
use std::sync::Mutex;
use super::find_bit;
use super::injector::Injector;
use super::rng;
use super::{GlobalQueue, Stealer};
#[derive(Debug)]
pub(crate) struct Pool {
pub(crate) global_queue: GlobalQueue,
pub(crate) executor_id: usize,
pub(crate) executor_unparker: parking::Unparker,
state: PoolRegistry,
stealers: Box<[Stealer]>,
worker_unparkers: Box<[parking::Unparker]>,
searching_workers: AtomicUsize,
terminate_signal: AtomicBool,
worker_panic: Mutex<Option<Box<dyn Any + Send + 'static>>>,
}
impl Pool {
/// Creates a new pool.
pub(crate) fn new(
executor_id: usize,
executor_unparker: parking::Unparker,
shared_data: impl Iterator<Item = (Stealer, parking::Unparker)>,
) -> Self {
let (stealers, worker_unparkers): (Vec<_>, Vec<_>) = shared_data.into_iter().unzip();
let worker_unparkers = worker_unparkers.into_boxed_slice();
Self {
global_queue: Injector::new(),
executor_id,
executor_unparker,
state: PoolRegistry::new(worker_unparkers.len()),
stealers: stealers.into_boxed_slice(),
worker_unparkers,
searching_workers: AtomicUsize::new(0),
terminate_signal: AtomicBool::new(false),
worker_panic: Mutex::new(None),
}
}
/// Marks all pool workers as active.
///
/// Unparking the worker threads is the responsibility of the caller.
pub(crate) fn set_all_workers_active(&self) {
self.state.set_all_active();
}
/// Marks the specified worker as active.
///
/// Unparking the worker thread is the responsibility of the caller.
pub(crate) fn set_worker_active(&self, worker_id: usize) {
self.state.set_active(worker_id);
}
/// Marks the specified worker as idle.
///
/// Parking the worker thread is the responsibility of the caller.
///
/// If this was the last active worker, the main executor thread is
/// unparked.
pub(crate) fn set_worker_inactive(&self, worker_id: usize) -> PoolState {
self.state.set_inactive(worker_id)
}
/// Unparks an idle worker if any is found, or does nothing otherwise.
///
/// For performance reasons, no synchronization is established if no worker
/// is found, meaning that workers in other threads may later transition to
/// idle state without observing the tasks scheduled by the caller to this
/// method. If this is not tolerable (for instance if this method is called
/// from a non-worker thread), use the more expensive `activate_worker`.
pub(crate) fn activate_worker_relaxed(&self) {
if let Some(worker_id) = self.state.set_one_active_relaxed() {
self.searching_workers.fetch_add(1, Ordering::Relaxed);
self.worker_unparkers[worker_id].unpark();
}
}
/// Unparks an idle worker if any is found, or ensures that at least the last
/// worker to transition to idle state will observe all tasks previously
/// scheduled by the caller to this method.
pub(crate) fn activate_worker(&self) {
if let Some(worker_id) = self.state.set_one_active() {
self.searching_workers.fetch_add(1, Ordering::Relaxed);
self.worker_unparkers[worker_id].unpark();
}
}
/// Checks whether the pool is idle, i.e. whether no worker is currently active.
///
/// If `true` is returned, it is guaranteed that all operations performed by
/// the now-inactive workers become visible in this thread.
pub(crate) fn is_idle(&self) -> bool {
self.state.pool_state() == PoolState::Idle
}
/// Increments the count of workers actively searching for tasks.
pub(crate) fn begin_worker_search(&self) {
self.searching_workers.fetch_add(1, Ordering::Relaxed);
}
/// Decrements the count of workers actively searching for tasks.
pub(crate) fn end_worker_search(&self) {
self.searching_workers.fetch_sub(1, Ordering::Relaxed);
}
/// Returns the count of workers actively searching for tasks.
pub(crate) fn searching_worker_count(&self) -> usize {
self.searching_workers.load(Ordering::Relaxed)
}
/// Triggers the termination signal and unparks all worker threads so they
/// can cleanly terminate.
pub(crate) fn trigger_termination(&self) {
self.terminate_signal.store(true, Ordering::Relaxed);
self.state.set_all_active();
for unparker in &*self.worker_unparkers {
unparker.unpark();
}
}
/// Returns true if the termination signal was triggered.
pub(crate) fn termination_is_triggered(&self) -> bool {
self.terminate_signal.load(Ordering::Relaxed)
}
/// Registers a panic associated with the provided worker ID.
///
/// If no panic is currently registered, the provided panic is
/// registered. If a panic was already registered by a worker and was not
/// yet processed by the executor, then nothing is done.
pub(crate) fn register_panic(&self, panic: Box<dyn Any + Send + 'static>) {
let mut worker_panic = self.worker_panic.lock().unwrap();
if worker_panic.is_none() {
*worker_panic = Some(panic);
}
}
/// Takes a worker panic if any is registered.
pub(crate) fn take_panic(&self) -> Option<Box<dyn Any + Send + 'static>> {
let mut worker_panic = self.worker_panic.lock().unwrap();
worker_panic.take()
}
/// Returns an iterator yielding the stealers associated with all active
/// workers, starting from a randomly selected active worker. The worker
/// whose ID is provided as argument (if any) is excluded from the pool of
/// candidates.
pub(crate) fn shuffled_stealers<'a>(
&'a self,
excluded_worker_id: Option<usize>,
rng: &'_ rng::Rng,
) -> ShuffledStealers<'a> {
// All active workers except the specified one are candidates for stealing.
let mut candidates = self.state.get_active();
if let Some(excluded_worker_id) = excluded_worker_id {
candidates &= !(1 << excluded_worker_id);
}
ShuffledStealers::new(candidates, &self.stealers, rng)
}
}
pub(crate) struct ShuffledStealers<'a> {
stealers: &'a [Stealer],
// A bit-rotated bit field of the remaining candidate workers to steal from.
// If set, the LSB represents the next candidate.
candidates: usize,
next_candidate: usize, // index of the next candidate
}
impl<'a> ShuffledStealers<'a> {
fn new(candidates: usize, stealers: &'a [Stealer], rng: &'_ rng::Rng) -> Self {
let (candidates, next_candidate) = if candidates == 0 {
(0, 0)
} else {
let next_candidate = find_bit::find_bit(candidates, |count| {
rng.gen_bounded(count as u64) as usize + 1
});
// Right-rotate the candidates so that the bit corresponding to the
// randomly selected worker becomes the LSB.
let candidate_count = stealers.len();
let lower_mask = (1 << next_candidate) - 1;
let lower_bits = candidates & lower_mask;
let candidates =
(candidates >> next_candidate) | (lower_bits << (candidate_count - next_candidate));
(candidates, next_candidate)
};
Self {
stealers,
candidates,
next_candidate,
}
}
}
impl<'a> Iterator for ShuffledStealers<'a> {
type Item = &'a Stealer;
fn next(&mut self) -> Option<Self::Item> {
if self.candidates == 0 {
return None;
}
// Clear the bit corresponding to the current candidate worker.
self.candidates &= !1;
let current_candidate = self.next_candidate;
if self.candidates != 0 {
// Locate the next candidate worker and make it the LSB.
let shift = self.candidates.trailing_zeros();
self.candidates >>= shift;
// Update the next candidate.
self.next_candidate += shift as usize;
if self.next_candidate >= self.stealers.len() {
self.next_candidate -= self.stealers.len();
}
}
Some(&self.stealers[current_candidate])
}
}
/// Registry of active/idle worker threads.
///
/// The registry only supports up to `usize::BITS` threads.
#[derive(Debug)]
struct PoolRegistry {
active_workers: AtomicUsize,
pool_size: usize,
#[cfg(feature = "dev-logs")]
record: Record,
}
impl PoolRegistry {
/// Creates a new pool registry.
///
/// # Panics
///
/// This will panic if the specified pool size is zero or is more than
/// `usize::BITS`.
fn new(pool_size: usize) -> Self {
assert!(
pool_size >= 1,
"the executor pool size should be at least one"
);
assert!(
pool_size <= usize::BITS as usize,
"the executor pool size should be at most {}",
usize::BITS
);
Self {
active_workers: AtomicUsize::new(0),
pool_size,
#[cfg(feature = "dev-logs")]
record: Record::new(pool_size),
}
}
/// Returns the state of the pool.
///
/// This operation has Acquire semantics, which guarantee that if the pool
/// state returned is `PoolState::Idle`, then all operations performed by
/// the now-inactive workers are visible.
fn pool_state(&self) -> PoolState {
// Ordering: this Acquire operation synchronizes with all Release
// RMWs in the `set_inactive` method via a release sequence.
let active_workers = self.active_workers.load(Ordering::Acquire);
if active_workers == 0 {
PoolState::Idle
} else {
PoolState::Busy
}
}
/// Marks the specified worker as inactive.
///
/// The specified worker must currently be marked as active. Returns
/// `PoolState::Idle` if this was the last active thread.
///
/// If this is the last active worker (i.e. `PoolState::Idle` is returned),
/// then it is guaranteed that all operations performed by the now-inactive
/// workers and by unsuccessful callers to `set_one_active` are now visible.
fn set_inactive(&self, worker_id: usize) -> PoolState {
// Ordering: this Release operation synchronizes with the Acquire
// fence in the below conditional when the pool becomes idle, and/or
// with the Acquire state load in the `pool_state` method.
let active_workers = self
.active_workers
.fetch_and(!(1 << worker_id), Ordering::Release);
if active_workers & !(1 << worker_id) == 0 {
// Ordering: this Acquire fence synchronizes with all Release
// RMWs in this and in the previous calls to `set_inactive` via a
// release sequence.
atomic::fence(Ordering::Acquire);
PoolState::Idle
} else {
PoolState::Busy
}
}
/// Marks the specified worker as active.
fn set_active(&self, worker_id: usize) {
self.active_workers
.fetch_or(1 << worker_id, Ordering::Relaxed);
}
/// Marks all workers as active.
fn set_all_active(&self) {
// Mark all workers as busy.
self.active_workers.store(
!0 >> (usize::BITS - self.pool_size as u32),
Ordering::Relaxed,
);
}
/// Marks a worker as active if any is found, otherwise does nothing.
///
/// The worker ID is returned if successful.
fn set_one_active_relaxed(&self) -> Option<usize> {
let mut active_workers = self.active_workers.load(Ordering::Relaxed);
loop {
let first_idle_worker = active_workers.trailing_ones() as usize;
if first_idle_worker >= self.pool_size {
return None;
};
active_workers = self
.active_workers
.fetch_or(1 << first_idle_worker, Ordering::Relaxed);
if active_workers & (1 << first_idle_worker) == 0 {
#[cfg(feature = "dev-logs")]
self.record.increment(first_idle_worker);
return Some(first_idle_worker);
}
}
}
/// Marks a worker as active if any is found, otherwise ensures that all
/// memory operations made by the caller prior to this call are visible to
/// the last worker transitioning to idle state.
///
/// The worker ID is returned if successful.
fn set_one_active(&self) -> Option<usize> {
let mut active_workers = self.active_workers.load(Ordering::Relaxed);
loop {
let first_idle_worker = active_workers.trailing_ones() as usize;
if first_idle_worker >= self.pool_size {
// There is apparently no free worker, so a dummy RMW with
// Release ordering is performed with the sole purpose of
// synchronizing with the Acquire fence in `set_inactive` so
// that the last worker to transition to idle can see the tasks
// that were queued prior to this call.
let new_active_workers = self.active_workers.fetch_or(0, Ordering::Release);
if new_active_workers == active_workers {
return None;
}
active_workers = new_active_workers;
} else {
active_workers = self
.active_workers
.fetch_or(1 << first_idle_worker, Ordering::Relaxed);
if active_workers & (1 << first_idle_worker) == 0 {
#[cfg(feature = "dev-logs")]
self.record.increment(first_idle_worker);
return Some(first_idle_worker);
}
}
}
}
/// Returns a bit field that indicates all active workers.
fn get_active(&self) -> usize {
self.active_workers.load(Ordering::Relaxed)
}
}
#[derive(PartialEq)]
pub(crate) enum PoolState {
Idle,
Busy,
}
#[cfg(feature = "dev-logs")]
impl Drop for PoolRegistry {
fn drop(&mut self) {
println!("Thread launch count: {:?}", self.record.get());
}
}
#[cfg(feature = "dev-logs")]
#[derive(Debug)]
struct Record {
stats: Vec<AtomicUsize>,
}
#[cfg(feature = "dev-logs")]
impl Record {
fn new(worker_count: usize) -> Self {
let mut stats = Vec::new();
stats.resize_with(worker_count, Default::default);
Self { stats }
}
fn increment(&self, worker_id: usize) {
self.stats[worker_id].fetch_add(1, Ordering::Relaxed);
}
fn get(&self) -> Vec<usize> {
self.stats
.iter()
.map(|s| s.load(Ordering::Relaxed))
.collect()
}
}
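// An illustrative sketch (not part of the original commit): a sanity check of
// the active-worker bit field maintained by `PoolRegistry`, using only the
// methods defined above. Worker `i` is tracked by bit `i` of the field.
#[cfg(test)]
mod registry_sketch {
use super::{PoolRegistry, PoolState};
#[test]
fn registry_tracks_active_workers() {
let registry = PoolRegistry::new(4);
// No worker is active right after creation.
assert!(registry.pool_state() == PoolState::Idle);
// Activating workers 0 and 2 sets the corresponding bits.
registry.set_active(0);
registry.set_active(2);
assert_eq!(registry.get_active(), 0b101);
assert!(registry.pool_state() == PoolState::Busy);
// Only the deactivation of the last active worker reports an idle pool.
assert!(registry.set_inactive(0) == PoolState::Busy);
assert!(registry.set_inactive(2) == PoolState::Idle);
}
}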

View File

@ -0,0 +1,586 @@
use std::fmt;
use std::iter::FusedIterator;
use std::marker::PhantomData;
use std::mem::{drop, MaybeUninit};
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release};
use std::sync::Arc;
use cache_padded::CachePadded;
use crate::loom_exports::cell::UnsafeCell;
use crate::loom_exports::sync::atomic::{AtomicU32, AtomicU64};
use crate::loom_exports::{debug_or_loom_assert, debug_or_loom_assert_eq};
pub(crate) use buffers::*;
mod buffers;
#[cfg(test)]
mod tests;
/// A double-ended FIFO work-stealing queue.
///
/// The general operation of the queue is based on tokio's worker queue, itself
/// based on the Go scheduler's worker queue.
///
/// The queue tracks its tail and head position within a ring buffer with
/// wrap-around integers, where the least significant bits specify the actual
/// buffer index. All positions have bit widths that are intentionally larger
/// than necessary for buffer indexing because:
/// - an extra bit is needed to disambiguate between empty and full buffers when
/// the start and end position of the buffer are equal,
/// - the worker head is also used as long-cycle counter to mitigate the risk of
/// ABA.
///
#[derive(Debug)]
struct Queue<T, B: Buffer<T>> {
/// Positions of the head as seen by the worker (most significant bits) and
/// as seen by a stealer (least significant bits).
heads: CachePadded<AtomicU64>,
/// Position of the tail.
tail: CachePadded<AtomicU32>,
/// Queue items.
buffer: Box<B::Data>,
/// Make the type !Send and !Sync by default.
_phantom: PhantomData<UnsafeCell<T>>,
}
impl<T, B: Buffer<T>> Queue<T, B> {
/// Reads an item at the given position.
///
/// The position is automatically mapped to a valid buffer index using a
/// modulo operation.
///
/// # Safety
///
/// The item at the given position must have been initialized before and
/// cannot have been moved out.
///
/// The caller must guarantee that the item at this position cannot be
/// written to or moved out concurrently.
#[inline]
unsafe fn read_at(&self, position: u32) -> T {
let index = (position & B::MASK) as usize;
(*self.buffer).as_ref()[index].with(|slot| slot.read().assume_init())
}
/// Writes an item at the given position.
///
/// The position is automatically mapped to a valid buffer index using a
/// modulo operation.
///
/// # Note
///
/// If an item is already initialized but was not moved out yet, it will be
/// leaked.
///
/// # Safety
///
/// The caller must guarantee that the item at this position cannot be read
/// or written to concurrently.
#[inline]
unsafe fn write_at(&self, position: u32, item: T) {
let index = (position & B::MASK) as usize;
(*self.buffer).as_ref()[index].with_mut(|slot| slot.write(MaybeUninit::new(item)));
}
/// Attempts to book `N` items for stealing where `N` is specified by a
/// closure which takes as argument the total count of available items.
///
/// In case of success, the returned tuple contains the stealer head and an
/// item count at least equal to 1, in this order.
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item could be stolen, either because the queue is empty or because
/// `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
///
/// # Safety
///
/// This function is not strictly unsafe, but because it initiates the
/// stealing operation by moving the worker head in the packed `heads`
/// atomic without ever updating the stealer head,
/// its misuse can result in permanently blocking subsequent stealing
/// operations.
fn book_items<C>(&self, mut count_fn: C, max_count: u32) -> Result<(u32, u32), StealError>
where
C: FnMut(usize) -> usize,
{
let mut heads = self.heads.load(Acquire);
loop {
let (worker_head, stealer_head) = unpack_heads(heads);
// Bail out if both heads differ because it means another stealing
// operation is concurrently ongoing.
if stealer_head != worker_head {
return Err(StealError::Busy);
}
let tail = self.tail.load(Acquire);
let item_count = tail.wrapping_sub(worker_head);
// `item_count` is tested now because `count_fn` may expect
// `item_count > 0`.
if item_count == 0 {
return Err(StealError::Empty);
}
// Unwind safety: it is OK if `count_fn` panics because no state has
// been modified yet.
let count =
(count_fn(item_count as usize).min(max_count as usize) as u32).min(item_count);
// The special case `count_fn() == 0` must be tested specifically,
// because if the compare-exchange succeeds with `count=0`, the new
// worker head will be the same as the old one so other stealers
// will not detect that stealing is currently ongoing and may try to
// actually steal items and concurrently modify the position of the
// heads.
if count == 0 {
return Err(StealError::Empty);
}
// Move the worker head only.
let new_heads = pack_heads(worker_head.wrapping_add(count), stealer_head);
// Attempt to book the slots. Only one stealer can succeed since
// once this atomic is changed, the other thread will necessarily
// observe a mismatch between the two heads.
match self
.heads
.compare_exchange_weak(heads, new_heads, Acquire, Acquire)
{
Ok(_) => return Ok((stealer_head, count)),
// We lost the race to a concurrent pop or steal operation, or
// the CAS failed spuriously; try again.
Err(h) => heads = h,
}
}
}
}
impl<T, B: Buffer<T>> Drop for Queue<T, B> {
fn drop(&mut self) {
let worker_head = unpack_heads(self.heads.load(Relaxed)).0;
let tail = self.tail.load(Relaxed);
let count = tail.wrapping_sub(worker_head);
for offset in 0..count {
drop(unsafe { self.read_at(worker_head.wrapping_add(offset)) })
}
}
}
/// Handle for single-threaded FIFO push and pop operations.
#[derive(Debug)]
pub(crate) struct Worker<T, B: Buffer<T>> {
queue: Arc<Queue<T, B>>,
}
impl<T, B: Buffer<T>> Worker<T, B> {
/// Creates a new queue and returns a `Worker` handle.
pub(crate) fn new() -> Self {
let queue = Arc::new(Queue {
heads: CachePadded::new(AtomicU64::new(0)),
tail: CachePadded::new(AtomicU32::new(0)),
buffer: B::allocate(),
_phantom: PhantomData,
});
Worker { queue }
}
/// Creates a new `Stealer` handle associated to this `Worker`.
///
/// An arbitrary number of `Stealer` handles can be created, either using
/// this method or cloning an existing `Stealer` handle.
pub(crate) fn stealer(&self) -> Stealer<T, B> {
Stealer {
queue: self.queue.clone(),
}
}
/// Returns the number of items that can be successfully pushed onto the
/// queue.
///
/// Note that the spare capacity may be underestimated due to
/// concurrent stealing operations.
pub(crate) fn spare_capacity(&self) -> usize {
let capacity = <B as Buffer<T>>::CAPACITY;
let stealer_head = unpack_heads(self.queue.heads.load(Relaxed)).1;
let tail = self.queue.tail.load(Relaxed);
// Aggregate count of available items (those which can be popped) and of
// items currently being stolen.
let len = tail.wrapping_sub(stealer_head);
(capacity - len) as usize
}
/// Attempts to push one item at the tail of the queue.
///
/// # Errors
///
/// This will fail if the queue is full, in which case the item is returned
/// as the error field.
pub(crate) fn push(&self, item: T) -> Result<(), T> {
let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1;
let tail = self.queue.tail.load(Relaxed);
// Check that the buffer is not full.
if tail.wrapping_sub(stealer_head) >= B::CAPACITY {
return Err(item);
}
// Store the item.
unsafe { self.queue.write_at(tail, item) };
// Make the item visible by moving the tail.
//
// Ordering: the Release ordering ensures that the subsequent
// acquisition of this atomic by a stealer will make the previous write
// visible.
self.queue.tail.store(tail.wrapping_add(1), Release);
Ok(())
}
/// Attempts to push the content of an iterator at the tail of the queue.
///
/// It is the responsibility of the caller to ensure that there is enough
/// spare capacity to accommodate all iterator items, for instance by
/// calling [`Worker::spare_capacity`] beforehand. Otherwise, the iterator
/// is dropped while still holding the items in excess.
pub(crate) fn extend<I: IntoIterator<Item = T>>(&self, iter: I) {
let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1;
let mut tail = self.queue.tail.load(Relaxed);
let max_tail = stealer_head.wrapping_add(B::CAPACITY);
for item in iter {
// Check whether the buffer is full.
if tail == max_tail {
break;
}
// Store the item.
unsafe { self.queue.write_at(tail, item) };
tail = tail.wrapping_add(1);
}
// Make the items visible by incrementing the push count.
//
// Ordering: the Release ordering ensures that the subsequent
// acquisition of this atomic by a stealer will make the previous write
// visible.
self.queue.tail.store(tail, Release);
}
/// Attempts to pop one item from the head of the queue.
///
/// This returns `None` if the queue is empty.
pub(crate) fn pop(&self) -> Option<T> {
let mut heads = self.queue.heads.load(Acquire);
let prev_worker_head = loop {
let (worker_head, stealer_head) = unpack_heads(heads);
let tail = self.queue.tail.load(Relaxed);
// Check if the queue is empty.
if tail == worker_head {
return None;
}
// Move the worker head. The weird cast from `bool` to `u32` is to
// steer the compiler towards branchless code.
let next_heads = pack_heads(
worker_head.wrapping_add(1),
stealer_head.wrapping_add((stealer_head == worker_head) as u32),
);
// Attempt to book the items.
let res = self
.queue
.heads
.compare_exchange_weak(heads, next_heads, AcqRel, Acquire);
match res {
Ok(_) => break worker_head,
// We lost the race to a stealer or the CAS failed spuriously; try again.
Err(h) => heads = h,
}
};
unsafe { Some(self.queue.read_at(prev_worker_head)) }
}
/// Returns an iterator that steals items from the head of the queue.
///
/// The returned iterator steals up to `N` items, where `N` is specified by
/// a closure which takes as argument the total count of items available for
/// stealing. Upon success, the number of items ultimately stolen can be
/// from 1 to `N`, depending on the number of available items.
///
/// # Beware
///
/// All items stolen by the iterator should be moved out as soon as
/// possible, because until then or until the iterator is dropped, all
/// concurrent stealing operations will fail with [`StealError::Busy`].
///
/// # Leaking
///
/// If the iterator is leaked before all stolen items have been moved out,
/// subsequent stealing operations will permanently fail with
/// [`StealError::Busy`].
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item was stolen, either because the queue is empty or `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
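///
/// # Examples
///
/// An illustrative usage sketch (not part of the original documentation):
///
/// ```ignore
/// let worker = Worker::<usize, B128>::new();
/// worker.push(1).unwrap();
/// worker.push(2).unwrap();
/// worker.push(3).unwrap();
/// // Steal every item currently available at the head of the queue.
/// let drained: Vec<_> = worker.drain(|n| n).unwrap().collect();
/// assert_eq!(drained, vec![1, 2, 3]);
/// assert_eq!(worker.pop(), None);
/// ```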
pub(crate) fn drain<C>(&self, count_fn: C) -> Result<Drain<'_, T, B>, StealError>
where
C: FnMut(usize) -> usize,
{
let (head, count) = self.queue.book_items(count_fn, u32::MAX)?;
Ok(Drain {
queue: &self.queue,
head,
from_head: head,
to_head: head.wrapping_add(count),
})
}
}
impl<T, B: Buffer<T>> Default for Worker<T, B> {
fn default() -> Self {
Self::new()
}
}
impl<T, B: Buffer<T>> UnwindSafe for Worker<T, B> {}
impl<T, B: Buffer<T>> RefUnwindSafe for Worker<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Send for Worker<T, B> {}
/// A draining iterator for [`Worker<T, B>`].
///
/// This iterator is created by [`Worker::drain`]. See its documentation for
/// more.
#[derive(Debug)]
pub(crate) struct Drain<'a, T, B: Buffer<T>> {
queue: &'a Queue<T, B>,
head: u32,
from_head: u32,
to_head: u32,
}
impl<'a, T, B: Buffer<T>> Iterator for Drain<'a, T, B> {
type Item = T;
fn next(&mut self) -> Option<T> {
if self.head == self.to_head {
return None;
}
let item = Some(unsafe { self.queue.read_at(self.head) });
self.head = self.head.wrapping_add(1);
// We cannot rely on the caller to call `next` again after the last item
// is yielded so the heads must be updated immediately when yielding the
// last item.
if self.head == self.to_head {
// Signal that the stealing operation has completed.
let mut heads = self.queue.heads.load(Relaxed);
loop {
let (worker_head, stealer_head) = unpack_heads(heads);
debug_or_loom_assert_eq!(stealer_head, self.from_head);
let res = self.queue.heads.compare_exchange_weak(
heads,
pack_heads(worker_head, worker_head),
AcqRel,
Acquire,
);
match res {
Ok(_) => break,
Err(h) => {
heads = h;
}
}
}
}
item
}
fn size_hint(&self) -> (usize, Option<usize>) {
let sz = self.to_head.wrapping_sub(self.head) as usize;
(sz, Some(sz))
}
}
impl<'a, T, B: Buffer<T>> ExactSizeIterator for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> FusedIterator for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> Drop for Drain<'a, T, B> {
fn drop(&mut self) {
// Drop all items and make sure the head is updated so that subsequent
// stealing operations can succeed.
for _item in self {}
}
}
impl<'a, T, B: Buffer<T>> UnwindSafe for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> RefUnwindSafe for Drain<'a, T, B> {}
unsafe impl<'a, T: Send, B: Buffer<T>> Send for Drain<'a, T, B> {}
unsafe impl<'a, T: Send, B: Buffer<T>> Sync for Drain<'a, T, B> {}
/// Handle for multi-threaded stealing operations.
#[derive(Debug)]
pub(crate) struct Stealer<T, B: Buffer<T>> {
queue: Arc<Queue<T, B>>,
}
impl<T, B: Buffer<T>> Stealer<T, B> {
/// Attempts to steal items from the head of the queue, returning one of
/// them directly and moving the others to the tail of another queue.
///
/// Up to `N` items are stolen (including the one returned directly), where
/// `N` is specified by a closure which takes as argument the total count of
/// items available for stealing. Upon success, one item is returned and
/// from 0 to `N-1` items are moved to the destination queue, depending on
/// the number of available items and the capacity of the destination queue.
///
/// The returned item is the most recent one among the stolen items.
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item was stolen, either because the queue is empty or `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
///
/// Failure to transfer any item to the destination queue is not considered
/// an error as long as one element could be returned directly. This can
/// occur if the destination queue is full, if the source queue has only one
/// item or if `N` is 1.
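///
/// # Examples
///
/// An illustrative usage sketch (not part of the original documentation):
///
/// ```ignore
/// let worker = Worker::<usize, B128>::new();
/// let dest = Worker::<usize, B128>::new();
/// let stealer = worker.stealer();
/// worker.push(1).unwrap();
/// worker.push(2).unwrap();
/// // Steal two items: the most recent one (`2`) is returned directly while
/// // the older one (`1`) is moved to the destination queue.
/// assert_eq!(stealer.steal_and_pop(&dest, |_| 2), Ok(2));
/// assert_eq!(dest.pop(), Some(1));
/// assert_eq!(worker.pop(), None);
/// ```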
pub(crate) fn steal_and_pop<C, BDest>(
&self,
dest: &Worker<T, BDest>,
count_fn: C,
) -> Result<T, StealError>
where
C: FnMut(usize) -> usize,
BDest: Buffer<T>,
{
// Compute the free capacity of the destination queue.
//
// Ordering: see `Worker::push()` method.
let dest_tail = dest.queue.tail.load(Relaxed);
let dest_stealer_head = unpack_heads(dest.queue.heads.load(Acquire)).1;
let dest_free_capacity = BDest::CAPACITY - dest_tail.wrapping_sub(dest_stealer_head);
debug_or_loom_assert!(dest_free_capacity <= BDest::CAPACITY);
let (stealer_head, count) = self.queue.book_items(count_fn, dest_free_capacity + 1)?;
let transfer_count = count - 1;
debug_or_loom_assert!(transfer_count <= dest_free_capacity);
// Move all items but the last to the destination queue.
for offset in 0..transfer_count {
unsafe {
let item = self.queue.read_at(stealer_head.wrapping_add(offset));
dest.queue.write_at(dest_tail.wrapping_add(offset), item);
}
}
// Read the last item.
let last_item = unsafe {
self.queue
.read_at(stealer_head.wrapping_add(transfer_count))
};
// Make the moved items visible by updating the destination tail position.
//
// Ordering: see comments in the `push()` method.
dest.queue
.tail
.store(dest_tail.wrapping_add(transfer_count), Release);
// Signal that the stealing operation has completed.
let mut heads = self.queue.heads.load(Relaxed);
loop {
let (worker_head, sh) = unpack_heads(heads);
debug_or_loom_assert_eq!(stealer_head, sh);
let res = self.queue.heads.compare_exchange_weak(
heads,
pack_heads(worker_head, worker_head),
AcqRel,
Acquire,
);
match res {
Ok(_) => return Ok(last_item),
Err(h) => {
heads = h;
}
}
}
}
}
impl<T, B: Buffer<T>> Clone for Stealer<T, B> {
fn clone(&self) -> Self {
Stealer {
queue: self.queue.clone(),
}
}
}
impl<T, B: Buffer<T>> UnwindSafe for Stealer<T, B> {}
impl<T, B: Buffer<T>> RefUnwindSafe for Stealer<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Send for Stealer<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Sync for Stealer<T, B> {}
/// Error returned when stealing is unsuccessful.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum StealError {
/// No item was stolen.
Empty,
/// Another concurrent stealing operation is ongoing.
Busy,
}
impl fmt::Display for StealError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
StealError::Empty => write!(f, "cannot steal from empty queue"),
StealError::Busy => write!(f, "a concurrent steal operation is ongoing"),
}
}
}
#[inline(always)]
/// Extracts the worker head and stealer head (in this order) from the packed heads.
fn unpack_heads(heads: u64) -> (u32, u32) {
((heads >> u32::BITS) as u32, heads as u32)
}
#[inline(always)]
/// Packs the worker head and the stealer head (in this order) into a single value.
fn pack_heads(worker_head: u32, stealer_head: u32) -> u64 {
((worker_head as u64) << u32::BITS) | stealer_head as u64
}
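// An illustrative sketch (not part of the original commit): the two helpers
// above are inverses of one another, with the worker head stored in the upper
// 32 bits and the stealer head in the lower 32 bits.
#[cfg(test)]
mod heads_packing_sketch {
use super::{pack_heads, unpack_heads};
#[test]
fn heads_round_trip() {
let packed = pack_heads(7, 3);
assert_eq!(packed, (7u64 << 32) | 3);
assert_eq!(unpack_heads(packed), (7, 3));
}
}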

View File

@ -0,0 +1,100 @@
//! Internal queue buffers of various sizes.
use std::fmt::Debug;
use std::mem::MaybeUninit;
use crate::loom_exports::cell::UnsafeCell;
/// Marker trait for fixed-size buffers.
pub(crate) trait Buffer<T>: private::Sealed {
/// Buffer size.
const CAPACITY: u32;
#[doc(hidden)]
/// Buffer index bit mask.
const MASK: u32;
#[doc(hidden)]
/// Buffer data type.
type Data: AsRef<[UnsafeCell<MaybeUninit<T>>]> + Debug;
#[doc(hidden)]
/// Returns an uninitialized buffer.
fn allocate() -> Box<Self::Data>;
}
macro_rules! make_buffer {
($b:ident, $cap:expr) => {
#[doc = concat!("Marker type for buffers of capacity ", $cap, ".")]
#[derive(Copy, Clone, Debug)]
pub(crate) struct $b {}
impl private::Sealed for $b {}
impl<T> Buffer<T> for $b {
const CAPACITY: u32 = $cap;
#[doc(hidden)]
const MASK: u32 = $cap - 1;
#[doc(hidden)]
type Data = [UnsafeCell<MaybeUninit<T>>; $cap];
#[doc(hidden)]
#[cfg(not(asynchronix_loom))]
fn allocate() -> Box<Self::Data> {
// Safety: initializing an array of `MaybeUninit` items with
// `assume_init()` is valid, as per the `MaybeUninit` documentation.
// Admittedly the situation is slightly different here: the buffer is
// made of `MaybeUninit` elements wrapped in `UnsafeCell`s; however, the
// latter is a `repr(transparent)` type with a trivial constructor, so
// this should not make any difference.
Box::new(unsafe { MaybeUninit::uninit().assume_init() })
}
#[doc(hidden)]
#[cfg(asynchronix_loom)]
fn allocate() -> Box<Self::Data> {
// Loom's `UnsafeCell` is not `repr(transparent)` and does not
// have a trivial constructor so initialization must be done
// element-wise.
fn make_fixed_size<T>(buffer: Box<[T]>) -> Box<[T; $cap]> {
assert_eq!(buffer.len(), $cap);
// Safety: The length was checked.
unsafe { Box::from_raw(Box::into_raw(buffer).cast()) }
}
let mut buffer = Vec::with_capacity($cap);
for _ in 0..$cap {
buffer.push(UnsafeCell::new(MaybeUninit::uninit()));
}
make_fixed_size(buffer.into_boxed_slice())
}
}
};
}
// Define buffer capacities up to 2^15, which is the maximum that can be
// supported with 16-bit wide buffer positions (1 bit is required for
// disambiguation between full and empty buffer).
make_buffer!(B2, 2);
make_buffer!(B4, 4);
make_buffer!(B8, 8);
make_buffer!(B16, 16);
make_buffer!(B32, 32);
make_buffer!(B64, 64);
make_buffer!(B128, 128);
make_buffer!(B256, 256);
make_buffer!(B512, 512);
make_buffer!(B1024, 1024);
make_buffer!(B2048, 2048);
make_buffer!(B4096, 4096);
make_buffer!(B8192, 8192);
make_buffer!(B16384, 16384);
make_buffer!(B32768, 32768);
/// Prevents public implementations of `Buffer`.
mod private {
pub(crate) trait Sealed {}
}
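// An illustrative sketch (not part of the original commit): for power-of-two
// capacities, masking a wrapping position with `MASK` is equivalent to the
// modulo operation used to map positions to buffer indices.
#[cfg(test)]
mod buffer_mask_sketch {
use super::{Buffer, B128};
#[test]
fn mask_maps_positions_to_indices() {
let position: u32 = 130;
assert_eq!(position & <B128 as Buffer<u8>>::MASK, 2);
assert_eq!(position % <B128 as Buffer<u8>>::CAPACITY, 2);
}
}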

View File

@ -0,0 +1,7 @@
use super::*;
#[cfg(not(asynchronix_loom))]
mod general;
#[cfg(asynchronix_loom)]
mod loom;

View File

@ -0,0 +1,240 @@
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread::spawn;
use super::*;
// Rotate the internal ring buffer indices by `n`.
fn rotate<T: Default + std::fmt::Debug, B: Buffer<T>>(worker: &Worker<T, B>, n: usize) {
let stealer = worker.stealer();
let dummy_worker = Worker::<T, B2>::new();
for _ in 0..n {
worker.push(T::default()).unwrap();
stealer.steal_and_pop(&dummy_worker, |_| 1).unwrap();
}
}
#[test]
fn queue_single_threaded_steal() {
let rotations: &[_] = if cfg!(miri) {
&[42]
} else {
&[0, 255, 256, 257, 65535, 65536, 65537]
};
for &rotation in rotations {
let worker1 = Worker::<_, B128>::new();
let worker2 = Worker::<_, B128>::new();
let stealer1 = worker1.stealer();
rotate(&worker1, rotation);
rotate(&worker2, rotation);
worker1.push(1).unwrap();
worker1.push(2).unwrap();
worker1.push(3).unwrap();
worker1.push(4).unwrap();
assert_eq!(worker1.pop(), Some(1));
assert_eq!(stealer1.steal_and_pop(&worker2, |_| 2), Ok(3));
assert_eq!(worker1.pop(), Some(4));
assert_eq!(worker1.pop(), None);
assert_eq!(worker2.pop(), Some(2));
assert_eq!(worker2.pop(), None);
}
}
#[test]
fn queue_self_steal() {
let rotations: &[_] = if cfg!(miri) {
&[42]
} else {
&[0, 255, 256, 257, 65535, 65536, 65537]
};
for &rotation in rotations {
let worker = Worker::<_, B128>::new();
rotate(&worker, rotation);
let stealer = worker.stealer();
worker.push(1).unwrap();
worker.push(2).unwrap();
worker.push(3).unwrap();
worker.push(4).unwrap();
assert_eq!(worker.pop(), Some(1));
assert_eq!(stealer.steal_and_pop(&worker, |_| 2), Ok(3));
assert_eq!(worker.pop(), Some(4));
assert_eq!(worker.pop(), Some(2));
assert_eq!(worker.pop(), None);
}
}
#[test]
fn queue_drain_steal() {
let rotations: &[_] = if cfg!(miri) {
&[42]
} else {
&[0, 255, 256, 257, 65535, 65536, 65537]
};
for &rotation in rotations {
let worker = Worker::<_, B128>::new();
let dummy_worker = Worker::<_, B128>::new();
let stealer = worker.stealer();
rotate(&worker, rotation);
worker.push(1).unwrap();
worker.push(2).unwrap();
worker.push(3).unwrap();
worker.push(4).unwrap();
assert_eq!(worker.pop(), Some(1));
let mut iter = worker.drain(|n| n - 1).unwrap();
assert_eq!(
stealer.steal_and_pop(&dummy_worker, |_| 1),
Err(StealError::Busy)
);
assert_eq!(iter.next(), Some(2));
assert_eq!(
stealer.steal_and_pop(&dummy_worker, |_| 1),
Err(StealError::Busy)
);
assert_eq!(iter.next(), Some(3));
assert_eq!(stealer.steal_and_pop(&dummy_worker, |_| 1), Ok(4));
assert_eq!(iter.next(), None);
}
}
#[test]
fn queue_extend_basic() {
let rotations: &[_] = if cfg!(miri) {
&[42]
} else {
&[0, 255, 256, 257, 65535, 65536, 65537]
};
for &rotation in rotations {
let worker = Worker::<_, B128>::new();
rotate(&worker, rotation);
let initial_capacity = worker.spare_capacity();
worker.push(1).unwrap();
worker.push(2).unwrap();
worker.extend([3, 4]);
assert_eq!(worker.spare_capacity(), initial_capacity - 4);
assert_eq!(worker.pop(), Some(1));
assert_eq!(worker.pop(), Some(2));
assert_eq!(worker.pop(), Some(3));
assert_eq!(worker.pop(), Some(4));
assert_eq!(worker.pop(), None);
}
}
#[test]
fn queue_extend_overflow() {
let rotations: &[_] = if cfg!(miri) {
&[42]
} else {
&[0, 255, 256, 257, 65535, 65536, 65537]
};
for &rotation in rotations {
let worker = Worker::<_, B128>::new();
rotate(&worker, rotation);
let initial_capacity = worker.spare_capacity();
worker.push(1).unwrap();
worker.push(2).unwrap();
worker.extend(3..); // try to append infinitely many integers
assert_eq!(worker.spare_capacity(), 0);
for i in 1..=initial_capacity {
assert_eq!(worker.pop(), Some(i));
}
assert_eq!(worker.pop(), None);
}
}
#[test]
fn queue_multi_threaded_steal() {
use crate::runtime::executor::rng::Rng;
const N: usize = if cfg!(miri) { 50 } else { 1_000_000 };
let counter = Arc::new(AtomicUsize::new(0));
let worker = Worker::<_, B128>::new();
let stealer = worker.stealer();
let counter0 = counter.clone();
let stealer1 = stealer.clone();
let counter1 = counter.clone();
let stealer = stealer;
let counter2 = counter;
// Worker thread.
//
// Push all numbers from 0 to N, popping one from time to time.
let t0 = spawn(move || {
let mut i = 0;
let rng = Rng::new(0);
let mut stats = vec![0; N];
'outer: loop {
for _ in 0..(rng.gen_bounded(10) + 1) {
while let Err(_) = worker.push(i) {}
i += 1;
if i == N {
break 'outer;
}
}
if let Some(j) = worker.pop() {
stats[j] += 1;
counter0.fetch_add(1, Ordering::Relaxed);
}
}
stats
});
// Stealer threads.
//
// Repeatedly steal a random number of items.
fn steal_periodically(
stealer: Stealer<usize, B128>,
counter: Arc<AtomicUsize>,
rng_seed: u64,
) -> Vec<usize> {
let mut stats = vec![0; N];
let rng = Rng::new(rng_seed);
let dest_worker = Worker::<_, B128>::new();
loop {
if let Ok(i) =
stealer.steal_and_pop(&dest_worker, |m| rng.gen_bounded(m as u64 + 1) as usize)
{
stats[i] += 1; // the popped item
counter.fetch_add(1, Ordering::Relaxed);
while let Some(j) = dest_worker.pop() {
stats[j] += 1;
counter.fetch_add(1, Ordering::Relaxed);
}
}
let count = counter.load(Ordering::Relaxed);
if count == N {
break;
}
assert!(count < N);
}
stats
}
let t1 = spawn(move || steal_periodically(stealer1, counter1, 1));
let t2 = spawn(move || steal_periodically(stealer, counter2, 2));
let mut stats = Vec::new();
stats.push(t0.join().unwrap());
stats.push(t1.join().unwrap());
stats.push(t2.join().unwrap());
for i in 0..N {
let mut count = 0;
for j in 0..stats.len() {
count += stats[j][i];
}
assert_eq!(count, 1);
}
}

View File

@ -0,0 +1,323 @@
use super::*;
use ::loom::model::Builder;
use ::loom::thread;
// Test adapted from the Tokio test suite.
#[test]
fn loom_queue_basic_steal() {
const DEFAULT_PREEMPTION_BOUND: usize = 3;
const LOOP_COUNT: usize = 2;
const ITEM_COUNT_PER_LOOP: usize = 3;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let worker = Worker::<usize, B4>::new();
let stealer = worker.stealer();
let th = thread::spawn(move || {
let dest_worker = Worker::<usize, B4>::new();
let mut n = 0;
for _ in 0..3 {
if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() {
n += 1;
while dest_worker.pop().is_some() {
n += 1;
}
}
}
n
});
let mut n = 0;
for _ in 0..LOOP_COUNT {
for _ in 0..(ITEM_COUNT_PER_LOOP - 1) {
if worker.push(42).is_err() {
n += 1;
}
}
if worker.pop().is_some() {
n += 1;
}
// Push another task
if worker.push(42).is_err() {
n += 1;
}
while worker.pop().is_some() {
n += 1;
}
}
n += th.join().unwrap();
assert_eq!(ITEM_COUNT_PER_LOOP * LOOP_COUNT, n);
});
}
// Test adapted from the Tokio test suite.
#[test]
fn loom_queue_drain_overflow() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
const ITEM_COUNT: usize = 7;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let worker = Worker::<usize, B4>::new();
let stealer = worker.stealer();
let th = thread::spawn(move || {
let dest_worker = Worker::<usize, B4>::new();
let mut n = 0;
if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() {
n += 1;
while dest_worker.pop().is_some() {
n += 1;
}
}
n
});
let mut n = 0;
// Push an item, pop an item.
worker.push(42).unwrap();
if worker.pop().is_some() {
n += 1;
}
for _ in 0..(ITEM_COUNT - 1) {
if worker.push(42).is_err() {
// Spin until some of the old items can be drained to make room
// for the new item.
loop {
if let Ok(drain) = worker.drain(|n| n - n / 2) {
for _ in drain {
n += 1;
}
assert_eq!(worker.push(42), Ok(()));
break;
}
thread::yield_now();
}
}
}
n += th.join().unwrap();
while worker.pop().is_some() {
n += 1;
}
assert_eq!(ITEM_COUNT, n);
});
}
// Test adapted from the Tokio test suite.
#[test]
fn loom_queue_multi_stealer() {
const DEFAULT_PREEMPTION_BOUND: usize = 3;
const ITEM_COUNT: usize = 5;
fn steal_half(stealer: Stealer<usize, B4>) -> usize {
let dest_worker = Worker::<usize, B4>::new();
if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() {
let mut n = 1;
while dest_worker.pop().is_some() {
n += 1;
}
n
} else {
0
}
}
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let worker = Worker::<usize, B4>::new();
let stealer1 = worker.stealer();
let stealer2 = worker.stealer();
let th1 = thread::spawn(move || steal_half(stealer1));
let th2 = thread::spawn(move || steal_half(stealer2));
let mut n = 0;
for _ in 0..ITEM_COUNT {
if worker.push(42).is_err() {
n += 1;
}
}
while worker.pop().is_some() {
n += 1;
}
n += th1.join().unwrap();
n += th2.join().unwrap();
assert_eq!(ITEM_COUNT, n);
});
}
// Test adapted from the Tokio test suite.
#[test]
fn loom_queue_chained_steal() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let w1 = Worker::<usize, B4>::new();
let w2 = Worker::<usize, B4>::new();
let s1 = w1.stealer();
let s2 = w2.stealer();
for _ in 0..4 {
w1.push(42).unwrap();
w2.push(42).unwrap();
}
let th = thread::spawn(move || {
let dest_worker = Worker::<usize, B4>::new();
let _ = s1.steal_and_pop(&dest_worker, |n| n - n / 2);
while dest_worker.pop().is_some() {}
});
while w1.pop().is_some() {}
let _ = s2.steal_and_pop(&w1, |n| n - n / 2);
th.join().unwrap();
while w1.pop().is_some() {}
while w2.pop().is_some() {}
});
}
// A variant of multi-stealer with concurrent push.
#[test]
fn loom_queue_push_and_steal() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
fn steal_half(stealer: Stealer<usize, B4>) -> usize {
let dest_worker = Worker::<usize, B4>::new();
if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() {
let mut n = 1;
while dest_worker.pop().is_some() {
n += 1;
}
n
} else {
0
}
}
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let worker = Worker::<usize, B4>::new();
let stealer1 = worker.stealer();
let stealer2 = worker.stealer();
let th1 = thread::spawn(move || steal_half(stealer1));
let th2 = thread::spawn(move || steal_half(stealer2));
worker.push(42).unwrap();
worker.push(42).unwrap();
let mut n = 0;
while worker.pop().is_some() {
n += 1;
}
n += th1.join().unwrap();
n += th2.join().unwrap();
assert_eq!(n, 2);
});
}
// Attempts to extend the queue based on `Worker::spare_capacity`.
#[test]
fn loom_queue_extend() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
fn steal_half(stealer: Stealer<usize, B4>) -> usize {
let dest_worker = Worker::<usize, B4>::new();
if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() {
let mut n = 1;
while dest_worker.pop().is_some() {
n += 1;
}
n
} else {
0
}
}
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(|| {
let worker = Worker::<usize, B4>::new();
let stealer1 = worker.stealer();
let stealer2 = worker.stealer();
let th1 = thread::spawn(move || steal_half(stealer1));
let th2 = thread::spawn(move || steal_half(stealer2));
worker.push(1).unwrap();
worker.push(7).unwrap();
// Try to fill up the queue.
let spare_capacity = worker.spare_capacity();
assert!(spare_capacity >= 2);
worker.extend(0..spare_capacity);
let mut n = 0;
n += th1.join().unwrap();
n += th2.join().unwrap();
while worker.pop().is_some() {
n += 1;
}
assert_eq!(2 + spare_capacity, n);
});
}

View File

@ -0,0 +1,72 @@
use std::cell::Cell;
/// A pseudo-random number generator based on Wang Yi's Wyrand.
///
/// See: https://github.com/wangyi-fudan/wyhash
#[derive(Clone, Debug)]
pub(crate) struct Rng {
seed: Cell<u64>,
}
impl Rng {
/// Creates a new RNG with the provided seed.
pub(crate) fn new(seed: u64) -> Self {
Self {
seed: Cell::new(seed),
}
}
/// Generates a pseudo-random number within the range `0..2⁶⁴`.
pub(crate) fn gen(&self) -> u64 {
let seed = self.seed.get().wrapping_add(0xA0761D6478BD642F);
self.seed.set(seed);
let t = seed as u128 * (seed ^ 0xE7037ED1A0B428DB) as u128;
(t as u64) ^ (t >> 64) as u64
}
/// Generates a pseudo-random number within the range `0..upper_bound`.
///
/// This generator is biased as it uses the fast (but crude) multiply-shift
/// method. The bias is negligible, however, as long as the bound is much
/// smaller than 2⁶⁴.
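///
/// # Examples
///
/// An illustrative usage sketch (not part of the original documentation):
///
/// ```ignore
/// let rng = Rng::new(42);
/// // Simulate a 6-faced die roll: the result always lies within `0..6`.
/// let face = rng.gen_bounded(6);
/// assert!(face < 6);
/// ```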
pub(crate) fn gen_bounded(&self, upper_bound: u64) -> u64 {
((self.gen() as u128 * upper_bound as u128) >> 64) as u64
}
}
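// An illustrative sketch (not part of the original commit): the generator is
// fully deterministic for a given seed, so two instances seeded identically
// produce the same sequence.
#[cfg(test)]
mod determinism_sketch {
use super::Rng;
#[test]
fn same_seed_same_sequence() {
let a = Rng::new(123);
let b = Rng::new(123);
for _ in 0..16 {
assert_eq!(a.gen(), b.gen());
}
}
}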
#[cfg(all(test, not(asynchronix_loom), not(miri)))]
mod tests {
use super::*;
#[test]
fn rng_gen_bounded_chi2() {
const RNG_SEED: u64 = 12345;
const DICE_ROLLS: u64 = 1_000_000;
const DICE_FACES: u64 = 6; // beware: modify the p-values if you change this.
const CHI2_PVAL_LOWER: f64 = 0.210; // critical chi2 for lower p-value = 0.001 and DoF = DICE_FACES - 1
const CHI2_PVAL_UPPER: f64 = 20.515; // critical chi2 for upper p-value = 0.999 and DoF = DICE_FACES - 1.
let rng = Rng::new(RNG_SEED);
let mut tally = [0u64; 6];
for _ in 0..DICE_ROLLS {
let face = rng.gen_bounded(DICE_FACES);
tally[face as usize] += 1;
}
let expected = DICE_ROLLS as f64 / DICE_FACES as f64;
let chi2 = (0..DICE_FACES).fold(0f64, |chi2, face| {
let actual = tally[face as usize] as f64;
chi2 + (actual - expected) * (actual - expected) / expected
});
println!("tally = {:?}", tally);
println!("chi2 = {}", chi2);
assert!(chi2 > CHI2_PVAL_LOWER);
assert!(chi2 < CHI2_PVAL_UPPER);
}
}

View File

@ -0,0 +1,398 @@
extern crate alloc;
use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
use std::future::Future;
use std::mem::{self, ManuallyDrop};
use std::task::{RawWaker, RawWakerVTable};
use crate::loom_exports::cell::UnsafeCell;
use crate::loom_exports::sync::atomic::{self, AtomicU64, Ordering};
mod cancel_token;
mod promise;
mod runnable;
mod util;
#[cfg(test)]
mod tests;
pub(crate) use cancel_token::CancelToken;
pub(crate) use promise::{Promise, Stage};
pub(crate) use runnable::Runnable;
use self::util::{runnable_exists, RunOnDrop};
/// Flag indicating that the future has not been polled to completion yet.
const POLLING: u64 = 1 << 0;
/// Flag indicating that the task has been cancelled or that the output has
/// already been moved out.
const CLOSED: u64 = 1 << 1;
/// A single reference count increment.
const REF_INC: u64 = 1 << 2;
/// A single wake count increment.
const WAKE_INC: u64 = 1 << 33;
/// Reference count mask.
const REF_MASK: u64 = !(REF_INC - 1) & (WAKE_INC - 1);
/// Wake count mask.
const WAKE_MASK: u64 = !(WAKE_INC - 1);
/// Critical value of the reference count at which preventive measures must be
/// enacted to prevent counter overflow.
const REF_CRITICAL: u64 = (REF_MASK / 2) & REF_MASK;
/// Critical value of the wake count at which preventive measures must be
/// enacted to prevent counter overflow.
const WAKE_CRITICAL: u64 = (WAKE_MASK / 2) & WAKE_MASK;
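// An illustrative sketch (not part of the original commit): a sanity check
// that the flag and counter masks defined above partition the 64-bit state
// word without overlapping.
#[cfg(test)]
mod state_layout_sketch {
use super::{CLOSED, POLLING, REF_MASK, WAKE_MASK};
#[test]
fn masks_partition_the_state_word() {
// Bits 0 and 1 hold the flags, bits 2-32 the reference count and bits
// 33-63 the wake count.
assert_eq!(POLLING | CLOSED | REF_MASK | WAKE_MASK, u64::MAX);
assert_eq!((POLLING | CLOSED) & (REF_MASK | WAKE_MASK), 0);
assert_eq!(REF_MASK & WAKE_MASK, 0);
}
}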
/// Either a future, its output, or uninitialized (empty).
union TaskCore<F: Future> {
/// Field present during the `Polling` and the `Wind-down` phases.
future: ManuallyDrop<F>,
/// Field present during the `Completed` phase.
output: ManuallyDrop<F::Output>,
}
/// A task.
///
/// A task contains both the scheduling function and the future to be polled (or
/// its output if available). `Waker`, `Runnable`, `Promise` and `CancelToken`
/// are all type-erased (fat) pointers to a `Task`. The task is automatically
/// deallocated when all the formers have been dropped.
///
/// The lifetime of a task involves up to 4 phases:
/// - `Polling` phase: the future needs to be polled,
/// - `Completed` phase: the future has been polled to completion and its output
/// is available,
/// - `Wind-down` phase: the task has been cancelled while it was already
/// scheduled for processing, so the future had to be kept temporarily alive
/// to avoid a race; the `Closed` phase will be entered only when the
/// scheduled task is processed,
/// - `Closed` phase: neither the future nor its output are available, either
/// because the task has been cancelled or because the output has been moved
/// out.
///
/// It is possible to move from `Polling` to `Completed`, `Wind-down` or
/// `Closed`, but the only possible transition from `Wind-down` and from
/// `Completed` is to `Closed`.
///
/// The different states and sub-states and their corresponding flags are
/// summarized below:
///
/// | Phase | CLOSED | POLLING | WAKE_COUNT | Runnable exists? |
/// |---------------------|--------|---------|------------|------------------|
/// | Polling (idle) | 0 | 1 | 0 | No |
/// | Polling (scheduled) | 0 | 1 | ≠0 | Yes |
/// | Completed | 0 | 0 | any | No |
/// | Wind-down | 1 | 1 | any | Yes |
/// | Closed | 1 | 0 | any | No |
///
/// A `Runnable` is a reference to a task that has been scheduled. There can be
/// at most one `Runnable` at any given time.
///
/// `WAKE_COUNT` is a counter incremented each time the task is woken and reset
/// each time the `Runnable` has finished polling the task. The waker that
/// increments the wake count from 0 to 1 is responsible for creating and
/// scheduling a new `Runnable`.
///
/// The state also includes a reference count `REF_COUNT` that accounts for
/// the `Promise`, the `CancelToken` and all `Waker`s. The `Runnable` is _not_
/// included in `REF_COUNT` because its existence can be inferred from `CLOSED`,
/// `POLLING` and `WAKE_COUNT` (see table above).
struct Task<F: Future, S, T> {
/// State of the task.
///
/// The state has the following layout, where bit 0 is the LSB and bit 63 is
/// the MSB:
///
/// | 33-63 | 2-32 | 1 | 0 |
/// |------------|-----------|--------|---------|
/// | WAKE_COUNT | REF_COUNT | CLOSED | POLLING |
state: AtomicU64,
/// The future, its output, or nothing.
core: UnsafeCell<TaskCore<F>>,
/// The task scheduling function.
schedule_fn: S,
/// An arbitrary `Clone` tag that is passed to the scheduling function.
tag: T,
}
impl<F, S, T> Task<F, S, T>
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
const RAW_WAKER_VTABLE: RawWakerVTable = RawWakerVTable::new(
Self::clone_waker,
Self::wake_by_val,
Self::wake_by_ref,
Self::drop_waker,
);
/// Clones a waker.
unsafe fn clone_waker(ptr: *const ()) -> RawWaker {
let this = &*(ptr as *const Self);
let ref_count = this.state.fetch_add(REF_INC, Ordering::Relaxed) & REF_MASK;
if ref_count > REF_CRITICAL {
panic!("Attack of the clones: the waker was cloned too many times");
}
RawWaker::new(ptr, &Self::RAW_WAKER_VTABLE)
}
/// Wakes the task by value.
unsafe fn wake_by_val(ptr: *const ()) {
// Verify that the scheduling function does not capture any variable.
//
// It is always possible for the `Runnable` scheduled in the call to
// `wake` to be called and complete its execution before the scheduling
// call returns. For efficiency reasons, the reference count is
// preemptively decremented, which implies that the `Runnable` could
// prematurely drop and deallocate this task. By making sure that the
// schedule function is zero-sized, we ensure that premature
// deallocation is safe since the scheduling function does not access
// any allocated data.
if mem::size_of::<S>() != 0 {
// Note: a static assert is not possible as `S` is defined in the
// outer scope.
Self::drop_waker(ptr);
panic!("Scheduling functions with captured variables are not supported");
}
// Wake the task, decreasing at the same time the reference count.
let state = Self::wake(ptr, WAKE_INC - REF_INC);
// Deallocate the task if this waker is the last reference to the task,
// meaning that the reference count was 1 and the `POLLING` flag was
// cleared. Note that if the `POLLING` flag was set then a `Runnable`
// must exist.
if state & (REF_MASK | POLLING) == REF_INC {
// Ensure that the newest state of the task output (if any) is
// visible before it is dropped.
//
// Ordering: Acquire ordering is necessary to synchronize with the
// Release ordering in all previous reference count decrements
// and/or in the wake count reset (the latter is equivalent to a
// reference count decrement for a `Runnable`).
atomic::fence(Ordering::Acquire);
let this = &*(ptr as *const Self);
// Set a drop guard to ensure that the task is deallocated whether
// or not `output` panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Self>());
});
if state & CLOSED == 0 {
// Since the `CLOSED` and `POLLING` flags are both cleared, the
// output is present and must be dropped.
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
}
// Else the `CLOSED` flag is set and the `POLLING` flag is cleared
// so the task is already in the `Closed` phase.
}
}
/// Wakes the task by reference.
unsafe fn wake_by_ref(ptr: *const ()) {
// Wake the task.
Self::wake(ptr, WAKE_INC);
}
/// Wakes the task, either by value or by reference.
#[inline(always)]
unsafe fn wake(ptr: *const (), state_delta: u64) -> u64 {
let this = &*(ptr as *const Self);
// Increment the wake count and, if woken by value, decrement the
// reference count at the same time.
//
// Ordering: Release ordering is necessary to synchronize with either
// the Acquire load or with the RMW in `Runnable::run`, which ensures
// that all memory operations performed by the user before the call to
// `wake` will be visible when the future is polled. Note that there is
// no need to use AcqRel ordering to synchronize with all calls to
// `wake` that precede the call to `Runnable::run`. This is because,
// according to the C++ memory model, an RMW takes part in a Release
// sequence irrespective of its ordering. The below RMW also happens to
// take part in another Release sequence: it allows the Acquire-Release
// RMW that zeroes the wake count in the previous call to
// `Runnable::run` to synchronize with the initial Acquire load of the
// state in the next call to `Runnable::run` (or the Acquire fence in
// `Runnable::cancel`), thus ensuring that the next `Runnable` sees the
// newest state of the future.
let state = this.state.fetch_add(state_delta, Ordering::Release);
if state & WAKE_MASK > WAKE_CRITICAL {
panic!("The task was woken too many times: {:0x}", state);
}
// Schedule the task if it is in the `Polling` phase but is not
// scheduled yet.
if state & (WAKE_MASK | CLOSED | POLLING) == POLLING {
// Safety: calling `new_unchecked` is safe since: there is no other
// `Runnable` running (the wake count was 0, the `POLLING` flag was
// set, the `CLOSED` flag was cleared); the wake count is now 1; the
// `POLLING` flag is set; the `CLOSED` flag is cleared; the task
// contains a live future.
let runnable = Runnable::new_unchecked(ptr as *const Self);
(this.schedule_fn)(runnable, this.tag.clone());
}
state
}
/// Drops a waker.
unsafe fn drop_waker(ptr: *const ()) {
let this = &*(ptr as *const Self);
// Ordering: Release ordering is necessary to synchronize with the
// Acquire fence in the drop handler of the last reference to the task
// and to make sure that all previous operations on the `core` member
// are visible when it is dropped.
let state = this.state.fetch_sub(REF_INC, Ordering::Release);
// Deallocate the task if this waker was the last reference to the task.
if state & REF_MASK == REF_INC && !runnable_exists(state) {
// Ensure that the newest state of the `core` member is visible
// before it is dropped.
//
// Ordering: Acquire ordering is necessary to synchronize with the
// Release ordering in all previous reference count decrements
// and/or in the wake count reset (the latter is equivalent to a
// reference count decrement for a `Runnable`).
atomic::fence(Ordering::Acquire);
// Set a drop guard to ensure that the task is deallocated whether
// or not the `core` member panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Self>());
});
if state & POLLING == POLLING {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
} else if state & CLOSED == 0 {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
}
// Else the `CLOSED` flag is set but the `POLLING` flag is cleared
// so the future was already dropped.
}
}
}
/// Spawns a task.
///
/// An arbitrary tag can be attached to the task, a clone of which will be
/// passed to the scheduling function each time it is called.
/// The returned `Runnable` must be scheduled by the user.
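///
/// # Examples
///
/// An illustrative sketch (not part of the original documentation), assuming a
/// non-capturing (zero-sized) scheduling closure and the `Runnable::run` method
/// referenced in the comments above:
///
/// ```ignore
/// let schedule_fn = |runnable: Runnable, _tag: ()| runnable.run();
/// let (_promise, runnable, _cancel_token) = spawn(async { 1 + 1 }, schedule_fn, ());
/// // The initial `Runnable` must be scheduled (here: simply run) by the caller.
/// runnable.run();
/// ```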
pub(crate) fn spawn<F, S, T>(
future: F,
schedule_fn: S,
tag: T,
) -> (Promise<F::Output>, Runnable, CancelToken)
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
// Create a task with preemptively incremented reference and wake counts to
// account for the returned `Promise`, `CancelToken` and `Runnable` (a
// non-zero wake count with the `POLLING` flag set indicates that there is a
// live `Runnable`).
let task = Task {
state: AtomicU64::new((2 * REF_INC) | WAKE_INC | POLLING),
core: UnsafeCell::new(TaskCore {
future: ManuallyDrop::new(future),
}),
schedule_fn,
tag,
};
// Pin the task with its future to the heap.
unsafe {
let layout = Layout::new::<Task<F, S, T>>();
let ptr = alloc(layout) as *mut Task<F, S, T>;
if ptr.is_null() {
handle_alloc_error(layout);
}
*ptr = task;
// Safety: this is safe since the task was allocated with the global
// allocator, there is no other `Runnable` running since the task was
// just created, the wake count is 1, the `POLLING` flag is set, the
// `CLOSED` flag is cleared and `core` contains a future.
let runnable = Runnable::new_unchecked(ptr);
// Safety: this is safe since the task was allocated with the global
// allocator and the reference count is 2.
let promise = Promise::new_unchecked(ptr);
let cancel_token = CancelToken::new_unchecked(ptr);
(promise, runnable, cancel_token)
}
}
/// Spawns a task whose output will never be retrieved.
///
/// This is mostly useful to avoid undue reference counting for futures that
/// return a `()` type.
///
/// An arbitrary tag can be attached to the task, a clone of which will be
/// passed to the scheduling function each time it is called.
///
/// The returned `Runnable` must be scheduled by the user.
pub(crate) fn spawn_and_forget<F, S, T>(
future: F,
schedule_fn: S,
tag: T,
) -> (Runnable, CancelToken)
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
// Create a task with preemptively incremented reference and wake counts to
// account for the returned `CancelToken` and `Runnable` (a non-zero wake
// count with the `POLLING` flag set indicates that there is a live
// `Runnable`).
let task = Task {
state: AtomicU64::new(REF_INC | WAKE_INC | POLLING),
core: UnsafeCell::new(TaskCore {
future: ManuallyDrop::new(future),
}),
schedule_fn,
tag,
};
// Pin the task with its future to the heap.
unsafe {
let layout = Layout::new::<Task<F, S, T>>();
let ptr = alloc(layout) as *mut Task<F, S, T>;
if ptr.is_null() {
handle_alloc_error(layout);
}
*ptr = task;
// Safety: this is safe since the task was allocated with the global
// allocator, there is no other `Runnable` running since the task was
// just created, the wake count is 1, the `POLLING` flag is set, the
// `CLOSED` flag is cleared and `core` contains a future.
let runnable = Runnable::new_unchecked(ptr);
// Safety: this is safe since the task was allocated with the global
// allocator and the reference count is 1.
let cancel_token = CancelToken::new_unchecked(ptr);
(runnable, cancel_token)
}
}

View File

@ -0,0 +1,220 @@
extern crate alloc;
use std::alloc::{dealloc, Layout};
use std::future::Future;
use std::mem::ManuallyDrop;
use std::panic::{RefUnwindSafe, UnwindSafe};
use crate::loom_exports::sync::atomic::{self, Ordering};
use super::runnable::Runnable;
use super::util::{runnable_exists, RunOnDrop};
use super::Task;
use super::{CLOSED, POLLING, REF_INC, REF_MASK};
/// Virtual table for a `CancelToken`.
#[derive(Debug)]
struct VTable {
cancel: unsafe fn(*const ()),
drop: unsafe fn(*const ()),
}
/// Cancels a pending task.
///
/// If the task is completed, nothing is done. If the task is not completed
/// but not currently scheduled (no `Runnable` exist) then the future is
/// dropped immediately. Otherwise, the future will be dropped at a later
/// time by the scheduled `Runnable` once it runs.
unsafe fn cancel<F: Future, S, T>(ptr: *const ())
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// Enter the `Closed` or `Wind-down` phase if the task is not
// completed.
//
// Ordering: Acquire ordering is necessary to synchronize with any
// operation that modified or dropped the future or output. This ensures
// that the future or output can be safely dropped or that the task can
// be safely deallocated if necessary. The Release ordering synchronizes
// with any of the Acquire atomic fences and ensures that this atomic
// access is fully completed upon deallocation.
let state = this
.state
.fetch_update(Ordering::AcqRel, Ordering::Relaxed, |s| {
if s & POLLING == 0 {
// The task has completed or is closed so there is no need
// to drop the future or output and the reference count can
// be decremented right away.
Some(s - REF_INC)
} else if runnable_exists(s) {
// A `Runnable` exists so the future cannot be dropped (this
// will be done by the `Runnable`) and the reference count
// can be decremented right away.
Some((s | CLOSED) - REF_INC)
} else {
// The future or the output needs to be dropped so the
// reference count cannot be decremented just yet, otherwise
// another reference could deallocate the task before the
// drop is complete.
Some((s | CLOSED) & !POLLING)
}
})
.unwrap();
if runnable_exists(state) {
// The task is in the `Wind-down` phase so the cancellation is now
// the responsibility of the current `Runnable`.
return;
}
if state & POLLING == 0 {
// Deallocate the task if this was the last reference.
if state & REF_MASK == REF_INC {
// Ensure that all atomic accesses to the state are visible.
//
// Ordering: this Acquire fence synchronizes with all Release
// operations that decrement the number of references to the
// task.
atomic::fence(Ordering::Acquire);
// Set a drop guard to ensure that the task is deallocated,
// whether or not the output panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
});
// Drop the output if any.
if state & CLOSED == 0 {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
}
}
return;
}
// Set a drop guard to ensure that the reference count is decremented and
// the task is deallocated if this is the last reference, whether or not
// the future panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
// Ordering: Release ordering is necessary to ensure that the drop
// of the future or output is visible when the last reference
// deallocates the task.
let state = this.state.fetch_sub(REF_INC, Ordering::Release);
if state & REF_MASK == REF_INC {
// Ensure that all atomic accesses to the state are visible.
//
// Ordering: this Acquire fence synchronizes with all Release
// operations that decrement the number of references to the
// task.
atomic::fence(Ordering::Acquire);
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
}
});
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
}
/// Drops the token without cancelling the task.
unsafe fn drop<F: Future, S, T>(ptr: *const ())
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// Decrement the reference count.
//
// Ordering: the Release ordering synchronizes with any of the Acquire
// atomic fences and ensures that this atomic access is fully completed
// upon deallocation.
let state = this.state.fetch_sub(REF_INC, Ordering::Release);
// Deallocate the task if this token was the last reference to the task.
if state & REF_MASK == REF_INC && !runnable_exists(state) {
// Ensure that the newest state of the future or output is visible
// before it is dropped.
//
// Ordering: this Acquire fence synchronizes with all Release
// operations that decrement the number of references to the task.
atomic::fence(Ordering::Acquire);
// Set a drop guard to ensure that the task is deallocated whether
// or not the future or output panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
});
if state & POLLING == POLLING {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
} else if state & CLOSED == 0 {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
}
// Else the `CLOSED` flag is set but the `POLLING` flag is cleared
// so the future was already dropped.
}
}
/// A token that can be used to cancel a task.
#[derive(Debug)]
pub(crate) struct CancelToken {
task: *const (),
vtable: &'static VTable,
}
impl CancelToken {
/// Creates a `CancelToken`.
///
/// Safety: this is safe provided that:
///
/// - the task pointer points to a live task allocated with the global
/// allocator,
/// - the reference count has been incremented to account for this new task
/// reference.
pub(super) unsafe fn new_unchecked<F: Future, S, T>(task: *const Task<F, S, T>) -> Self
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
Self {
task: task as *const (),
vtable: &VTable {
cancel: cancel::<F, S, T>,
drop: drop::<F, S, T>,
},
}
}
/// Cancels the task.
///
/// If the task is completed, nothing is done. If the task is not completed
/// but not currently scheduled (no `Runnable` exists), then the future is
/// dropped immediately. Otherwise, the future will be dropped at a later
/// time by the scheduled `Runnable` once it runs.
pub(crate) fn cancel(self) {
// Prevent the drop handler from being called, as it would call `drop`
// on the inner field.
let this = ManuallyDrop::new(self);
unsafe { (this.vtable.cancel)(this.task) }
}
}
impl Drop for CancelToken {
fn drop(&mut self) {
unsafe { (self.vtable.drop)(self.task) }
}
}
unsafe impl Send for CancelToken {}
impl UnwindSafe for CancelToken {}
impl RefUnwindSafe for CancelToken {}

View File

@ -0,0 +1,198 @@
extern crate alloc;
use std::alloc::{dealloc, Layout};
use std::future::Future;
use std::mem::ManuallyDrop;
use std::panic::{RefUnwindSafe, UnwindSafe};
use crate::loom_exports::sync::atomic::{self, Ordering};
use super::runnable::Runnable;
use super::util::{runnable_exists, RunOnDrop};
use super::Task;
use super::{CLOSED, POLLING, REF_INC, REF_MASK};
/// Virtual table for a `Promise`.
#[derive(Debug)]
struct VTable<U: Send + 'static> {
poll: unsafe fn(*const ()) -> Stage<U>,
drop: unsafe fn(*const ()),
}
/// Retrieves the output of the task if ready.
unsafe fn poll<F: Future, S, T>(ptr: *const ()) -> Stage<F::Output>
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// Set the `CLOSED` flag if the task is in the `Completed` phase.
//
// Ordering: Acquire ordering is necessary to synchronize with the
// operation that modified or dropped the future or output. This ensures
// that the newest state of the output is visible before it is moved
// out, or that the future can be safely dropped when the promise is
// dropped if the promise is the last reference to the task.
let state = this
.state
.fetch_update(Ordering::Acquire, Ordering::Relaxed, |s| {
if s & (POLLING | CLOSED) == 0 {
Some(s | CLOSED)
} else {
None
}
});
if let Err(s) = state {
if s & CLOSED == CLOSED {
// The task is either in the `Wind-down` or `Closed` phase.
return Stage::Cancelled;
} else {
// The task is in the `Polling` phase.
return Stage::Pending;
}
}
let output = this.core.with_mut(|c| ManuallyDrop::take(&mut (*c).output));
Stage::Ready(output)
}
/// Drops the promise.
unsafe fn drop<F: Future, S, T>(ptr: *const ())
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// Decrement the reference count.
//
// Ordering: Release ordering is necessary to ensure that if the output
// was moved out by using `poll`, then the move has completed when the
// last reference deallocates the task.
let state = this.state.fetch_sub(REF_INC, Ordering::Release);
// Deallocate the task if this promise was the last reference to the task.
if state & REF_MASK == REF_INC && !runnable_exists(state) {
// Ensure that the newest state of the future or output is visible
// before it is dropped.
//
// Ordering: Acquire ordering is necessary to synchronize with the
// Release ordering in all previous reference count decrements
// and/or in the wake count reset (the latter is equivalent to a
// reference count decrement for a `Runnable`).
atomic::fence(Ordering::Acquire);
// Set a drop guard to ensure that the task is deallocated whether
// or not the `core` member panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
});
if state & POLLING == POLLING {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
} else if state & CLOSED == 0 {
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
}
// Else the `CLOSED` flag is set but the `POLLING` flag is cleared
// so the future was already dropped.
}
}
/// The stage of progress of a promise.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub(crate) enum Stage<T> {
/// The task has completed.
Ready(T),
/// The task is still being processed.
Pending,
/// The task has been cancelled.
Cancelled,
}
impl<U> Stage<U> {
/// Maps a `Stage<U>` to a `Stage<V>` by applying a function to the contained value.
pub(crate) fn map<V, F>(self, f: F) -> Stage<V>
where
F: FnOnce(U) -> V,
{
match self {
Stage::Ready(t) => Stage::Ready(f(t)),
Stage::Pending => Stage::Pending,
Stage::Cancelled => Stage::Cancelled,
}
}
/// Returns `true` if the promise is a [`Stage::Ready`] value.
#[inline]
pub(crate) fn is_ready(&self) -> bool {
matches!(*self, Stage::Ready(_))
}
/// Returns `true` if the promise is a [`Stage::Pending`] value.
#[inline]
pub(crate) fn is_pending(&self) -> bool {
matches!(*self, Stage::Pending)
}
/// Returns `true` if the promise is a [`Stage::Cancelled`] value.
#[inline]
pub(crate) fn is_cancelled(&self) -> bool {
matches!(*self, Stage::Cancelled)
}
}
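// A short, self-contained illustration of `Stage::map` (a hypothetical,
// illustrative test): the closure is applied only to the `Ready` variant,
// while `Pending` and `Cancelled` pass through unchanged.
#[cfg(test)]
mod stage_map_sketch {
    use super::Stage;

    #[test]
    fn map_only_transforms_ready() {
        assert_eq!(Stage::Ready(21).map(|v| v * 2), Stage::Ready(42));
        assert_eq!(Stage::<i32>::Pending.map(|v| v * 2), Stage::Pending);
        assert_eq!(Stage::<i32>::Cancelled.map(|v| v * 2), Stage::Cancelled);
    }
}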
/// A promise that can poll a task's output of type `U`.
///
/// Note that dropping a promise does not cancel the task.
#[derive(Debug)]
pub(crate) struct Promise<U: Send + 'static> {
task: *const (),
vtable: &'static VTable<U>,
}
impl<U: Send + 'static> Promise<U> {
/// Creates a `Promise`.
///
/// Safety: this is safe provided that:
///
/// - the task pointer points to a live task allocated with the global
/// allocator,
/// - the reference count has been incremented to account for this new task
/// reference.
pub(super) unsafe fn new_unchecked<F, S, T>(task: *const Task<F, S, T>) -> Self
where
F: Future<Output = U> + Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
Self {
task: task as *const (),
vtable: &VTable::<U> {
poll: poll::<F, S, T>,
drop: drop::<F, S, T>,
},
}
}
/// Retrieves the output of the task if ready.
pub(crate) fn poll(&self) -> Stage<U> {
unsafe { (self.vtable.poll)(self.task) }
}
}
impl<U: Send + 'static> Drop for Promise<U> {
fn drop(&mut self) {
unsafe { (self.vtable.drop)(self.task) }
}
}
unsafe impl<U: Send + 'static> Send for Promise<U> {}
impl<U: Send + 'static> UnwindSafe for Promise<U> {}
impl<U: Send + 'static> RefUnwindSafe for Promise<U> {}

View File

@ -0,0 +1,320 @@
extern crate alloc;
use std::alloc::{dealloc, Layout};
use std::future::Future;
use std::mem::{self, ManuallyDrop};
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::pin::Pin;
use std::task::{Context, Poll, RawWaker, Waker};
use crate::loom_exports::debug_or_loom_assert;
use crate::loom_exports::sync::atomic::{self, Ordering};
use super::util::RunOnDrop;
use super::Task;
use super::{CLOSED, POLLING, REF_MASK, WAKE_MASK};
/// Virtual table for a `Runnable`.
#[derive(Debug)]
struct VTable {
run: unsafe fn(*const ()),
cancel: unsafe fn(*const ()),
}
/// Polls the inner future.
unsafe fn run<F: Future, S, T>(ptr: *const ())
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// At this point, the task cannot be in the `Completed` phase, otherwise
// it would not have been scheduled in the first place. It could,
// however, have been cancelled and transitioned from `Polling` to
// `Wind-down` after it was already scheduled. It is possible that in
// such a case the `CLOSED` flag may not be visible when loading the
// state, but this is not a problem: when a task is cancelled while
// already scheduled (i.e. while the wake count is non-zero), its future
// is kept alive so even if the state loaded is stale, the worst that
// can happen is that the future will be unnecessarily polled.
//
// It is worth mentioning that, in order to detect if the task was
// woken while being polled, other executors reset a notification flag with
// an RMW when entering `run`. The idea here is to avoid such an RMW and
// instead load a wake count. Only once the task has been polled, an RMW
// checks the wake count again to detect if the task was notified in the
// meantime. This method may be slightly more prone to spurious false
// positives but is much faster (1 vs 2 RMWs) and still prevents the
// occurrence of lost wake-ups.
// Load the state.
//
// Ordering: the below Acquire load synchronizes with the Release
// operation at the end of the call to `run` by the previous `Runnable`
// and ensures that the new state of the future stored by the previous
// call to `run` is visible. This synchronization exists because the RMW
// in the call to `Task::wake` or `Task::wake_by_ref` that scheduled
// this `Runnable` establishes a Release sequence. This load also
// synchronizes with the Release operation in `wake` and ensures that
// all memory operations performed by their callers are visible. Since
// this is a simple load, it may be stale and some wake requests may not
// be visible yet, but the post-polling RMW will later check if all wake
// requests were serviced.
let mut state = this.state.load(Ordering::Acquire);
let mut wake_count = state & WAKE_MASK;
debug_or_loom_assert!(state & POLLING == POLLING);
loop {
// Drop the future if the phase has transitioned to `Wind-down`.
if state & CLOSED == CLOSED {
cancel::<F, S, T>(ptr);
return;
}
// Poll the task.
let raw_waker = RawWaker::new(ptr, &Task::<F, S, T>::RAW_WAKER_VTABLE);
let waker = ManuallyDrop::new(Waker::from_raw(raw_waker));
let cx = &mut Context::from_waker(&waker);
let fut = Pin::new_unchecked(this.core.with_mut(|c| &mut *(*c).future));
// Set a panic guard to cancel the task if the future panics when
// polled.
let panic_guard = RunOnDrop::new(|| cancel::<F, S, T>(ptr));
let poll_state = fut.poll(cx);
mem::forget(panic_guard);
if let Poll::Ready(output) = poll_state {
// Set a panic guard to close the task if the future or the
// output panic when dropped.
let panic_guard = RunOnDrop::new(|| {
// Clear the `POLLING` flag while setting the `CLOSED` flag
// to enter the `Closed` phase.
//
// Ordering: Release ordering on success is necessary to
// ensure that all memory operations on the future or the
// output are visible when the last reference deallocates
// the task.
let state = this
.state
.fetch_update(Ordering::Release, Ordering::Relaxed, |s| {
Some((s | CLOSED) & !POLLING)
})
.unwrap();
// Deallocate if there are no more references to the task.
if state & REF_MASK == 0 {
// Ensure that all atomic accesses to the state are
// visible.
//
// Ordering: this Acquire fence synchronizes with all
// Release operations that decrement the number of
// references to the task.
atomic::fence(Ordering::Acquire);
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
}
});
// Drop the future and publish its output.
this.core.with_mut(|c| {
ManuallyDrop::drop(&mut (*c).future);
(*c).output = ManuallyDrop::new(output);
});
// Clear the `POLLING` flag to enter the `Completed` phase,
// unless the task has concurrently transitioned to the
// `Wind-down` phase or unless this `Runnable` is the last
// reference to the task.
if this
.state
.fetch_update(Ordering::Release, Ordering::Relaxed, |s| {
if s & CLOSED == CLOSED || s & REF_MASK == 0 {
None
} else {
Some(s & !POLLING)
}
})
.is_ok()
{
mem::forget(panic_guard);
return;
}
// The task is in the `Wind-down` phase or this `Runnable`
// was the last reference, so the output must be dropped.
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output));
mem::forget(panic_guard);
// Clear the `POLLING` flag to enter the `Closed` phase. This is
// not actually necessary if the `Runnable` is the last
// reference, but that should be a very rare occurrence.
//
// Ordering: Release ordering is necessary to ensure that the
// drop of the output is visible when the last reference
// deallocates the task.
state = this.state.fetch_and(!POLLING, Ordering::Release);
// Deallocate the task if there are no task references left.
if state & REF_MASK == 0 {
// Ensure that all atomic accesses to the state are visible.
//
// Ordering: this Acquire fence synchronizes with all
// Release operations that decrement the number of
// references to the task.
atomic::fence(Ordering::Acquire);
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
}
return;
}
// The future is `Pending`: try to reset the wake count.
//
// Ordering: a Release ordering is required in case the wake count
// is successfully cleared; it synchronizes, via a Release sequence,
// with the Acquire load upon entering `Runnable::run` the next time
// it is called. Acquire ordering is in turn necessary in case the
// wake count has changed and the future must be polled again; it
// synchronizes with the Release RMW in `wake` and ensures that all
// memory operations performed by their callers are visible when the
// polling loop is repeated.
state = this.state.fetch_sub(wake_count, Ordering::AcqRel);
debug_or_loom_assert!(state > wake_count);
wake_count = (state & WAKE_MASK) - wake_count;
// Return now if the wake count has been successfully cleared,
// provided that the task was not concurrently cancelled.
if wake_count == 0 && state & CLOSED == 0 {
// If there are no task references left, cancel and deallocate
// the task since it can never be scheduled again.
if state & REF_MASK == 0 {
let _drop_guard = RunOnDrop::new(|| {
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
});
// Drop the future.
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
}
return;
}
}
}
/// Cancels the task, dropping the inner future.
unsafe fn cancel<F, S, T>(ptr: *const ())
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
let this = &*(ptr as *const Task<F, S, T>);
// Ensure that the modifications of the future by the previous
// `Runnable` are visible.
//
// Ordering: this Acquire fence synchronizes with the Release operation
// at the end of the call to `run` by the previous `Runnable` and
// ensures that the new state of the future stored by the previous call
// to `run` is visible. This synchronization exists because the wake
// count RMW in the call to `Task::wake` that created this `Runnable`
// establishes a Release sequence.
atomic::fence(Ordering::Acquire);
// Set a drop guard to enter the `Closed` phase whether or not the
// future panics when dropped.
let _drop_guard = RunOnDrop::new(|| {
// Clear the `POLLING` flag while setting the `CLOSED` flag to enter
// the `Closed` phase.
//
// Ordering: Release ordering on success is necessary to ensure that
// all memory operations on the future are visible when the last
// reference deallocates the task.
let state = this
.state
.fetch_update(Ordering::Release, Ordering::Relaxed, |s| {
Some((s | CLOSED) & !POLLING)
})
.unwrap();
// Deallocate if there are no more references to the task.
if state & REF_MASK == 0 {
// Ensure that all atomic accesses to the state are visible.
//
// Ordering: this Acquire fence synchronizes with all Release
// operations that decrement the number of references to the
// task.
atomic::fence(Ordering::Acquire);
dealloc(ptr as *mut u8, Layout::new::<Task<F, S, T>>());
}
});
// Drop the future.
this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future));
}
/// Handle to a scheduled task.
///
/// Dropping the runnable directly instead of calling `run` cancels the task.
#[derive(Debug)]
pub(crate) struct Runnable {
task: *const (),
vtable: &'static VTable,
}
impl Runnable {
/// Creates a `Runnable`.
///
/// Safety: this is safe provided that:
///
/// - the task pointer points to a live task allocated with the global
/// allocator,
/// - there is no other live `Runnable` for this task,
/// - the wake count is non-zero,
/// - the `POLLING` flag is set and the `CLOSED` flag is cleared,
/// - the task contains a live future.
pub(super) unsafe fn new_unchecked<F, S, T>(task: *const Task<F, S, T>) -> Self
where
F: Future + Send + 'static,
F::Output: Send + 'static,
S: Fn(Runnable, T) + Send + Sync + 'static,
T: Clone + Send + Sync + 'static,
{
Self {
task: task as *const (),
vtable: &VTable {
run: run::<F, S, T>,
cancel: cancel::<F, S, T>,
},
}
}
/// Polls the wrapped future.
pub(crate) fn run(self) {
// Prevent the drop handler from being called, as it would call `cancel`
// on the inner field.
let this = ManuallyDrop::new(self);
// Poll the future.
unsafe { (this.vtable.run)(this.task) }
}
}
impl Drop for Runnable {
fn drop(&mut self) {
// Cancel the task.
unsafe { (self.vtable.cancel)(self.task) }
}
}
unsafe impl Send for Runnable {}
impl UnwindSafe for Runnable {}
impl RefUnwindSafe for Runnable {}
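// A toy model of the wake-count bookkeeping performed by `run` above (a
// hypothetical, illustrative test with a made-up state layout, not the
// crate's real constants): the wake count observed before polling is
// subtracted after polling, and the future is polled again only if
// additional wake-ups arrived in the meantime.
#[cfg(test)]
mod wake_count_model_sketch {
    // Assumed layout for this sketch only: the low 16 bits of the state word
    // hold the wake count.
    const WAKE_MASK: u64 = 0xFFFF;

    // Subtracts the wake-ups that were just serviced and returns the number
    // of wake-ups still pending, mirroring the post-polling RMW in `run`.
    fn service_wake_ups(state: &mut u64, serviced: u64) -> u64 {
        *state -= serviced;
        *state & WAKE_MASK
    }

    #[test]
    fn repoll_only_if_new_wake_ups_arrived() {
        // One wake-up observed before polling, none arrived while polling:
        // the runnable may return without polling the future again.
        let mut state = 1;
        assert_eq!(service_wake_ups(&mut state, 1), 0);

        // One wake-up observed before polling, two more arrived while
        // polling: the residual count is non-zero, so the future must be
        // polled again before returning.
        let mut state = 3;
        assert_eq!(service_wake_ups(&mut state, 1), 2);
    }
}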

View File

@ -0,0 +1,7 @@
use super::*;
#[cfg(not(asynchronix_loom))]
mod general;
#[cfg(asynchronix_loom)]
mod loom;

View File

@ -0,0 +1,625 @@
use std::future::Future;
use std::ops::Deref;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::task::{Context, Poll};
use std::thread;
use futures_channel::{mpsc, oneshot};
use futures_util::StreamExt;
use super::*;
// Test prelude to simulate a simple scheduler queue.
macro_rules! test_prelude {
() => {
static QUEUE: Mutex<Vec<Runnable>> = Mutex::new(Vec::new());
// Schedules one runnable task by pushing it onto the scheduler queue.
//
// There should be at most one live runnable per task at any time, though
// this simple queue does not enforce that invariant.
#[allow(dead_code)]
fn schedule_runnable(runnable: Runnable, _tag: ()) {
let mut queue = QUEUE.lock().unwrap();
queue.push(runnable);
}
// Runs one runnable task and returns true if a task was scheduled,
// otherwise returns false.
#[allow(dead_code)]
fn run_scheduled_runnable() -> bool {
if let Some(runnable) = QUEUE.lock().unwrap().pop() {
runnable.run();
return true;
}
false
}
// Drops a runnable task and returns true if a task was scheduled, otherwise
// returns false.
#[allow(dead_code)]
fn drop_runnable() -> bool {
if let Some(_runnable) = QUEUE.lock().unwrap().pop() {
return true;
}
false
}
};
}
// A friendly wrapper over a shared atomic boolean that uses only Relaxed
// ordering.
#[derive(Clone)]
struct Flag(Arc<AtomicBool>);
impl Flag {
fn new(value: bool) -> Self {
Self(Arc::new(AtomicBool::new(value)))
}
fn set(&self, value: bool) {
self.0.store(value, Ordering::Relaxed);
}
fn get(&self) -> bool {
self.0.load(Ordering::Relaxed)
}
}
// A simple wrapper for the output of a future with a liveness flag.
struct MonitoredOutput<T> {
is_alive: Flag,
inner: T,
}
impl<T> Deref for MonitoredOutput<T> {
type Target = T;
fn deref(&self) -> &T {
&self.inner
}
}
impl<T> Drop for MonitoredOutput<T> {
fn drop(&mut self) {
self.is_alive.set(false);
}
}
// A simple future wrapper with a liveness flag returning a `MonitoredOutput` on
// completion.
struct MonitoredFuture<F: Future> {
future_is_alive: Flag,
output_is_alive: Flag,
inner: F,
}
impl<F: Future> MonitoredFuture<F> {
// Returns the `MonitoredFuture`, a liveness flag for the future and a
// liveness flag for the output.
fn new(future: F) -> (Self, Flag, Flag) {
let future_is_alive = Flag::new(true);
let output_is_alive = Flag::new(false);
let future_is_alive_remote = future_is_alive.clone();
let output_is_alive_remote = output_is_alive.clone();
(
Self {
future_is_alive,
output_is_alive,
inner: future,
},
future_is_alive_remote,
output_is_alive_remote,
)
}
}
impl<F: Future> Future for MonitoredFuture<F> {
type Output = MonitoredOutput<F::Output>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let inner = unsafe { self.as_mut().map_unchecked_mut(|s| &mut s.inner) };
match inner.poll(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(value) => {
self.output_is_alive.set(true);
let test_output = MonitoredOutput {
is_alive: self.output_is_alive.clone(),
inner: value,
};
Poll::Ready(test_output)
}
}
}
}
impl<F: Future> Drop for MonitoredFuture<F> {
fn drop(&mut self) {
self.future_is_alive.set(false);
}
}
#[test]
fn task_schedule() {
test_prelude!();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { 42 });
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
assert_eq!(future_is_alive.get(), true);
assert_eq!(output_is_alive.get(), false);
// The task should complete immediately when run.
runnable.run();
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42));
}
#[test]
fn task_schedule_mt() {
test_prelude!();
let (promise, runnable, _cancel_token) = spawn(async move { 42 }, schedule_runnable, ());
let th = thread::spawn(move || runnable.run());
loop {
match promise.poll() {
Stage::Pending => {}
Stage::Cancelled => unreachable!(),
Stage::Ready(v) => {
assert_eq!(v, 42);
break;
}
}
}
th.join().unwrap();
}
#[test]
fn task_schedule_and_forget() {
test_prelude!();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {});
let (runnable, _cancel_token) = spawn_and_forget(future, schedule_runnable, ());
assert_eq!(future_is_alive.get(), true);
assert_eq!(output_is_alive.get(), false);
// The task should complete immediately when run.
runnable.run();
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
}
#[test]
fn task_wake() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (future, future_is_alive, output_is_alive) =
MonitoredFuture::new(async move { receiver.await.unwrap() });
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
// The future should have been polled but should not have completed.
assert_eq!(output_is_alive.get(), false);
assert!(promise.poll().is_pending());
// Wake the task.
sender.send(42).unwrap();
// The task should have been scheduled by the channel sender.
assert_eq!(run_scheduled_runnable(), true);
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42));
}
#[test]
fn task_wake_mt() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (promise, runnable, _cancel_token) = spawn(
async move { receiver.await.unwrap() },
schedule_runnable,
(),
);
runnable.run();
let th_sender = thread::spawn(move || sender.send(42).unwrap());
let th_exec = thread::spawn(|| while !run_scheduled_runnable() {});
loop {
match promise.poll() {
Stage::Pending => {}
Stage::Cancelled => unreachable!(),
Stage::Ready(v) => {
assert_eq!(v, 42);
break;
}
}
}
th_sender.join().unwrap();
th_exec.join().unwrap();
}
#[test]
fn task_wake_and_forget() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move {
let _ = receiver.await;
});
let (runnable, _cancel_token) = spawn_and_forget(future, schedule_runnable, ());
runnable.run();
// The future should have been polled but should not have completed.
assert_eq!(output_is_alive.get(), false);
// Wake the task.
sender.send(42).unwrap();
// The task should have been scheduled by the channel sender.
assert_eq!(run_scheduled_runnable(), true);
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
}
#[test]
fn task_multiple_wake() {
test_prelude!();
let (mut sender, mut receiver) = mpsc::channel(3);
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move {
let mut sum = 0;
for _ in 0..5 {
sum += receiver.next().await.unwrap();
}
sum
});
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
// The future should have been polled but should not have completed.
assert!(promise.poll().is_pending());
// Wake the task 3 times.
sender.try_send(1).unwrap();
sender.try_send(2).unwrap();
sender.try_send(3).unwrap();
// The task should have been scheduled by the channel sender.
assert_eq!(run_scheduled_runnable(), true);
assert!(promise.poll().is_pending());
// The channel should be empty. Wake the task 2 more times.
sender.try_send(4).unwrap();
sender.try_send(5).unwrap();
// The task should have been scheduled by the channel sender.
assert_eq!(run_scheduled_runnable(), true);
// The task should have completed.
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
assert_eq!(promise.poll().map(|v| *v), Stage::Ready(15));
}
#[test]
fn task_multiple_wake_mt() {
test_prelude!();
let (mut sender1, mut receiver) = mpsc::channel(3);
let mut sender2 = sender1.clone();
let mut sender3 = sender1.clone();
let (promise, runnable, _cancel_token) = spawn(
async move {
let mut sum = 0;
for _ in 0..3 {
sum += receiver.next().await.unwrap();
}
sum
},
schedule_runnable,
(),
);
runnable.run();
// Wake the task 3 times.
let th_sender1 = thread::spawn(move || {
sender1.try_send(1).unwrap();
while run_scheduled_runnable() {}
});
let th_sender2 = thread::spawn(move || {
sender2.try_send(2).unwrap();
while run_scheduled_runnable() {}
});
let th_sender3 = thread::spawn(move || {
sender3.try_send(3).unwrap();
while run_scheduled_runnable() {}
});
loop {
match promise.poll() {
Stage::Pending => {}
Stage::Cancelled => unreachable!(),
Stage::Ready(v) => {
assert_eq!(v, 6);
break;
}
}
}
th_sender1.join().unwrap();
th_sender2.join().unwrap();
th_sender3.join().unwrap();
}
#[test]
fn task_cancel_scheduled() {
test_prelude!();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {});
let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ());
// Cancel the task while a `Runnable` exists (i.e. while the task is
// considered scheduled).
cancel_token.cancel();
// The future should not be dropped while the `Runnable` exists, even if the
// task is cancelled, but the task should be seen as cancelled.
assert_eq!(future_is_alive.get(), true);
assert!(promise.poll().is_cancelled());
// An attempt to run the task should now drop the future without polling it.
runnable.run();
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), false);
}
#[test]
fn task_cancel_unscheduled() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move {
let _ = receiver.await;
});
let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
assert_eq!(future_is_alive.get(), true);
assert_eq!(output_is_alive.get(), false);
// Cancel the task while no `Runnable` exists (the task is not scheduled as
// it needs to be woken by the channel sender first).
cancel_token.cancel();
assert!(promise.poll().is_cancelled());
assert!(sender.send(()).is_err());
// The future should be dropped immediately upon cancellation without
// completing.
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), false);
}
#[test]
fn task_cancel_completed() {
test_prelude!();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { 42 });
let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
// Cancel the already completed task.
cancel_token.cancel();
assert_eq!(output_is_alive.get(), true);
assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42));
}
#[test]
fn task_cancel_mt() {
test_prelude!();
let (runnable, cancel_token) = spawn_and_forget(async {}, schedule_runnable, ());
let th_cancel = thread::spawn(move || cancel_token.cancel());
runnable.run();
th_cancel.join().unwrap();
}
#[test]
fn task_drop_promise_scheduled() {
test_prelude!();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {});
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
// Drop the promise while a `Runnable` exists (i.e. while the task is
// considered scheduled).
drop(promise);
// The task should complete immediately when run.
runnable.run();
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
}
#[test]
fn task_drop_promise_unscheduled() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move {
let _ = receiver.await;
});
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
// Drop the promise while no `Runnable` exists (the task is not scheduled as
// it needs to be woken by the channel sender first).
drop(promise);
// Wake the task.
assert!(sender.send(()).is_ok());
// The task should have been scheduled by the channel sender.
assert_eq!(run_scheduled_runnable(), true);
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), true);
}
#[test]
fn task_drop_promise_mt() {
test_prelude!();
let (promise, runnable, _cancel_token) = spawn(async {}, schedule_runnable, ());
let th_drop = thread::spawn(move || drop(promise));
runnable.run();
th_drop.join().unwrap()
}
#[test]
fn task_drop_runnable() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move {
let _ = receiver.await;
});
let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ());
runnable.run();
// Wake the task.
assert!(sender.send(()).is_ok());
// Drop the runnable scheduled by the channel sender.
assert_eq!(drop_runnable(), true);
assert_eq!(future_is_alive.get(), false);
assert_eq!(output_is_alive.get(), false);
assert!(promise.poll().is_cancelled());
}
#[test]
fn task_drop_runnable_mt() {
test_prelude!();
let (sender, receiver) = oneshot::channel();
let (runnable, _cancel_token) = spawn_and_forget(
async move {
let _ = receiver.await;
},
schedule_runnable,
(),
);
runnable.run();
let th_sender = thread::spawn(move || sender.send(()).is_ok());
drop_runnable();
th_sender.join().unwrap();
}
#[test]
fn task_drop_cycle() {
test_prelude!();
let (sender1, mut receiver1) = mpsc::channel(2);
let (sender2, mut receiver2) = mpsc::channel(2);
let (sender3, mut receiver3) = mpsc::channel(2);
static DROP_COUNT: AtomicUsize = AtomicUsize::new(0);
// Spawn 3 tasks that wake one another when dropped.
let (runnable1, cancel_token1) = spawn_and_forget(
{
let mut sender2 = sender2.clone();
let mut sender3 = sender3.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender2.try_send(());
let _ = sender3.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver1.next().await;
}
},
schedule_runnable,
(),
);
runnable1.run();
let (runnable2, cancel_token2) = spawn_and_forget(
{
let mut sender1 = sender1.clone();
let mut sender3 = sender3.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender1.try_send(());
let _ = sender3.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver2.next().await;
}
},
schedule_runnable,
(),
);
runnable2.run();
let (runnable3, cancel_token3) = spawn_and_forget(
{
let mut sender1 = sender1.clone();
let mut sender2 = sender2.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender1.try_send(());
let _ = sender2.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver3.next().await;
}
},
schedule_runnable,
(),
);
runnable3.run();
let th1 = thread::spawn(move || cancel_token1.cancel());
let th2 = thread::spawn(move || cancel_token2.cancel());
let th3 = thread::spawn(move || cancel_token3.cancel());
th1.join().unwrap();
th2.join().unwrap();
th3.join().unwrap();
while run_scheduled_runnable() {}
assert_eq!(DROP_COUNT.load(Ordering::Relaxed), 3);
}

View File

@ -0,0 +1,536 @@
use std::future::Future;
use std::pin::Pin;
use std::task::Context;
use std::task::Poll;
use std::task::Waker;
use ::loom::cell::UnsafeCell;
use ::loom::model::Builder;
use ::loom::sync::atomic::AtomicBool;
use ::loom::sync::atomic::AtomicUsize;
use ::loom::sync::atomic::Ordering::*;
use ::loom::sync::Arc;
use ::loom::{lazy_static, thread};
use super::*;
// Test prelude to simulate a single-slot scheduler queue.
macro_rules! test_prelude {
() => {
// A single-slot scheduling queue.
lazy_static! {
static ref RUNNABLE_SLOT: RunnableSlot = RunnableSlot::new();
}
// Schedules one runnable task.
//
// Will panic if the slot was already occupied since there should exist
// at most 1 runnable per task at any time.
#[allow(dead_code)]
fn schedule_task(runnable: Runnable, _tag: ()) {
RUNNABLE_SLOT.set(runnable);
}
// Runs one runnable task and returns true if a task was indeed
// scheduled, otherwise returns false.
#[allow(dead_code)]
fn try_poll_task() -> bool {
if let Some(runnable) = RUNNABLE_SLOT.take() {
runnable.run();
return true;
}
false
}
// Cancels a scheduled task by dropping its runnable and returns true if
// a task was indeed scheduled, otherwise returns false.
#[allow(dead_code)]
fn try_cancel_task() -> bool {
if let Some(_runnable) = RUNNABLE_SLOT.take() {
// Just drop the runnable to cancel the task.
return true;
}
false
}
};
}
struct RunnableSlot {
state: AtomicUsize,
runnable: UnsafeCell<Option<Runnable>>,
}
impl RunnableSlot {
const LOCKED: usize = 0b01;
const POPULATED: usize = 0b10;
fn new() -> Self {
Self {
state: AtomicUsize::new(0),
runnable: UnsafeCell::new(None),
}
}
fn take(&self) -> Option<Runnable> {
self.state
.fetch_update(Acquire, Relaxed, |s| {
// Only lock if there is a runnable and it is not already locked.
if s == Self::POPULATED {
Some(Self::LOCKED)
} else {
None
}
})
.ok()
.and_then(|_| {
// Take the `Runnable`.
let runnable = unsafe { self.runnable.with_mut(|r| (*r).take()) };
assert!(runnable.is_some());
// Release the lock and signal that the slot is empty.
self.state.store(0, Release);
runnable
})
}
fn set(&self, runnable: Runnable) {
// Take the lock.
let state = self.state.swap(Self::LOCKED, Acquire);
// Expect the initial state to be 0. Otherwise, there is already a
// stored `Runnable` or one is being stored or taken, which should not
// happen since a task can have at most 1 `Runnable` at a time.
if state != 0 {
panic!("Error: there are several live `Runnable`s for the same task");
}
// Store the `Runnable`.
unsafe { self.runnable.with_mut(|r| *r = Some(runnable)) };
// Release the lock and signal that the slot is populated.
self.state.store(Self::POPULATED, Release);
}
}
// An asynchronous count-down counter.
//
// The implementation is intentionally naive and wakes the `CountWatcher` each
// time the count is decremented, even though the future actually only completes
// when the count reaches 0.
//
// Note that for simplicity, the waker may not be changed once set; this is not
// an issue since the tested task implementation never changes the waker.
fn count_down(init_count: usize) -> (CountController, CountWatcher) {
let inner = Arc::new(CounterInner::new(init_count));
(
CountController {
inner: inner.clone(),
},
CountWatcher { inner },
)
}
// The counter inner type.
struct CounterInner {
waker: UnsafeCell<Option<Waker>>,
state: AtomicUsize,
}
impl CounterInner {
const HAS_WAKER: usize = 1 << 0;
const INCREMENT: usize = 1 << 1;
fn new(init_count: usize) -> Self {
Self {
waker: UnsafeCell::new(None),
state: AtomicUsize::new(init_count * Self::INCREMENT),
}
}
}
// A `Clone` and `Sync` entity that can decrement the counter.
#[derive(Clone)]
struct CountController {
inner: Arc<CounterInner>,
}
impl CountController {
// Decrements the count and notifies the watcher if a waker is registered.
//
// This will panic if the counter is decremented too many times.
fn decrement(&self) {
let state = self.inner.state.fetch_sub(CounterInner::INCREMENT, Acquire);
if state / CounterInner::INCREMENT == 0 {
panic!("The count-down counter has wrapped around");
}
if state & CounterInner::HAS_WAKER != 0 {
unsafe {
self.inner
.waker
.with(|w| (&*w).as_ref().map(Waker::wake_by_ref))
};
}
}
}
unsafe impl Send for CountController {}
unsafe impl Sync for CountController {}
// An entity notified by the controller each time the count is decremented.
struct CountWatcher {
inner: Arc<CounterInner>,
}
impl Future for CountWatcher {
type Output = ();
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let state = self.inner.state.load(Relaxed);
if state / CounterInner::INCREMENT == 0 {
return Poll::Ready(());
}
if state & CounterInner::HAS_WAKER == CounterInner::HAS_WAKER {
// Changes of the waker are not supported, so check that the waker
// indeed hasn't changed.
assert!(
unsafe {
self.inner
.waker
.with(|w| cx.waker().will_wake((*w).as_ref().unwrap()))
},
"This testing primitive does not support changes of waker"
);
return Poll::Pending;
}
unsafe { self.inner.waker.with_mut(|w| *w = Some(cx.waker().clone())) };
let state = self.inner.state.fetch_or(CounterInner::HAS_WAKER, Release);
if state / CounterInner::INCREMENT == 0 {
Poll::Ready(())
} else {
Poll::Pending
}
}
}
unsafe impl Send for CountWatcher {}
#[test]
fn loom_task_schedule() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
lazy_static! {
static ref READY: AtomicBool = AtomicBool::new(false);
}
let (promise, runnable, _cancel_token) = spawn(async move { 42 }, schedule_task, ());
let t = thread::spawn(move || {
// The task should complete immediately when run.
runnable.run();
READY.store(true, Release);
});
if READY.load(Acquire) {
assert_eq!(promise.poll(), Stage::Ready(42));
}
t.join().unwrap();
});
}
#[test]
fn loom_task_custom1() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
lazy_static! {
static ref READY: AtomicBool = AtomicBool::new(false);
}
let (promise, runnable, cancel_token) = spawn(async move { 42 }, schedule_task, ());
let t = thread::spawn(move || {
// The task should complete immediately when run.
runnable.run();
});
cancel_token.cancel();
t.join().unwrap();
});
}
#[test]
fn loom_task_cancel() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
lazy_static! {
static ref IS_CANCELLED: AtomicBool = AtomicBool::new(false);
}
let (count_controller, count_watcher) = count_down(1);
let (promise, runnable, cancel_token) =
spawn(async move { count_watcher.await }, schedule_task, ());
runnable.run();
let waker_thread = thread::spawn(move || {
count_controller.decrement();
});
let scheduler_thread = thread::spawn(|| {
try_poll_task();
});
let cancel_thread = thread::spawn(move || {
cancel_token.cancel();
IS_CANCELLED.store(true, Release);
});
if IS_CANCELLED.load(Acquire) {
assert!(promise.poll() != Stage::Pending);
}
waker_thread.join().unwrap();
scheduler_thread.join().unwrap();
cancel_thread.join().unwrap();
});
}
#[test]
fn loom_task_run_and_drop() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
let (count_controller, count_watcher) = count_down(1);
let (runnable, cancel_token) =
spawn_and_forget(async move { count_watcher.await }, schedule_task, ());
runnable.run();
let waker_thread = thread::spawn(move || {
count_controller.decrement();
});
let runnable_thread = thread::spawn(|| {
try_poll_task();
});
drop(cancel_token);
waker_thread.join().unwrap();
runnable_thread.join().unwrap();
});
}
#[test]
fn loom_task_run_and_cancel() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
let (count_controller, count_watcher) = count_down(1);
let (runnable, cancel_token) =
spawn_and_forget(async move { count_watcher.await }, schedule_task, ());
runnable.run();
let waker_thread = thread::spawn(move || {
count_controller.decrement();
});
let runnable_thread = thread::spawn(|| {
try_poll_task();
});
cancel_token.cancel();
waker_thread.join().unwrap();
runnable_thread.join().unwrap();
});
}
#[test]
fn loom_task_drop_all() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
let (promise, runnable, cancel_token) = spawn(async move {}, schedule_task, ());
let promise_thread = thread::spawn(move || {
drop(promise);
});
let runnable_thread = thread::spawn(move || {
drop(runnable);
});
drop(cancel_token);
promise_thread.join().unwrap();
runnable_thread.join().unwrap();
});
}
#[test]
fn loom_task_drop_with_waker() {
const DEFAULT_PREEMPTION_BOUND: usize = 4;
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND);
}
builder.check(move || {
test_prelude!();
let (count_controller, count_watcher) = count_down(1);
let (promise, runnable, cancel_token) =
spawn(async move { count_watcher.await }, schedule_task, ());
runnable.run();
let waker_thread = thread::spawn(move || {
count_controller.decrement();
});
let promise_thread = thread::spawn(move || {
drop(promise);
});
let runnable_thread = thread::spawn(|| {
try_cancel_task(); // drop the runnable if available
});
drop(cancel_token);
waker_thread.join().unwrap();
promise_thread.join().unwrap();
runnable_thread.join().unwrap();
});
}
#[test]
fn loom_task_wake_single_thread() {
const DEFAULT_PREEMPTION_BOUND: usize = 3;
const TICK_COUNT1: usize = 4;
const TICK_COUNT2: usize = 0;
loom_task_wake(DEFAULT_PREEMPTION_BOUND, TICK_COUNT1, TICK_COUNT2);
}
#[test]
fn loom_task_wake_multi_thread() {
const DEFAULT_PREEMPTION_BOUND: usize = 3;
const TICK_COUNT1: usize = 1;
const TICK_COUNT2: usize = 2;
loom_task_wake(DEFAULT_PREEMPTION_BOUND, TICK_COUNT1, TICK_COUNT2);
}
// Tests task wake-ups coming from one or two threads.
fn loom_task_wake(preemption_bound: usize, tick_count1: usize, tick_count2: usize) {
let mut builder = Builder::new();
if builder.preemption_bound.is_none() {
builder.preemption_bound = Some(preemption_bound);
}
let total_tick_count = tick_count1 + tick_count2;
builder.check(move || {
test_prelude!();
lazy_static! {
static ref POLL_COUNT: AtomicUsize = AtomicUsize::new(0);
}
let (count_controller1, count_watcher) = count_down(total_tick_count);
let count_controller2 = count_controller1.clone();
let (promise, runnable, _cancel_token) =
spawn(async move { count_watcher.await }, schedule_task, ());
runnable.run();
let waker_thread1 = if tick_count1 != 0 {
Some(thread::spawn(move || {
for _ in 0..tick_count1 {
count_controller1.decrement();
}
}))
} else {
None
};
let waker_thread2 = if tick_count2 != 0 {
Some(thread::spawn(move || {
for _ in 0..tick_count2 {
count_controller2.decrement();
}
}))
} else {
None
};
let scheduler_thread = thread::spawn(move || {
// Try to run scheduled runnables.
for _ in 0..total_tick_count {
if try_poll_task() {
POLL_COUNT.fetch_add(1, Release);
}
}
});
let poll_count = POLL_COUNT.load(Acquire);
let has_completed = poll_count == total_tick_count;
// Check that the promise is available if the task has been polled
// `total_tick_count` times.
if has_completed {
assert_eq!(promise.poll(), Stage::Ready(()));
}
scheduler_thread.join().unwrap();
if let Some(t) = waker_thread1 {
t.join().unwrap();
}
if let Some(t) = waker_thread2 {
t.join().unwrap();
}
// If the promise has not been retrieved yet, retrieve it now. It may be
// necessary to poll the task one last time.
if !has_completed {
if POLL_COUNT.load(Acquire) != total_tick_count {
try_poll_task();
}
assert_eq!(promise.poll(), Stage::Ready(()));
}
});
}

View File

@ -0,0 +1,23 @@
use super::{CLOSED, POLLING, WAKE_MASK};
/// An object that runs an arbitrary closure when dropped.
pub(crate) struct RunOnDrop<F: FnMut()> {
drop_fn: F,
}
impl<F: FnMut()> RunOnDrop<F> {
/// Creates a new `RunOnDrop`.
pub(crate) fn new(drop_fn: F) -> Self {
Self { drop_fn }
}
}
impl<F: FnMut()> Drop for RunOnDrop<F> {
fn drop(&mut self) {
(self.drop_fn)();
}
}
/// Checks if a `Runnable` exists based on the task state.
#[inline(always)]
pub(crate) fn runnable_exists(state: u64) -> bool {
state & POLLING != 0 && state & (WAKE_MASK | CLOSED) != 0
}

View File

@ -0,0 +1,142 @@
use std::sync::atomic::{AtomicUsize, Ordering};
use futures_channel::{mpsc, oneshot};
use futures_util::StreamExt;
use super::*;
/// An object that runs an arbitrary closure when dropped.
struct RunOnDrop<F: FnOnce()> {
drop_fn: Option<F>,
}
impl<F: FnOnce()> RunOnDrop<F> {
/// Creates a new `RunOnDrop`.
fn new(drop_fn: F) -> Self {
Self {
drop_fn: Some(drop_fn),
}
}
}
impl<F: FnOnce()> Drop for RunOnDrop<F> {
fn drop(&mut self) {
if let Some(drop_fn) = self.drop_fn.take() {
drop_fn();
}
}
}
#[test]
fn executor_deadlock() {
const NUM_THREADS: usize = 3;
let (_sender1, receiver1) = oneshot::channel::<()>();
let (_sender2, receiver2) = oneshot::channel::<()>();
let mut executor = Executor::new(NUM_THREADS);
static LAUNCH_COUNT: AtomicUsize = AtomicUsize::new(0);
static COMPLETION_COUNT: AtomicUsize = AtomicUsize::new(0);
executor.spawn_and_forget(async move {
LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
let _ = receiver2.await;
COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
});
executor.spawn_and_forget(async move {
LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
let _ = receiver1.await;
COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
});
executor.run();
// Check that the executor returns on deadlock, i.e. none of the tasks has
// completed.
assert_eq!(LAUNCH_COUNT.load(Ordering::Relaxed), 2);
assert_eq!(COMPLETION_COUNT.load(Ordering::Relaxed), 0);
}
#[test]
fn executor_deadlock_st() {
const NUM_THREADS: usize = 1;
let (_sender1, receiver1) = oneshot::channel::<()>();
let (_sender2, receiver2) = oneshot::channel::<()>();
let mut executor = Executor::new(NUM_THREADS);
static LAUNCH_COUNT: AtomicUsize = AtomicUsize::new(0);
static COMPLETION_COUNT: AtomicUsize = AtomicUsize::new(0);
executor.spawn_and_forget(async move {
LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
let _ = receiver2.await;
COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
});
executor.spawn_and_forget(async move {
LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
let _ = receiver1.await;
COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
});
executor.run();
// Check that the executor returns on deadlock, i.e. none of the tasks has
// completed.
assert_eq!(LAUNCH_COUNT.load(Ordering::Relaxed), 2);
assert_eq!(COMPLETION_COUNT.load(Ordering::Relaxed), 0);
}
#[test]
fn executor_drop_cycle() {
const NUM_THREADS: usize = 3;
let (sender1, mut receiver1) = mpsc::channel(2);
let (sender2, mut receiver2) = mpsc::channel(2);
let (sender3, mut receiver3) = mpsc::channel(2);
let mut executor = Executor::new(NUM_THREADS);
static DROP_COUNT: AtomicUsize = AtomicUsize::new(0);
// Spawn 3 tasks that wake one another when dropped.
executor.spawn_and_forget({
let mut sender2 = sender2.clone();
let mut sender3 = sender3.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender2.try_send(());
let _ = sender3.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver1.next().await;
}
});
executor.spawn_and_forget({
let mut sender1 = sender1.clone();
let mut sender3 = sender3.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender1.try_send(());
let _ = sender3.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver2.next().await;
}
});
executor.spawn_and_forget({
let mut sender1 = sender1.clone();
let mut sender2 = sender2.clone();
async move {
let _guard = RunOnDrop::new(move || {
let _ = sender1.try_send(());
let _ = sender2.try_send(());
DROP_COUNT.fetch_add(1, Ordering::Relaxed);
});
let _ = receiver3.next().await;
}
});
executor.run();
// Make sure that all tasks are eventually dropped even though each task
// wakes the others when dropped.
drop(executor);
assert_eq!(DROP_COUNT.load(Ordering::Relaxed), 3);
}
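// A minimal nominal-case sketch (a hypothetical, illustrative test using only
// the `Executor` API already exercised above): a spawned task that never
// blocks should have completed by the time `run` returns.
#[test]
fn executor_smoke() {
    const NUM_THREADS: usize = 2;
    static COMPLETION_COUNT: AtomicUsize = AtomicUsize::new(0);
    let mut executor = Executor::new(NUM_THREADS);
    executor.spawn_and_forget(async {
        COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
    });
    executor.run();
    assert_eq!(COMPLETION_COUNT.load(Ordering::Relaxed), 1);
}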

View File

@ -0,0 +1,25 @@
use std::cell::Cell;
use std::sync::Arc;
use super::task::Runnable;
use super::pool::Pool;
use super::LocalQueue;
/// A local worker with access to global executor resources.
pub(crate) struct Worker {
pub(crate) local_queue: LocalQueue,
pub(crate) fast_slot: Cell<Option<Runnable>>,
pub(crate) pool: Arc<Pool>,
}
impl Worker {
/// Creates a new worker.
pub(crate) fn new(local_queue: LocalQueue, pool: Arc<Pool>) -> Self {
Self {
local_queue,
fast_slot: Cell::new(None),
pool,
}
}
}