commit 5c94ec6a65765d1fbf94812e7c8d5e72336c62c2 Author: Serge Barral Date: Wed Oct 12 05:33:16 2022 +0200 First public commit diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c12c4af --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,124 @@ +name: CI + +on: + pull_request: + push: + branches: [ main ] + +# Uncomment before first release. +#env: +# RUSTFLAGS: -Dwarnings + +jobs: + check: + name: Check + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + rust: + - stable + - 1.64.0 + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + profile: minimal + override: true + + - name: Run cargo check + uses: actions-rs/cargo@v1 + with: + command: check + args: --benches + + test: + name: Test suite + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Run cargo test + uses: actions-rs/cargo@v1 + with: + command: test + args: --release + + loom-dry-run: + name: Loom dry run + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Dry-run cargo test (Loom) + uses: actions-rs/cargo@v1 + with: + command: test + args: --no-run --tests + env: + RUSTFLAGS: --cfg asynchronix_loom + + lints: + name: Lints + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: default + override: true + + - name: Run cargo fmt + uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + - name: Run cargo clippy + uses: actions-rs/cargo@v1 + with: + command: clippy + + docs: + name: Docs + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Run cargo doc + uses: actions-rs/cargo@v1 + with: + command: doc + args: --no-deps --document-private-items diff --git a/.github/workflows/loom.yml b/.github/workflows/loom.yml new file mode 100644 index 0000000..a65424e --- /dev/null +++ b/.github/workflows/loom.yml @@ -0,0 +1,34 @@ +name: Loom + +on: + pull_request: + push: + branches: [ main ] + paths: + - 'asynchronix/src/runtime/executor/queue.rs' + - 'asynchronix/src/runtime/executor/queue/**' + - 'asynchronix/src/runtime/executor/task.rs' + - 'asynchronix/src/runtime/executor/task/**' + +jobs: + loom: + name: Loom + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - name: Run cargo test (Loom) + uses: actions-rs/cargo@v1 + with: + command: test + args: --tests --release + env: + RUSTFLAGS: --cfg asynchronix_loom \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f2f9e58 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..94974a1 --- /dev/null +++ b/Cargo.toml @@ 
-0,0 +1,2 @@ +[workspace] +members = ["asynchronix"] diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..9f71055 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..8b3f32a --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2022 Serge Barral + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..47650c7 --- /dev/null +++ b/README.md @@ -0,0 +1,96 @@ +# Asynchronix + +A high-performance asynchronous computation framework for system simulation. + +## What is this? + +> **Warning**: this page is at the moment mostly addressed at interested +> contributors, but resources for users will be added soon. + +In a nutshell, Asynchronix is an effort to develop a framework for +discrete-event system simulation, with a particular focus on cyberphysical +systems. In this context, a system might be something as large as a spacecraft, +or as small as a IoT device. + +Asynchronix draws from experience in the space industry but differs from +existing tools in a number of respects, including: + +1) *open-source license*: it is distributed under the very permissive MIT and + Apache 2 licenses, with the intent to foster an ecosystem where models can be + easily exchanged without reliance on proprietary APIs, +2) *developer-friendly technology*: Rust's support for algebraic types and its + powerful type system make it ideal for the "cyber" part in cyberphysical, + i.e. for modelling digital devices with state machines, +3) *very fast*: by leveraging Rust's excellent support for multithreading and + async programming, simulation models can run efficiently in parallel with all + required synchronization being transparently handled by the simulator. + + +## General design + +Asynchronix is an async compute framework for time-based discrete event +simulation. + +From the perspective of simulation model implementers and users, it closely +resembles a flow-based programming framework: a model is essentially an isolated +entity with a fixed set of typed inputs and outputs, communicating with other +models and with the scheduler through message passing. 
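As a purely illustrative sketch, and not the crate's actual API (model inputs and outputs are not implemented yet at this commit, so every name below is hypothetical), this mental model can be emulated with plain std threads and channels: a model owns private state, drains typed input messages from a single mailbox, and forwards results through an output port to a downstream mailbox.

```rust
// Illustration only: NOT the Asynchronix API. It merely sketches the
// flow-based idea with std primitives: a model owns its state, reads typed
// input messages from a single mailbox and emits results on an output port.
use std::sync::mpsc;
use std::thread;

enum Input {
    Increment(u64),
    Shutdown,
}

fn main() {
    let (input, mailbox) = mpsc::channel::<Input>();
    let (output, downstream) = mpsc::channel::<u64>();

    // The "model": its state is private, all interaction is by message passing.
    let counter = thread::spawn(move || {
        let mut count = 0u64;
        while let Ok(msg) = mailbox.recv() {
            match msg {
                Input::Increment(n) => {
                    count += n;
                    output.send(count).unwrap(); // emit on the output port
                }
                Input::Shutdown => break,
            }
        }
    });

    input.send(Input::Increment(3)).unwrap();
    input.send(Input::Increment(4)).unwrap();
    input.send(Input::Shutdown).unwrap();
    counter.join().unwrap();

    // The downstream mailbox observed the updates in their order of arrival.
    assert_eq!(downstream.iter().collect::<Vec<_>>(), vec![3, 7]);
}
```

In the real framework, the thread and blocking channel would be replaced by a task running on the custom async executor described below, but the message-passing contract stays the same.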
+Unlike in conventional flow-based programming, however, request-response
+patterns are also possible.
+
+Under the hood, Asynchronix's implementation is based on async Rust and the
+actor model. All inputs are forwarded to a single "mailbox" (an async channel),
+preserving the relative order of arrival of input messages.
+
+Computations proceed at discrete times. When executed, models can post events
+for the future, i.e. request the delayed activation of an input. Whenever the
+computation at a given time completes, the scheduler selects the nearest future
+time at which one or several events are scheduled, thus triggering another set
+of computations.
+
+This computational process makes it difficult to use general-purpose runtimes
+such as Tokio, because the end of a set of computations is technically a
+deadlock: the computation completes when all models have nothing left to do and
+are blocked on an empty mailbox. Also, instead of managing a conventional
+reactor, the runtime manages a priority queue containing the posted events. For
+these reasons, it was necessary for Asynchronix to develop a fully custom
+runtime.
+
+Another crucial aspect of async compute is message-passing efficiency:
+oftentimes the processing of an input is a simple action, making inter-thread
+message-passing the bottleneck. This in turn calls for a very efficient
+channel implementation, heavily optimized for the case of starved receivers
+since models are most of the time waiting for an input to become available.
+
+
+## Current state
+
+The simulator is rapidly approaching MVP completion and has achieved 2 major
+milestones:
+
+* completion of an extremely fast asynchronous multi-threaded channel,
+  demonstrated in the [Tachyonix][tachyonix] project; this channel is the
+  backbone of the actor model,
+* completion of a custom `async` executor optimized for message-passing and
+  deadlock detection, which has demonstrated even better performance than Tokio
+  for message-passing; this executor is already in the main branch and can be
+  tested against other executors using the Tachyonix [benchmark].
+
+Before it becomes usable, however, further work is required to implement the
+priority queue, implement model inputs and outputs, and adapt the channel.
+
+[tachyonix]: https://github.com/asynchronics/tachyonix
+
+[benchmark]: https://github.com/asynchronics/tachyonix/tree/main/bench
+
+
+## License
+
+This software is licensed under the [Apache License, Version 2.0](LICENSE-APACHE) or the
+[MIT license](LICENSE-MIT), at your option.
+
+
+## Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
+dual licensed as above, without any additional terms or conditions.
\ No newline at end of file
diff --git a/asynchronix/Cargo.toml b/asynchronix/Cargo.toml
new file mode 100644
index 0000000..6a9599b
--- /dev/null
+++ b/asynchronix/Cargo.toml
@@ -0,0 +1,33 @@
+[package]
+name = "asynchronix"
+authors = ["Serge Barral "]
+version = "0.1.0"
+edition = "2021"
+rust-version = "1.64"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/asynchronics/asynchronix"
+readme = "../README.md"
+description = """
+A high-performance asynchronous compute framework for system simulation.
+""" +categories = ["simulation", "aerospace", "science"] +keywords = ["simulation", "discrete-event", "systems", "cyberphysical", "real-time"] + +[features] +# API-unstable public exports meant for external test/benchmarking; development only. +dev-hooks = [] +# Logging of performance-related statistics; development only. +dev-logs = [] + +[dependencies] +parking = "2.0" +slab = "0.4" +cache-padded = "1.1" +num_cpus = "1.13" + +[target.'cfg(asynchronix_loom)'.dependencies] +loom = "0.5" + +[dev-dependencies] +futures-channel = "0.3" +futures-util = "0.3" diff --git a/asynchronix/src/dev_hooks.rs b/asynchronix/src/dev_hooks.rs new file mode 100644 index 0000000..c0bfef5 --- /dev/null +++ b/asynchronix/src/dev_hooks.rs @@ -0,0 +1,38 @@ +//! Unstable, unofficial public API meant for external benchmarking and testing. +//! +//! Not for production use! + +use std::future::Future; + +use crate::runtime::executor; + +/// A multi-threaded `async` executor. +#[derive(Debug)] +pub struct Executor(executor::Executor); + +impl Executor { + /// Creates an executor that runs futures on a thread pool. + /// + /// The maximum number of threads is set with the `pool_size` parameter. + pub fn new(pool_size: usize) -> Self { + Self(executor::Executor::new(pool_size)) + } + + /// Spawns a task which output will never be retrieved. + /// + /// This is mostly useful to avoid undue reference counting for futures that + /// return a `()` type. + pub fn spawn_and_forget(&self, future: T) + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + self.0.spawn_and_forget(future); + } + + /// Let the executor run, blocking until all futures have completed or until + /// the executor deadlocks. + pub fn run(&mut self) { + self.0.run(); + } +} diff --git a/asynchronix/src/lib.rs b/asynchronix/src/lib.rs new file mode 100644 index 0000000..754d0ee --- /dev/null +++ b/asynchronix/src/lib.rs @@ -0,0 +1,11 @@ +//! Asynchronix: a high-performance asynchronous computation framework for +//! system simulation. + +#![warn(missing_docs, missing_debug_implementations, unreachable_pub)] + +mod loom_exports; +pub(crate) mod macros; +pub mod runtime; + +#[cfg(feature = "dev-hooks")] +pub mod dev_hooks; diff --git a/asynchronix/src/loom_exports.rs b/asynchronix/src/loom_exports.rs new file mode 100644 index 0000000..e279e62 --- /dev/null +++ b/asynchronix/src/loom_exports.rs @@ -0,0 +1,53 @@ +#[cfg(asynchronix_loom)] +#[allow(unused_imports)] +pub(crate) mod sync { + pub(crate) mod atomic { + pub(crate) use loom::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicUsize, Ordering}; + } +} +#[cfg(not(asynchronix_loom))] +#[allow(unused_imports)] +pub(crate) mod sync { + pub(crate) mod atomic { + pub(crate) use std::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicUsize, Ordering}; + } +} + +#[cfg(asynchronix_loom)] +pub(crate) mod cell { + pub(crate) use loom::cell::UnsafeCell; +} +#[cfg(not(asynchronix_loom))] +pub(crate) mod cell { + #[derive(Debug)] + pub(crate) struct UnsafeCell(std::cell::UnsafeCell); + + #[allow(dead_code)] + impl UnsafeCell { + #[inline(always)] + pub(crate) fn new(data: T) -> UnsafeCell { + UnsafeCell(std::cell::UnsafeCell::new(data)) + } + #[inline(always)] + pub(crate) fn with(&self, f: impl FnOnce(*const T) -> R) -> R { + f(self.0.get()) + } + #[inline(always)] + pub(crate) fn with_mut(&self, f: impl FnOnce(*mut T) -> R) -> R { + f(self.0.get()) + } + } +} + +#[allow(unused_macros)] +macro_rules! 
debug_or_loom_assert { + ($($arg:tt)*) => (if cfg!(any(debug_assertions, asynchronix_loom)) { assert!($($arg)*); }) +} +#[allow(unused_macros)] +macro_rules! debug_or_loom_assert_eq { + ($($arg:tt)*) => (if cfg!(any(debug_assertions, asynchronix_loom)) { assert_eq!($($arg)*); }) +} +#[allow(unused_imports)] +pub(crate) use debug_or_loom_assert; +#[allow(unused_imports)] +pub(crate) use debug_or_loom_assert_eq; diff --git a/asynchronix/src/macros.rs b/asynchronix/src/macros.rs new file mode 100644 index 0000000..d21c9cd --- /dev/null +++ b/asynchronix/src/macros.rs @@ -0,0 +1 @@ +pub(crate) mod scoped_local_key; diff --git a/asynchronix/src/macros/scoped_local_key.rs b/asynchronix/src/macros/scoped_local_key.rs new file mode 100644 index 0000000..07db399 --- /dev/null +++ b/asynchronix/src/macros/scoped_local_key.rs @@ -0,0 +1,182 @@ +use std::thread::LocalKey; + +use std::cell::Cell; +use std::marker; +use std::ptr; + +/// Declare a new thread-local storage scoped key of type `ScopedKey`. +/// +/// This is based on the `scoped-tls` crate, with slight modifications, such as +/// the use of the newly available `const` qualifier for TLS. +macro_rules! scoped_thread_local { + ($(#[$attrs:meta])* $vis:vis static $name:ident: $ty:ty) => ( + $(#[$attrs])* + $vis static $name: $crate::macros::scoped_local_key::ScopedLocalKey<$ty> + = $crate::macros::scoped_local_key::ScopedLocalKey { + inner: { + thread_local!(static FOO: ::std::cell::Cell<*const ()> = const { + std::cell::Cell::new(::std::ptr::null()) + }); + &FOO + }, + _marker: ::std::marker::PhantomData, + }; + ) +} +pub(crate) use scoped_thread_local; + +/// Type representing a thread local storage key corresponding to a reference +/// to the type parameter `T`. +pub(crate) struct ScopedLocalKey { + pub(crate) inner: &'static LocalKey>, + pub(crate) _marker: marker::PhantomData, +} + +unsafe impl Sync for ScopedLocalKey {} + +impl ScopedLocalKey { + /// Inserts a value into this scoped thread local storage slot for the + /// duration of a closure. + pub(crate) fn set(&'static self, t: &T, f: F) -> R + where + F: FnOnce() -> R, + { + struct Reset { + key: &'static LocalKey>, + val: *const (), + } + + impl Drop for Reset { + fn drop(&mut self) { + self.key.with(|c| c.set(self.val)); + } + } + + let prev = self.inner.with(|c| { + let prev = c.get(); + c.set(t as *const _ as *const ()); + prev + }); + + let _reset = Reset { + key: self.inner, + val: prev, + }; + + f() + } + + /// Removes the value from this scoped thread local storage slot for the + /// duration of a closure. + pub(crate) fn unset(&'static self, f: F) -> R + where + F: FnOnce() -> R, + { + struct Reset { + key: &'static LocalKey>, + val: *const (), + } + + impl Drop for Reset { + fn drop(&mut self) { + self.key.with(|c| c.set(self.val)); + } + } + + let prev = self.inner.with(|c| { + let prev = c.get(); + c.set(ptr::null()); + prev + }); + + let _reset = Reset { + key: self.inner, + val: prev, + }; + + f() + } + + /// Evaluates a closure taking as argument a reference to the value if set + /// and returns the closures output, or `None` if the value is not set. 
+ pub(crate) fn map(&'static self, f: F) -> Option + where + F: FnOnce(&T) -> R, + { + let val = self.inner.with(|c| c.get()); + + if val.is_null() { + None + } else { + Some(f(unsafe { &*(val as *const T) })) + } + } +} + +#[cfg(all(test, not(asynchronix_loom)))] +mod tests { + use std::cell::Cell; + use std::sync::mpsc::{channel, Sender}; + use std::thread; + + scoped_thread_local!(static FOO: u32); + + #[test] + fn scoped_local_key_smoke() { + scoped_thread_local!(static BAR: u32); + + BAR.set(&1, || { + BAR.map(|_slot| {}).unwrap(); + }); + } + + #[test] + fn scoped_local_key_set() { + scoped_thread_local!(static BAR: Cell); + + BAR.set(&Cell::new(1), || { + BAR.map(|slot| { + assert_eq!(slot.get(), 1); + }) + .unwrap(); + }); + } + + #[test] + fn scoped_local_key_unset() { + scoped_thread_local!(static BAR: Cell); + + BAR.set(&Cell::new(1), || { + BAR.unset(|| assert!(BAR.map(|_| {}).is_none())); + BAR.map(|slot| { + assert_eq!(slot.get(), 1); + }) + .unwrap(); + }); + } + + #[test] + fn scoped_local_key_panic_resets() { + struct Check(Sender); + impl Drop for Check { + fn drop(&mut self) { + FOO.map(|r| { + self.0.send(*r).unwrap(); + }) + .unwrap() + } + } + + let (tx, rx) = channel(); + let t = thread::spawn(|| { + FOO.set(&1, || { + let _r = Check(tx); + + FOO.set(&2, || panic!()); + }); + }); + + assert_eq!(rx.recv().unwrap(), 1); + assert!(t.join().is_err()); + } +} diff --git a/asynchronix/src/runtime.rs b/asynchronix/src/runtime.rs new file mode 100644 index 0000000..bb50d71 --- /dev/null +++ b/asynchronix/src/runtime.rs @@ -0,0 +1,3 @@ +//! Executor and tasks. + +pub(crate) mod executor; diff --git a/asynchronix/src/runtime/executor.rs b/asynchronix/src/runtime/executor.rs new file mode 100644 index 0000000..08b4385 --- /dev/null +++ b/asynchronix/src/runtime/executor.rs @@ -0,0 +1,466 @@ +use std::future::Future; +use std::panic::{self, AssertUnwindSafe}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread::{self, JoinHandle}; +use std::time::{Duration, Instant}; + +use parking::Parker; +use slab::Slab; + +mod find_bit; +mod injector; +mod pool; +mod queue; +mod rng; +mod task; +mod worker; + +#[cfg(all(test, not(asynchronix_loom)))] +mod tests; + +use self::pool::{Pool, PoolState}; +use self::rng::Rng; +use self::task::{CancelToken, Promise, Runnable}; +use self::worker::Worker; +use crate::macros::scoped_local_key::scoped_thread_local; + +type Bucket = injector::Bucket; +type GlobalQueue = injector::Injector; +type LocalQueue = queue::Worker; +type Stealer = queue::Stealer; + +scoped_thread_local!(static LOCAL_WORKER: Worker); +scoped_thread_local!(static ACTIVE_TASKS: Mutex>); +static NEXT_EXECUTOR_ID: AtomicUsize = AtomicUsize::new(0); + +/// A multi-threaded `async` executor. +/// +/// The executor is exclusively designed for message-passing computational +/// tasks. As such, it does not include an I/O reactor and does not consider +/// fairness as a goal in itself. While it does use fair local queues inasmuch +/// as these tend to perform better in message-passing applications, it uses an +/// unfair injection queue and a LIFO slot without attempt to mitigate the +/// effect of badly behaving code (e.g. futures that use spin-locks and hope for +/// the best by yielding to the executor with something like tokio's +/// `yield_now`). +/// +/// Another way in which it differs from other `async` executors is that it +/// treats deadlocking as a normal occurrence. 
This is because in a +/// discrete-time simulator, the simulation of a system at a given time step +/// will make as much progress as possible until it technically reaches a +/// deadlock. Only then does the simulator advance the simulated time until the +/// next "event" extracted from a time-sorted priority queue, sending it to +/// enable further progress in the computation. +/// +/// The design of the executor is largely influenced by the tokio and go +/// schedulers, both of which are optimized for message-passing applications. In +/// particular, it uses fast, fixed-size thread-local work-stealing queues with +/// a "fast" non-stealable slot in combination with a global injector queue. The +/// injector queue is used both to schedule new tasks and to absorb temporary +/// overflow in the local queues. The design of the injector queue is kept very +/// simple by taking advantage of the fact that the injector is not required to +/// be either LIFO or FIFO. +/// +/// Probably the largest difference with tokio is the task system, which boasts +/// a higher throughput achieved by reducing the need for synchronization. +/// Another difference is that, at the moment, the complete subset of active +/// worker threads is stored in a single atomic variable. This makes it in +/// particular possible to rapidly identify free worker threads for stealing +/// operations. The downside of this approach is that the maximum number of +/// worker threads is limited to `usize::BITS`, but this is unlikely to +/// constitute a limitation since system simulation is not typically an +/// embarrassingly parallel problem. +#[derive(Debug)] +pub(crate) struct Executor { + pool: Arc, + active_tasks: Arc>>, + parker: parking::Parker, + join_handles: Vec>, +} + +impl Executor { + /// Creates an executor that runs futures on a thread pool. + /// + /// The maximum number of threads is set with the `num_threads` parameter. + pub(crate) fn new(num_threads: usize) -> Self { + let (parker, unparker) = parking::pair(); + + let (local_data, shared_data): (Vec<_>, Vec<_>) = (0..num_threads) + .map(|_| { + let (parker, unparker) = parking::pair(); + let local_queue = LocalQueue::new(); + let stealer = local_queue.stealer(); + + ((local_queue, parker), (stealer, unparker)) + }) + .unzip(); + + // Each executor instance has a unique ID inherited by tasks to ensure + // that tasks are scheduled on their parent executor. + let executor_id = NEXT_EXECUTOR_ID.fetch_add(1, Ordering::Relaxed); + assert!( + executor_id <= usize::MAX / 2, + "{} executors have been instantiated: this is most probably a bug.", + usize::MAX / 2 + ); + + let pool = Arc::new(Pool::new(executor_id, unparker, shared_data.into_iter())); + let active_tasks = Arc::new(Mutex::new(Slab::new())); + + // All workers must be marked as active _before_ spawning the threads to + // make sure that the count of active workers does not fall to zero + // before all workers are blocked on the signal barrier. + pool.set_all_workers_active(); + + // Spawn all worker threads. 
+ let join_handles: Vec<_> = local_data + .into_iter() + .enumerate() + .into_iter() + .map(|(id, (local_queue, worker_parker))| { + let thread_builder = thread::Builder::new().name(format!("Worker #{}", id)); + + thread_builder + .spawn({ + let pool = pool.clone(); + let active_tasks = active_tasks.clone(); + move || { + let worker = Worker::new(local_queue, pool); + ACTIVE_TASKS.set(&active_tasks, || { + LOCAL_WORKER + .set(&worker, || run_local_worker(&worker, id, worker_parker)) + }); + } + }) + .unwrap() + }) + .collect(); + + // Wait until all workers are blocked on the signal barrier. + parker.park(); + assert!(pool.is_idle()); + + Self { + pool, + active_tasks, + parker, + join_handles, + } + } + + /// Spawns a task and returns a promise that can be polled to retrieve the + /// task's output. + pub(crate) fn spawn(&self, future: T) -> Promise + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + // Book a slot to store the task cancellation token. + let mut active_tasks = self.active_tasks.lock().unwrap(); + let task_entry = active_tasks.vacant_entry(); + + // Wrap the future so that it removes its cancel token from the + // executor's list when dropped. + let future = CancellableFuture::new(future, task_entry.key()); + + let (promise, runnable, cancel_token) = + task::spawn(future, schedule_task, self.pool.executor_id); + + task_entry.insert(cancel_token); + self.pool.global_queue.insert_task(runnable); + + self.pool.activate_worker(); + + promise + } + + /// Spawns a task which output will never be retrieved. + /// + /// This is mostly useful to avoid undue reference counting for futures that + /// return a `()` type. + pub(crate) fn spawn_and_forget(&self, future: T) + where + T: Future + Send + 'static, + T::Output: Send + 'static, + { + // Book a slot to store the task cancellation token. + let mut active_tasks = self.active_tasks.lock().unwrap(); + let task_entry = active_tasks.vacant_entry(); + + // Wrap the future so that it removes its cancel token from the + // executor's list when dropped. + let future = CancellableFuture::new(future, task_entry.key()); + + let (runnable, cancel_token) = + task::spawn_and_forget(future, schedule_task, self.pool.executor_id); + + task_entry.insert(cancel_token); + self.pool.global_queue.insert_task(runnable); + + self.pool.activate_worker(); + } + + /// Let the executor run, blocking until all futures have completed or until + /// the executor deadlocks. + pub(crate) fn run(&mut self) { + loop { + if let Some(worker_panic) = self.pool.take_panic() { + panic::resume_unwind(worker_panic); + } + if self.pool.is_idle() { + return; + } + + self.parker.park(); + } + } +} + +impl Drop for Executor { + fn drop(&mut self) { + // Force all threads to return. + self.pool.trigger_termination(); + for join_handle in self.join_handles.drain(0..) { + join_handle.join().unwrap(); + } + + // Drop all tasks that have not completed. + // + // A local worker must be set because some tasks may schedule other + // tasks when dropped, which requires that a local worker be available. + let worker = Worker::new(LocalQueue::new(), self.pool.clone()); + LOCAL_WORKER.set(&worker, || { + // Cancel all pending futures. + // + // `ACTIVE_TASKS` is explicitly unset to prevent + // `CancellableFuture::drop()` from trying to remove its own token + // from the list of active tasks as this would result in a reentrant + // lock. 
This is mainly to stay on the safe side: `ACTIVE_TASKS` + // should not be set on this thread anyway, unless for some reason + // the executor runs inside another executor. + ACTIVE_TASKS.unset(|| { + let mut tasks = self.active_tasks.lock().unwrap(); + for task in tasks.drain() { + task.cancel(); + } + + // Some of the dropped tasks may have scheduled other tasks that + // were not yet cancelled, preventing them from being dropped + // upon cancellation. This is OK: the scheduled tasks will be + // dropped when the local and global queues are dropped, and + // they cannot re-schedule one another since all tasks were + // cancelled. + }); + }); + } +} + +// A `Future` wrapper that removes its cancellation token from the executor's +// list of active tasks when dropped. +struct CancellableFuture { + inner: T, + cancellation_key: usize, +} +impl CancellableFuture { + fn new(fut: T, cancellation_key: usize) -> Self { + Self { + inner: fut, + cancellation_key, + } + } +} +impl Future for CancellableFuture { + type Output = T::Output; + + #[inline(always)] + fn poll( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll { + unsafe { self.map_unchecked_mut(|s| &mut s.inner).poll(cx) } + } +} +impl Drop for CancellableFuture { + fn drop(&mut self) { + // Remove the task from the list of active tasks if the future is + // dropped on a worker thread. Otherwise do nothing and let the + // executor's drop handler do the cleanup. + let _ = ACTIVE_TASKS.map(|active_tasks| { + // Don't unwrap on `lock()` because this function can be called from + // a destructor and should not panic. In the worse case, the cancel + // token will be left in the list of active tasks, which does + // prevents eager task deallocation but does not cause any issue + // otherwise. + if let Ok(mut active_tasks) = active_tasks.lock() { + let _cancel_token = active_tasks.try_remove(self.cancellation_key); + } + }); + } +} + +// Schedules a `Runnable`. +fn schedule_task(task: Runnable, executor_id: usize) { + LOCAL_WORKER + .map(|worker| { + // Check that this task was indeed spawned on this executor. + assert_eq!( + executor_id, worker.pool.executor_id, + "Tasks must be awaken on the same executor they are spawned on" + ); + + // Store the task in the fast slot and retrieve the one that was + // formerly stored, if any. + let prev_task = match worker.fast_slot.replace(Some(task)) { + // If there already was a task in the slot, proceed so it can be + // moved to a task queue. + Some(t) => t, + // Otherwise return immediately: this task cannot be stolen so + // there is no point in activating a sibling worker. + None => return, + }; + + // Push the previous task to the local queue if possible or on the + // global queue otherwise. + if let Err(prev_task) = worker.local_queue.push(prev_task) { + // The local queue is full. Try to move half of it to the global + // queue; if this fails, just push one task to the global queue. + if let Ok(drain) = worker.local_queue.drain(|_| Bucket::capacity()) { + worker + .pool + .global_queue + .push_bucket(Bucket::from_iter(drain)); + worker.local_queue.push(prev_task).unwrap(); + } else { + worker.pool.global_queue.insert_task(prev_task); + } + } + + // A task has been pushed to the local or global queue: try to + // activate another worker if no worker is currently searching for a + // task. 
+ if worker.pool.searching_worker_count() == 0 { + worker.pool.activate_worker_relaxed(); + } + }) + .expect("Tasks may not be awaken outside executor threads"); +} + +/// Processes all incoming tasks on a worker thread until the `Terminate` signal +/// is received or until it panics. +fn run_local_worker(worker: &Worker, id: usize, parker: Parker) { + let result = panic::catch_unwind(AssertUnwindSafe(|| { + // Set how long to spin when searching for a task. + const MAX_SEARCH_DURATION: Duration = Duration::from_nanos(1000); + + // Seed a thread RNG with the worker ID. + let rng = Rng::new(id as u64); + + loop { + // Signal barrier: park until notified to continue or terminate. + if worker.pool.set_worker_inactive(id) == PoolState::Idle { + // If this worker was the last active worker, it is necessary to + // check again whether the global queue is not populated. This + // could happen if the executor thread pushed a task to the + // global queue but could not activate a new worker because all + // workers were then activated. + if !worker.pool.global_queue.is_empty() { + worker.pool.set_worker_active(id); + } else { + worker.pool.executor_unparker.unpark(); + parker.park(); + } + } else { + parker.park(); + } + if worker.pool.termination_is_triggered() { + return; + } + + // We may spin for a little while: start counting. + let mut search_start = Instant::now(); + + // Process the tasks one by one. + loop { + // Check the global queue first. + if let Some(bucket) = worker.pool.global_queue.pop_bucket() { + let bucket_iter = bucket.into_iter(); + + // There is a _very_ remote possibility that, even though + // the local queue is empty, it has temporarily too little + // spare capacity for the bucket. This could happen because + // a concurrent steal operation could be preempted for all + // the time it took to pop and process the remaining tasks + // and hasn't released the stolen capacity yet. + // + // Unfortunately, we cannot just skip checking the global + // queue altogether when there isn't enough spare capacity + // in the local queue, as this could lead to a race: suppose + // that (1) this thread has earlier pushed tasks onto the + // global queue, and (2) the stealer has processed all + // stolen tasks before this thread sees the capacity + // restored and at the same time (3) the stealer does not + // yet see the tasks this thread pushed to the global queue; + // in such scenario, both this thread and the stealer thread + // may park and leave unprocessed tasks in the global queue. + // + // This is the only instance where spinning is used, as the + // probability of this happening is close to zero and the + // complexity of a signaling mechanism (condvar & friends) + // wouldn't carry its weight. + while worker.local_queue.spare_capacity() < bucket_iter.len() {} + + // Since empty buckets are never pushed onto the global + // queue, we should now have at least one task to process. + worker.local_queue.extend(bucket_iter); + } else { + // The global queue is empty. Try to steal from active + // siblings. + let mut stealers = worker.pool.shuffled_stealers(Some(id), &rng); + if stealers.all(|stealer| { + stealer + .steal_and_pop(&worker.local_queue, |n| n - n / 2) + .map(|task| { + let prev_task = worker.fast_slot.replace(Some(task)); + assert!(prev_task.is_none()); + }) + .is_err() + }) { + // Give up if unsuccessful for too long. + if (Instant::now() - search_start) > MAX_SEARCH_DURATION { + worker.pool.end_worker_search(); + break; + } + + // Re-try. 
+ continue; + } + } + + // Signal the end of the search so that another worker can be + // activated when a new task is scheduled. + worker.pool.end_worker_search(); + + // Pop tasks from the fast slot or the local queue. + while let Some(task) = worker.fast_slot.take().or_else(|| worker.local_queue.pop()) + { + if worker.pool.termination_is_triggered() { + return; + } + task.run(); + } + + // Resume the search for tasks. + worker.pool.begin_worker_search(); + search_start = Instant::now(); + } + } + })); + + // Propagate the panic, if any. + if let Err(panic) = result { + worker.pool.register_panic(panic); + worker.pool.trigger_termination(); + worker.pool.executor_unparker.unpark(); + } +} diff --git a/asynchronix/src/runtime/executor/find_bit.rs b/asynchronix/src/runtime/executor/find_bit.rs new file mode 100644 index 0000000..cf18c8e --- /dev/null +++ b/asynchronix/src/runtime/executor/find_bit.rs @@ -0,0 +1,190 @@ +/// Find the position of the `Nᵗʰ` set bit starting the search from the least +/// significant bit. +/// +/// A rank `N=1` specifies the first set bit starting from the LSB, a rank `N=2` +/// specifies the second set bit starting from the LSB, etc. +/// +/// The rank is to be provided as a closure that takes as argument the total +/// number of set bits in the value (same as `value.count_ones()`). The rank +/// returned by the closure should therefore never be greater than the closure's +/// argument. +/// +/// The returned position is 0-based. If the bit to be found is the LSB, or if +/// the provided rank is 0, the returned position is 0. If in turn the bit to be +/// found is the MSB, or if the specified rank is strictly greater than the +/// total number of bits set, the returned position is `usize::BITS - 1`. +/// +/// It is recommended to check for zero values before calling this function +/// since the returned position is then meaningless regardless of the rank. +/// +/// Implementation notes: the implementation is based on a tree-of-adders +/// algorithm followed by binary search, with overall theoretical complexity +/// `O(log(usize::BITS))`. In release mode the function is optimized to fully +/// branchless code with a pretty moderate cost of about 70 CPU cycles on x86-64 +/// and less than 60 instruction on aarch64, independently of the inputs. The +/// use of the `popcnt` intrinsic was also investigated to compute sub-sums in +/// the binary search but was found to be slower than the tree-of-adders. +#[allow(clippy::assertions_on_constants)] +pub(crate) fn find_bit usize>(value: usize, rank_fn: F) -> usize { + const P: usize = usize::BITS.trailing_zeros() as usize; // P = log2(usize::BITS) + const M: [usize; P] = sum_masks(); + + const _: () = assert!(usize::BITS.is_power_of_two()); + const _: () = assert!(P >= 2); + + // Partial sub-sums in groups of adjacent 2^p bits. + let mut sum = [0; P + 1]; + + // The zero-order sub-sums (2^p == 1) simply reflect the original value. + sum[0] = value; + + // Sub-sums for groups of 2 adjacent bits. The RHS is equivalent to + // `(sum[0] & M[0]) + ((sum[0] >> 1) & M[0]);`. + sum[1] = value - ((value >> 1) & M[0]); + + // Sub-sums for groups of 4 adjacent bits. + sum[2] = (sum[1] & M[1]) + ((sum[1] >> 2) & M[1]); + + // Sub-sums for groups of 8, 16 etc. adjacent bits. + // + // The below loop seems to be reliably unrolled in release mode, which in + // turn enables constant propagation and folding. 
To stay on the safe side, + // however, the sum masks `M[p]` are const-evaluated as they use integer + // division and would be otherwise very expensive should loop unrolling fail + // to kick in. + for p in 2..P { + // From p>=2, the mask can be applied to pairwise sums rather than to + // each operand separately as there is no risk that sub-sums will + // overflow on neighboring groups. The RHS is thus equivalent to + // `(sum[p] & M[p]) + ((sum[0] >> (1 << p)) & M[p]);` + sum[p + 1] = (sum[p] + (sum[p] >> (1 << p))) & M[p]; + } + + let mut rank = rank_fn(sum[P]); + + // Find the bit using binary search. + // + // The below loop seems to be reliably unrolled in release mode so the whole + // function is effectively optimized to fully branchless code. + let mut shift = 0usize; + for p in (0..P).rev() { + // Low bits mask of width 2^p. + let sub_mask = (1 << (1 << p)) - 1; + + // Bit sum of the lower half of the current subset. + let lower_sum = (sum[p] >> shift) & sub_mask; + + // Update the rank and the shift if the bit lies in the upper half. The + // below is a branchless version of: + // ``` + // if rank > lower_sum { + // rank -= lower_sum; + // shift += 1 << p; + // } + //``` + let cmp_mask = ((lower_sum as isize - rank as isize) >> (isize::BITS - 1)) as usize; + rank -= lower_sum & cmp_mask; + shift += (1 << p) & cmp_mask; + } + + shift +} + +/// Generates masks for the tree-of-adder bit summing algorithm. +/// +/// The masks are generated according to the pattern: +/// +/// ```text +/// m[0] = 0b010101010101...010101010101; +/// m[1] = 0b001100110011...001100110011; +/// m[2] = 0b000011110000...111100001111; +/// ... +/// m[P-1] = 0b000000000000...111111111111; +/// ``` +#[allow(clippy::assertions_on_constants)] +const fn sum_masks() -> [usize; usize::BITS.trailing_zeros() as usize] { + const P: usize = usize::BITS.trailing_zeros() as usize; // P = log2(usize::BITS) + const _: () = assert!( + usize::BITS == 1 << P, + "sum masks are only supported for `usize` with a power-of-two bit width" + ); + + let mut m = [0usize; P]; + let mut p = 0; + while p != P { + m[p] = !0 / (1 + (1 << (1 << p))); + p += 1; + } + + m +} + +#[cfg(all(test, not(asynchronix_loom), not(miri)))] +mod tests { + use super::super::rng; + use super::*; + + // Fuzzing test. + #[test] + fn find_bit_fuzz() { + const SAMPLES: usize = 100_000; + + #[inline(always)] + fn check(value: usize) { + let bitsum = value.count_ones() as usize; + + for rank in 1..=bitsum { + let pos = find_bit(value, |s| { + assert_eq!(s, bitsum); + + rank + }); + + // Check that the bit is indeed set. + assert!( + value & (1 << pos) != 0, + "input value: {:064b}\nrequested rank: {}\nreturned position: {}", + value, + rank, + pos + ); + + // Check that the bit is indeed of the requested rank. + assert_eq!( + rank, + (value & ((1 << pos) - 1)).count_ones() as usize + 1, + "input value: {:064b}\nrequested rank: {}\nreturned position: {}", + value, + rank, + pos + ); + } + } + + // Check behavior with a null input value. + let pos = find_bit(0, |s| { + assert_eq!(s, 0); + 0 + }); + assert_eq!(pos, 0); + + // Check behavior with other special values. + check(1); + check(1 << (usize::BITS - 1)); + check(usize::MAX); + + // Check behavior with random values. 
+ let rng = rng::Rng::new(12345); + for _ in 0..SAMPLES { + // Generate a random usize from one or more random u64 ...for the + // day we get 128+ bit platforms :-) + let mut r = rng.gen() as usize; + let mut shift = 64; + while shift < usize::BITS { + r |= (rng.gen() as usize) << shift; + shift += 64; + } + check(r); + } + } +} diff --git a/asynchronix/src/runtime/executor/injector.rs b/asynchronix/src/runtime/executor/injector.rs new file mode 100644 index 0000000..18c53fe --- /dev/null +++ b/asynchronix/src/runtime/executor/injector.rs @@ -0,0 +1,189 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Mutex; +use std::{mem, vec}; + +/// An unfair injector queue which stores batches of tasks in bounded-size +/// buckets. +/// +/// This is a simple but effective unfair injector design which, despite being +/// based on a mutex-protected `Vec`, ensures low contention and low latency in +/// most realistic cases. +/// +/// This is achieved by enabling the worker to push and pop batches of tasks +/// readily stored in buckets. Since only the handles to the buckets are moved +/// to and from the injector, pushing and popping a bucket is very fast and the +/// lock is therefore only held for a very short time. +/// +/// Also, since tasks in a bucket are memory-contiguous, they can be efficiently +/// copied to and from worker queues. The use of buckets also keeps the size of +/// the injector queue small (its size is the number of buckets) so +/// re-allocation is rare and fast. +/// +/// As an additional optimization, an `is_empty` atomic flag allows workers +/// seeking for tasks to skip taking the lock if the queue is likely to be +/// empty. +/// +/// The queue is not strictly LIFO. While buckets are indeed pushed and popped +/// in LIFO order, individual tasks are stored in a bucket at the front of the +/// queue and this bucket is only moved to the back of the queue when full. +#[derive(Debug)] +pub(crate) struct Injector { + inner: Mutex>>, + is_empty: AtomicBool, +} + +impl Injector { + /// Creates an empty injector queue. + /// + /// # Panic + /// + /// Panics if the capacity is 0. + pub(crate) const fn new() -> Self { + assert!(BUCKET_CAPACITY >= 1); + + Self { + inner: Mutex::new(Vec::new()), + is_empty: AtomicBool::new(true), + } + } + + /// Inserts a task. + /// + /// The task is inserted in a bucket at the front of the queue. Once this + /// bucket is full, it is moved to the back of the queue. + pub(crate) fn insert_task(&self, task: T) { + let mut inner = self.inner.lock().unwrap(); + + // Try to push the task onto the first bucket if it has enough capacity left. + if let Some(bucket) = inner.first_mut() { + if let Err(task) = bucket.push(task) { + // The bucket is full: move it to the back of the vector and + // replace it with a newly created bucket that contains the + // task. + let mut new_bucket = Bucket::new(); + let _ = new_bucket.push(task); // this cannot fail provided the capacity is >=1 + + let full_bucket = mem::replace(bucket, new_bucket); + inner.push(full_bucket); + } + + return; + } + + // The queue is empty: create a new bucket. + let mut new_bucket = Bucket::new(); + let _ = new_bucket.push(task); // this cannot fail provided the capacity is >=1 + + inner.push(new_bucket); + + // Ordering: this flag is only used as a hint so Relaxed ordering is + // enough. + self.is_empty.store(false, Ordering::Relaxed); + } + + /// Appends a bucket to the back of the queue. 
+ pub(crate) fn push_bucket(&self, bucket: Bucket) { + let mut inner = self.inner.lock().unwrap(); + + let was_empty = inner.is_empty(); + inner.push(bucket); + + // If the queue was empty before, update the flag. + if was_empty { + // Ordering: this flag is only used as a hint so Relaxed ordering is + // enough. + self.is_empty.store(false, Ordering::Relaxed); + } + } + + /// Takes the bucket at the back of the queue, if any. + /// + /// Note that this can spuriously return `None` even though the queue is + /// populated, unless a happens-before relationship exists between the + /// thread that populated the queue and the thread calling this method (this + /// is obviously the case if they are the same thread). + /// + /// This is not an issue in practice because it cannot lead to executor + /// deadlock. Indeed, if the last task/bucket was inserted by a worker + /// thread, this worker thread will always see that the injector queue is + /// populated (unless the bucket was already popped) so it will never exit + /// before all tasks in the injector are processed. Likewise, if the last + /// task/bucket was inserted by the main executor thread before + /// `Executor::run()` is called, the synchronization established when the + /// executor unparks worker threads ensures that the task is visible to all + /// unparked workers. + pub(crate) fn pop_bucket(&self) -> Option> { + // Ordering: this flag is only used as a hint so Relaxed ordering is + // enough. + if self.is_empty.load(Ordering::Relaxed) { + return None; + } + + let mut inner = self.inner.lock().unwrap(); + + let bucket = inner.pop(); + + if inner.is_empty() { + // Ordering: this flag is only used as a hint so Relaxed ordering is + // enough. + self.is_empty.store(true, Ordering::Relaxed); + } + + bucket + } + + /// Checks whether the queue is empty. + /// + /// Note that this can spuriously return `true` even though the queue is + /// populated, unless a happens-before relationship exists between the + /// thread that populated the queue and the thread calling this method (this + /// is obviously the case if they are the same thread). + pub(crate) fn is_empty(&self) -> bool { + self.is_empty.load(Ordering::Relaxed) + } +} + +/// A collection of tasks with a bounded size. +/// +/// This is just a very thin wrapper around a `Vec` that ensures that the +/// nominal capacity bound is never exceeded. +#[derive(Debug)] +pub(crate) struct Bucket(Vec); + +impl Bucket { + /// Creates a new bucket, allocating the full capacity upfront. + pub(crate) fn new() -> Self { + Self(Vec::with_capacity(CAPACITY)) + } + + /// Returns the bucket's nominal capacity. + pub(crate) const fn capacity() -> usize { + CAPACITY + } + + /// Appends one task if capacity allows; otherwise returns the task in the + /// error. 
+ pub(crate) fn push(&mut self, task: T) -> Result<(), T> { + if self.0.len() < CAPACITY { + self.0.push(task); + Ok(()) + } else { + Err(task) + } + } +} + +impl IntoIterator for Bucket { + type Item = T; + type IntoIter = vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl FromIterator for Bucket { + fn from_iter>(iter: U) -> Self { + Self(Vec::from_iter(iter.into_iter().take(CAPACITY))) + } +} diff --git a/asynchronix/src/runtime/executor/pool.rs b/asynchronix/src/runtime/executor/pool.rs new file mode 100644 index 0000000..34dfb91 --- /dev/null +++ b/asynchronix/src/runtime/executor/pool.rs @@ -0,0 +1,423 @@ +use std::any::Any; +use std::sync::atomic::{self, AtomicBool, AtomicUsize, Ordering}; +use std::sync::Mutex; + +use super::find_bit; +use super::injector::Injector; +use super::rng; +use super::{GlobalQueue, Stealer}; + +#[derive(Debug)] +pub(crate) struct Pool { + pub(crate) global_queue: GlobalQueue, + pub(crate) executor_id: usize, + pub(crate) executor_unparker: parking::Unparker, + state: PoolRegistry, + stealers: Box<[Stealer]>, + worker_unparkers: Box<[parking::Unparker]>, + searching_workers: AtomicUsize, + terminate_signal: AtomicBool, + worker_panic: Mutex>>, +} + +impl Pool { + /// Creates a new pool. + pub(crate) fn new( + executor_id: usize, + executor_unparker: parking::Unparker, + shared_data: impl Iterator, + ) -> Self { + let (stealers, worker_unparkers): (Vec<_>, Vec<_>) = shared_data.into_iter().unzip(); + let worker_unparkers = worker_unparkers.into_boxed_slice(); + + Self { + global_queue: Injector::new(), + executor_id, + executor_unparker, + state: PoolRegistry::new(worker_unparkers.len()), + stealers: stealers.into_boxed_slice(), + worker_unparkers, + searching_workers: AtomicUsize::new(0), + terminate_signal: AtomicBool::new(false), + worker_panic: Mutex::new(None), + } + } + + /// Marks all pool workers as active. + /// + /// Unparking the worker threads is the responsibility of the caller. + pub(crate) fn set_all_workers_active(&self) { + self.state.set_all_active(); + } + + /// Marks the specified worker as active. + /// + /// Unparking the worker thread is the responsibility of the caller. + pub(crate) fn set_worker_active(&self, worker_id: usize) { + self.state.set_active(worker_id); + } + + /// Marks the specified worker as idle. + /// + /// Parking the worker thread is the responsibility of the caller. + /// + /// If this was the last active worker, the main executor thread is + /// unparked. + pub(crate) fn set_worker_inactive(&self, worker_id: usize) -> PoolState { + self.state.set_inactive(worker_id) + } + + /// Unparks an idle worker if any is found, or do nothing otherwise. + /// + /// For performance reasons, no synchronization is established if no worker + /// is found, meaning that workers in other threads may later transition to + /// idle state without observing the tasks scheduled by the caller to this + /// method. If this is not tolerable (for instance if this method is called + /// from a non-worker thread), use the more expensive `activate_worker`. + pub(crate) fn activate_worker_relaxed(&self) { + if let Some(worker_id) = self.state.set_one_active_relaxed() { + self.searching_workers.fetch_add(1, Ordering::Relaxed); + self.worker_unparkers[worker_id].unpark(); + } + } + + /// Unparks an idle worker if any is found, or ensure that at least the last + /// worker to transition to idle state will observe all tasks previously + /// scheduled by the caller to this method. 
+ pub(crate) fn activate_worker(&self) { + if let Some(worker_id) = self.state.set_one_active() { + self.searching_workers.fetch_add(1, Ordering::Relaxed); + self.worker_unparkers[worker_id].unpark(); + } + } + + /// Check if the pool is idle, i.e. if no worker is currently active. + /// + /// If `true` is returned, it is guaranteed that all operations performed by + /// the now-inactive workers become visible in this thread. + pub(crate) fn is_idle(&self) -> bool { + self.state.pool_state() == PoolState::Idle + } + + /// Increments the count of workers actively searching for tasks. + pub(crate) fn begin_worker_search(&self) { + self.searching_workers.fetch_add(1, Ordering::Relaxed); + } + + /// Decrements the count of workers actively searching for tasks. + pub(crate) fn end_worker_search(&self) { + self.searching_workers.fetch_sub(1, Ordering::Relaxed); + } + + /// Returns the count of workers actively searching for tasks. + pub(crate) fn searching_worker_count(&self) -> usize { + self.searching_workers.load(Ordering::Relaxed) + } + + /// Triggers the termination signal and unparks all worker threads so they + /// can cleanly terminate. + pub(crate) fn trigger_termination(&self) { + self.terminate_signal.store(true, Ordering::Relaxed); + + self.state.set_all_active(); + for unparker in &*self.worker_unparkers { + unparker.unpark(); + } + } + + /// Returns true if the termination signal was triggered. + pub(crate) fn termination_is_triggered(&self) -> bool { + self.terminate_signal.load(Ordering::Relaxed) + } + + /// Registers a panic associated with the provided worker ID. + /// + /// If no panic is currently registered, the panic in argument is + /// registered. If a panic was already registered by a worker and was not + /// yet processed by the executor, then nothing is done. + pub(crate) fn register_panic(&self, panic: Box) { + let mut worker_panic = self.worker_panic.lock().unwrap(); + if worker_panic.is_none() { + *worker_panic = Some(panic); + } + } + + /// Takes a worker panic if any is registered. + pub(crate) fn take_panic(&self) -> Option> { + let mut worker_panic = self.worker_panic.lock().unwrap(); + worker_panic.take() + } + + /// Returns an iterator yielding the stealers associated with all active + /// workers, starting from a randomly selected active worker. The worker + /// which ID is provided in argument (if any) is excluded from the pool of + /// candidates. + pub(crate) fn shuffled_stealers<'a>( + &'a self, + excluded_worker_id: Option, + rng: &'_ rng::Rng, + ) -> ShuffledStealers<'a> { + // All active workers except the specified one are candidate for stealing. + let mut candidates = self.state.get_active(); + if let Some(excluded_worker_id) = excluded_worker_id { + candidates &= !(1 << excluded_worker_id); + } + + ShuffledStealers::new(candidates, &self.stealers, rng) + } +} + +pub(crate) struct ShuffledStealers<'a> { + stealers: &'a [Stealer], + // A bit-rotated bit field of the remaining candidate workers to steal from. + // If set, the LSB represents the next candidate. 
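The `register_panic`/`take_panic` pair implements a common panic-forwarding idiom: a worker parks the payload of the first panic in a shared slot and the executor later re-raises it on its own thread. A standalone sketch of that idiom (the names and the global slot are illustrative, not the executor's):

```rust
use std::any::Any;
use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
use std::sync::Mutex;

static WORKER_PANIC: Mutex<Option<Box<dyn Any + Send>>> = Mutex::new(None);

fn run_worker_job(job: impl FnOnce()) {
    if let Err(payload) = catch_unwind(AssertUnwindSafe(job)) {
        let mut slot = WORKER_PANIC.lock().unwrap();
        // Keep only the first panic; later ones are ignored.
        if slot.is_none() {
            *slot = Some(payload);
        }
    }
}

fn executor_propagate_panic() {
    if let Some(payload) = WORKER_PANIC.lock().unwrap().take() {
        // Resume unwinding on the executor thread.
        resume_unwind(payload);
    }
}
```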
+ candidates: usize, + next_candidate: usize, // index of the next candidate +} +impl<'a> ShuffledStealers<'a> { + fn new(candidates: usize, stealers: &'a [Stealer], rng: &'_ rng::Rng) -> Self { + let (candidates, next_candidate) = if candidates == 0 { + (0, 0) + } else { + let next_candidate = find_bit::find_bit(candidates, |count| { + rng.gen_bounded(count as u64) as usize + 1 + }); + + // Right-rotate the candidates so that the bit corresponding to the + // randomly selected worker becomes the LSB. + let candidate_count = stealers.len(); + let lower_mask = (1 << next_candidate) - 1; + let lower_bits = candidates & lower_mask; + let candidates = + (candidates >> next_candidate) | (lower_bits << (candidate_count - next_candidate)); + + (candidates, next_candidate) + }; + + Self { + stealers, + candidates, + next_candidate, + } + } +} + +impl<'a> Iterator for ShuffledStealers<'a> { + type Item = &'a Stealer; + + fn next(&mut self) -> Option { + if self.candidates == 0 { + return None; + } + + // Clear the bit corresponding to the current candidate worker. + self.candidates &= !1; + + let current_candidate = self.next_candidate; + + if self.candidates != 0 { + // Locate the next candidate worker and make it the LSB. + let shift = self.candidates.trailing_zeros(); + self.candidates >>= shift; + + // Update the next candidate. + self.next_candidate += shift as usize; + if self.next_candidate >= self.stealers.len() { + self.next_candidate -= self.stealers.len(); + } + } + + Some(&self.stealers[current_candidate]) + } +} + +/// Registry of active/idle worker threads. +/// +/// The registry only supports up to `usize::BITS` threads. +#[derive(Debug)] +struct PoolRegistry { + active_workers: AtomicUsize, + pool_size: usize, + #[cfg(feature = "dev-logs")] + record: Record, +} +impl PoolRegistry { + /// Creates a new pool registry. + /// + /// #Panic + /// + /// This will panic if the specified pool size is zero or is more than + /// `usize::BITS`. + fn new(pool_size: usize) -> Self { + assert!( + pool_size >= 1, + "the executor pool size should be at least one" + ); + assert!( + pool_size <= usize::BITS as usize, + "the executor pool size should be at most {}", + usize::BITS + ); + + Self { + active_workers: AtomicUsize::new(0), + pool_size, + #[cfg(feature = "dev-logs")] + record: Record::new(pool_size), + } + } + /// Returns the state of the pool. + /// + /// This operation has Acquire semantic, which guarantees that if the pool + /// state returned is `PoolState::Idle`, then all operations performed by + /// the now-inactive workers are visible. + fn pool_state(&self) -> PoolState { + // Ordering: this Acquire operation synchronizes with all Release + // RMWs in the `set_inactive` method via a release sequence. + let active_workers = self.active_workers.load(Ordering::Acquire); + if active_workers == 0 { + PoolState::Idle + } else { + PoolState::Busy + } + } + + /// Marks the specified worker as inactive. + /// + /// The specified worker must currently be marked as active. Returns + /// `PoolState::Idle` if this was the last active thread. + /// + /// If this is the last active worker (i.e. `PoolState::Idle` is returned), + /// then it is guaranteed that all operations performed by the now-inactive + /// workers and by unsuccessful callers to `set_one_active` are now visible. 
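The bit rotation performed in `ShuffledStealers::new` can be illustrated in isolation: the candidate mask is right-rotated over the pool width (not the machine word) so that a randomly chosen set bit becomes the LSB, after which the remaining set bits are visited in order. The values below are purely illustrative:

```rust
/// Right-rotates an `n`-bit candidate mask by `start` positions
/// (with 0 < start < n) so that bit `start` becomes the LSB.
fn rotate_candidates(candidates: usize, n: u32, start: u32) -> usize {
    assert!(start > 0 && start < n);
    let lower_bits = candidates & ((1 << start) - 1);
    (candidates >> start) | (lower_bits << (n - start))
}

fn main() {
    let candidates = 0b10110; // workers 1, 2 and 4 are candidates
    let rotated = rotate_candidates(candidates, 5, 2); // start from worker 2
    assert_eq!(rotated, 0b10101);
    // Iterating set bits from the LSB now visits workers 2, 4 and finally 1.
}
```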
+ fn set_inactive(&self, worker_id: usize) -> PoolState { + // Ordering: this Release operation synchronizes with the Acquire + // fence in the below conditional when the pool becomes idle, and/or + // with the Acquire state load in the `pool_state` method. + let active_workers = self + .active_workers + .fetch_and(!(1 << worker_id), Ordering::Release); + + if active_workers & !(1 << worker_id) == 0 { + // Ordering: this Acquire fence synchronizes with all Release + // RMWs in this and in the previous calls to `set_inactive` via a + // release sequence. + atomic::fence(Ordering::Acquire); + PoolState::Idle + } else { + PoolState::Busy + } + } + + /// Marks the specified worker as active. + fn set_active(&self, worker_id: usize) { + self.active_workers + .fetch_or(1 << worker_id, Ordering::Relaxed); + } + + /// Marks all workers as active. + fn set_all_active(&self) { + // Mark all workers as busy. + self.active_workers.store( + !0 >> (usize::BITS - self.pool_size as u32), + Ordering::Relaxed, + ); + } + + /// Marks a worker as active if any is found, otherwise do nothing. + /// + /// The worker ID is returned if successful. + fn set_one_active_relaxed(&self) -> Option { + let mut active_workers = self.active_workers.load(Ordering::Relaxed); + loop { + let first_idle_worker = active_workers.trailing_ones() as usize; + if first_idle_worker >= self.pool_size { + return None; + }; + active_workers = self + .active_workers + .fetch_or(1 << first_idle_worker, Ordering::Relaxed); + if active_workers & (1 << first_idle_worker) == 0 { + #[cfg(feature = "dev-logs")] + self.record.increment(first_idle_worker); + return Some(first_idle_worker); + } + } + } + + /// Marks a worker as active if any is found, otherwise ensure that all + /// memory operations made by the caller prior to this call are visible by + /// the last worker transitioning to idle state. + /// + /// The worker ID is returned if successful. + fn set_one_active(&self) -> Option { + let mut active_workers = self.active_workers.load(Ordering::Relaxed); + loop { + let first_idle_worker = active_workers.trailing_ones() as usize; + + if first_idle_worker >= self.pool_size { + // There is apparently no free worker, so a dummy RMW with + // Release ordering is performed with the sole purpose of + // synchronizing with the Acquire fence in `set_inactive` so + // that the last worker to transition to idle can see the tasks + // that were queued prior to this call. + let new_active_workers = self.active_workers.fetch_or(0, Ordering::Release); + if new_active_workers == active_workers { + return None; + } + active_workers = new_active_workers; + } else { + active_workers = self + .active_workers + .fetch_or(1 << first_idle_worker, Ordering::Relaxed); + if active_workers & (1 << first_idle_worker) == 0 { + #[cfg(feature = "dev-logs")] + self.record.increment(first_idle_worker); + return Some(first_idle_worker); + } + } + } + } + + /// Returns a bit field that indicates all active workers. 
+ fn get_active(&self) -> usize { + self.active_workers.load(Ordering::Relaxed) + } +} + +#[derive(PartialEq)] +pub(crate) enum PoolState { + Idle, + Busy, +} + +#[cfg(feature = "dev-logs")] +impl Drop for PoolRegistry { + fn drop(&mut self) { + println!("Thread launch count: {:?}", self.record.get()); + } +} + +#[cfg(feature = "dev-logs")] +#[derive(Debug)] +struct Record { + stats: Vec, +} + +#[cfg(feature = "dev-logs")] +impl Record { + fn new(worker_count: usize) -> Self { + let mut stats = Vec::new(); + stats.resize_with(worker_count, Default::default); + Self { stats } + } + fn increment(&self, worker_id: usize) { + self.stats[worker_id].fetch_add(1, Ordering::Relaxed); + } + fn get(&self) -> Vec { + self.stats + .iter() + .map(|s| s.load(Ordering::Relaxed)) + .collect() + } +} diff --git a/asynchronix/src/runtime/executor/queue.rs b/asynchronix/src/runtime/executor/queue.rs new file mode 100644 index 0000000..6089e9a --- /dev/null +++ b/asynchronix/src/runtime/executor/queue.rs @@ -0,0 +1,586 @@ +use std::fmt; +use std::iter::FusedIterator; +use std::marker::PhantomData; +use std::mem::{drop, MaybeUninit}; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release}; +use std::sync::Arc; + +use cache_padded::CachePadded; + +use crate::loom_exports::cell::UnsafeCell; +use crate::loom_exports::sync::atomic::{AtomicU32, AtomicU64}; +use crate::loom_exports::{debug_or_loom_assert, debug_or_loom_assert_eq}; + +pub(crate) use buffers::*; + +mod buffers; +#[cfg(test)] +mod tests; + +/// A double-ended FIFO work-stealing queue. +/// +/// The general operation of the queue is based on tokio's worker queue, itself +/// based on the Go scheduler's worker queue. +/// +/// The queue tracks its tail and head position within a ring buffer with +/// wrap-around integers, where the least significant bits specify the actual +/// buffer index. All positions have bit widths that are intentionally larger +/// than necessary for buffer indexing because: +/// - an extra bit is needed to disambiguate between empty and full buffers when +/// the start and end position of the buffer are equal, +/// - the worker head is also used as long-cycle counter to mitigate the risk of +/// ABA. +/// +#[derive(Debug)] +struct Queue> { + /// Positions of the head as seen by the worker (most significant bits) and + /// as seen by a stealer (least significant bits). + heads: CachePadded, + + /// Position of the tail. + tail: CachePadded, + + /// Queue items. + buffer: Box, + + /// Make the type !Send and !Sync by default. + _phantom: PhantomData>, +} + +impl> Queue { + /// Read an item at the given position. + /// + /// The position is automatically mapped to a valid buffer index using a + /// modulo operation. + /// + /// # Safety + /// + /// The item at the given position must have been initialized before and + /// cannot have been moved out. + /// + /// The caller must guarantee that the item at this position cannot be + /// written to or moved out concurrently. + #[inline] + unsafe fn read_at(&self, position: u32) -> T { + let index = (position & B::MASK) as usize; + (*self.buffer).as_ref()[index].with(|slot| slot.read().assume_init()) + } + + /// Write an item at the given position. + /// + /// The position is automatically mapped to a valid buffer index using a + /// modulo operation. + /// + /// # Note + /// + /// If an item is already initialized but was not moved out yet, it will be + /// leaked. 
+ /// + /// # Safety + /// + /// The caller must guarantee that the item at this position cannot be read + /// or written to concurrently. + #[inline] + unsafe fn write_at(&self, position: u32, item: T) { + let index = (position & B::MASK) as usize; + (*self.buffer).as_ref()[index].with_mut(|slot| slot.write(MaybeUninit::new(item))); + } + + /// Attempt to book `N` items for stealing where `N` is specified by a + /// closure which takes as argument the total count of available items. + /// + /// In case of success, the returned tuple contains the stealer head and an + /// item count at least equal to 1, in this order. + /// + /// # Errors + /// + /// An error is returned in the following cases: + /// 1) no item could be stolen, either because the queue is empty or because + /// `N` is 0, + /// 2) a concurrent stealing operation is ongoing. + /// + /// # Safety + /// + /// This function is not strictly unsafe, but because it initiates the + /// stealing operation by modifying the post-stealing head in + /// `push_count_and_head` without ever updating the `head` atomic variable, + /// its misuse can result in permanently blocking subsequent stealing + /// operations. + fn book_items(&self, mut count_fn: C, max_count: u32) -> Result<(u32, u32), StealError> + where + C: FnMut(usize) -> usize, + { + let mut heads = self.heads.load(Acquire); + + loop { + let (worker_head, stealer_head) = unpack_heads(heads); + + // Bail out if both heads differ because it means another stealing + // operation is concurrently ongoing. + if stealer_head != worker_head { + return Err(StealError::Busy); + } + + let tail = self.tail.load(Acquire); + let item_count = tail.wrapping_sub(worker_head); + + // `item_count` is tested now because `count_fn` may expect + // `item_count>0`. + if item_count == 0 { + return Err(StealError::Empty); + } + + // Unwind safety: it is OK if `count_fn` panics because no state has + // been modified yet. + let count = + (count_fn(item_count as usize).min(max_count as usize) as u32).min(item_count); + + // The special case `count_fn() == 0` must be tested specifically, + // because if the compare-exchange succeeds with `count=0`, the new + // worker head will be the same as the old one so other stealers + // will not detect that stealing is currently ongoing and may try to + // actually steal items and concurrently modify the position of the + // heads. + if count == 0 { + return Err(StealError::Empty); + } + + // Move the worker head only. + let new_heads = pack_heads(worker_head.wrapping_add(count), stealer_head); + + // Attempt to book the slots. Only one stealer can succeed since + // once this atomic is changed, the other thread will necessarily + // observe a mismatch between the two heads. + match self + .heads + .compare_exchange_weak(heads, new_heads, Acquire, Acquire) + { + Ok(_) => return Ok((stealer_head, count)), + // We lost the race to a concurrent pop or steal operation, or + // the CAS failed spuriously; try again. + Err(h) => heads = h, + } + } + } +} + +impl> Drop for Queue { + fn drop(&mut self) { + let worker_head = unpack_heads(self.heads.load(Relaxed)).0; + let tail = self.tail.load(Relaxed); + + let count = tail.wrapping_sub(worker_head); + + for offset in 0..count { + drop(unsafe { self.read_at(worker_head.wrapping_add(offset)) }) + } + } +} + +/// Handle for single-threaded FIFO push and pop operations. +#[derive(Debug)] +pub(crate) struct Worker> { + queue: Arc>, +} + +impl> Worker { + /// Creates a new queue and returns a `Worker` handle. 
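The wrap-around arithmetic used throughout `book_items` and the rest of the queue is easy to check in isolation: positions grow monotonically modulo 2³², the buffer index is `position & MASK`, and the occupied length is `tail.wrapping_sub(head)`. Keeping positions wider than the index is what distinguishes a full buffer from an empty one even though both map head and tail to the same slot.

```rust
const CAPACITY: u32 = 128;
const MASK: u32 = CAPACITY - 1;

fn len(head: u32, tail: u32) -> u32 {
    tail.wrapping_sub(head)
}

fn main() {
    let head = u32::MAX - 3; // positions may wrap past u32::MAX
    let tail = head.wrapping_add(CAPACITY);

    assert_eq!(len(head, tail), CAPACITY); // full...
    assert_eq!(head & MASK, tail & MASK); // ...yet head and tail share an index
    assert_eq!(len(head, head), 0); // empty
}
```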
+ pub(crate) fn new() -> Self { + let queue = Arc::new(Queue { + heads: CachePadded::new(AtomicU64::new(0)), + tail: CachePadded::new(AtomicU32::new(0)), + buffer: B::allocate(), + _phantom: PhantomData, + }); + + Worker { queue } + } + + /// Creates a new `Stealer` handle associated to this `Worker`. + /// + /// An arbitrary number of `Stealer` handles can be created, either using + /// this method or cloning an existing `Stealer` handle. + pub(crate) fn stealer(&self) -> Stealer { + Stealer { + queue: self.queue.clone(), + } + } + + /// Returns the number of items that can be successfully pushed onto the + /// queue. + /// + /// Note that that the spare capacity may be underestimated due to + /// concurrent stealing operations. + pub(crate) fn spare_capacity(&self) -> usize { + let capacity = >::CAPACITY; + let stealer_head = unpack_heads(self.queue.heads.load(Relaxed)).1; + let tail = self.queue.tail.load(Relaxed); + + // Aggregate count of available items (those which can be popped) and of + // items currently being stolen. + let len = tail.wrapping_sub(stealer_head); + + (capacity - len) as usize + } + + /// Attempts to push one item at the tail of the queue. + /// + /// # Errors + /// + /// This will fail if the queue is full, in which case the item is returned + /// as the error field. + pub(crate) fn push(&self, item: T) -> Result<(), T> { + let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1; + let tail = self.queue.tail.load(Relaxed); + + // Check that the buffer is not full. + if tail.wrapping_sub(stealer_head) >= B::CAPACITY { + return Err(item); + } + + // Store the item. + unsafe { self.queue.write_at(tail, item) }; + + // Make the item visible by moving the tail. + // + // Ordering: the Release ordering ensures that the subsequent + // acquisition of this atomic by a stealer will make the previous write + // visible. + self.queue.tail.store(tail.wrapping_add(1), Release); + + Ok(()) + } + + /// Attempts to push the content of an iterator at the tail of the queue. + /// + /// It is the responsibility of the caller to ensure that there is enough + /// spare capacity to accommodate all iterator items, for instance by + /// calling `[Worker::spare_capacity]` beforehand. Otherwise, the iterator + /// is dropped while still holding the items in excess. + pub(crate) fn extend>(&self, iter: I) { + let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1; + let mut tail = self.queue.tail.load(Relaxed); + + let max_tail = stealer_head.wrapping_add(B::CAPACITY); + for item in iter { + // Check whether the buffer is full. + if tail == max_tail { + break; + } + // Store the item. + unsafe { self.queue.write_at(tail, item) }; + tail = tail.wrapping_add(1); + } + + // Make the items visible by incrementing the push count. + // + // Ordering: the Release ordering ensures that the subsequent + // acquisition of this atomic by a stealer will make the previous write + // visible. + self.queue.tail.store(tail, Release); + } + + /// Attempts to pop one item from the head of the queue. + /// + /// This returns None if the queue is empty. + pub(crate) fn pop(&self) -> Option { + let mut heads = self.queue.heads.load(Acquire); + + let prev_worker_head = loop { + let (worker_head, stealer_head) = unpack_heads(heads); + let tail = self.queue.tail.load(Relaxed); + + // Check if the queue is empty. + if tail == worker_head { + return None; + } + + // Move the worker head. The weird cast from `bool` to `u32` is to + // steer the compiler towards branchless code. 
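A hypothetical single-threaded use of the `Worker` side of this API, assuming the type is generic over the item type and one of the `B*` buffer markers defined in the `buffers` submodule:

```rust
let worker: Worker<u32, B128> = Worker::new();

assert_eq!(worker.spare_capacity(), 128);
worker.push(1).unwrap();
worker.extend([2, 3, 4]);
assert_eq!(worker.spare_capacity(), 124);

// The queue is FIFO on the worker side: items pop in insertion order.
assert_eq!(worker.pop(), Some(1));
assert_eq!(worker.pop(), Some(2));
```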
+ let next_heads = pack_heads( + worker_head.wrapping_add(1), + stealer_head.wrapping_add((stealer_head == worker_head) as u32), + ); + + // Attempt to book the items. + let res = self + .queue + .heads + .compare_exchange_weak(heads, next_heads, AcqRel, Acquire); + + match res { + Ok(_) => break worker_head, + // We lost the race to a stealer or the CAS failed spuriously; try again. + Err(h) => heads = h, + } + }; + + unsafe { Some(self.queue.read_at(prev_worker_head)) } + } + + /// Returns an iterator that steals items from the head of the queue. + /// + /// The returned iterator steals up to `N` items, where `N` is specified by + /// a closure which takes as argument the total count of items available for + /// stealing. Upon success, the number of items ultimately stolen can be + /// from 1 to `N`, depending on the number of available items. + /// + /// # Beware + /// + /// All items stolen by the iterator should be moved out as soon as + /// possible, because until then or until the iterator is dropped, all + /// concurrent stealing operations will fail with [`StealError::Busy`]. + /// + /// # Leaking + /// + /// If the iterator is leaked before all stolen items have been moved out, + /// subsequent stealing operations will permanently fail with + /// [`StealError::Busy`]. + /// + /// # Errors + /// + /// An error is returned in the following cases: + /// 1) no item was stolen, either because the queue is empty or `N` is 0, + /// 2) a concurrent stealing operation is ongoing. + pub(crate) fn drain(&self, count_fn: C) -> Result, StealError> + where + C: FnMut(usize) -> usize, + { + let (head, count) = self.queue.book_items(count_fn, u32::MAX)?; + + Ok(Drain { + queue: &self.queue, + head, + from_head: head, + to_head: head.wrapping_add(count), + }) + } +} + +impl> Default for Worker { + fn default() -> Self { + Self::new() + } +} + +impl> UnwindSafe for Worker {} +impl> RefUnwindSafe for Worker {} +unsafe impl> Send for Worker {} + +/// A draining iterator for [`Worker`]. +/// +/// This iterator is created by [`Worker::drain`]. See its documentation for +/// more. +#[derive(Debug)] +pub(crate) struct Drain<'a, T, B: Buffer> { + queue: &'a Queue, + head: u32, + from_head: u32, + to_head: u32, +} + +impl<'a, T, B: Buffer> Iterator for Drain<'a, T, B> { + type Item = T; + + fn next(&mut self) -> Option { + if self.head == self.to_head { + return None; + } + + let item = Some(unsafe { self.queue.read_at(self.head) }); + + self.head = self.head.wrapping_add(1); + + // We cannot rely on the caller to call `next` again after the last item + // is yielded so the heads must be updated immediately when yielding the + // last item. + if self.head == self.to_head { + // Signal that the stealing operation has completed. 
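The contract spelled out for `drain` can be made concrete with a short sketch: while the returned iterator is alive, concurrent stealers observe `StealError::Busy`, and the booking is released as soon as the last requested item has been yielded.

```rust
let worker: Worker<u32, B128> = Worker::new();
let stealer = worker.stealer();
let other: Worker<u32, B128> = Worker::new();

worker.extend([1, 2, 3, 4]);

let mut drained = worker.drain(|n| n / 2).unwrap(); // book half of the items
assert_eq!(
    stealer.steal_and_pop(&other, |_| 1),
    Err(StealError::Busy) // the drain is still in flight
);
assert_eq!(drained.next(), Some(1));
assert_eq!(drained.next(), Some(2)); // last booked item: the booking is released
assert_eq!(drained.next(), None);
assert_eq!(stealer.steal_and_pop(&other, |_| 1), Ok(3));
```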
+ let mut heads = self.queue.heads.load(Relaxed); + loop { + let (worker_head, stealer_head) = unpack_heads(heads); + + debug_or_loom_assert_eq!(stealer_head, self.from_head); + + let res = self.queue.heads.compare_exchange_weak( + heads, + pack_heads(worker_head, worker_head), + AcqRel, + Acquire, + ); + + match res { + Ok(_) => break, + Err(h) => { + heads = h; + } + } + } + } + + item + } + + fn size_hint(&self) -> (usize, Option) { + let sz = self.to_head.wrapping_sub(self.head) as usize; + + (sz, Some(sz)) + } +} + +impl<'a, T, B: Buffer> ExactSizeIterator for Drain<'a, T, B> {} + +impl<'a, T, B: Buffer> FusedIterator for Drain<'a, T, B> {} + +impl<'a, T, B: Buffer> Drop for Drain<'a, T, B> { + fn drop(&mut self) { + // Drop all items and make sure the head is updated so that subsequent + // stealing operations can succeed. + for _item in self {} + } +} + +impl<'a, T, B: Buffer> UnwindSafe for Drain<'a, T, B> {} +impl<'a, T, B: Buffer> RefUnwindSafe for Drain<'a, T, B> {} +unsafe impl<'a, T: Send, B: Buffer> Send for Drain<'a, T, B> {} +unsafe impl<'a, T: Send, B: Buffer> Sync for Drain<'a, T, B> {} + +/// Handle for multi-threaded stealing operations. +#[derive(Debug)] +pub(crate) struct Stealer> { + queue: Arc>, +} + +impl> Stealer { + /// Attempts to steal items from the head of the queue, returning one of + /// them directly and moving the others to the tail of another queue. + /// + /// Up to `N` items are stolen (including the one returned directly), where + /// `N` is specified by a closure which takes as argument the total count of + /// items available for stealing. Upon success, one item is returned and + /// from 0 to `N-1` items are moved to the destination queue, depending on + /// the number of available items and the capacity of the destination queue. + /// + /// The returned item is the most recent one among the stolen items. + /// + /// # Errors + /// + /// An error is returned in the following cases: + /// 1) no item was stolen, either because the queue is empty or `N` is 0, + /// 2) a concurrent stealing operation is ongoing. + /// + /// Failure to transfer any item to the destination queue is not considered + /// an error as long as one element could be returned directly. This can + /// occur if the destination queue is full, if the source queue has only one + /// item or if `N` is 1. + pub(crate) fn steal_and_pop( + &self, + dest: &Worker, + count_fn: C, + ) -> Result + where + C: FnMut(usize) -> usize, + BDest: Buffer, + { + // Compute the free capacity of the destination queue. + // + // Ordering: see `Worker::push()` method. + let dest_tail = dest.queue.tail.load(Relaxed); + let dest_stealer_head = unpack_heads(dest.queue.heads.load(Acquire)).1; + let dest_free_capacity = BDest::CAPACITY - dest_tail.wrapping_sub(dest_stealer_head); + + debug_or_loom_assert!(dest_free_capacity <= BDest::CAPACITY); + + let (stealer_head, count) = self.queue.book_items(count_fn, dest_free_capacity + 1)?; + let transfer_count = count - 1; + + debug_or_loom_assert!(transfer_count <= dest_free_capacity); + + // Move all items but the last to the destination queue. + for offset in 0..transfer_count { + unsafe { + let item = self.queue.read_at(stealer_head.wrapping_add(offset)); + dest.queue.write_at(dest_tail.wrapping_add(offset), item); + } + } + + // Read the last item. + let last_item = unsafe { + self.queue + .read_at(stealer_head.wrapping_add(transfer_count)) + }; + + // Make the moved items visible by updating the destination tail position. 
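A sketch of the semantics described above: `steal_and_pop` books up to `N` items at the head of the source queue, transfers all but the most recent of them to the destination queue, and returns that last item directly.

```rust
let source: Worker<u32, B128> = Worker::new();
let dest: Worker<u32, B128> = Worker::new();
let stealer = source.stealer();

source.extend([10, 20, 30, 40]);

// Ask for two items: 10 is transferred to `dest` and 20 is returned directly.
assert_eq!(stealer.steal_and_pop(&dest, |_| 2), Ok(20));
assert_eq!(dest.pop(), Some(10));
assert_eq!(source.pop(), Some(30));
```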
+ // + // Ordering: see comments in the `push()` method. + dest.queue + .tail + .store(dest_tail.wrapping_add(transfer_count), Release); + + // Signal that the stealing operation has completed. + let mut heads = self.queue.heads.load(Relaxed); + loop { + let (worker_head, sh) = unpack_heads(heads); + + debug_or_loom_assert_eq!(stealer_head, sh); + + let res = self.queue.heads.compare_exchange_weak( + heads, + pack_heads(worker_head, worker_head), + AcqRel, + Acquire, + ); + + match res { + Ok(_) => return Ok(last_item), + Err(h) => { + heads = h; + } + } + } + } +} + +impl> Clone for Stealer { + fn clone(&self) -> Self { + Stealer { + queue: self.queue.clone(), + } + } +} + +impl> UnwindSafe for Stealer {} +impl> RefUnwindSafe for Stealer {} +unsafe impl> Send for Stealer {} +unsafe impl> Sync for Stealer {} + +/// Error returned when stealing is unsuccessful. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum StealError { + /// No item was stolen. + Empty, + /// Another concurrent stealing operation is ongoing. + Busy, +} + +impl fmt::Display for StealError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + StealError::Empty => write!(f, "cannot steal from empty queue"), + StealError::Busy => write!(f, "a concurrent steal operation is ongoing"), + } + } +} + +#[inline(always)] +/// Extract the worker head and stealer head (in this order) from packed heads. +fn unpack_heads(heads: u64) -> (u32, u32) { + ((heads >> u32::BITS) as u32, heads as u32) +} + +#[inline(always)] +/// Insert a new stealer head into packed heads. +fn pack_heads(worker_head: u32, stealer_head: u32) -> u64 { + ((worker_head as u64) << u32::BITS) | stealer_head as u64 +} diff --git a/asynchronix/src/runtime/executor/queue/buffers.rs b/asynchronix/src/runtime/executor/queue/buffers.rs new file mode 100644 index 0000000..bf787ff --- /dev/null +++ b/asynchronix/src/runtime/executor/queue/buffers.rs @@ -0,0 +1,100 @@ +//! Internal queue buffers of various sizes. + +use std::fmt::Debug; +use std::mem::MaybeUninit; + +use crate::loom_exports::cell::UnsafeCell; + +/// Marker trait for fixed-size buffers. +pub(crate) trait Buffer: private::Sealed { + /// Buffer size. + const CAPACITY: u32; + + #[doc(hidden)] + /// Buffer index bit mask. + const MASK: u32; + + #[doc(hidden)] + /// Buffer data type. + type Data: AsRef<[UnsafeCell>]> + Debug; + + #[doc(hidden)] + /// Returns an uninitialized buffer. + fn allocate() -> Box; +} + +macro_rules! make_buffer { + ($b:ident, $cap:expr) => { + #[doc = concat!("Marker type for buffers of capacity ", $cap, ".")] + #[derive(Copy, Clone, Debug)] + pub(crate) struct $b {} + + impl private::Sealed for $b {} + + impl Buffer for $b { + const CAPACITY: u32 = $cap; + + #[doc(hidden)] + const MASK: u32 = $cap - 1; + + #[doc(hidden)] + type Data = [UnsafeCell>; $cap]; + + #[doc(hidden)] + #[cfg(not(asynchronix_loom))] + fn allocate() -> Box { + // Safety: initializing an array of `MaybeUninit` items with + // `assume_init()` is valid, as per the `MaybeUninit` documentation. + // Admittedly the situation is slightly different here: the buffer is + // made of `MaybeUninit` elements wrapped in `UnsafeCell`s; however, the + // latter is a `repr(transparent)` type with a trivial constructor, so + // this should not make any difference. 
+ Box::new(unsafe { MaybeUninit::uninit().assume_init() }) + } + #[doc(hidden)] + #[cfg(asynchronix_loom)] + fn allocate() -> Box { + // Loom's `UnsafeCell` is not `repr(transparent)` and does not + // have a trivial constructor so initialization must be done + // element-wise. + fn make_fixed_size(buffer: Box<[T]>) -> Box<[T; $cap]> { + assert_eq!(buffer.len(), $cap); + + // Safety: The length was checked. + unsafe { Box::from_raw(Box::into_raw(buffer).cast()) } + } + + let mut buffer = Vec::with_capacity($cap); + for _ in 0..$cap { + buffer.push(UnsafeCell::new(MaybeUninit::uninit())); + } + + make_fixed_size(buffer.into_boxed_slice()) + } + } + }; +} + +// Define buffer capacities up to 2^15, which is the maximum that can be +// supported with 16-bit wide buffer positions (1 bit is required for +// disambiguation between full and empty buffer). +make_buffer!(B2, 2); +make_buffer!(B4, 4); +make_buffer!(B8, 8); +make_buffer!(B16, 16); +make_buffer!(B32, 32); +make_buffer!(B64, 64); +make_buffer!(B128, 128); +make_buffer!(B256, 256); +make_buffer!(B512, 512); +make_buffer!(B1024, 1024); +make_buffer!(B2048, 2048); +make_buffer!(B4096, 4096); +make_buffer!(B8192, 8192); +make_buffer!(B16384, 12384); +make_buffer!(B32768, 32768); + +/// Prevent public implementation of Buffer. +mod private { + pub(crate) trait Sealed {} +} diff --git a/asynchronix/src/runtime/executor/queue/tests.rs b/asynchronix/src/runtime/executor/queue/tests.rs new file mode 100644 index 0000000..1ba7a21 --- /dev/null +++ b/asynchronix/src/runtime/executor/queue/tests.rs @@ -0,0 +1,7 @@ +use super::*; + +#[cfg(not(asynchronix_loom))] +mod general; + +#[cfg(asynchronix_loom)] +mod loom; diff --git a/asynchronix/src/runtime/executor/queue/tests/general.rs b/asynchronix/src/runtime/executor/queue/tests/general.rs new file mode 100644 index 0000000..99ba144 --- /dev/null +++ b/asynchronix/src/runtime/executor/queue/tests/general.rs @@ -0,0 +1,240 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::thread::spawn; + +use super::*; + +// Rotate the internal ring buffer indices by `n`. 
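Since `MASK` is defined as `CAPACITY - 1` and used for index mapping in `read_at`/`write_at`, the capacities above only work if they are powers of two; the `B16384` entry, declared with capacity 12384, appears not to satisfy this. A quick sanity check of the relationship:

```rust
fn main() {
    // For a power-of-two capacity, `pos & MASK` is the same as `pos % CAPACITY`.
    const CAPACITY: u32 = 128;
    const MASK: u32 = CAPACITY - 1;
    for pos in [0u32, 1, 127, 128, 300, u32::MAX] {
        assert_eq!(pos & MASK, pos % CAPACITY);
    }

    // A non-power-of-two capacity breaks the mapping.
    const BAD_CAPACITY: u32 = 12384;
    const BAD_MASK: u32 = BAD_CAPACITY - 1;
    assert_ne!(13_000u32 & BAD_MASK, 13_000u32 % BAD_CAPACITY);
}
```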
+fn rotate>(worker: &Worker, n: usize) { + let stealer = worker.stealer(); + let dummy_worker = Worker::::new(); + + for _ in 0..n { + worker.push(T::default()).unwrap(); + stealer.steal_and_pop(&dummy_worker, |_| 1).unwrap(); + } +} + +#[test] +fn queue_single_threaded_steal() { + let rotations: &[_] = if cfg!(miri) { + &[42] + } else { + &[0, 255, 256, 257, 65535, 65536, 65537] + }; + for &rotation in rotations { + let worker1 = Worker::<_, B128>::new(); + let worker2 = Worker::<_, B128>::new(); + let stealer1 = worker1.stealer(); + rotate(&worker1, rotation); + rotate(&worker2, rotation); + + worker1.push(1).unwrap(); + worker1.push(2).unwrap(); + worker1.push(3).unwrap(); + worker1.push(4).unwrap(); + + assert_eq!(worker1.pop(), Some(1)); + assert_eq!(stealer1.steal_and_pop(&worker2, |_| 2), Ok(3)); + assert_eq!(worker1.pop(), Some(4)); + assert_eq!(worker1.pop(), None); + assert_eq!(worker2.pop(), Some(2)); + assert_eq!(worker2.pop(), None); + } +} + +#[test] +fn queue_self_steal() { + let rotations: &[_] = if cfg!(miri) { + &[42] + } else { + &[0, 255, 256, 257, 65535, 65536, 65537] + }; + for &rotation in rotations { + let worker = Worker::<_, B128>::new(); + rotate(&worker, rotation); + let stealer = worker.stealer(); + + worker.push(1).unwrap(); + worker.push(2).unwrap(); + worker.push(3).unwrap(); + worker.push(4).unwrap(); + + assert_eq!(worker.pop(), Some(1)); + assert_eq!(stealer.steal_and_pop(&worker, |_| 2), Ok(3)); + assert_eq!(worker.pop(), Some(4)); + assert_eq!(worker.pop(), Some(2)); + assert_eq!(worker.pop(), None); + } +} + +#[test] +fn queue_drain_steal() { + let rotations: &[_] = if cfg!(miri) { + &[42] + } else { + &[0, 255, 256, 257, 65535, 65536, 65537] + }; + for &rotation in rotations { + let worker = Worker::<_, B128>::new(); + let dummy_worker = Worker::<_, B128>::new(); + let stealer = worker.stealer(); + rotate(&worker, rotation); + + worker.push(1).unwrap(); + worker.push(2).unwrap(); + worker.push(3).unwrap(); + worker.push(4).unwrap(); + + assert_eq!(worker.pop(), Some(1)); + let mut iter = worker.drain(|n| n - 1).unwrap(); + assert_eq!( + stealer.steal_and_pop(&dummy_worker, |_| 1), + Err(StealError::Busy) + ); + assert_eq!(iter.next(), Some(2)); + assert_eq!( + stealer.steal_and_pop(&dummy_worker, |_| 1), + Err(StealError::Busy) + ); + assert_eq!(iter.next(), Some(3)); + assert_eq!(stealer.steal_and_pop(&dummy_worker, |_| 1), Ok(4)); + assert_eq!(iter.next(), None); + } +} + +#[test] +fn queue_extend_basic() { + let rotations: &[_] = if cfg!(miri) { + &[42] + } else { + &[0, 255, 256, 257, 65535, 65536, 65537] + }; + for &rotation in rotations { + let worker = Worker::<_, B128>::new(); + rotate(&worker, rotation); + + let initial_capacity = worker.spare_capacity(); + worker.push(1).unwrap(); + worker.push(2).unwrap(); + worker.extend([3, 4]); + + assert_eq!(worker.spare_capacity(), initial_capacity - 4); + assert_eq!(worker.pop(), Some(1)); + assert_eq!(worker.pop(), Some(2)); + assert_eq!(worker.pop(), Some(3)); + assert_eq!(worker.pop(), Some(4)); + assert_eq!(worker.pop(), None); + } +} + +#[test] +fn queue_extend_overflow() { + let rotations: &[_] = if cfg!(miri) { + &[42] + } else { + &[0, 255, 256, 257, 65535, 65536, 65537] + }; + for &rotation in rotations { + let worker = Worker::<_, B128>::new(); + rotate(&worker, rotation); + + let initial_capacity = worker.spare_capacity(); + worker.push(1).unwrap(); + worker.push(2).unwrap(); + worker.extend(3..); // try to append infinitely many integers + + assert_eq!(worker.spare_capacity(), 0); + for i 
in 1..=initial_capacity { + assert_eq!(worker.pop(), Some(i)); + } + assert_eq!(worker.pop(), None); + } +} + +#[test] +fn queue_multi_threaded_steal() { + use crate::runtime::executor::rng::Rng; + + const N: usize = if cfg!(miri) { 50 } else { 1_000_000 }; + + let counter = Arc::new(AtomicUsize::new(0)); + let worker = Worker::<_, B128>::new(); + let stealer = worker.stealer(); + + let counter0 = counter.clone(); + let stealer1 = stealer.clone(); + let counter1 = counter.clone(); + let stealer = stealer; + let counter2 = counter; + + // Worker thread. + // + // Push all numbers from 0 to N, popping one from time to time. + let t0 = spawn(move || { + let mut i = 0; + let rng = Rng::new(0); + let mut stats = vec![0; N]; + 'outer: loop { + for _ in 0..(rng.gen_bounded(10) + 1) { + while let Err(_) = worker.push(i) {} + i += 1; + if i == N { + break 'outer; + } + } + if let Some(j) = worker.pop() { + stats[j] += 1; + counter0.fetch_add(1, Ordering::Relaxed); + } + } + + stats + }); + + // Stealer threads. + // + // Repeatedly steal a random number of items. + fn steal_periodically( + stealer: Stealer, + counter: Arc, + rng_seed: u64, + ) -> Vec { + let mut stats = vec![0; N]; + let rng = Rng::new(rng_seed); + let dest_worker = Worker::<_, B128>::new(); + + loop { + if let Ok(i) = + stealer.steal_and_pop(&dest_worker, |m| rng.gen_bounded(m as u64 + 1) as usize) + { + stats[i] += 1; // the popped item + counter.fetch_add(1, Ordering::Relaxed); + while let Some(j) = dest_worker.pop() { + stats[j] += 1; + counter.fetch_add(1, Ordering::Relaxed); + } + } + let count = counter.load(Ordering::Relaxed); + if count == N { + break; + } + assert!(count < N); + } + + stats + } + let t1 = spawn(move || steal_periodically(stealer1, counter1, 1)); + let t2 = spawn(move || steal_periodically(stealer, counter2, 2)); + let mut stats = Vec::new(); + stats.push(t0.join().unwrap()); + stats.push(t1.join().unwrap()); + stats.push(t2.join().unwrap()); + for i in 0..N { + let mut count = 0; + for j in 0..stats.len() { + count += stats[j][i]; + } + assert_eq!(count, 1); + } +} diff --git a/asynchronix/src/runtime/executor/queue/tests/loom.rs b/asynchronix/src/runtime/executor/queue/tests/loom.rs new file mode 100644 index 0000000..ca979df --- /dev/null +++ b/asynchronix/src/runtime/executor/queue/tests/loom.rs @@ -0,0 +1,323 @@ +use super::*; + +use ::loom::model::Builder; +use ::loom::thread; + +// Test adapted from the Tokio test suite. +#[test] +fn loom_queue_basic_steal() { + const DEFAULT_PREEMPTION_BOUND: usize = 3; + const LOOP_COUNT: usize = 2; + const ITEM_COUNT_PER_LOOP: usize = 3; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let worker = Worker::::new(); + let stealer = worker.stealer(); + + let th = thread::spawn(move || { + let dest_worker = Worker::::new(); + let mut n = 0; + + for _ in 0..3 { + if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() { + n += 1; + while dest_worker.pop().is_some() { + n += 1; + } + } + } + + n + }); + + let mut n = 0; + + for _ in 0..LOOP_COUNT { + for _ in 0..(ITEM_COUNT_PER_LOOP - 1) { + if worker.push(42).is_err() { + n += 1; + } + } + + if worker.pop().is_some() { + n += 1; + } + + // Push another task + if worker.push(42).is_err() { + n += 1; + } + + while worker.pop().is_some() { + n += 1; + } + } + + n += th.join().unwrap(); + + assert_eq!(ITEM_COUNT_PER_LOOP * LOOP_COUNT, n); + }); +} + +// Test adapted from the Tokio test suite. 
+#[test] +fn loom_queue_drain_overflow() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + const ITEM_COUNT: usize = 7; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let worker = Worker::::new(); + let stealer = worker.stealer(); + + let th = thread::spawn(move || { + let dest_worker = Worker::::new(); + let mut n = 0; + + if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() { + n += 1; + while dest_worker.pop().is_some() { + n += 1; + } + } + + n + }); + + let mut n = 0; + + // Push an item, pop an item. + worker.push(42).unwrap(); + + if worker.pop().is_some() { + n += 1; + } + + for _ in 0..(ITEM_COUNT - 1) { + if worker.push(42).is_err() { + // Spin until some of the old items can be drained to make room + // for the new item. + loop { + if let Ok(drain) = worker.drain(|n| n - n / 2) { + for _ in drain { + n += 1; + } + assert_eq!(worker.push(42), Ok(())); + break; + } + thread::yield_now(); + } + } + } + + n += th.join().unwrap(); + + while worker.pop().is_some() { + n += 1; + } + + assert_eq!(ITEM_COUNT, n); + }); +} + +// Test adapted from the Tokio test suite. +#[test] +fn loom_queue_multi_stealer() { + const DEFAULT_PREEMPTION_BOUND: usize = 3; + const ITEM_COUNT: usize = 5; + + fn steal_half(stealer: Stealer) -> usize { + let dest_worker = Worker::::new(); + + if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() { + let mut n = 1; + while dest_worker.pop().is_some() { + n += 1; + } + + n + } else { + 0 + } + } + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let worker = Worker::::new(); + let stealer1 = worker.stealer(); + let stealer2 = worker.stealer(); + + let th1 = thread::spawn(move || steal_half(stealer1)); + let th2 = thread::spawn(move || steal_half(stealer2)); + + let mut n = 0; + for _ in 0..ITEM_COUNT { + if worker.push(42).is_err() { + n += 1; + } + } + + while worker.pop().is_some() { + n += 1; + } + + n += th1.join().unwrap(); + n += th2.join().unwrap(); + + assert_eq!(ITEM_COUNT, n); + }); +} + +// Test adapted from the Tokio test suite. +#[test] +fn loom_queue_chained_steal() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let w1 = Worker::::new(); + let w2 = Worker::::new(); + let s1 = w1.stealer(); + let s2 = w2.stealer(); + + for _ in 0..4 { + w1.push(42).unwrap(); + w2.push(42).unwrap(); + } + + let th = thread::spawn(move || { + let dest_worker = Worker::::new(); + let _ = s1.steal_and_pop(&dest_worker, |n| n - n / 2); + + while dest_worker.pop().is_some() {} + }); + + while w1.pop().is_some() {} + + let _ = s2.steal_and_pop(&w1, |n| n - n / 2); + + th.join().unwrap(); + + while w1.pop().is_some() {} + while w2.pop().is_some() {} + }); +} + +// A variant of multi-stealer with concurrent push. 
+#[test] +fn loom_queue_push_and_steal() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + fn steal_half(stealer: Stealer) -> usize { + let dest_worker = Worker::::new(); + + if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() { + let mut n = 1; + while dest_worker.pop().is_some() { + n += 1; + } + + n + } else { + 0 + } + } + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let worker = Worker::::new(); + let stealer1 = worker.stealer(); + let stealer2 = worker.stealer(); + + let th1 = thread::spawn(move || steal_half(stealer1)); + let th2 = thread::spawn(move || steal_half(stealer2)); + + worker.push(42).unwrap(); + worker.push(42).unwrap(); + + let mut n = 0; + while worker.pop().is_some() { + n += 1; + } + + n += th1.join().unwrap(); + n += th2.join().unwrap(); + + assert_eq!(n, 2); + }); +} + +// Attempts extending the queue based on `Worker::free_capacity`. +#[test] +fn loom_queue_extend() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + fn steal_half(stealer: Stealer) -> usize { + let dest_worker = Worker::::new(); + + if stealer.steal_and_pop(&dest_worker, |n| n - n / 2).is_ok() { + let mut n = 1; + while dest_worker.pop().is_some() { + n += 1; + } + + n + } else { + 0 + } + } + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(|| { + let worker = Worker::::new(); + let stealer1 = worker.stealer(); + let stealer2 = worker.stealer(); + + let th1 = thread::spawn(move || steal_half(stealer1)); + let th2 = thread::spawn(move || steal_half(stealer2)); + + worker.push(1).unwrap(); + worker.push(7).unwrap(); + + // Try to fill up the queue. + let spare_capacity = worker.spare_capacity(); + assert!(spare_capacity >= 2); + worker.extend(0..spare_capacity); + + let mut n = 0; + + n += th1.join().unwrap(); + n += th2.join().unwrap(); + + while worker.pop().is_some() { + n += 1; + } + + assert_eq!(2 + spare_capacity, n); + }); +} diff --git a/asynchronix/src/runtime/executor/rng.rs b/asynchronix/src/runtime/executor/rng.rs new file mode 100644 index 0000000..6a3604d --- /dev/null +++ b/asynchronix/src/runtime/executor/rng.rs @@ -0,0 +1,72 @@ +use std::cell::Cell; + +/// A pseudo-random number generator based on Wang Yi's Wyrand. +/// +/// See: https://github.com/wangyi-fudan/wyhash +#[derive(Clone, Debug)] +pub(crate) struct Rng { + seed: Cell, +} + +impl Rng { + /// Creates a new RNG with the provided seed. + pub(crate) fn new(seed: u64) -> Self { + Self { + seed: Cell::new(seed), + } + } + + /// Generates a pseudo-random number within the range `0..2⁶⁴`. + pub(crate) fn gen(&self) -> u64 { + let seed = self.seed.get().wrapping_add(0xA0761D6478BD642F); + self.seed.set(seed); + let t = seed as u128 * (seed ^ 0xE7037ED1A0B428DB) as u128; + (t as u64) ^ (t >> 64) as u64 + } + + /// Generates a pseudo-random number within the range `0..upper_bound`. + /// + /// This generator is biased as it uses the fast (but crude) multiply-shift + /// method. The bias is negligible, however, as long as the bound is much + /// smaller than 2⁶⁴. 
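The multiply-shift reduction used by `gen_bounded` maps a uniform 64-bit sample `x` to `0..upper_bound` by taking the high 64 bits of the 128-bit product, i.e. `floor(x * upper_bound / 2⁶⁴)`. A small standalone check:

```rust
fn reduce(x: u64, upper_bound: u64) -> u64 {
    ((x as u128 * upper_bound as u128) >> 64) as u64
}

fn main() {
    assert_eq!(reduce(0, 6), 0);
    assert_eq!(reduce(u64::MAX, 6), 5); // the upper bound is never reached
    // Each sixth of the 64-bit input range maps to one face of a die.
    assert_eq!(reduce(u64::MAX / 6 + 1, 6), 1);
}
```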
+ pub(crate) fn gen_bounded(&self, upper_bound: u64) -> u64 { + ((self.gen() as u128 * upper_bound as u128) >> 64) as u64 + } +} + +#[cfg(all(test, not(asynchronix_loom), not(miri)))] +mod tests { + use super::*; + + #[test] + fn rng_gen_bounded_chi2() { + const RNG_SEED: u64 = 12345; + const DICE_ROLLS: u64 = 1_000_000; + const DICE_FACES: u64 = 6; // beware: modify the p-values if you change this. + const CHI2_PVAL_LOWER: f64 = 0.210; // critical chi2 for lower p-value = 0.001 and DoF = DICE_FACES - 1 + const CHI2_PVAL_UPPER: f64 = 20.515; // critical chi2 for upper p-value = 0.999 and DoF = DICE_FACES - 1. + + let rng = Rng::new(RNG_SEED); + + let mut tally = [0u64; 6]; + + for _ in 0..DICE_ROLLS { + let face = rng.gen_bounded(DICE_FACES); + tally[face as usize] += 1; + } + + let expected = DICE_ROLLS as f64 / DICE_FACES as f64; + + let chi2 = (0..DICE_FACES).fold(0f64, |chi2, face| { + let actual = tally[face as usize] as f64; + + chi2 + (actual - expected) * (actual - expected) / expected + }); + + println!("tally = {:?}", tally); + println!("chi2 = {}", chi2); + + assert!(chi2 > CHI2_PVAL_LOWER); + assert!(chi2 < CHI2_PVAL_UPPER); + } +} diff --git a/asynchronix/src/runtime/executor/task.rs b/asynchronix/src/runtime/executor/task.rs new file mode 100644 index 0000000..8a08e76 --- /dev/null +++ b/asynchronix/src/runtime/executor/task.rs @@ -0,0 +1,398 @@ +extern crate alloc; + +use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; +use std::future::Future; +use std::mem::{self, ManuallyDrop}; +use std::task::{RawWaker, RawWakerVTable}; + +use crate::loom_exports::cell::UnsafeCell; +use crate::loom_exports::sync::atomic::{self, AtomicU64, Ordering}; + +mod cancel_token; +mod promise; +mod runnable; +mod util; + +#[cfg(test)] +mod tests; + +pub(crate) use cancel_token::CancelToken; +pub(crate) use promise::{Promise, Stage}; +pub(crate) use runnable::Runnable; + +use self::util::{runnable_exists, RunOnDrop}; + +/// Flag indicating that the future has not been polled to completion yet. +const POLLING: u64 = 1 << 0; +/// Flag indicating that the task has been cancelled or that the output has +/// already been moved out. +const CLOSED: u64 = 1 << 1; +/// A single reference count increment. +const REF_INC: u64 = 1 << 2; +/// A single wake count increment. +const WAKE_INC: u64 = 1 << 33; +/// Reference count mask. +const REF_MASK: u64 = !(REF_INC - 1) & (WAKE_INC - 1); +/// Wake count mask. +const WAKE_MASK: u64 = !(WAKE_INC - 1); +/// Critical value of the reference count at which preventive measures must be +/// enacted to prevent counter overflow. +const REF_CRITICAL: u64 = (REF_MASK / 2) & REF_MASK; +/// Critical value of the wake count at which preventive measures must be +/// enacted to prevent counter overflow. +const WAKE_CRITICAL: u64 = (WAKE_MASK / 2) & WAKE_MASK; + +/// Either a future, its output, or uninitialized (empty). +union TaskCore { + /// Field present during the `Polling` and the `Wind-down` phases. + future: ManuallyDrop, + + /// Field present during the `Completed` phase. + output: ManuallyDrop, +} + +/// A task. +/// +/// A task contains both the scheduling function and the future to be polled (or +/// its output if available). `Waker`, `Runnable`, `Promise` and `CancelToken` +/// are all type-erased (fat) pointers to a `Task`. The task is automatically +/// deallocated when all the formers have been dropped. 
+/// +/// The lifetime of a task involves up to 4 phases: +/// - `Polling` phase: the future needs to be polled, +/// - `Completed` phase: the future has been polled to completion and its output +/// is available, +/// - `Wind-down` phase: the task has been cancelled while it was already +/// scheduled for processing, so the future had to be kept temporarily alive +/// to avoid a race; the `Closed` phase will be entered only when the +/// scheduled task is processed, +/// - `Closed` phase: neither the future nor its output are available, either +/// because the task has been cancelled or because the output has been moved +/// out. +/// +/// It is possible to move from `Polling` to `Completed`, `Wind-down` or +/// `Closed`, but the only possible transition from `Wind-down` and from +/// `Completed` is to `Closed`. +/// +/// The different states and sub-states and their corresponding flags are +/// summarized below: +/// +/// | Phase | CLOSED | POLLING | WAKE_COUNT | Runnable exists? | +/// |---------------------|--------|---------|------------|------------------| +/// | Polling (idle) | 0 | 1 | 0 | No | +/// | Polling (scheduled) | 0 | 1 | ≠0 | Yes | +/// | Completed | 0 | 0 | any | No | +/// | Wind-down | 1 | 1 | any | Yes | +/// | Closed | 1 | 0 | any | No | +/// +/// A `Runnable` is a reference to a task that has been scheduled. There can be +/// at most one `Runnable` at any given time. +/// +/// `WAKE_COUNT` is a counter incremented each time the task is awaken and reset +/// each time the `Runnable` has finished polling the task. The waker that +/// increments the wake count from 0 to 1 is responsible for creating and +/// scheduling a new `Runnable`. +/// +/// The state includes as well a reference count `REF_COUNT` that accounts for +/// the `Promise`, the `CancelToken` and all `Waker`s. The `Runnable` is _not_ +/// included in `REF_COUNT` because its existence can be inferred from `CLOSED`, +/// `POLLING` and `WAKE_COUNT` (see table above). +struct Task { + /// State of the task. + /// + /// The state has the following layout, where bit 0 is the LSB and bit 63 is + /// the MSB: + /// + /// | 33-63 | 2-32 | 1 | 0 | + /// |------------|-----------|--------|---------| + /// | WAKE_COUNT | REF_COUNT | CLOSED | POLLING | + state: AtomicU64, + + /// The future, its output, or nothing. + core: UnsafeCell>, + + /// The task scheduling function. + schedule_fn: S, + + /// An arbitrary `Clone` tag that is passed to the scheduling function. + tag: T, +} + +impl Task +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + const RAW_WAKER_VTABLE: RawWakerVTable = RawWakerVTable::new( + Self::clone_waker, + Self::wake_by_val, + Self::wake_by_ref, + Self::drop_waker, + ); + + /// Clones a waker. + unsafe fn clone_waker(ptr: *const ()) -> RawWaker { + let this = &*(ptr as *const Self); + + let ref_count = this.state.fetch_add(REF_INC, Ordering::Relaxed) & REF_MASK; + if ref_count > REF_CRITICAL { + panic!("Attack of the clones: the waker was cloned too many times"); + } + + RawWaker::new(ptr, &Self::RAW_WAKER_VTABLE) + } + + /// Wakes the task by value. + unsafe fn wake_by_val(ptr: *const ()) { + // Verify that the scheduling function does not capture any variable. + // + // It is always possible for the `Runnable` scheduled in the call to + // `wake` to be called and complete its execution before the scheduling + // call returns. 
For efficiency reasons, the reference count is + // preemptively decremented, which implies that the `Runnable` could + // prematurely drop and deallocate this task. By making sure that the + // schedule function is zero-sized, we ensure that premature + // deallocation is safe since the scheduling function does not access + // any allocated data. + if mem::size_of::() != 0 { + // Note: a static assert is not possible as `S` is defined in the + // outer scope. + Self::drop_waker(ptr); + panic!("Scheduling functions with captured variables are not supported"); + } + + // Wake the task, decreasing at the same time the reference count. + let state = Self::wake(ptr, WAKE_INC - REF_INC); + + // Deallocate the task if this waker is the last reference to the task, + // meaning that the reference count was 1 and the `POLLING` flag was + // cleared. Note that if the `POLLING` flag was set then a `Runnable` + // must exist. + + if state & (REF_MASK | POLLING) == REF_INC { + // Ensure that the newest state of the task output (if any) is + // visible before it is dropped. + // + // Ordering: Acquire ordering is necessary to synchronize with the + // Release ordering in all previous reference count decrements + // and/or in the wake count reset (the latter is equivalent to a + // reference count decrement for a `Runnable`). + atomic::fence(Ordering::Acquire); + + let this = &*(ptr as *const Self); + + // Set a drop guard to ensure that the task is deallocated whether + // or not `output` panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::()); + }); + + if state & CLOSED == 0 { + // Since the `CLOSED` and `POLLING` flags are both cleared, the + // output is present and must be dropped. + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + } + // Else the `CLOSED` flag is set and the `POLLING` flag is cleared + // so the task is already in the `Closed` phase. + } + } + + /// Wakes the task by reference. + unsafe fn wake_by_ref(ptr: *const ()) { + // Wake the task. + Self::wake(ptr, WAKE_INC); + } + + /// Wakes the task, either by value or by reference. + #[inline(always)] + unsafe fn wake(ptr: *const (), state_delta: u64) -> u64 { + let this = &*(ptr as *const Self); + + // Increment the wake count and, if woken by value, decrement the + // reference count at the same time. + // + // Ordering: Release ordering is necessary to synchronize with either + // the Acquire load or with the RMW in `Runnable::run`, which ensures + // that all memory operations performed by the user before the call to + // `wake` will be visible when the future is polled. Note that there is + // no need to use AcqRel ordering to synchronize with all calls to + // `wake` that precede the call to `Runnable::run`. This is because, + // according to the C++ memory model, an RMW takes part in a Release + // sequence irrespective of its ordering. The below RMW also happens to + // takes part in another Release sequence: it allows the Acquire-Release + // RMW that zeroes the wake count in the previous call to + // `Runnable::run` to synchronizes with the initial Acquire load of the + // state in the next call `Runnable::run` (or the Acquire fence in + // `Runnable::cancel`), thus ensuring that the next `Runnable` sees the + // newest state of the future. 
+ let state = this.state.fetch_add(state_delta, Ordering::Release); + + if state & WAKE_MASK > WAKE_CRITICAL { + panic!("The task was woken too many times: {:0x}", state); + } + + // Schedule the task if it is in the `Polling` phase but is not + // scheduled yet. + if state & (WAKE_MASK | CLOSED | POLLING) == POLLING { + // Safety: calling `new_unchecked` is safe since: there is no other + // `Runnable` running (the wake count was 0, the `POLLING` flag was + // set, the `CLOSED` flag was cleared); the wake count is now 1; the + // `POLLING` flag is set; the `CLOSED` flag is cleared; the task + // contains a live future. + + let runnable = Runnable::new_unchecked(ptr as *const Self); + (this.schedule_fn)(runnable, this.tag.clone()); + } + + state + } + + /// Drops a waker. + unsafe fn drop_waker(ptr: *const ()) { + let this = &*(ptr as *const Self); + + // Ordering: Release ordering is necessary to synchronize with the + // Acquire fence in the drop handler of the last reference to the task + // and to make sure that all previous operations on the `core` member + // are visible when it is dropped. + let state = this.state.fetch_sub(REF_INC, Ordering::Release); + + // Deallocate the task if this waker was the last reference to the task. + if state & REF_MASK == REF_INC && !runnable_exists(state) { + // Ensure that the newest state of the `core` member is visible + // before it is dropped. + // + // Ordering: Acquire ordering is necessary to synchronize with the + // Release ordering in all previous reference count decrements + // and/or in the wake count reset (the latter is equivalent to a + // reference count decrement for a `Runnable`). + atomic::fence(Ordering::Acquire); + + // Set a drop guard to ensure that the task is deallocated whether + // or not the `core` member panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::()); + }); + + if state & POLLING == POLLING { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); + } else if state & CLOSED == 0 { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + } + // Else the `CLOSED` flag is set but the `POLLING` flag is cleared + // so the future was already dropped. + } + } +} + +/// Spawns a task. +/// +/// An arbitrary tag can be attached to the task, a clone of which will be +/// passed to the scheduling function each time it is called. + +/// The returned `Runnable` must be scheduled by the user. +pub(crate) fn spawn( + future: F, + schedule_fn: S, + tag: T, +) -> (Promise, Runnable, CancelToken) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + // Create a task with preemptively incremented reference and wake counts to + // account for the returned `Promise`, `CancelToken` and `Runnable` (a + // non-zero wake count with the `POLLING` flag set indicates that there is a + // live `Runnable`). + let task = Task { + state: AtomicU64::new((2 * REF_INC) | WAKE_INC | POLLING), + core: UnsafeCell::new(TaskCore { + future: ManuallyDrop::new(future), + }), + schedule_fn, + tag, + }; + + // Pin the task with its future to the heap. 
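+    // For reference, the initial state written above corresponds to the
+    // `Polling (scheduled)` row of the table documented on `Task`: the
+    // `POLLING` flag is set, the wake count is 1 (standing in for the
+    // `Runnable` that is about to be created) and the reference count is 2
+    // (one for the `Promise`, one for the `CancelToken`).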
+ unsafe { + let layout = Layout::new::>(); + let ptr = alloc(layout) as *mut Task; + if ptr.is_null() { + handle_alloc_error(layout); + } + *ptr = task; + + // Safety: this is safe since the task was allocated with the global + // allocator, there is no other `Runnable` running since the task was + // just created, the wake count is 1, the `POLLING` flag is set, the + // `CLOSED` flag is cleared and `core` contains a future. + let runnable = Runnable::new_unchecked(ptr); + + // Safety: this is safe since the task was allocated with the global + // allocator and the reference count is 2. + let promise = Promise::new_unchecked(ptr); + let cancel_token = CancelToken::new_unchecked(ptr); + + (promise, runnable, cancel_token) + } +} + +/// Spawns a task which output will never be retrieved. +/// +/// This is mostly useful to avoid undue reference counting for futures that +/// return a `()` type. +/// +/// An arbitrary tag can be attached to the task, a clone of which will be +/// passed to the scheduling function each time it is called. +/// +/// The returned `Runnable` must be scheduled by the user. +pub(crate) fn spawn_and_forget( + future: F, + schedule_fn: S, + tag: T, +) -> (Runnable, CancelToken) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + // Create a task with preemptively incremented reference and wake counts to + // account for the returned `CancelToken` and `Runnable` (a non-zero wake + // count with the `POLLING` flag set indicates that there is a live + // `Runnable`). + let task = Task { + state: AtomicU64::new(REF_INC | WAKE_INC | POLLING), + core: UnsafeCell::new(TaskCore { + future: ManuallyDrop::new(future), + }), + schedule_fn, + tag, + }; + + // Pin the task with its future to the heap. + unsafe { + let layout = Layout::new::>(); + let ptr = alloc(layout) as *mut Task; + if ptr.is_null() { + handle_alloc_error(layout); + } + *ptr = task; + + // Safety: this is safe since the task was allocated with the global + // allocator, there is no other `Runnable` running since the task was + // just created, the wake count is 1, the `POLLING` flag is set, the + // `CLOSED` flag is cleared and `core` contains a future. + let runnable = Runnable::new_unchecked(ptr); + + // Safety: this is safe since the task was allocated with the global + // allocator and the reference count is 1. + let cancel_token = CancelToken::new_unchecked(ptr); + + (runnable, cancel_token) + } +} diff --git a/asynchronix/src/runtime/executor/task/cancel_token.rs b/asynchronix/src/runtime/executor/task/cancel_token.rs new file mode 100644 index 0000000..1454b87 --- /dev/null +++ b/asynchronix/src/runtime/executor/task/cancel_token.rs @@ -0,0 +1,220 @@ +extern crate alloc; + +use std::alloc::{dealloc, Layout}; +use std::future::Future; +use std::mem::ManuallyDrop; +use std::panic::{RefUnwindSafe, UnwindSafe}; + +use crate::loom_exports::sync::atomic::{self, Ordering}; + +use super::runnable::Runnable; +use super::util::{runnable_exists, RunOnDrop}; +use super::Task; +use super::{CLOSED, POLLING, REF_INC, REF_MASK}; + +/// Virtual table for a `CancelToken`. +#[derive(Debug)] +struct VTable { + cancel: unsafe fn(*const ()), + drop: unsafe fn(*const ()), +} + +/// Cancels a pending task. +/// +/// If the task is completed, nothing is done. If the task is not completed +/// but not currently scheduled (no `Runnable` exist) then the future is +/// dropped immediately. 
Otherwise, the future will be dropped at a later +/// time by the scheduled `Runnable` once it runs. +unsafe fn cancel(ptr: *const ()) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + let this = &*(ptr as *const Task); + + // Enter the `Closed` or `Wind-down` phase if the tasks is not + // completed. + // + // Ordering: Acquire ordering is necessary to synchronize with any + // operation that modified or dropped the future or output. This ensures + // that the future or output can be safely dropped or that the task can + // be safely deallocated if necessary. The Release ordering synchronizes + // with any of the Acquire atomic fences and ensure that this atomic + // access is fully completed upon deallocation. + let state = this + .state + .fetch_update(Ordering::AcqRel, Ordering::Relaxed, |s| { + if s & POLLING == 0 { + // The task has completed or is closed so there is no need + // to drop the future or output and the reference count can + // be decremented right away. + Some(s - REF_INC) + } else if runnable_exists(s) { + // A `Runnable` exists so the future cannot be dropped (this + // will be done by the `Runnable`) and the reference count + // can be decremented right away. + Some((s | CLOSED) - REF_INC) + } else { + // The future or the output needs to be dropped so the + // reference count cannot be decremented just yet, otherwise + // another reference could deallocate the task before the + // drop is complete. + Some((s | CLOSED) & !POLLING) + } + }) + .unwrap(); + + if runnable_exists(state) { + // The task is in the `Wind-down` phase so the cancellation is now + // the responsibility of the current `Runnable`. + return; + } + + if state & POLLING == 0 { + // Deallocate the task if this was the last reference. + if state & REF_MASK == REF_INC { + // Ensure that all atomic accesses to the state are visible. + // + // Ordering: this Acquire fence synchronizes with all Release + // operations that decrement the number of references to the + // task. + atomic::fence(Ordering::Acquire); + + // Set a drop guard to ensure that the task is deallocated, + // whether or not the output panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::>()); + }); + + // Drop the output if any. + if state & CLOSED == 0 { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + } + } + + return; + } + + // Set a drop guard to ensure that reference count is decremented and + // the task is deallocated if this is the last reference, whether or not + // the future panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + // Ordering: Release ordering is necessary to ensure that the drop + // of the future or output is visible when the last reference + // deallocates the task. + let state = this.state.fetch_sub(REF_INC, Ordering::Release); + if state & REF_MASK == REF_INC { + // Ensure that all atomic accesses to the state are visible. + // + // Ordering: this Acquire fence synchronizes with all Release + // operations that decrement the number of references to the + // task. + atomic::fence(Ordering::Acquire); + + dealloc(ptr as *mut u8, Layout::new::>()); + } + }); + + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); +} + +/// Drops the token without cancelling the task. 
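+///
+/// If this token happens to be the last reference to the task and no
+/// `Runnable` exists, the future or the output (whichever is still live, if
+/// any) is dropped and the task is deallocated.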
+unsafe fn drop(ptr: *const ()) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + let this = &*(ptr as *const Task); + + // Decrement the reference count. + // + // Ordering: the Release ordering synchronizes with any of the Acquire + // atomic fences and ensure that this atomic access is fully completed + // upon deallocation. + let state = this.state.fetch_sub(REF_INC, Ordering::Release); + + // Deallocate the task if this token was the last reference to the task. + if state & REF_MASK == REF_INC && !runnable_exists(state) { + // Ensure that the newest state of the future or output is visible + // before it is dropped. + // + // Ordering: this Acquire fence synchronizes with all Release + // operations that decrement the number of references to the task. + atomic::fence(Ordering::Acquire); + + // Set a drop guard to ensure that the task is deallocated whether + // or not the future or output panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::>()); + }); + + if state & POLLING == POLLING { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); + } else if state & CLOSED == 0 { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + } + // Else the `CLOSED` flag is set but the `POLLING` flag is cleared + // so the future was already dropped. + } +} + +/// A token that can be used to cancel a task. +#[derive(Debug)] +pub(crate) struct CancelToken { + task: *const (), + vtable: &'static VTable, +} + +impl CancelToken { + /// Creates a `CancelToken`. + /// + /// Safety: this is safe provided that: + /// + /// - the task pointer points to a live task allocated with the global + /// allocator, + /// - the reference count has been incremented to account for this new task + /// reference. + pub(super) unsafe fn new_unchecked(task: *const Task) -> Self + where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, + { + Self { + task: task as *const (), + vtable: &VTable { + cancel: cancel::, + drop: drop::, + }, + } + } + + /// Cancels the task. + /// + /// If the task is completed, nothing is done. If the task is not completed + /// but not currently scheduled (no `Runnable` exist) then the future is + /// dropped immediately. Otherwise, the future will be dropped at a later + /// time by the scheduled `Runnable` once it runs. + pub(crate) fn cancel(self) { + // Prevent the drop handler from being called, as it would call + // `drop_token` on the inner field. 
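+        // Wrapping `self` in `ManuallyDrop` means the `Drop` implementation
+        // will not run, so only the type-erased `cancel` function below is
+        // invoked; it takes over the reference held by this token.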
+ let this = ManuallyDrop::new(self); + + unsafe { (this.vtable.cancel)(this.task) } + } +} + +impl Drop for CancelToken { + fn drop(&mut self) { + unsafe { (self.vtable.drop)(self.task) } + } +} + +unsafe impl Send for CancelToken {} +impl UnwindSafe for CancelToken {} +impl RefUnwindSafe for CancelToken {} diff --git a/asynchronix/src/runtime/executor/task/promise.rs b/asynchronix/src/runtime/executor/task/promise.rs new file mode 100644 index 0000000..c72e87c --- /dev/null +++ b/asynchronix/src/runtime/executor/task/promise.rs @@ -0,0 +1,198 @@ +extern crate alloc; + +use std::alloc::{dealloc, Layout}; +use std::future::Future; +use std::mem::ManuallyDrop; +use std::panic::{RefUnwindSafe, UnwindSafe}; + +use crate::loom_exports::sync::atomic::{self, Ordering}; + +use super::runnable::Runnable; +use super::util::{runnable_exists, RunOnDrop}; +use super::Task; +use super::{CLOSED, POLLING, REF_INC, REF_MASK}; + +/// Virtual table for a `Promise`. +#[derive(Debug)] +struct VTable { + poll: unsafe fn(*const ()) -> Stage, + drop: unsafe fn(*const ()), +} + +/// Retrieves the output of the task if ready. +unsafe fn poll(ptr: *const ()) -> Stage +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + let this = &*(ptr as *const Task); + + // Set the `CLOSED` flag if the task is in the `Completed` phase. + // + // Ordering: Acquire ordering is necessary to synchronize with the + // operation that modified or dropped the future or output. This ensures + // that the newest state of the output is visible before it is moved + // out, or that the future can be safely dropped when the promised is + // dropped if the promise is the last reference to the task. + let state = this + .state + .fetch_update(Ordering::Acquire, Ordering::Relaxed, |s| { + if s & (POLLING | CLOSED) == 0 { + Some(s | CLOSED) + } else { + None + } + }); + + if let Err(s) = state { + if s & CLOSED == CLOSED { + // The task is either in the `Wind-down` or `Closed` phase. + return Stage::Cancelled; + } else { + // The task is in the `Polling` phase. + return Stage::Pending; + }; + } + + let output = this.core.with_mut(|c| ManuallyDrop::take(&mut (*c).output)); + + Stage::Ready(output) +} + +/// Drops the promise. +unsafe fn drop(ptr: *const ()) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + let this = &*(ptr as *const Task); + + // Decrement the reference count. + // + // Ordering: Release ordering is necessary to ensure that if the output + // was moved out by using `poll`, then the move has completed when the + // last reference deallocates the task. + let state = this.state.fetch_sub(REF_INC, Ordering::Release); + + // Deallocate the task if this token was the last reference to the task. + if state & REF_MASK == REF_INC && !runnable_exists(state) { + // Ensure that the newest state of the future or output is visible + // before it is dropped. + // + // Ordering: Acquire ordering is necessary to synchronize with the + // Release ordering in all previous reference count decrements + // and/or in the wake count reset (the latter is equivalent to a + // reference count decrement for a `Runnable`). + atomic::fence(Ordering::Acquire); + + // Set a drop guard to ensure that the task is deallocated whether + // or not the `core` member panics when dropped. 
+ let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::>()); + }); + + if state & POLLING == POLLING { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); + } else if state & CLOSED == 0 { + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + } + // Else the `CLOSED` flag is set but the `POLLING` flag is cleared + // so the future was already dropped. + } +} + +/// The stage of progress of a promise. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub(crate) enum Stage { + /// The task has completed. + Ready(T), + /// The task is still being processed. + Pending, + /// The task has been cancelled. + Cancelled, +} + +impl Stage { + /// Maps a `Stage` to `Stage` by applying a function to a contained value. + pub(crate) fn map(self, f: F) -> Stage + where + F: FnOnce(U) -> V, + { + match self { + Stage::Ready(t) => Stage::Ready(f(t)), + Stage::Pending => Stage::Pending, + Stage::Cancelled => Stage::Cancelled, + } + } + + /// Returns `true` if the promise is a [`Stage::Ready`] value. + #[inline] + pub(crate) fn is_ready(&self) -> bool { + matches!(*self, Stage::Ready(_)) + } + + /// Returns `true` if the promise is a [`Stage::Pending`] value. + #[inline] + pub(crate) fn is_pending(&self) -> bool { + matches!(*self, Stage::Pending) + } + + /// Returns `true` if the promise is a [`Stage::Cancelled`] value. + #[inline] + pub(crate) fn is_cancelled(&self) -> bool { + matches!(*self, Stage::Cancelled) + } +} + +/// A promise that can poll a task's output of type `T`. +/// +/// Note that dropping a promise does not cancel the task. +#[derive(Debug)] +pub(crate) struct Promise { + task: *const (), + vtable: &'static VTable, +} + +impl Promise { + /// Creates a `Promise`. + /// + /// Safety: this is safe provided that: + /// + /// - the task pointer points to a live task allocated with the global + /// allocator, + /// - the reference count has been incremented to account for this new task + /// reference. + pub(super) unsafe fn new_unchecked(task: *const Task) -> Self + where + F: Future + Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, + { + Self { + task: task as *const (), + vtable: &VTable:: { + poll: poll::, + drop: drop::, + }, + } + } + + /// Retrieves the output of the task if ready. + pub(crate) fn poll(&self) -> Stage { + unsafe { (self.vtable.poll)(self.task) } + } +} + +impl Drop for Promise { + fn drop(&mut self) { + unsafe { (self.vtable.drop)(self.task) } + } +} + +unsafe impl Send for Promise {} +impl UnwindSafe for Promise {} +impl RefUnwindSafe for Promise {} diff --git a/asynchronix/src/runtime/executor/task/runnable.rs b/asynchronix/src/runtime/executor/task/runnable.rs new file mode 100644 index 0000000..993b79a --- /dev/null +++ b/asynchronix/src/runtime/executor/task/runnable.rs @@ -0,0 +1,320 @@ +extern crate alloc; + +use std::alloc::{dealloc, Layout}; +use std::future::Future; +use std::mem::{self, ManuallyDrop}; +use std::panic::{RefUnwindSafe, UnwindSafe}; +use std::pin::Pin; +use std::task::{Context, Poll, RawWaker, Waker}; + +use crate::loom_exports::debug_or_loom_assert; +use crate::loom_exports::sync::atomic::{self, Ordering}; + +use super::util::RunOnDrop; +use super::Task; +use super::{CLOSED, POLLING, REF_MASK, WAKE_MASK}; + +/// Virtual table for a `Runnable`. +#[derive(Debug)] +struct VTable { + run: unsafe fn(*const ()), + cancel: unsafe fn(*const ()), +} + +/// Polls the inner future. 
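+///
+/// The future is polled in a loop: after a `Pending` poll, the wake count
+/// sampled before polling is subtracted from the state and, if new wake-ups
+/// arrived in the meantime, the future is polled again. If the task was
+/// concurrently cancelled, the loop instead drops the future and enters the
+/// `Closed` phase.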
+unsafe fn run<F, S, T>(ptr: *const ())
+where
+    F: Future + Send + 'static,
+    F::Output: Send + 'static,
+    S: Fn(Runnable, T) + Send + Sync + 'static,
+    T: Clone + Send + Sync + 'static,
+{
+    let this = &*(ptr as *const Task<F, S, T>);
+
+    // At this point, the task cannot be in the `Completed` phase, otherwise
+    // it would not have been scheduled in the first place. It could,
+    // however, have been cancelled and transitioned from `Polling` to
+    // `Wind-down` after it was already scheduled. It is possible that in
+    // such a case the `CLOSED` flag may not be visible when loading the
+    // state, but this is not a problem: when a task is cancelled while
+    // already scheduled (i.e. while the wake count is non-zero), its future
+    // is kept alive, so even if the state loaded is stale, the worst that
+    // can happen is that the future will be unnecessarily polled.
+    //
+    // It is worth mentioning that, in order to detect whether the task was
+    // woken while being polled, other executors reset a notification flag
+    // with an RMW when entering `run`. The idea here is to avoid such an
+    // RMW and instead load a wake count. Only once the task has been polled
+    // does an RMW check the wake count again to detect whether the task was
+    // notified in the meantime. This method may be slightly more prone to
+    // spurious false positives but is much faster (1 vs 2 RMWs) and still
+    // prevents the occurrence of lost wake-ups.
+
+    // Load the state.
+    //
+    // Ordering: the below Acquire load synchronizes with the Release
+    // operation at the end of the call to `run` by the previous `Runnable`
+    // and ensures that the new state of the future stored by the previous
+    // call to `run` is visible. This synchronization exists because the RMW
+    // in the call to `Task::wake` or `Task::wake_by_ref` that scheduled
+    // this `Runnable` establishes a Release sequence. This load also
+    // synchronizes with the Release operation in `wake` and ensures that
+    // all memory operations performed by their callers are visible. Since
+    // this is a simple load, it may be stale and some wake requests may not
+    // be visible yet, but the post-polling RMW will later check if all wake
+    // requests were serviced.
+    let mut state = this.state.load(Ordering::Acquire);
+    let mut wake_count = state & WAKE_MASK;
+
+    debug_or_loom_assert!(state & POLLING == POLLING);
+
+    loop {
+        // Drop the future if the phase has transitioned to `Wind-down`.
+        if state & CLOSED == CLOSED {
+            cancel::<F, S, T>(ptr);
+
+            return;
+        }
+
+        // Poll the task.
+        let raw_waker = RawWaker::new(ptr, &Task::<F, S, T>::RAW_WAKER_VTABLE);
+        let waker = ManuallyDrop::new(Waker::from_raw(raw_waker));
+
+        let cx = &mut Context::from_waker(&waker);
+        let fut = Pin::new_unchecked(this.core.with_mut(|c| &mut *(*c).future));
+
+        // Set a panic guard to cancel the task if the future panics when
+        // polled.
+        let panic_guard = RunOnDrop::new(|| cancel::<F, S, T>(ptr));
+
+        let poll_state = fut.poll(cx);
+        mem::forget(panic_guard);
+
+        if let Poll::Ready(output) = poll_state {
+            // Set a panic guard to close the task if the future or the
+            // output panics when dropped.
+            let panic_guard = RunOnDrop::new(|| {
+                // Clear the `POLLING` flag while setting the `CLOSED` flag
+                // to enter the `Closed` phase.
+                //
+                // Ordering: Release ordering on success is necessary to
+                // ensure that all memory operations on the future or the
+                // output are visible when the last reference deallocates
+                // the task.
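+                //
+                // The update below maps any state onto the `Closed` row of
+                // the table documented on `Task`: `CLOSED` set, `POLLING`
+                // cleared, counters left untouched.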
+ let state = this + .state + .fetch_update(Ordering::Release, Ordering::Relaxed, |s| { + Some((s | CLOSED) & !POLLING) + }) + .unwrap(); + + // Deallocate if there are no more references to the task. + if state & REF_MASK == 0 { + // Ensure that all atomic accesses to the state are + // visible. + // + // Ordering: this Acquire fence synchronizes with all + // Release operations that decrement the number of + // references to the task. + atomic::fence(Ordering::Acquire); + + dealloc(ptr as *mut u8, Layout::new::>()); + } + }); + + // Drop the future and publish its output. + this.core.with_mut(|c| { + ManuallyDrop::drop(&mut (*c).future); + (*c).output = ManuallyDrop::new(output); + }); + + // Clear the `POLLING` flag to enter the `Completed` phase, + // unless the task has concurrently transitioned to the + // `Wind-down` phase or unless this `Runnable` is the last + // reference to the task. + if this + .state + .fetch_update(Ordering::Release, Ordering::Relaxed, |s| { + if s & CLOSED == CLOSED || s & REF_MASK == 0 { + None + } else { + Some(s & !POLLING) + } + }) + .is_ok() + { + mem::forget(panic_guard); + return; + } + + // The task is in the `Wind-down` phase or this `Runnable` + // was the last reference, so the output must be dropped. + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).output)); + mem::forget(panic_guard); + + // Clear the `POLLING` flag to enter the `Closed` phase. This is + // not actually necessary if the `Runnable` is the last + // reference, but that should be a very rare occurrence. + // + // Ordering: Release ordering is necessary to ensure that the + // drop of the output is visible when the last reference + // deallocates the task. + state = this.state.fetch_and(!POLLING, Ordering::Release); + + // Deallocate the task if there are no task references left. + if state & REF_MASK == 0 { + // Ensure that all atomic accesses to the state are visible. + // + // Ordering: this Acquire fence synchronizes with all + // Release operations that decrement the number of + // references to the task. + atomic::fence(Ordering::Acquire); + dealloc(ptr as *mut u8, Layout::new::>()); + } + + return; + } + + // The future is `Pending`: try to reset the wake count. + // + // Ordering: a Release ordering is required in case the wake count + // is successfully cleared; it synchronizes, via a Release sequence, + // with the Acquire load upon entering `Runnable::run` the next time + // it is called. Acquire ordering is in turn necessary in case the + // wake count has changed and the future must be polled again; it + // synchronizes with the Release RMW in `wake` and ensures that all + // memory operations performed by their callers are visible when the + // polling loop is repeated. + state = this.state.fetch_sub(wake_count, Ordering::AcqRel); + debug_or_loom_assert!(state > wake_count); + wake_count = (state & WAKE_MASK) - wake_count; + + // Return now if the wake count has been successfully cleared, + // provided that the task was not concurrently cancelled. + if wake_count == 0 && state & CLOSED == 0 { + // If there are no task references left, cancel and deallocate + // the task since it can never be scheduled again. + if state & REF_MASK == 0 { + let _drop_guard = RunOnDrop::new(|| { + dealloc(ptr as *mut u8, Layout::new::>()); + }); + + // Drop the future; + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); + } + + return; + } + } +} + +/// Cancels the task, dropping the inner future. 
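+///
+/// This is invoked when the `Runnable` is dropped without being run, when
+/// `run` observes the `Wind-down` phase, or from the panic guard that
+/// protects the call to `poll` in `run`.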
+unsafe fn cancel(ptr: *const ()) +where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, +{ + let this = &*(ptr as *const Task); + + // Ensure that the modifications of the future by the previous + // `Runnable` are visible. + // + // Ordering: this Acquire fence synchronizes with the Release operation + // at the end of the call to `run` by the previous `Runnable` and + // ensures that the new state of the future stored by the previous call + // to `run` is visible. This synchronization exists because the wake + // count RMW in the call to `Task::wake` that created this `Runnable` + // establishes a Release sequence. + atomic::fence(Ordering::Acquire); + + // Set a drop guard to enter the `Closed` phase whether or not the + // future panics when dropped. + let _drop_guard = RunOnDrop::new(|| { + // Clear the `POLLING` flag while setting the `CLOSED` flag to enter + // the `Closed` phase. + // + // Ordering: Release ordering on success is necessary to ensure that + // all memory operations on the future are visible when the last + // reference deallocates the task. + let state = this + .state + .fetch_update(Ordering::Release, Ordering::Relaxed, |s| { + Some((s | CLOSED) & !POLLING) + }) + .unwrap(); + + // Deallocate if there are no more references to the task. + if state & REF_MASK == 0 { + // Ensure that all atomic accesses to the state are visible. + // + // Ordering: this Acquire fence synchronizes with all Release + // operations that decrement the number of references to the + // task. + atomic::fence(Ordering::Acquire); + dealloc(ptr as *mut u8, Layout::new::>()); + } + }); + + // Drop the future; + this.core.with_mut(|c| ManuallyDrop::drop(&mut (*c).future)); +} + +/// Handle to a scheduled task. +/// +/// Dropping the runnable directly instead of calling `run` cancels the task. +#[derive(Debug)] +pub(crate) struct Runnable { + task: *const (), + vtable: &'static VTable, +} + +impl Runnable { + /// Creates a `Runnable`. + /// + /// Safety: this is safe provided that: + /// + /// - the task pointer points to a live task allocated with the global + /// allocator, + /// - there is not other live `Runnable` for this task, + /// - the wake count is non-zero, + /// - the `POLLING` flag is set and the `CLOSED` flag is cleared, + /// - the task contains a live future. + pub(super) unsafe fn new_unchecked(task: *const Task) -> Self + where + F: Future + Send + 'static, + F::Output: Send + 'static, + S: Fn(Runnable, T) + Send + Sync + 'static, + T: Clone + Send + Sync + 'static, + { + Self { + task: task as *const (), + vtable: &VTable { + run: run::, + cancel: cancel::, + }, + } + } + + /// Polls the wrapped future. + pub(crate) fn run(self) { + // Prevent the drop handler from being called, as it would call `cancel` + // on the inner field. + let this = ManuallyDrop::new(self); + + // Poll the future. + unsafe { (this.vtable.run)(this.task) } + } +} + +impl Drop for Runnable { + fn drop(&mut self) { + // Cancel the task. 
+ unsafe { (self.vtable.cancel)(self.task) } + } +} + +unsafe impl Send for Runnable {} +impl UnwindSafe for Runnable {} +impl RefUnwindSafe for Runnable {} diff --git a/asynchronix/src/runtime/executor/task/tests.rs b/asynchronix/src/runtime/executor/task/tests.rs new file mode 100644 index 0000000..1ba7a21 --- /dev/null +++ b/asynchronix/src/runtime/executor/task/tests.rs @@ -0,0 +1,7 @@ +use super::*; + +#[cfg(not(asynchronix_loom))] +mod general; + +#[cfg(asynchronix_loom)] +mod loom; diff --git a/asynchronix/src/runtime/executor/task/tests/general.rs b/asynchronix/src/runtime/executor/task/tests/general.rs new file mode 100644 index 0000000..58ef994 --- /dev/null +++ b/asynchronix/src/runtime/executor/task/tests/general.rs @@ -0,0 +1,625 @@ +use std::future::Future; +use std::ops::Deref; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::task::{Context, Poll}; +use std::thread; + +use futures_channel::{mpsc, oneshot}; +use futures_util::StreamExt; + +use super::*; + +// Test prelude to simulates a single-slot scheduler queue. +macro_rules! test_prelude { + () => { + static QUEUE: Mutex> = Mutex::new(Vec::new()); + + // Schedules one runnable task. + // + // Will panic if the slot was already occupied since there should exist + // at most 1 runnable per task at any time. + #[allow(dead_code)] + fn schedule_runnable(runnable: Runnable, _tag: ()) { + let mut queue = QUEUE.lock().unwrap(); + queue.push(runnable); + } + + // Runs one runnable task and returns true if a task was scheduled, + // otherwise returns false. + #[allow(dead_code)] + fn run_scheduled_runnable() -> bool { + if let Some(runnable) = QUEUE.lock().unwrap().pop() { + runnable.run(); + return true; + } + + false + } + + // Drops a runnable task and returns true if a task was scheduled, otherwise + // returns false. + #[allow(dead_code)] + fn drop_runnable() -> bool { + if let Some(_runnable) = QUEUE.lock().unwrap().pop() { + return true; + } + + false + } + }; +} + +// A friendly wrapper over a shared atomic boolean that uses only Relaxed +// ordering. +#[derive(Clone)] +struct Flag(Arc); +impl Flag { + fn new(value: bool) -> Self { + Self(Arc::new(AtomicBool::new(value))) + } + fn set(&self, value: bool) { + self.0.store(value, Ordering::Relaxed); + } + fn get(&self) -> bool { + self.0.load(Ordering::Relaxed) + } +} + +// A simple wrapper for the output of a future with a liveness flag. +struct MonitoredOutput { + is_alive: Flag, + inner: T, +} +impl Deref for MonitoredOutput { + type Target = T; + + fn deref(&self) -> &T { + &self.inner + } +} +impl Drop for MonitoredOutput { + fn drop(&mut self) { + self.is_alive.set(false); + } +} + +// A simple future wrapper with a liveness flag returning a `MonitoredOutput` on +// completion. +struct MonitoredFuture { + future_is_alive: Flag, + output_is_alive: Flag, + inner: F, +} +impl MonitoredFuture { + // Returns the `MonitoredFuture`, a liveness flag for the future and a + // liveness flag for the output. 
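+    //
+    // Typical use in the tests below:
+    //
+    //     let (future, future_is_alive, output_is_alive) =
+    //         MonitoredFuture::new(async move { 42 });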
+ fn new(future: F) -> (Self, Flag, Flag) { + let future_is_alive = Flag::new(true); + let output_is_alive = Flag::new(false); + let future_is_alive_remote = future_is_alive.clone(); + let output_is_alive_remote = output_is_alive.clone(); + + ( + Self { + future_is_alive, + output_is_alive, + inner: future, + }, + future_is_alive_remote, + output_is_alive_remote, + ) + } +} +impl Future for MonitoredFuture { + type Output = MonitoredOutput; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let inner = unsafe { self.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + match inner.poll(cx) { + Poll::Pending => Poll::Pending, + Poll::Ready(value) => { + self.output_is_alive.set(true); + let test_output = MonitoredOutput { + is_alive: self.output_is_alive.clone(), + inner: value, + }; + Poll::Ready(test_output) + } + } + } +} +impl Drop for MonitoredFuture { + fn drop(&mut self) { + self.future_is_alive.set(false); + } +} + +#[test] +fn task_schedule() { + test_prelude!(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { 42 }); + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + assert_eq!(future_is_alive.get(), true); + assert_eq!(output_is_alive.get(), false); + + // The task should complete immediately when ran. + runnable.run(); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); + assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42)); +} + +#[test] +fn task_schedule_mt() { + test_prelude!(); + + let (promise, runnable, _cancel_token) = spawn(async move { 42 }, schedule_runnable, ()); + + let th = thread::spawn(move || runnable.run()); + loop { + match promise.poll() { + Stage::Pending => {} + Stage::Cancelled => unreachable!(), + Stage::Ready(v) => { + assert_eq!(v, 42); + break; + } + } + } + th.join().unwrap(); +} + +#[test] +fn task_schedule_and_forget() { + test_prelude!(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {}); + let (runnable, _cancel_token) = spawn_and_forget(future, schedule_runnable, ()); + assert_eq!(future_is_alive.get(), true); + assert_eq!(output_is_alive.get(), false); + + // The task should complete immediately when ran. + runnable.run(); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); +} + +#[test] +fn task_wake() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let result = receiver.await.unwrap(); + result + }); + + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + + // The future should have been polled but should not have completed. + assert_eq!(output_is_alive.get(), false); + assert!(promise.poll().is_pending()); + + // Wake the task. + sender.send(42).unwrap(); + + // The task should have been scheduled by the channel sender. 
+ assert_eq!(run_scheduled_runnable(), true); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); + assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42)); +} + +#[test] +fn task_wake_mt() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (promise, runnable, _cancel_token) = spawn( + async move { + let result = receiver.await.unwrap(); + result + }, + schedule_runnable, + (), + ); + runnable.run(); + + let th_sender = thread::spawn(move || sender.send(42).unwrap()); + let th_exec = thread::spawn(|| while !run_scheduled_runnable() {}); + + loop { + match promise.poll() { + Stage::Pending => {} + Stage::Cancelled => unreachable!(), + Stage::Ready(v) => { + assert_eq!(v, 42); + break; + } + } + } + th_sender.join().unwrap(); + th_exec.join().unwrap(); +} + +#[test] +fn task_wake_and_forget() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let _ = receiver.await; + }); + + let (runnable, _cancel_token) = spawn_and_forget(future, schedule_runnable, ()); + runnable.run(); + + // The future should have been polled but should not have completed. + assert_eq!(output_is_alive.get(), false); + + // Wake the task. + sender.send(42).unwrap(); + + // The task should have been scheduled by the channel sender. + assert_eq!(run_scheduled_runnable(), true); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); +} + +#[test] +fn task_multiple_wake() { + test_prelude!(); + + let (mut sender, mut receiver) = mpsc::channel(3); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let mut sum = 0; + for _ in 0..5 { + sum += receiver.next().await.unwrap(); + } + sum + }); + + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + + // The future should have been polled but should not have completed. + assert!(promise.poll().is_pending()); + + // Wake the task 3 times. + sender.try_send(1).unwrap(); + sender.try_send(2).unwrap(); + sender.try_send(3).unwrap(); + + // The task should have been scheduled by the channel sender. + assert_eq!(run_scheduled_runnable(), true); + assert!(promise.poll().is_pending()); + + // The channel should be empty. Wake the task 2 more times. + sender.try_send(4).unwrap(); + sender.try_send(5).unwrap(); + + // The task should have been scheduled by the channel sender. + assert_eq!(run_scheduled_runnable(), true); + + // The task should have completed. + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); + assert_eq!(promise.poll().map(|v| *v), Stage::Ready(15)); +} + +#[test] +fn task_multiple_wake_mt() { + test_prelude!(); + + let (mut sender1, mut receiver) = mpsc::channel(3); + let mut sender2 = sender1.clone(); + let mut sender3 = sender1.clone(); + + let (promise, runnable, _cancel_token) = spawn( + async move { + let mut sum = 0; + for _ in 0..3 { + sum += receiver.next().await.unwrap(); + } + sum + }, + schedule_runnable, + (), + ); + runnable.run(); + + // Wake the task 3 times. 
+ let th_sender1 = thread::spawn(move || { + sender1.try_send(1).unwrap(); + while run_scheduled_runnable() {} + }); + let th_sender2 = thread::spawn(move || { + sender2.try_send(2).unwrap(); + while run_scheduled_runnable() {} + }); + let th_sender3 = thread::spawn(move || { + sender3.try_send(3).unwrap(); + while run_scheduled_runnable() {} + }); + + loop { + match promise.poll() { + Stage::Pending => {} + Stage::Cancelled => unreachable!(), + Stage::Ready(v) => { + assert_eq!(v, 6); + break; + } + } + } + th_sender1.join().unwrap(); + th_sender2.join().unwrap(); + th_sender3.join().unwrap(); +} + +#[test] +fn task_cancel_scheduled() { + test_prelude!(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {}); + + let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ()); + + // Cancel the task while a `Runnable` exists (i.e. while the task is + // considered scheduled). + cancel_token.cancel(); + + // The future should not be dropped while the `Runnable` exists, even if the + // task is cancelled, but the task should be seen as cancelled. + assert_eq!(future_is_alive.get(), true); + assert!(promise.poll().is_cancelled()); + + // An attempt to run the task should now drop the future without polling it. + runnable.run(); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), false); +} + +#[test] +fn task_cancel_unscheduled() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let _ = receiver.await; + }); + + let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + assert_eq!(future_is_alive.get(), true); + assert_eq!(output_is_alive.get(), false); + + // Cancel the task while no `Runnable` exists (the task is not scheduled as + // it needs to be woken by the channel sender first). + cancel_token.cancel(); + assert!(promise.poll().is_cancelled()); + assert!(sender.send(()).is_err()); + + // The future should be dropped immediately upon cancellation without + // completing. + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), false); +} + +#[test] +fn task_cancel_completed() { + test_prelude!(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { 42 }); + + let (promise, runnable, cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); + + // Cancel the already completed task. + cancel_token.cancel(); + assert_eq!(output_is_alive.get(), true); + assert_eq!(promise.poll().map(|v| *v), Stage::Ready(42)); +} + +#[test] +fn task_cancel_mt() { + test_prelude!(); + + let (runnable, cancel_token) = spawn_and_forget(async {}, schedule_runnable, ()); + + let th_cancel = thread::spawn(move || cancel_token.cancel()); + runnable.run(); + + th_cancel.join().unwrap(); +} + +#[test] +fn task_drop_promise_scheduled() { + test_prelude!(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async {}); + + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + // Drop the promise while a `Runnable` exists (i.e. while the task is + // considered scheduled). + drop(promise); + + // The task should complete immediately when ran. 
+ runnable.run(); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); +} + +#[test] +fn task_drop_promise_unscheduled() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let _ = receiver.await; + }); + + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + + // Drop the promise while no `Runnable` exists (the task is not scheduled as + // it needs to be woken by the channel sender first). + drop(promise); + + // Wake the task. + assert!(sender.send(()).is_ok()); + + // The task should have been scheduled by the channel sender. + assert_eq!(run_scheduled_runnable(), true); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), true); +} + +#[test] +fn task_drop_promise_mt() { + test_prelude!(); + + let (promise, runnable, _cancel_token) = spawn(async {}, schedule_runnable, ()); + + let th_drop = thread::spawn(move || drop(promise)); + runnable.run(); + + th_drop.join().unwrap() +} + +#[test] +fn task_drop_runnable() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (future, future_is_alive, output_is_alive) = MonitoredFuture::new(async move { + let _ = receiver.await; + }); + + let (promise, runnable, _cancel_token) = spawn(future, schedule_runnable, ()); + runnable.run(); + + // Wake the task. + assert!(sender.send(()).is_ok()); + + // Drop the task scheduled by the channel sender. + assert_eq!(drop_runnable(), true); + assert_eq!(future_is_alive.get(), false); + assert_eq!(output_is_alive.get(), false); + assert!(promise.poll().is_cancelled()); +} + +#[test] +fn task_drop_runnable_mt() { + test_prelude!(); + + let (sender, receiver) = oneshot::channel(); + + let (runnable, _cancel_token) = spawn_and_forget( + async move { + let _ = receiver.await; + }, + schedule_runnable, + (), + ); + runnable.run(); + + let th_sender = thread::spawn(move || sender.send(()).is_ok()); + drop_runnable(); + + th_sender.join().unwrap(); +} + +#[test] +fn task_drop_cycle() { + test_prelude!(); + + let (sender1, mut receiver1) = mpsc::channel(2); + let (sender2, mut receiver2) = mpsc::channel(2); + let (sender3, mut receiver3) = mpsc::channel(2); + + static DROP_COUNT: AtomicUsize = AtomicUsize::new(0); + + // Spawn 3 tasks that wake one another when dropped. 
+ let (runnable1, cancel_token1) = spawn_and_forget( + { + let mut sender2 = sender2.clone(); + let mut sender3 = sender3.clone(); + + async move { + let _guard = RunOnDrop::new(move || { + let _ = sender2.try_send(()); + let _ = sender3.try_send(()); + DROP_COUNT.fetch_add(1, Ordering::Relaxed); + }); + let _ = receiver1.next().await; + } + }, + schedule_runnable, + (), + ); + runnable1.run(); + + let (runnable2, cancel_token2) = spawn_and_forget( + { + let mut sender1 = sender1.clone(); + let mut sender3 = sender3.clone(); + + async move { + let _guard = RunOnDrop::new(move || { + let _ = sender1.try_send(()); + let _ = sender3.try_send(()); + DROP_COUNT.fetch_add(1, Ordering::Relaxed); + }); + let _ = receiver2.next().await; + } + }, + schedule_runnable, + (), + ); + runnable2.run(); + + let (runnable3, cancel_token3) = spawn_and_forget( + { + let mut sender1 = sender1.clone(); + let mut sender2 = sender2.clone(); + + async move { + let _guard = RunOnDrop::new(move || { + let _ = sender1.try_send(()); + let _ = sender2.try_send(()); + DROP_COUNT.fetch_add(1, Ordering::Relaxed); + }); + let _ = receiver3.next().await; + } + }, + schedule_runnable, + (), + ); + runnable3.run(); + + let th1 = thread::spawn(move || cancel_token1.cancel()); + let th2 = thread::spawn(move || cancel_token2.cancel()); + let th3 = thread::spawn(move || cancel_token3.cancel()); + + th1.join().unwrap(); + th2.join().unwrap(); + th3.join().unwrap(); + + while run_scheduled_runnable() {} + + assert_eq!(DROP_COUNT.load(Ordering::Relaxed), 3); +} diff --git a/asynchronix/src/runtime/executor/task/tests/loom.rs b/asynchronix/src/runtime/executor/task/tests/loom.rs new file mode 100644 index 0000000..baa6eb6 --- /dev/null +++ b/asynchronix/src/runtime/executor/task/tests/loom.rs @@ -0,0 +1,536 @@ +use std::future::Future; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::task::Waker; + +use ::loom::cell::UnsafeCell; +use ::loom::model::Builder; +use ::loom::sync::atomic::AtomicBool; +use ::loom::sync::atomic::AtomicUsize; +use ::loom::sync::atomic::Ordering::*; +use ::loom::sync::Arc; +use ::loom::{lazy_static, thread}; + +use super::*; + +// Test prelude to simulates a single-slot scheduler queue. +macro_rules! test_prelude { + () => { + // A single-slot scheduling queue. + lazy_static! { + static ref RUNNABLE_SLOT: RunnableSlot = RunnableSlot::new(); + } + + // Schedules one runnable task. + // + // Will panic if the slot was already occupied since there should exist + // at most 1 runnable per task at any time. + #[allow(dead_code)] + fn schedule_task(runnable: Runnable, _tag: ()) { + RUNNABLE_SLOT.set(runnable); + } + + // Runs one runnable task and returns true if a task was indeed + // scheduled, otherwise returns false. + #[allow(dead_code)] + fn try_poll_task() -> bool { + if let Some(runnable) = RUNNABLE_SLOT.take() { + runnable.run(); + return true; + } + + false + } + + // Cancel a scheduled task by dropping its runnable and returns true is + // a task was indeed scheduled, otherwise returns false. + #[allow(dead_code)] + fn try_cancel_task() -> bool { + if let Some(_runnable) = RUNNABLE_SLOT.take() { + // Just drop the runnable to cancel the task. 
+ return true; + } + + false + } + }; +} + +struct RunnableSlot { + state: AtomicUsize, + runnable: UnsafeCell>, +} +impl RunnableSlot { + const LOCKED: usize = 0b01; + const POPULATED: usize = 0b10; + + fn new() -> Self { + Self { + state: AtomicUsize::new(0), + runnable: UnsafeCell::new(None), + } + } + + fn take(&self) -> Option { + self.state + .fetch_update(Acquire, Relaxed, |s| { + // Only lock if there is a runnable and it is not already locked. + if s == Self::POPULATED { + Some(Self::LOCKED) + } else { + None + } + }) + .ok() + .and_then(|_| { + // Take the `Runnable`. + let runnable = unsafe { self.runnable.with_mut(|r| (*r).take()) }; + assert!(runnable.is_some()); + + // Release the lock and signal that the slot is empty. + self.state.store(0, Release); + + runnable + }) + } + + fn set(&self, runnable: Runnable) { + // Take the lock. + let state = self.state.swap(Self::LOCKED, Acquire); + + // Expect the initial state to be 0. Otherwise, there is already a + // stored `Runnable` or one is being stored or taken, which should not + // happen since a task can have at most 1 `Runnable` at a time. + if state != 0 { + panic!("Error: there are several live `Runnable`s for the same task"); + } + + // Store the `Runnable`. + unsafe { self.runnable.with_mut(|r| *r = Some(runnable)) }; + + // Release the lock and signal that the slot is populated. + self.state.store(Self::POPULATED, Release); + } +} + +// An asynchronous count-down counter. +// +// The implementation is intentionally naive and wakes the `CountWatcher` each +// time the count is decremented, even though the future actually only completes +// when the count reaches 0. +// +// Note that for simplicity, the waker may not be changed once set; this is not +// an issue since the tested task implementation never changes the waker. +fn count_down(init_count: usize) -> (CountController, CountWatcher) { + let inner = Arc::new(CounterInner::new(init_count)); + + ( + CountController { + inner: inner.clone(), + }, + CountWatcher { inner }, + ) +} + +// The counter inner type. +struct CounterInner { + waker: UnsafeCell>, + state: AtomicUsize, +} +impl CounterInner { + const HAS_WAKER: usize = 1 << 0; + const INCREMENT: usize = 1 << 1; + + fn new(init_count: usize) -> Self { + Self { + waker: UnsafeCell::new(None), + state: AtomicUsize::new(init_count * Self::INCREMENT), + } + } +} + +// A `Clone` and `Sync` entity that can decrement the counter. +#[derive(Clone)] +struct CountController { + inner: Arc, +} +impl CountController { + // Decrement the count and notify the counter if a waker is registered. + // + // This will panic if the counter is decremented too many times. + fn decrement(&self) { + let state = self.inner.state.fetch_sub(CounterInner::INCREMENT, Acquire); + + if state / CounterInner::INCREMENT == 0 { + panic!("The count-down counter has wrapped around"); + } + + if state & CounterInner::HAS_WAKER != 0 { + unsafe { + self.inner + .waker + .with(|w| (&*w).as_ref().map(Waker::wake_by_ref)) + }; + } + } +} +unsafe impl Send for CountController {} +unsafe impl Sync for CountController {} + +// An entity notified by the controller each time the count is decremented. 
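+//
+// In the Loom tests below, the watcher is awaited inside the spawned task
+// while one or more threads call `CountController::decrement`, which
+// exercises concurrent wake-ups against the task state machine.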
+struct CountWatcher { + inner: Arc, +} +impl Future for CountWatcher { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let state = self.inner.state.load(Relaxed); + + if state / CounterInner::INCREMENT == 0 { + return Poll::Ready(()); + } + if state & CounterInner::HAS_WAKER == CounterInner::HAS_WAKER { + // Changes of the waker are not supported, so check that the waker + // indeed hasn't changed. + assert!( + unsafe { + self.inner + .waker + .with(|w| cx.waker().will_wake((*w).as_ref().unwrap())) + }, + "This testing primitive does not support changes of waker" + ); + + return Poll::Pending; + } + + unsafe { self.inner.waker.with_mut(|w| *w = Some(cx.waker().clone())) }; + + let state = self.inner.state.fetch_or(CounterInner::HAS_WAKER, Release); + if state / CounterInner::INCREMENT == 0 { + Poll::Ready(()) + } else { + Poll::Pending + } + } +} +unsafe impl Send for CountWatcher {} + +#[test] +fn loom_task_schedule() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + lazy_static! { + static ref READY: AtomicBool = AtomicBool::new(false); + } + + let (promise, runnable, _cancel_token) = spawn(async move { 42 }, schedule_task, ()); + + let t = thread::spawn(move || { + // The task should complete immediately when ran. + runnable.run(); + READY.store(true, Release); + }); + + if READY.load(Acquire) { + assert_eq!(promise.poll(), Stage::Ready(42)); + } + + t.join().unwrap(); + }); +} + +#[test] +fn loom_task_custom1() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + lazy_static! { + static ref READY: AtomicBool = AtomicBool::new(false); + } + + let (promise, runnable, cancel_token) = spawn(async move { 42 }, schedule_task, ()); + + let t = thread::spawn(move || { + // The task should complete immediately when ran. + runnable.run(); + }); + + cancel_token.cancel(); + + t.join().unwrap(); + }); +} + +#[test] +fn loom_task_cancel() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + lazy_static! 
{ + static ref IS_CANCELLED: AtomicBool = AtomicBool::new(false); + } + + let (count_controller, count_watcher) = count_down(1); + + let (promise, runnable, cancel_token) = + spawn(async move { count_watcher.await }, schedule_task, ()); + runnable.run(); + + let waker_thread = thread::spawn(move || { + count_controller.decrement(); + }); + let scheduler_thread = thread::spawn(|| { + try_poll_task(); + }); + let cancel_thread = thread::spawn(move || { + cancel_token.cancel(); + IS_CANCELLED.store(true, Release); + }); + + if IS_CANCELLED.load(Acquire) { + assert!(promise.poll() != Stage::Pending); + } + + waker_thread.join().unwrap(); + scheduler_thread.join().unwrap(); + cancel_thread.join().unwrap(); + }); +} + +#[test] +fn loom_task_run_and_drop() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + + let (count_controller, count_watcher) = count_down(1); + + let (runnable, cancel_token) = + spawn_and_forget(async move { count_watcher.await }, schedule_task, ()); + runnable.run(); + + let waker_thread = thread::spawn(move || { + count_controller.decrement(); + }); + let runnable_thread = thread::spawn(|| { + try_poll_task(); + }); + drop(cancel_token); + + waker_thread.join().unwrap(); + runnable_thread.join().unwrap(); + }); +} + +#[test] +fn loom_task_run_and_cancel() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + + let (count_controller, count_watcher) = count_down(1); + + let (runnable, cancel_token) = + spawn_and_forget(async move { count_watcher.await }, schedule_task, ()); + runnable.run(); + + let waker_thread = thread::spawn(move || { + count_controller.decrement(); + }); + let runnable_thread = thread::spawn(|| { + try_poll_task(); + }); + cancel_token.cancel(); + + waker_thread.join().unwrap(); + runnable_thread.join().unwrap(); + }); +} + +#[test] +fn loom_task_drop_all() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + + let (promise, runnable, cancel_token) = spawn(async move {}, schedule_task, ()); + + let promise_thread = thread::spawn(move || { + drop(promise); + }); + let runnable_thread = thread::spawn(move || { + drop(runnable); + }); + drop(cancel_token); + + promise_thread.join().unwrap(); + runnable_thread.join().unwrap(); + }); +} + +#[test] +fn loom_task_drop_with_waker() { + const DEFAULT_PREEMPTION_BOUND: usize = 4; + + let mut builder = Builder::new(); + if builder.preemption_bound.is_none() { + builder.preemption_bound = Some(DEFAULT_PREEMPTION_BOUND); + } + + builder.check(move || { + test_prelude!(); + + let (count_controller, count_watcher) = count_down(1); + + let (promise, runnable, cancel_token) = + spawn(async move { count_watcher.await }, schedule_task, ()); + runnable.run(); + + let waker_thread = thread::spawn(move || { + count_controller.decrement(); + }); + + let promise_thread = thread::spawn(move || { + drop(promise); + }); + let runnable_thread = thread::spawn(|| { + try_cancel_task(); // drop the runnable if available + }); + drop(cancel_token); + + waker_thread.join().unwrap(); + 
+        promise_thread.join().unwrap();
+        runnable_thread.join().unwrap();
+    });
+}
+
+#[test]
+fn loom_task_wake_single_thread() {
+    const DEFAULT_PREEMPTION_BOUND: usize = 3;
+    const TICK_COUNT1: usize = 4;
+    const TICK_COUNT2: usize = 0;
+
+    loom_task_wake(DEFAULT_PREEMPTION_BOUND, TICK_COUNT1, TICK_COUNT2);
+}
+
+#[test]
+fn loom_task_wake_multi_thread() {
+    const DEFAULT_PREEMPTION_BOUND: usize = 3;
+    const TICK_COUNT1: usize = 1;
+    const TICK_COUNT2: usize = 2;
+
+    loom_task_wake(DEFAULT_PREEMPTION_BOUND, TICK_COUNT1, TICK_COUNT2);
+}
+
+// Test task waking from one or two threads.
+fn loom_task_wake(preemption_bound: usize, tick_count1: usize, tick_count2: usize) {
+    let mut builder = Builder::new();
+    if builder.preemption_bound.is_none() {
+        builder.preemption_bound = Some(preemption_bound);
+    }
+
+    let total_tick_count = tick_count1 + tick_count2;
+    builder.check(move || {
+        test_prelude!();
+        lazy_static! {
+            static ref POLL_COUNT: AtomicUsize = AtomicUsize::new(0);
+        }
+
+        let (count_controller1, count_watcher) = count_down(total_tick_count);
+        let count_controller2 = count_controller1.clone();
+
+        let (promise, runnable, _cancel_token) =
+            spawn(async move { count_watcher.await }, schedule_task, ());
+        runnable.run();
+
+        let waker_thread1 = if tick_count1 != 0 {
+            Some(thread::spawn(move || {
+                for _ in 0..tick_count1 {
+                    count_controller1.decrement();
+                }
+            }))
+        } else {
+            None
+        };
+        let waker_thread2 = if tick_count2 != 0 {
+            Some(thread::spawn(move || {
+                for _ in 0..tick_count2 {
+                    count_controller2.decrement();
+                }
+            }))
+        } else {
+            None
+        };
+        let scheduler_thread = thread::spawn(move || {
+            // Try to run scheduled runnables.
+            for _ in 0..total_tick_count {
+                if try_poll_task() {
+                    POLL_COUNT.fetch_add(1, Release);
+                }
+            }
+        });
+
+        let poll_count = POLL_COUNT.load(Acquire);
+        let has_completed = poll_count == total_tick_count;
+
+        // Check that the promise is available if the task has been polled
+        // `total_tick_count` times.
+        if has_completed {
+            assert_eq!(promise.poll(), Stage::Ready(()));
+        }
+
+        scheduler_thread.join().unwrap();
+        waker_thread1.map(|t| t.join().unwrap());
+        waker_thread2.map(|t| t.join().unwrap());
+
+        // If the promise has not been retrieved yet, retrieve it now. It may be
+        // necessary to poll the task one last time.
+        if !has_completed {
+            if POLL_COUNT.load(Acquire) != total_tick_count {
+                try_poll_task();
+            }
+
+            assert_eq!(promise.poll(), Stage::Ready(()));
+        }
+    });
+}
diff --git a/asynchronix/src/runtime/executor/task/util.rs b/asynchronix/src/runtime/executor/task/util.rs
new file mode 100644
index 0000000..af62b8b
--- /dev/null
+++ b/asynchronix/src/runtime/executor/task/util.rs
@@ -0,0 +1,23 @@
+use super::{CLOSED, POLLING, WAKE_MASK};
+
+/// An object that runs an arbitrary closure when dropped.
+pub(crate) struct RunOnDrop<F: FnMut()> {
+    drop_fn: F,
+}
+impl<F: FnMut()> RunOnDrop<F> {
+    /// Creates a new `RunOnDrop`.
+    pub(crate) fn new(drop_fn: F) -> Self {
+        Self { drop_fn }
+    }
+}
+impl<F: FnMut()> Drop for RunOnDrop<F> {
+    fn drop(&mut self) {
+        (self.drop_fn)();
+    }
+}
+
+/// Check if a `Runnable` exists based on the state.
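+///
+/// Concretely, this is the case when the `POLLING` flag is set and at least
+/// one bit of `WAKE_MASK` or `CLOSED` is set.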
+#[inline(always)]
+pub(crate) fn runnable_exists(state: u64) -> bool {
+    state & POLLING != 0 && state & (WAKE_MASK | CLOSED) != 0
+}
diff --git a/asynchronix/src/runtime/executor/tests.rs b/asynchronix/src/runtime/executor/tests.rs
new file mode 100644
index 0000000..9e21f8d
--- /dev/null
+++ b/asynchronix/src/runtime/executor/tests.rs
@@ -0,0 +1,142 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+
+use futures_channel::{mpsc, oneshot};
+use futures_util::StreamExt;
+
+use super::*;
+
+/// An object that runs an arbitrary closure when dropped.
+struct RunOnDrop<F: FnOnce()> {
+    drop_fn: Option<F>,
+}
+impl<F: FnOnce()> RunOnDrop<F> {
+    /// Creates a new `RunOnDrop`.
+    fn new(drop_fn: F) -> Self {
+        Self {
+            drop_fn: Some(drop_fn),
+        }
+    }
+}
+impl<F: FnOnce()> Drop for RunOnDrop<F> {
+    fn drop(&mut self) {
+        self.drop_fn.take().map(|f| f());
+    }
+}
+
+#[test]
+fn executor_deadlock() {
+    const NUM_THREADS: usize = 3;
+
+    let (_sender1, receiver1) = oneshot::channel::<()>();
+    let (_sender2, receiver2) = oneshot::channel::<()>();
+
+    let mut executor = Executor::new(NUM_THREADS);
+    static LAUNCH_COUNT: AtomicUsize = AtomicUsize::new(0);
+    static COMPLETION_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+    executor.spawn_and_forget(async move {
+        LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
+        let _ = receiver2.await;
+        COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
+    });
+    executor.spawn_and_forget(async move {
+        LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
+        let _ = receiver1.await;
+        COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
+    });
+
+    executor.run();
+    // Check that the executor returns on deadlock, i.e. none of the tasks has
+    // completed.
+    assert_eq!(LAUNCH_COUNT.load(Ordering::Relaxed), 2);
+    assert_eq!(COMPLETION_COUNT.load(Ordering::Relaxed), 0);
+}
+
+#[test]
+fn executor_deadlock_st() {
+    const NUM_THREADS: usize = 1;
+
+    let (_sender1, receiver1) = oneshot::channel::<()>();
+    let (_sender2, receiver2) = oneshot::channel::<()>();
+
+    let mut executor = Executor::new(NUM_THREADS);
+    static LAUNCH_COUNT: AtomicUsize = AtomicUsize::new(0);
+    static COMPLETION_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+    executor.spawn_and_forget(async move {
+        LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
+        let _ = receiver2.await;
+        COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
+    });
+    executor.spawn_and_forget(async move {
+        LAUNCH_COUNT.fetch_add(1, Ordering::Relaxed);
+        let _ = receiver1.await;
+        COMPLETION_COUNT.fetch_add(1, Ordering::Relaxed);
+    });
+
+    executor.run();
+    // Check that the executor returns on deadlock, i.e. none of the tasks has
+    // completed.
+    assert_eq!(LAUNCH_COUNT.load(Ordering::Relaxed), 2);
+    assert_eq!(COMPLETION_COUNT.load(Ordering::Relaxed), 0);
+}
+
+#[test]
+fn executor_drop_cycle() {
+    const NUM_THREADS: usize = 3;
+
+    let (sender1, mut receiver1) = mpsc::channel(2);
+    let (sender2, mut receiver2) = mpsc::channel(2);
+    let (sender3, mut receiver3) = mpsc::channel(2);
+
+    let mut executor = Executor::new(NUM_THREADS);
+    static DROP_COUNT: AtomicUsize = AtomicUsize::new(0);
+
+    // Spawn 3 tasks that wake one another when dropped.
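+    // Each task holds a `RunOnDrop` guard which, when the task is dropped,
+    // wakes the two other tasks and increments `DROP_COUNT`.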
+    executor.spawn_and_forget({
+        let mut sender2 = sender2.clone();
+        let mut sender3 = sender3.clone();
+
+        async move {
+            let _guard = RunOnDrop::new(move || {
+                let _ = sender2.try_send(());
+                let _ = sender3.try_send(());
+                DROP_COUNT.fetch_add(1, Ordering::Relaxed);
+            });
+            let _ = receiver1.next().await;
+        }
+    });
+    executor.spawn_and_forget({
+        let mut sender1 = sender1.clone();
+        let mut sender3 = sender3.clone();
+
+        async move {
+            let _guard = RunOnDrop::new(move || {
+                let _ = sender1.try_send(());
+                let _ = sender3.try_send(());
+                DROP_COUNT.fetch_add(1, Ordering::Relaxed);
+            });
+            let _ = receiver2.next().await;
+        }
+    });
+    executor.spawn_and_forget({
+        let mut sender1 = sender1.clone();
+        let mut sender2 = sender2.clone();
+
+        async move {
+            let _guard = RunOnDrop::new(move || {
+                let _ = sender1.try_send(());
+                let _ = sender2.try_send(());
+                DROP_COUNT.fetch_add(1, Ordering::Relaxed);
+            });
+            let _ = receiver3.next().await;
+        }
+    });
+
+    executor.run();
+
+    // Make sure that all tasks are eventually dropped even though each task
+    // wakes the others when dropped.
+    drop(executor);
+    assert_eq!(DROP_COUNT.load(Ordering::Relaxed), 3);
+}
diff --git a/asynchronix/src/runtime/executor/worker.rs b/asynchronix/src/runtime/executor/worker.rs
new file mode 100644
index 0000000..b989b02
--- /dev/null
+++ b/asynchronix/src/runtime/executor/worker.rs
@@ -0,0 +1,25 @@
+use std::cell::Cell;
+use std::sync::Arc;
+
+use super::task::Runnable;
+
+use super::pool::Pool;
+use super::LocalQueue;
+
+/// A local worker with access to global executor resources.
+pub(crate) struct Worker {
+    pub(crate) local_queue: LocalQueue,
+    pub(crate) fast_slot: Cell<Option<Runnable>>,
+    pub(crate) pool: Arc<Pool>,
+}
+
+impl Worker {
+    /// Creates a new worker.
+    pub(crate) fn new(local_queue: LocalQueue, pool: Arc<Pool>) -> Self {
+        Self {
+            local_queue,
+            fast_slot: Cell::new(None),
+            pool,
+        }
+    }
+}
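
The `fast_slot` above holds at most one `Runnable` outside the local queue. As a minimal sketch only, assuming a `LocalQueue::pop` method returning `Option<Runnable>` (this commit does not show the scheduling loop), a worker-side dispatch helper could look like:

    impl Worker {
        // Hypothetical helper: prefer the runnable stashed in the fast slot,
        // otherwise fall back to the local queue. `Cell::take` leaves `None`
        // behind, so the slot yields a runnable at most once per call.
        pub(crate) fn next_runnable(&self) -> Option<Runnable> {
            self.fast_slot.take().or_else(|| self.local_queue.pop())
        }
    }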