use std::fmt;
use std::iter::FusedIterator;
use std::marker::PhantomData;
use std::mem::{drop, MaybeUninit};
use std::panic::{RefUnwindSafe, UnwindSafe};
use std::sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release};
use std::sync::Arc;
use cache_padded::CachePadded;
use crate::loom_exports::cell::UnsafeCell;
use crate::loom_exports::sync::atomic::{AtomicU32, AtomicU64};
use crate::loom_exports::{debug_or_loom_assert, debug_or_loom_assert_eq};
pub(crate) use buffers::*;
mod buffers;
#[cfg(test)]
mod tests;
/// A double-ended FIFO work-stealing queue.
///
/// The general operation of the queue is based on tokio's worker queue, itself
/// based on the Go scheduler's worker queue.
///
/// The queue tracks its tail and head position within a ring buffer with
/// wrap-around integers, where the least significant bits specify the actual
/// buffer index. All positions have bit widths that are intentionally larger
/// than necessary for buffer indexing because:
/// - an extra bit is needed to disambiguate between empty and full buffers when
/// the start and end positions of the buffer are equal,
/// - the worker head is also used as a long-cycle counter to mitigate the risk
/// of ABA.
///
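/// As a minimal illustration of this position arithmetic (the capacity of 8
/// below is only an example, not the actual buffer capacity):
///
/// ```
/// // Positions are wrap-around `u32` integers; only the low bits (3 of them
/// // for a capacity of 8) are used as buffer indices.
/// let capacity: u32 = 8;
/// let mask = capacity - 1;
/// let head: u32 = u32::MAX - 1; // head just before the numerical wrap-around
/// let tail = head.wrapping_add(capacity); // tail of a full buffer
/// assert_eq!(tail.wrapping_sub(head), capacity); // the length is still correct
/// assert_eq!(head & mask, 6); // actual buffer index of the head
/// ```
///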
#[derive(Debug)]
struct Queue<T, B: Buffer<T>> {
/// Positions of the head as seen by the worker (most significant bits) and
/// as seen by a stealer (least significant bits).
heads: CachePadded<AtomicU64>,
/// Position of the tail.
tail: CachePadded<AtomicU32>,
/// Queue items.
buffer: Box<B::Data>,
/// Make the type !Send and !Sync by default.
_phantom: PhantomData<UnsafeCell<T>>,
}
impl<T, B: Buffer<T>> Queue<T, B> {
/// Read an item at the given position.
///
/// The position is automatically mapped to a valid buffer index using a
/// modulo operation.
///
/// # Safety
///
/// The item at the given position must have been initialized before and
/// cannot have been moved out.
///
/// The caller must guarantee that the item at this position cannot be
/// written to or moved out concurrently.
#[inline]
unsafe fn read_at(&self, position: u32) -> T {
let index = (position & B::MASK) as usize;
(*self.buffer).as_ref()[index].with(|slot| slot.read().assume_init())
}
/// Write an item at the given position.
///
/// The position is automatically mapped to a valid buffer index using a
/// modulo operation.
///
/// # Note
///
/// If an item is already initialized but was not moved out yet, it will be
/// leaked.
///
/// # Safety
///
/// The caller must guarantee that the item at this position cannot be read
/// or written to concurrently.
#[inline]
unsafe fn write_at(&self, position: u32, item: T) {
let index = (position & B::MASK) as usize;
(*self.buffer).as_ref()[index].with_mut(|slot| slot.write(MaybeUninit::new(item)));
}
/// Attempt to book `N` items for stealing where `N` is specified by a
/// closure which takes as argument the total count of available items.
///
/// In case of success, the returned tuple contains the stealer head and an
/// item count at least equal to 1, in this order.
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item could be stolen, either because the queue is empty or because
/// `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
///
/// # Safety
///
/// This function is not strictly unsafe, but because it initiates the
/// stealing operation by moving the worker head within the packed `heads`
/// atomic without updating the stealer head, its misuse (e.g. failing to
/// complete the stealing operation by re-aligning the two heads) can result
/// in permanently blocking subsequent stealing operations.
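///
/// # Example
///
/// An illustrative (not compiled) call that books about half of the
/// available items, capped at 16:
///
/// ```ignore
/// // The closure receives the count of available items and returns how many
/// // to book; the second argument caps the booked count.
/// let (stealer_head, count) = queue.book_items(|available| available - available / 2, 16)?;
/// ```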
fn book_items<C>(&self, mut count_fn: C, max_count: u32) -> Result<(u32, u32), StealError>
where
C: FnMut(usize) -> usize,
{
let mut heads = self.heads.load(Acquire);
loop {
let (worker_head, stealer_head) = unpack_heads(heads);
// Bail out if both heads differ because it means another stealing
// operation is concurrently ongoing.
if stealer_head != worker_head {
return Err(StealError::Busy);
}
let tail = self.tail.load(Acquire);
let item_count = tail.wrapping_sub(worker_head);
// `item_count` is tested now because `count_fn` may expect
// `item_count>0`.
if item_count == 0 {
return Err(StealError::Empty);
}
// Unwind safety: it is OK if `count_fn` panics because no state has
// been modified yet.
let count =
(count_fn(item_count as usize).min(max_count as usize) as u32).min(item_count);
// The special case `count_fn() == 0` must be tested specifically,
// because if the compare-exchange succeeds with `count=0`, the new
// worker head will be the same as the old one so other stealers
// will not detect that stealing is currently ongoing and may try to
// actually steal items and concurrently modify the position of the
// heads.
if count == 0 {
return Err(StealError::Empty);
}
// Move the worker head only.
let new_heads = pack_heads(worker_head.wrapping_add(count), stealer_head);
// Attempt to book the slots. Only one stealer can succeed since
// once this atomic is changed, the other thread will necessarily
// observe a mismatch between the two heads.
match self
.heads
.compare_exchange_weak(heads, new_heads, Acquire, Acquire)
{
Ok(_) => return Ok((stealer_head, count)),
// We lost the race to a concurrent pop or steal operation, or
// the CAS failed spuriously; try again.
Err(h) => heads = h,
}
}
}
}
impl<T, B: Buffer<T>> Drop for Queue<T, B> {
fn drop(&mut self) {
let worker_head = unpack_heads(self.heads.load(Relaxed)).0;
let tail = self.tail.load(Relaxed);
let count = tail.wrapping_sub(worker_head);
for offset in 0..count {
drop(unsafe { self.read_at(worker_head.wrapping_add(offset)) })
}
}
}
/// Handle for single-threaded FIFO push and pop operations.
#[derive(Debug)]
pub(crate) struct Worker<T, B: Buffer<T>> {
queue: Arc<Queue<T, B>>,
}
impl<T, B: Buffer<T>> Worker<T, B> {
/// Creates a new queue and returns a `Worker` handle.
pub(crate) fn new() -> Self {
let queue = Arc::new(Queue {
heads: CachePadded::new(AtomicU64::new(0)),
tail: CachePadded::new(AtomicU32::new(0)),
buffer: B::allocate(),
_phantom: PhantomData,
});
Worker { queue }
}
/// Creates a new `Stealer` handle associated to this `Worker`.
///
/// An arbitrary number of `Stealer` handles can be created, either using
/// this method or cloning an existing `Stealer` handle.
pub(crate) fn stealer(&self) -> Stealer<T, B> {
Stealer {
queue: self.queue.clone(),
}
}
/// Returns the number of items that can be successfully pushed onto the
/// queue.
///
/// Note that the spare capacity may be underestimated due to
/// concurrent stealing operations.
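///
/// # Example
///
/// An illustrative (not compiled) use together with [`Worker::extend`], where
/// `batch` is a hypothetical `Vec` of pending items:
///
/// ```ignore
/// if worker.spare_capacity() >= batch.len() {
///     // All items are guaranteed to fit: only the worker itself can push,
///     // so the spare capacity cannot shrink concurrently.
///     worker.extend(batch.drain(..));
/// }
/// ```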
pub(crate) fn spare_capacity(&self) -> usize {
let capacity = <B as Buffer<T>>::CAPACITY;
let stealer_head = unpack_heads(self.queue.heads.load(Relaxed)).1;
let tail = self.queue.tail.load(Relaxed);
// Aggregate count of available items (those which can be popped) and of
// items currently being stolen.
let len = tail.wrapping_sub(stealer_head);
(capacity - len) as usize
}
/// Attempts to push one item at the tail of the queue.
///
/// # Errors
///
/// This will fail if the queue is full, in which case the item is returned
/// as the error field.
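///
/// # Example
///
/// An illustrative (not compiled) use, where `injector` is a hypothetical
/// overflow queue:
///
/// ```ignore
/// if let Err(item) = worker.push(item) {
///     // The local queue is full: fall back to the overflow queue.
///     injector.push(item);
/// }
/// ```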
pub(crate) fn push(&self, item: T) -> Result<(), T> {
let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1;
let tail = self.queue.tail.load(Relaxed);
// Check that the buffer is not full.
if tail.wrapping_sub(stealer_head) >= B::CAPACITY {
return Err(item);
}
// Store the item.
unsafe { self.queue.write_at(tail, item) };
// Make the item visible by moving the tail.
//
// Ordering: the Release ordering ensures that the subsequent
// acquisition of this atomic by a stealer will make the previous write
// visible.
self.queue.tail.store(tail.wrapping_add(1), Release);
Ok(())
}
/// Attempts to push the content of an iterator at the tail of the queue.
///
/// It is the responsibility of the caller to ensure that there is enough
/// spare capacity to accommodate all iterator items, for instance by
/// calling [`Worker::spare_capacity`] beforehand. Otherwise, the iterator
/// is dropped while still holding the excess items.
pub(crate) fn extend<I: IntoIterator<Item = T>>(&self, iter: I) {
let stealer_head = unpack_heads(self.queue.heads.load(Acquire)).1;
let mut tail = self.queue.tail.load(Relaxed);
let max_tail = stealer_head.wrapping_add(B::CAPACITY);
for item in iter {
// Check whether the buffer is full.
if tail == max_tail {
break;
}
// Store the item.
unsafe { self.queue.write_at(tail, item) };
tail = tail.wrapping_add(1);
}
// Make the items visible by moving the tail.
//
// Ordering: the Release ordering ensures that the subsequent
// acquisition of this atomic by a stealer will make the previous write
// visible.
self.queue.tail.store(tail, Release);
}
/// Attempts to pop one item from the head of the queue.
///
/// This returns None if the queue is empty.
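///
/// # Example
///
/// An illustrative (not compiled) worker loop, where `run` stands in for the
/// caller's task handler:
///
/// ```ignore
/// while let Some(task) = worker.pop() {
///     run(task);
/// }
/// ```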
pub(crate) fn pop(&self) -> Option<T> {
let mut heads = self.queue.heads.load(Acquire);
let prev_worker_head = loop {
let (worker_head, stealer_head) = unpack_heads(heads);
let tail = self.queue.tail.load(Relaxed);
// Check if the queue is empty.
if tail == worker_head {
return None;
}
// Move the worker head. The weird cast from `bool` to `u32` is to
// steer the compiler towards branchless code.
let next_heads = pack_heads(
worker_head.wrapping_add(1),
stealer_head.wrapping_add((stealer_head == worker_head) as u32),
);
// Attempt to book the items.
let res = self
.queue
.heads
.compare_exchange_weak(heads, next_heads, AcqRel, Acquire);
match res {
Ok(_) => break worker_head,
// We lost the race to a stealer or the CAS failed spuriously; try again.
Err(h) => heads = h,
}
};
unsafe { Some(self.queue.read_at(prev_worker_head)) }
}
/// Returns an iterator that steals items from the head of the queue.
///
/// The returned iterator steals up to `N` items, where `N` is specified by
/// a closure which takes as argument the total count of items available for
/// stealing. Upon success, the number of items ultimately stolen can be
/// from 1 to `N`, depending on the number of available items.
///
/// # Beware
///
/// All items stolen by the iterator should be moved out as soon as
/// possible, because until then or until the iterator is dropped, all
/// concurrent stealing operations will fail with [`StealError::Busy`].
///
/// # Leaking
///
/// If the iterator is leaked before all stolen items have been moved out,
/// subsequent stealing operations will permanently fail with
/// [`StealError::Busy`].
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item was stolen, either because the queue is empty or `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
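///
/// # Example
///
/// An illustrative (not compiled) call that drains about half of the
/// available items and moves them out immediately, as advised above:
///
/// ```ignore
/// let stolen: Vec<_> = worker.drain(|n| n - n / 2)?.collect();
/// ```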
pub(crate) fn drain<C>(&self, count_fn: C) -> Result<Drain<'_, T, B>, StealError>
where
C: FnMut(usize) -> usize,
{
let (head, count) = self.queue.book_items(count_fn, u32::MAX)?;
Ok(Drain {
queue: &self.queue,
head,
from_head: head,
to_head: head.wrapping_add(count),
})
}
}
impl<T, B: Buffer<T>> Default for Worker<T, B> {
fn default() -> Self {
Self::new()
}
}
impl<T, B: Buffer<T>> UnwindSafe for Worker<T, B> {}
impl<T, B: Buffer<T>> RefUnwindSafe for Worker<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Send for Worker<T, B> {}
/// A draining iterator for [`Worker<T, B>`].
///
/// This iterator is created by [`Worker::drain`]. See its documentation for
/// more.
#[derive(Debug)]
pub(crate) struct Drain<'a, T, B: Buffer<T>> {
queue: &'a Queue<T, B>,
head: u32,
from_head: u32,
to_head: u32,
}
impl<'a, T, B: Buffer<T>> Iterator for Drain<'a, T, B> {
type Item = T;
fn next(&mut self) -> Option<T> {
if self.head == self.to_head {
return None;
}
let item = Some(unsafe { self.queue.read_at(self.head) });
self.head = self.head.wrapping_add(1);
// We cannot rely on the caller to call `next` again after the last item
// is yielded so the heads must be updated immediately when yielding the
// last item.
if self.head == self.to_head {
// Signal that the stealing operation has completed.
let mut heads = self.queue.heads.load(Relaxed);
loop {
let (worker_head, stealer_head) = unpack_heads(heads);
debug_or_loom_assert_eq!(stealer_head, self.from_head);
let res = self.queue.heads.compare_exchange_weak(
heads,
pack_heads(worker_head, worker_head),
AcqRel,
Acquire,
);
match res {
Ok(_) => break,
Err(h) => {
heads = h;
}
}
}
}
item
}
fn size_hint(&self) -> (usize, Option<usize>) {
let sz = self.to_head.wrapping_sub(self.head) as usize;
(sz, Some(sz))
}
}
impl<'a, T, B: Buffer<T>> ExactSizeIterator for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> FusedIterator for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> Drop for Drain<'a, T, B> {
fn drop(&mut self) {
// Drop all items and make sure the head is updated so that subsequent
// stealing operations can succeed.
for _item in self {}
}
}
impl<'a, T, B: Buffer<T>> UnwindSafe for Drain<'a, T, B> {}
impl<'a, T, B: Buffer<T>> RefUnwindSafe for Drain<'a, T, B> {}
unsafe impl<'a, T: Send, B: Buffer<T>> Send for Drain<'a, T, B> {}
unsafe impl<'a, T: Send, B: Buffer<T>> Sync for Drain<'a, T, B> {}
/// Handle for multi-threaded stealing operations.
#[derive(Debug)]
pub(crate) struct Stealer<T, B: Buffer<T>> {
queue: Arc<Queue<T, B>>,
}
impl<T, B: Buffer<T>> Stealer<T, B> {
/// Attempts to steal items from the head of the queue, returning one of
/// them directly and moving the others to the tail of another queue.
///
/// Up to `N` items are stolen (including the one returned directly), where
/// `N` is specified by a closure which takes as argument the total count of
/// items available for stealing. Upon success, one item is returned and
/// from 0 to `N-1` items are moved to the destination queue, depending on
/// the number of available items and the capacity of the destination queue.
///
/// The returned item is the most recent one among the stolen items.
///
/// # Errors
///
/// An error is returned in the following cases:
/// 1) no item was stolen, either because the queue is empty or `N` is 0,
/// 2) a concurrent stealing operation is ongoing.
///
/// Failure to transfer any item to the destination queue is not considered
/// an error as long as one element could be returned directly. This can
/// occur if the destination queue is full, if the source queue has only one
/// item or if `N` is 1.
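///
/// # Example
///
/// An illustrative (not compiled) call that steals about half of the items,
/// moving all but the most recent one to `local_worker`, a hypothetical
/// destination `Worker`:
///
/// ```ignore
/// match stealer.steal_and_pop(&local_worker, |n| n - n / 2) {
///     Ok(task) => { /* run the returned task */ }
///     Err(StealError::Empty) => { /* nothing to steal: try another queue */ }
///     Err(StealError::Busy) => { /* another steal is in flight: retry later */ }
/// }
/// ```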
pub(crate) fn steal_and_pop<C, BDest>(
&self,
dest: &Worker<T, BDest>,
count_fn: C,
) -> Result<T, StealError>
where
C: FnMut(usize) -> usize,
BDest: Buffer<T>,
{
// Compute the free capacity of the destination queue.
//
// Ordering: see `Worker::push()` method.
let dest_tail = dest.queue.tail.load(Relaxed);
let dest_stealer_head = unpack_heads(dest.queue.heads.load(Acquire)).1;
let dest_free_capacity = BDest::CAPACITY - dest_tail.wrapping_sub(dest_stealer_head);
debug_or_loom_assert!(dest_free_capacity <= BDest::CAPACITY);
let (stealer_head, count) = self.queue.book_items(count_fn, dest_free_capacity + 1)?;
let transfer_count = count - 1;
debug_or_loom_assert!(transfer_count <= dest_free_capacity);
// Move all items but the last to the destination queue.
for offset in 0..transfer_count {
unsafe {
let item = self.queue.read_at(stealer_head.wrapping_add(offset));
dest.queue.write_at(dest_tail.wrapping_add(offset), item);
}
}
// Read the last item.
let last_item = unsafe {
self.queue
.read_at(stealer_head.wrapping_add(transfer_count))
};
// Make the moved items visible by updating the destination tail position.
//
// Ordering: see comments in the `push()` method.
dest.queue
.tail
.store(dest_tail.wrapping_add(transfer_count), Release);
// Signal that the stealing operation has completed.
let mut heads = self.queue.heads.load(Relaxed);
loop {
let (worker_head, sh) = unpack_heads(heads);
debug_or_loom_assert_eq!(stealer_head, sh);
let res = self.queue.heads.compare_exchange_weak(
heads,
pack_heads(worker_head, worker_head),
AcqRel,
Acquire,
);
match res {
Ok(_) => return Ok(last_item),
Err(h) => {
heads = h;
}
}
}
}
}
impl<T, B: Buffer<T>> Clone for Stealer<T, B> {
fn clone(&self) -> Self {
Stealer {
queue: self.queue.clone(),
}
}
}
impl<T, B: Buffer<T>> UnwindSafe for Stealer<T, B> {}
impl<T, B: Buffer<T>> RefUnwindSafe for Stealer<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Send for Stealer<T, B> {}
unsafe impl<T: Send, B: Buffer<T>> Sync for Stealer<T, B> {}
/// Error returned when stealing is unsuccessful.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum StealError {
/// No item was stolen.
Empty,
/// Another concurrent stealing operation is ongoing.
Busy,
}
impl fmt::Display for StealError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
StealError::Empty => write!(f, "cannot steal from empty queue"),
StealError::Busy => write!(f, "a concurrent steal operation is ongoing"),
}
}
}
#[inline(always)]
/// Extract the worker head and stealer head (in this order) from packed heads.
fn unpack_heads(heads: u64) -> (u32, u32) {
((heads >> u32::BITS) as u32, heads as u32)
}
#[inline(always)]
/// Pack the worker head (most significant bits) and the stealer head (least
/// significant bits) into a single value.
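///
/// A self-contained sketch of the packing scheme (the values are arbitrary):
///
/// ```
/// // Worker head in the high 32 bits, stealer head in the low 32 bits.
/// let (worker_head, stealer_head) = (7u32, 3u32);
/// let heads = ((worker_head as u64) << u32::BITS) | stealer_head as u64;
/// assert_eq!(heads, 0x0000_0007_0000_0003);
/// // Unpacking recovers both heads.
/// assert_eq!(((heads >> u32::BITS) as u32, heads as u32), (7, 3));
/// ```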
fn pack_heads(worker_head: u32, stealer_head: u32) -> u64 {
((worker_head as u64) << u32::BITS) | stealer_head as u64
}