diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f01acdc..e8fd799e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,17 @@ change warranting a new major release: # [unreleased] +## Added + +- PDEC: Added basic FDIR to limit the number of allowed TC interrupts and to allow complete task + lockups in the case an IRQ is immediately re-raised by the PDEC module. This is done by only + allowing a certain number of handled IRQs (whether they yield a valid TC or not) during + time windows of one second. Right now, 800 IRQs/TCs are allowed per time window. + This time window is reset if a TC reception timeout after 500ms occurs. TBD whether the maximum + allowed number will be a configurable parameter. If the number of occured IRQs is exceeded, + an event is triggered and the task is delayed for 400 ms. + PR: https://egit.irs.uni-stuttgart.de/eive/eive-obsw/pulls/393 + # [v1.29.1] ## Fixed @@ -56,6 +67,8 @@ eive-tmtc: v2.13.0 - Patch version which compiles for EM - CFDP Funnel bugfix: CCSDS wrapping was buggy and works properly now. +- PDEC: Some adaptions to prevent task lockups on invalid FAR states. + PR: https://egit.irs.uni-stuttgart.de/eive/eive-obsw/pulls/393 - CMakeLists.txt fix which broke CI/CD builds when server could not retrieve full git SHA. - Possible regression in the MAX31865 polling task: Using a `ManualCsLockGuard` for reconfiguring and then polling the sensor is problematic, invalid sensor values will be read. diff --git a/linux/ipcore/PdecHandler.cpp b/linux/ipcore/PdecHandler.cpp index 3244280e..5ea85516 100644 --- a/linux/ipcore/PdecHandler.cpp +++ b/linux/ipcore/PdecHandler.cpp @@ -1,6 +1,7 @@ #include "PdecHandler.h" #include +#include #include #include #include @@ -113,7 +114,7 @@ ReturnValue_t PdecHandler::polledOperation() { // Requires reconfiguration and reinitialization of PDEC triggerEvent(INVALID_FAR); state = State::WAIT_FOR_RECOVERY; - return result; + break; } state = State::RUNNING; break; @@ -145,8 +146,9 @@ ReturnValue_t PdecHandler::irqOperation() { // Used to unmask IRQ uint32_t info = 1; - ssize_t nb = 0; - int ret = 0; + + interruptWindowCd.resetTimer(); + // Clear interrupts with dummy read before unmasking the interrupt. Use a volatile to prevent // read being optimized away. volatile uint32_t dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); @@ -157,7 +159,7 @@ ReturnValue_t PdecHandler::irqOperation() { readCommandQueue(); switch (state) { case State::INIT: - resetFarStatFlag(); + result = resetFarStatFlag(); if (result != returnvalue::OK) { // Requires reconfiguration and reinitialization of PDEC triggerEvent(INVALID_FAR); @@ -167,57 +169,16 @@ ReturnValue_t PdecHandler::irqOperation() { state = State::RUNNING; break; case State::RUNNING: { - nb = write(fd, &info, sizeof(info)); - if (nb != static_cast(sizeof(info))) { - sif::error << "PdecHandler::irqOperation: Unmasking IRQ failed" << std::endl; - triggerEvent(WRITE_SYSCALL_ERROR_PDEC, errno); - close(fd); - state = State::INIT; - return returnvalue::FAILED; - } - struct pollfd fds = {.fd = fd, .events = POLLIN, .revents = 0}; - ret = poll(&fds, 1, IRQ_TIMEOUT_MS); - if (ret == 0) { - // No TCs for timeout period - checkLocks(); - lockCheckCd.resetTimer(); - } else if (ret >= 1) { - nb = read(fd, &info, sizeof(info)); - if (nb == static_cast(sizeof(info))) { - uint32_t pisr = *(registerBaseAddress + PDEC_PISR_OFFSET); - if ((pisr & TC_NEW_MASK) == TC_NEW_MASK) { - // handle TC - handleNewTc(); - } - if ((pisr & TC_ABORT_MASK) == TC_ABORT_MASK) { - tcAbortCounter += 1; - } - if ((pisr & NEW_FAR_MASK) == NEW_FAR_MASK) { - // Read FAR here - CURRENT_FAR = readFar(); - checkFrameAna(CURRENT_FAR); - } - if (lockCheckCd.hasTimedOut()) { - checkLocks(); - lockCheckCd.resetTimer(); - } - // Clear interrupts with dummy read - dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); - } - } else { - sif::error << "PdecHandler::irqOperation: Poll error with errno " << errno << ": " - << strerror(errno) << std::endl; - triggerEvent(POLL_SYSCALL_ERROR_PDEC, errno); - close(fd); - state = State::INIT; - return returnvalue::FAILED; - } + checkAndHandleIrqs(fd, info); break; } case State::WAIT_FOR_RECOVERY: + TaskFactory::delayTask(400); break; default: + // Should never happen. sif::error << "PdecHandler::performOperation: Invalid state" << std::endl; + TaskFactory::delayTask(400); break; } } @@ -226,6 +187,71 @@ ReturnValue_t PdecHandler::irqOperation() { return returnvalue::OK; } +ReturnValue_t PdecHandler::checkAndHandleIrqs(int fd, uint32_t& info) { + ssize_t nb = write(fd, &info, sizeof(info)); + if (nb != static_cast(sizeof(info))) { + sif::error << "PdecHandler::irqOperation: Unmasking IRQ failed" << std::endl; + triggerEvent(WRITE_SYSCALL_ERROR_PDEC, errno); + close(fd); + state = State::INIT; + return returnvalue::FAILED; + } + struct pollfd fds = {.fd = fd, .events = POLLIN, .revents = 0}; + int ret = poll(&fds, 1, IRQ_TIMEOUT_MS); + if (ret == 0) { + // No TCs for timeout period + checkLocks(); + genericCheckCd.resetTimer(); + resetIrqLimiters(); + } else if (ret >= 1) { + // Interrupt handling. + nb = read(fd, &info, sizeof(info)); + interruptCounter++; + if (nb == static_cast(sizeof(info))) { + uint32_t pisr = *(registerBaseAddress + PDEC_PISR_OFFSET); + if ((pisr & TC_NEW_MASK) == TC_NEW_MASK) { + // handle TC + handleNewTc(); + } + if ((pisr & TC_ABORT_MASK) == TC_ABORT_MASK) { + tcAbortCounter += 1; + } + if ((pisr & NEW_FAR_MASK) == NEW_FAR_MASK) { + // Read FAR here + CURRENT_FAR = readFar(); + checkFrameAna(CURRENT_FAR); + } + // Clear interrupts with dummy read. Volatile is important here to prevent + // compiler opitmizations in release builds! + volatile uint32_t dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); + static_cast(dummy); + + if (genericCheckCd.hasTimedOut()) { + checkLocks(); + genericCheckCd.resetTimer(); + if (interruptWindowCd.hasTimedOut()) { + if (interruptCounter >= MAX_ALLOWED_IRQS_PER_WINDOW) { + sif::error << "PdecHandler::irqOperation: Possible IRQ storm" << std::endl; + triggerEvent(TOO_MANY_IRQS, MAX_ALLOWED_IRQS_PER_WINDOW); + resetIrqLimiters(); + TaskFactory::delayTask(400); + return returnvalue::FAILED; + } + resetIrqLimiters(); + } + } + } + } else { + sif::error << "PdecHandler::irqOperation: Poll error with errno " << errno << ": " + << strerror(errno) << std::endl; + triggerEvent(POLL_SYSCALL_ERROR_PDEC, errno); + close(fd); + state = State::INIT; + return returnvalue::FAILED; + } + return returnvalue::OK; +} + void PdecHandler::readCommandQueue(void) { CommandMessage commandMessage; ReturnValue_t result = returnvalue::FAILED; @@ -618,6 +644,11 @@ void PdecHandler::printPdecMon() { uint32_t PdecHandler::readFar() { return *(registerBaseAddress + PDEC_FAR_OFFSET); } +void PdecHandler::resetIrqLimiters() { + interruptWindowCd.resetTimer(); + interruptCounter = 0; +} + std::string PdecHandler::getMonStatusString(uint32_t status) { switch (status) { case TC_CHANNEL_INACTIVE: diff --git a/linux/ipcore/PdecHandler.h b/linux/ipcore/PdecHandler.h index b514f501..51cf750e 100644 --- a/linux/ipcore/PdecHandler.h +++ b/linux/ipcore/PdecHandler.h @@ -87,6 +87,8 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc static const Event LOST_CARRIER_LOCK_PDEC = MAKE_EVENT(5, severity::INFO); //! [EXPORT] : [COMMENT] Lost bit lock static const Event LOST_BIT_LOCK_PDEC = MAKE_EVENT(6, severity::INFO); + //! [EXPORT] : [COMMENT] Too many IRQs over the time window of one second. P1: Allowed TCs + static constexpr Event TOO_MANY_IRQS = MAKE_EVENT(7, severity::MEDIUM); static constexpr Event POLL_SYSCALL_ERROR_PDEC = event::makeEvent(SUBSYSTEM_ID, 7, severity::MEDIUM); static constexpr Event WRITE_SYSCALL_ERROR_PDEC = @@ -180,6 +182,8 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc // discarded static const uint8_t MAP_CLK_FREQ = 2; + static constexpr uint32_t MAX_ALLOWED_IRQS_PER_WINDOW = 800; + enum class FrameAna_t : uint8_t { ABANDONED_CLTU, FRAME_DIRTY, @@ -206,13 +210,16 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc static uint32_t CURRENT_FAR; - Countdown lockCheckCd = Countdown(IRQ_TIMEOUT_MS); + Countdown genericCheckCd = Countdown(IRQ_TIMEOUT_MS); object_id_t tcDestinationId; AcceptsTelecommandsIF* tcDestination = nullptr; LinuxLibgpioIF* gpioComIF = nullptr; + uint32_t interruptCounter = 0; + Countdown interruptWindowCd = Countdown(1000); + /** * Reset signal is required to hold PDEC in reset state until the configuration has been * written to the appropriate memory space. @@ -259,6 +266,7 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc ReturnValue_t polledOperation(); ReturnValue_t irqOperation(); + ReturnValue_t checkAndHandleIrqs(int fd, uint32_t& info); uint32_t readFar(); @@ -294,6 +302,8 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc */ void checkLocks(); + void resetIrqLimiters(); + /** * @brief Analyzes the FramAna field (frame analysis data) of a FAR report. *