diff --git a/CHANGELOG.md b/CHANGELOG.md index 69041a24..f89175d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/). The [milestone](https://egit.irs.uni-stuttgart.de/eive/eive-obsw/milestones) list yields a list of all related PRs for each release. -Starting at v2.0.0, the following changes will consitute of a breaking -change warranting a new major release: +Starting at v2.0.0, this project will adhere to semantic versioning and the following changes +will constitute a breaking change warranting a new major release: - The TMTC interface changes in any shape of form. -- The behavour of the OBSW changes in a major shape or form relevant - for operations +- The behaviour of the OBSW changes in a major shape or form relevant for operations # [unreleased] @@ -23,6 +22,23 @@ change warranting a new major release: telemetry. PR: https://egit.irs.uni-stuttgart.de/eive/eive-obsw/pulls/320/files +# [v1.30.0] + +eive-tmtc: v2.14.0 + +Event IDs for PDEC handler have changed in a breaking manner. + +## Added and Fixed + +- PDEC: Added basic FDIR to limit the number of allowed TC interrupts and to avoid complete task + lockups in the case an IRQ is immediately re-raised by the PDEC module. This is done by only + allowing a certain number of handled IRQs (whether they yield a valid TC or not) during + time windows of one second. Right now, 800 IRQs/TCs are allowed per time window. + This time window is reset if a TC reception timeout after 500ms occurs. TBD whether the maximum + allowed number will be a configurable parameter. If the number of occurred IRQs is exceeded, + an event is triggered and the task is delayed for 400 ms. + PR: https://egit.irs.uni-stuttgart.de/eive/eive-obsw/pulls/393 + # [v1.29.1] ## Fixed @@ -62,6 +78,8 @@ eive-tmtc: v2.13.0 - Patch version which compiles for EM - CFDP Funnel bugfix: CCSDS wrapping was buggy and works properly now. 
+- PDEC: Some adaptions to prevent task lockups on invalid FAR states. + PR: https://egit.irs.uni-stuttgart.de/eive/eive-obsw/pulls/393 - CMakeLists.txt fix which broke CI/CD builds when server could not retrieve full git SHA. - Possible regression in the MAX31865 polling task: Using a `ManualCsLockGuard` for reconfiguring and then polling the sensor is problematic, invalid sensor values will be read. diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e1aa4eb..a41e2a8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ cmake_minimum_required(VERSION 3.13) set(OBSW_VERSION_MAJOR 1) -set(OBSW_VERSION_MINOR 29) -set(OBSW_VERSION_REVISION 1) +set(OBSW_VERSION_MINOR 30) +set(OBSW_VERSION_REVISION 0) # set(CMAKE_VERBOSE TRUE) diff --git a/bsp_hosted/fsfwconfig/events/translateEvents.cpp b/bsp_hosted/fsfwconfig/events/translateEvents.cpp index 32dc1112..79228e3f 100644 --- a/bsp_hosted/fsfwconfig/events/translateEvents.cpp +++ b/bsp_hosted/fsfwconfig/events/translateEvents.cpp @@ -1,7 +1,7 @@ /** - * @brief Auto-generated event translation file. Contains 258 translations. + * @brief Auto-generated event translation file. Contains 259 translations. 
* @details - * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateEvents.h" @@ -151,6 +151,7 @@ const char *CARRIER_LOCK_STRING = "CARRIER_LOCK"; const char *BIT_LOCK_PDEC_STRING = "BIT_LOCK_PDEC"; const char *LOST_CARRIER_LOCK_PDEC_STRING = "LOST_CARRIER_LOCK_PDEC"; const char *LOST_BIT_LOCK_PDEC_STRING = "LOST_BIT_LOCK_PDEC"; +const char *TOO_MANY_IRQS_STRING = "TOO_MANY_IRQS"; const char *POLL_SYSCALL_ERROR_PDEC_STRING = "POLL_SYSCALL_ERROR_PDEC"; const char *WRITE_SYSCALL_ERROR_PDEC_STRING = "WRITE_SYSCALL_ERROR_PDEC"; const char *IMAGE_UPLOAD_FAILED_STRING = "IMAGE_UPLOAD_FAILED"; @@ -554,8 +555,10 @@ const char *translateEvents(Event event) { case (12406): return LOST_BIT_LOCK_PDEC_STRING; case (12407): - return POLL_SYSCALL_ERROR_PDEC_STRING; + return TOO_MANY_IRQS_STRING; case (12408): + return POLL_SYSCALL_ERROR_PDEC_STRING; + case (12409): return WRITE_SYSCALL_ERROR_PDEC_STRING; case (12500): return IMAGE_UPLOAD_FAILED_STRING; diff --git a/bsp_hosted/fsfwconfig/objects/translateObjects.cpp b/bsp_hosted/fsfwconfig/objects/translateObjects.cpp index be535c8c..2717b4e1 100644 --- a/bsp_hosted/fsfwconfig/objects/translateObjects.cpp +++ b/bsp_hosted/fsfwconfig/objects/translateObjects.cpp @@ -2,7 +2,7 @@ * @brief Auto-generated object translation file. * @details * Contains 154 translations. 
- * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateObjects.h" diff --git a/generators/bsp_hosted_events.csv b/generators/bsp_hosted_events.csv index b8bc4677..5c2b7e99 100644 --- a/generators/bsp_hosted_events.csv +++ b/generators/bsp_hosted_events.csv @@ -145,8 +145,9 @@ Event ID (dec); Event ID (hex); Name; Severity; Description; File Path 12404;0x3074;BIT_LOCK_PDEC;INFO;Bit lock detected (data valid);linux/ipcore/PdecHandler.h 12405;0x3075;LOST_CARRIER_LOCK_PDEC;INFO;Lost carrier lock;linux/ipcore/PdecHandler.h 12406;0x3076;LOST_BIT_LOCK_PDEC;INFO;Lost bit lock;linux/ipcore/PdecHandler.h -12407;0x3077;POLL_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h -12408;0x3078;WRITE_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h +12407;0x3077;TOO_MANY_IRQS;MEDIUM;Too many IRQs over the time window of one second. P1: Allowed TCs;linux/ipcore/PdecHandler.h +12408;0x3078;POLL_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h +12409;0x3079;WRITE_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h 12500;0x30d4;IMAGE_UPLOAD_FAILED;LOW;Image upload failed;linux/devices/startracker/StrHelper.h 12501;0x30d5;IMAGE_DOWNLOAD_FAILED;LOW;Image download failed;linux/devices/startracker/StrHelper.h 12502;0x30d6;IMAGE_UPLOAD_SUCCESSFUL;LOW;Uploading image to star tracker was successfulop;linux/devices/startracker/StrHelper.h diff --git a/generators/bsp_q7s_events.csv b/generators/bsp_q7s_events.csv index b8bc4677..5c2b7e99 100644 --- a/generators/bsp_q7s_events.csv +++ b/generators/bsp_q7s_events.csv @@ -145,8 +145,9 @@ Event ID (dec); Event ID (hex); Name; Severity; Description; File Path 12404;0x3074;BIT_LOCK_PDEC;INFO;Bit lock detected (data valid);linux/ipcore/PdecHandler.h 12405;0x3075;LOST_CARRIER_LOCK_PDEC;INFO;Lost carrier lock;linux/ipcore/PdecHandler.h 12406;0x3076;LOST_BIT_LOCK_PDEC;INFO;Lost bit lock;linux/ipcore/PdecHandler.h 
-12407;0x3077;POLL_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h -12408;0x3078;WRITE_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h +12407;0x3077;TOO_MANY_IRQS;MEDIUM;Too many IRQs over the time window of one second. P1: Allowed TCs;linux/ipcore/PdecHandler.h +12408;0x3078;POLL_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h +12409;0x3079;WRITE_SYSCALL_ERROR_PDEC;MEDIUM;No description;linux/ipcore/PdecHandler.h 12500;0x30d4;IMAGE_UPLOAD_FAILED;LOW;Image upload failed;linux/devices/startracker/StrHelper.h 12501;0x30d5;IMAGE_DOWNLOAD_FAILED;LOW;Image download failed;linux/devices/startracker/StrHelper.h 12502;0x30d6;IMAGE_UPLOAD_SUCCESSFUL;LOW;Uploading image to star tracker was successfulop;linux/devices/startracker/StrHelper.h diff --git a/generators/events/translateEvents.cpp b/generators/events/translateEvents.cpp index 32dc1112..79228e3f 100644 --- a/generators/events/translateEvents.cpp +++ b/generators/events/translateEvents.cpp @@ -1,7 +1,7 @@ /** - * @brief Auto-generated event translation file. Contains 258 translations. + * @brief Auto-generated event translation file. Contains 259 translations. 
* @details - * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateEvents.h" @@ -151,6 +151,7 @@ const char *CARRIER_LOCK_STRING = "CARRIER_LOCK"; const char *BIT_LOCK_PDEC_STRING = "BIT_LOCK_PDEC"; const char *LOST_CARRIER_LOCK_PDEC_STRING = "LOST_CARRIER_LOCK_PDEC"; const char *LOST_BIT_LOCK_PDEC_STRING = "LOST_BIT_LOCK_PDEC"; +const char *TOO_MANY_IRQS_STRING = "TOO_MANY_IRQS"; const char *POLL_SYSCALL_ERROR_PDEC_STRING = "POLL_SYSCALL_ERROR_PDEC"; const char *WRITE_SYSCALL_ERROR_PDEC_STRING = "WRITE_SYSCALL_ERROR_PDEC"; const char *IMAGE_UPLOAD_FAILED_STRING = "IMAGE_UPLOAD_FAILED"; @@ -554,8 +555,10 @@ const char *translateEvents(Event event) { case (12406): return LOST_BIT_LOCK_PDEC_STRING; case (12407): - return POLL_SYSCALL_ERROR_PDEC_STRING; + return TOO_MANY_IRQS_STRING; case (12408): + return POLL_SYSCALL_ERROR_PDEC_STRING; + case (12409): return WRITE_SYSCALL_ERROR_PDEC_STRING; case (12500): return IMAGE_UPLOAD_FAILED_STRING; diff --git a/generators/objects/translateObjects.cpp b/generators/objects/translateObjects.cpp index f4c54617..a273b886 100644 --- a/generators/objects/translateObjects.cpp +++ b/generators/objects/translateObjects.cpp @@ -2,7 +2,7 @@ * @brief Auto-generated object translation file. * @details * Contains 159 translations. - * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateObjects.h" diff --git a/linux/fsfwconfig/events/translateEvents.cpp b/linux/fsfwconfig/events/translateEvents.cpp index 32dc1112..79228e3f 100644 --- a/linux/fsfwconfig/events/translateEvents.cpp +++ b/linux/fsfwconfig/events/translateEvents.cpp @@ -1,7 +1,7 @@ /** - * @brief Auto-generated event translation file. Contains 258 translations. + * @brief Auto-generated event translation file. Contains 259 translations. 
* @details - * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateEvents.h" @@ -151,6 +151,7 @@ const char *CARRIER_LOCK_STRING = "CARRIER_LOCK"; const char *BIT_LOCK_PDEC_STRING = "BIT_LOCK_PDEC"; const char *LOST_CARRIER_LOCK_PDEC_STRING = "LOST_CARRIER_LOCK_PDEC"; const char *LOST_BIT_LOCK_PDEC_STRING = "LOST_BIT_LOCK_PDEC"; +const char *TOO_MANY_IRQS_STRING = "TOO_MANY_IRQS"; const char *POLL_SYSCALL_ERROR_PDEC_STRING = "POLL_SYSCALL_ERROR_PDEC"; const char *WRITE_SYSCALL_ERROR_PDEC_STRING = "WRITE_SYSCALL_ERROR_PDEC"; const char *IMAGE_UPLOAD_FAILED_STRING = "IMAGE_UPLOAD_FAILED"; @@ -554,8 +555,10 @@ const char *translateEvents(Event event) { case (12406): return LOST_BIT_LOCK_PDEC_STRING; case (12407): - return POLL_SYSCALL_ERROR_PDEC_STRING; + return TOO_MANY_IRQS_STRING; case (12408): + return POLL_SYSCALL_ERROR_PDEC_STRING; + case (12409): return WRITE_SYSCALL_ERROR_PDEC_STRING; case (12500): return IMAGE_UPLOAD_FAILED_STRING; diff --git a/linux/fsfwconfig/objects/translateObjects.cpp b/linux/fsfwconfig/objects/translateObjects.cpp index f4c54617..a273b886 100644 --- a/linux/fsfwconfig/objects/translateObjects.cpp +++ b/linux/fsfwconfig/objects/translateObjects.cpp @@ -2,7 +2,7 @@ * @brief Auto-generated object translation file. * @details * Contains 159 translations. 
- * Generated on: 2023-02-21 11:14:30 + * Generated on: 2023-02-22 15:14:40 */ #include "translateObjects.h" diff --git a/linux/ipcore/PdecHandler.cpp b/linux/ipcore/PdecHandler.cpp index 3244280e..5ea85516 100644 --- a/linux/ipcore/PdecHandler.cpp +++ b/linux/ipcore/PdecHandler.cpp @@ -1,6 +1,7 @@ #include "PdecHandler.h" #include +#include #include #include #include @@ -113,7 +114,7 @@ ReturnValue_t PdecHandler::polledOperation() { // Requires reconfiguration and reinitialization of PDEC triggerEvent(INVALID_FAR); state = State::WAIT_FOR_RECOVERY; - return result; + break; } state = State::RUNNING; break; @@ -145,8 +146,9 @@ ReturnValue_t PdecHandler::irqOperation() { // Used to unmask IRQ uint32_t info = 1; - ssize_t nb = 0; - int ret = 0; + + interruptWindowCd.resetTimer(); + // Clear interrupts with dummy read before unmasking the interrupt. Use a volatile to prevent // read being optimized away. volatile uint32_t dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); @@ -157,7 +159,7 @@ ReturnValue_t PdecHandler::irqOperation() { readCommandQueue(); switch (state) { case State::INIT: - resetFarStatFlag(); + result = resetFarStatFlag(); if (result != returnvalue::OK) { // Requires reconfiguration and reinitialization of PDEC triggerEvent(INVALID_FAR); @@ -167,57 +169,16 @@ ReturnValue_t PdecHandler::irqOperation() { state = State::RUNNING; break; case State::RUNNING: { - nb = write(fd, &info, sizeof(info)); - if (nb != static_cast(sizeof(info))) { - sif::error << "PdecHandler::irqOperation: Unmasking IRQ failed" << std::endl; - triggerEvent(WRITE_SYSCALL_ERROR_PDEC, errno); - close(fd); - state = State::INIT; - return returnvalue::FAILED; - } - struct pollfd fds = {.fd = fd, .events = POLLIN, .revents = 0}; - ret = poll(&fds, 1, IRQ_TIMEOUT_MS); - if (ret == 0) { - // No TCs for timeout period - checkLocks(); - lockCheckCd.resetTimer(); - } else if (ret >= 1) { - nb = read(fd, &info, sizeof(info)); - if (nb == static_cast(sizeof(info))) { - uint32_t pisr = 
*(registerBaseAddress + PDEC_PISR_OFFSET); - if ((pisr & TC_NEW_MASK) == TC_NEW_MASK) { - // handle TC - handleNewTc(); - } - if ((pisr & TC_ABORT_MASK) == TC_ABORT_MASK) { - tcAbortCounter += 1; - } - if ((pisr & NEW_FAR_MASK) == NEW_FAR_MASK) { - // Read FAR here - CURRENT_FAR = readFar(); - checkFrameAna(CURRENT_FAR); - } - if (lockCheckCd.hasTimedOut()) { - checkLocks(); - lockCheckCd.resetTimer(); - } - // Clear interrupts with dummy read - dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); - } - } else { - sif::error << "PdecHandler::irqOperation: Poll error with errno " << errno << ": " - << strerror(errno) << std::endl; - triggerEvent(POLL_SYSCALL_ERROR_PDEC, errno); - close(fd); - state = State::INIT; - return returnvalue::FAILED; - } + checkAndHandleIrqs(fd, info); break; } case State::WAIT_FOR_RECOVERY: + TaskFactory::delayTask(400); break; default: + // Should never happen. sif::error << "PdecHandler::performOperation: Invalid state" << std::endl; + TaskFactory::delayTask(400); break; } } @@ -226,6 +187,71 @@ ReturnValue_t PdecHandler::irqOperation() { return returnvalue::OK; } +ReturnValue_t PdecHandler::checkAndHandleIrqs(int fd, uint32_t& info) { + ssize_t nb = write(fd, &info, sizeof(info)); + if (nb != static_cast(sizeof(info))) { + sif::error << "PdecHandler::irqOperation: Unmasking IRQ failed" << std::endl; + triggerEvent(WRITE_SYSCALL_ERROR_PDEC, errno); + close(fd); + state = State::INIT; + return returnvalue::FAILED; + } + struct pollfd fds = {.fd = fd, .events = POLLIN, .revents = 0}; + int ret = poll(&fds, 1, IRQ_TIMEOUT_MS); + if (ret == 0) { + // No TCs for timeout period + checkLocks(); + genericCheckCd.resetTimer(); + resetIrqLimiters(); + } else if (ret >= 1) { + // Interrupt handling. 
+ nb = read(fd, &info, sizeof(info)); + interruptCounter++; + if (nb == static_cast(sizeof(info))) { + uint32_t pisr = *(registerBaseAddress + PDEC_PISR_OFFSET); + if ((pisr & TC_NEW_MASK) == TC_NEW_MASK) { + // handle TC + handleNewTc(); + } + if ((pisr & TC_ABORT_MASK) == TC_ABORT_MASK) { + tcAbortCounter += 1; + } + if ((pisr & NEW_FAR_MASK) == NEW_FAR_MASK) { + // Read FAR here + CURRENT_FAR = readFar(); + checkFrameAna(CURRENT_FAR); + } + // Clear interrupts with dummy read. Volatile is important here to prevent + // compiler optimizations in release builds! + volatile uint32_t dummy = *(registerBaseAddress + PDEC_PIR_OFFSET); + static_cast(dummy); + + if (genericCheckCd.hasTimedOut()) { + checkLocks(); + genericCheckCd.resetTimer(); + if (interruptWindowCd.hasTimedOut()) { + if (interruptCounter >= MAX_ALLOWED_IRQS_PER_WINDOW) { + sif::error << "PdecHandler::irqOperation: Possible IRQ storm" << std::endl; + triggerEvent(TOO_MANY_IRQS, MAX_ALLOWED_IRQS_PER_WINDOW); + resetIrqLimiters(); + TaskFactory::delayTask(400); + return returnvalue::FAILED; + } + resetIrqLimiters(); + } + } + } + } else { + sif::error << "PdecHandler::irqOperation: Poll error with errno " << errno << ": " + << strerror(errno) << std::endl; + triggerEvent(POLL_SYSCALL_ERROR_PDEC, errno); + close(fd); + state = State::INIT; + return returnvalue::FAILED; + } + return returnvalue::OK; +} + void PdecHandler::readCommandQueue(void) { CommandMessage commandMessage; ReturnValue_t result = returnvalue::FAILED; @@ -618,6 +644,11 @@ void PdecHandler::printPdecMon() { uint32_t PdecHandler::readFar() { return *(registerBaseAddress + PDEC_FAR_OFFSET); } +void PdecHandler::resetIrqLimiters() { + interruptWindowCd.resetTimer(); + interruptCounter = 0; +} + std::string PdecHandler::getMonStatusString(uint32_t status) { switch (status) { case TC_CHANNEL_INACTIVE: diff --git a/linux/ipcore/PdecHandler.h b/linux/ipcore/PdecHandler.h index b514f501..09093b88 100644 --- a/linux/ipcore/PdecHandler.h +++ 
b/linux/ipcore/PdecHandler.h @@ -87,10 +87,12 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc static const Event LOST_CARRIER_LOCK_PDEC = MAKE_EVENT(5, severity::INFO); //! [EXPORT] : [COMMENT] Lost bit lock static const Event LOST_BIT_LOCK_PDEC = MAKE_EVENT(6, severity::INFO); + //! [EXPORT] : [COMMENT] Too many IRQs over the time window of one second. P1: Allowed TCs + static constexpr Event TOO_MANY_IRQS = MAKE_EVENT(7, severity::MEDIUM); static constexpr Event POLL_SYSCALL_ERROR_PDEC = - event::makeEvent(SUBSYSTEM_ID, 7, severity::MEDIUM); - static constexpr Event WRITE_SYSCALL_ERROR_PDEC = event::makeEvent(SUBSYSTEM_ID, 8, severity::MEDIUM); + static constexpr Event WRITE_SYSCALL_ERROR_PDEC = + event::makeEvent(SUBSYSTEM_ID, 9, severity::MEDIUM); private: static const uint8_t INTERFACE_ID = CLASS_ID::PDEC_HANDLER; @@ -180,6 +182,8 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc // discarded static const uint8_t MAP_CLK_FREQ = 2; + static constexpr uint32_t MAX_ALLOWED_IRQS_PER_WINDOW = 800; + enum class FrameAna_t : uint8_t { ABANDONED_CLTU, FRAME_DIRTY, @@ -206,13 +210,16 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc static uint32_t CURRENT_FAR; - Countdown lockCheckCd = Countdown(IRQ_TIMEOUT_MS); + Countdown genericCheckCd = Countdown(IRQ_TIMEOUT_MS); object_id_t tcDestinationId; AcceptsTelecommandsIF* tcDestination = nullptr; LinuxLibgpioIF* gpioComIF = nullptr; + uint32_t interruptCounter = 0; + Countdown interruptWindowCd = Countdown(1000); + /** * Reset signal is required to hold PDEC in reset state until the configuration has been * written to the appropriate memory space. 
@@ -259,6 +266,7 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc ReturnValue_t polledOperation(); ReturnValue_t irqOperation(); + ReturnValue_t checkAndHandleIrqs(int fd, uint32_t& info); uint32_t readFar(); @@ -294,6 +302,8 @@ class PdecHandler : public SystemObject, public ExecutableObjectIF, public HasAc */ void checkLocks(); + void resetIrqLimiters(); + /** * @brief Analyzes the FramAna field (frame analysis data) of a FAR report. * diff --git a/tmtc b/tmtc index fb676dc9..1dd667ae 160000 --- a/tmtc +++ b/tmtc @@ -1 +1 @@ -Subproject commit fb676dc90fe77585959297c36c828a7e852637c8 +Subproject commit 1dd667ae254ef588b9a41a169d88e5ee85ebeeb7