From 669c3630a9c819c4b3d30f7cb4487f7ff2bc79cd Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Fri, 4 Aug 2023 14:23:10 +0200 Subject: [PATCH 1/5] reworked PDEC FDIR --- mission/system/EiveSystem.cpp | 59 +++++++++++++++++------------------ mission/system/EiveSystem.h | 5 +-- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index 41f52cde..8461b528 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -296,39 +296,38 @@ ReturnValue_t EiveSystem::sendFullRebootCommand() { } void EiveSystem::pdecRecoveryLogic() { - if (pdecResetWasAttempted and pdecResetWasAttemptedCd.hasTimedOut()) { - pdecResetWasAttempted = false; + if (pdecResetCounter >= PDEC_RESET_MAX_COUNT_BEFORE_REBOOT) { + // If a PTME reset was already attempted and there is still an issue receiving TC frames, + // reboot the system. + if (waitingForPdecReboot) { + return; + } + triggerEvent(core::PDEC_REBOOT); + // Some delay to ensure that the event is stored in the persistent TM store as well. + TaskFactory::delayTask(500); + // Send reboot command. + ReturnValue_t result = sendSelfRebootCommand(); + if (result != returnvalue::OK) { + sif::error << "Sending a reboot command has failed" << std::endl; + // If the previous operation failed, it should be re-attempted the next task cycle. + pdecResetCounterResetCd.resetTimer(); + return; + } + waitingForPdecReboot = true; + return; + } + if (pdecResetCounterResetCd.hasTimedOut()) { + pdecResetCounter = 0; } if (frameDirtyCheckCd.hasTimedOut()) { if (frameDirtyErrorCounter >= FRAME_DIRTY_COM_REBOOT_LIMIT) { - // If a PTME reset was already attempted and there is still an issue receiving TC frames, - // reboot the system. - if (pdecResetWasAttempted) { - if (waitingForPdecReboot) { - return; - } - triggerEvent(core::PDEC_REBOOT); - // Some delay to ensure that the event is stored in the persistent TM store as well. 
- TaskFactory::delayTask(500); - // Send reboot command. - ReturnValue_t result = sendSelfRebootCommand(); - if (result != returnvalue::OK) { - sif::error << "Sending a reboot command has failed" << std::endl; - // If the previous operation failed, it should be re-attempted the next task cycle. - pdecResetWasAttemptedCd.resetTimer(); - return; - } - waitingForPdecReboot = true; - return; - } else { - // Try one full PDEC reset. - CommandMessage msg; - store_address_t dummy{}; - ActionMessage::setCommand(&msg, pdec::RESET_PDEC_WITH_REINIITALIZATION, dummy); - commandQueue->sendMessage(pdecHandlerQueueId, &msg); - pdecResetWasAttemptedCd.resetTimer(); - pdecResetWasAttempted = true; - } + // Try one full PDEC reset. + CommandMessage msg; + store_address_t dummy{}; + ActionMessage::setCommand(&msg, pdec::RESET_PDEC_WITH_REINIITALIZATION, dummy); + commandQueue->sendMessage(pdecHandlerQueueId, &msg); + pdecResetCounterResetCd.resetTimer(); + pdecResetCounter++; } frameDirtyErrorCounter = 0; frameDirtyCheckCd.resetTimer(); diff --git a/mission/system/EiveSystem.h b/mission/system/EiveSystem.h index c724ba34..e6ff19b9 100644 --- a/mission/system/EiveSystem.h +++ b/mission/system/EiveSystem.h @@ -10,6 +10,7 @@ class EiveSystem : public Subsystem, public HasActionsIF { public: static constexpr uint8_t FRAME_DIRTY_COM_REBOOT_LIMIT = 4; + static constexpr uint32_t PDEC_RESET_MAX_COUNT_BEFORE_REBOOT = 10; static constexpr ActionId_t EXECUTE_I2C_REBOOT = 10; @@ -39,11 +40,11 @@ class EiveSystem : public Subsystem, public HasActionsIF { Countdown frameDirtyCheckCd = Countdown(10000); // If the PDEC reset was already attempted in the last 2 minutes, there is a high chance that // only a full reboot will fix the issue. 
- Countdown pdecResetWasAttemptedCd = Countdown(120000); - bool pdecResetWasAttempted = false; + Countdown pdecResetCounterResetCd = Countdown(120000); bool waitingForI2cReboot = false; bool waitingForPdecReboot = false; + uint32_t pdecResetCounter = 0; ActionHelper actionHelper; PowerSwitchIF* powerSwitcher = nullptr; std::atomic_uint16_t& i2cErrors; From e4632b2538d917e64e9add8ee3ae081772ada560 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Fri, 4 Aug 2023 14:26:13 +0200 Subject: [PATCH 2/5] changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18ba4097..5f7b7788 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,13 @@ will consitute of a breaking change warranting a new major release: # [unreleased] +## Changed + +- PDEC FDIR rework: A full PDEC reboot will now only be performed after a regular PDEC reset has + failed 10 times. The mechanism will reset after no PDEC reset has happened for 2 minutes. + The PDEC reset will be performed when counting 4 dirty frame events in 10 seconds just like done + before. + # [v6.3.0] 2023-08-03 ## Fixed From 9f176e19599ae3f3ba016530022e1611cda30f46 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Fri, 4 Aug 2023 14:28:58 +0200 Subject: [PATCH 3/5] doc correction --- mission/system/EiveSystem.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index 8461b528..eb5b3529 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -296,9 +296,8 @@ ReturnValue_t EiveSystem::sendFullRebootCommand() { } void EiveSystem::pdecRecoveryLogic() { + // PDEC reset has happened too often recently. Perform reboot to same image. if (pdecResetCounter >= PDEC_RESET_MAX_COUNT_BEFORE_REBOOT) { - // If a PTME reset was already attempted and there is still an issue receiving TC frames, - // reboot the system. 
if (waitingForPdecReboot) { return; } From 0095397b4f65b99958abae570963ad5c653a5c34 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Mon, 14 Aug 2023 10:30:46 +0200 Subject: [PATCH 4/5] that should do the job --- mission/system/EiveSystem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index eb5b3529..8271b179 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -133,6 +133,7 @@ void EiveSystem::handleEventMessages() { case pdec::INVALID_TC_FRAME: { if (event.getParameter1() == pdec::FRAME_DIRTY_RETVAL) { frameDirtyErrorCounter++; + frameDirtyCheckCd.resetTimer(); } break; } @@ -318,7 +319,7 @@ void EiveSystem::pdecRecoveryLogic() { if (pdecResetCounterResetCd.hasTimedOut()) { pdecResetCounter = 0; } - if (frameDirtyCheckCd.hasTimedOut()) { + if (frameDirtyCheckCd.hasTimedOut() and frameDirtyErrorCounter > 0) { if (frameDirtyErrorCounter >= FRAME_DIRTY_COM_REBOOT_LIMIT) { // Try one full PDEC reset. CommandMessage msg; @@ -329,7 +330,6 @@ void EiveSystem::pdecRecoveryLogic() { pdecResetCounter++; } frameDirtyErrorCounter = 0; - frameDirtyCheckCd.resetTimer(); } } From da25d650d982e4f95b519185c509e055f34da1b4 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Mon, 14 Aug 2023 10:34:06 +0200 Subject: [PATCH 5/5] bugfix --- mission/system/EiveSystem.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index 8271b179..84b4edf7 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -133,7 +133,10 @@ void EiveSystem::handleEventMessages() { case pdec::INVALID_TC_FRAME: { if (event.getParameter1() == pdec::FRAME_DIRTY_RETVAL) { frameDirtyErrorCounter++; - frameDirtyCheckCd.resetTimer(); + // Check whether threshold was reached after 10 seconds. + if(frameDirtyErrorCounter == 1) { + frameDirtyCheckCd.resetTimer(); + } } break; }