From a884176773715887f20fd0d2af1c9c40a419ba6b Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Tue, 1 Aug 2023 09:26:06 +0200 Subject: [PATCH 1/7] improvements for reboot FDIR --- mission/system/EiveSystem.cpp | 24 +++++++++++++++++++++--- mission/system/EiveSystem.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index f17b82cb..b58316d4 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -196,9 +196,11 @@ void EiveSystem::i2cRecoveryLogic() { executeAction(EXECUTE_I2C_REBOOT, MessageQueueIF::NO_QUEUE, nullptr, 0); } else { triggerEvent(core::I2C_REBOOT); + // Some delay to ensure that the event is stored in the persistent TM store as well. + TaskFactory::delayTask(500); // We already tried an I2C recovery but the bus is still broken. - // Send full reboot request to core controller. - sendFullRebootCommand(); + // Send reboot request to core controller. + sendSelfRebootCommand(); return; } } @@ -294,8 +296,10 @@ void EiveSystem::pdecRecoveryLogic() { // reboot the system. if (ptmeResetWasAttempted) { triggerEvent(core::PDEC_REBOOT); + // Some delay to ensure that the event is stored in the persistent TM store as well. + TaskFactory::delayTask(500); // Send reboot command. - sendFullRebootCommand(); + sendSelfRebootCommand(); } else { // Try one full PDEC reset. CommandMessage msg; @@ -329,3 +333,17 @@ ReturnValue_t EiveSystem::handleCommandMessage(CommandMessage* message) { } return Subsystem::handleCommandMessage(message); } + +ReturnValue_t EiveSystem::sendSelfRebootCommand() { + CommandMessage msg; + uint8_t data[1]; + // This option is used to target the same image. 
+ data[0] = true; + store_address_t storeId; + ReturnValue_t result = IPCStore->addData(&storeId, data, sizeof(data)); + if (result != returnvalue::OK) { + return result; + } + ActionMessage::setCommand(&msg, core::XSC_REBOOT_OBC, storeId); + return commandQueue->sendMessage(coreCtrlQueueId, &msg); +} diff --git a/mission/system/EiveSystem.h b/mission/system/EiveSystem.h index 67f11c3e..33b75151 100644 --- a/mission/system/EiveSystem.h +++ b/mission/system/EiveSystem.h @@ -63,6 +63,7 @@ class EiveSystem : public Subsystem, public HasActionsIF { ReturnValue_t handleCommandMessage(CommandMessage* message) override; ReturnValue_t sendFullRebootCommand(); + ReturnValue_t sendSelfRebootCommand(); void pdecRecoveryLogic(); -- 2.43.0 From b27694321fae80ecc868c66b7a81ce69ef39e5ea Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Tue, 1 Aug 2023 09:29:34 +0200 Subject: [PATCH 2/7] update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61ee5871..e902fb35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,12 @@ will consitute of a breaking change warranting a new major release: - Small SCEX fix: The temperatur check option was not passed on for commands with a user data size larger than 1. +- `EiveSystem`: Add a small delay between triggering an event for FDIR reboots and sending the + command to the core controller. + +## Changed + +- The `EiveSystem` now only sends reboot commands targeting the same image. 
# [v6.2.0] 2023-07-26 -- 2.43.0 From 8105e5f689308f9804792d1615b44451bc585a67 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Tue, 1 Aug 2023 09:36:08 +0200 Subject: [PATCH 3/7] more robust code --- mission/system/EiveSystem.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index b58316d4..8f0d6783 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -200,7 +200,11 @@ void EiveSystem::i2cRecoveryLogic() { TaskFactory::delayTask(500); // We already tried an I2C recovery but the bus is still broken. // Send reboot request to core controller. - sendSelfRebootCommand(); + ReturnValue_t result = sendSelfRebootCommand(); + if(result != returnvalue::OK) { + sif::error << "Sending a reboot command has failed" << std::endl; + } + // If the previous operation failed, it should be re-attempted the next task cycle. return; } } @@ -299,7 +303,13 @@ void EiveSystem::pdecRecoveryLogic() { // Some delay to ensure that the event is stored in the persistent TM store as well. TaskFactory::delayTask(500); // Send reboot command. - sendSelfRebootCommand(); + ReturnValue_t result = sendSelfRebootCommand(); + if(result != returnvalue::OK) { + sif::error << "Sending a reboot command has failed" << std::endl; + // If the previous operation failed, it should be re-attempted the next task cycle. + ptmeResetWasAttemptedCd.resetTimer(); + } + return; } else { // Try one full PDEC reset. 
CommandMessage msg; -- 2.43.0 From 988da377b1c44b24211f41a5131309ca70b5a4a8 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 2 Aug 2023 09:00:45 +0200 Subject: [PATCH 4/7] pause the tasks waiting for a reboot --- mission/system/EiveSystem.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index 8f0d6783..a202b8d6 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -200,11 +200,14 @@ void EiveSystem::i2cRecoveryLogic() { TaskFactory::delayTask(500); // We already tried an I2C recovery but the bus is still broken. // Send reboot request to core controller. - ReturnValue_t result = sendSelfRebootCommand(); - if(result != returnvalue::OK) { + result = sendSelfRebootCommand(); + if (result != returnvalue::OK) { sif::error << "Sending a reboot command has failed" << std::endl; + // If the previous operation failed, it should be re-attempted the next task cycle. + return; } - // If the previous operation failed, it should be re-attempted the next task cycle. + // Wait for reboot to be done. + TaskFactory::delayTask(3000); return; } } @@ -304,11 +307,14 @@ void EiveSystem::pdecRecoveryLogic() { TaskFactory::delayTask(500); // Send reboot command. ReturnValue_t result = sendSelfRebootCommand(); - if(result != returnvalue::OK) { + if (result != returnvalue::OK) { sif::error << "Sending a reboot command has failed" << std::endl; // If the previous operation failed, it should be re-attempted the next task cycle. ptmeResetWasAttemptedCd.resetTimer(); } + // We are done / about to reboot. Delay the rest of the time, graceful reboot takes some + // time. + TaskFactory::delayTask(3000); return; } else { // Try one full PDEC reset. 
-- 2.43.0 From 4b4dd35b55f6fa8b088944dc0db1066089222aff Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 2 Aug 2023 09:19:43 +0200 Subject: [PATCH 5/7] this is the cleanest solution --- mission/system/EiveSystem.cpp | 14 +++++++++----- mission/system/EiveSystem.h | 2 ++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index a202b8d6..78791159 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -195,6 +195,9 @@ void EiveSystem::i2cRecoveryLogic() { // Try recovery. executeAction(EXECUTE_I2C_REBOOT, MessageQueueIF::NO_QUEUE, nullptr, 0); } else { + if (waitingForI2cReboot) { + return; + } triggerEvent(core::I2C_REBOOT); // Some delay to ensure that the event is stored in the persistent TM store as well. TaskFactory::delayTask(500); @@ -206,8 +209,7 @@ void EiveSystem::i2cRecoveryLogic() { // If the previous operation failed, it should be re-attempted the next task cycle. return; } - // Wait for reboot to be done. - TaskFactory::delayTask(3000); + waitingForI2cReboot = true; return; } } @@ -302,6 +304,9 @@ void EiveSystem::pdecRecoveryLogic() { // If a PTME reset was already attempted and there is still an issue receiving TC frames, // reboot the system. if (ptmeResetWasAttempted) { + if (waitingForPdecReboot) { + return; + } triggerEvent(core::PDEC_REBOOT); // Some delay to ensure that the event is stored in the persistent TM store as well. TaskFactory::delayTask(500); @@ -312,9 +317,7 @@ void EiveSystem::pdecRecoveryLogic() { // If the previous operation failed, it should be re-attempted the next task cycle. ptmeResetWasAttemptedCd.resetTimer(); } - // We are done / about to reboot. Delay the rest of the time, graceful reboot takes some - // time. - TaskFactory::delayTask(3000); + waitingForPdecReboot = true; return; } else { // Try one full PDEC reset. 
@@ -324,6 +327,7 @@ void EiveSystem::pdecRecoveryLogic() { commandQueue->sendMessage(pdecHandlerQueueId, &msg); ptmeResetWasAttemptedCd.resetTimer(); ptmeResetWasAttempted = true; + return; } } frameDirtyErrorCounter = 0; diff --git a/mission/system/EiveSystem.h b/mission/system/EiveSystem.h index 33b75151..486b13c5 100644 --- a/mission/system/EiveSystem.h +++ b/mission/system/EiveSystem.h @@ -41,6 +41,8 @@ class EiveSystem : public Subsystem, public HasActionsIF { // only a full reboot will fix the issue. Countdown ptmeResetWasAttemptedCd = Countdown(120000); bool ptmeResetWasAttempted = false; + bool waitingForI2cReboot = false; + bool waitingForPdecReboot = false; ActionHelper actionHelper; PowerSwitchIF* powerSwitcher = nullptr; -- 2.43.0 From 6d18e21edf3a2420633aa90799eb1bf931beab58 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 2 Aug 2023 09:23:00 +0200 Subject: [PATCH 6/7] removed stray return --- mission/system/EiveSystem.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index 78791159..ed1e76a5 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -327,7 +327,6 @@ void EiveSystem::pdecRecoveryLogic() { commandQueue->sendMessage(pdecHandlerQueueId, &msg); ptmeResetWasAttemptedCd.resetTimer(); ptmeResetWasAttempted = true; - return; } } frameDirtyErrorCounter = 0; -- 2.43.0 From 949ac8942dc954e027ec43abdc647f6ba46db4ed Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 2 Aug 2023 09:25:01 +0200 Subject: [PATCH 7/7] small corrections --- mission/system/EiveSystem.cpp | 13 +++++++------ mission/system/EiveSystem.h | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mission/system/EiveSystem.cpp b/mission/system/EiveSystem.cpp index ed1e76a5..41f52cde 100644 --- a/mission/system/EiveSystem.cpp +++ b/mission/system/EiveSystem.cpp @@ -296,14 +296,14 @@ ReturnValue_t EiveSystem::sendFullRebootCommand() { } void 
EiveSystem::pdecRecoveryLogic() { - if (ptmeResetWasAttempted and ptmeResetWasAttemptedCd.hasTimedOut()) { - ptmeResetWasAttempted = false; + if (pdecResetWasAttempted and pdecResetWasAttemptedCd.hasTimedOut()) { + pdecResetWasAttempted = false; } if (frameDirtyCheckCd.hasTimedOut()) { if (frameDirtyErrorCounter >= FRAME_DIRTY_COM_REBOOT_LIMIT) { // If a PTME reset was already attempted and there is still an issue receiving TC frames, // reboot the system. - if (ptmeResetWasAttempted) { + if (pdecResetWasAttempted) { if (waitingForPdecReboot) { return; } @@ -315,7 +315,8 @@ void EiveSystem::pdecRecoveryLogic() { if (result != returnvalue::OK) { sif::error << "Sending a reboot command has failed" << std::endl; // If the previous operation failed, it should be re-attempted the next task cycle. - ptmeResetWasAttemptedCd.resetTimer(); + pdecResetWasAttemptedCd.resetTimer(); + return; } waitingForPdecReboot = true; return; @@ -325,8 +326,8 @@ void EiveSystem::pdecRecoveryLogic() { store_address_t dummy{}; ActionMessage::setCommand(&msg, pdec::RESET_PDEC_WITH_REINIITALIZATION, dummy); commandQueue->sendMessage(pdecHandlerQueueId, &msg); - ptmeResetWasAttemptedCd.resetTimer(); - ptmeResetWasAttempted = true; + pdecResetWasAttemptedCd.resetTimer(); + pdecResetWasAttempted = true; } } frameDirtyErrorCounter = 0; diff --git a/mission/system/EiveSystem.h b/mission/system/EiveSystem.h index 486b13c5..c724ba34 100644 --- a/mission/system/EiveSystem.h +++ b/mission/system/EiveSystem.h @@ -39,8 +39,8 @@ class EiveSystem : public Subsystem, public HasActionsIF { Countdown frameDirtyCheckCd = Countdown(10000); // If the PDEC reset was already attempted in the last 2 minutes, there is a high chance that // only a full reboot will fix the issue. 
- Countdown ptmeResetWasAttemptedCd = Countdown(120000); - bool ptmeResetWasAttempted = false; + Countdown pdecResetWasAttemptedCd = Countdown(120000); + bool pdecResetWasAttempted = false; bool waitingForI2cReboot = false; bool waitingForPdecReboot = false; -- 2.43.0