Merge pull request 'EIVE system changes for reboot handling' (#763) from pdec-reboot-target-self-image into main
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good

Reviewed-on: #763
Reviewed-by: Marius Eggert <eggertm@irs.uni-stuttgart.de>
This commit is contained in:
Robin Müller 2023-08-02 09:27:26 +02:00
commit 0ea0322e45
3 changed files with 57 additions and 10 deletions

View File

@ -20,6 +20,12 @@ will consitute of a breaking change warranting a new major release:
- Small SCEX fix: The temperatur check option was not passed
on for commands with a user data size larger than 1.
- `EiveSystem`: Add a small delay between triggering an event for FDIR reboots and sending the
command to the core controller.
## Changed
- The `EiveSystem` now only sends reboot commands targetting the same image.
# [v6.2.0] 2023-07-26

View File

@ -195,10 +195,21 @@ void EiveSystem::i2cRecoveryLogic() {
// Try recovery.
executeAction(EXECUTE_I2C_REBOOT, MessageQueueIF::NO_QUEUE, nullptr, 0);
} else {
if (waitingForI2cReboot) {
return;
}
triggerEvent(core::I2C_REBOOT);
// Some delay to ensure that the event is stored in the persistent TM store as well.
TaskFactory::delayTask(500);
// We already tried an I2C recovery but the bus is still broken.
// Send full reboot request to core controller.
sendFullRebootCommand();
// Send reboot request to core controller.
result = sendSelfRebootCommand();
if (result != returnvalue::OK) {
sif::error << "Sending a reboot command has failed" << std::endl;
// If the previous operation failed, it should be re-attempted the next task cycle.
return;
}
waitingForI2cReboot = true;
return;
}
}
@ -285,25 +296,38 @@ ReturnValue_t EiveSystem::sendFullRebootCommand() {
}
void EiveSystem::pdecRecoveryLogic() {
if (ptmeResetWasAttempted and ptmeResetWasAttemptedCd.hasTimedOut()) {
ptmeResetWasAttempted = false;
if (pdecResetWasAttempted and pdecResetWasAttemptedCd.hasTimedOut()) {
pdecResetWasAttempted = false;
}
if (frameDirtyCheckCd.hasTimedOut()) {
if (frameDirtyErrorCounter >= FRAME_DIRTY_COM_REBOOT_LIMIT) {
// If a PTME reset was already attempted and there is still an issue receiving TC frames,
// reboot the system.
if (ptmeResetWasAttempted) {
if (pdecResetWasAttempted) {
if (waitingForPdecReboot) {
return;
}
triggerEvent(core::PDEC_REBOOT);
// Some delay to ensure that the event is stored in the persistent TM store as well.
TaskFactory::delayTask(500);
// Send reboot command.
sendFullRebootCommand();
ReturnValue_t result = sendSelfRebootCommand();
if (result != returnvalue::OK) {
sif::error << "Sending a reboot command has failed" << std::endl;
// If the previous operation failed, it should be re-attempted the next task cycle.
pdecResetWasAttemptedCd.resetTimer();
return;
}
waitingForPdecReboot = true;
return;
} else {
// Try one full PDEC reset.
CommandMessage msg;
store_address_t dummy{};
ActionMessage::setCommand(&msg, pdec::RESET_PDEC_WITH_REINIITALIZATION, dummy);
commandQueue->sendMessage(pdecHandlerQueueId, &msg);
ptmeResetWasAttemptedCd.resetTimer();
ptmeResetWasAttempted = true;
pdecResetWasAttemptedCd.resetTimer();
pdecResetWasAttempted = true;
}
}
frameDirtyErrorCounter = 0;
@ -329,3 +353,17 @@ ReturnValue_t EiveSystem::handleCommandMessage(CommandMessage* message) {
}
return Subsystem::handleCommandMessage(message);
}
ReturnValue_t EiveSystem::sendSelfRebootCommand() {
CommandMessage msg;
uint8_t data[1];
// This option is used to target the same image.
data[0] = true;
store_address_t storeId;
ReturnValue_t result = IPCStore->addData(&storeId, data, sizeof(data));
if (result != returnvalue::OK) {
return result;
}
ActionMessage::setCommand(&msg, core::XSC_REBOOT_OBC, storeId);
return commandQueue->sendMessage(coreCtrlQueueId, &msg);
}

View File

@ -39,8 +39,10 @@ class EiveSystem : public Subsystem, public HasActionsIF {
Countdown frameDirtyCheckCd = Countdown(10000);
// If the PDEC reset was already attempted in the last 2 minutes, there is a high chance that
// only a full reboot will fix the issue.
Countdown ptmeResetWasAttemptedCd = Countdown(120000);
bool ptmeResetWasAttempted = false;
Countdown pdecResetWasAttemptedCd = Countdown(120000);
bool pdecResetWasAttempted = false;
bool waitingForI2cReboot = false;
bool waitingForPdecReboot = false;
ActionHelper actionHelper;
PowerSwitchIF* powerSwitcher = nullptr;
@ -63,6 +65,7 @@ class EiveSystem : public Subsystem, public HasActionsIF {
ReturnValue_t handleCommandMessage(CommandMessage* message) override;
ReturnValue_t sendFullRebootCommand();
ReturnValue_t sendSelfRebootCommand();
void pdecRecoveryLogic();