Merge pull request 'SD shutdown handling non-blocking' (#753) from sd-shutdown-non-blocking into main
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good

Reviewed-on: #753
Reviewed-by: Marius Eggert <eggertm@irs.uni-stuttgart.de>
This commit is contained in:
Robin Müller 2023-07-26 11:09:47 +02:00
commit 8cd959f55a
5 changed files with 102 additions and 47 deletions

View File

@ -24,6 +24,8 @@ will consitute of a breaking change warranting a new major release:
- Re-ordered some functions of the core controller in the initialize function. - Re-ordered some functions of the core controller in the initialize function.
- Rad sensor is now only polled every 30 minutes instead of every device cycle to reduce wear of - Rad sensor is now only polled every 30 minutes instead of every device cycle to reduce wear of
the RADFET electronics. the RADFET electronics.
- The SD cards will still be switched OFF on a reboot, but this is done in a non-blocking manner
now with a timeout of 10 seconds where the reboot will be performed in any case.
- ACS Controller now includes the safe mode from FLP, which will calculate its rotational rate - ACS Controller now includes the safe mode from FLP, which will calculate its rotational rate
from SUS and MGM measurements. To accommodate these changes, low-pass filters for SUS from SUS and MGM measurements. To accommodate these changes, low-pass filters for SUS
measurements and rates as well as MGM measurements and rates are included. Usage of the new measurements and rates as well as MGM measurements and rates are included. Usage of the new
@ -45,6 +47,9 @@ will consitute of a breaking change warranting a new major release:
## Fixed ## Fixed
- General bugs in the SD card state machine. This might fix some other known bugs for certain
combinations of switching ON and OFF SD cards and also makes the whole state machine a lot more
robust against hanging up.
- SUS dummy handler went to `MODE_NORMAL` for ON commands. - SUS dummy handler went to `MODE_NORMAL` for ON commands.
- PL PCDU dummy went to `MODE_NORMAL` for ON commands. - PL PCDU dummy went to `MODE_NORMAL` for ON commands.

View File

@ -554,21 +554,24 @@ ReturnValue_t CoreController::sdStateMachine() {
} }
// This lambda checks the non-blocking operation of the SD card manager and assigns the new // This lambda checks the non-blocking operation of the SD card manager and assigns the new
// state on success. It returns true for an operation success and false otherwise // state on success. It returns 0 for an operation success, -1 for failed operations, and 1
// for pending operations
auto nonBlockingSdcOpChecking = [&](SdStates newStateOnSuccess, uint16_t maxCycleCount, auto nonBlockingSdcOpChecking = [&](SdStates newStateOnSuccess, uint16_t maxCycleCount,
std::string opPrintout) { std::string opPrintout) {
SdCardManager::OpStatus status = sdcMan->checkCurrentOp(operation); SdCardManager::OpStatus status = sdcMan->checkCurrentOp(operation);
if (status == SdCardManager::OpStatus::SUCCESS) { if (status == SdCardManager::OpStatus::SUCCESS or sdInfo.cycleCount > maxCycleCount) {
sdFsmState = newStateOnSuccess; sdFsmState = newStateOnSuccess;
sdInfo.commandPending = false; sdInfo.commandPending = false;
if (sdInfo.cycleCount > maxCycleCount) {
sif::warning << "CoreController::sdStateMachine: " << opPrintout << " takes too long"
<< std::endl;
sdInfo.cycleCount = 0;
return -1;
}
sdInfo.cycleCount = 0; sdInfo.cycleCount = 0;
return true; return 0;
} else if (sdInfo.cycleCount > 4) { };
sif::warning << "CoreController::sdStateMachine: " << opPrintout << " takes too long" return 1;
<< std::endl;
return false;
}
return false;
}; };
if (sdFsmState == SdStates::UPDATE_SD_INFO_START) { if (sdFsmState == SdStates::UPDATE_SD_INFO_START) {
@ -644,7 +647,7 @@ ReturnValue_t CoreController::sdStateMachine() {
sdFsmState = tgtState; sdFsmState = tgtState;
} }
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::MOUNT_SELF, 10, "Setting SDC state")) { if (nonBlockingSdcOpChecking(SdStates::MOUNT_SELF, 10, "Setting SDC state") <= 0) {
sdInfo.activeState = sd::SdState::ON; sdInfo.activeState = sd::SdState::ON;
currentStateSetter(sdInfo.active, sd::SdState::ON); currentStateSetter(sdInfo.active, sd::SdState::ON);
// Skip the two cycles now. // Skip the two cycles now.
@ -672,7 +675,7 @@ ReturnValue_t CoreController::sdStateMachine() {
result = sdCardSetup(sdInfo.active, sd::SdState::MOUNTED, sdInfo.activeChar); result = sdCardSetup(sdInfo.active, sd::SdState::MOUNTED, sdInfo.activeChar);
sdInfo.commandPending = true; sdInfo.commandPending = true;
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::DETERMINE_OTHER, 5, "Mounting SD card")) { if (nonBlockingSdcOpChecking(SdStates::DETERMINE_OTHER, 5, "Mounting SD card") <= 0) {
sdcMan->setActiveSdCard(sdInfo.active); sdcMan->setActiveSdCard(sdInfo.active);
currMntPrefix = sdcMan->getCurrentMountPrefix(); currMntPrefix = sdcMan->getCurrentMountPrefix();
sdInfo.activeState = sd::SdState::MOUNTED; sdInfo.activeState = sd::SdState::MOUNTED;
@ -714,12 +717,7 @@ ReturnValue_t CoreController::sdStateMachine() {
sdInfo.commandPending = true; sdInfo.commandPending = true;
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::SKIP_CYCLE_BEFORE_INFO_UPDATE, 10, if (nonBlockingSdcOpChecking(SdStates::SKIP_CYCLE_BEFORE_INFO_UPDATE, 10,
"Switching off other SD card")) { "Switching off other SD card") <= 0) {
sdInfo.otherState = sd::SdState::OFF;
currentStateSetter(sdInfo.other, sd::SdState::OFF);
} else {
// Continue.. avoid being stuck here..
sdFsmState = SdStates::SKIP_CYCLE_BEFORE_INFO_UPDATE;
sdInfo.otherState = sd::SdState::OFF; sdInfo.otherState = sd::SdState::OFF;
currentStateSetter(sdInfo.other, sd::SdState::OFF); currentStateSetter(sdInfo.other, sd::SdState::OFF);
} }
@ -730,12 +728,7 @@ ReturnValue_t CoreController::sdStateMachine() {
sdInfo.commandPending = true; sdInfo.commandPending = true;
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::MOUNT_UNMOUNT_OTHER, 10, if (nonBlockingSdcOpChecking(SdStates::MOUNT_UNMOUNT_OTHER, 10,
"Switching on other SD card")) { "Switching on other SD card") <= 0) {
sdInfo.otherState = sd::SdState::ON;
currentStateSetter(sdInfo.other, sd::SdState::ON);
} else {
// Contnue.. avoid being stuck here.
sdFsmState = SdStates::MOUNT_UNMOUNT_OTHER;
sdInfo.otherState = sd::SdState::ON; sdInfo.otherState = sd::SdState::ON;
currentStateSetter(sdInfo.other, sd::SdState::ON); currentStateSetter(sdInfo.other, sd::SdState::ON);
} }
@ -750,7 +743,8 @@ ReturnValue_t CoreController::sdStateMachine() {
result = sdCardSetup(sdInfo.other, sd::SdState::ON, sdInfo.otherChar); result = sdCardSetup(sdInfo.other, sd::SdState::ON, sdInfo.otherChar);
sdInfo.commandPending = true; sdInfo.commandPending = true;
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::SET_STATE_OTHER, 10, "Unmounting other SD card")) { if (nonBlockingSdcOpChecking(SdStates::SET_STATE_OTHER, 10, "Unmounting other SD card") <=
0) {
sdInfo.otherState = sd::SdState::ON; sdInfo.otherState = sd::SdState::ON;
currentStateSetter(sdInfo.other, sd::SdState::ON); currentStateSetter(sdInfo.other, sd::SdState::ON);
} else { } else {
@ -764,7 +758,8 @@ ReturnValue_t CoreController::sdStateMachine() {
result = sdCardSetup(sdInfo.other, sd::SdState::MOUNTED, sdInfo.otherChar); result = sdCardSetup(sdInfo.other, sd::SdState::MOUNTED, sdInfo.otherChar);
sdInfo.commandPending = true; sdInfo.commandPending = true;
} else { } else {
if (nonBlockingSdcOpChecking(SdStates::UPDATE_SD_INFO_END, 4, "Mounting other SD card")) { if (nonBlockingSdcOpChecking(SdStates::UPDATE_SD_INFO_END, 4, "Mounting other SD card") <=
0) {
sdInfo.otherState = sd::SdState::MOUNTED; sdInfo.otherState = sd::SdState::MOUNTED;
currentStateSetter(sdInfo.other, sd::SdState::MOUNTED); currentStateSetter(sdInfo.other, sd::SdState::MOUNTED);
} }
@ -840,7 +835,7 @@ ReturnValue_t CoreController::sdCardSetup(sd::SdCard sdCard, sd::SdState targetS
if (state == sd::SdState::MOUNTED) { if (state == sd::SdState::MOUNTED) {
if (targetState == sd::SdState::OFF) { if (targetState == sd::SdState::OFF) {
sif::info << "Switching off SD card " << sdChar << std::endl; sif::info << "Switching off SD card " << sdChar << std::endl;
return sdcMan->switchOffSdCard(sdCard, true, &sdInfo.currentState); return sdcMan->switchOffSdCard(sdCard, sdInfo.currentState, true);
} else if (targetState == sd::SdState::ON) { } else if (targetState == sd::SdState::ON) {
sif::info << "Unmounting SD card " << sdChar << std::endl; sif::info << "Unmounting SD card " << sdChar << std::endl;
return sdcMan->unmountSdCard(sdCard); return sdcMan->unmountSdCard(sdCard);
@ -874,7 +869,7 @@ ReturnValue_t CoreController::sdCardSetup(sd::SdCard sdCard, sd::SdState targetS
return sdcMan->mountSdCard(sdCard); return sdcMan->mountSdCard(sdCard);
} else if (targetState == sd::SdState::OFF) { } else if (targetState == sd::SdState::OFF) {
sif::info << "Switching off SD card " << sdChar << std::endl; sif::info << "Switching off SD card " << sdChar << std::endl;
return sdcMan->switchOffSdCard(sdCard, false, &sdInfo.currentState); return sdcMan->switchOffSdCard(sdCard, sdInfo.currentState, false);
} }
} else { } else {
sif::warning << "CoreController::sdCardSetup: Invalid state for this call" << std::endl; sif::warning << "CoreController::sdCardSetup: Invalid state for this call" << std::endl;
@ -898,8 +893,7 @@ ReturnValue_t CoreController::sdColdRedundantBlockingInit() {
sif::info << "Switching off secondary SD card " << sdInfo.otherChar << std::endl; sif::info << "Switching off secondary SD card " << sdInfo.otherChar << std::endl;
// Switch off other SD card in cold redundant mode if setting up preferred one worked // Switch off other SD card in cold redundant mode if setting up preferred one worked
// without issues // without issues
ReturnValue_t result2 = ReturnValue_t result2 = sdcMan->switchOffSdCard(sdInfo.other, sdInfo.currentState, true);
sdcMan->switchOffSdCard(sdInfo.other, sdInfo.otherState, &sdInfo.currentState);
if (result2 != returnvalue::OK and result2 != SdCardManager::ALREADY_OFF) { if (result2 != returnvalue::OK and result2 != SdCardManager::ALREADY_OFF) {
sif::warning << "Switching off secondary SD card " << sdInfo.otherChar sif::warning << "Switching off secondary SD card " << sdInfo.otherChar
<< " in cold redundant mode failed" << std::endl; << " in cold redundant mode failed" << std::endl;
@ -1229,18 +1223,27 @@ ReturnValue_t CoreController::gracefulShutdownTasks(xsc::Chip chip, xsc::Copy co
// Ensure that all writes/reads do finish. // Ensure that all writes/reads do finish.
sync(); sync();
// Attempt graceful shutdown by unmounting and switching off SD cards // Unmount and switch off SD cards. This could possibly fix issues with the SD card and is
sdcMan->switchOffSdCard(sd::SdCard::SLOT_0); // the more graceful way to reboot the system. This function takes around 400 ms.
sdcMan->switchOffSdCard(sd::SdCard::SLOT_1); ReturnValue_t result = handleSwitchingSdCardsOffNonBlocking();
if (result != returnvalue::OK) {
sif::error
<< "CoreController::gracefulShutdownTasks: Issues unmounting or switching SD cards off"
<< std::endl;
}
// If any boot copies are unprotected. // If any boot copies are unprotected.
// Actually this function only ensures that reboots to the own image are protected.. // Actually this function only ensures that reboots to the own image are protected..
ReturnValue_t result = setBootCopyProtection(xsc::Chip::SELF_CHIP, xsc::Copy::SELF_COPY, true, result = setBootCopyProtection(xsc::Chip::SELF_CHIP, xsc::Copy::SELF_COPY, true, protOpPerformed,
protOpPerformed, false); false);
if (result == returnvalue::OK and protOpPerformed) { if (result == returnvalue::OK and protOpPerformed) {
// TODO: Would be nice to notify operator. But we can't use the filesystem anymore // TODO: Would be nice to notify operator. But we can't use the filesystem anymore
// and a reboot is imminent. Use scratch buffer? // and a reboot is imminent. Use scratch buffer?
sif::info << "Running slot was writeprotected before reboot" << std::endl; sif::info << "Running slot was writeprotected before reboot" << std::endl;
} }
sif::info << "Graceful shutdown handling done" << std::endl;
// Ensure that all diagnostic prinouts arrive.
TaskFactory::delayTask(50);
return result; return result;
} }
@ -2583,6 +2586,57 @@ void CoreController::announceSdInfo(SdCardManager::SdStatePair sdStates) {
triggerEvent(core::ACTIVE_SD_INFO, p1, p2); triggerEvent(core::ACTIVE_SD_INFO, p1, p2);
} }
ReturnValue_t CoreController::handleSwitchingSdCardsOffNonBlocking() {
sdcMan->setBlocking(false);
SdCardManager::Operations op;
std::pair<sd::SdState, sd::SdState> sdStatus;
ReturnValue_t result = sdcMan->getSdCardsStatus(sdStatus);
if (result != returnvalue::OK) {
return result;
}
Countdown maxWaitTimeCd(10000);
// Stopwatch watch;
auto waitingForFinish = [&]() {
auto currentState = sdcMan->checkCurrentOp(op);
if (currentState == SdCardManager::OpStatus::IDLE) {
return returnvalue::OK;
}
while (currentState == SdCardManager::OpStatus::ONGOING) {
if (maxWaitTimeCd.hasTimedOut()) {
return returnvalue::FAILED;
}
TaskFactory::delayTask(50);
currentState = sdcMan->checkCurrentOp(op);
}
return returnvalue::OK;
};
if (sdStatus.first != sd::SdState::OFF) {
sdcMan->unmountSdCard(sd::SdCard::SLOT_0);
result = waitingForFinish();
if (result != returnvalue::OK) {
return result;
}
sdcMan->switchOffSdCard(sd::SdCard::SLOT_0, sdStatus, false);
result = waitingForFinish();
if (result != returnvalue::OK) {
return result;
}
}
if (sdStatus.second != sd::SdState::OFF) {
sdcMan->unmountSdCard(sd::SdCard::SLOT_1);
result = waitingForFinish();
if (result != returnvalue::OK) {
return result;
}
sdcMan->switchOffSdCard(sd::SdCard::SLOT_1, sdStatus, false);
result = waitingForFinish();
if (result != returnvalue::OK) {
return result;
}
}
return result;
}
bool CoreController::isNumber(const std::string &s) { bool CoreController::isNumber(const std::string &s) {
return !s.empty() && std::find_if(s.begin(), s.end(), return !s.empty() && std::find_if(s.begin(), s.end(),
[](unsigned char c) { return !std::isdigit(c); }) == s.end(); [](unsigned char c) { return !std::isdigit(c); }) == s.end();

View File

@ -374,6 +374,7 @@ class CoreController : public ExtendedControllerBase, public ReceivesParameterMe
ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed); ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed);
ReturnValue_t handleProtInfoUpdateLine(std::string nextLine); ReturnValue_t handleProtInfoUpdateLine(std::string nextLine);
ReturnValue_t handleSwitchingSdCardsOffNonBlocking();
int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect, int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect,
bool& protOperationPerformed, bool selfChip, bool selfCopy, bool& protOperationPerformed, bool selfChip, bool selfCopy,
bool allChips, bool allCopies, uint8_t arrIdx); bool allChips, bool allCopies, uint8_t arrIdx);

View File

@ -125,13 +125,8 @@ ReturnValue_t SdCardManager::switchOnSdCard(sd::SdCard sdCard, bool doMountSdCar
return mountSdCard(sdCard); return mountSdCard(sdCard);
} }
ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, bool doUnmountSdCard, ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, SdStatePair& sdStates,
SdStatePair* statusPair) { bool doUnmountSdCard) {
std::pair<sd::SdState, sd::SdState> active;
ReturnValue_t result = getSdCardsStatus(active);
if (result != returnvalue::OK) {
return result;
}
if (doUnmountSdCard) { if (doUnmountSdCard) {
if (not blocking) { if (not blocking) {
sif::warning << "SdCardManager::switchOffSdCard: Two-step command but manager is" sif::warning << "SdCardManager::switchOffSdCard: Two-step command but manager is"
@ -147,17 +142,17 @@ ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, bool doUnmountSd
return returnvalue::FAILED; return returnvalue::FAILED;
} }
if (sdCard == sd::SdCard::SLOT_0) { if (sdCard == sd::SdCard::SLOT_0) {
if (active.first == sd::SdState::OFF) { if (sdStates.first == sd::SdState::OFF) {
return ALREADY_OFF; return ALREADY_OFF;
} }
} else if (sdCard == sd::SdCard::SLOT_1) { } else if (sdCard == sd::SdCard::SLOT_1) {
if (active.second == sd::SdState::OFF) { if (sdStates.second == sd::SdState::OFF) {
return ALREADY_OFF; return ALREADY_OFF;
} }
} }
if (doUnmountSdCard) { if (doUnmountSdCard) {
result = unmountSdCard(sdCard); ReturnValue_t result = unmountSdCard(sdCard);
if (result != returnvalue::OK) { if (result != returnvalue::OK) {
return result; return result;
} }
@ -189,7 +184,7 @@ ReturnValue_t SdCardManager::setSdCardState(sd::SdCard sdCard, bool on) {
command << "q7hw sd set " << sdstring << " " << statestring; command << "q7hw sd set " << sdstring << " " << statestring;
cmdExecutor.load(command.str(), blocking, printCmdOutput); cmdExecutor.load(command.str(), blocking, printCmdOutput);
ReturnValue_t result = cmdExecutor.execute(); ReturnValue_t result = cmdExecutor.execute();
if (blocking and result != returnvalue::OK) { if (result != returnvalue::OK) {
utility::handleSystemError(cmdExecutor.getLastError(), "SdCardManager::setSdCardState"); utility::handleSystemError(cmdExecutor.getLastError(), "SdCardManager::setSdCardState");
} }
return result; return result;
@ -204,6 +199,7 @@ ReturnValue_t SdCardManager::getSdCardsStatus(SdStatePair& sdStates) {
ReturnValue_t SdCardManager::mountSdCard(sd::SdCard sdCard) { ReturnValue_t SdCardManager::mountSdCard(sd::SdCard sdCard) {
using namespace std; using namespace std;
if (cmdExecutor.getCurrentState() == CommandExecutor::States::PENDING) { if (cmdExecutor.getCurrentState() == CommandExecutor::States::PENDING) {
sif::warning << "SdCardManager::mountSdCard: Command still pending" << std::endl;
return CommandExecutor::COMMAND_PENDING; return CommandExecutor::COMMAND_PENDING;
} }
if (sdCard == sd::SdCard::BOTH) { if (sdCard == sd::SdCard::BOTH) {

View File

@ -114,8 +114,7 @@ class SdCardManager : public SystemObject, public SdCardMountedIF {
* @return - returnvalue::OK on success, ALREADY_ON if it is already on, * @return - returnvalue::OK on success, ALREADY_ON if it is already on,
* SYSTEM_CALL_ERROR on system error * SYSTEM_CALL_ERROR on system error
*/ */
ReturnValue_t switchOffSdCard(sd::SdCard sdCard, bool doUnmountSdCard = true, ReturnValue_t switchOffSdCard(sd::SdCard sdCard, SdStatePair& sdStates, bool doUnmountSdCard);
SdStatePair* statusPair = nullptr);
/** /**
* Get the state of the SD cards. If the state file does not exist, this function will * Get the state of the SD cards. If the state file does not exist, this function will