Merge pull request 'SD shutdown handling non-blocking' (#753) from sd-shutdown-non-blocking into main
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good
Reviewed-on: #753 Reviewed-by: Marius Eggert <eggertm@irs.uni-stuttgart.de>
This commit is contained in:
commit
8cd959f55a
@ -24,6 +24,8 @@ will consitute of a breaking change warranting a new major release:
|
||||
- Re-ordered some functions of the core controller in the initialize function.
|
||||
- Rad sensor is now only polled every 30 minutes instead of every device cycle to reduce wear of
|
||||
the RADFET electronics.
|
||||
- The SD cards will still be switched OFF on a reboot, but this is done in a non-blocking manner
|
||||
now with a timeout of 10 seconds where the reboot will be performed in any case.
|
||||
- ACS Controller now includes the safe mode from FLP, which will calculate its rotational rate
|
||||
from SUS and MGM measurements. To accommodate these changes, low-pass filters for SUS
|
||||
measurements and rates as well as MGM measurements and rates are included. Usage of the new
|
||||
@ -45,6 +47,9 @@ will consitute of a breaking change warranting a new major release:
|
||||
|
||||
## Fixed
|
||||
|
||||
- General bugs in the SD card state machine. This might fix some other known bugs for certain
|
||||
combinations of switching ON and OFF SD cards and also makes the whole state machine a lot more
|
||||
robust against hanging up.
|
||||
- SUS dummy handler went to `MODE_NORMAL` for ON commands.
|
||||
- PL PCDU dummy went to `MODE_NORMAL` for ON commands.
|
||||
|
||||
|
@ -554,21 +554,24 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
}
|
||||
|
||||
// This lambda checks the non-blocking operation of the SD card manager and assigns the new
|
||||
// state on success. It returns true for an operation success and false otherwise
|
||||
// state on success. It returns 0 for an operation success, -1 for failed operations, and 1
|
||||
// for pending operations
|
||||
auto nonBlockingSdcOpChecking = [&](SdStates newStateOnSuccess, uint16_t maxCycleCount,
|
||||
std::string opPrintout) {
|
||||
SdCardManager::OpStatus status = sdcMan->checkCurrentOp(operation);
|
||||
if (status == SdCardManager::OpStatus::SUCCESS) {
|
||||
if (status == SdCardManager::OpStatus::SUCCESS or sdInfo.cycleCount > maxCycleCount) {
|
||||
sdFsmState = newStateOnSuccess;
|
||||
sdInfo.commandPending = false;
|
||||
if (sdInfo.cycleCount > maxCycleCount) {
|
||||
sif::warning << "CoreController::sdStateMachine: " << opPrintout << " takes too long"
|
||||
<< std::endl;
|
||||
sdInfo.cycleCount = 0;
|
||||
return -1;
|
||||
}
|
||||
sdInfo.cycleCount = 0;
|
||||
return true;
|
||||
} else if (sdInfo.cycleCount > 4) {
|
||||
sif::warning << "CoreController::sdStateMachine: " << opPrintout << " takes too long"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
return 0;
|
||||
};
|
||||
return 1;
|
||||
};
|
||||
|
||||
if (sdFsmState == SdStates::UPDATE_SD_INFO_START) {
|
||||
@ -644,7 +647,7 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
sdFsmState = tgtState;
|
||||
}
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::MOUNT_SELF, 10, "Setting SDC state")) {
|
||||
if (nonBlockingSdcOpChecking(SdStates::MOUNT_SELF, 10, "Setting SDC state") <= 0) {
|
||||
sdInfo.activeState = sd::SdState::ON;
|
||||
currentStateSetter(sdInfo.active, sd::SdState::ON);
|
||||
// Skip the two cycles now.
|
||||
@ -672,7 +675,7 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
result = sdCardSetup(sdInfo.active, sd::SdState::MOUNTED, sdInfo.activeChar);
|
||||
sdInfo.commandPending = true;
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::DETERMINE_OTHER, 5, "Mounting SD card")) {
|
||||
if (nonBlockingSdcOpChecking(SdStates::DETERMINE_OTHER, 5, "Mounting SD card") <= 0) {
|
||||
sdcMan->setActiveSdCard(sdInfo.active);
|
||||
currMntPrefix = sdcMan->getCurrentMountPrefix();
|
||||
sdInfo.activeState = sd::SdState::MOUNTED;
|
||||
@ -714,12 +717,7 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
sdInfo.commandPending = true;
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::SKIP_CYCLE_BEFORE_INFO_UPDATE, 10,
|
||||
"Switching off other SD card")) {
|
||||
sdInfo.otherState = sd::SdState::OFF;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::OFF);
|
||||
} else {
|
||||
// Continue.. avoid being stuck here..
|
||||
sdFsmState = SdStates::SKIP_CYCLE_BEFORE_INFO_UPDATE;
|
||||
"Switching off other SD card") <= 0) {
|
||||
sdInfo.otherState = sd::SdState::OFF;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::OFF);
|
||||
}
|
||||
@ -730,12 +728,7 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
sdInfo.commandPending = true;
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::MOUNT_UNMOUNT_OTHER, 10,
|
||||
"Switching on other SD card")) {
|
||||
sdInfo.otherState = sd::SdState::ON;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::ON);
|
||||
} else {
|
||||
// Contnue.. avoid being stuck here.
|
||||
sdFsmState = SdStates::MOUNT_UNMOUNT_OTHER;
|
||||
"Switching on other SD card") <= 0) {
|
||||
sdInfo.otherState = sd::SdState::ON;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::ON);
|
||||
}
|
||||
@ -750,7 +743,8 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
result = sdCardSetup(sdInfo.other, sd::SdState::ON, sdInfo.otherChar);
|
||||
sdInfo.commandPending = true;
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::SET_STATE_OTHER, 10, "Unmounting other SD card")) {
|
||||
if (nonBlockingSdcOpChecking(SdStates::SET_STATE_OTHER, 10, "Unmounting other SD card") <=
|
||||
0) {
|
||||
sdInfo.otherState = sd::SdState::ON;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::ON);
|
||||
} else {
|
||||
@ -764,7 +758,8 @@ ReturnValue_t CoreController::sdStateMachine() {
|
||||
result = sdCardSetup(sdInfo.other, sd::SdState::MOUNTED, sdInfo.otherChar);
|
||||
sdInfo.commandPending = true;
|
||||
} else {
|
||||
if (nonBlockingSdcOpChecking(SdStates::UPDATE_SD_INFO_END, 4, "Mounting other SD card")) {
|
||||
if (nonBlockingSdcOpChecking(SdStates::UPDATE_SD_INFO_END, 4, "Mounting other SD card") <=
|
||||
0) {
|
||||
sdInfo.otherState = sd::SdState::MOUNTED;
|
||||
currentStateSetter(sdInfo.other, sd::SdState::MOUNTED);
|
||||
}
|
||||
@ -840,7 +835,7 @@ ReturnValue_t CoreController::sdCardSetup(sd::SdCard sdCard, sd::SdState targetS
|
||||
if (state == sd::SdState::MOUNTED) {
|
||||
if (targetState == sd::SdState::OFF) {
|
||||
sif::info << "Switching off SD card " << sdChar << std::endl;
|
||||
return sdcMan->switchOffSdCard(sdCard, true, &sdInfo.currentState);
|
||||
return sdcMan->switchOffSdCard(sdCard, sdInfo.currentState, true);
|
||||
} else if (targetState == sd::SdState::ON) {
|
||||
sif::info << "Unmounting SD card " << sdChar << std::endl;
|
||||
return sdcMan->unmountSdCard(sdCard);
|
||||
@ -874,7 +869,7 @@ ReturnValue_t CoreController::sdCardSetup(sd::SdCard sdCard, sd::SdState targetS
|
||||
return sdcMan->mountSdCard(sdCard);
|
||||
} else if (targetState == sd::SdState::OFF) {
|
||||
sif::info << "Switching off SD card " << sdChar << std::endl;
|
||||
return sdcMan->switchOffSdCard(sdCard, false, &sdInfo.currentState);
|
||||
return sdcMan->switchOffSdCard(sdCard, sdInfo.currentState, false);
|
||||
}
|
||||
} else {
|
||||
sif::warning << "CoreController::sdCardSetup: Invalid state for this call" << std::endl;
|
||||
@ -898,8 +893,7 @@ ReturnValue_t CoreController::sdColdRedundantBlockingInit() {
|
||||
sif::info << "Switching off secondary SD card " << sdInfo.otherChar << std::endl;
|
||||
// Switch off other SD card in cold redundant mode if setting up preferred one worked
|
||||
// without issues
|
||||
ReturnValue_t result2 =
|
||||
sdcMan->switchOffSdCard(sdInfo.other, sdInfo.otherState, &sdInfo.currentState);
|
||||
ReturnValue_t result2 = sdcMan->switchOffSdCard(sdInfo.other, sdInfo.currentState, true);
|
||||
if (result2 != returnvalue::OK and result2 != SdCardManager::ALREADY_OFF) {
|
||||
sif::warning << "Switching off secondary SD card " << sdInfo.otherChar
|
||||
<< " in cold redundant mode failed" << std::endl;
|
||||
@ -1229,18 +1223,27 @@ ReturnValue_t CoreController::gracefulShutdownTasks(xsc::Chip chip, xsc::Copy co
|
||||
// Ensure that all writes/reads do finish.
|
||||
sync();
|
||||
|
||||
// Attempt graceful shutdown by unmounting and switching off SD cards
|
||||
sdcMan->switchOffSdCard(sd::SdCard::SLOT_0);
|
||||
sdcMan->switchOffSdCard(sd::SdCard::SLOT_1);
|
||||
// Unmount and switch off SD cards. This could possibly fix issues with the SD card and is
|
||||
// the more graceful way to reboot the system. This function takes around 400 ms.
|
||||
ReturnValue_t result = handleSwitchingSdCardsOffNonBlocking();
|
||||
if (result != returnvalue::OK) {
|
||||
sif::error
|
||||
<< "CoreController::gracefulShutdownTasks: Issues unmounting or switching SD cards off"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// If any boot copies are unprotected.
|
||||
// Actually this function only ensures that reboots to the own image are protected..
|
||||
ReturnValue_t result = setBootCopyProtection(xsc::Chip::SELF_CHIP, xsc::Copy::SELF_COPY, true,
|
||||
protOpPerformed, false);
|
||||
result = setBootCopyProtection(xsc::Chip::SELF_CHIP, xsc::Copy::SELF_COPY, true, protOpPerformed,
|
||||
false);
|
||||
if (result == returnvalue::OK and protOpPerformed) {
|
||||
// TODO: Would be nice to notify operator. But we can't use the filesystem anymore
|
||||
// and a reboot is imminent. Use scratch buffer?
|
||||
sif::info << "Running slot was writeprotected before reboot" << std::endl;
|
||||
}
|
||||
sif::info << "Graceful shutdown handling done" << std::endl;
|
||||
// Ensure that all diagnostic prinouts arrive.
|
||||
TaskFactory::delayTask(50);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -2583,6 +2586,57 @@ void CoreController::announceSdInfo(SdCardManager::SdStatePair sdStates) {
|
||||
triggerEvent(core::ACTIVE_SD_INFO, p1, p2);
|
||||
}
|
||||
|
||||
ReturnValue_t CoreController::handleSwitchingSdCardsOffNonBlocking() {
|
||||
sdcMan->setBlocking(false);
|
||||
SdCardManager::Operations op;
|
||||
std::pair<sd::SdState, sd::SdState> sdStatus;
|
||||
ReturnValue_t result = sdcMan->getSdCardsStatus(sdStatus);
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
Countdown maxWaitTimeCd(10000);
|
||||
// Stopwatch watch;
|
||||
auto waitingForFinish = [&]() {
|
||||
auto currentState = sdcMan->checkCurrentOp(op);
|
||||
if (currentState == SdCardManager::OpStatus::IDLE) {
|
||||
return returnvalue::OK;
|
||||
}
|
||||
while (currentState == SdCardManager::OpStatus::ONGOING) {
|
||||
if (maxWaitTimeCd.hasTimedOut()) {
|
||||
return returnvalue::FAILED;
|
||||
}
|
||||
TaskFactory::delayTask(50);
|
||||
currentState = sdcMan->checkCurrentOp(op);
|
||||
}
|
||||
return returnvalue::OK;
|
||||
};
|
||||
if (sdStatus.first != sd::SdState::OFF) {
|
||||
sdcMan->unmountSdCard(sd::SdCard::SLOT_0);
|
||||
result = waitingForFinish();
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
sdcMan->switchOffSdCard(sd::SdCard::SLOT_0, sdStatus, false);
|
||||
result = waitingForFinish();
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
if (sdStatus.second != sd::SdState::OFF) {
|
||||
sdcMan->unmountSdCard(sd::SdCard::SLOT_1);
|
||||
result = waitingForFinish();
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
sdcMan->switchOffSdCard(sd::SdCard::SLOT_1, sdStatus, false);
|
||||
result = waitingForFinish();
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool CoreController::isNumber(const std::string &s) {
|
||||
return !s.empty() && std::find_if(s.begin(), s.end(),
|
||||
[](unsigned char c) { return !std::isdigit(c); }) == s.end();
|
||||
|
@ -374,6 +374,7 @@ class CoreController : public ExtendedControllerBase, public ReceivesParameterMe
|
||||
ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed);
|
||||
|
||||
ReturnValue_t handleProtInfoUpdateLine(std::string nextLine);
|
||||
ReturnValue_t handleSwitchingSdCardsOffNonBlocking();
|
||||
int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect,
|
||||
bool& protOperationPerformed, bool selfChip, bool selfCopy,
|
||||
bool allChips, bool allCopies, uint8_t arrIdx);
|
||||
|
@ -125,13 +125,8 @@ ReturnValue_t SdCardManager::switchOnSdCard(sd::SdCard sdCard, bool doMountSdCar
|
||||
return mountSdCard(sdCard);
|
||||
}
|
||||
|
||||
ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, bool doUnmountSdCard,
|
||||
SdStatePair* statusPair) {
|
||||
std::pair<sd::SdState, sd::SdState> active;
|
||||
ReturnValue_t result = getSdCardsStatus(active);
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, SdStatePair& sdStates,
|
||||
bool doUnmountSdCard) {
|
||||
if (doUnmountSdCard) {
|
||||
if (not blocking) {
|
||||
sif::warning << "SdCardManager::switchOffSdCard: Two-step command but manager is"
|
||||
@ -147,17 +142,17 @@ ReturnValue_t SdCardManager::switchOffSdCard(sd::SdCard sdCard, bool doUnmountSd
|
||||
return returnvalue::FAILED;
|
||||
}
|
||||
if (sdCard == sd::SdCard::SLOT_0) {
|
||||
if (active.first == sd::SdState::OFF) {
|
||||
if (sdStates.first == sd::SdState::OFF) {
|
||||
return ALREADY_OFF;
|
||||
}
|
||||
} else if (sdCard == sd::SdCard::SLOT_1) {
|
||||
if (active.second == sd::SdState::OFF) {
|
||||
if (sdStates.second == sd::SdState::OFF) {
|
||||
return ALREADY_OFF;
|
||||
}
|
||||
}
|
||||
|
||||
if (doUnmountSdCard) {
|
||||
result = unmountSdCard(sdCard);
|
||||
ReturnValue_t result = unmountSdCard(sdCard);
|
||||
if (result != returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
@ -189,7 +184,7 @@ ReturnValue_t SdCardManager::setSdCardState(sd::SdCard sdCard, bool on) {
|
||||
command << "q7hw sd set " << sdstring << " " << statestring;
|
||||
cmdExecutor.load(command.str(), blocking, printCmdOutput);
|
||||
ReturnValue_t result = cmdExecutor.execute();
|
||||
if (blocking and result != returnvalue::OK) {
|
||||
if (result != returnvalue::OK) {
|
||||
utility::handleSystemError(cmdExecutor.getLastError(), "SdCardManager::setSdCardState");
|
||||
}
|
||||
return result;
|
||||
@ -204,6 +199,7 @@ ReturnValue_t SdCardManager::getSdCardsStatus(SdStatePair& sdStates) {
|
||||
ReturnValue_t SdCardManager::mountSdCard(sd::SdCard sdCard) {
|
||||
using namespace std;
|
||||
if (cmdExecutor.getCurrentState() == CommandExecutor::States::PENDING) {
|
||||
sif::warning << "SdCardManager::mountSdCard: Command still pending" << std::endl;
|
||||
return CommandExecutor::COMMAND_PENDING;
|
||||
}
|
||||
if (sdCard == sd::SdCard::BOTH) {
|
||||
|
@ -114,8 +114,7 @@ class SdCardManager : public SystemObject, public SdCardMountedIF {
|
||||
* @return - returnvalue::OK on success, ALREADY_ON if it is already on,
|
||||
* SYSTEM_CALL_ERROR on system error
|
||||
*/
|
||||
ReturnValue_t switchOffSdCard(sd::SdCard sdCard, bool doUnmountSdCard = true,
|
||||
SdStatePair* statusPair = nullptr);
|
||||
ReturnValue_t switchOffSdCard(sd::SdCard sdCard, SdStatePair& sdStates, bool doUnmountSdCard);
|
||||
|
||||
/**
|
||||
* Get the state of the SD cards. If the state file does not exist, this function will
|
||||
|
Loading…
Reference in New Issue
Block a user