reworked watchdog
This commit is contained in:
parent
e33a0fd60b
commit
f789380343
12
CHANGELOG.md
12
CHANGELOG.md
@ -16,6 +16,18 @@ will constitute a breaking change warranting a new major release:
|
|||||||
|
|
||||||
# [unreleased]
|
# [unreleased]
|
||||||
|
|
||||||
|
## Changed
|
||||||
|
|
||||||
|
- Improved the OBSW watchdog by adding a watch functionality. The watch functionality is optional
|
||||||
|
and has to be enabled specifically by the application being watched by the watchdog when
|
||||||
|
starting the watchdog. If the watch functionality is enabled and the OBSW has not pinged
|
||||||
|
the watchdog via the FIFO for 2 minutes, the watchdog will restart the OBSW service via systemd.
|
||||||
|
The primary OBSW will only activate the watch functionality if it is the OBSW inside the
|
||||||
|
`/usr/bin` directory. This allows debugging the system by leaving flashed or manually copied
|
||||||
|
debugging images 2 minutes to start the watchdog without the watch functionality.
|
||||||
|
- The SD card prefix is now set earlier inside the `CoreController` constructor
|
||||||
|
- The watchdog handling was moved outside the `CoreController` into the main loop.
|
||||||
|
|
||||||
# [v1.31.1]
|
# [v1.31.1]
|
||||||
|
|
||||||
## Fixed
|
## Fixed
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
target_sources(${OBSW_NAME} PRIVATE CoreController.cpp scheduling.cpp
|
target_sources(${OBSW_NAME} PRIVATE CoreController.cpp scheduling.cpp
|
||||||
ObjectFactory.cpp)
|
ObjectFactory.cpp WatchdogHandler.cpp)
|
||||||
|
|
||||||
target_sources(${SIMPLE_OBSW_NAME} PRIVATE scheduling.cpp)
|
target_sources(${SIMPLE_OBSW_NAME} PRIVATE scheduling.cpp)
|
||||||
|
@ -33,12 +33,7 @@ xsc::Copy CoreController::CURRENT_COPY = xsc::Copy::NO_COPY;
|
|||||||
|
|
||||||
CoreController::CoreController(object_id_t objectId)
|
CoreController::CoreController(object_id_t objectId)
|
||||||
: ExtendedControllerBase(objectId, 5), opDivider5(5), opDivider10(10), hkSet(this) {
|
: ExtendedControllerBase(objectId, 5), opDivider5(5), opDivider10(10), hkSet(this) {
|
||||||
ReturnValue_t result = returnvalue::OK;
|
|
||||||
try {
|
try {
|
||||||
result = initWatchdogFifo();
|
|
||||||
if (result != returnvalue::OK) {
|
|
||||||
sif::warning << "CoreController::CoreController: Watchdog FIFO init failed" << std::endl;
|
|
||||||
}
|
|
||||||
sdcMan = SdCardManager::instance();
|
sdcMan = SdCardManager::instance();
|
||||||
if (sdcMan == nullptr) {
|
if (sdcMan == nullptr) {
|
||||||
sif::error << "CoreController::CoreController: SD card manager invalid!" << std::endl;
|
sif::error << "CoreController::CoreController: SD card manager invalid!" << std::endl;
|
||||||
@ -47,11 +42,25 @@ CoreController::CoreController(object_id_t objectId)
|
|||||||
if (not BLOCKING_SD_INIT) {
|
if (not BLOCKING_SD_INIT) {
|
||||||
sdcMan->setBlocking(false);
|
sdcMan->setBlocking(false);
|
||||||
}
|
}
|
||||||
|
auto sdCard = sdcMan->getPreferredSdCard();
|
||||||
|
if (not sdCard.has_value()) {
|
||||||
|
sif::error << "CoreController::initializeAfterTaskCreation: "
|
||||||
|
"Issues getting preferred SD card, setting to 0"
|
||||||
|
<< std::endl;
|
||||||
|
sdCard = sd::SdCard::SLOT_0;
|
||||||
|
}
|
||||||
|
sdInfo.active = sdCard.value();
|
||||||
|
sdcMan->setActiveSdCard(sdInfo.active);
|
||||||
|
currMntPrefix = sdcMan->getCurrentMountPrefix();
|
||||||
|
|
||||||
getCurrentBootCopy(CURRENT_CHIP, CURRENT_COPY);
|
getCurrentBootCopy(CURRENT_CHIP, CURRENT_COPY);
|
||||||
} catch (const std::filesystem::filesystem_error &e) {
|
} catch (const std::filesystem::filesystem_error &e) {
|
||||||
sif::error << "CoreController::CoreController: Failed with exception " << e.what() << std::endl;
|
sif::error << "CoreController::CoreController: Failed with exception " << e.what() << std::endl;
|
||||||
}
|
}
|
||||||
|
// Add script folder to path
|
||||||
|
char *currentEnvPath = getenv("PATH");
|
||||||
|
std::string updatedEnvPath = std::string(currentEnvPath) + ":/home/root/scripts:/usr/local/bin";
|
||||||
|
setenv("PATH", updatedEnvPath.c_str(), true);
|
||||||
sdCardCheckCd.timeOut();
|
sdCardCheckCd.timeOut();
|
||||||
eventQueue = QueueFactory::instance()->createMessageQueue(5, EventMessage::MAX_MESSAGE_SIZE);
|
eventQueue = QueueFactory::instance()->createMessageQueue(5, EventMessage::MAX_MESSAGE_SIZE);
|
||||||
}
|
}
|
||||||
@ -76,7 +85,6 @@ void CoreController::performControlOperation() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
performWatchdogControlOperation();
|
|
||||||
sdStateMachine();
|
sdStateMachine();
|
||||||
performMountedSdCardOperations();
|
performMountedSdCardOperations();
|
||||||
if (sdCardCheckCd.hasTimedOut()) {
|
if (sdCardCheckCd.hasTimedOut()) {
|
||||||
@ -146,19 +154,6 @@ ReturnValue_t CoreController::initialize() {
|
|||||||
|
|
||||||
ReturnValue_t CoreController::initializeAfterTaskCreation() {
|
ReturnValue_t CoreController::initializeAfterTaskCreation() {
|
||||||
ReturnValue_t result = returnvalue::OK;
|
ReturnValue_t result = returnvalue::OK;
|
||||||
auto sdCard = sdcMan->getPreferredSdCard();
|
|
||||||
if (not sdCard) {
|
|
||||||
return returnvalue::FAILED;
|
|
||||||
}
|
|
||||||
sdInfo.active = sdCard.value();
|
|
||||||
if (sdInfo.active == sd::SdCard::NONE) {
|
|
||||||
sif::error << "CoreController::initializeAfterTaskCreation: "
|
|
||||||
"Issues getting preferred SD card, setting to 0"
|
|
||||||
<< std::endl;
|
|
||||||
sdInfo.active = sd::SdCard::SLOT_0;
|
|
||||||
}
|
|
||||||
sdcMan->setActiveSdCard(sdInfo.active);
|
|
||||||
currMntPrefix = sdcMan->getCurrentMountPrefix();
|
|
||||||
if (BLOCKING_SD_INIT) {
|
if (BLOCKING_SD_INIT) {
|
||||||
result = initSdCardBlocking();
|
result = initSdCardBlocking();
|
||||||
if (result != returnvalue::OK and result != SdCardManager::ALREADY_MOUNTED) {
|
if (result != returnvalue::OK and result != SdCardManager::ALREADY_MOUNTED) {
|
||||||
@ -170,12 +165,7 @@ ReturnValue_t CoreController::initializeAfterTaskCreation() {
|
|||||||
if (result != returnvalue::OK) {
|
if (result != returnvalue::OK) {
|
||||||
sif::warning << "CoreController::initialize: Version initialization failed" << std::endl;
|
sif::warning << "CoreController::initialize: Version initialization failed" << std::endl;
|
||||||
}
|
}
|
||||||
// Add script folder to path
|
|
||||||
char *currentEnvPath = getenv("PATH");
|
|
||||||
std::string updatedEnvPath = std::string(currentEnvPath) + ":/home/root/scripts:/usr/local/bin";
|
|
||||||
setenv("PATH", updatedEnvPath.c_str(), true);
|
|
||||||
updateProtInfo();
|
updateProtInfo();
|
||||||
initPrint();
|
|
||||||
return ExtendedControllerBase::initializeAfterTaskCreation();
|
return ExtendedControllerBase::initializeAfterTaskCreation();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -839,36 +829,6 @@ void CoreController::getCurrentBootCopy(xsc::Chip &chip, xsc::Copy ©) {
|
|||||||
copy = static_cast<xsc::Copy>(xscCopy);
|
copy = static_cast<xsc::Copy>(xscCopy);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReturnValue_t CoreController::initWatchdogFifo() {
|
|
||||||
if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
|
|
||||||
// Still return returnvalue::OK for now
|
|
||||||
sif::info << "Watchdog FIFO " << watchdog::FIFO_NAME << " does not exist, can't initiate"
|
|
||||||
<< " watchdog" << std::endl;
|
|
||||||
return returnvalue::OK;
|
|
||||||
}
|
|
||||||
// Open FIFO write only and non-blocking to prevent SW from killing itself.
|
|
||||||
watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
|
|
||||||
if (watchdogFifoFd < 0) {
|
|
||||||
if (errno == ENXIO) {
|
|
||||||
watchdogFifoFd = RETRY_FIFO_OPEN;
|
|
||||||
sif::info << "eive-watchdog not running. FIFO can not be opened" << std::endl;
|
|
||||||
} else {
|
|
||||||
sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with " << errno
|
|
||||||
<< ": " << strerror(errno) << std::endl;
|
|
||||||
return returnvalue::FAILED;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return returnvalue::OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
void CoreController::initPrint() {
|
|
||||||
#if OBSW_VERBOSE_LEVEL >= 1
|
|
||||||
if (watchdogFifoFd > 0) {
|
|
||||||
sif::info << "Opened watchdog FIFO successfully.." << std::endl;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ReturnValue_t CoreController::actionXscReboot(const uint8_t *data, size_t size) {
|
ReturnValue_t CoreController::actionXscReboot(const uint8_t *data, size_t size) {
|
||||||
if (size < 1) {
|
if (size < 1) {
|
||||||
return HasActionsIF::INVALID_PARAMETERS;
|
return HasActionsIF::INVALID_PARAMETERS;
|
||||||
@ -1223,36 +1183,6 @@ ReturnValue_t CoreController::handleProtInfoUpdateLine(std::string nextLine) {
|
|||||||
return returnvalue::OK;
|
return returnvalue::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CoreController::performWatchdogControlOperation() {
|
|
||||||
// Only perform each fifth iteration
|
|
||||||
if (watchdogFifoFd != 0 and opDivider5.check()) {
|
|
||||||
if (watchdogFifoFd == RETRY_FIFO_OPEN) {
|
|
||||||
// Open FIFO write only and non-blocking
|
|
||||||
watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
|
|
||||||
if (watchdogFifoFd < 0) {
|
|
||||||
if (errno == ENXIO) {
|
|
||||||
watchdogFifoFd = RETRY_FIFO_OPEN;
|
|
||||||
// No printout for now, would be spam
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with "
|
|
||||||
<< errno << ": " << strerror(errno) << std::endl;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sif::info << "Opened " << watchdog::FIFO_NAME << " successfully" << std::endl;
|
|
||||||
} else if (watchdogFifoFd > 0) {
|
|
||||||
// Write to OBSW watchdog FIFO here
|
|
||||||
const char writeChar = 'a';
|
|
||||||
ssize_t writtenBytes = write(watchdogFifoFd, &writeChar, 1);
|
|
||||||
if (writtenBytes < 0) {
|
|
||||||
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
|
|
||||||
<< std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void CoreController::performMountedSdCardOperations() {
|
void CoreController::performMountedSdCardOperations() {
|
||||||
auto mountedSdCardOp = [&](sd::SdCard sdCard, std::string mntPoint) {
|
auto mountedSdCardOp = [&](sd::SdCard sdCard, std::string mntPoint) {
|
||||||
if (not performOneShotSdCardOpsSwitch) {
|
if (not performOneShotSdCardOpsSwitch) {
|
||||||
|
@ -162,9 +162,6 @@ class CoreController : public ExtendedControllerBase {
|
|||||||
private:
|
private:
|
||||||
static constexpr MutexIF::TimeoutType TIMEOUT_TYPE = MutexIF::TimeoutType::WAITING;
|
static constexpr MutexIF::TimeoutType TIMEOUT_TYPE = MutexIF::TimeoutType::WAITING;
|
||||||
static constexpr uint32_t MUTEX_TIMEOUT = 20;
|
static constexpr uint32_t MUTEX_TIMEOUT = 20;
|
||||||
// Designated value for rechecking FIFO open
|
|
||||||
static constexpr int RETRY_FIFO_OPEN = -2;
|
|
||||||
int watchdogFifoFd = 0;
|
|
||||||
GpsHyperion::FixMode gpsFix = GpsHyperion::FixMode::UNKNOWN;
|
GpsHyperion::FixMode gpsFix = GpsHyperion::FixMode::UNKNOWN;
|
||||||
|
|
||||||
// States for SD state machine, which is used in non-blocking mode
|
// States for SD state machine, which is used in non-blocking mode
|
||||||
@ -260,7 +257,6 @@ class CoreController : public ExtendedControllerBase {
|
|||||||
ReturnValue_t performSdCardCheck();
|
ReturnValue_t performSdCardCheck();
|
||||||
ReturnValue_t timeFileHandler();
|
ReturnValue_t timeFileHandler();
|
||||||
ReturnValue_t initBootCopyFile();
|
ReturnValue_t initBootCopyFile();
|
||||||
ReturnValue_t initWatchdogFifo();
|
|
||||||
ReturnValue_t initSdCardBlocking();
|
ReturnValue_t initSdCardBlocking();
|
||||||
bool startSdStateMachine(sd::SdCard targetActiveSd, SdCfgMode mode, MessageQueueId_t commander,
|
bool startSdStateMachine(sd::SdCard targetActiveSd, SdCfgMode mode, MessageQueueId_t commander,
|
||||||
DeviceCommandId_t actionId);
|
DeviceCommandId_t actionId);
|
||||||
@ -285,8 +281,6 @@ class CoreController : public ExtendedControllerBase {
|
|||||||
|
|
||||||
ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed);
|
ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed);
|
||||||
|
|
||||||
void performWatchdogControlOperation();
|
|
||||||
|
|
||||||
ReturnValue_t handleProtInfoUpdateLine(std::string nextLine);
|
ReturnValue_t handleProtInfoUpdateLine(std::string nextLine);
|
||||||
int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect,
|
int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect,
|
||||||
bool& protOperationPerformed, bool selfChip, bool selfCopy,
|
bool& protOperationPerformed, bool selfChip, bool selfCopy,
|
||||||
|
84
bsp_q7s/core/WatchdogHandler.cpp
Normal file
84
bsp_q7s/core/WatchdogHandler.cpp
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
#include "WatchdogHandler.h"
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cstring>
|
||||||
|
#include <filesystem>
|
||||||
|
|
||||||
|
#include "fsfw/serviceinterface.h"
|
||||||
|
#include "watchdog/definitions.h"
|
||||||
|
|
||||||
|
// Nothing to do here: the constructor body is empty and the watchdog FIFO is only opened later
// by initialize().
WatchdogHandler::WatchdogHandler() {}
|
||||||
|
|
||||||
|
void WatchdogHandler::periodicOperation() {
|
||||||
|
if (watchdogFifoFd != 0) {
|
||||||
|
if (watchdogFifoFd == RETRY_FIFO_OPEN) {
|
||||||
|
// Open FIFO write only and non-blocking
|
||||||
|
watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
|
||||||
|
if (watchdogFifoFd < 0) {
|
||||||
|
if (errno == ENXIO) {
|
||||||
|
watchdogFifoFd = RETRY_FIFO_OPEN;
|
||||||
|
// No printout for now, would be spam
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with "
|
||||||
|
<< errno << ": " << strerror(errno) << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sif::info << "Opened " << watchdog::FIFO_NAME << " successfully" << std::endl;
|
||||||
|
performStartHandling();
|
||||||
|
} else if (watchdogFifoFd > 0) {
|
||||||
|
// Write to OBSW watchdog FIFO here
|
||||||
|
const char writeChar = watchdog::first::IDLE_CHAR;
|
||||||
|
ssize_t writtenBytes = write(watchdogFifoFd, &writeChar, 1);
|
||||||
|
if (writtenBytes < 0) {
|
||||||
|
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Open the watchdog FIFO and, if that worked, send the start command to the watchdog.
 *
 * @param enableWatchdogFunction  Stored in enableWatchFunction; if true, the start command
 *                                additionally carries the watch flag.
 * @return returnvalue::OK if the start command was written, if the FIFO does not exist, or if
 *         the FIFO has no reader yet (the open is then retried by periodicOperation()).
 *         returnvalue::FAILED if opening the FIFO failed for any other reason or if writing the
 *         start command failed.
 */
ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
  this->enableWatchFunction = enableWatchdogFunction;
  if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
    // Still return returnvalue::OK for now
    sif::info << "Watchdog FIFO " << watchdog::FIFO_NAME << " does not exist, can't initiate"
              << " watchdog" << std::endl;
    return returnvalue::OK;
  }
  // Open FIFO write only and non-blocking to prevent SW from killing itself.
  watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
  if (watchdogFifoFd < 0) {
    if (errno == ENXIO) {
      watchdogFifoFd = RETRY_FIFO_OPEN;
      sif::info << "eive-watchdog not running. FIFO can not be opened" << std::endl;
      // There is no valid descriptor to write the start command to. Writing to the
      // RETRY_FIFO_OPEN marker would only fail with EBADF and report a bogus error. The start
      // command is sent by periodicOperation() once the FIFO could be opened.
      return returnvalue::OK;
    }
    sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with " << errno
               << ": " << strerror(errno) << std::endl;
    return returnvalue::FAILED;
  }
  return performStartHandling();
}
|
||||||
|
|
||||||
|
/**
 * Write the start command to the watchdog FIFO.
 *
 * The command is the start character, followed by the watch flag if enableWatchFunction is set.
 *
 * @return returnvalue::OK if write() did not report an error, returnvalue::FAILED otherwise.
 */
ReturnValue_t WatchdogHandler::performStartHandling() {
  // Room for the start character plus the optional watch flag.
  char startCmd[2];
  startCmd[0] = watchdog::first::START_CHAR;
  size_t cmdLen = 1;
  if (enableWatchFunction) {
    startCmd[1] = watchdog::second::WATCH_FLAG;
    cmdLen = 2;
  }

  if (write(watchdogFifoFd, &startCmd, cmdLen) >= 0) {
    return returnvalue::OK;
  }
  sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
             << std::endl;
  return returnvalue::FAILED;
}
|
23
bsp_q7s/core/WatchdogHandler.h
Normal file
23
bsp_q7s/core/WatchdogHandler.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
#ifndef BSP_Q7S_CORE_WATCHDOGHANDLER_H_
|
||||||
|
#define BSP_Q7S_CORE_WATCHDOGHANDLER_H_
|
||||||
|
|
||||||
|
#include "fsfw/returnvalues/returnvalue.h"
|
||||||
|
|
||||||
|
class WatchdogHandler {
|
||||||
|
public:
|
||||||
|
WatchdogHandler();
|
||||||
|
|
||||||
|
ReturnValue_t initialize(bool enableWatchFunction);
|
||||||
|
void periodicOperation();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Designated value for rechecking FIFO open
|
||||||
|
static constexpr int RETRY_FIFO_OPEN = -2;
|
||||||
|
|
||||||
|
int watchdogFifoFd = 0;
|
||||||
|
bool enableWatchFunction = false;
|
||||||
|
|
||||||
|
ReturnValue_t performStartHandling();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* BSP_Q7S_CORE_WATCHDOGHANDLER_H_ */
|
@ -9,6 +9,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "OBSWConfig.h"
|
#include "OBSWConfig.h"
|
||||||
|
#include "bsp_q7s/core/WatchdogHandler.h"
|
||||||
#include "commonConfig.h"
|
#include "commonConfig.h"
|
||||||
#include "core/scheduling.h"
|
#include "core/scheduling.h"
|
||||||
#include "fsfw/tasks/TaskFactory.h"
|
#include "fsfw/tasks/TaskFactory.h"
|
||||||
@ -24,6 +25,9 @@ static const char* DEV_STRING = "Xiphos Q7S FM";
|
|||||||
#else
|
#else
|
||||||
static const char* DEV_STRING = "Xiphos Q7S EM";
|
static const char* DEV_STRING = "Xiphos Q7S EM";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
WatchdogHandler WATCHDOG_HANDLER;
|
||||||
|
|
||||||
int obsw::obsw() {
|
int obsw::obsw() {
|
||||||
using namespace fsfw;
|
using namespace fsfw;
|
||||||
std::cout << "-- EIVE OBSW --" << std::endl;
|
std::cout << "-- EIVE OBSW --" << std::endl;
|
||||||
@ -44,6 +48,35 @@ int obsw::obsw() {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Delay the boot if applicable.
|
||||||
|
bootDelayHandling();
|
||||||
|
|
||||||
|
bool initWatchFunction = false;
|
||||||
|
if (std::filesystem::current_path() == "/usr/bin") {
|
||||||
|
initWatchFunction = true;
|
||||||
|
}
|
||||||
|
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
|
||||||
|
if (result != returnvalue::OK) {
|
||||||
|
std::cerr << "Initiating EIVE watchdog handler failed" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
scheduling::initMission();
|
||||||
|
|
||||||
|
// Command the EIVE system to safe mode
|
||||||
|
#if OBSW_COMMAND_SAFE_MODE_AT_STARTUP == 1
|
||||||
|
commandEiveSystemToSafe();
|
||||||
|
#else
|
||||||
|
announceAllModes();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
WATCHDOG_HANDLER.periodicOperation();
|
||||||
|
TaskFactory::delayTask(1000);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void obsw::bootDelayHandling() {
|
||||||
const char* homedir = nullptr;
|
const char* homedir = nullptr;
|
||||||
homedir = getenv("HOME");
|
homedir = getenv("HOME");
|
||||||
if (homedir == nullptr) {
|
if (homedir == nullptr) {
|
||||||
@ -71,31 +104,26 @@ int obsw::obsw() {
|
|||||||
std::cout << "Delaying OBSW start for " << bootDelaySecs << " seconds" << std::endl;
|
std::cout << "Delaying OBSW start for " << bootDelaySecs << " seconds" << std::endl;
|
||||||
TaskFactory::delayTask(bootDelaySecs * 1000);
|
TaskFactory::delayTask(bootDelaySecs * 1000);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
scheduling::initMission();
|
void obsw::commandEiveSystemToSafe() {
|
||||||
|
|
||||||
// Command the EIVE system to safe mode
|
|
||||||
auto sysQueueId = satsystem::EIVE_SYSTEM.getCommandQueue();
|
auto sysQueueId = satsystem::EIVE_SYSTEM.getCommandQueue();
|
||||||
CommandMessage msg;
|
CommandMessage msg;
|
||||||
#if OBSW_COMMAND_SAFE_MODE_AT_STARTUP == 1
|
|
||||||
ModeMessage::setCmdModeMessage(msg, acs::AcsMode::SAFE, 0);
|
ModeMessage::setCmdModeMessage(msg, acs::AcsMode::SAFE, 0);
|
||||||
ReturnValue_t result =
|
ReturnValue_t result =
|
||||||
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
|
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
|
||||||
if (result != returnvalue::OK) {
|
if (result != returnvalue::OK) {
|
||||||
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
|
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
|
|
||||||
|
void obsw::announceAllModes() {
|
||||||
|
auto sysQueueId = satsystem::EIVE_SYSTEM.getCommandQueue();
|
||||||
|
CommandMessage msg;
|
||||||
ModeMessage::setModeAnnounceMessage(msg, true);
|
ModeMessage::setModeAnnounceMessage(msg, true);
|
||||||
ReturnValue_t result =
|
ReturnValue_t result =
|
||||||
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
|
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
|
||||||
if (result != returnvalue::OK) {
|
if (result != returnvalue::OK) {
|
||||||
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
|
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
/* Suspend main thread by sleeping it. */
|
|
||||||
TaskFactory::delayTask(5000);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,10 @@ namespace obsw {
|
|||||||
|
|
||||||
int obsw();
|
int obsw();
|
||||||
|
|
||||||
};
|
void bootDelayHandling();
|
||||||
|
void commandEiveSystemToSafe();
|
||||||
|
void announceAllModes();
|
||||||
|
|
||||||
|
}; // namespace obsw
|
||||||
|
|
||||||
#endif /* BSP_Q7S_CORE_OBSW_H_ */
|
#endif /* BSP_Q7S_CORE_OBSW_H_ */
|
||||||
|
@ -4,6 +4,7 @@ if [[ ! -f README.md ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
folder_list=(
|
folder_list=(
|
||||||
|
"./watchdog"
|
||||||
"./mission"
|
"./mission"
|
||||||
"./linux"
|
"./linux"
|
||||||
"./bsp_q7s"
|
"./bsp_q7s"
|
||||||
|
@ -1,10 +1,5 @@
|
|||||||
target_sources(${WATCHDOG_NAME} PRIVATE
|
target_sources(${WATCHDOG_NAME} PRIVATE main.cpp Watchdog.cpp)
|
||||||
main.cpp
|
|
||||||
Watchdog.cpp
|
|
||||||
)
|
|
||||||
|
|
||||||
target_include_directories(${WATCHDOG_NAME} PRIVATE
|
target_include_directories(${WATCHDOG_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}
|
|
||||||
)
|
|
||||||
|
|
||||||
install(TARGETS ${WATCHDOG_NAME} RUNTIME DESTINATION bin)
|
install(TARGETS ${WATCHDOG_NAME} RUNTIME DESTINATION bin)
|
||||||
|
@ -1,258 +1,276 @@
|
|||||||
#include "Watchdog.h"
|
#include "Watchdog.h"
|
||||||
#include "definitions.h"
|
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <poll.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
#include <poll.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
#include <iostream>
|
#include <cstdlib>
|
||||||
#include <fstream>
|
|
||||||
#include <thread>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include "definitions.h"
|
||||||
|
|
||||||
WatchdogTask::WatchdogTask (): fd(0) {
|
WatchdogTask::WatchdogTask() : fd(0) {
|
||||||
int result = 0;
|
int result = 0;
|
||||||
// Only create the FIFO if it does not exist yet
|
// Only create the FIFO if it does not exist yet
|
||||||
if(not std::filesystem::exists(watchdog::FIFO_NAME)) {
|
if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
|
||||||
// Permission 666 or rw-rw-rw-
|
// Permission 666 or rw-rw-rw-
|
||||||
mode_t mode = DEFFILEMODE;
|
mode_t mode = DEFFILEMODE;
|
||||||
result = mkfifo(watchdog::FIFO_NAME.c_str(), mode);
|
result = mkfifo(watchdog::FIFO_NAME.c_str(), mode);
|
||||||
if(result != 0) {
|
if (result != 0) {
|
||||||
std::cerr << "eive-watchdog: Could not created named pipe at " <<
|
std::cerr << "eive-watchdog: Could not created named pipe at " << watchdog::FIFO_NAME
|
||||||
watchdog::FIFO_NAME << ", error " << errno << ": " << strerror(errno) <<
|
<< ", error " << errno << ": " << strerror(errno) << std::endl;
|
||||||
std::endl;
|
throw std::runtime_error("eive-watchdog: FIFO creation failed");
|
||||||
throw std::runtime_error("eive-watchdog: FIFO creation failed");
|
|
||||||
}
|
|
||||||
#if WATCHDOG_VERBOSE_LEVEL >= 1
|
|
||||||
std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME <<
|
|
||||||
" created successfully" << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
#if WATCHDOG_VERBOSE_LEVEL >= 1
|
||||||
|
std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME << " created successfully"
|
||||||
|
<< std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::~WatchdogTask() {
|
WatchdogTask::~WatchdogTask() {}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
int WatchdogTask::performOperation() {
|
int WatchdogTask::performOperation() {
|
||||||
// Open FIFO read only and non-blocking
|
// Open FIFO read only and non-blocking
|
||||||
fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
|
fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
|
||||||
if(fd < 0) {
|
if (fd < 0) {
|
||||||
std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME <<
|
std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME << "read-only failed with "
|
||||||
"read-only failed with " << errno << ": " << strerror(errno) << std::endl;
|
<< errno << ": " << strerror(errno) << std::endl;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
state = States::RUNNING;
|
state = States::NOT_STARTED;
|
||||||
|
|
||||||
while(true) {
|
while (true) {
|
||||||
WatchdogTask::LoopResult loopResult = watchdogLoop();
|
WatchdogTask::LoopResult loopResult = watchdogLoop();
|
||||||
switch(loopResult) {
|
if (not stateMachine(loopResult)) {
|
||||||
case(LoopResult::OK): {
|
break;
|
||||||
performRunningOperation();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(LoopResult::CANCEL_RQ): {
|
|
||||||
std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
case(LoopResult::SUSPEND_RQ): {
|
|
||||||
performSuspendOperation();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(LoopResult::TIMEOUT): {
|
|
||||||
performNotRunningOperation(loopResult);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(LoopResult::HUNG_UP): {
|
|
||||||
performNotRunningOperation(loopResult);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(LoopResult::RESTART_RQ): {
|
|
||||||
if(state == States::SUSPENDED or state == States::FAULTY) {
|
|
||||||
performRunningOperation();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(LoopResult::FAULT): {
|
|
||||||
using namespace std::chrono_literals;
|
|
||||||
// Configuration error
|
|
||||||
std::cerr << "Fault has occured in watchdog loop" << std::endl;
|
|
||||||
// Prevent spam
|
|
||||||
std::this_thread::sleep_for(2000ms);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (close(fd) < 0) {
|
}
|
||||||
std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME <<
|
if (close(fd) < 0) {
|
||||||
"failed, error " << errno << ": " << strerror(errno) << std::endl;
|
std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME << "failed, error "
|
||||||
}
|
<< errno << ": " << strerror(errno) << std::endl;
|
||||||
std::cout << "eive-watchdog: Finished" << std::endl;
|
}
|
||||||
return 0;
|
std::cout << "eive-watchdog: Finished" << std::endl;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
||||||
using namespace std::chrono_literals;
|
using namespace std::chrono_literals;
|
||||||
struct pollfd waiter = {};
|
struct pollfd waiter = {};
|
||||||
waiter.fd = fd;
|
waiter.fd = fd;
|
||||||
waiter.events = POLLIN;
|
waiter.events = POLLIN;
|
||||||
|
|
||||||
switch(state) {
|
// Only poll one file descriptor with timeout
|
||||||
case(States::SUSPENDED): {
|
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
|
||||||
// Sleep, then check whether a restart request was received
|
case (0): {
|
||||||
std::this_thread::sleep_for(1000ms);
|
return LoopResult::TIMEOUT;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case(States::RUNNING): {
|
case (1): {
|
||||||
// Continue as usual
|
return pollEvent(waiter);
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(States::NOT_STARTED): {
|
|
||||||
// This should not happen
|
|
||||||
std::cerr << "eive-watchdog: State is NOT_STARTED, configuration error" << std::endl;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case(States::FAULTY): {
|
|
||||||
// TODO: Not sure what to do yet. Continue for now
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 10 seconds timeout, only poll one file descriptor
|
|
||||||
switch(poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
|
|
||||||
case(0): {
|
|
||||||
return LoopResult::TIMEOUT;
|
|
||||||
}
|
|
||||||
case(1): {
|
|
||||||
return pollEvent(waiter);
|
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error " <<
|
std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error "
|
||||||
errno << ": " << strerror(errno) << std::endl;
|
<< errno << ": " << strerror(errno) << std::endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return LoopResult::OK;
|
return LoopResult::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
|
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
|
||||||
if (waiter.revents & POLLIN) {
|
if (waiter.revents & POLLIN) {
|
||||||
ssize_t readLen = read(fd, buf.data(), buf.size());
|
ssize_t readLen = read(fd, buf.data(), buf.size());
|
||||||
if (readLen < 0) {
|
if (readLen < 0) {
|
||||||
std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME <<
|
std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME << ", error "
|
||||||
", error " << errno << ": " << strerror(errno) << std::endl;
|
<< errno << ": " << strerror(errno) << std::endl;
|
||||||
return LoopResult::OK;
|
return LoopResult::OK;
|
||||||
}
|
}
|
||||||
#if WATCHDOG_VERBOSE_LEVEL == 2
|
#if WATCHDOG_VERBOSE_LEVEL == 2
|
||||||
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME
|
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
|
||||||
<< std::endl;
|
|
||||||
#endif
|
#endif
|
||||||
else if(readLen >= 1) {
|
else if (readLen >= 1) {
|
||||||
return parseCommandByte(readLen);
|
return parseCommand(readLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} else if (waiter.revents & POLLERR) {
|
||||||
else if(waiter.revents & POLLERR) {
|
std::cerr << "eive-watchdog: Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
|
||||||
std::cerr << "eive-watchdog: Poll error error on pipe " << watchdog::FIFO_NAME <<
|
|
||||||
std::endl;
|
|
||||||
return LoopResult::FAULT;
|
|
||||||
}
|
|
||||||
else if (waiter.revents & POLLHUP) {
|
|
||||||
// Writer closed its end
|
|
||||||
return LoopResult::HUNG_UP;
|
|
||||||
}
|
|
||||||
return LoopResult::FAULT;
|
return LoopResult::FAULT;
|
||||||
|
} else if (waiter.revents & POLLHUP) {
|
||||||
|
// Writer closed its end
|
||||||
|
return LoopResult::HUNG_UP;
|
||||||
|
}
|
||||||
|
return LoopResult::FAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::parseCommandByte(ssize_t readLen) {
|
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
|
||||||
for(ssize_t idx = 0; idx < readLen; idx++) {
|
char readChar = buf[0];
|
||||||
char readChar = buf[idx];
|
// Cancel request
|
||||||
// Cancel request
|
if (readChar == watchdog::first::CANCEL_CHAR) {
|
||||||
if(readChar == watchdog::CANCEL_CHAR) {
|
return LoopResult::CANCEL_REQ;
|
||||||
return LoopResult::CANCEL_RQ;
|
} else if (readChar == watchdog::first::SUSPEND_CHAR) {
|
||||||
}
|
// Suspend request
|
||||||
// Begin request. Does not work if the operation was not suspended before
|
return LoopResult::SUSPEND_REQ;
|
||||||
else if(readChar == watchdog::RESTART_CHAR) {
|
} else if (readChar == watchdog::first::START_CHAR) {
|
||||||
return LoopResult::RESTART_RQ;
|
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
|
||||||
}
|
return LoopResult::START_WITH_WATCH_REQ;
|
||||||
// Suspend request
|
|
||||||
else if(readChar == watchdog::SUSPEND_CHAR) {
|
|
||||||
return LoopResult::SUSPEND_RQ;
|
|
||||||
}
|
|
||||||
// Everything else: All working as expected
|
|
||||||
}
|
}
|
||||||
return LoopResult::OK;
|
return LoopResult::START_REQ;
|
||||||
|
}
|
||||||
|
// Everything else: All working as expected
|
||||||
|
return LoopResult::OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
int WatchdogTask::performRunningOperation() {
|
int WatchdogTask::performRunningOperation() {
|
||||||
if(state != States::RUNNING) {
|
if (state != States::RUNNING) {
|
||||||
state = States::RUNNING;
|
state = States::RUNNING;
|
||||||
|
}
|
||||||
|
if (notRunningStart.has_value()) {
|
||||||
|
notRunningStart = std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (not obswRunning) {
|
||||||
|
if (printNotRunningLatch) {
|
||||||
|
// Reset latch so user can see timeouts
|
||||||
|
printNotRunningLatch = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(not obswRunning) {
|
obswRunning = true;
|
||||||
if(printNotRunningLatch) {
|
std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
|
||||||
// Reset latch so user can see timeouts
|
|
||||||
printNotRunningLatch = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
obswRunning = true;
|
|
||||||
std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
|
|
||||||
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
||||||
std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
|
std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
|
||||||
if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||||
std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
|
std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
|
||||||
if(not obswRunningFile.good()) {
|
if (not obswRunningFile.good()) {
|
||||||
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed"
|
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed" << std::endl;
|
||||||
<< std::endl;
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return 0;
|
#endif
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
||||||
// Latch prevents spam on console
|
// Latch prevents spam on console
|
||||||
if(not printNotRunningLatch) {
|
if (not printNotRunningLatch) {
|
||||||
if(type == LoopResult::HUNG_UP) {
|
if (type == LoopResult::HUNG_UP) {
|
||||||
std::cout << "eive-watchdog: FIFO writer hung up!" << std::endl;
|
std::cout << "eive-watchdog: FIFO writer hung up!" << std::endl;
|
||||||
}
|
} else {
|
||||||
else {
|
std::cout << "eive-watchdog: The FIFO timed out!" << std::endl;
|
||||||
std::cout << "eive-watchdog: The FIFO timed out!" << std::endl;
|
|
||||||
}
|
|
||||||
printNotRunningLatch = true;
|
|
||||||
}
|
}
|
||||||
|
printNotRunningLatch = true;
|
||||||
|
}
|
||||||
|
|
||||||
if(obswRunning) {
|
if (not notRunningStart.has_value()) {
|
||||||
|
notRunningStart = std::chrono::system_clock::now();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (obswRunning) {
|
||||||
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
||||||
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||||
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
||||||
if(result != 0) {
|
if (result != 0) {
|
||||||
std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " <<
|
std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " << errno
|
||||||
errno << ": " << strerror(errno) << std::endl;
|
<< ": " << strerror(errno) << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
obswRunning = false;
|
obswRunning = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (watchingObsw) {
|
||||||
|
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
|
||||||
|
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
|
||||||
|
watchdog::MAX_NOT_RUNNING_MS) {
|
||||||
|
std::cout << "Restarting OBSW" << std::endl;
|
||||||
|
std::system("systemctl restart obsw");
|
||||||
}
|
}
|
||||||
if(type == LoopResult::HUNG_UP) {
|
}
|
||||||
using namespace std::chrono_literals;
|
if (type == LoopResult::HUNG_UP) {
|
||||||
// Prevent spam
|
using namespace std::chrono_literals;
|
||||||
std::this_thread::sleep_for(2000ms);
|
// Prevent spam
|
||||||
}
|
std::this_thread::sleep_for(2000ms);
|
||||||
return 0;
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int WatchdogTask::performSuspendOperation() {
|
bool WatchdogTask::stateMachine(LoopResult loopResult) {
|
||||||
if(state == States::RUNNING or state == States::FAULTY) {
|
using namespace std::chrono_literals;
|
||||||
std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
|
bool sleep = false;
|
||||||
watchdogRunning = false;
|
switch (state) {
|
||||||
state = States::SUSPENDED;
|
case (States::RUNNING): {
|
||||||
|
switch (loopResult) {
|
||||||
|
case (LoopResult::TIMEOUT):
|
||||||
|
case (LoopResult::HUNG_UP): {
|
||||||
|
performNotRunningOperation(loopResult);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case (LoopResult::OK): {
|
||||||
|
performRunningOperation();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case (LoopResult::SUSPEND_REQ): {
|
||||||
|
if (state == States::RUNNING or state == States::FAULTY) {
|
||||||
|
std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
|
||||||
|
state = States::SUSPENDED;
|
||||||
|
}
|
||||||
|
performSuspendOperation();
|
||||||
|
sleep = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case (LoopResult::CANCEL_REQ): {
|
||||||
|
std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
case (States::FAULTY):
|
||||||
|
case (States::SUSPENDED):
|
||||||
|
case (States::NOT_STARTED): {
|
||||||
|
switch (loopResult) {
|
||||||
|
case (LoopResult::SUSPEND_REQ): {
|
||||||
|
// Ignore and also delay
|
||||||
|
sleep = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case (LoopResult::START_REQ):
|
||||||
|
case (LoopResult::START_WITH_WATCH_REQ): {
|
||||||
|
if (state == States::NOT_STARTED or state == States::FAULTY) {
|
||||||
|
state = States::RUNNING;
|
||||||
|
}
|
||||||
|
std::cout << "Watch request received. Restarting OBSW if not running for "
|
||||||
|
<< watchdog::MAX_NOT_RUNNING_MS / 1000 << " seconds" << std::endl;
|
||||||
|
if (loopResult == LoopResult::START_REQ) {
|
||||||
|
watchingObsw = false;
|
||||||
|
} else if (loopResult == LoopResult::START_WITH_WATCH_REQ) {
|
||||||
|
watchingObsw = true;
|
||||||
|
}
|
||||||
|
performRunningOperation();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
sleep = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (loopResult == LoopResult::FAULT) {
|
||||||
|
// Configuration error
|
||||||
|
std::cerr << "Fault has occured in watchdog loop" << std::endl;
|
||||||
|
// Prevent spam
|
||||||
|
sleep = true;
|
||||||
|
}
|
||||||
|
if (sleep) {
|
||||||
|
std::this_thread::sleep_for(1000ms);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int WatchdogTask::performSuspendOperation() { return 0; }
|
||||||
|
@ -2,49 +2,52 @@
|
|||||||
#define WATCHDOG_WATCHDOG_H_
|
#define WATCHDOG_WATCHDOG_H_
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <chrono>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <optional>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class WatchdogTask {
|
class WatchdogTask {
|
||||||
public:
|
public:
|
||||||
enum class States {
|
enum class States { NOT_STARTED, RUNNING, SUSPENDED, FAULTY };
|
||||||
NOT_STARTED,
|
|
||||||
RUNNING,
|
|
||||||
SUSPENDED,
|
|
||||||
FAULTY
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class LoopResult {
|
enum class LoopResult {
|
||||||
OK,
|
OK,
|
||||||
SUSPEND_RQ,
|
START_REQ,
|
||||||
CANCEL_RQ,
|
START_WITH_WATCH_REQ,
|
||||||
RESTART_RQ,
|
SUSPEND_REQ,
|
||||||
TIMEOUT,
|
CANCEL_REQ,
|
||||||
HUNG_UP,
|
TIMEOUT,
|
||||||
FAULT
|
HUNG_UP,
|
||||||
};
|
FAULT
|
||||||
|
};
|
||||||
|
|
||||||
WatchdogTask();
|
WatchdogTask();
|
||||||
|
|
||||||
virtual ~WatchdogTask();
|
virtual ~WatchdogTask();
|
||||||
|
|
||||||
int performOperation();
|
int performOperation();
|
||||||
private:
|
|
||||||
int fd = 0;
|
|
||||||
|
|
||||||
bool obswRunning = false;
|
private:
|
||||||
bool watchdogRunning = false;
|
int fd = 0;
|
||||||
bool printNotRunningLatch = false;
|
|
||||||
std::array<uint8_t, 64> buf;
|
|
||||||
States state = States::NOT_STARTED;
|
|
||||||
|
|
||||||
LoopResult watchdogLoop();
|
bool obswRunning = false;
|
||||||
LoopResult pollEvent(struct pollfd& waiter);
|
bool watchingObsw = false;
|
||||||
LoopResult parseCommandByte(ssize_t readLen);
|
bool printNotRunningLatch = false;
|
||||||
|
std::array<uint8_t, 64> buf;
|
||||||
|
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart;
|
||||||
|
States state = States::NOT_STARTED;
|
||||||
|
|
||||||
int performRunningOperation();
|
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
|
||||||
int performNotRunningOperation(LoopResult type);
|
// messages to loop results.
|
||||||
int performSuspendOperation();
|
LoopResult watchdogLoop();
|
||||||
|
bool stateMachine(LoopResult result);
|
||||||
|
LoopResult pollEvent(struct pollfd& waiter);
|
||||||
|
LoopResult parseCommand(ssize_t readLen);
|
||||||
|
|
||||||
|
int performRunningOperation();
|
||||||
|
int performNotRunningOperation(LoopResult type);
|
||||||
|
int performSuspendOperation();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* WATCHDOG_WATCHDOG_H_ */
|
#endif /* WATCHDOG_WATCHDOG_H_ */
|
||||||
|
@ -5,17 +5,31 @@
|
|||||||
|
|
||||||
namespace watchdog {
|
namespace watchdog {
|
||||||
|
|
||||||
|
namespace first {
|
||||||
|
|
||||||
|
// Start or restart character
|
||||||
|
static constexpr char START_CHAR = 'b';
|
||||||
// Suspend watchdog operations temporarily
|
// Suspend watchdog operations temporarily
|
||||||
static constexpr char SUSPEND_CHAR = 's';
|
static constexpr char SUSPEND_CHAR = 's';
|
||||||
// Resume watchdog operations
|
|
||||||
static constexpr char RESTART_CHAR = 'b';
|
|
||||||
// Causes the watchdog to close down
|
// Causes the watchdog to close down
|
||||||
static constexpr char CANCEL_CHAR = 'c';
|
static constexpr char CANCEL_CHAR = 'c';
|
||||||
|
static constexpr char IDLE_CHAR = 'i';
|
||||||
|
|
||||||
|
} // namespace first
|
||||||
|
|
||||||
|
namespace second {
|
||||||
|
|
||||||
|
// Supplied with the start character. This will instruct the watchdog to actually watch
|
||||||
|
// that the OBSW is running all the time.
|
||||||
|
static constexpr char WATCH_FLAG = 'w';
|
||||||
|
} // namespace second
|
||||||
|
|
||||||
static constexpr int TIMEOUT_MS = 5 * 1000;
|
static constexpr int TIMEOUT_MS = 5 * 1000;
|
||||||
|
// 2 minutes
|
||||||
|
static constexpr unsigned MAX_NOT_RUNNING_MS = 2 * 60 * 1000;
|
||||||
const std::string FIFO_NAME = "/tmp/watchdog-pipe";
|
const std::string FIFO_NAME = "/tmp/watchdog-pipe";
|
||||||
const std::string RUNNING_FILE_NAME = "/tmp/obsw-running";
|
const std::string RUNNING_FILE_NAME = "/tmp/obsw-running";
|
||||||
|
|
||||||
}
|
} // namespace watchdog
|
||||||
|
|
||||||
#endif /* WATCHDOG_DEFINITIONS_H_ */
|
#endif /* WATCHDOG_DEFINITIONS_H_ */
|
||||||
|
@ -1,24 +1,22 @@
|
|||||||
#include "Watchdog.h"
|
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "Watchdog.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
|
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
|
||||||
* It checks whether the OBSW writes to the FIFO regularly.
|
* It checks whether the OBSW writes to the FIFO regularly.
|
||||||
*/
|
*/
|
||||||
int main() {
|
int main() {
|
||||||
std::cout << "eive-watchdog: Starting OBSW watchdog.." << std::endl;
|
std::cout << "eive-watchdog: Starting OBSW watchdog.." << std::endl;
|
||||||
try {
|
try {
|
||||||
WatchdogTask watchdogTask;
|
WatchdogTask watchdogTask;
|
||||||
int result = watchdogTask.performOperation();
|
int result = watchdogTask.performOperation();
|
||||||
if(result != 0) {
|
if (result != 0) {
|
||||||
return result;
|
return result;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch(const std::runtime_error& e) {
|
} catch (const std::runtime_error& e) {
|
||||||
std::cerr << "eive-watchdog: Run time exception " << e.what() << std::endl;
|
std::cerr << "eive-watchdog: Run time exception " << e.what() << std::endl;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user