Merge pull request 'Watchdog Extension' (#404) from feature_watchdog_extension into develop
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good

Reviewed-on: #404
Reviewed-by: Jakob Meier <meierj@irs.uni-stuttgart.de>
This commit is contained in:
Robin Müller 2023-03-04 10:45:48 +01:00
commit cfbc53792d
14 changed files with 468 additions and 369 deletions

View File

@ -26,6 +26,15 @@ the `/proc/uptime` file is read.
## Changed
- Improved the OBSW watchdog by adding a watch functionality. The watch functionality is optional
and has to be enabled specifically by the application being watched by the watchdog when
starting the watchdog. If the watch functionality is enabled and the OBSW has not pinged
the watchdog via the FIFO for 2 minutes, the watchdog will restart the OBSW service via systemd.
The primary OBSW will only activate the watch functionality if it is the OBSW inside the
`/usr/bin` directory. This allows debugging the system: flashed or manually copied
debugging images are left 2 minutes to start the watchdog without the watch functionality.
- The SD card prefix is now set earlier inside the `CoreController` constructor
- The watchdog handling was moved outside the `CoreController` into the main loop.
- Moved polling of all SPI parts to the same PST.
- Allow quicker transition for the EIVE system component by allowing consecutive TCS and ACS
component commanding again.

View File

@ -1,4 +1,4 @@
target_sources(${OBSW_NAME} PRIVATE CoreController.cpp scheduling.cpp
ObjectFactory.cpp)
ObjectFactory.cpp WatchdogHandler.cpp)
target_sources(${SIMPLE_OBSW_NAME} PRIVATE scheduling.cpp)

View File

@ -33,12 +33,7 @@ xsc::Copy CoreController::CURRENT_COPY = xsc::Copy::NO_COPY;
CoreController::CoreController(object_id_t objectId)
: ExtendedControllerBase(objectId, 5), opDivider5(5), opDivider10(10), hkSet(this) {
ReturnValue_t result = returnvalue::OK;
try {
result = initWatchdogFifo();
if (result != returnvalue::OK) {
sif::warning << "CoreController::CoreController: Watchdog FIFO init failed" << std::endl;
}
sdcMan = SdCardManager::instance();
if (sdcMan == nullptr) {
sif::error << "CoreController::CoreController: SD card manager invalid!" << std::endl;
@ -47,6 +42,16 @@ CoreController::CoreController(object_id_t objectId)
if (not BLOCKING_SD_INIT) {
sdcMan->setBlocking(false);
}
auto sdCard = sdcMan->getPreferredSdCard();
if (not sdCard.has_value()) {
sif::error << "CoreController::initializeAfterTaskCreation: "
"Issues getting preferred SD card, setting to 0"
<< std::endl;
sdCard = sd::SdCard::SLOT_0;
}
sdInfo.active = sdCard.value();
sdcMan->setActiveSdCard(sdInfo.active);
currMntPrefix = sdcMan->getCurrentMountPrefix();
getCurrentBootCopy(CURRENT_CHIP, CURRENT_COPY);
@ -54,6 +59,10 @@ CoreController::CoreController(object_id_t objectId)
} catch (const std::filesystem::filesystem_error &e) {
sif::error << "CoreController::CoreController: Failed with exception " << e.what() << std::endl;
}
// Add script folder to path
char *currentEnvPath = getenv("PATH");
std::string updatedEnvPath = std::string(currentEnvPath) + ":/home/root/scripts:/usr/local/bin";
setenv("PATH", updatedEnvPath.c_str(), true);
sdCardCheckCd.timeOut();
eventQueue = QueueFactory::instance()->createMessageQueue(5, EventMessage::MAX_MESSAGE_SIZE);
}
@ -78,7 +87,6 @@ void CoreController::performControlOperation() {
}
}
}
performWatchdogControlOperation();
sdStateMachine();
performMountedSdCardOperations();
if (sdCardCheckCd.hasTimedOut()) {
@ -148,22 +156,6 @@ ReturnValue_t CoreController::initialize() {
ReturnValue_t CoreController::initializeAfterTaskCreation() {
ReturnValue_t result = returnvalue::OK;
auto sdCard = sdcMan->getPreferredSdCard();
if (not sdCard) {
return returnvalue::FAILED;
}
sdInfo.active = sdCard.value();
if (sdInfo.active == sd::SdCard::NONE) {
sif::error << "CoreController::initializeAfterTaskCreation: "
"Issues getting preferred SD card, setting to 0"
<< std::endl;
sdInfo.active = sd::SdCard::SLOT_0;
}
sdcMan->setActiveSdCard(sdInfo.active);
currMntPrefix = sdcMan->getCurrentMountPrefix();
if (currMntPrefix == "") {
return ObjectManagerIF::CHILD_INIT_FAILED;
}
if (BLOCKING_SD_INIT) {
result = initSdCardBlocking();
if (result != returnvalue::OK and result != SdCardManager::ALREADY_MOUNTED) {
@ -175,12 +167,7 @@ ReturnValue_t CoreController::initializeAfterTaskCreation() {
if (result != returnvalue::OK) {
sif::warning << "CoreController::initialize: Version initialization failed" << std::endl;
}
// Add script folder to path
char *currentEnvPath = getenv("PATH");
std::string updatedEnvPath = std::string(currentEnvPath) + ":/home/root/scripts:/usr/local/bin";
setenv("PATH", updatedEnvPath.c_str(), true);
updateProtInfo();
initPrint();
return ExtendedControllerBase::initializeAfterTaskCreation();
}
@ -844,36 +831,6 @@ void CoreController::getCurrentBootCopy(xsc::Chip &chip, xsc::Copy &copy) {
copy = static_cast<xsc::Copy>(xscCopy);
}
/**
 * @brief Tries to open the watchdog FIFO for writing.
 * @details
 * A missing FIFO, or a FIFO which can not be opened because of ENXIO, is tolerated and
 * reported as returnvalue::OK. In the ENXIO case the descriptor is set to RETRY_FIFO_OPEN.
 * @return returnvalue::FAILED if opening the FIFO fails with any error other than ENXIO,
 *         returnvalue::OK otherwise.
 */
ReturnValue_t CoreController::initWatchdogFifo() {
  const bool fifoPresent = std::filesystem::exists(watchdog::FIFO_NAME);
  if (not fifoPresent) {
    // Not treated as an error for now
    sif::info << "Watchdog FIFO " << watchdog::FIFO_NAME << " does not exist, can't initiate"
              << " watchdog" << std::endl;
    return returnvalue::OK;
  }

  // Write-only and non-blocking, to prevent the SW from killing itself.
  watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
  if (watchdogFifoFd >= 0) {
    return returnvalue::OK;
  }

  if (errno != ENXIO) {
    sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with " << errno
               << ": " << strerror(errno) << std::endl;
    return returnvalue::FAILED;
  }

  // ENXIO: mark the descriptor so the open is retried later.
  watchdogFifoFd = RETRY_FIFO_OPEN;
  sif::info << "eive-watchdog not running. FIFO can not be opened" << std::endl;
  return returnvalue::OK;
}
/**
 * @brief Prints an info message if the watchdog FIFO descriptor is valid (greater than 0).
 * @details Compiled to an empty function unless OBSW_VERBOSE_LEVEL is at least 1.
 */
void CoreController::initPrint() {
#if OBSW_VERBOSE_LEVEL >= 1
  const bool fifoOpened = watchdogFifoFd > 0;
  if (not fifoOpened) {
    return;
  }
  sif::info << "Opened watchdog FIFO successfully.." << std::endl;
#endif
}
ReturnValue_t CoreController::actionXscReboot(const uint8_t *data, size_t size) {
if (size < 1) {
return HasActionsIF::INVALID_PARAMETERS;
@ -1231,36 +1188,6 @@ ReturnValue_t CoreController::handleProtInfoUpdateLine(std::string nextLine) {
return returnvalue::OK;
}
void CoreController::performWatchdogControlOperation() {
// Only perform each fifth iteration
if (watchdogFifoFd != 0 and opDivider5.check()) {
if (watchdogFifoFd == RETRY_FIFO_OPEN) {
// Open FIFO write only and non-blocking
watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
if (watchdogFifoFd < 0) {
if (errno == ENXIO) {
watchdogFifoFd = RETRY_FIFO_OPEN;
// No printout for now, would be spam
return;
} else {
sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with "
<< errno << ": " << strerror(errno) << std::endl;
return;
}
}
sif::info << "Opened " << watchdog::FIFO_NAME << " successfully" << std::endl;
} else if (watchdogFifoFd > 0) {
// Write to OBSW watchdog FIFO here
const char writeChar = 'a';
ssize_t writtenBytes = write(watchdogFifoFd, &writeChar, 1);
if (writtenBytes < 0) {
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
<< std::endl;
}
}
}
}
void CoreController::performMountedSdCardOperations() {
auto mountedSdCardOp = [&](sd::SdCard sdCard, std::string mntPoint) {
if (not performOneShotSdCardOpsSwitch) {

View File

@ -164,9 +164,6 @@ class CoreController : public ExtendedControllerBase {
static constexpr uint32_t BOOT_OFFSET_SECONDS = 15;
static constexpr MutexIF::TimeoutType TIMEOUT_TYPE = MutexIF::TimeoutType::WAITING;
static constexpr uint32_t MUTEX_TIMEOUT = 20;
// Designated value for rechecking FIFO open
static constexpr int RETRY_FIFO_OPEN = -2;
int watchdogFifoFd = 0;
GpsHyperion::FixMode gpsFix = GpsHyperion::FixMode::UNKNOWN;
// States for SD state machine, which is used in non-blocking mode
@ -263,7 +260,6 @@ class CoreController : public ExtendedControllerBase {
ReturnValue_t performSdCardCheck();
ReturnValue_t backupTimeFileHandler();
ReturnValue_t initBootCopyFile();
ReturnValue_t initWatchdogFifo();
ReturnValue_t initSdCardBlocking();
bool startSdStateMachine(sd::SdCard targetActiveSd, SdCfgMode mode, MessageQueueId_t commander,
DeviceCommandId_t actionId);
@ -288,8 +284,6 @@ class CoreController : public ExtendedControllerBase {
ReturnValue_t gracefulShutdownTasks(xsc::Chip chip, xsc::Copy copy, bool& protOpPerformed);
void performWatchdogControlOperation();
ReturnValue_t handleProtInfoUpdateLine(std::string nextLine);
int handleBootCopyProtAtIndex(xsc::Chip targetChip, xsc::Copy targetCopy, bool protect,
bool& protOperationPerformed, bool selfChip, bool selfCopy,

View File

@ -0,0 +1,84 @@
#include "WatchdogHandler.h"
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <cstring>
#include <filesystem>
#include "fsfw/serviceinterface.h"
#include "watchdog/definitions.h"
// Nothing to set up here: all members carry in-class default initializers.
WatchdogHandler::WatchdogHandler() = default;
void WatchdogHandler::periodicOperation() {
if (watchdogFifoFd != 0) {
if (watchdogFifoFd == RETRY_FIFO_OPEN) {
// Open FIFO write only and non-blocking
watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
if (watchdogFifoFd < 0) {
if (errno == ENXIO) {
watchdogFifoFd = RETRY_FIFO_OPEN;
// No printout for now, would be spam
return;
} else {
sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with "
<< errno << ": " << strerror(errno) << std::endl;
return;
}
}
sif::info << "Opened " << watchdog::FIFO_NAME << " successfully" << std::endl;
performStartHandling();
} else if (watchdogFifoFd > 0) {
// Write to OBSW watchdog FIFO here
const char writeChar = watchdog::first::IDLE_CHAR;
ssize_t writtenBytes = write(watchdogFifoFd, &writeChar, 1);
if (writtenBytes < 0) {
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
<< std::endl;
}
}
}
}
/**
 * @brief Opens the watchdog FIFO and, if that succeeds, sends the start request.
 * @details
 * A missing FIFO, or a FIFO which can not be opened because of ENXIO, is not treated as
 * an error. In the ENXIO case the descriptor is set to RETRY_FIFO_OPEN, and
 * periodicOperation() retries the open and performs the start handling once it succeeds.
 * @param enableWatchdogFunction Stored and used by the start handling to decide whether the
 *                               watch flag is sent along with the start character.
 * @return returnvalue::FAILED if opening the FIFO fails with an error other than ENXIO or
 *         if the start request can not be written, returnvalue::OK otherwise.
 */
ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
  this->enableWatchFunction = enableWatchdogFunction;
  if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
    // Still return returnvalue::OK for now
    sif::info << "Watchdog FIFO " << watchdog::FIFO_NAME << " does not exist, can't initiate"
              << " watchdog" << std::endl;
    return returnvalue::OK;
  }
  // Open FIFO write only and non-blocking to prevent SW from killing itself.
  watchdogFifoFd = open(watchdog::FIFO_NAME.c_str(), O_WRONLY | O_NONBLOCK);
  if (watchdogFifoFd < 0) {
    if (errno == ENXIO) {
      watchdogFifoFd = RETRY_FIFO_OPEN;
      sif::info << "eive-watchdog not running. FIFO can not be opened" << std::endl;
      // There is no valid descriptor to write the start request to. Writing to the
      // RETRY_FIFO_OPEN marker would only fail with EBADF and turn this tolerated case
      // into a failure. The start request is sent by periodicOperation() after a
      // successful retry instead.
      return returnvalue::OK;
    }
    sif::error << "Opening pipe " << watchdog::FIFO_NAME << " write-only failed with " << errno
               << ": " << strerror(errno) << std::endl;
    return returnvalue::FAILED;
  }
  return performStartHandling();
}
/**
 * @brief Writes the start request to the watchdog FIFO.
 * @details
 * The request consists of the start character, followed by the watch flag if the watch
 * function was enabled.
 * @return returnvalue::FAILED if the write call reports an error, returnvalue::OK otherwise.
 */
ReturnValue_t WatchdogHandler::performStartHandling() {
  const char request[2] = {watchdog::first::START_CHAR, watchdog::second::WATCH_FLAG};
  // The second byte is only sent if the watch function is requested.
  const size_t requestLen = enableWatchFunction ? 2 : 1;
  if (write(watchdogFifoFd, request, requestLen) < 0) {
    sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
               << std::endl;
    return returnvalue::FAILED;
  }
  return returnvalue::OK;
}

View File

@ -0,0 +1,23 @@
#ifndef BSP_Q7S_CORE_WATCHDOGHANDLER_H_
#define BSP_Q7S_CORE_WATCHDOGHANDLER_H_
#include "fsfw/returnvalues/returnvalue.h"
/**
 * @brief Client side of the FIFO based OBSW watchdog interface.
 * @details
 * Opens the watchdog FIFO, sends a start request and afterwards writes to the FIFO
 * each time periodicOperation() is called.
 */
class WatchdogHandler {
 public:
  WatchdogHandler();

  /**
   * Tries to open the watchdog FIFO and to send the start request.
   * @param enableWatchFunction If true, the watch flag is sent together with the start
   *                            character.
   */
  ReturnValue_t initialize(bool enableWatchFunction);

  /** Writes to the FIFO, or retries opening it if it could not be opened previously. */
  void periodicOperation();

 private:
  // Designated value for rechecking FIFO open
  static constexpr int RETRY_FIFO_OPEN = -2;

  /** Sends the start character, and the watch flag if the watch function is enabled. */
  ReturnValue_t performStartHandling();

  // 0 by default. Set to the result of open(), or to RETRY_FIFO_OPEN.
  int watchdogFifoFd = 0;
  bool enableWatchFunction = false;
};
#endif /* BSP_Q7S_CORE_WATCHDOGHANDLER_H_ */

View File

@ -9,6 +9,7 @@
#include <iostream>
#include "OBSWConfig.h"
#include "bsp_q7s/core/WatchdogHandler.h"
#include "commonConfig.h"
#include "core/scheduling.h"
#include "fsfw/tasks/TaskFactory.h"
@ -24,6 +25,9 @@ static const char* DEV_STRING = "Xiphos Q7S FM";
#else
static const char* DEV_STRING = "Xiphos Q7S EM";
#endif
WatchdogHandler WATCHDOG_HANDLER;
int obsw::obsw() {
using namespace fsfw;
std::cout << "-- EIVE OBSW --" << std::endl;
@ -44,6 +48,35 @@ int obsw::obsw() {
}
#endif
// Delay the boot if applicable.
bootDelayHandling();
bool initWatchFunction = false;
if (std::filesystem::current_path() == "/usr/bin") {
initWatchFunction = true;
}
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
if (result != returnvalue::OK) {
std::cerr << "Initiating EIVE watchdog handler failed" << std::endl;
}
scheduling::initMission();
// Command the EIVE system to safe mode
#if OBSW_COMMAND_SAFE_MODE_AT_STARTUP == 1
commandEiveSystemToSafe();
#else
announceAllModes();
#endif
for (;;) {
WATCHDOG_HANDLER.periodicOperation();
TaskFactory::delayTask(1000);
}
return 0;
}
void obsw::bootDelayHandling() {
const char* homedir = nullptr;
homedir = getenv("HOME");
if (homedir == nullptr) {
@ -71,31 +104,26 @@ int obsw::obsw() {
std::cout << "Delaying OBSW start for " << bootDelaySecs << " seconds" << std::endl;
TaskFactory::delayTask(bootDelaySecs * 1000);
}
}
scheduling::initMission();
// Command the EIVE system to safe mode
void obsw::commandEiveSystemToSafe() {
auto sysQueueId = satsystem::EIVE_SYSTEM.getCommandQueue();
CommandMessage msg;
#if OBSW_COMMAND_SAFE_MODE_AT_STARTUP == 1
ModeMessage::setCmdModeMessage(msg, acs::AcsMode::SAFE, 0);
ReturnValue_t result =
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
if (result != returnvalue::OK) {
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
}
#else
}
void obsw::announceAllModes() {
auto sysQueueId = satsystem::EIVE_SYSTEM.getCommandQueue();
CommandMessage msg;
ModeMessage::setModeAnnounceMessage(msg, true);
ReturnValue_t result =
MessageQueueSenderIF::sendMessage(sysQueueId, &msg, MessageQueueIF::NO_QUEUE, false);
if (result != returnvalue::OK) {
sif::error << "Sending safe mode command to EIVE system failed" << std::endl;
}
#endif
for (;;) {
/* Suspend main thread by sleeping it. */
TaskFactory::delayTask(5000);
}
return 0;
}

View File

@ -5,6 +5,10 @@ namespace obsw {
int obsw();
};
void bootDelayHandling();
void commandEiveSystemToSafe();
void announceAllModes();
}; // namespace obsw
#endif /* BSP_Q7S_CORE_OBSW_H_ */

View File

@ -4,6 +4,7 @@ if [[ ! -f README.md ]]; then
fi
folder_list=(
"./watchdog"
"./mission"
"./linux"
"./bsp_q7s"

View File

@ -1,10 +1,5 @@
target_sources(${WATCHDOG_NAME} PRIVATE
main.cpp
Watchdog.cpp
)
target_sources(${WATCHDOG_NAME} PRIVATE main.cpp Watchdog.cpp)
target_include_directories(${WATCHDOG_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
target_include_directories(${WATCHDOG_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS ${WATCHDOG_NAME} RUNTIME DESTINATION bin)

View File

@ -1,19 +1,20 @@
#include "Watchdog.h"
#include "definitions.h"
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <iostream>
#include <fstream>
#include <thread>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <thread>
#include "definitions.h"
WatchdogTask::WatchdogTask() : fd(0) {
int result = 0;
@ -23,76 +24,39 @@ WatchdogTask::WatchdogTask (): fd(0) {
mode_t mode = DEFFILEMODE;
result = mkfifo(watchdog::FIFO_NAME.c_str(), mode);
if (result != 0) {
std::cerr << "eive-watchdog: Could not created named pipe at " <<
watchdog::FIFO_NAME << ", error " << errno << ": " << strerror(errno) <<
std::endl;
std::cerr << "Could not created named pipe at " << watchdog::FIFO_NAME << ", error " << errno
<< ": " << strerror(errno) << std::endl;
throw std::runtime_error("eive-watchdog: FIFO creation failed");
}
#if WATCHDOG_VERBOSE_LEVEL >= 1
std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME <<
" created successfully" << std::endl;
std::cout << "Pipe at " << watchdog::FIFO_NAME << " created successfully" << std::endl;
#endif
}
}
WatchdogTask::~WatchdogTask() {
}
WatchdogTask::~WatchdogTask() {}
int WatchdogTask::performOperation() {
// Open FIFO read only and non-blocking
fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
if (fd < 0) {
std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME <<
"read-only failed with " << errno << ": " << strerror(errno) << std::endl;
std::cerr << "Opening pipe " << watchdog::FIFO_NAME << "read-only failed with " << errno << ": "
<< strerror(errno) << std::endl;
return -1;
}
state = States::RUNNING;
state = States::NOT_STARTED;
while (true) {
WatchdogTask::LoopResult loopResult = watchdogLoop();
switch(loopResult) {
case(LoopResult::OK): {
performRunningOperation();
if (not stateMachine(loopResult)) {
break;
}
case(LoopResult::CANCEL_RQ): {
std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
return 0;
}
case(LoopResult::SUSPEND_RQ): {
performSuspendOperation();
break;
}
case(LoopResult::TIMEOUT): {
performNotRunningOperation(loopResult);
break;
}
case(LoopResult::HUNG_UP): {
performNotRunningOperation(loopResult);
break;
}
case(LoopResult::RESTART_RQ): {
if(state == States::SUSPENDED or state == States::FAULTY) {
performRunningOperation();
}
break;
}
case(LoopResult::FAULT): {
using namespace std::chrono_literals;
// Configuration error
std::cerr << "Fault has occured in watchdog loop" << std::endl;
// Prevent spam
std::this_thread::sleep_for(2000ms);
}
}
}
if (close(fd) < 0) {
std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME <<
"failed, error " << errno << ": " << strerror(errno) << std::endl;
std::cerr << "Closing named pipe at " << watchdog::FIFO_NAME << "failed, error " << errno
<< ": " << strerror(errno) << std::endl;
}
std::cout << "eive-watchdog: Finished" << std::endl;
std::cout << "Closing" << std::endl;
return 0;
}
@ -102,28 +66,7 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
waiter.fd = fd;
waiter.events = POLLIN;
switch(state) {
case(States::SUSPENDED): {
// Sleep, then check whether a restart request was received
std::this_thread::sleep_for(1000ms);
break;
}
case(States::RUNNING): {
// Continue as usual
break;
}
case(States::NOT_STARTED): {
// This should not happen
std::cerr << "eive-watchdog: State is NOT_STARTED, configuration error" << std::endl;
break;
}
case(States::FAULTY): {
// TODO: Not sure what to do yet. Continue for now
break;
}
}
// 10 seconds timeout, only poll one file descriptor
// Only poll one file descriptor with timeout
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
case (0): {
return LoopResult::TIMEOUT;
@ -132,8 +75,8 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
return pollEvent(waiter);
}
default: {
std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error " <<
errno << ": " << strerror(errno) << std::endl;
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
<< strerror(errno) << std::endl;
break;
}
}
@ -144,48 +87,42 @@ WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
if (waiter.revents & POLLIN) {
ssize_t readLen = read(fd, buf.data(), buf.size());
if (readLen < 0) {
std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME <<
", error " << errno << ": " << strerror(errno) << std::endl;
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
<< strerror(errno) << std::endl;
return LoopResult::OK;
}
#if WATCHDOG_VERBOSE_LEVEL == 2
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME
<< std::endl;
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
#endif
else if (readLen >= 1) {
return parseCommandByte(readLen);
return parseCommand(readLen);
}
}
else if(waiter.revents & POLLERR) {
std::cerr << "eive-watchdog: Poll error error on pipe " << watchdog::FIFO_NAME <<
std::endl;
} else if (waiter.revents & POLLERR) {
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
return LoopResult::FAULT;
}
else if (waiter.revents & POLLHUP) {
} else if (waiter.revents & POLLHUP) {
// Writer closed its end
return LoopResult::HUNG_UP;
}
return LoopResult::FAULT;
}
WatchdogTask::LoopResult WatchdogTask::parseCommandByte(ssize_t readLen) {
for(ssize_t idx = 0; idx < readLen; idx++) {
char readChar = buf[idx];
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
char readChar = buf[0];
// Cancel request
if(readChar == watchdog::CANCEL_CHAR) {
return LoopResult::CANCEL_RQ;
}
// Begin request. Does not work if the operation was not suspended before
else if(readChar == watchdog::RESTART_CHAR) {
return LoopResult::RESTART_RQ;
}
if (readChar == watchdog::first::CANCEL_CHAR) {
return LoopResult::CANCEL_REQ;
} else if (readChar == watchdog::first::SUSPEND_CHAR) {
// Suspend request
else if(readChar == watchdog::SUSPEND_CHAR) {
return LoopResult::SUSPEND_RQ;
return LoopResult::SUSPEND_REQ;
} else if (readChar == watchdog::first::START_CHAR) {
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
return LoopResult::START_WITH_WATCH_REQ;
}
return LoopResult::START_REQ;
}
// Everything else: All working as expected
}
return LoopResult::OK;
}
@ -193,6 +130,9 @@ int WatchdogTask::performRunningOperation() {
if (state != States::RUNNING) {
state = States::RUNNING;
}
if (notRunningStart.has_value()) {
notRunningStart = std::nullopt;
}
if (not obswRunning) {
if (printNotRunningLatch) {
@ -201,14 +141,13 @@ int WatchdogTask::performRunningOperation() {
}
obswRunning = true;
std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
std::cout << "OBSW is running" << std::endl;
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
std::cout << "Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
if (not obswRunningFile.good()) {
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed"
<< std::endl;
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed" << std::endl;
}
}
#endif
@ -220,26 +159,38 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
// Latch prevents spam on console
if (not printNotRunningLatch) {
if (type == LoopResult::HUNG_UP) {
std::cout << "eive-watchdog: FIFO writer hung up!" << std::endl;
}
else {
std::cout << "eive-watchdog: The FIFO timed out!" << std::endl;
std::cout << "OBSW hung up" << std::endl;
} else {
std::cout << "The FIFO timed out, OBSW might not be running" << std::endl;
}
printNotRunningLatch = true;
}
if (not notRunningStart.has_value()) {
notRunningStart = std::chrono::system_clock::now();
}
if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
if (result != 0) {
std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " <<
errno << ": " << strerror(errno) << std::endl;
std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " << errno
<< ": " << strerror(errno) << std::endl;
}
}
#endif
obswRunning = false;
}
if (watchingObsw) {
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
watchdog::MAX_NOT_RUNNING_MS) {
std::cout << "Restarting OBSW with systemctl" << std::endl;
std::system("systemctl restart obsw");
}
}
if (type == LoopResult::HUNG_UP) {
using namespace std::chrono_literals;
// Prevent spam
@ -248,11 +199,79 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
return 0;
}
int WatchdogTask::performSuspendOperation() {
bool WatchdogTask::stateMachine(LoopResult loopResult) {
using namespace std::chrono_literals;
bool sleep = false;
switch (state) {
case (States::RUNNING): {
switch (loopResult) {
case (LoopResult::TIMEOUT):
case (LoopResult::HUNG_UP): {
performNotRunningOperation(loopResult);
break;
}
case (LoopResult::OK): {
performRunningOperation();
break;
}
case (LoopResult::SUSPEND_REQ): {
if (state == States::RUNNING or state == States::FAULTY) {
std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
watchdogRunning = false;
std::cout << "Received suspend request, suspending watchdog operations" << std::endl;
state = States::SUSPENDED;
}
return 0;
performSuspendOperation();
sleep = true;
break;
}
case (LoopResult::CANCEL_REQ): {
std::cout << "Received cancel request, closing watchdog.." << std::endl;
return false;
}
}
}
case (States::FAULTY):
case (States::SUSPENDED):
case (States::NOT_STARTED): {
switch (loopResult) {
case (LoopResult::SUSPEND_REQ): {
// Ignore and also delay
sleep = true;
break;
}
case (LoopResult::START_REQ):
case (LoopResult::START_WITH_WATCH_REQ): {
if (state == States::NOT_STARTED or state == States::FAULTY) {
state = States::RUNNING;
}
if (loopResult == LoopResult::START_REQ) {
std::cout << "Start request without watch request received" << std::endl;
watchingObsw = false;
} else if (loopResult == LoopResult::START_WITH_WATCH_REQ) {
std::cout << "Start request with watch request received. Restarting OBSW if not "
"running for "
<< watchdog::MAX_NOT_RUNNING_MS / 1000 << " seconds" << std::endl;
watchingObsw = true;
}
performRunningOperation();
break;
}
default: {
sleep = true;
}
}
break;
}
}
if (loopResult == LoopResult::FAULT) {
// Configuration error
std::cerr << "Fault has occured in watchdog loop" << std::endl;
// Prevent spam
sleep = true;
}
if (sleep) {
std::this_thread::sleep_for(1000ms);
}
return true;
}
int WatchdogTask::performSuspendOperation() { return 0; }

View File

@ -2,23 +2,21 @@
#define WATCHDOG_WATCHDOG_H_
#include <array>
#include <chrono>
#include <cstdint>
#include <optional>
#include <string>
class WatchdogTask {
public:
enum class States {
NOT_STARTED,
RUNNING,
SUSPENDED,
FAULTY
};
enum class States { NOT_STARTED, RUNNING, SUSPENDED, FAULTY };
enum class LoopResult {
OK,
SUSPEND_RQ,
CANCEL_RQ,
RESTART_RQ,
START_REQ,
START_WITH_WATCH_REQ,
SUSPEND_REQ,
CANCEL_REQ,
TIMEOUT,
HUNG_UP,
FAULT
@ -29,18 +27,23 @@ public:
virtual ~WatchdogTask();
int performOperation();
private:
int fd = 0;
bool obswRunning = false;
bool watchdogRunning = false;
bool watchingObsw = false;
bool printNotRunningLatch = false;
std::array<uint8_t, 64> buf;
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart;
States state = States::NOT_STARTED;
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
// messages to loop results.
LoopResult watchdogLoop();
bool stateMachine(LoopResult result);
LoopResult pollEvent(struct pollfd& waiter);
LoopResult parseCommandByte(ssize_t readLen);
LoopResult parseCommand(ssize_t readLen);
int performRunningOperation();
int performNotRunningOperation(LoopResult type);

View File

@ -5,17 +5,31 @@
namespace watchdog {
namespace first {
// Start or restart character
static constexpr char START_CHAR = 'b';
// Suspend watchdog operations temporarily
static constexpr char SUSPEND_CHAR = 's';
// Resume watchdog operations
static constexpr char RESTART_CHAR = 'b';
// Causes the watchdog to close down
static constexpr char CANCEL_CHAR = 'c';
static constexpr char IDLE_CHAR = 'i';
} // namespace first
namespace second {
// Supplied with the start character. This instructs the watchdog to actually watch whether
// the OBSW is running all the time.
static constexpr char WATCH_FLAG = 'w';
} // namespace second
static constexpr int TIMEOUT_MS = 5 * 1000;
// 2 minutes
static constexpr unsigned MAX_NOT_RUNNING_MS = 2 * 60 * 1000;
const std::string FIFO_NAME = "/tmp/watchdog-pipe";
const std::string RUNNING_FILE_NAME = "/tmp/obsw-running";
}
} // namespace watchdog
#endif /* WATCHDOG_DEFINITIONS_H_ */

View File

@ -1,24 +1,22 @@
#include "Watchdog.h"
#include <iostream>
#include "Watchdog.h"
/**
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
* It checks whether the OBSW writes to the the FIFO regularly.
*/
int main() {
std::cout << "eive-watchdog: Starting OBSW watchdog.." << std::endl;
std::cout << "Starting OBSW watchdog" << std::endl;
try {
WatchdogTask watchdogTask;
int result = watchdogTask.performOperation();
if (result != 0) {
return result;
}
}
catch(const std::runtime_error& e) {
std::cerr << "eive-watchdog: Run time exception " << e.what() << std::endl;
} catch (const std::runtime_error& e) {
std::cerr << "Run time exception " << e.what() << std::endl;
return -1;
}
return 0;
}