Robin Mueller
d8acf94a02
All checks were successful
EIVE/eive-obsw/pipeline/pr-develop This commit looks good
259 lines
7.9 KiB
C++
259 lines
7.9 KiB
C++
#include "Watchdog.h"
|
|
#include "watchdogConf.h"
|
|
|
|
#include <errno.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <poll.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <thread>
|
|
#include <cstring>
|
|
#include <filesystem>
|
|
|
|
|
|
/**
 * @brief Makes sure the named pipe (FIFO) at watchdog::FIFO_NAME exists.
 *
 * If nothing exists at that path yet, the FIFO is created with mode DEFFILEMODE.
 * The pipe is not opened here; that happens in performOperation().
 *
 * @throws std::runtime_error if the FIFO is missing and could not be created.
 */
WatchdogTask::WatchdogTask (): fd(0) {
    // Only create the FIFO if it does not exist yet
    if(std::filesystem::exists(watchdog::FIFO_NAME)) {
        return;
    }

    // Permission 666 or rw-rw-rw-
    const mode_t mode = DEFFILEMODE;
    if(mkfifo(watchdog::FIFO_NAME.c_str(), mode) != 0) {
        // Save errno right away: the stream insertions below may overwrite it
        const int mkfifoErrno = errno;
        if(mkfifoErrno == EEXIST) {
            // Something created the path between the existence check and mkfifo().
            // This is equivalent to the "already exists" case handled above.
            return;
        }
        std::cerr << "eive-watchdog: Could not create named pipe at " <<
                watchdog::FIFO_NAME << ", error " << mkfifoErrno << ": " <<
                strerror(mkfifoErrno) << std::endl;
        throw std::runtime_error("eive-watchdog: FIFO creation failed");
    }
#if WATCHDOG_VERBOSE_LEVEL >= 1
    std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME <<
            " created successfully" << std::endl;
#endif
}
|
|
|
|
/**
 * @brief Releases the FIFO descriptor if one is still open.
 *
 * Only strictly positive values are closed: 0 is the value the constructor
 * initialises fd with, and a negative value is what a failed open() in
 * performOperation() leaves behind, so neither is a descriptor owned by
 * this object.
 */
WatchdogTask::~WatchdogTask() {
    if(fd > 0) {
        // Result deliberately ignored: a destructor has no way to report it
        close(fd);
        fd = -1;
    }
}
|
|
|
|
int WatchdogTask::performOperation() {
|
|
// Open FIFO read only and non-blocking
|
|
fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
|
|
if(fd < 0) {
|
|
std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME <<
|
|
"read-only failed with " << errno << ": " << strerror(errno) << std::endl;
|
|
return -1;
|
|
}
|
|
state = States::RUNNING;
|
|
|
|
while(true) {
|
|
WatchdogTask::LoopResult loopResult = watchdogLoop();
|
|
switch(loopResult) {
|
|
case(LoopResult::OK): {
|
|
performRunningOperation();
|
|
break;
|
|
}
|
|
case(LoopResult::CANCEL_RQ): {
|
|
std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
|
|
return 0;
|
|
}
|
|
case(LoopResult::SUSPEND_RQ): {
|
|
performSuspendOperation();
|
|
break;
|
|
}
|
|
case(LoopResult::TIMEOUT): {
|
|
performNotRunningOperation(loopResult);
|
|
break;
|
|
}
|
|
case(LoopResult::HUNG_UP): {
|
|
performNotRunningOperation(loopResult);
|
|
break;
|
|
}
|
|
case(LoopResult::RESTART_RQ): {
|
|
if(state == States::SUSPENDED or state == States::FAULTY) {
|
|
performRunningOperation();
|
|
}
|
|
break;
|
|
}
|
|
case(LoopResult::FAULT): {
|
|
using namespace std::chrono_literals;
|
|
// Configuration error
|
|
std::cerr << "Fault has occured in watchdog loop" << std::endl;
|
|
// Prevent spam
|
|
std::this_thread::sleep_for(2000ms);
|
|
|
|
}
|
|
}
|
|
}
|
|
if (close(fd) < 0) {
|
|
std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME <<
|
|
"failed, error " << errno << ": " << strerror(errno) << std::endl;
|
|
}
|
|
std::cout << "eive-watchdog: Finished" << std::endl;
|
|
return 0;
|
|
}
|
|
|
|
/**
 * @brief Performs one watchdog cycle: waits for activity on the FIFO.
 *
 * While suspended, the cycle first sleeps for one second. Afterwards the FIFO
 * descriptor is polled for readable data with a timeout of
 * watchdog::TIMEOUT_MS.
 *
 * @return LoopResult::TIMEOUT if nothing happened within the timeout, the
 *         result of pollEvent() if the descriptor reported an event, and
 *         LoopResult::FAULT if poll() itself failed.
 */
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
    using namespace std::chrono_literals;
    struct pollfd waiter = {};
    waiter.fd = fd;
    waiter.events = POLLIN;

    switch(state) {
    case(States::SUSPENDED): {
        // Sleep, then check whether a restart request was received
        std::this_thread::sleep_for(1000ms);
        break;
    }
    case(States::RUNNING): {
        // Continue as usual
        break;
    }
    case(States::NOT_STARTED): {
        // This should not happen
        std::cerr << "eive-watchdog: State is NOT_STARTED, configuration error" << std::endl;
        break;
    }
    case(States::FAULTY): {
        // TODO: Not sure what to do yet. Continue for now
        break;
    }
    }

    // Wait up to watchdog::TIMEOUT_MS, only poll one file descriptor
    const int pollResult = poll(&waiter, 1, watchdog::TIMEOUT_MS);
    switch(pollResult) {
    case(0): {
        return LoopResult::TIMEOUT;
    }
    case(1): {
        return pollEvent(waiter);
    }
    default: {
        // Save errno right away: the stream insertions below may overwrite it
        const int pollErrno = errno;
        std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error " <<
                pollErrno << ": " << strerror(pollErrno) << std::endl;
        // A failed poll() says nothing about the writer, so it must not be reported as OK.
        // FAULT also makes the caller sleep, so a persistent error does not busy-loop.
        return LoopResult::FAULT;
    }
    }
}
|
|
|
|
/**
 * @brief Evaluates the events poll() reported for the FIFO descriptor.
 *
 * @param waiter poll descriptor whose revents field was filled in by poll().
 * @return - the result of parseCommandByte() if at least one byte was read,
 *         - LoopResult::OK if read() failed,
 *         - LoopResult::HUNG_UP if only POLLHUP was reported,
 *         - LoopResult::FAULT for POLLERR, for a zero-length read and for
 *           any other event combination.
 */
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
    if (waiter.revents & POLLIN) {
        const ssize_t readLen = read(fd, buf.data(), buf.size());
        if (readLen < 0) {
            // Save errno right away: the stream insertions below may overwrite it
            const int readErrno = errno;
            std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME <<
                    ", error " << readErrno << ": " << strerror(readErrno) << std::endl;
            // NOTE(review): performOperation() handles OK by calling
            // performRunningOperation(); confirm that this is intended for a failed read.
            return LoopResult::OK;
        }
#if WATCHDOG_VERBOSE_LEVEL == 2
        std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME
                << std::endl;
#endif
        // Deliberately a separate if instead of else-if: the optional print above
        // must not sit between an if and its else branch.
        if(readLen >= 1) {
            return parseCommandByte(readLen);
        }
        // readLen == 0: nothing was read, handled by the FAULT return at the end
    }
    else if(waiter.revents & POLLERR) {
        std::cerr << "eive-watchdog: Poll error on pipe " << watchdog::FIFO_NAME <<
                std::endl;
        return LoopResult::FAULT;
    }
    else if (waiter.revents & POLLHUP) {
        // Writer closed its end
        return LoopResult::HUNG_UP;
    }
    return LoopResult::FAULT;
}
|
|
|
|
/**
 * @brief Scans the bytes last read from the FIFO for a command character.
 *
 * The bytes are inspected front to back and the first command character
 * found decides the result; the remaining bytes are not looked at.
 *
 * @param readLen number of valid bytes at the start of the read buffer.
 * @return CANCEL_RQ, RESTART_RQ or SUSPEND_RQ for the first matching command
 *         character, LoopResult::OK if none of the bytes is a command.
 */
WatchdogTask::LoopResult WatchdogTask::parseCommandByte(ssize_t readLen) {
    ssize_t pos = 0;
    while(pos < readLen) {
        const char current = buf[pos];
        ++pos;

        // Cancel request
        if(current == watchdog::CANCEL_CHAR) {
            return LoopResult::CANCEL_RQ;
        }
        // Begin request. Does not work if the operation was not suspended before
        if(current == watchdog::RESTART_CHAR) {
            return LoopResult::RESTART_RQ;
        }
        // Suspend request
        if(current == watchdog::SUSPEND_CHAR) {
            return LoopResult::SUSPEND_RQ;
        }
        // Any other byte carries no command: keep scanning
    }
    return LoopResult::OK;
}
|
|
|
|
int WatchdogTask::performRunningOperation() {
|
|
if(state != States::RUNNING) {
|
|
state = States::RUNNING;
|
|
}
|
|
|
|
if(not obswRunning) {
|
|
if(printNotRunningLatch) {
|
|
// Reset latch so user can see timeouts
|
|
printNotRunningLatch = false;
|
|
}
|
|
|
|
obswRunning = true;
|
|
std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
|
|
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
|
std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
|
|
if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
|
std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
|
|
if(not obswRunningFile.good()) {
|
|
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed"
|
|
<< std::endl;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
 * @brief Records that the OBSW is not running (FIFO timeout or writer hang-up).
 *
 * Prints the reason once until the latch is reset by performRunningOperation().
 * On the transition from "running" to "not running" it removes the file
 * watchdog::RUNNING_FILE_NAME if WATCHDOG_CREATE_FILE_IF_RUNNING is 1 and the
 * file exists. After a hang-up the call sleeps for two seconds.
 *
 * @param type loop result that triggered the call; HUNG_UP selects the
 *             hang-up message and the sleep, anything else is reported as a
 *             timeout.
 * @return always 0.
 */
int WatchdogTask::performNotRunningOperation(LoopResult type) {
    const bool writerHungUp = (type == LoopResult::HUNG_UP);

    // Latch prevents spam on console
    if(not printNotRunningLatch) {
        const char* notice = writerHungUp ?
                "eive-watchdog: FIFO writer hung up!" :
                "eive-watchdog: The FIFO timed out!";
        std::cout << notice << std::endl;
        printNotRunningLatch = true;
    }

    if(obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
        if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
            if(std::remove(watchdog::RUNNING_FILE_NAME.c_str()) != 0) {
                std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " <<
                        errno << ": " << strerror(errno) << std::endl;
            }
        }
#endif
        obswRunning = false;
    }

    if(writerHungUp) {
        using namespace std::chrono_literals;
        // Prevent spam
        std::this_thread::sleep_for(2000ms);
    }
    return 0;
}
|
|
|
|
int WatchdogTask::performSuspendOperation() {
|
|
if(state == States::RUNNING or state == States::FAULTY) {
|
|
std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
|
|
watchdogRunning = false;
|
|
state = States::SUSPENDED;
|
|
}
|
|
return 0;
|
|
}
|