eive-obsw/watchdog/Watchdog.cpp

259 lines
7.9 KiB
C++
Raw Normal View History

2021-07-29 11:35:20 +02:00
#include "Watchdog.h"
2022-04-19 17:24:04 +02:00
#include "definitions.h"
2021-07-29 11:35:20 +02:00
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <iostream>
2021-07-29 16:31:04 +02:00
#include <fstream>
2021-07-29 11:35:20 +02:00
#include <thread>
#include <cstring>
#include <filesystem>
/**
 * @brief Sets up the watchdog task and makes sure the named pipe exists.
 *
 * The FIFO at watchdog::FIFO_NAME is created if nothing exists at that path yet.
 * The pipe is not opened here, that is done in performOperation().
 *
 * @throws std::runtime_error if creating the FIFO fails for any reason other than
 *         the path already existing.
 */
WatchdogTask::WatchdogTask() : fd(-1) {  // -1: no descriptor open yet (0 would be stdin)
  // Permission 666 or rw-rw-rw-
  const mode_t mode = DEFFILEMODE;
  // Create unconditionally and tolerate EEXIST instead of checking for existence first.
  // This closes the window in which another process could create the path between
  // the check and the mkfifo call.
  if (mkfifo(watchdog::FIFO_NAME.c_str(), mode) != 0) {
    // Save errno right away, the stream insertions below may overwrite it
    const int errorCode = errno;
    if (errorCode == EEXIST) {
      // Only create the FIFO if it does not exist yet
      return;
    }
    std::cerr << "eive-watchdog: Could not created named pipe at " << watchdog::FIFO_NAME
              << ", error " << errorCode << ": " << strerror(errorCode) << std::endl;
    throw std::runtime_error("eive-watchdog: FIFO creation failed");
  }
#if WATCHDOG_VERBOSE_LEVEL >= 1
  std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME <<
      " created successfully" << std::endl;
#endif
}
// Nothing to release explicitly here, the destructor performs no work of its own.
WatchdogTask::~WatchdogTask() = default;
int WatchdogTask::performOperation() {
2021-07-29 11:35:20 +02:00
// Open FIFO read only and non-blocking
fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
if(fd < 0) {
std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME <<
"read-only failed with " << errno << ": " << strerror(errno) << std::endl;
2021-07-29 18:33:15 +02:00
return -1;
2021-07-29 11:35:20 +02:00
}
2021-07-29 11:59:32 +02:00
state = States::RUNNING;
2021-07-29 11:35:20 +02:00
while(true) {
WatchdogTask::LoopResult loopResult = watchdogLoop();
switch(loopResult) {
case(LoopResult::OK): {
2021-07-29 17:21:27 +02:00
performRunningOperation();
2021-07-29 11:35:20 +02:00
break;
}
case(LoopResult::CANCEL_RQ): {
2021-07-29 16:31:04 +02:00
std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
return 0;
}
case(LoopResult::SUSPEND_RQ): {
2021-07-29 17:21:27 +02:00
performSuspendOperation();
2021-07-29 11:35:20 +02:00
break;
}
case(LoopResult::TIMEOUT): {
2021-07-29 18:09:54 +02:00
performNotRunningOperation(loopResult);
break;
}
case(LoopResult::HUNG_UP): {
performNotRunningOperation(loopResult);
2021-07-29 11:35:20 +02:00
break;
}
case(LoopResult::RESTART_RQ): {
2021-07-29 16:31:04 +02:00
if(state == States::SUSPENDED or state == States::FAULTY) {
performRunningOperation();
2021-07-29 11:35:20 +02:00
}
break;
}
2021-07-29 16:31:04 +02:00
case(LoopResult::FAULT): {
2021-07-29 17:21:27 +02:00
using namespace std::chrono_literals;
2021-07-29 16:31:04 +02:00
// Configuration error
std::cerr << "Fault has occured in watchdog loop" << std::endl;
2021-07-29 17:21:27 +02:00
// Prevent spam
std::this_thread::sleep_for(2000ms);
2021-07-29 16:31:04 +02:00
}
2021-07-29 11:35:20 +02:00
}
}
if (close(fd) < 0) {
std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME <<
"failed, error " << errno << ": " << strerror(errno) << std::endl;
}
std::cout << "eive-watchdog: Finished" << std::endl;
return 0;
}
/**
 * @brief Performs one watchdog cycle: state specific handling followed by one poll on
 *        the FIFO.
 *
 * @return LoopResult::TIMEOUT if poll() ran into the timeout, the result of pollEvent()
 *         if poll() reported an event, LoopResult::FAULT if poll() itself failed.
 */
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
  using namespace std::chrono_literals;
  struct pollfd waiter = {};
  waiter.fd = fd;
  waiter.events = POLLIN;

  switch (state) {
    case (States::SUSPENDED): {
      // Sleep, then check whether a restart request was received
      std::this_thread::sleep_for(1000ms);
      break;
    }
    case (States::RUNNING): {
      // Continue as usual
      break;
    }
    case (States::NOT_STARTED): {
      // This should not happen
      std::cerr << "eive-watchdog: State is NOT_STARTED, configuration error" << std::endl;
      break;
    }
    case (States::FAULTY): {
      // TODO: Not sure what to do yet. Continue for now
      break;
    }
  }

  // Only poll one file descriptor, the timeout is taken from watchdog::TIMEOUT_MS
  const int pollResult = poll(&waiter, 1, watchdog::TIMEOUT_MS);
  if (pollResult == 0) {
    return LoopResult::TIMEOUT;
  }
  if (pollResult > 0) {
    return pollEvent(waiter);
  }

  // poll() failed. Save errno right away, the stream insertions below may overwrite it
  const int errorCode = errno;
  std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error "
            << errorCode << ": " << strerror(errorCode) << std::endl;
  // Do not report OK here: performOperation() handles OK by calling
  // performRunningOperation(), but a failed poll says nothing about the FIFO writer.
  return LoopResult::FAULT;
}
2021-07-29 16:31:04 +02:00
/**
 * @brief Evaluates the events reported by poll() for the FIFO descriptor.
 *
 * @param waiter Poll structure filled by a preceding poll() call.
 * @return The result of parseCommandByte() if data was read, LoopResult::HUNG_UP if the
 *         read hit end-of-file or POLLHUP was reported, LoopResult::OK if the read
 *         failed, LoopResult::FAULT for POLLERR or any other event.
 */
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
  if (waiter.revents & POLLIN) {
    const ssize_t readLen = read(fd, buf.data(), buf.size());
    if (readLen < 0) {
      // Save errno right away, the stream insertions below may overwrite it
      const int errorCode = errno;
      std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME <<
          ", error " << errorCode << ": " << strerror(errorCode) << std::endl;
      // NOTE(review): a failed read is still reported as OK (unchanged behaviour).
      // Confirm that this is intended.
      return LoopResult::OK;
    }
    // The verbose print must not sit between an if and its else-if, otherwise the file
    // does not compile with WATCHDOG_VERBOSE_LEVEL == 2
#if WATCHDOG_VERBOSE_LEVEL == 2
    std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME
        << std::endl;
#endif
    if (readLen == 0) {
      // read() returning 0 means end-of-file, so no writer has the FIFO open anymore
      return LoopResult::HUNG_UP;
    }
    return parseCommandByte(readLen);
  }
  if (waiter.revents & POLLERR) {
    std::cerr << "eive-watchdog: Poll error error on pipe " << watchdog::FIFO_NAME <<
        std::endl;
    return LoopResult::FAULT;
  }
  if (waiter.revents & POLLHUP) {
    // Writer closed its end
    return LoopResult::HUNG_UP;
  }
  return LoopResult::FAULT;
}
/**
 * @brief Scans the bytes read from the FIFO for a command character.
 *
 * The first command character found decides the result, later bytes are not looked at.
 *
 * @param readLen Number of bytes in the buffer to inspect.
 * @return The request matching the first command character, LoopResult::OK if the
 *         inspected bytes contain no command character.
 */
WatchdogTask::LoopResult WatchdogTask::parseCommandByte(ssize_t readLen) {
  ssize_t pos = 0;
  while (pos < readLen) {
    const char current = buf[pos];
    if (current == watchdog::CANCEL_CHAR) {
      // Cancel request
      return LoopResult::CANCEL_RQ;
    }
    if (current == watchdog::RESTART_CHAR) {
      // Begin request. Does not work if the operation was not suspended before
      return LoopResult::RESTART_RQ;
    }
    if (current == watchdog::SUSPEND_CHAR) {
      // Suspend request
      return LoopResult::SUSPEND_RQ;
    }
    // Any other byte: all working as expected, look at the next one
    pos++;
  }
  return LoopResult::OK;
}
int WatchdogTask::performRunningOperation() {
2021-07-29 17:21:27 +02:00
if(state != States::RUNNING) {
state = States::RUNNING;
}
2021-07-29 16:31:04 +02:00
if(not obswRunning) {
2021-07-29 18:09:54 +02:00
if(printNotRunningLatch) {
2021-07-29 17:21:27 +02:00
// Reset latch so user can see timeouts
2021-07-29 18:09:54 +02:00
printNotRunningLatch = false;
2021-07-29 17:21:27 +02:00
}
2021-07-29 16:31:04 +02:00
obswRunning = true;
2021-07-29 17:21:27 +02:00
std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
2021-07-29 16:31:04 +02:00
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
2021-07-29 17:21:27 +02:00
std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
2021-07-29 16:31:04 +02:00
if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
if(not obswRunningFile.good()) {
std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed"
<< std::endl;
}
}
#endif
}
return 0;
}
2021-07-29 18:09:54 +02:00
/**
 * @brief Handles a FIFO timeout or a hang-up of the FIFO writer.
 *
 * Prints a notice once (guarded by a latch), clears the OBSW running flag and, if
 * WATCHDOG_CREATE_FILE_IF_RUNNING is 1, removes the file at watchdog::RUNNING_FILE_NAME.
 * After a hang-up the function additionally sleeps for two seconds.
 *
 * @param type Loop result which triggered the call. LoopResult::HUNG_UP selects the
 *             hang-up handling, every other value is treated as a timeout.
 * @return Always 0.
 */
int WatchdogTask::performNotRunningOperation(LoopResult type) {
  const bool writerHungUp = (type == LoopResult::HUNG_UP);

  // Latch prevents spam on console
  if (not printNotRunningLatch) {
    std::cout << (writerHungUp ? "eive-watchdog: FIFO writer hung up!"
                               : "eive-watchdog: The FIFO timed out!")
              << std::endl;
    printNotRunningLatch = true;
  }

  if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
    // Only attempt the removal if the file is actually there
    if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME) and
        std::remove(watchdog::RUNNING_FILE_NAME.c_str()) != 0) {
      std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " <<
          errno << ": " << strerror(errno) << std::endl;
    }
#endif
    obswRunning = false;
  }

  if (writerHungUp) {
    using namespace std::chrono_literals;
    // Prevent spam
    std::this_thread::sleep_for(2000ms);
  }
  return 0;
}
2021-07-29 17:21:27 +02:00
/**
 * @brief Suspends the watchdog if it is currently in the RUNNING or FAULTY state.
 *
 * In any other state the call has no effect.
 *
 * @return Always 0.
 */
int WatchdogTask::performSuspendOperation() {
  const bool canSuspend = (state == States::RUNNING) or (state == States::FAULTY);
  if (not canSuspend) {
    return 0;
  }
  std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
  watchdogRunning = false;
  state = States::SUSPENDED;
  return 0;
}