#include "Watchdog.h"
#include "watchdogConf.h"

// NOTE(review): the names of the system headers were lost in the reviewed copy of this file
// (bare "#include" directives). The list below was reconstructed from the names this
// translation unit uses - verify against the original include list.
#include <fcntl.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <cerrno>
#include <chrono>
#include <cstdio>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <thread>

/**
 * @brief Creates the named pipe at watchdog::FIFO_NAME if it does not exist yet.
 *
 * The pipe is not opened here, this is done in performOperation(). The file descriptor is
 * initialised to -1 so that it never aliases a valid descriptor (0 would be stdin).
 *
 * @throws std::runtime_error if mkfifo() fails.
 */
WatchdogTask::WatchdogTask() : fd(-1) {
  // Only create the FIFO if it does not exist yet
  if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
    // Permission 666 or rw-rw-rw-
    mode_t mode = DEFFILEMODE;
    int result = mkfifo(watchdog::FIFO_NAME.c_str(), mode);
    if (result != 0) {
      // Capture errno immediately, the stream operations below may overwrite it
      const int err = errno;
      std::cerr << "eive-watchdog: Could not created named pipe at " << watchdog::FIFO_NAME
                << ", error " << err << ": " << strerror(err) << std::endl;
      throw std::runtime_error("eive-watchdog: FIFO creation failed");
    }
#if WATCHDOG_VERBOSE_LEVEL >= 1
    std::cout << "eive-watchdog: Pipe at " << watchdog::FIFO_NAME << " created successfully"
              << std::endl;
#endif
  }
}

/**
 * @brief Closes the FIFO file descriptor if it is still open.
 */
WatchdogTask::~WatchdogTask() {
  if (fd >= 0) {
    // Nothing sensible can be done about a close error in a destructor
    close(fd);
    fd = -1;
  }
}

/**
 * @brief Opens the FIFO read-only and non-blocking and runs the watchdog loop until a cancel
 *        request is received.
 *
 * Each iteration calls watchdogLoop() and dispatches on its result.
 *
 * @return 0 after a cancel request was received, -1 if the FIFO could not be opened.
 */
int WatchdogTask::performOperation() {
  // Open FIFO read only and non-blocking
  fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
  if (fd < 0) {
    const int err = errno;
    std::cerr << "eive-watchdog: Opening pipe " << watchdog::FIFO_NAME
              << " read-only failed with " << err << ": " << strerror(err) << std::endl;
    // Without a valid descriptor poll() would only ever time out, so give up here
    return -1;
  }
  state = States::RUNNING;

  bool cancelRequested = false;
  while (not cancelRequested) {
    WatchdogTask::LoopResult loopResult = watchdogLoop();
    switch (loopResult) {
      case (LoopResult::OK): {
        performRunningOperation();
        break;
      }
      case (LoopResult::CANCEL_RQ): {
        std::cout << "eive-watchdog: Received cancel request, closing watchdog.." << std::endl;
        // Leave the loop instead of returning so the pipe is closed below
        cancelRequested = true;
        break;
      }
      case (LoopResult::SUSPEND_RQ): {
        performSuspendOperation();
        break;
      }
      case (LoopResult::TIMEOUT):
      case (LoopResult::HUNG_UP): {
        performNotRunningOperation(loopResult);
        break;
      }
      case (LoopResult::RESTART_RQ): {
        // A restart request only has an effect if the watchdog is suspended or faulty
        if (state == States::SUSPENDED or state == States::FAULTY) {
          performRunningOperation();
        }
        break;
      }
      case (LoopResult::FAULT): {
        using namespace std::chrono_literals;
        // Configuration error
        std::cerr << "Fault has occured in watchdog loop" << std::endl;
        // Prevent spam
        std::this_thread::sleep_for(2000ms);
        break;
      }
    }
  }

  if (close(fd) < 0) {
    const int err = errno;
    std::cerr << "eive-watchdog: Closing named pipe at " << watchdog::FIFO_NAME
              << " failed, error " << err << ": " << strerror(err) << std::endl;
  }
  // Mark the descriptor as released so the destructor does not close it a second time
  fd = -1;
  std::cout << "eive-watchdog: Finished" << std::endl;
  return 0;
}

/**
 * @brief Performs one watchdog cycle: state specific handling, then one poll() on the FIFO.
 *
 * @return LoopResult::TIMEOUT if poll() timed out, the result of pollEvent() if the FIFO
 *         reported an event, LoopResult::FAULT if poll() itself failed.
 */
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
  using namespace std::chrono_literals;
  struct pollfd waiter = {};
  waiter.fd = fd;
  waiter.events = POLLIN;

  switch (state) {
    case (States::SUSPENDED): {
      // Sleep, then check whether a restart request was received
      std::this_thread::sleep_for(1000ms);
      break;
    }
    case (States::RUNNING): {
      // Continue as usual
      break;
    }
    case (States::NOT_STARTED): {
      // This should not happen
      std::cerr << "eive-watchdog: State is NOT_STARTED, configuration error" << std::endl;
      break;
    }
    case (States::FAULTY): {
      // TODO: Not sure what to do yet. Continue for now
      break;
    }
  }

  // Only poll one file descriptor, the timeout is configured with watchdog::TIMEOUT_MS
  switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
    case (0): {
      return LoopResult::TIMEOUT;
    }
    case (1): {
      return pollEvent(waiter);
    }
    default: {
      const int err = errno;
      std::cerr << "eive-watchdog: Unknown poll error at " << watchdog::FIFO_NAME << ", error "
                << err << ": " << strerror(err) << std::endl;
      break;
    }
  }
  // A failed poll() is no evidence that the writer is alive, so do not report OK here
  return LoopResult::FAULT;
}

/**
 * @brief Evaluates the events poll() reported for the FIFO.
 *
 * @param waiter  Poll descriptor whose revents field was filled by poll().
 * @return The result of parseCommandByte() if at least one byte was read, LoopResult::OK if
 *         read() failed, LoopResult::HUNG_UP on POLLHUP, LoopResult::FAULT otherwise.
 */
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
  if (waiter.revents & POLLIN) {
    ssize_t readLen = read(fd, buf.data(), buf.size());
    if (readLen < 0) {
      const int err = errno;
      std::cerr << "eive-watchdog: Read error on pipe " << watchdog::FIFO_NAME << ", error "
                << err << ": " << strerror(err) << std::endl;
      // The pipe was readable, a failed read is still treated like regular activity
      return LoopResult::OK;
    }
    if (readLen >= 1) {
#if WATCHDOG_VERBOSE_LEVEL == 2
      std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME
                << std::endl;
#endif
      return parseCommandByte(readLen);
    }
    // readLen == 0: nothing was read even though POLLIN was set, handled as fault below
  } else if (waiter.revents & POLLERR) {
    std::cerr << "eive-watchdog: Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
    return LoopResult::FAULT;
  } else if (waiter.revents & POLLHUP) {
    // Writer closed its end
    return LoopResult::HUNG_UP;
  }
  return LoopResult::FAULT;
}

/**
 * @brief Scans the bytes read from the FIFO for a command character.
 *
 * The first command character found determines the result, the remaining bytes are ignored.
 *
 * @param readLen  Number of valid bytes at the start of buf.
 * @return CANCEL_RQ, RESTART_RQ or SUSPEND_RQ for the respective command character,
 *         LoopResult::OK if none of the bytes is a command character.
 */
WatchdogTask::LoopResult WatchdogTask::parseCommandByte(ssize_t readLen) {
  for (ssize_t idx = 0; idx < readLen; idx++) {
    char readChar = buf[idx];
    if (readChar == watchdog::CANCEL_CHAR) {
      // Cancel request
      return LoopResult::CANCEL_RQ;
    }
    if (readChar == watchdog::RESTART_CHAR) {
      // Begin request. Does not work if the operation was not suspended before
      return LoopResult::RESTART_RQ;
    }
    if (readChar == watchdog::SUSPEND_CHAR) {
      // Suspend request
      return LoopResult::SUSPEND_RQ;
    }
    // Everything else: All working as expected
  }
  return LoopResult::OK;
}

/**
 * @brief Sets the state to RUNNING and, on the transition from not running to running,
 *        announces the running OBSW.
 *
 * If WATCHDOG_CREATE_FILE_IF_RUNNING is 1, the file watchdog::RUNNING_FILE_NAME is created on
 * that transition if it does not exist yet.
 *
 * @return Always 0.
 */
int WatchdogTask::performRunningOperation() {
  state = States::RUNNING;
  if (not obswRunning) {
    // Reset latch so user can see timeouts
    printNotRunningLatch = false;
    obswRunning = true;
    std::cout << "eive-watchdog: Running OBSW detected.." << std::endl;
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
    std::cout << "eive-watchdog: Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
    if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
      std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
      if (not obswRunningFile.good()) {
        std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed" << std::endl;
      }
    }
#endif
  }
  return 0;
}

/**
 * @brief Handles a FIFO timeout or a hang-up of the FIFO writer.
 *
 * Prints a message once (latched until the OBSW is detected as running again) and clears the
 * running flag. If WATCHDOG_CREATE_FILE_IF_RUNNING is 1, the file watchdog::RUNNING_FILE_NAME
 * is removed as well.
 *
 * @param type  LoopResult::HUNG_UP or LoopResult::TIMEOUT. Any value other than HUNG_UP is
 *              reported as a timeout.
 * @return Always 0.
 */
int WatchdogTask::performNotRunningOperation(LoopResult type) {
  // Latch prevents spam on console
  if (not printNotRunningLatch) {
    if (type == LoopResult::HUNG_UP) {
      std::cout << "eive-watchdog: FIFO writer hung up!" << std::endl;
    } else {
      std::cout << "eive-watchdog: The FIFO timed out!" << std::endl;
    }
    printNotRunningLatch = true;
  }

  if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
    if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
      int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
      if (result != 0) {
        const int err = errno;
        std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " << err
                  << ": " << strerror(err) << std::endl;
      }
    }
#endif
    obswRunning = false;
  }

  if (type == LoopResult::HUNG_UP) {
    using namespace std::chrono_literals;
    // Prevent spam
    std::this_thread::sleep_for(2000ms);
  }
  return 0;
}

/**
 * @brief Suspends the watchdog if it is currently in the RUNNING or FAULTY state.
 *
 * @return Always 0.
 */
int WatchdogTask::performSuspendOperation() {
  if (state == States::RUNNING or state == States::FAULTY) {
    std::cout << "eive-watchdog: Suspending watchdog operations" << std::endl;
    watchdogRunning = false;
    state = States::SUSPENDED;
  }
  return 0;
}