#include "Watchdog.h"

#include <fcntl.h>
#include <poll.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <cerrno>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <thread>

#include "definitions.h"

/**
 * Creates the named pipe at watchdog::FIFO_NAME if it does not exist yet.
 *
 * @throws std::runtime_error if mkfifo() fails.
 */
WatchdogTask::WatchdogTask() : fd(0) {
  int result = 0;
  // Only create the FIFO if it does not exist yet
  if (not std::filesystem::exists(watchdog::FIFO_NAME)) {
    // Permission 666 or rw-rw-rw-
    mode_t mode = DEFFILEMODE;
    result = mkfifo(watchdog::FIFO_NAME.c_str(), mode);
    if (result != 0) {
      std::cerr << "Could not create named pipe at " << watchdog::FIFO_NAME << ", error " << errno
                << ": " << strerror(errno) << std::endl;
      throw std::runtime_error("eive-watchdog: FIFO creation failed");
    }
#if WATCHDOG_VERBOSE_LEVEL >= 1
    std::cout << "Pipe at " << watchdog::FIFO_NAME << " created successfully" << std::endl;
#endif
  }
}

WatchdogTask::~WatchdogTask() {}

/**
 * Opens the FIFO and runs the poll loop / state machine until the state machine
 * asks to stop (which happens on a cancel request).
 *
 * @return -1 if the FIFO could not be opened, 0 after the loop was left.
 */
int WatchdogTask::performOperation() {
  // Open FIFO read only and non-blocking
  fd = open(watchdog::FIFO_NAME.c_str(), O_RDONLY | O_NONBLOCK);
  if (fd < 0) {
    std::cerr << "Opening pipe " << watchdog::FIFO_NAME << " read-only failed with " << errno
              << ": " << strerror(errno) << std::endl;
    return -1;
  }
  state = States::NOT_STARTED;

  while (true) {
    WatchdogTask::LoopResult loopResult = watchdogLoop();
    if (not stateMachine(loopResult)) {
      break;
    }
  }
  if (close(fd) < 0) {
    std::cerr << "Closing named pipe at " << watchdog::FIFO_NAME << " failed, error " << errno
              << ": " << strerror(errno) << std::endl;
  }
  std::cout << "Closing" << std::endl;
  return 0;
}

/**
 * Waits up to watchdog::TIMEOUT_MS for activity on the FIFO.
 *
 * @return LoopResult::TIMEOUT if nothing happened in time, the result of
 *         pollEvent() if the descriptor reported an event, and
 *         LoopResult::FAULT if poll() itself failed.
 */
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
  struct pollfd waiter = {};
  waiter.fd = fd;
  waiter.events = POLLIN;

  // Only poll one file descriptor with timeout
  switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
    case (0): {
      return LoopResult::TIMEOUT;
    }
    case (1): {
      return pollEvent(waiter);
    }
    default: {
      std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
                << strerror(errno) << std::endl;
      break;
    }
  }
  // A failed poll() says nothing about the monitored software. Reporting OK here
  // would be handled like a sign of life in the RUNNING state, so report a fault.
  return LoopResult::FAULT;
}

/**
 * Evaluates the events poll() reported for the FIFO.
 *
 * @param waiter poll descriptor filled in by poll().
 * @return The parsed command if at least one byte was read, LoopResult::OK on a
 *         read error, LoopResult::HUNG_UP if the writer closed its end and
 *         LoopResult::FAULT otherwise (POLLERR, zero-length read, unknown event).
 */
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
  if (waiter.revents & POLLIN) {
    ssize_t readLen = read(fd, buf.data(), buf.size());
    if (readLen < 0) {
      std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
                << strerror(errno) << std::endl;
      // NOTE(review): a failed read is reported as OK, which the RUNNING state
      // handles like a sign of life. Confirm this is intended.
      return LoopResult::OK;
    }
    // The verbose print must not sit between the if and its else branch, otherwise
    // the code does not compile when WATCHDOG_VERBOSE_LEVEL is 2.
#if WATCHDOG_VERBOSE_LEVEL == 2
    std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME
              << std::endl;
#endif
    if (readLen >= 1) {
      return parseCommand(readLen);
    }
  } else if (waiter.revents & POLLERR) {
    std::cerr << "Poll error on pipe " << watchdog::FIFO_NAME << std::endl;
    return LoopResult::FAULT;
  } else if (waiter.revents & POLLHUP) {
    // Writer closed its end
    return LoopResult::HUNG_UP;
  }
  return LoopResult::FAULT;
}

/**
 * Translates the bytes read from the FIFO into a loop result.
 *
 * Only the first byte selects the command. A start command may carry a second
 * byte which requests that the OBSW is restarted if it stops reporting.
 *
 * @param readLen number of valid bytes in buf, expected to be at least 1.
 * @return The request encoded by the first byte, LoopResult::OK for any other byte.
 */
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
  char readChar = buf[0];
  // Cancel request
  if (readChar == watchdog::first::CANCEL_CHAR) {
    return LoopResult::CANCEL_REQ;
  } else if (readChar == watchdog::first::SUSPEND_CHAR) {
    // Suspend request
    return LoopResult::SUSPEND_REQ;
  } else if (readChar == watchdog::first::START_CHAR) {
    if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
      return LoopResult::START_WITH_WATCH_REQ;
    }
    return LoopResult::START_REQ;
  }
  // Everything else: All working as expected
  return LoopResult::OK;
}

/**
 * Bookkeeping for a sign of life: switches to the RUNNING state, clears the
 * not-running timer and, on the transition from not running to running, prints a
 * message and optionally creates the running marker file.
 *
 * @return Always 0.
 */
int WatchdogTask::performRunningOperation() {
  if (state != States::RUNNING) {
    state = States::RUNNING;
  }
  if (notRunningStart.has_value()) {
    notRunningStart = std::nullopt;
  }
  if (not obswRunning) {
    if (printNotRunningLatch) {
      // Reset latch so user can see timeouts
      printNotRunningLatch = false;
    }
    obswRunning = true;
    std::cout << "OBSW is running" << std::endl;
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
    std::cout << "Creating " << watchdog::RUNNING_FILE_NAME << std::endl;
    if (not std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
      std::ofstream obswRunningFile(watchdog::RUNNING_FILE_NAME);
      if (not obswRunningFile.good()) {
        std::cerr << "Creating file " << watchdog::RUNNING_FILE_NAME << " failed" << std::endl;
      }
    }
#endif
  }
  return 0;
}

/**
 * Bookkeeping for a missing sign of life (timeout or hang-up): prints a latched
 * message, starts the not-running timer, removes the running marker file and, if
 * watching was requested, restarts the OBSW once it has been silent for more
 * than watchdog::MAX_NOT_RUNNING_MS.
 *
 * @param type LoopResult::HUNG_UP or LoopResult::TIMEOUT; selects the message and
 *             whether an additional delay is inserted.
 * @return Always 0.
 */
int WatchdogTask::performNotRunningOperation(LoopResult type) {
  // Latch prevents spam on console
  if (not printNotRunningLatch) {
    if (type == LoopResult::HUNG_UP) {
      std::cout << "OBSW hung up" << std::endl;
    } else {
      std::cout << "The FIFO timed out, OBSW might not be running" << std::endl;
    }
    printNotRunningLatch = true;
  }
  if (not notRunningStart.has_value()) {
    notRunningStart = std::chrono::system_clock::now();
  }

  if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
    if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
      int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
      if (result != 0) {
        std::cerr << "Removing " << watchdog::RUNNING_FILE_NAME << " failed with code " << errno
                  << ": " << strerror(errno) << std::endl;
      }
    }
#endif
    obswRunning = false;
  }

  if (watchingObsw) {
    auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
    if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
        watchdog::MAX_NOT_RUNNING_MS) {
      std::cout << "Restarting OBSW with systemctl" << std::endl;
      const int restartResult = std::system("systemctl restart obsw");
      if (restartResult != 0) {
        std::cerr << "Restarting OBSW failed, systemctl returned " << restartResult << std::endl;
      }
      // Start a new grace period. Without this, every following timeout or hang-up
      // would restart the OBSW again before it had a chance to report in.
      notRunningStart = std::chrono::system_clock::now();
    }
  }
  if (type == LoopResult::HUNG_UP) {
    using namespace std::chrono_literals;
    // Prevent spam
    std::this_thread::sleep_for(2000ms);
  }
  return 0;
}

/**
 * Advances the watchdog state based on the result of one poll cycle.
 *
 * @param loopResult result of the last watchdogLoop() call.
 * @return false if the watchdog should terminate (cancel request received while
 *         RUNNING), true otherwise.
 */
bool WatchdogTask::stateMachine(LoopResult loopResult) {
  using namespace std::chrono_literals;
  bool sleep = false;
  switch (state) {
    case (States::RUNNING): {
      switch (loopResult) {
        case (LoopResult::TIMEOUT):
        case (LoopResult::HUNG_UP): {
          performNotRunningOperation(loopResult);
          break;
        }
        case (LoopResult::OK): {
          performRunningOperation();
          break;
        }
        case (LoopResult::SUSPEND_REQ): {
          if (state == States::RUNNING or state == States::FAULTY) {
            std::cout << "Received suspend request, suspending watchdog operations" << std::endl;
            state = States::SUSPENDED;
          }
          performSuspendOperation();
          sleep = true;
          break;
        }
        case (LoopResult::CANCEL_REQ): {
          std::cout << "Received cancel request, closing watchdog.." << std::endl;
          return false;
        }
        default: {
          // Start requests and faults are handled by the shared branch below
          break;
        }
      }
      // The RUNNING case continues into the shared branch: start requests received
      // while RUNNING are processed there. Every other result handled above ends up
      // in the default case of the shared branch, which requests the 1 s delay.
      [[fallthrough]];
    }
    case (States::FAULTY):
    case (States::SUSPENDED):
    case (States::NOT_STARTED): {
      switch (loopResult) {
        case (LoopResult::SUSPEND_REQ): {
          // Ignore and also delay
          sleep = true;
          break;
        }
        case (LoopResult::START_REQ):
        case (LoopResult::START_WITH_WATCH_REQ): {
          if (state == States::NOT_STARTED or state == States::FAULTY) {
            state = States::RUNNING;
          }
          if (loopResult == LoopResult::START_REQ) {
            std::cout << "Start request without watch request received" << std::endl;
            watchingObsw = false;
          } else if (loopResult == LoopResult::START_WITH_WATCH_REQ) {
            std::cout << "Start request with watch request received. Restarting OBSW if not "
                         "running for "
                      << watchdog::MAX_NOT_RUNNING_MS / 1000 << " seconds" << std::endl;
            watchingObsw = true;
          }
          performRunningOperation();
          break;
        }
        default: {
          sleep = true;
        }
      }
      break;
    }
  }
  if (loopResult == LoopResult::FAULT) {
    // Configuration error
    std::cerr << "Fault has occured in watchdog loop" << std::endl;
    // Prevent spam
    sleep = true;
  }
  if (sleep) {
    std::this_thread::sleep_for(1000ms);
  }
  return true;
}

/**
 * Hook executed when a suspend request is received while RUNNING.
 * Currently does nothing.
 *
 * @return Always 0.
 */
int WatchdogTask::performSuspendOperation() { return 0; }