Watchdog Bugfixes #432
@ -295,8 +295,10 @@ include(BuildType)
|
||||
set_build_type()
|
||||
|
||||
set(FSFW_DEBUG_INFO 0)
|
||||
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 0)
|
||||
if(RELEASE_BUILD MATCHES 0)
|
||||
set(FSFW_DEBUG_INFO 1)
|
||||
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1)
|
||||
endif()
|
||||
|
||||
# Configuration files
|
||||
|
@ -17,7 +17,7 @@
|
||||
/*******************************************************************/
|
||||
|
||||
// Probably better if this is disabled for mission code. Convenient for development
|
||||
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1
|
||||
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG @Q7S_CHECK_FOR_ALREADY_RUNNING_IMG@
|
||||
|
||||
#define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0
|
||||
|
||||
|
@ -68,7 +68,7 @@ ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
|
||||
|
||||
ReturnValue_t WatchdogHandler::performStartHandling() {
|
||||
char startBuf[2];
|
||||
size_t writeLen = 1;
|
||||
ssize_t writeLen = 1;
|
||||
startBuf[0] = watchdog::first::START_CHAR;
|
||||
if (enableWatchFunction) {
|
||||
writeLen += 1;
|
||||
@ -76,9 +76,11 @@ ReturnValue_t WatchdogHandler::performStartHandling() {
|
||||
}
|
||||
ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen);
|
||||
if (writtenBytes < 0) {
|
||||
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
|
||||
<< std::endl;
|
||||
sif::error << "WatchdogHandler: Errors writing to watchdog FIFO, code " << errno << ": "
|
||||
<< strerror(errno) << std::endl;
|
||||
return returnvalue::FAILED;
|
||||
} else if (writtenBytes != writeLen) {
|
||||
sif::warning << "WatchdogHandler: Not all bytes were written, possible error" << std::endl;
|
||||
}
|
||||
return returnvalue::OK;
|
||||
}
|
||||
|
@ -12,10 +12,10 @@
|
||||
* @brief This is the main program for the target hardware.
|
||||
* @return
|
||||
*/
|
||||
int main(void) {
|
||||
int main(int argc, char* argv[]) {
|
||||
using namespace std;
|
||||
#if Q7S_SIMPLE_MODE == 0
|
||||
return obsw::obsw();
|
||||
return obsw::obsw(argc, argv);
|
||||
#else
|
||||
return simple::simple();
|
||||
#endif
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "q7sConfig.h"
|
||||
#include "watchdog/definitions.h"
|
||||
|
||||
static int OBSW_ALREADY_RUNNING = -2;
|
||||
static constexpr int OBSW_ALREADY_RUNNING = -2;
|
||||
#if OBSW_Q7S_EM == 0
|
||||
static const char* DEV_STRING = "Xiphos Q7S FM";
|
||||
#else
|
||||
@ -28,7 +28,7 @@ static const char* DEV_STRING = "Xiphos Q7S EM";
|
||||
|
||||
WatchdogHandler WATCHDOG_HANDLER;
|
||||
|
||||
int obsw::obsw() {
|
||||
int obsw::obsw(int argc, char* argv[]) {
|
||||
using namespace fsfw;
|
||||
std::cout << "-- EIVE OBSW --" << std::endl;
|
||||
std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl;
|
||||
@ -52,7 +52,8 @@ int obsw::obsw() {
|
||||
bootDelayHandling();
|
||||
|
||||
bool initWatchFunction = false;
|
||||
if (std::filesystem::current_path() == "/usr/bin") {
|
||||
std::string fullExecPath = argv[0];
|
||||
if (fullExecPath.find("/usr/bin") != std::string::npos) {
|
||||
initWatchFunction = true;
|
||||
}
|
||||
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
|
||||
@ -71,7 +72,7 @@ int obsw::obsw() {
|
||||
|
||||
for (;;) {
|
||||
WATCHDOG_HANDLER.periodicOperation();
|
||||
TaskFactory::delayTask(1000);
|
||||
TaskFactory::delayTask(2000);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
namespace obsw {
|
||||
|
||||
int obsw();
|
||||
int obsw(int argc, char* argv[]);
|
||||
|
||||
void bootDelayHandling();
|
||||
void commandEiveSystemToSafe();
|
||||
|
@ -44,11 +44,30 @@ int WatchdogTask::performOperation() {
|
||||
<< strerror(errno) << std::endl;
|
||||
return -1;
|
||||
}
|
||||
// Clear FIFO by reading until it is empty.
|
||||
while (true) {
|
||||
ssize_t readBytes = read(fd, buf.data(), buf.size());
|
||||
if (readBytes < 0) {
|
||||
std::cerr << "Read error of FIFO: " << strerror(errno) << std::endl;
|
||||
} else if (readBytes == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
state = States::NOT_STARTED;
|
||||
|
||||
bool breakOuter = false;
|
||||
while (true) {
|
||||
WatchdogTask::LoopResult loopResult = watchdogLoop();
|
||||
if (not stateMachine(loopResult)) {
|
||||
watchdogLoop();
|
||||
while (not resultQueue.empty()) {
|
||||
auto nextRequest = resultQueue.front();
|
||||
if (not stateMachine(nextRequest)) {
|
||||
breakOuter = true;
|
||||
resultQueue.pop();
|
||||
break;
|
||||
}
|
||||
resultQueue.pop();
|
||||
}
|
||||
if (breakOuter) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -60,7 +79,7 @@ int WatchdogTask::performOperation() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
||||
void WatchdogTask::watchdogLoop() {
|
||||
using namespace std::chrono_literals;
|
||||
struct pollfd waiter = {};
|
||||
waiter.fd = fd;
|
||||
@ -69,10 +88,12 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
||||
// Only poll one file descriptor with timeout
|
||||
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
|
||||
case (0): {
|
||||
return LoopResult::TIMEOUT;
|
||||
resultQueue.push(LoopResult::TIMEOUT);
|
||||
return;
|
||||
}
|
||||
case (1): {
|
||||
return pollEvent(waiter);
|
||||
pollEvent(waiter);
|
||||
return;
|
||||
}
|
||||
default: {
|
||||
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
||||
@ -80,50 +101,52 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return LoopResult::OK;
|
||||
}
|
||||
|
||||
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
|
||||
void WatchdogTask::pollEvent(struct pollfd& waiter) {
|
||||
if (waiter.revents & POLLIN) {
|
||||
ssize_t readLen = read(fd, buf.data(), buf.size());
|
||||
#if WATCHDOG_VERBOSE_LEVEL == 2
|
||||
std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME << std::endl;
|
||||
#endif
|
||||
if (readLen < 0) {
|
||||
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
||||
<< strerror(errno) << std::endl;
|
||||
return LoopResult::OK;
|
||||
}
|
||||
#if WATCHDOG_VERBOSE_LEVEL == 2
|
||||
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
|
||||
#endif
|
||||
else if (readLen >= 1) {
|
||||
return parseCommand(readLen);
|
||||
resultQueue.push(LoopResult::OK);
|
||||
} else if (readLen >= 1) {
|
||||
parseCommands(readLen);
|
||||
}
|
||||
|
||||
} else if (waiter.revents & POLLERR) {
|
||||
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
|
||||
return LoopResult::FAULT;
|
||||
resultQueue.push(LoopResult::FAULT);
|
||||
} else if (waiter.revents & POLLHUP) {
|
||||
// Writer closed its end
|
||||
return LoopResult::HUNG_UP;
|
||||
resultQueue.push(LoopResult::HUNG_UP);
|
||||
}
|
||||
return LoopResult::FAULT;
|
||||
}
|
||||
|
||||
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
|
||||
char readChar = buf[0];
|
||||
// Cancel request
|
||||
if (readChar == watchdog::first::CANCEL_CHAR) {
|
||||
return LoopResult::CANCEL_REQ;
|
||||
} else if (readChar == watchdog::first::SUSPEND_CHAR) {
|
||||
// Suspend request
|
||||
return LoopResult::SUSPEND_REQ;
|
||||
} else if (readChar == watchdog::first::START_CHAR) {
|
||||
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
|
||||
return LoopResult::START_WITH_WATCH_REQ;
|
||||
void WatchdogTask::parseCommands(ssize_t readLen) {
|
||||
for (ssize_t idx = 0; idx < readLen; idx++) {
|
||||
char nextChar = buf[idx];
|
||||
// Cancel request
|
||||
if (nextChar == watchdog::first::CANCEL_CHAR) {
|
||||
resultQueue.push(LoopResult::CANCEL_REQ);
|
||||
} else if (nextChar == watchdog::first::SUSPEND_CHAR) {
|
||||
// Suspend request
|
||||
resultQueue.push(LoopResult::SUSPEND_REQ);
|
||||
} else if (nextChar == watchdog::first::START_CHAR) {
|
||||
if (idx < readLen - 1 and static_cast<char>(buf[idx + 1]) == watchdog::second::WATCH_FLAG) {
|
||||
resultQueue.push(LoopResult::START_WITH_WATCH_REQ);
|
||||
idx++;
|
||||
continue;
|
||||
}
|
||||
resultQueue.push(LoopResult::START_REQ);
|
||||
} else if (nextChar == watchdog::first::IDLE_CHAR) {
|
||||
resultQueue.push(LoopResult::OK);
|
||||
}
|
||||
return LoopResult::START_REQ;
|
||||
}
|
||||
// Everything else: All working as expected
|
||||
return LoopResult::OK;
|
||||
}
|
||||
|
||||
int WatchdogTask::performRunningOperation() {
|
||||
@ -167,11 +190,12 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
||||
}
|
||||
|
||||
if (not notRunningStart.has_value()) {
|
||||
notRunningStart = std::chrono::system_clock::now();
|
||||
notRunningStart = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
if (obswRunning) {
|
||||
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
||||
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
|
||||
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
||||
if (result != 0) {
|
||||
@ -184,7 +208,7 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
||||
}
|
||||
|
||||
if (watchingObsw) {
|
||||
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
|
||||
auto timeNotRunning = std::chrono::steady_clock::now() - notRunningStart.value();
|
||||
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
|
||||
watchdog::MAX_NOT_RUNNING_MS) {
|
||||
std::cout << "Restarting OBSW with systemctl" << std::endl;
|
||||
@ -269,7 +293,7 @@ bool WatchdogTask::stateMachine(LoopResult loopResult) {
|
||||
sleep = true;
|
||||
}
|
||||
if (sleep) {
|
||||
std::this_thread::sleep_for(1000ms);
|
||||
std::this_thread::sleep_for(500ms);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
|
||||
class WatchdogTask {
|
||||
@ -35,15 +36,17 @@ class WatchdogTask {
|
||||
bool watchingObsw = false;
|
||||
bool printNotRunningLatch = false;
|
||||
std::array<uint8_t, 64> buf;
|
||||
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart;
|
||||
std::queue<LoopResult> resultQueue;
|
||||
|
||||
std::optional<std::chrono::time_point<std::chrono::steady_clock>> notRunningStart;
|
||||
States state = States::NOT_STARTED;
|
||||
|
||||
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
|
||||
// messages to loop results.
|
||||
LoopResult watchdogLoop();
|
||||
void watchdogLoop();
|
||||
bool stateMachine(LoopResult result);
|
||||
LoopResult pollEvent(struct pollfd& waiter);
|
||||
LoopResult parseCommand(ssize_t readLen);
|
||||
void pollEvent(struct pollfd& waiter);
|
||||
void parseCommands(ssize_t readLen);
|
||||
|
||||
int performRunningOperation();
|
||||
int performNotRunningOperation(LoopResult type);
|
||||
|
@ -1,6 +1,10 @@
|
||||
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "Watchdog.h"
|
||||
#include "definitions.h"
|
||||
|
||||
/**
|
||||
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
|
||||
@ -8,6 +12,13 @@
|
||||
*/
|
||||
int main() {
|
||||
std::cout << "Starting OBSW watchdog" << std::endl;
|
||||
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
|
||||
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
||||
if (result != 0) {
|
||||
std::cerr << "file removal failure" << std::endl;
|
||||
}
|
||||
}
|
||||
try {
|
||||
WatchdogTask watchdogTask;
|
||||
int result = watchdogTask.performOperation();
|
||||
|
Loading…
Reference in New Issue
Block a user