Watchdog Bugfixes #432

Merged
muellerr merged 3 commits from bugfix_watchdog_init into develop 2023-03-06 11:34:39 +01:00
9 changed files with 91 additions and 48 deletions

View File

@ -295,8 +295,10 @@ include(BuildType)
set_build_type()
set(FSFW_DEBUG_INFO 0)
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 0)
if(RELEASE_BUILD MATCHES 0)
set(FSFW_DEBUG_INFO 1)
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1)
endif()
# Configuration files

View File

@ -17,7 +17,7 @@
/*******************************************************************/
// Probably better if this is disabled for mission code. Convenient for development
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG @Q7S_CHECK_FOR_ALREADY_RUNNING_IMG@
#define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0

View File

@ -68,7 +68,7 @@ ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
ReturnValue_t WatchdogHandler::performStartHandling() {
char startBuf[2];
size_t writeLen = 1;
ssize_t writeLen = 1;
startBuf[0] = watchdog::first::START_CHAR;
if (enableWatchFunction) {
writeLen += 1;
@ -76,9 +76,11 @@ ReturnValue_t WatchdogHandler::performStartHandling() {
}
ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen);
if (writtenBytes < 0) {
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
<< std::endl;
sif::error << "WatchdogHandler: Errors writing to watchdog FIFO, code " << errno << ": "
<< strerror(errno) << std::endl;
return returnvalue::FAILED;
} else if (writtenBytes != writeLen) {
sif::warning << "WatchdogHandler: Not all bytes were written, possible error" << std::endl;
}
return returnvalue::OK;
}

View File

@ -12,10 +12,10 @@
* @brief This is the main program for the target hardware.
* @return
*/
int main(void) {
int main(int argc, char* argv[]) {
using namespace std;
#if Q7S_SIMPLE_MODE == 0
return obsw::obsw();
return obsw::obsw(argc, argv);
#else
return simple::simple();
#endif

View File

@ -19,7 +19,7 @@
#include "q7sConfig.h"
#include "watchdog/definitions.h"
static int OBSW_ALREADY_RUNNING = -2;
static constexpr int OBSW_ALREADY_RUNNING = -2;
#if OBSW_Q7S_EM == 0
static const char* DEV_STRING = "Xiphos Q7S FM";
#else
@ -28,7 +28,7 @@ static const char* DEV_STRING = "Xiphos Q7S EM";
WatchdogHandler WATCHDOG_HANDLER;
int obsw::obsw() {
int obsw::obsw(int argc, char* argv[]) {
using namespace fsfw;
std::cout << "-- EIVE OBSW --" << std::endl;
std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl;
@ -52,7 +52,8 @@ int obsw::obsw() {
bootDelayHandling();
bool initWatchFunction = false;
if (std::filesystem::current_path() == "/usr/bin") {
std::string fullExecPath = argv[0];
if (fullExecPath.find("/usr/bin") != std::string::npos) {
initWatchFunction = true;
}
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
@ -71,7 +72,7 @@ int obsw::obsw() {
for (;;) {
WATCHDOG_HANDLER.periodicOperation();
TaskFactory::delayTask(1000);
TaskFactory::delayTask(2000);
}
return 0;
}

View File

@ -3,7 +3,7 @@
namespace obsw {
int obsw();
int obsw(int argc, char* argv[]);
void bootDelayHandling();
void commandEiveSystemToSafe();

View File

@ -44,11 +44,30 @@ int WatchdogTask::performOperation() {
<< strerror(errno) << std::endl;
return -1;
}
// Clear FIFO by reading until it is empty.
while (true) {
ssize_t readBytes = read(fd, buf.data(), buf.size());
if (readBytes < 0) {
std::cerr << "Read error of FIFO: " << strerror(errno) << std::endl;
} else if (readBytes == 0) {
break;
}
}
state = States::NOT_STARTED;
bool breakOuter = false;
while (true) {
WatchdogTask::LoopResult loopResult = watchdogLoop();
if (not stateMachine(loopResult)) {
watchdogLoop();
while (not resultQueue.empty()) {
auto nextRequest = resultQueue.front();
if (not stateMachine(nextRequest)) {
breakOuter = true;
resultQueue.pop();
break;
}
resultQueue.pop();
}
if (breakOuter) {
break;
}
}
@ -60,7 +79,7 @@ int WatchdogTask::performOperation() {
return 0;
}
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
void WatchdogTask::watchdogLoop() {
using namespace std::chrono_literals;
struct pollfd waiter = {};
waiter.fd = fd;
@ -69,10 +88,12 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
// Only poll one file descriptor with timeout
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
case (0): {
return LoopResult::TIMEOUT;
resultQueue.push(LoopResult::TIMEOUT);
return;
}
case (1): {
return pollEvent(waiter);
pollEvent(waiter);
return;
}
default: {
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
@ -80,50 +101,52 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
break;
}
}
return LoopResult::OK;
}
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
void WatchdogTask::pollEvent(struct pollfd& waiter) {
if (waiter.revents & POLLIN) {
ssize_t readLen = read(fd, buf.data(), buf.size());
#if WATCHDOG_VERBOSE_LEVEL == 2
std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME << std::endl;
#endif
if (readLen < 0) {
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
<< strerror(errno) << std::endl;
return LoopResult::OK;
}
#if WATCHDOG_VERBOSE_LEVEL == 2
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
#endif
else if (readLen >= 1) {
return parseCommand(readLen);
resultQueue.push(LoopResult::OK);
} else if (readLen >= 1) {
parseCommands(readLen);
}
} else if (waiter.revents & POLLERR) {
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
return LoopResult::FAULT;
resultQueue.push(LoopResult::FAULT);
} else if (waiter.revents & POLLHUP) {
// Writer closed its end
return LoopResult::HUNG_UP;
resultQueue.push(LoopResult::HUNG_UP);
}
return LoopResult::FAULT;
}
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
char readChar = buf[0];
// Cancel request
if (readChar == watchdog::first::CANCEL_CHAR) {
return LoopResult::CANCEL_REQ;
} else if (readChar == watchdog::first::SUSPEND_CHAR) {
// Suspend request
return LoopResult::SUSPEND_REQ;
} else if (readChar == watchdog::first::START_CHAR) {
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
return LoopResult::START_WITH_WATCH_REQ;
void WatchdogTask::parseCommands(ssize_t readLen) {
for (ssize_t idx = 0; idx < readLen; idx++) {
char nextChar = buf[idx];
// Cancel request
if (nextChar == watchdog::first::CANCEL_CHAR) {
resultQueue.push(LoopResult::CANCEL_REQ);
} else if (nextChar == watchdog::first::SUSPEND_CHAR) {
// Suspend request
resultQueue.push(LoopResult::SUSPEND_REQ);
} else if (nextChar == watchdog::first::START_CHAR) {
if (idx < readLen - 1 and static_cast<char>(buf[idx + 1]) == watchdog::second::WATCH_FLAG) {
resultQueue.push(LoopResult::START_WITH_WATCH_REQ);
idx++;
continue;
}
resultQueue.push(LoopResult::START_REQ);
} else if (nextChar == watchdog::first::IDLE_CHAR) {
resultQueue.push(LoopResult::OK);
}
return LoopResult::START_REQ;
}
// Everything else: All working as expected
return LoopResult::OK;
}
int WatchdogTask::performRunningOperation() {
@ -167,11 +190,12 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
}
if (not notRunningStart.has_value()) {
notRunningStart = std::chrono::system_clock::now();
notRunningStart = std::chrono::steady_clock::now();
}
if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
if (result != 0) {
@ -184,7 +208,7 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
}
if (watchingObsw) {
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
auto timeNotRunning = std::chrono::steady_clock::now() - notRunningStart.value();
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
watchdog::MAX_NOT_RUNNING_MS) {
std::cout << "Restarting OBSW with systemctl" << std::endl;
@ -269,7 +293,7 @@ bool WatchdogTask::stateMachine(LoopResult loopResult) {
sleep = true;
}
if (sleep) {
std::this_thread::sleep_for(1000ms);
std::this_thread::sleep_for(500ms);
}
return true;
}

View File

@ -5,6 +5,7 @@
#include <chrono>
#include <cstdint>
#include <optional>
#include <queue>
#include <string>
class WatchdogTask {
@ -35,15 +36,17 @@ class WatchdogTask {
bool watchingObsw = false;
bool printNotRunningLatch = false;
std::array<uint8_t, 64> buf;
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart;
std::queue<LoopResult> resultQueue;
std::optional<std::chrono::time_point<std::chrono::steady_clock>> notRunningStart;
States state = States::NOT_STARTED;
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
// messages to loop results.
LoopResult watchdogLoop();
void watchdogLoop();
bool stateMachine(LoopResult result);
LoopResult pollEvent(struct pollfd& waiter);
LoopResult parseCommand(ssize_t readLen);
void pollEvent(struct pollfd& waiter);
void parseCommands(ssize_t readLen);
int performRunningOperation();
int performNotRunningOperation(LoopResult type);

View File

@ -1,6 +1,10 @@
#include <filesystem>
#include <iostream>
#include <string>
#include "Watchdog.h"
#include "definitions.h"
/**
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
@ -8,6 +12,13 @@
*/
int main() {
std::cout << "Starting OBSW watchdog" << std::endl;
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
if (result != 0) {
std::cerr << "file removal failure" << std::endl;
}
}
try {
WatchdogTask watchdogTask;
int result = watchdogTask.performOperation();