Watchdog Bugfixes #432

Merged
muellerr merged 3 commits from bugfix_watchdog_init into develop 2023-03-06 11:34:39 +01:00
9 changed files with 91 additions and 48 deletions

View File

@ -295,8 +295,10 @@ include(BuildType)
set_build_type() set_build_type()
set(FSFW_DEBUG_INFO 0) set(FSFW_DEBUG_INFO 0)
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 0)
if(RELEASE_BUILD MATCHES 0) if(RELEASE_BUILD MATCHES 0)
set(FSFW_DEBUG_INFO 1) set(FSFW_DEBUG_INFO 1)
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1)
endif() endif()
# Configuration files # Configuration files

View File

@ -17,7 +17,7 @@
/*******************************************************************/ /*******************************************************************/
// Probably better if this is disabled for mission code. Convenient for development // Probably better if this is disabled for mission code. Convenient for development
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1 #define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG @Q7S_CHECK_FOR_ALREADY_RUNNING_IMG@
#define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0 #define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0

View File

@ -68,7 +68,7 @@ ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
ReturnValue_t WatchdogHandler::performStartHandling() { ReturnValue_t WatchdogHandler::performStartHandling() {
char startBuf[2]; char startBuf[2];
size_t writeLen = 1; ssize_t writeLen = 1;
startBuf[0] = watchdog::first::START_CHAR; startBuf[0] = watchdog::first::START_CHAR;
if (enableWatchFunction) { if (enableWatchFunction) {
writeLen += 1; writeLen += 1;
@ -76,9 +76,11 @@ ReturnValue_t WatchdogHandler::performStartHandling() {
} }
ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen); ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen);
if (writtenBytes < 0) { if (writtenBytes < 0) {
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno) sif::error << "WatchdogHandler: Errors writing to watchdog FIFO, code " << errno << ": "
<< std::endl; << strerror(errno) << std::endl;
return returnvalue::FAILED; return returnvalue::FAILED;
} else if (writtenBytes != writeLen) {
sif::warning << "WatchdogHandler: Not all bytes were written, possible error" << std::endl;
} }
return returnvalue::OK; return returnvalue::OK;
} }

View File

@ -12,10 +12,10 @@
* @brief This is the main program for the target hardware. * @brief This is the main program for the target hardware.
* @return * @return
*/ */
int main(void) { int main(int argc, char* argv[]) {
using namespace std; using namespace std;
#if Q7S_SIMPLE_MODE == 0 #if Q7S_SIMPLE_MODE == 0
return obsw::obsw(); return obsw::obsw(argc, argv);
#else #else
return simple::simple(); return simple::simple();
#endif #endif

View File

@ -19,7 +19,7 @@
#include "q7sConfig.h" #include "q7sConfig.h"
#include "watchdog/definitions.h" #include "watchdog/definitions.h"
static int OBSW_ALREADY_RUNNING = -2; static constexpr int OBSW_ALREADY_RUNNING = -2;
#if OBSW_Q7S_EM == 0 #if OBSW_Q7S_EM == 0
static const char* DEV_STRING = "Xiphos Q7S FM"; static const char* DEV_STRING = "Xiphos Q7S FM";
#else #else
@ -28,7 +28,7 @@ static const char* DEV_STRING = "Xiphos Q7S EM";
WatchdogHandler WATCHDOG_HANDLER; WatchdogHandler WATCHDOG_HANDLER;
int obsw::obsw() { int obsw::obsw(int argc, char* argv[]) {
using namespace fsfw; using namespace fsfw;
std::cout << "-- EIVE OBSW --" << std::endl; std::cout << "-- EIVE OBSW --" << std::endl;
std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl; std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl;
@ -52,7 +52,8 @@ int obsw::obsw() {
bootDelayHandling(); bootDelayHandling();
bool initWatchFunction = false; bool initWatchFunction = false;
if (std::filesystem::current_path() == "/usr/bin") { std::string fullExecPath = argv[0];
if (fullExecPath.find("/usr/bin") != std::string::npos) {
initWatchFunction = true; initWatchFunction = true;
} }
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction); ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
@ -71,7 +72,7 @@ int obsw::obsw() {
for (;;) { for (;;) {
WATCHDOG_HANDLER.periodicOperation(); WATCHDOG_HANDLER.periodicOperation();
TaskFactory::delayTask(1000); TaskFactory::delayTask(2000);
} }
return 0; return 0;
} }

View File

@ -3,7 +3,7 @@
namespace obsw { namespace obsw {
int obsw(); int obsw(int argc, char* argv[]);
void bootDelayHandling(); void bootDelayHandling();
void commandEiveSystemToSafe(); void commandEiveSystemToSafe();

View File

@ -44,11 +44,30 @@ int WatchdogTask::performOperation() {
<< strerror(errno) << std::endl; << strerror(errno) << std::endl;
return -1; return -1;
} }
// Clear FIFO by reading until it is empty.
while (true) {
ssize_t readBytes = read(fd, buf.data(), buf.size());
if (readBytes < 0) {
std::cerr << "Read error of FIFO: " << strerror(errno) << std::endl;
} else if (readBytes == 0) {
break;
}
}
state = States::NOT_STARTED; state = States::NOT_STARTED;
bool breakOuter = false;
while (true) { while (true) {
WatchdogTask::LoopResult loopResult = watchdogLoop(); watchdogLoop();
if (not stateMachine(loopResult)) { while (not resultQueue.empty()) {
auto nextRequest = resultQueue.front();
if (not stateMachine(nextRequest)) {
breakOuter = true;
resultQueue.pop();
break;
}
resultQueue.pop();
}
if (breakOuter) {
break; break;
} }
} }
@ -60,7 +79,7 @@ int WatchdogTask::performOperation() {
return 0; return 0;
} }
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() { void WatchdogTask::watchdogLoop() {
using namespace std::chrono_literals; using namespace std::chrono_literals;
struct pollfd waiter = {}; struct pollfd waiter = {};
waiter.fd = fd; waiter.fd = fd;
@ -69,10 +88,12 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
// Only poll one file descriptor with timeout // Only poll one file descriptor with timeout
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) { switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
case (0): { case (0): {
return LoopResult::TIMEOUT; resultQueue.push(LoopResult::TIMEOUT);
return;
} }
case (1): { case (1): {
return pollEvent(waiter); pollEvent(waiter);
return;
} }
default: { default: {
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": " std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
@ -80,50 +101,52 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
break; break;
} }
} }
return LoopResult::OK;
} }
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) { void WatchdogTask::pollEvent(struct pollfd& waiter) {
if (waiter.revents & POLLIN) { if (waiter.revents & POLLIN) {
ssize_t readLen = read(fd, buf.data(), buf.size()); ssize_t readLen = read(fd, buf.data(), buf.size());
#if WATCHDOG_VERBOSE_LEVEL == 2
std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME << std::endl;
#endif
if (readLen < 0) { if (readLen < 0) {
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": " std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
<< strerror(errno) << std::endl; << strerror(errno) << std::endl;
return LoopResult::OK; resultQueue.push(LoopResult::OK);
} } else if (readLen >= 1) {
#if WATCHDOG_VERBOSE_LEVEL == 2 parseCommands(readLen);
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
#endif
else if (readLen >= 1) {
return parseCommand(readLen);
} }
} else if (waiter.revents & POLLERR) { } else if (waiter.revents & POLLERR) {
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl; std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
return LoopResult::FAULT; resultQueue.push(LoopResult::FAULT);
} else if (waiter.revents & POLLHUP) { } else if (waiter.revents & POLLHUP) {
// Writer closed its end // Writer closed its end
return LoopResult::HUNG_UP; resultQueue.push(LoopResult::HUNG_UP);
} }
return LoopResult::FAULT;
} }
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) { void WatchdogTask::parseCommands(ssize_t readLen) {
char readChar = buf[0]; for (ssize_t idx = 0; idx < readLen; idx++) {
// Cancel request char nextChar = buf[idx];
if (readChar == watchdog::first::CANCEL_CHAR) { // Cancel request
return LoopResult::CANCEL_REQ; if (nextChar == watchdog::first::CANCEL_CHAR) {
} else if (readChar == watchdog::first::SUSPEND_CHAR) { resultQueue.push(LoopResult::CANCEL_REQ);
// Suspend request } else if (nextChar == watchdog::first::SUSPEND_CHAR) {
return LoopResult::SUSPEND_REQ; // Suspend request
} else if (readChar == watchdog::first::START_CHAR) { resultQueue.push(LoopResult::SUSPEND_REQ);
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) { } else if (nextChar == watchdog::first::START_CHAR) {
return LoopResult::START_WITH_WATCH_REQ; if (idx < readLen - 1 and static_cast<char>(buf[idx + 1]) == watchdog::second::WATCH_FLAG) {
resultQueue.push(LoopResult::START_WITH_WATCH_REQ);
idx++;
continue;
}
resultQueue.push(LoopResult::START_REQ);
} else if (nextChar == watchdog::first::IDLE_CHAR) {
resultQueue.push(LoopResult::OK);
} }
return LoopResult::START_REQ;
} }
// Everything else: All working as expected // Everything else: All working as expected
return LoopResult::OK;
} }
int WatchdogTask::performRunningOperation() { int WatchdogTask::performRunningOperation() {
@ -167,11 +190,12 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
} }
if (not notRunningStart.has_value()) { if (not notRunningStart.has_value()) {
notRunningStart = std::chrono::system_clock::now(); notRunningStart = std::chrono::steady_clock::now();
} }
if (obswRunning) { if (obswRunning) {
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1 #if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) { if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str()); int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
if (result != 0) { if (result != 0) {
@ -184,7 +208,7 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
} }
if (watchingObsw) { if (watchingObsw) {
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value(); auto timeNotRunning = std::chrono::steady_clock::now() - notRunningStart.value();
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() > if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
watchdog::MAX_NOT_RUNNING_MS) { watchdog::MAX_NOT_RUNNING_MS) {
std::cout << "Restarting OBSW with systemctl" << std::endl; std::cout << "Restarting OBSW with systemctl" << std::endl;
@ -269,7 +293,7 @@ bool WatchdogTask::stateMachine(LoopResult loopResult) {
sleep = true; sleep = true;
} }
if (sleep) { if (sleep) {
std::this_thread::sleep_for(1000ms); std::this_thread::sleep_for(500ms);
} }
return true; return true;
} }

View File

@ -5,6 +5,7 @@
#include <chrono> #include <chrono>
#include <cstdint> #include <cstdint>
#include <optional> #include <optional>
#include <queue>
#include <string> #include <string>
class WatchdogTask { class WatchdogTask {
@ -35,15 +36,17 @@ class WatchdogTask {
bool watchingObsw = false; bool watchingObsw = false;
bool printNotRunningLatch = false; bool printNotRunningLatch = false;
std::array<uint8_t, 64> buf; std::array<uint8_t, 64> buf;
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart; std::queue<LoopResult> resultQueue;
std::optional<std::chrono::time_point<std::chrono::steady_clock>> notRunningStart;
States state = States::NOT_STARTED; States state = States::NOT_STARTED;
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating // Primary loop. Takes care of delaying, and reading from the communication pipe and translating
// messages to loop results. // messages to loop results.
LoopResult watchdogLoop(); void watchdogLoop();
bool stateMachine(LoopResult result); bool stateMachine(LoopResult result);
LoopResult pollEvent(struct pollfd& waiter); void pollEvent(struct pollfd& waiter);
LoopResult parseCommand(ssize_t readLen); void parseCommands(ssize_t readLen);
int performRunningOperation(); int performRunningOperation();
int performNotRunningOperation(LoopResult type); int performNotRunningOperation(LoopResult type);

View File

@ -1,6 +1,10 @@
#include <filesystem>
#include <iostream> #include <iostream>
#include <string>
#include "Watchdog.h" #include "Watchdog.h"
#include "definitions.h"
/** /**
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running. * @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
@ -8,6 +12,13 @@
*/ */
int main() { int main() {
std::cout << "Starting OBSW watchdog" << std::endl; std::cout << "Starting OBSW watchdog" << std::endl;
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
if (result != 0) {
std::cerr << "file removal failure" << std::endl;
}
}
try { try {
WatchdogTask watchdogTask; WatchdogTask watchdogTask;
int result = watchdogTask.performOperation(); int result = watchdogTask.performOperation();