Watchdog Bugfixes #432
@ -295,8 +295,10 @@ include(BuildType)
|
|||||||
set_build_type()
|
set_build_type()
|
||||||
|
|
||||||
set(FSFW_DEBUG_INFO 0)
|
set(FSFW_DEBUG_INFO 0)
|
||||||
|
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 0)
|
||||||
if(RELEASE_BUILD MATCHES 0)
|
if(RELEASE_BUILD MATCHES 0)
|
||||||
set(FSFW_DEBUG_INFO 1)
|
set(FSFW_DEBUG_INFO 1)
|
||||||
|
set(Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Configuration files
|
# Configuration files
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
/*******************************************************************/
|
/*******************************************************************/
|
||||||
|
|
||||||
// Probably better if this is disabled for mission code. Convenient for development
|
// Probably better if this is disabled for mission code. Convenient for development
|
||||||
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG 1
|
#define Q7S_CHECK_FOR_ALREADY_RUNNING_IMG @Q7S_CHECK_FOR_ALREADY_RUNNING_IMG@
|
||||||
|
|
||||||
#define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0
|
#define Q7S_SIMPLE_ADD_FILE_SYSTEM_TEST 0
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ ReturnValue_t WatchdogHandler::initialize(bool enableWatchdogFunction) {
|
|||||||
|
|
||||||
ReturnValue_t WatchdogHandler::performStartHandling() {
|
ReturnValue_t WatchdogHandler::performStartHandling() {
|
||||||
char startBuf[2];
|
char startBuf[2];
|
||||||
size_t writeLen = 1;
|
ssize_t writeLen = 1;
|
||||||
startBuf[0] = watchdog::first::START_CHAR;
|
startBuf[0] = watchdog::first::START_CHAR;
|
||||||
if (enableWatchFunction) {
|
if (enableWatchFunction) {
|
||||||
writeLen += 1;
|
writeLen += 1;
|
||||||
@ -76,9 +76,11 @@ ReturnValue_t WatchdogHandler::performStartHandling() {
|
|||||||
}
|
}
|
||||||
ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen);
|
ssize_t writtenBytes = write(watchdogFifoFd, &startBuf, writeLen);
|
||||||
if (writtenBytes < 0) {
|
if (writtenBytes < 0) {
|
||||||
sif::error << "Errors writing to watchdog FIFO, code " << errno << ": " << strerror(errno)
|
sif::error << "WatchdogHandler: Errors writing to watchdog FIFO, code " << errno << ": "
|
||||||
<< std::endl;
|
<< strerror(errno) << std::endl;
|
||||||
return returnvalue::FAILED;
|
return returnvalue::FAILED;
|
||||||
|
} else if (writtenBytes != writeLen) {
|
||||||
|
sif::warning << "WatchdogHandler: Not all bytes were written, possible error" << std::endl;
|
||||||
}
|
}
|
||||||
return returnvalue::OK;
|
return returnvalue::OK;
|
||||||
}
|
}
|
||||||
|
@ -12,10 +12,10 @@
|
|||||||
* @brief This is the main program for the target hardware.
|
* @brief This is the main program for the target hardware.
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
int main(void) {
|
int main(int argc, char* argv[]) {
|
||||||
using namespace std;
|
using namespace std;
|
||||||
#if Q7S_SIMPLE_MODE == 0
|
#if Q7S_SIMPLE_MODE == 0
|
||||||
return obsw::obsw();
|
return obsw::obsw(argc, argv);
|
||||||
#else
|
#else
|
||||||
return simple::simple();
|
return simple::simple();
|
||||||
#endif
|
#endif
|
||||||
|
@ -19,7 +19,7 @@
|
|||||||
#include "q7sConfig.h"
|
#include "q7sConfig.h"
|
||||||
#include "watchdog/definitions.h"
|
#include "watchdog/definitions.h"
|
||||||
|
|
||||||
static int OBSW_ALREADY_RUNNING = -2;
|
static constexpr int OBSW_ALREADY_RUNNING = -2;
|
||||||
#if OBSW_Q7S_EM == 0
|
#if OBSW_Q7S_EM == 0
|
||||||
static const char* DEV_STRING = "Xiphos Q7S FM";
|
static const char* DEV_STRING = "Xiphos Q7S FM";
|
||||||
#else
|
#else
|
||||||
@ -28,7 +28,7 @@ static const char* DEV_STRING = "Xiphos Q7S EM";
|
|||||||
|
|
||||||
WatchdogHandler WATCHDOG_HANDLER;
|
WatchdogHandler WATCHDOG_HANDLER;
|
||||||
|
|
||||||
int obsw::obsw() {
|
int obsw::obsw(int argc, char* argv[]) {
|
||||||
using namespace fsfw;
|
using namespace fsfw;
|
||||||
std::cout << "-- EIVE OBSW --" << std::endl;
|
std::cout << "-- EIVE OBSW --" << std::endl;
|
||||||
std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl;
|
std::cout << "-- Compiled for Linux (" << DEV_STRING << ") --" << std::endl;
|
||||||
@ -52,7 +52,8 @@ int obsw::obsw() {
|
|||||||
bootDelayHandling();
|
bootDelayHandling();
|
||||||
|
|
||||||
bool initWatchFunction = false;
|
bool initWatchFunction = false;
|
||||||
if (std::filesystem::current_path() == "/usr/bin") {
|
std::string fullExecPath = argv[0];
|
||||||
|
if (fullExecPath.find("/usr/bin") != std::string::npos) {
|
||||||
initWatchFunction = true;
|
initWatchFunction = true;
|
||||||
}
|
}
|
||||||
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
|
ReturnValue_t result = WATCHDOG_HANDLER.initialize(initWatchFunction);
|
||||||
@ -71,7 +72,7 @@ int obsw::obsw() {
|
|||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
WATCHDOG_HANDLER.periodicOperation();
|
WATCHDOG_HANDLER.periodicOperation();
|
||||||
TaskFactory::delayTask(1000);
|
TaskFactory::delayTask(2000);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
namespace obsw {
|
namespace obsw {
|
||||||
|
|
||||||
int obsw();
|
int obsw(int argc, char* argv[]);
|
||||||
|
|
||||||
void bootDelayHandling();
|
void bootDelayHandling();
|
||||||
void commandEiveSystemToSafe();
|
void commandEiveSystemToSafe();
|
||||||
|
@ -44,11 +44,30 @@ int WatchdogTask::performOperation() {
|
|||||||
<< strerror(errno) << std::endl;
|
<< strerror(errno) << std::endl;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
// Clear FIFO by reading until it is empty.
|
||||||
|
while (true) {
|
||||||
|
ssize_t readBytes = read(fd, buf.data(), buf.size());
|
||||||
|
if (readBytes < 0) {
|
||||||
|
std::cerr << "Read error of FIFO: " << strerror(errno) << std::endl;
|
||||||
|
} else if (readBytes == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
state = States::NOT_STARTED;
|
state = States::NOT_STARTED;
|
||||||
|
|
||||||
|
bool breakOuter = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
WatchdogTask::LoopResult loopResult = watchdogLoop();
|
watchdogLoop();
|
||||||
if (not stateMachine(loopResult)) {
|
while (not resultQueue.empty()) {
|
||||||
|
auto nextRequest = resultQueue.front();
|
||||||
|
if (not stateMachine(nextRequest)) {
|
||||||
|
breakOuter = true;
|
||||||
|
resultQueue.pop();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
resultQueue.pop();
|
||||||
|
}
|
||||||
|
if (breakOuter) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -60,7 +79,7 @@ int WatchdogTask::performOperation() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
void WatchdogTask::watchdogLoop() {
|
||||||
using namespace std::chrono_literals;
|
using namespace std::chrono_literals;
|
||||||
struct pollfd waiter = {};
|
struct pollfd waiter = {};
|
||||||
waiter.fd = fd;
|
waiter.fd = fd;
|
||||||
@ -69,10 +88,12 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
|||||||
// Only poll one file descriptor with timeout
|
// Only poll one file descriptor with timeout
|
||||||
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
|
switch (poll(&waiter, 1, watchdog::TIMEOUT_MS)) {
|
||||||
case (0): {
|
case (0): {
|
||||||
return LoopResult::TIMEOUT;
|
resultQueue.push(LoopResult::TIMEOUT);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
case (1): {
|
case (1): {
|
||||||
return pollEvent(waiter);
|
pollEvent(waiter);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
std::cerr << "Unknown poll error at " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
||||||
@ -80,50 +101,52 @@ WatchdogTask::LoopResult WatchdogTask::watchdogLoop() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return LoopResult::OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::pollEvent(struct pollfd& waiter) {
|
void WatchdogTask::pollEvent(struct pollfd& waiter) {
|
||||||
if (waiter.revents & POLLIN) {
|
if (waiter.revents & POLLIN) {
|
||||||
ssize_t readLen = read(fd, buf.data(), buf.size());
|
ssize_t readLen = read(fd, buf.data(), buf.size());
|
||||||
|
#if WATCHDOG_VERBOSE_LEVEL == 2
|
||||||
|
std::cout << "Read " << readLen << " byte(s) on the pipe " << watchdog::FIFO_NAME << std::endl;
|
||||||
|
#endif
|
||||||
if (readLen < 0) {
|
if (readLen < 0) {
|
||||||
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
std::cerr << "Read error on pipe " << watchdog::FIFO_NAME << ", error " << errno << ": "
|
||||||
<< strerror(errno) << std::endl;
|
<< strerror(errno) << std::endl;
|
||||||
return LoopResult::OK;
|
resultQueue.push(LoopResult::OK);
|
||||||
}
|
} else if (readLen >= 1) {
|
||||||
#if WATCHDOG_VERBOSE_LEVEL == 2
|
parseCommands(readLen);
|
||||||
std::cout << "Read " << readLen << " byte(s) on the pipe " << FIFO_NAME << std::endl;
|
|
||||||
#endif
|
|
||||||
else if (readLen >= 1) {
|
|
||||||
return parseCommand(readLen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (waiter.revents & POLLERR) {
|
} else if (waiter.revents & POLLERR) {
|
||||||
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
|
std::cerr << "Poll error error on pipe " << watchdog::FIFO_NAME << std::endl;
|
||||||
return LoopResult::FAULT;
|
resultQueue.push(LoopResult::FAULT);
|
||||||
} else if (waiter.revents & POLLHUP) {
|
} else if (waiter.revents & POLLHUP) {
|
||||||
// Writer closed its end
|
// Writer closed its end
|
||||||
return LoopResult::HUNG_UP;
|
resultQueue.push(LoopResult::HUNG_UP);
|
||||||
}
|
}
|
||||||
return LoopResult::FAULT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WatchdogTask::LoopResult WatchdogTask::parseCommand(ssize_t readLen) {
|
void WatchdogTask::parseCommands(ssize_t readLen) {
|
||||||
char readChar = buf[0];
|
for (ssize_t idx = 0; idx < readLen; idx++) {
|
||||||
// Cancel request
|
char nextChar = buf[idx];
|
||||||
if (readChar == watchdog::first::CANCEL_CHAR) {
|
// Cancel request
|
||||||
return LoopResult::CANCEL_REQ;
|
if (nextChar == watchdog::first::CANCEL_CHAR) {
|
||||||
} else if (readChar == watchdog::first::SUSPEND_CHAR) {
|
resultQueue.push(LoopResult::CANCEL_REQ);
|
||||||
// Suspend request
|
} else if (nextChar == watchdog::first::SUSPEND_CHAR) {
|
||||||
return LoopResult::SUSPEND_REQ;
|
// Suspend request
|
||||||
} else if (readChar == watchdog::first::START_CHAR) {
|
resultQueue.push(LoopResult::SUSPEND_REQ);
|
||||||
if (readLen == 2 and static_cast<char>(buf[1]) == watchdog::second::WATCH_FLAG) {
|
} else if (nextChar == watchdog::first::START_CHAR) {
|
||||||
return LoopResult::START_WITH_WATCH_REQ;
|
if (idx < readLen - 1 and static_cast<char>(buf[idx + 1]) == watchdog::second::WATCH_FLAG) {
|
||||||
|
resultQueue.push(LoopResult::START_WITH_WATCH_REQ);
|
||||||
|
idx++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
resultQueue.push(LoopResult::START_REQ);
|
||||||
|
} else if (nextChar == watchdog::first::IDLE_CHAR) {
|
||||||
|
resultQueue.push(LoopResult::OK);
|
||||||
}
|
}
|
||||||
return LoopResult::START_REQ;
|
|
||||||
}
|
}
|
||||||
// Everything else: All working as expected
|
// Everything else: All working as expected
|
||||||
return LoopResult::OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int WatchdogTask::performRunningOperation() {
|
int WatchdogTask::performRunningOperation() {
|
||||||
@ -167,11 +190,12 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (not notRunningStart.has_value()) {
|
if (not notRunningStart.has_value()) {
|
||||||
notRunningStart = std::chrono::system_clock::now();
|
notRunningStart = std::chrono::steady_clock::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (obswRunning) {
|
if (obswRunning) {
|
||||||
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
#if WATCHDOG_CREATE_FILE_IF_RUNNING == 1
|
||||||
|
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
|
||||||
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||||
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
@ -184,7 +208,7 @@ int WatchdogTask::performNotRunningOperation(LoopResult type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (watchingObsw) {
|
if (watchingObsw) {
|
||||||
auto timeNotRunning = std::chrono::system_clock::now() - notRunningStart.value();
|
auto timeNotRunning = std::chrono::steady_clock::now() - notRunningStart.value();
|
||||||
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
|
if (std::chrono::duration_cast<std::chrono::milliseconds>(timeNotRunning).count() >
|
||||||
watchdog::MAX_NOT_RUNNING_MS) {
|
watchdog::MAX_NOT_RUNNING_MS) {
|
||||||
std::cout << "Restarting OBSW with systemctl" << std::endl;
|
std::cout << "Restarting OBSW with systemctl" << std::endl;
|
||||||
@ -269,7 +293,7 @@ bool WatchdogTask::stateMachine(LoopResult loopResult) {
|
|||||||
sleep = true;
|
sleep = true;
|
||||||
}
|
}
|
||||||
if (sleep) {
|
if (sleep) {
|
||||||
std::this_thread::sleep_for(1000ms);
|
std::this_thread::sleep_for(500ms);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <queue>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class WatchdogTask {
|
class WatchdogTask {
|
||||||
@ -35,15 +36,17 @@ class WatchdogTask {
|
|||||||
bool watchingObsw = false;
|
bool watchingObsw = false;
|
||||||
bool printNotRunningLatch = false;
|
bool printNotRunningLatch = false;
|
||||||
std::array<uint8_t, 64> buf;
|
std::array<uint8_t, 64> buf;
|
||||||
std::optional<std::chrono::time_point<std::chrono::system_clock>> notRunningStart;
|
std::queue<LoopResult> resultQueue;
|
||||||
|
|
||||||
|
std::optional<std::chrono::time_point<std::chrono::steady_clock>> notRunningStart;
|
||||||
States state = States::NOT_STARTED;
|
States state = States::NOT_STARTED;
|
||||||
|
|
||||||
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
|
// Primary loop. Takes care of delaying, and reading from the communication pipe and translating
|
||||||
// messages to loop results.
|
// messages to loop results.
|
||||||
LoopResult watchdogLoop();
|
void watchdogLoop();
|
||||||
bool stateMachine(LoopResult result);
|
bool stateMachine(LoopResult result);
|
||||||
LoopResult pollEvent(struct pollfd& waiter);
|
void pollEvent(struct pollfd& waiter);
|
||||||
LoopResult parseCommand(ssize_t readLen);
|
void parseCommands(ssize_t readLen);
|
||||||
|
|
||||||
int performRunningOperation();
|
int performRunningOperation();
|
||||||
int performNotRunningOperation(LoopResult type);
|
int performNotRunningOperation(LoopResult type);
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
|
|
||||||
|
#include <filesystem>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#include "Watchdog.h"
|
#include "Watchdog.h"
|
||||||
|
#include "definitions.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
|
* @brief This watchdog application uses a FIFO to check whether the OBSW is still running.
|
||||||
@ -8,6 +12,13 @@
|
|||||||
*/
|
*/
|
||||||
int main() {
|
int main() {
|
||||||
std::cout << "Starting OBSW watchdog" << std::endl;
|
std::cout << "Starting OBSW watchdog" << std::endl;
|
||||||
|
if (std::filesystem::exists(watchdog::RUNNING_FILE_NAME)) {
|
||||||
|
std::cout << "Removing " << watchdog::RUNNING_FILE_NAME << std::endl;
|
||||||
|
int result = std::remove(watchdog::RUNNING_FILE_NAME.c_str());
|
||||||
|
if (result != 0) {
|
||||||
|
std::cerr << "file removal failure" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
WatchdogTask watchdogTask;
|
WatchdogTask watchdogTask;
|
||||||
int result = watchdogTask.performOperation();
|
int result = watchdogTask.performOperation();
|
||||||
|
Loading…
Reference in New Issue
Block a user