2021-07-13 20:22:54 +02:00
|
|
|
#include "fsfw/devicehandlers/DeviceHandlerFailureIsolation.h"
|
2020-08-27 20:07:03 +02:00
|
|
|
|
2021-07-13 20:22:54 +02:00
|
|
|
#include "fsfw/devicehandlers/DeviceHandlerIF.h"
|
|
|
|
#include "fsfw/health/HealthTableIF.h"
|
2022-02-02 10:29:30 +01:00
|
|
|
#include "fsfw/modes/HasModesIF.h"
|
|
|
|
#include "fsfw/objectmanager/ObjectManager.h"
|
2021-07-13 20:22:54 +02:00
|
|
|
#include "fsfw/power/Fuse.h"
|
|
|
|
#include "fsfw/serviceinterface/ServiceInterfaceStream.h"
|
|
|
|
#include "fsfw/thermal/ThermalComponentIF.h"
|
2018-07-12 16:29:32 +02:00
|
|
|
|
2022-02-02 10:29:30 +01:00
|
|
|
// Object id used by initialize() to look up the ConfirmsFailuresIF instance whose event
// reception queue becomes the power confirmation queue. Starts out as objects::NO_OBJECT.
object_id_t DeviceHandlerFailureIsolation::powerConfirmationId = objects::NO_OBJECT;
|
2016-06-15 23:48:41 +02:00
|
|
|
|
2023-04-14 21:22:24 +02:00
|
|
|
/**
 * Builds the failure isolation helper of a device handler.
 *
 * @param owner            Forwarded to FailureIsolationBase.
 * @param parent           Forwarded to FailureIsolationBase.
 * @param eventQueueDepth  Forwarded to FailureIsolationBase.
 *
 * The three fault counters are set up with their DEFAULT_* limits and time windows.
 * Each one receives the current value of parameterDomainBase, which is post-incremented
 * after every use, so the order of the initialisers below must not be changed.
 */
DeviceHandlerFailureIsolation::DeviceHandlerFailureIsolation(object_id_t owner, object_id_t parent,
                                                             uint8_t eventQueueDepth)
    : FailureIsolationBase(owner, parent, eventQueueDepth),
      // Counter for replies that could not be interpreted or were not expected.
      strangeReplyCount(DEFAULT_MAX_STRANGE_REPLIES, DEFAULT_STRANGE_REPLIES_TIME_MS,
                        parameterDomainBase++),
      // Counter for replies that never arrived.
      missedReplyCount(DEFAULT_MAX_MISSED_REPLY_COUNT, DEFAULT_MISSED_REPLY_TIME_MS,
                       parameterDomainBase++),
      // Counter for recovery attempts.
      recoveryCounter(DEFAULT_MAX_REBOOT, DEFAULT_REBOOT_TIME_MS, parameterDomainBase++),
      // No FDIR action is running right after construction.
      fdirState(NONE) {
}
|
2016-06-15 23:48:41 +02:00
|
|
|
|
2022-02-02 10:29:30 +01:00
|
|
|
// Nothing to release here; members and the base class clean up after themselves.
DeviceHandlerFailureIsolation::~DeviceHandlerFailureIsolation() = default;
|
2016-06-15 23:48:41 +02:00
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Evaluates an incoming event and decides which FDIR reaction, if any, it triggers.
 *
 * @param event  The received event message.
 * @return returnvalue::OK if the event was handled here (this includes events that are
 *         deliberately ignored), returnvalue::FAILED if the event is unknown to this class.
 */
ReturnValue_t DeviceHandlerFailureIsolation::eventReceived(EventMessage* event) {
  // Nothing further is evaluated while an FDIR action is running or the owner is faulty.
  if (isFdirInActionOrAreWeFaulty(event)) {
    return returnvalue::OK;
  }

  const auto triggered = event->getEvent();
  switch (triggered) {
    case HasModesIF::MODE_TRANSITION_FAILED:
    case HasModesIF::OBJECT_IN_INVALID_MODE:
    case DeviceHandlerIF::DEVICE_WANTS_HARD_REBOOT:
      // A recovery is attempted as long as MAX_REBOOT allows it.
      // This might cause some AssemblyBase cycles, so the number should be kept low.
      handleRecovery(triggered);
      break;

    case DeviceHandlerIF::DEVICE_INTERPRETING_REPLY_FAILED:
    case DeviceHandlerIF::DEVICE_READING_REPLY_FAILED:
    case DeviceHandlerIF::DEVICE_UNREQUESTED_REPLY:
    case DeviceHandlerIF::DEVICE_UNKNOWN_REPLY:  // Some DH's generate generic reply-ids.
    case DeviceHandlerIF::DEVICE_BUILDING_COMMAND_FAILED:
      // All of these mean that the device sent nonsensical replies.
      if (strangeReplyCount.incrementAndCheck()) {
        handleRecovery(triggered);
      }
      break;

    case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
    case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
    // The two above should never be confirmed.
    case DeviceHandlerIF::DEVICE_MISSED_REPLY: {
      // Ask for a confirmation first. The miss is only counted locally when the
      // confirmation request itself could not be sent.
      const ReturnValue_t confirmation = sendConfirmationRequest(event);
      if (confirmation != returnvalue::OK && missedReplyCount.incrementAndCheck()) {
        handleRecovery(triggered);
      }
      break;
    }

    case StorageManagerIF::GET_DATA_FAILED:
    case StorageManagerIF::STORE_DATA_FAILED:
      // Rather strange bugs which occur in RAW mode only. Ignored.
      break;

    case DeviceHandlerIF::INVALID_DEVICE_COMMAND:
      // Ignored: this is a configuration problem which cannot be fixed in flight.
      break;

    case HasHealthIF::HEALTH_INFO:
    case HasModesIF::MODE_INFO:
    case HasModesIF::CHANGING_MODE:
      // No reaction, but the event counts as handled.
      break;

    //****Power*****
    case PowerSwitchIF::SWITCH_WENT_OFF:
      // Without a power confirmation queue there is nobody to ask, so nothing happens.
      if (powerConfirmation != MessageQueueIF::NO_QUEUE &&
          sendConfirmationRequest(event, powerConfirmation) == returnvalue::OK) {
        setFdirState(DEVICE_MIGHT_BE_OFF);
      }
      break;

    case Fuse::FUSE_WENT_OFF:
    // Not so good, because PCDU reacted.
    case Fuse::POWER_ABOVE_HIGH_LIMIT:
      // Better, because software detected over-current.
      setFaulty(triggered);
      break;

    case Fuse::POWER_BELOW_LOW_LIMIT:
      // The device might have got stuck during boot, so retry.
      handleRecovery(triggered);
      break;

    //****Thermal*****
    case ThermalComponentIF::COMPONENT_TEMP_LOW:
    case ThermalComponentIF::COMPONENT_TEMP_HIGH:
    case ThermalComponentIF::COMPONENT_TEMP_OOL_LOW:
    case ThermalComponentIF::COMPONENT_TEMP_OOL_HIGH:
      // The device is not really faulty, but it is required to stay off as long as possible.
      setFaulty(triggered);
      break;

    case ThermalComponentIF::TEMP_NOT_IN_OP_RANGE:
      // Information only, ignored.
      break;

    //*******Default monitoring variables. Are currently not used.*****
    //    case DeviceHandlerIF::MONITORING_LIMIT_EXCEEDED:
    //      setFaulty(event->getEvent());
    //      break;
    //    case DeviceHandlerIF::MONITORING_AMBIGUOUS:
    //      break;

    default:
      // Unknown event, somebody else has to take care of it.
      return returnvalue::FAILED;
  }
  return returnvalue::OK;
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Handles an event for which a previously requested confirmation has arrived.
 *
 * @param event  The confirmed event message.
 *
 * Confirmed communication failures are counted as missed replies; a confirmed
 * SWITCH_WENT_OFF leads straight to recovery handling. All other events are ignored.
 */
void DeviceHandlerFailureIsolation::eventConfirmed(EventMessage* event) {
  const auto confirmed = event->getEvent();

  const bool isCommunicationFailure =
      confirmed == DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED ||
      confirmed == DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED ||
      confirmed == DeviceHandlerIF::DEVICE_MISSED_REPLY;

  if (isCommunicationFailure) {
    if (missedReplyCount.incrementAndCheck()) {
      handleRecovery(confirmed);
    }
  } else if (confirmed == PowerSwitchIF::SWITCH_WENT_OFF) {
    // This means the switch went off for this one device only.
    handleRecovery(confirmed);
  }
  // Any other confirmed event requires no action.
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
void DeviceHandlerFailureIsolation::decrementFaultCounters() {
|
2022-02-02 10:29:30 +01:00
|
|
|
strangeReplyCount.checkForDecrement();
|
|
|
|
missedReplyCount.checkForDecrement();
|
|
|
|
recoveryCounter.checkForDecrement();
|
2016-06-15 23:48:41 +02:00
|
|
|
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Decides between another recovery attempt and declaring the device faulty.
 *
 * @param reason  The event which made a recovery necessary.
 *
 * The reply fault counters are cleared in any case. Afterwards the recovery counter is
 * incremented: if its check triggers, the device is set faulty, otherwise a recovery is
 * started.
 */
void DeviceHandlerFailureIsolation::handleRecovery(Event reason) {
  clearFaultCounters();

  const bool recoveryLimitHit = recoveryCounter.incrementAndCheck();
  if (recoveryLimitHit) {
    setFaulty(reason);
    return;
  }
  startRecovery(reason);
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Intentionally does nothing with the passed event.
 *
 * @param event  Unused.
 */
void DeviceHandlerFailureIsolation::wasParentsFault(EventMessage* event) {
  // It is considered better to ignore the SWITCH_WENT_OFF event here and to wait for a
  // system-wide reset instead. As a consequence, no fault message comes through until a
  // MODE_ or HEALTH_INFO message arrives -> Is that ok?
  // The same issue exists in TxFailureIsolation!
  //
  // Disabled reaction, kept for reference:
  //   if ((event->getEvent() == PowerSwitchIF::SWITCH_WENT_OFF)
  //       && (fdirState != RECOVERY_ONGOING)) {
  //     setFdirState(NONE);
  //   }
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
void DeviceHandlerFailureIsolation::clearFaultCounters() {
|
2022-02-02 10:29:30 +01:00
|
|
|
strangeReplyCount.clear();
|
|
|
|
missedReplyCount.clear();
|
2016-06-15 23:48:41 +02:00
|
|
|
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Initializes the base class and resolves the power confirmation queue.
 *
 * @return The result of FailureIsolationBase::initialize() if that failed,
 *         returnvalue::OK otherwise.
 *
 * A missing power confirmation object is not treated as an error; powerConfirmation is
 * simply left unchanged in that case.
 */
ReturnValue_t DeviceHandlerFailureIsolation::initialize() {
  const ReturnValue_t baseResult = FailureIsolationBase::initialize();
  if (baseResult != returnvalue::OK) {
#if FSFW_CPP_OSTREAM_ENABLED == 1
    sif::error << "DeviceHandlerFailureIsolation::initialize: Could not"
                  " initialize FailureIsolationBase."
               << std::endl;
#endif
    return baseResult;
  }

  // Look up the object which confirms power related failures, if one is configured.
  if (ConfirmsFailuresIF* power =
          ObjectManager::instance()->get<ConfirmsFailuresIF>(powerConfirmationId)) {
    powerConfirmation = power->getEventReceptionQueue();
  }

  return returnvalue::OK;
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Switches the FDIR state machine to a new state.
 *
 * @param state  The state to enter.
 *
 * An FDIR_CHANGED_STATE event carrying the new and the previous state is thrown before
 * the member is updated.
 */
void DeviceHandlerFailureIsolation::setFdirState(FDIRState state) {
  const FDIRState previousState = fdirState;
  // Announce the transition first: new state, then the state which is being left.
  FailureIsolationBase::throwFdirEvent(FDIR_CHANGED_STATE, state, previousState);
  fdirState = state;
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Forwards an event to FailureIsolationBase::triggerEvent() unless it has to be
 * suppressed.
 *
 * @param event       The event to trigger.
 * @param parameter1  First event parameter, passed on unchanged.
 * @param parameter2  Second event parameter, passed on unchanged.
 *
 * While fdirState is not NONE, only events of severity INFO get through. This keeps
 * MODE and HEALTH INFO events flowing in any case.
 */
void DeviceHandlerFailureIsolation::triggerEvent(Event event, uint32_t parameter1,
                                                 uint32_t parameter2) {
  const bool suppressed = fdirState != NONE && event::getSeverity(event) != severity::INFO;
  if (suppressed) {
    return;
  }
  FailureIsolationBase::triggerEvent(event, parameter1, parameter2);
}
|
|
|
|
|
2022-02-02 10:29:30 +01:00
|
|
|
/**
 * @return true if fdirState is anything other than NONE, false otherwise.
 */
bool DeviceHandlerFailureIsolation::isFdirActionInProgress() {
  const bool idle = (fdirState == NONE);
  return !idle;
}
|
2016-06-15 23:48:41 +02:00
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Initiates a recovery of the owner.
 *
 * @param reason  The event which caused the recovery; its event id is attached to the
 *                FDIR_STARTS_RECOVERY event.
 */
void DeviceHandlerFailureIsolation::startRecovery(Event reason) {
  // 1. Report that a recovery starts and why.
  throwFdirEvent(FDIR_STARTS_RECOVERY, event::getEventId(reason));
  // 2. Mark the owner as needing recovery.
  setOwnerHealth(HasHealthIF::NEEDS_RECOVERY);
  // 3. Remember that a recovery is running.
  setFdirState(RECOVERY_ONGOING);
}
|
|
|
|
|
2022-02-02 10:29:30 +01:00
|
|
|
/**
 * Offers a parameter request to the three fault counters, one after another.
 *
 * @param domainId          Passed on to the counters.
 * @param uniqueId          Passed on to the counters.
 * @param parameterWrapper  Passed on to the counters.
 * @param newValues         Passed on to the counters.
 * @param startAtIndex      Passed on to the counters.
 * @return The result of the first counter which answers with something other than
 *         INVALID_DOMAIN_ID, or INVALID_DOMAIN_ID if all three reject the domain.
 */
ReturnValue_t DeviceHandlerFailureIsolation::getParameter(uint8_t domainId, uint8_t uniqueId,
                                                          ParameterWrapper* parameterWrapper,
                                                          const ParameterWrapper* newValues,
                                                          uint16_t startAtIndex) {
  ReturnValue_t outcome =
      strangeReplyCount.getParameter(domainId, uniqueId, parameterWrapper, newValues, startAtIndex);

  // The next counter is only asked while the previous one did not recognise the domain.
  if (outcome == INVALID_DOMAIN_ID) {
    outcome = missedReplyCount.getParameter(domainId, uniqueId, parameterWrapper, newValues,
                                            startAtIndex);
  }
  if (outcome == INVALID_DOMAIN_ID) {
    outcome = recoveryCounter.getParameter(domainId, uniqueId, parameterWrapper, newValues,
                                           startAtIndex);
  }
  return outcome;
}
|
|
|
|
|
2018-07-12 16:29:32 +02:00
|
|
|
/**
 * Declares the owner faulty.
 *
 * @param reason  The event which caused this decision; its event id is attached to the
 *                FDIR_TURNS_OFF_DEVICE event.
 */
void DeviceHandlerFailureIsolation::setFaulty(Event reason) {
  // 1. Report that the device is being turned off and why.
  throwFdirEvent(FDIR_TURNS_OFF_DEVICE, event::getEventId(reason));
  // 2. Mark the owner as faulty.
  setOwnerHealth(HasHealthIF::FAULTY);
  // 3. Wait for the shutdown.
  setFdirState(AWAIT_SHUTDOWN);
}
|
2018-07-12 16:29:32 +02:00
|
|
|
|
2022-02-02 10:29:30 +01:00
|
|
|
/**
 * Tells the caller whether the passed event must be ignored, either because an FDIR
 * action is in progress or because the owner is already faulty.
 *
 * @param event  The event currently being processed. While an FDIR action is in
 *               progress it is also inspected to decide whether that action has ended.
 * @return true if the event shall not be evaluated any further, false otherwise.
 *         false is also returned if no owner is set (configuration error).
 */
bool DeviceHandlerFailureIsolation::isFdirInActionOrAreWeFaulty(EventMessage* event) {
  if (fdirState != NONE) {
    // Only wait for those events, ignore all others.
    // A HEALTH_INFO reporting HEALTHY ends any FDIR action.
    if (event->getParameter1() == HasHealthIF::HEALTHY &&
        event->getEvent() == HasHealthIF::HEALTH_INFO) {
      setFdirState(NONE);
    }
    // A MODE_INFO ends every FDIR action except an ongoing recovery.
    if (event->getEvent() == HasModesIF::MODE_INFO && fdirState != RECOVERY_ONGOING) {
      setFdirState(NONE);
    }
    // The event itself is swallowed even if it just ended the FDIR action.
    return true;
  }

  if (owner == nullptr) {
    // Configuration error.
#if FSFW_CPP_OSTREAM_ENABLED == 1
    sif::error << "DeviceHandlerFailureIsolation::"
               << "isFdirInActionOrAreWeFaulty: Owner not set!" << std::endl;
#endif
    return false;
  }

  // Read the health exactly once so that both comparisons are made against the same
  // value, even if the owner's health changes while this function runs.
  const auto ownerHealth = owner->getHealth();

  // Ignore all events in case device is already faulty.
  return ownerHealth == HasHealthIF::FAULTY || ownerHealth == HasHealthIF::PERMANENT_FAULTY;
}
|