1
0
forked from fsfw/fsfw
Files
action
container
contrib
controller
coordinates
datalinklayer
datapool
datapoolglob
datapoollocal
devicehandlers
AcceptsDeviceResponsesIF.h
AssemblyBase.cpp
AssemblyBase.h
ChildHandlerBase.cpp
ChildHandlerBase.h
ChildHandlerFDIR.cpp
ChildHandlerFDIR.h
CommunicationMessage.cpp
CommunicationMessage.h
CookieIF.h
DeviceCommunicationIF.h
DeviceHandlerBase.cpp
DeviceHandlerBase.h
DeviceHandlerFailureIsolation.cpp
DeviceHandlerFailureIsolation.h
DeviceHandlerIF.h
DeviceHandlerMessage.cpp
DeviceHandlerMessage.h
DeviceTmReportingWrapper.cpp
DeviceTmReportingWrapper.h
HealthDevice.cpp
HealthDevice.h
events
fdir
globalfunctions
health
housekeeping
internalError
ipc
memory
modes
monitoring
objectmanager
osal
parameters
power
pus
returnvalues
rmap
serialize
serviceinterface
storagemanager
subsystem
tasks
tcdistribution
thermal
timemanager
tmstorage
tmtcpacket
tmtcservices
.gitignore
LICENSE
NOTICE
framework.mk
fsfw/devicehandlers/DeviceHandlerFailureIsolation.cpp

252 lines
8.2 KiB
C++

#include <framework/devicehandlers/DeviceHandlerBase.h>
#include <framework/devicehandlers/DeviceHandlerFailureIsolation.h>
#include <framework/health/HealthTableIF.h>
#include <framework/power/Fuse.h>
#include <framework/serviceinterface/ServiceInterfaceStream.h>
#include <framework/thermal/ThermalComponentIF.h>
// Object ID used in initialize() to look up the ConfirmsFailuresIF object whose
// event queue receives the confirmation request for SWITCH_WENT_OFF events.
// Starts out as 0. NOTE(review): presumably assigned a real object ID elsewhere
// before initialize() runs - confirm where.
object_id_t DeviceHandlerFailureIsolation::powerConfirmationId = 0;
// Creates the failure isolation for a device handler.
//
// owner and parent are passed on unchanged to FailureIsolationBase. The three
// fault counters are configured from the MAX_... / ..._TIME_MS constants, the
// FDIR state starts as NONE and powerConfirmation starts as 0.
//
// Every counter is handed parameterDomainBase++, so the counters receive three
// consecutive values. Which counter gets which value depends on the order in
// which the members are initialized, i.e. their declaration order in the class.
// NOTE(review): confirm that order in the header before reordering members.
DeviceHandlerFailureIsolation::DeviceHandlerFailureIsolation(object_id_t owner,
object_id_t parent) :
FailureIsolationBase(owner, parent),
strangeReplyCount(MAX_STRANGE_REPLIES, STRANGE_REPLIES_TIME_MS,
parameterDomainBase++),
missedReplyCount( MAX_MISSED_REPLY_COUNT, MISSED_REPLY_TIME_MS,
parameterDomainBase++),
recoveryCounter(MAX_REBOOT, REBOOT_TIME_MS, parameterDomainBase++),
fdirState(NONE), powerConfirmation(0) {
}
// Nothing is released explicitly here; members are destroyed by their own
// destructors.
DeviceHandlerFailureIsolation::~DeviceHandlerFailureIsolation() = default;
// Handles a received event and decides how the FDIR reacts to it.
//
// Events are dropped (and reported as handled) while
// isFdirInActionOrAreWeFaulty() returns true. Otherwise the reaction depends on
// the Event value:
//  - mode failures and POWER_BELOW_LOW_LIMIT go straight to handleRecovery(),
//  - reply / command-building faults and missed replies are counted first and
//    only lead to handleRecovery() once the respective counter's
//    incrementAndCheck() returns true,
//  - fuse / power-above-limit and the listed thermal events go to setFaulty(),
//  - a number of informational events are accepted without any reaction.
//
// @param event the received event message.
// @return RETURN_OK if the event was handled or deliberately ignored,
//         RETURN_FAILED if the event is not known to this class.
ReturnValue_t DeviceHandlerFailureIsolation::eventReceived(EventMessage* event) {
// An FDIR action is already running or the owner is faulty: swallow the event.
if(isFdirInActionOrAreWeFaulty(event)) {
return RETURN_OK;
}
ReturnValue_t result = RETURN_FAILED;
switch (event->getEvent()) {
case HasModesIF::MODE_TRANSITION_FAILED:
case HasModesIF::OBJECT_IN_INVALID_MODE:
// We try a recovery as long as defined in MAX_REBOOT.
// Might cause some AssemblyBase cycles, so keep the number low.
handleRecovery(event->getEvent());
break;
case DeviceHandlerIF::DEVICE_INTERPRETING_REPLY_FAILED:
case DeviceHandlerIF::DEVICE_READING_REPLY_FAILED:
case DeviceHandlerIF::DEVICE_UNREQUESTED_REPLY:
case DeviceHandlerIF::DEVICE_UNKNOWN_REPLY: //Some DH's generate generic reply-ids.
case DeviceHandlerIF::DEVICE_BUILDING_COMMAND_FAILED:
// These faults all mean that the device sent replies which made no sense.
// Recovery is only started once the strange-reply counter reports true.
if (strangeReplyCount.incrementAndCheck()) {
handleRecovery(event->getEvent());
}
break;
case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
// The two above should never be confirmed.
// Intentional fall-through: all three cases share the handling below.
case DeviceHandlerIF::DEVICE_MISSED_REPLY:
// First try to have the failure confirmed. If the request went out, stop
// here. NOTE(review): handling presumably continues in eventConfirmed() -
// confirm against FailureIsolationBase.
result = sendConfirmationRequest(event);
if (result == HasReturnvaluesIF::RETURN_OK) {
break;
}
// else: the request could not be sent, so count the missed reply right away.
if (missedReplyCount.incrementAndCheck()) {
handleRecovery(event->getEvent());
}
break;
case StorageManagerIF::GET_DATA_FAILED:
case StorageManagerIF::STORE_DATA_FAILED:
// Rather strange bugs, occur in RAW mode only. Ignore.
break;
case DeviceHandlerIF::INVALID_DEVICE_COMMAND:
// Ignore, this is a bad configuration. We can't do anything in flight.
break;
case HasHealthIF::HEALTH_INFO:
case HasModesIF::MODE_INFO:
case HasModesIF::CHANGING_MODE:
// Do nothing, but mark as handled.
break;
//****Power*****
case PowerSwitchIF::SWITCH_WENT_OFF:
// Only acted upon if initialize() found a power confirmation object.
// Without one the event is marked as handled without any reaction.
if(hasPowerConfirmation) {
result = sendConfirmationRequest(event, powerConfirmation);
if (result == RETURN_OK) {
setFdirState(DEVICE_MIGHT_BE_OFF);
}
}
break;
case Fuse::FUSE_WENT_OFF:
// Not so good, because the PCDU reacted.
// Intentional fall-through: handled like POWER_ABOVE_HIGH_LIMIT.
case Fuse::POWER_ABOVE_HIGH_LIMIT:
// Better, because software detected the over-current.
setFaulty(event->getEvent());
break;
case Fuse::POWER_BELOW_LOW_LIMIT:
// Device might have got stuck during boot, retry.
handleRecovery(event->getEvent());
break;
//****Thermal*****
case ThermalComponentIF::COMPONENT_TEMP_LOW:
case ThermalComponentIF::COMPONENT_TEMP_HIGH:
case ThermalComponentIF::COMPONENT_TEMP_OOL_LOW:
case ThermalComponentIF::COMPONENT_TEMP_OOL_HIGH:
// Well, the device is not really faulty, but it is required to stay off as long as possible.
setFaulty(event->getEvent());
break;
case ThermalComponentIF::TEMP_NOT_IN_OP_RANGE:
// Ignore, is information only.
break;
//*******Default monitoring variables. Are currently not used.*****
// case DeviceHandlerIF::MONITORING_LIMIT_EXCEEDED:
// setFaulty(event->getEvent());
// break;
// case DeviceHandlerIF::MONITORING_AMBIGUOUS:
// break;
default:
// We don't know the event, someone else should handle it.
return RETURN_FAILED;
}
return RETURN_OK;
}
// Reacts to an event that has been confirmed.
//
// Communication failures (sending a command, requesting a reply, missed reply)
// are counted in missedReplyCount and trigger a recovery once
// incrementAndCheck() returns true. A confirmed SWITCH_WENT_OFF always triggers
// a recovery. All other events are ignored.
//
// @param event the confirmed event message.
void DeviceHandlerFailureIsolation::eventConfirmed(EventMessage* event) {
    bool recoveryNeeded = false;

    switch (event->getEvent()) {
    case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
    case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
    case DeviceHandlerIF::DEVICE_MISSED_REPLY:
        // Recover only once the missed-reply counter reports true.
        recoveryNeeded = missedReplyCount.incrementAndCheck();
        break;
    case PowerSwitchIF::SWITCH_WENT_OFF:
        // This means the switch went off only for one device.
        recoveryNeeded = true;
        break;
    default:
        // Not an event this class reacts to once confirmed.
        break;
    }

    if (recoveryNeeded) {
        handleRecovery(event->getEvent());
    }
}
void DeviceHandlerFailureIsolation::decrementFaultCounters() {
strangeReplyCount.checkForDecrement();
missedReplyCount.checkForDecrement();
recoveryCounter.checkForDecrement();
}
// Escalation step shared by all fault paths.
//
// Clears the reply fault counters, then counts this recovery attempt. If the
// recovery counter's incrementAndCheck() returns true the device is set faulty,
// otherwise a recovery is started.
//
// @param reason the event that made the recovery necessary.
void DeviceHandlerFailureIsolation::handleRecovery(Event reason) {
    clearFaultCounters();

    const bool limitReached = recoveryCounter.incrementAndCheck();
    if (limitReached) {
        setFaulty(reason);
        return;
    }
    startRecovery(reason);
}
// Intentionally does nothing at the moment; the event parameter is unused.
//
// @param event the event in question (currently ignored).
void DeviceHandlerFailureIsolation::wasParentsFault(EventMessage* event) {
// We'd better ignore the SWITCH_WENT_OFF event and await a system-wide reset.
// This means no fault message will come through until a MODE_ or
// HEALTH_INFO message comes through -> is that ok?
// Same issue in TxFailureIsolation!
// Disabled former reaction, kept for reference:
// if ((event->getEvent() == PowerSwitchIF::SWITCH_WENT_OFF)
// && (fdirState != RECOVERY_ONGOING)) {
// setFdirState(NONE);
// }
}
void DeviceHandlerFailureIsolation::clearFaultCounters() {
strangeReplyCount.clear();
missedReplyCount.clear();
}
// Initializes the base class and resolves the optional power confirmation.
//
// If the object registered under powerConfirmationId can be obtained as a
// ConfirmsFailuresIF, its event reception queue is stored and
// hasPowerConfirmation is set. A missing object is not treated as an error.
//
// @return the result of FailureIsolationBase::initialize() if that failed,
//         RETURN_OK otherwise.
ReturnValue_t DeviceHandlerFailureIsolation::initialize() {
    const ReturnValue_t baseResult = FailureIsolationBase::initialize();
    if (baseResult != HasReturnvaluesIF::RETURN_OK) {
        sif::error << "DeviceHandlerFailureIsolation::initialize: Could not"
                " initialize FailureIsolationBase." << std::endl;
        return baseResult;
    }

    ConfirmsFailuresIF* const confirmer =
            objectManager->get<ConfirmsFailuresIF>(powerConfirmationId);
    if (confirmer == nullptr) {
        // No power confirmation available; SWITCH_WENT_OFF stays unconfirmed.
        return RETURN_OK;
    }

    powerConfirmation = confirmer->getEventReceptionQueue();
    hasPowerConfirmation = true;
    return RETURN_OK;
}
// Switches the FDIR state and announces the change.
//
// An FDIR_CHANGED_STATE event is thrown with the new state as first and the
// previous state as second parameter. The event goes out before the member is
// updated.
//
// @param state the FDIR state to switch to.
void DeviceHandlerFailureIsolation::setFdirState(FDIRState state) {
    const FDIRState previousState = fdirState;
    FailureIsolationBase::throwFdirEvent(FDIR_CHANGED_STATE, state,
            previousState);
    fdirState = state;
}
// Forwards an event to FailureIsolationBase::triggerEvent() unless it has to be
// suppressed.
//
// While an FDIR action is in progress (fdirState != NONE) only events of
// severity INFO are passed on; everything else is dropped. With fdirState NONE
// every event is forwarded.
//
// @param event      the event to trigger.
// @param parameter1 first event parameter, passed on unchanged.
// @param parameter2 second event parameter, passed on unchanged.
void DeviceHandlerFailureIsolation::triggerEvent(Event event, uint32_t parameter1,
        uint32_t parameter2) {
    const bool suppressed = (fdirState != NONE)
            && not (EVENT::getSeverity(event) == SEVERITY::INFO);
    if (suppressed) {
        return;
    }
    FailureIsolationBase::triggerEvent(event, parameter1, parameter2);
}
// Reports whether an FDIR action is currently running.
//
// @return true for every FDIR state other than NONE.
bool DeviceHandlerFailureIsolation::isFdirActionInProgress() {
    const bool idle = (fdirState == NONE);
    return not idle;
}
// Starts a recovery of the owner.
//
// Throws FDIR_STARTS_RECOVERY carrying the event ID of the reason, sets the
// owner's health to NEEDS_RECOVERY and moves the FDIR state to
// RECOVERY_ONGOING, in that order.
//
// @param reason the event that caused the recovery.
void DeviceHandlerFailureIsolation::startRecovery(Event reason) {
    const auto reasonId = EVENT::getEventId(reason);
    throwFdirEvent(FDIR_STARTS_RECOVERY, reasonId);

    setOwnerHealth(HasHealthIF::NEEDS_RECOVERY);
    setFdirState(RECOVERY_ONGOING);
}
// Offers a parameter request to the three fault counters in turn.
//
// The counters are asked in the order strange replies, missed replies,
// recoveries. The first result other than INVALID_DOMAIN_ID is returned, so
// later counters are only asked if the earlier ones did not recognize the
// domain.
//
// @return the result of the first counter that did not answer
//         INVALID_DOMAIN_ID, or INVALID_DOMAIN_ID if none recognized the domain.
ReturnValue_t DeviceHandlerFailureIsolation::getParameter(uint8_t domainId,
        uint16_t parameterId, ParameterWrapper* parameterWrapper,
        const ParameterWrapper* newValues, uint16_t startAtIndex) {
    ReturnValue_t outcome = strangeReplyCount.getParameter(domainId,
            parameterId, parameterWrapper, newValues, startAtIndex);

    if (outcome == INVALID_DOMAIN_ID) {
        outcome = missedReplyCount.getParameter(domainId, parameterId,
                parameterWrapper, newValues, startAtIndex);
    }
    if (outcome == INVALID_DOMAIN_ID) {
        outcome = recoveryCounter.getParameter(domainId, parameterId,
                parameterWrapper, newValues, startAtIndex);
    }
    // Still INVALID_DOMAIN_ID here if no counter recognized the domain.
    return outcome;
}
// Declares the owner faulty.
//
// Throws FDIR_TURNS_OFF_DEVICE carrying the event ID of the reason, sets the
// owner's health to FAULTY and moves the FDIR state to AWAIT_SHUTDOWN, in that
// order.
//
// @param reason the event that caused the device to be set faulty.
void DeviceHandlerFailureIsolation::setFaulty(Event reason) {
    const auto reasonId = EVENT::getEventId(reason);
    throwFdirEvent(FDIR_TURNS_OFF_DEVICE, reasonId);

    setOwnerHealth(HasHealthIF::FAULTY);
    setFdirState(AWAIT_SHUTDOWN);
}
// Decides whether a received event must be ignored.
//
// While an FDIR action is in progress (fdirState != NONE) every event is
// ignored. Two kinds of events additionally end the FDIR action:
//  - HEALTH_INFO with parameter 1 equal to HEALTHY, in any FDIR state,
//  - MODE_INFO, unless a recovery is ongoing.
// With no FDIR action in progress, events are ignored if the owner's health is
// FAULTY or PERMANENT_FAULTY.
//
// @param event the received event message.
// @return true if the caller shall not process the event any further,
//         false if the event shall be handled normally.
bool DeviceHandlerFailureIsolation::isFdirInActionOrAreWeFaulty(
        EventMessage* event) {
    if (fdirState != NONE) {
        // Only wait for those events, ignore all others.
        if (event->getParameter1() == HasHealthIF::HEALTHY
                && event->getEvent() == HasHealthIF::HEALTH_INFO) {
            setFdirState(NONE);
        }
        // Evaluated against the possibly just-updated fdirState.
        if (event->getEvent() == HasModesIF::MODE_INFO
                && fdirState != RECOVERY_ONGOING) {
            setFdirState(NONE);
        }
        return true;
    }
    if (owner == nullptr) {
        // No owner object to query, so there is no health state that could be
        // faulty. Treat the event as processable instead of dereferencing a
        // null pointer. Not logged, because this runs for every received event.
        return false;
    }
    // Query the health once so both comparisons see the same value.
    const auto ownerHealth = owner->getHealth();
    if (ownerHealth == HasHealthIF::FAULTY
            || ownerHealth == HasHealthIF::PERMANENT_FAULTY) {
        // Ignore all events in case device is already faulty.
        return true;
    }
    return false;
}