re-work some FDIR logic #172

Merged
muellerr merged 2 commits from improve-dev-fdir into develop 2024-04-09 10:47:33 +02:00
4 changed files with 25 additions and 26 deletions

View File

@ -26,6 +26,11 @@ ReturnValue_t DeviceHandlerFailureIsolation::eventReceived(EventMessage* event)
if (isFdirInActionOrAreWeFaulty(event)) {
return returnvalue::OK;
}
// As mentioned in the function documentation, no FDIR reaction are performed when the device
// is in external control.
if (owner->getHealth() == HasHealthIF::EXTERNAL_CONTROL) {
return returnvalue::OK;
}
ReturnValue_t result = returnvalue::FAILED;
switch (event->getEvent()) {
case HasModesIF::MODE_TRANSITION_FAILED:
@ -186,15 +191,6 @@ void DeviceHandlerFailureIsolation::setFdirState(FDIRState state) {
fdirState = state;
}
void DeviceHandlerFailureIsolation::triggerEvent(Event event, uint32_t parameter1,
Review

Is this function not called or why remove it?

If the base function is used instead, events will get triggered during recoveries which might break stuff.

Is this function not called or why remove it? If the base function is used instead, events will get triggered during recoveries which might break stuff.
Review

Aren't FDIR reactions ignored if the FDIR is active? Also, aren't there cases where I'd at least want to see the events even if the FDIR is active?

Aren't FDIR reactions ignored if the FDIR is active? Also, aren't there cases where I'd at least want to see the events even if the FDIR is active?
Review

I don't know for sure. what will happen if another event is triggered during recoveries. For now it seems ok as the FDIR does not react. Although, higher level FDIRs will react if the system has any.

I don't know for sure. what will happen if another event is triggered during recoveries. For now it seems ok as the FDIR does not react. Although, higher level FDIRs will react if the system has any.
Review

Have you watched any events during testing while in recovery? it might filter event spam.

Have you watched any events during testing while in recovery? it might filter event spam.
Review

I think this should be fine.. isn't is a configuration error if a device throws tons of unexpected events for reboot handling or when being faulty? I tested this for EIVE and did not receive any unexpected event spam so far.

I think this should be fine.. isn't is a configuration error if a device throws tons of unexpected events for reboot handling or when being faulty? I tested this for EIVE and did not receive any unexpected event spam so far.
uint32_t parameter2) {
// Do not throw error events if fdirState != none.
// This will still forward MODE and HEALTH INFO events in any case.
if (fdirState == NONE || event::getSeverity(event) == severity::INFO) {
FailureIsolationBase::triggerEvent(event, parameter1, parameter2);
}
}
bool DeviceHandlerFailureIsolation::isFdirActionInProgress() { return (fdirState != NONE); }
void DeviceHandlerFailureIsolation::startRecovery(Event reason) {

View File

@ -17,7 +17,6 @@ class DeviceHandlerFailureIsolation : public FailureIsolationBase {
uint8_t eventQueueDepth = 10);
~DeviceHandlerFailureIsolation();
ReturnValue_t initialize();
void triggerEvent(Event event, uint32_t parameter1 = 0, uint32_t parameter2 = 0);
bool isFdirActionInProgress();
virtual ReturnValue_t getParameter(uint8_t domainId, uint8_t uniqueId,
ParameterWrapper* parameterWrapper,
@ -41,6 +40,19 @@ class DeviceHandlerFailureIsolation : public FailureIsolationBase {
static const uint32_t DEFAULT_MAX_MISSED_REPLY_COUNT = 5;
static const uint32_t DEFAULT_MISSED_REPLY_TIME_MS = 10000;
/**
* This is the default implementation of the eventReceived function.
*
* It will perform recoveries or failures on a pre-defined set of events. If the user wants
* to add handling for custom events, this function should be overriden.
*
* It should be noted that the default implementation will not perform FDIR reactions if the
* handler is faulty or in external control by default. If the user commands the device
* manually, this might be related to debugging to testing the device in a low-level way. FDIR
* reactions might get in the way of this process by restarting the device or putting it in
* the faulty state. If the user still requires FDIR handling in the EXTERNAL_CONTROL case,
* this function should be overriden.
*/
virtual ReturnValue_t eventReceived(EventMessage* event);
virtual void eventConfirmed(EventMessage* event);
void wasParentsFault(EventMessage* event);

View File

@ -148,25 +148,16 @@ void FailureIsolationBase::doConfirmFault(EventMessage* event) {
ReturnValue_t FailureIsolationBase::confirmFault(EventMessage* event) { return YOUR_FAULT; }
void FailureIsolationBase::triggerEvent(Event event, uint32_t parameter1, uint32_t parameter2) {
// With this mechanism, all events are disabled for a certain device.
// That's not so good for visibility.
if (isFdirDisabledForSeverity(event::getSeverity(event))) {
return;
}
// By default, we trigger all events and also call the handler function to handle FDIR reactions
// which might occur due to these events. This makes all events visible. If the handling of
// FDIR reaction should be disabled, this should be done through dedicated logic inside the
// eventReceived function.
EventMessage message(event, ownerId, parameter1, parameter2);
EventManagerIF::triggerEvent(&message, eventQueue->getId());
eventReceived(&message);
}
bool FailureIsolationBase::isFdirDisabledForSeverity(EventSeverity_t severity) {
if ((owner != NULL) && (severity != severity::INFO)) {
if (owner->getHealth() == HasHealthIF::EXTERNAL_CONTROL) {
// External control disables handling of fault messages.
return true;
}
}
return false;
}
bool FailureIsolationBase::isFdirDisabledForSeverity(EventSeverity_t severity) { return false; }
void FailureIsolationBase::throwFdirEvent(Event event, uint32_t parameter1, uint32_t parameter2) {
EventMessage message(event, ownerId, parameter1, parameter2);

View File

@ -44,13 +44,13 @@ class FailureIsolationBase : public ConfirmsFailuresIF, public HasParametersIF {
virtual void wasParentsFault(EventMessage* event);
virtual ReturnValue_t confirmFault(EventMessage* event);
virtual void decrementFaultCounters() = 0;
virtual bool isFdirDisabledForSeverity(EventSeverity_t severity);
ReturnValue_t sendConfirmationRequest(EventMessage* event,
MessageQueueId_t destination = MessageQueueIF::NO_QUEUE);
void throwFdirEvent(Event event, uint32_t parameter1 = 0, uint32_t parameter2 = 0);
private:
void doConfirmFault(EventMessage* event);
bool isFdirDisabledForSeverity(EventSeverity_t severity);
};
#endif /* FRAMEWORK_FDIR_FAILUREISOLATIONBASE_H_ */
#endif /* FRAMEWORK_FDIR */