took over dHB failure isolation base fixes
This commit is contained in:
parent
4d1b1ba506
commit
a6e99e443a
@ -1,251 +1,251 @@
|
||||
#include "../devicehandlers/DeviceHandlerBase.h"
|
||||
#include "../devicehandlers/DeviceHandlerFailureIsolation.h"
|
||||
#include "../health/HealthTableIF.h"
|
||||
#include "../power/Fuse.h"
|
||||
#include "../serviceinterface/ServiceInterfaceStream.h"
|
||||
#include "../thermal/ThermalComponentIF.h"
|
||||
|
||||
object_id_t DeviceHandlerFailureIsolation::powerConfirmationId = 0;
|
||||
|
||||
DeviceHandlerFailureIsolation::DeviceHandlerFailureIsolation(object_id_t owner,
|
||||
object_id_t parent) :
|
||||
FailureIsolationBase(owner, parent),
|
||||
strangeReplyCount(MAX_STRANGE_REPLIES, STRANGE_REPLIES_TIME_MS,
|
||||
parameterDomainBase++),
|
||||
missedReplyCount( MAX_MISSED_REPLY_COUNT, MISSED_REPLY_TIME_MS,
|
||||
parameterDomainBase++),
|
||||
recoveryCounter(MAX_REBOOT, REBOOT_TIME_MS, parameterDomainBase++),
|
||||
fdirState(NONE), powerConfirmation(0) {
|
||||
}
|
||||
|
||||
DeviceHandlerFailureIsolation::~DeviceHandlerFailureIsolation() {
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::eventReceived(EventMessage* event) {
|
||||
if(isFdirInActionOrAreWeFaulty(event)) {
|
||||
return RETURN_OK;
|
||||
}
|
||||
ReturnValue_t result = RETURN_FAILED;
|
||||
switch (event->getEvent()) {
|
||||
case HasModesIF::MODE_TRANSITION_FAILED:
|
||||
case HasModesIF::OBJECT_IN_INVALID_MODE:
|
||||
//We'll try a recovery as long as defined in MAX_REBOOT.
|
||||
//Might cause some AssemblyBase cycles, so keep number low.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
case DeviceHandlerIF::DEVICE_INTERPRETING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_READING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_UNREQUESTED_REPLY:
|
||||
case DeviceHandlerIF::DEVICE_UNKNOWN_REPLY: //Some DH's generate generic reply-ids.
|
||||
case DeviceHandlerIF::DEVICE_BUILDING_COMMAND_FAILED:
|
||||
//These faults all mean that there were stupid replies from a device.
|
||||
if (strangeReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
|
||||
//The two above should never be confirmed.
|
||||
case DeviceHandlerIF::DEVICE_MISSED_REPLY:
|
||||
result = sendConfirmationRequest(event);
|
||||
if (result == HasReturnvaluesIF::RETURN_OK) {
|
||||
break;
|
||||
}
|
||||
//else
|
||||
if (missedReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case StorageManagerIF::GET_DATA_FAILED:
|
||||
case StorageManagerIF::STORE_DATA_FAILED:
|
||||
//Rather strange bugs, occur in RAW mode only. Ignore.
|
||||
break;
|
||||
case DeviceHandlerIF::INVALID_DEVICE_COMMAND:
|
||||
//Ignore, is bad configuration. We can't do anything in flight.
|
||||
break;
|
||||
case HasHealthIF::HEALTH_INFO:
|
||||
case HasModesIF::MODE_INFO:
|
||||
case HasModesIF::CHANGING_MODE:
|
||||
//Do nothing, but mark as handled.
|
||||
break;
|
||||
//****Power*****
|
||||
case PowerSwitchIF::SWITCH_WENT_OFF:
|
||||
if(hasPowerConfirmation) {
|
||||
result = sendConfirmationRequest(event, powerConfirmation);
|
||||
if (result == RETURN_OK) {
|
||||
setFdirState(DEVICE_MIGHT_BE_OFF);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Fuse::FUSE_WENT_OFF:
|
||||
//Not so good, because PCDU reacted.
|
||||
case Fuse::POWER_ABOVE_HIGH_LIMIT:
|
||||
//Better, because software detected over-current.
|
||||
setFaulty(event->getEvent());
|
||||
break;
|
||||
case Fuse::POWER_BELOW_LOW_LIMIT:
|
||||
//Device might got stuck during boot, retry.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
//****Thermal*****
|
||||
case ThermalComponentIF::COMPONENT_TEMP_LOW:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_HIGH:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_OOL_LOW:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_OOL_HIGH:
|
||||
//Well, the device is not really faulty, but it is required to stay off as long as possible.
|
||||
setFaulty(event->getEvent());
|
||||
break;
|
||||
case ThermalComponentIF::TEMP_NOT_IN_OP_RANGE:
|
||||
//Ignore, is information only.
|
||||
break;
|
||||
//*******Default monitoring variables. Are currently not used.*****
|
||||
// case DeviceHandlerIF::MONITORING_LIMIT_EXCEEDED:
|
||||
// setFaulty(event->getEvent());
|
||||
// break;
|
||||
// case DeviceHandlerIF::MONITORING_AMBIGUOUS:
|
||||
// break;
|
||||
default:
|
||||
//We don't know the event, someone else should handle it.
|
||||
return RETURN_FAILED;
|
||||
}
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::eventConfirmed(EventMessage* event) {
|
||||
switch (event->getEvent()) {
|
||||
case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_MISSED_REPLY:
|
||||
if (missedReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case PowerSwitchIF::SWITCH_WENT_OFF:
|
||||
//This means the switch went off only for one device.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::decrementFaultCounters() {
|
||||
strangeReplyCount.checkForDecrement();
|
||||
missedReplyCount.checkForDecrement();
|
||||
recoveryCounter.checkForDecrement();
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::handleRecovery(Event reason) {
|
||||
clearFaultCounters();
|
||||
if (not recoveryCounter.incrementAndCheck()) {
|
||||
startRecovery(reason);
|
||||
} else {
|
||||
setFaulty(reason);
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::wasParentsFault(EventMessage* event) {
|
||||
//We'll better ignore the SWITCH_WENT_OFF event and await a system-wide reset.
|
||||
//This means, no fault message will come through until a MODE_ or
|
||||
//HEALTH_INFO message comes through -> Is that ok?
|
||||
//Same issue in TxFailureIsolation!
|
||||
// if ((event->getEvent() == PowerSwitchIF::SWITCH_WENT_OFF)
|
||||
// && (fdirState != RECOVERY_ONGOING)) {
|
||||
// setFdirState(NONE);
|
||||
// }
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::clearFaultCounters() {
|
||||
strangeReplyCount.clear();
|
||||
missedReplyCount.clear();
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::initialize() {
|
||||
ReturnValue_t result = FailureIsolationBase::initialize();
|
||||
if (result != HasReturnvaluesIF::RETURN_OK) {
|
||||
sif::error << "DeviceHandlerFailureIsolation::initialize: Could not"
|
||||
" initialize FailureIsolationBase." << std::endl;
|
||||
return result;
|
||||
}
|
||||
ConfirmsFailuresIF* power = objectManager->get<ConfirmsFailuresIF>(
|
||||
powerConfirmationId);
|
||||
if (power != nullptr) {
|
||||
powerConfirmation = power->getEventReceptionQueue();
|
||||
hasPowerConfirmation = true;
|
||||
}
|
||||
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::setFdirState(FDIRState state) {
|
||||
FailureIsolationBase::throwFdirEvent(FDIR_CHANGED_STATE, state, fdirState);
|
||||
fdirState = state;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::triggerEvent(Event event, uint32_t parameter1,
|
||||
uint32_t parameter2) {
|
||||
//Do not throw error events if fdirState != none.
|
||||
//This will still forward MODE and HEALTH INFO events in any case.
|
||||
if (fdirState == NONE || EVENT::getSeverity(event) == SEVERITY::INFO) {
|
||||
FailureIsolationBase::triggerEvent(event, parameter1, parameter2);
|
||||
}
|
||||
}
|
||||
|
||||
bool DeviceHandlerFailureIsolation::isFdirActionInProgress() {
|
||||
return (fdirState != NONE);
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::startRecovery(Event reason) {
|
||||
throwFdirEvent(FDIR_STARTS_RECOVERY, EVENT::getEventId(reason));
|
||||
setOwnerHealth(HasHealthIF::NEEDS_RECOVERY);
|
||||
setFdirState(RECOVERY_ONGOING);
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::getParameter(uint8_t domainId,
|
||||
uint16_t parameterId, ParameterWrapper* parameterWrapper,
|
||||
const ParameterWrapper* newValues, uint16_t startAtIndex) {
|
||||
ReturnValue_t result = strangeReplyCount.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
result = missedReplyCount.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
result = recoveryCounter.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
return INVALID_DOMAIN_ID;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::setFaulty(Event reason) {
|
||||
throwFdirEvent(FDIR_TURNS_OFF_DEVICE, EVENT::getEventId(reason));
|
||||
setOwnerHealth(HasHealthIF::FAULTY);
|
||||
setFdirState(AWAIT_SHUTDOWN);
|
||||
}
|
||||
|
||||
bool DeviceHandlerFailureIsolation::isFdirInActionOrAreWeFaulty(
|
||||
EventMessage* event) {
|
||||
if (fdirState != NONE) {
|
||||
//Only wait for those events, ignore all others.
|
||||
if (event->getParameter1() == HasHealthIF::HEALTHY
|
||||
&& event->getEvent() == HasHealthIF::HEALTH_INFO) {
|
||||
setFdirState(NONE);
|
||||
}
|
||||
if (event->getEvent() == HasModesIF::MODE_INFO
|
||||
&& fdirState != RECOVERY_ONGOING) {
|
||||
setFdirState(NONE);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (owner->getHealth() == HasHealthIF::FAULTY
|
||||
|| owner->getHealth() == HasHealthIF::PERMANENT_FAULTY) {
|
||||
//Ignore all events in case device is already faulty.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#include "DeviceHandlerBase.h"
|
||||
#include "DeviceHandlerFailureIsolation.h"
|
||||
#include "../health/HealthTableIF.h"
|
||||
#include "../power/Fuse.h"
|
||||
#include "../serviceinterface/ServiceInterfaceStream.h"
|
||||
#include "../thermal/ThermalComponentIF.h"
|
||||
|
||||
object_id_t DeviceHandlerFailureIsolation::powerConfirmationId =
|
||||
objects::NO_OBJECT;
|
||||
|
||||
DeviceHandlerFailureIsolation::DeviceHandlerFailureIsolation(object_id_t owner,
|
||||
object_id_t parent) :
|
||||
FailureIsolationBase(owner, parent),
|
||||
strangeReplyCount(MAX_STRANGE_REPLIES, STRANGE_REPLIES_TIME_MS,
|
||||
parameterDomainBase++),
|
||||
missedReplyCount( MAX_MISSED_REPLY_COUNT, MISSED_REPLY_TIME_MS,
|
||||
parameterDomainBase++),
|
||||
recoveryCounter(MAX_REBOOT, REBOOT_TIME_MS, parameterDomainBase++),
|
||||
fdirState(NONE), powerConfirmation(0) {
|
||||
}
|
||||
|
||||
DeviceHandlerFailureIsolation::~DeviceHandlerFailureIsolation() {
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::eventReceived(EventMessage* event) {
|
||||
if(isFdirInActionOrAreWeFaulty(event)) {
|
||||
return RETURN_OK;
|
||||
}
|
||||
ReturnValue_t result = RETURN_FAILED;
|
||||
switch (event->getEvent()) {
|
||||
case HasModesIF::MODE_TRANSITION_FAILED:
|
||||
case HasModesIF::OBJECT_IN_INVALID_MODE:
|
||||
//We'll try a recovery as long as defined in MAX_REBOOT.
|
||||
//Might cause some AssemblyBase cycles, so keep number low.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
case DeviceHandlerIF::DEVICE_INTERPRETING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_READING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_UNREQUESTED_REPLY:
|
||||
case DeviceHandlerIF::DEVICE_UNKNOWN_REPLY: //Some DH's generate generic reply-ids.
|
||||
case DeviceHandlerIF::DEVICE_BUILDING_COMMAND_FAILED:
|
||||
//These faults all mean that there were stupid replies from a device.
|
||||
if (strangeReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
|
||||
//The two above should never be confirmed.
|
||||
case DeviceHandlerIF::DEVICE_MISSED_REPLY:
|
||||
result = sendConfirmationRequest(event);
|
||||
if (result == HasReturnvaluesIF::RETURN_OK) {
|
||||
break;
|
||||
}
|
||||
//else
|
||||
if (missedReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case StorageManagerIF::GET_DATA_FAILED:
|
||||
case StorageManagerIF::STORE_DATA_FAILED:
|
||||
//Rather strange bugs, occur in RAW mode only. Ignore.
|
||||
break;
|
||||
case DeviceHandlerIF::INVALID_DEVICE_COMMAND:
|
||||
//Ignore, is bad configuration. We can't do anything in flight.
|
||||
break;
|
||||
case HasHealthIF::HEALTH_INFO:
|
||||
case HasModesIF::MODE_INFO:
|
||||
case HasModesIF::CHANGING_MODE:
|
||||
//Do nothing, but mark as handled.
|
||||
break;
|
||||
//****Power*****
|
||||
case PowerSwitchIF::SWITCH_WENT_OFF:
|
||||
if(powerConfirmation != MessageQueueIF::NO_QUEUE) {
|
||||
result = sendConfirmationRequest(event, powerConfirmation);
|
||||
if (result == RETURN_OK) {
|
||||
setFdirState(DEVICE_MIGHT_BE_OFF);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Fuse::FUSE_WENT_OFF:
|
||||
//Not so good, because PCDU reacted.
|
||||
case Fuse::POWER_ABOVE_HIGH_LIMIT:
|
||||
//Better, because software detected over-current.
|
||||
setFaulty(event->getEvent());
|
||||
break;
|
||||
case Fuse::POWER_BELOW_LOW_LIMIT:
|
||||
//Device might got stuck during boot, retry.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
//****Thermal*****
|
||||
case ThermalComponentIF::COMPONENT_TEMP_LOW:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_HIGH:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_OOL_LOW:
|
||||
case ThermalComponentIF::COMPONENT_TEMP_OOL_HIGH:
|
||||
//Well, the device is not really faulty, but it is required to stay off as long as possible.
|
||||
setFaulty(event->getEvent());
|
||||
break;
|
||||
case ThermalComponentIF::TEMP_NOT_IN_OP_RANGE:
|
||||
//Ignore, is information only.
|
||||
break;
|
||||
//*******Default monitoring variables. Are currently not used.*****
|
||||
// case DeviceHandlerIF::MONITORING_LIMIT_EXCEEDED:
|
||||
// setFaulty(event->getEvent());
|
||||
// break;
|
||||
// case DeviceHandlerIF::MONITORING_AMBIGUOUS:
|
||||
// break;
|
||||
default:
|
||||
//We don't know the event, someone else should handle it.
|
||||
return RETURN_FAILED;
|
||||
}
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::eventConfirmed(EventMessage* event) {
|
||||
switch (event->getEvent()) {
|
||||
case DeviceHandlerIF::DEVICE_SENDING_COMMAND_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_REQUESTING_REPLY_FAILED:
|
||||
case DeviceHandlerIF::DEVICE_MISSED_REPLY:
|
||||
if (missedReplyCount.incrementAndCheck()) {
|
||||
handleRecovery(event->getEvent());
|
||||
}
|
||||
break;
|
||||
case PowerSwitchIF::SWITCH_WENT_OFF:
|
||||
//This means the switch went off only for one device.
|
||||
handleRecovery(event->getEvent());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::decrementFaultCounters() {
|
||||
strangeReplyCount.checkForDecrement();
|
||||
missedReplyCount.checkForDecrement();
|
||||
recoveryCounter.checkForDecrement();
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::handleRecovery(Event reason) {
|
||||
clearFaultCounters();
|
||||
if (not recoveryCounter.incrementAndCheck()) {
|
||||
startRecovery(reason);
|
||||
} else {
|
||||
setFaulty(reason);
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::wasParentsFault(EventMessage* event) {
|
||||
//We'll better ignore the SWITCH_WENT_OFF event and await a system-wide reset.
|
||||
//This means, no fault message will come through until a MODE_ or
|
||||
//HEALTH_INFO message comes through -> Is that ok?
|
||||
//Same issue in TxFailureIsolation!
|
||||
// if ((event->getEvent() == PowerSwitchIF::SWITCH_WENT_OFF)
|
||||
// && (fdirState != RECOVERY_ONGOING)) {
|
||||
// setFdirState(NONE);
|
||||
// }
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::clearFaultCounters() {
|
||||
strangeReplyCount.clear();
|
||||
missedReplyCount.clear();
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::initialize() {
|
||||
ReturnValue_t result = FailureIsolationBase::initialize();
|
||||
if (result != HasReturnvaluesIF::RETURN_OK) {
|
||||
sif::error << "DeviceHandlerFailureIsolation::initialize: Could not"
|
||||
" initialize FailureIsolationBase." << std::endl;
|
||||
return result;
|
||||
}
|
||||
ConfirmsFailuresIF* power = objectManager->get<ConfirmsFailuresIF>(
|
||||
powerConfirmationId);
|
||||
if (power != nullptr) {
|
||||
powerConfirmation = power->getEventReceptionQueue();
|
||||
}
|
||||
|
||||
return RETURN_OK;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::setFdirState(FDIRState state) {
|
||||
FailureIsolationBase::throwFdirEvent(FDIR_CHANGED_STATE, state, fdirState);
|
||||
fdirState = state;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::triggerEvent(Event event, uint32_t parameter1,
|
||||
uint32_t parameter2) {
|
||||
//Do not throw error events if fdirState != none.
|
||||
//This will still forward MODE and HEALTH INFO events in any case.
|
||||
if (fdirState == NONE || EVENT::getSeverity(event) == SEVERITY::INFO) {
|
||||
FailureIsolationBase::triggerEvent(event, parameter1, parameter2);
|
||||
}
|
||||
}
|
||||
|
||||
bool DeviceHandlerFailureIsolation::isFdirActionInProgress() {
|
||||
return (fdirState != NONE);
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::startRecovery(Event reason) {
|
||||
throwFdirEvent(FDIR_STARTS_RECOVERY, EVENT::getEventId(reason));
|
||||
setOwnerHealth(HasHealthIF::NEEDS_RECOVERY);
|
||||
setFdirState(RECOVERY_ONGOING);
|
||||
}
|
||||
|
||||
ReturnValue_t DeviceHandlerFailureIsolation::getParameter(uint8_t domainId,
|
||||
uint16_t parameterId, ParameterWrapper* parameterWrapper,
|
||||
const ParameterWrapper* newValues, uint16_t startAtIndex) {
|
||||
ReturnValue_t result = strangeReplyCount.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
result = missedReplyCount.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
result = recoveryCounter.getParameter(domainId, parameterId,
|
||||
parameterWrapper, newValues, startAtIndex);
|
||||
if (result != INVALID_DOMAIN_ID) {
|
||||
return result;
|
||||
}
|
||||
return INVALID_DOMAIN_ID;
|
||||
}
|
||||
|
||||
void DeviceHandlerFailureIsolation::setFaulty(Event reason) {
|
||||
throwFdirEvent(FDIR_TURNS_OFF_DEVICE, EVENT::getEventId(reason));
|
||||
setOwnerHealth(HasHealthIF::FAULTY);
|
||||
setFdirState(AWAIT_SHUTDOWN);
|
||||
}
|
||||
|
||||
bool DeviceHandlerFailureIsolation::isFdirInActionOrAreWeFaulty(
|
||||
EventMessage* event) {
|
||||
if (fdirState != NONE) {
|
||||
//Only wait for those events, ignore all others.
|
||||
if (event->getParameter1() == HasHealthIF::HEALTHY
|
||||
&& event->getEvent() == HasHealthIF::HEALTH_INFO) {
|
||||
setFdirState(NONE);
|
||||
}
|
||||
if (event->getEvent() == HasModesIF::MODE_INFO
|
||||
&& fdirState != RECOVERY_ONGOING) {
|
||||
setFdirState(NONE);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (owner->getHealth() == HasHealthIF::FAULTY
|
||||
|| owner->getHealth() == HasHealthIF::PERMANENT_FAULTY) {
|
||||
//Ignore all events in case device is already faulty.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1,54 +1,53 @@
|
||||
#ifndef FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_
|
||||
#define FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_
|
||||
|
||||
#include "../fdir/FaultCounter.h"
|
||||
#include "../fdir/FailureIsolationBase.h"
|
||||
namespace Factory{
|
||||
void setStaticFrameworkObjectIds();
|
||||
}
|
||||
|
||||
|
||||
class DeviceHandlerFailureIsolation: public FailureIsolationBase {
|
||||
friend void (Factory::setStaticFrameworkObjectIds)();
|
||||
friend class Heater;
|
||||
public:
|
||||
DeviceHandlerFailureIsolation(object_id_t owner, object_id_t parent);
|
||||
~DeviceHandlerFailureIsolation();
|
||||
ReturnValue_t initialize();
|
||||
void triggerEvent(Event event, uint32_t parameter1 = 0,
|
||||
uint32_t parameter2 = 0);bool isFdirActionInProgress();
|
||||
virtual ReturnValue_t getParameter(uint8_t domainId, uint16_t parameterId,
|
||||
ParameterWrapper *parameterWrapper,
|
||||
const ParameterWrapper *newValues, uint16_t startAtIndex);
|
||||
protected:
|
||||
FaultCounter strangeReplyCount;
|
||||
FaultCounter missedReplyCount;
|
||||
FaultCounter recoveryCounter;
|
||||
enum FDIRState {
|
||||
NONE, RECOVERY_ONGOING, DEVICE_MIGHT_BE_OFF, AWAIT_SHUTDOWN
|
||||
};
|
||||
FDIRState fdirState;
|
||||
bool hasPowerConfirmation = false;
|
||||
MessageQueueId_t powerConfirmation;
|
||||
static object_id_t powerConfirmationId;
|
||||
// TODO: Are those hardcoded value? How can they be changed.
|
||||
static const uint32_t MAX_REBOOT = 1;
|
||||
static const uint32_t REBOOT_TIME_MS = 180000;
|
||||
static const uint32_t MAX_STRANGE_REPLIES = 10;
|
||||
static const uint32_t STRANGE_REPLIES_TIME_MS = 10000;
|
||||
static const uint32_t MAX_MISSED_REPLY_COUNT = 5;
|
||||
static const uint32_t MISSED_REPLY_TIME_MS = 10000;
|
||||
virtual ReturnValue_t eventReceived(EventMessage* event);
|
||||
virtual void eventConfirmed(EventMessage* event);
|
||||
void wasParentsFault(EventMessage* event);
|
||||
void decrementFaultCounters();
|
||||
void handleRecovery(Event reason);
|
||||
virtual void clearFaultCounters();
|
||||
void setFdirState(FDIRState state);
|
||||
void startRecovery(Event reason);
|
||||
void setFaulty(Event reason);
|
||||
|
||||
bool isFdirInActionOrAreWeFaulty(EventMessage* event);
|
||||
};
|
||||
|
||||
#endif /* FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_ */
|
||||
#ifndef FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_
|
||||
#define FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_
|
||||
|
||||
#include "../fdir/FaultCounter.h"
|
||||
#include "../fdir/FailureIsolationBase.h"
|
||||
|
||||
namespace Factory{
|
||||
void setStaticFrameworkObjectIds();
|
||||
}
|
||||
|
||||
class DeviceHandlerFailureIsolation: public FailureIsolationBase {
|
||||
friend void (Factory::setStaticFrameworkObjectIds)();
|
||||
friend class Heater;
|
||||
public:
|
||||
DeviceHandlerFailureIsolation(object_id_t owner, object_id_t parent);
|
||||
~DeviceHandlerFailureIsolation();
|
||||
ReturnValue_t initialize();
|
||||
void triggerEvent(Event event, uint32_t parameter1 = 0,
|
||||
uint32_t parameter2 = 0);bool isFdirActionInProgress();
|
||||
virtual ReturnValue_t getParameter(uint8_t domainId, uint16_t parameterId,
|
||||
ParameterWrapper *parameterWrapper,
|
||||
const ParameterWrapper *newValues, uint16_t startAtIndex);
|
||||
protected:
|
||||
FaultCounter strangeReplyCount;
|
||||
FaultCounter missedReplyCount;
|
||||
FaultCounter recoveryCounter;
|
||||
enum FDIRState {
|
||||
NONE, RECOVERY_ONGOING, DEVICE_MIGHT_BE_OFF, AWAIT_SHUTDOWN
|
||||
};
|
||||
FDIRState fdirState;
|
||||
MessageQueueId_t powerConfirmation = MessageQueueIF::NO_QUEUE;
|
||||
static object_id_t powerConfirmationId;
|
||||
// TODO: Are those hardcoded value? How can they be changed.
|
||||
static const uint32_t MAX_REBOOT = 1;
|
||||
static const uint32_t REBOOT_TIME_MS = 180000;
|
||||
static const uint32_t MAX_STRANGE_REPLIES = 10;
|
||||
static const uint32_t STRANGE_REPLIES_TIME_MS = 10000;
|
||||
static const uint32_t MAX_MISSED_REPLY_COUNT = 5;
|
||||
static const uint32_t MISSED_REPLY_TIME_MS = 10000;
|
||||
virtual ReturnValue_t eventReceived(EventMessage* event);
|
||||
virtual void eventConfirmed(EventMessage* event);
|
||||
void wasParentsFault(EventMessage* event);
|
||||
void decrementFaultCounters();
|
||||
void handleRecovery(Event reason);
|
||||
virtual void clearFaultCounters();
|
||||
void setFdirState(FDIRState state);
|
||||
void startRecovery(Event reason);
|
||||
void setFaulty(Event reason);
|
||||
|
||||
bool isFdirInActionOrAreWeFaulty(EventMessage* event);
|
||||
};
|
||||
|
||||
#endif /* FRAMEWORK_DEVICEHANDLERS_DEVICEHANDLERFAILUREISOLATION_H_ */
|
||||
|
Loading…
x
Reference in New Issue
Block a user