MPSoC Update Retry Logic #900
@ -16,6 +16,15 @@ will consitute of a breaking change warranting a new major release:
|
||||
|
||||
# [unreleased]
|
||||
|
||||
# [v8.1.1] 2024-06-05
|
||||
|
||||
## Added
|
||||
|
||||
- PLOC SUPV MPSoC update re-try logic for the `WRITE_MEMORY` command. These packets form > 98%
|
||||
of all packets required for a software update, but the update mechanism is not tolerant against
|
||||
occasional glitches on the RS485 communication to the PLOC SUPV. A simple re-try mechanism which
|
||||
tries to re-attempt packet handling up to three times for those packets is introduced.
|
||||
|
||||
# [v8.1.0] 2024-05-29
|
||||
|
||||
## Fixed
|
||||
|
@ -11,6 +11,8 @@
|
||||
#include <fstream>
|
||||
|
||||
#include "OBSWConfig.h"
|
||||
#include "fsfw/returnvalues/returnvalue.h"
|
||||
#include "linux/payload/plocSupvDefs.h"
|
||||
#include "tas/hdlc.h"
|
||||
#ifdef XIPHOS_Q7S
|
||||
#include "bsp_q7s/fs/FilesystemHelper.h"
|
||||
@ -21,9 +23,13 @@
|
||||
|
||||
#include "fsfw/tasks/TaskFactory.h"
|
||||
#include "fsfw/timemanager/Countdown.h"
|
||||
|
||||
#if OBSW_DEBUG_PLOC_SUPERVISOR == 1
|
||||
#include "mission/utility/Filenaming.h"
|
||||
#include "mission/utility/ProgressPrinter.h"
|
||||
#include "mission/utility/Timestamp.h"
|
||||
#endif
|
||||
|
||||
#include "tas/crc.h"
|
||||
|
||||
using namespace returnvalue;
|
||||
@ -277,23 +283,6 @@ ReturnValue_t PlocSupvUartManager::initiateUpdateContinuation() {
|
||||
return returnvalue::OK;
|
||||
}
|
||||
|
||||
// ReturnValue_t PlocSupvHelper::startEventBufferRequest(std::string path) {
|
||||
// #ifdef XIPHOS_Q7S
|
||||
// ReturnValue_t result = FilesystemHelper::checkPath(path);
|
||||
// if (result != returnvalue::OK) {
|
||||
// return result;
|
||||
// }
|
||||
// #endif
|
||||
// if (not std::filesystem::exists(path)) {
|
||||
// return PATH_NOT_EXISTS;
|
||||
// }
|
||||
// eventBufferReq.path = path;
|
||||
// request = Request::REQUEST_EVENT_BUFFER;
|
||||
// //uartComIF->flushUartTxAndRxBuf(comCookie);
|
||||
// semaphore->release();
|
||||
// return returnvalue::OK;
|
||||
// }
|
||||
|
||||
void PlocSupvUartManager::stop() {
|
||||
MutexGuard mg(lock);
|
||||
if (state == InternalState::SLEEPING or state == InternalState::GO_TO_SLEEP) {
|
||||
@ -449,10 +438,8 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() {
|
||||
update.bytesWritten);
|
||||
return result;
|
||||
}
|
||||
result = handlePacketTransmissionNoReply(packet, 5000);
|
||||
result = writeMemoryHandlingWithRetryLogic(packet, progPercent);
|
||||
if (result != returnvalue::OK) {
|
||||
triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount),
|
||||
update.bytesWritten);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -463,7 +450,25 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() {
|
||||
#if OBSW_DEBUG_PLOC_SUPERVISOR == 1
|
||||
progressPrinter.print(update.bytesWritten);
|
||||
#endif /* OBSW_DEBUG_PLOC_SUPERVISOR == 1 */
|
||||
// TaskFactory::delayTask(1);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ReturnValue_t PlocSupvUartManager::writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet,
|
||||
unsigned progPercent) {
|
||||
ReturnValue_t result = returnvalue::OK;
|
||||
// Simple re-try logic in place to deal with communication unreliability in orbit.
|
||||
for (uint8_t retryCount = 0; retryCount < MAX_RETRY_COUNT; retryCount++) {
|
||||
result = handlePacketTransmissionNoReply(packet, COM_TIMEOUT_MS);
|
||||
if (result == returnvalue::OK) {
|
||||
return result;
|
||||
}
|
||||
triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount),
|
||||
update.bytesWritten);
|
||||
// Clear data structures related to reply handling.
|
||||
serial::flushTxRxBuf(serialPort);
|
||||
recRingBuf.clear();
|
||||
decodedRingBuf.clear();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -572,7 +577,16 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply(
|
||||
bool ackReceived = false;
|
||||
bool packetWasHandled = false;
|
||||
while (true) {
|
||||
handleUartReception();
|
||||
ReturnValue_t status = handleUartReception();
|
||||
if (status != returnvalue::OK) {
|
||||
|
||||
result = status;
|
||||
if (result == HDLC_ERROR) {
|
||||
// We could bail here immediately.. but I prefer to wait for the timeout, because we should
|
||||
// ensure that all packets which might be related to the transfer are still received and
|
||||
// cleared from all data structures related to reply handling.
|
||||
// return result;
|
||||
}
|
||||
}
|
||||
if (not decodedQueue.empty()) {
|
||||
size_t packetLen = 0;
|
||||
decodedQueue.retrieve(&packetLen);
|
||||
@ -615,7 +629,7 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply(
|
||||
return result::NO_REPLY_TIMEOUT;
|
||||
}
|
||||
}
|
||||
return returnvalue::OK;
|
||||
return result;
|
||||
}
|
||||
|
||||
int PlocSupvUartManager::handleAckReception(supv::TcBase& tc, size_t packetLen) {
|
||||
|
@ -118,6 +118,7 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
|
||||
static constexpr Event HDLC_FRAME_REMOVAL_ERROR = MAKE_EVENT(31, severity::INFO);
|
||||
static constexpr Event HDLC_CRC_ERROR = MAKE_EVENT(32, severity::INFO);
|
||||
|
||||
static constexpr unsigned MAX_RETRY_COUNT = 3;
|
||||
PlocSupvUartManager(object_id_t objectId);
|
||||
virtual ~PlocSupvUartManager();
|
||||
/**
|
||||
@ -199,6 +200,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
|
||||
static constexpr ReturnValue_t POSSIBLE_PACKET_LOSS_CONSECUTIVE_END = returnvalue::makeCode(1, 4);
|
||||
static constexpr ReturnValue_t HDLC_ERROR = returnvalue::makeCode(1, 5);
|
||||
|
||||
static constexpr uint32_t COM_TIMEOUT_MS = 3000;
|
||||
|
||||
static const uint16_t CRC16_INIT = 0xFFFF;
|
||||
// Event buffer reply will carry 24 space packets with 1016 bytes and one space packet with
|
||||
// 192 bytes
|
||||
@ -369,6 +372,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
|
||||
*/
|
||||
ReturnValue_t requestReceiveMessage(CookieIF* cookie, size_t requestLen) override;
|
||||
|
||||
ReturnValue_t writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet, unsigned progPercent);
|
||||
|
||||
void performUartShutdown();
|
||||
void updateVtime(uint8_t vtime);
|
||||
};
|
||||
|
@ -1146,14 +1146,14 @@ class WriteMemory : public TcBase {
|
||||
: TcBase(params, Apid::MEM_MAN, static_cast<uint8_t>(tc::MemManId::WRITE), 1) {}
|
||||
|
||||
ReturnValue_t buildPacket(ccsds::SequenceFlags seqFlags, uint16_t sequenceCount, uint8_t memoryId,
|
||||
uint32_t startAddress, uint16_t length, uint8_t* updateData) {
|
||||
uint32_t currentAddr, uint16_t length, uint8_t* updateData) {
|
||||
if (length > CHUNK_MAX) {
|
||||
sif::error << "WriteMemory::WriteMemory: Invalid length" << std::endl;
|
||||
return SerializeIF::BUFFER_TOO_SHORT;
|
||||
}
|
||||
spParams.creator.setSeqFlags(seqFlags);
|
||||
spParams.creator.setSeqCount(sequenceCount);
|
||||
auto res = initPacket(memoryId, startAddress, length, updateData);
|
||||
auto res = initPacket(memoryId, currentAddr, length, updateData);
|
||||
if (res != returnvalue::OK) {
|
||||
return res;
|
||||
}
|
||||
@ -1171,7 +1171,7 @@ class WriteMemory : public TcBase {
|
||||
static const uint16_t META_DATA_LENGTH = 8;
|
||||
uint8_t n = 1;
|
||||
|
||||
ReturnValue_t initPacket(uint8_t memoryId, uint32_t startAddr, uint16_t updateDataLen,
|
||||
ReturnValue_t initPacket(uint8_t memoryId, uint32_t currentAddr, uint16_t updateDataLen,
|
||||
uint8_t* updateData) {
|
||||
uint8_t* data = payloadStart;
|
||||
if (updateDataLen % 2 != 0) {
|
||||
@ -1189,7 +1189,7 @@ class WriteMemory : public TcBase {
|
||||
SerializeIF::Endianness::BIG);
|
||||
SerializeAdapter::serialize(&n, &data, &serializedSize, spParams.maxSize,
|
||||
SerializeIF::Endianness::BIG);
|
||||
SerializeAdapter::serialize(&startAddr, &data, &serializedSize, spParams.maxSize,
|
||||
SerializeAdapter::serialize(¤tAddr, &data, &serializedSize, spParams.maxSize,
|
||||
SerializeIF::Endianness::BIG);
|
||||
SerializeAdapter::serialize(&updateDataLen, &data, &serializedSize, spParams.maxSize,
|
||||
SerializeIF::Endianness::BIG);
|
||||
|
Loading…
Reference in New Issue
Block a user
well this is useless