Merge pull request 'MPSoC Update Retry Logic' (#900) from supv-retry-logic into main
All checks were successful
EIVE/eive-obsw/pipeline/head This commit looks good

Reviewed-on: #900
Reviewed-by: Marius Eggert <eggertm@irs.uni-stuttgart.de>
This commit is contained in:
Robin Müller 2024-06-05 13:27:18 +02:00
commit a73c36c237
4 changed files with 55 additions and 27 deletions

View File

@ -16,6 +16,15 @@ will consitute of a breaking change warranting a new major release:
# [unreleased]
# [v8.1.1] 2024-06-05
## Added
- PLOC SUPV MPSoC update re-try logic for the `WRITE_MEMORY` command. These packets form > 98%
of all packets required for a software update, but the update mechanism is not tolerant against
occasional glitches on the RS485 communication to the PLOC SUPV. A simple re-try mechanism which
tries to re-attempt packet handling up to three times for those packets is introduced.
# [v8.1.0] 2024-05-29
## Fixed

View File

@ -11,6 +11,8 @@
#include <fstream>
#include "OBSWConfig.h"
#include "fsfw/returnvalues/returnvalue.h"
#include "linux/payload/plocSupvDefs.h"
#include "tas/hdlc.h"
#ifdef XIPHOS_Q7S
#include "bsp_q7s/fs/FilesystemHelper.h"
@ -21,9 +23,13 @@
#include "fsfw/tasks/TaskFactory.h"
#include "fsfw/timemanager/Countdown.h"
#if OBSW_DEBUG_PLOC_SUPERVISOR == 1
#include "mission/utility/Filenaming.h"
#include "mission/utility/ProgressPrinter.h"
#include "mission/utility/Timestamp.h"
#endif
#include "tas/crc.h"
using namespace returnvalue;
@ -277,23 +283,6 @@ ReturnValue_t PlocSupvUartManager::initiateUpdateContinuation() {
return returnvalue::OK;
}
// ReturnValue_t PlocSupvHelper::startEventBufferRequest(std::string path) {
// #ifdef XIPHOS_Q7S
// ReturnValue_t result = FilesystemHelper::checkPath(path);
// if (result != returnvalue::OK) {
// return result;
// }
// #endif
// if (not std::filesystem::exists(path)) {
// return PATH_NOT_EXISTS;
// }
// eventBufferReq.path = path;
// request = Request::REQUEST_EVENT_BUFFER;
// //uartComIF->flushUartTxAndRxBuf(comCookie);
// semaphore->release();
// return returnvalue::OK;
// }
void PlocSupvUartManager::stop() {
MutexGuard mg(lock);
if (state == InternalState::SLEEPING or state == InternalState::GO_TO_SLEEP) {
@ -449,10 +438,8 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() {
update.bytesWritten);
return result;
}
result = handlePacketTransmissionNoReply(packet, 5000);
result = writeMemoryHandlingWithRetryLogic(packet, progPercent);
if (result != returnvalue::OK) {
triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount),
update.bytesWritten);
return result;
}
@ -463,7 +450,25 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() {
#if OBSW_DEBUG_PLOC_SUPERVISOR == 1
progressPrinter.print(update.bytesWritten);
#endif /* OBSW_DEBUG_PLOC_SUPERVISOR == 1 */
// TaskFactory::delayTask(1);
}
return result;
}
ReturnValue_t PlocSupvUartManager::writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet,
unsigned progPercent) {
ReturnValue_t result = returnvalue::OK;
// Simple re-try logic in place to deal with communication unreliability in orbit.
for (uint8_t retryCount = 0; retryCount < MAX_RETRY_COUNT; retryCount++) {
result = handlePacketTransmissionNoReply(packet, COM_TIMEOUT_MS);
if (result == returnvalue::OK) {
return result;
}
triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount),
update.bytesWritten);
// Clear data structures related to reply handling.
serial::flushTxRxBuf(serialPort);
recRingBuf.clear();
decodedRingBuf.clear();
}
return result;
}
@ -572,7 +577,16 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply(
bool ackReceived = false;
bool packetWasHandled = false;
while (true) {
handleUartReception();
ReturnValue_t status = handleUartReception();
if (status != returnvalue::OK) {
result = status;
if (result == HDLC_ERROR) {
// We could bail here immediately.. but I prefer to wait for the timeout, because we should
// ensure that all packets which might be related to the transfer are still received and
// cleared from all data structures related to reply handling.
// return result;
}
}
if (not decodedQueue.empty()) {
size_t packetLen = 0;
decodedQueue.retrieve(&packetLen);
@ -615,7 +629,7 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply(
return result::NO_REPLY_TIMEOUT;
}
}
return returnvalue::OK;
return result;
}
int PlocSupvUartManager::handleAckReception(supv::TcBase& tc, size_t packetLen) {

View File

@ -118,6 +118,7 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
static constexpr Event HDLC_FRAME_REMOVAL_ERROR = MAKE_EVENT(31, severity::INFO);
static constexpr Event HDLC_CRC_ERROR = MAKE_EVENT(32, severity::INFO);
static constexpr unsigned MAX_RETRY_COUNT = 3;
PlocSupvUartManager(object_id_t objectId);
virtual ~PlocSupvUartManager();
/**
@ -199,6 +200,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
static constexpr ReturnValue_t POSSIBLE_PACKET_LOSS_CONSECUTIVE_END = returnvalue::makeCode(1, 4);
static constexpr ReturnValue_t HDLC_ERROR = returnvalue::makeCode(1, 5);
static constexpr uint32_t COM_TIMEOUT_MS = 3000;
static const uint16_t CRC16_INIT = 0xFFFF;
// Event buffer reply will carry 24 space packets with 1016 bytes and one space packet with
// 192 bytes
@ -369,6 +372,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF,
*/
ReturnValue_t requestReceiveMessage(CookieIF* cookie, size_t requestLen) override;
ReturnValue_t writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet, unsigned progPercent);
void performUartShutdown();
void updateVtime(uint8_t vtime);
};

View File

@ -1146,14 +1146,14 @@ class WriteMemory : public TcBase {
: TcBase(params, Apid::MEM_MAN, static_cast<uint8_t>(tc::MemManId::WRITE), 1) {}
ReturnValue_t buildPacket(ccsds::SequenceFlags seqFlags, uint16_t sequenceCount, uint8_t memoryId,
uint32_t startAddress, uint16_t length, uint8_t* updateData) {
uint32_t currentAddr, uint16_t length, uint8_t* updateData) {
if (length > CHUNK_MAX) {
sif::error << "WriteMemory::WriteMemory: Invalid length" << std::endl;
return SerializeIF::BUFFER_TOO_SHORT;
}
spParams.creator.setSeqFlags(seqFlags);
spParams.creator.setSeqCount(sequenceCount);
auto res = initPacket(memoryId, startAddress, length, updateData);
auto res = initPacket(memoryId, currentAddr, length, updateData);
if (res != returnvalue::OK) {
return res;
}
@ -1171,7 +1171,7 @@ class WriteMemory : public TcBase {
static const uint16_t META_DATA_LENGTH = 8;
uint8_t n = 1;
ReturnValue_t initPacket(uint8_t memoryId, uint32_t startAddr, uint16_t updateDataLen,
ReturnValue_t initPacket(uint8_t memoryId, uint32_t currentAddr, uint16_t updateDataLen,
uint8_t* updateData) {
uint8_t* data = payloadStart;
if (updateDataLen % 2 != 0) {
@ -1189,7 +1189,7 @@ class WriteMemory : public TcBase {
SerializeIF::Endianness::BIG);
SerializeAdapter::serialize(&n, &data, &serializedSize, spParams.maxSize,
SerializeIF::Endianness::BIG);
SerializeAdapter::serialize(&startAddr, &data, &serializedSize, spParams.maxSize,
SerializeAdapter::serialize(&currentAddr, &data, &serializedSize, spParams.maxSize,
SerializeIF::Endianness::BIG);
SerializeAdapter::serialize(&updateDataLen, &data, &serializedSize, spParams.maxSize,
SerializeIF::Endianness::BIG);