From e43a86432b5604db63794c5f2d8def867275bf47 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Tue, 4 Jun 2024 15:10:54 +0200 Subject: [PATCH 1/3] start adding re-try logic for SUPV --- linux/payload/PlocSupvUartMan.cpp | 16 +++++++++++++--- linux/payload/PlocSupvUartMan.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/linux/payload/PlocSupvUartMan.cpp b/linux/payload/PlocSupvUartMan.cpp index 8009e89e..4f05c762 100644 --- a/linux/payload/PlocSupvUartMan.cpp +++ b/linux/payload/PlocSupvUartMan.cpp @@ -11,6 +11,7 @@ #include #include "OBSWConfig.h" +#include "fsfw/returnvalues/returnvalue.h" #include "tas/hdlc.h" #ifdef XIPHOS_Q7S #include "bsp_q7s/fs/FilesystemHelper.h" @@ -449,10 +450,11 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() { update.bytesWritten); return result; } - result = handlePacketTransmissionNoReply(packet, 5000); + result = handlePacketTransmissionNoReply(packet, COM_TIMEOUT_MS); if (result != returnvalue::OK) { triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount), update.bytesWritten); + // TODO: Retry logic, re-try up to three times. return result; } @@ -572,7 +574,15 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply( bool ackReceived = false; bool packetWasHandled = false; while (true) { - handleUartReception(); + ReturnValue_t status = handleUartReception(); + if (status != returnvalue::OK) { + result = status; + // We probably missed a packet or the communication was temporarily corrupted.. + // It probably would be good to bail immediately for re-try logic. + if (result == HDLC_ERROR) { + return result; + } + } if (not decodedQueue.empty()) { size_t packetLen = 0; decodedQueue.retrieve(&packetLen); @@ -615,7 +625,7 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply( return result::NO_REPLY_TIMEOUT; } } - return returnvalue::OK; + return result; } int PlocSupvUartManager::handleAckReception(supv::TcBase& tc, size_t packetLen) { diff --git a/linux/payload/PlocSupvUartMan.h b/linux/payload/PlocSupvUartMan.h index 562bd3bd..7397729b 100644 --- a/linux/payload/PlocSupvUartMan.h +++ b/linux/payload/PlocSupvUartMan.h @@ -199,6 +199,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF, static constexpr ReturnValue_t POSSIBLE_PACKET_LOSS_CONSECUTIVE_END = returnvalue::makeCode(1, 4); static constexpr ReturnValue_t HDLC_ERROR = returnvalue::makeCode(1, 5); + static constexpr uint32_t COM_TIMEOUT_MS = 3000; + static const uint16_t CRC16_INIT = 0xFFFF; // Event buffer reply will carry 24 space packets with 1016 bytes and one space packet with // 192 bytes From 1f8dc67922c44c650c75b6e0b96639e34c411fe9 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 5 Jun 2024 12:36:31 +0200 Subject: [PATCH 2/3] some minor improvements --- linux/payload/PlocSupvUartMan.cpp | 44 +++++++++++++++---------------- linux/payload/PlocSupvUartMan.h | 3 +++ linux/payload/plocSupvDefs.h | 8 +++--- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/linux/payload/PlocSupvUartMan.cpp b/linux/payload/PlocSupvUartMan.cpp index 4f05c762..f785ee57 100644 --- a/linux/payload/PlocSupvUartMan.cpp +++ b/linux/payload/PlocSupvUartMan.cpp @@ -12,6 +12,7 @@ #include "OBSWConfig.h" #include "fsfw/returnvalues/returnvalue.h" +#include "linux/payload/plocSupvDefs.h" #include "tas/hdlc.h" #ifdef XIPHOS_Q7S #include "bsp_q7s/fs/FilesystemHelper.h" @@ -22,9 +23,13 @@ #include "fsfw/tasks/TaskFactory.h" #include "fsfw/timemanager/Countdown.h" + +#if OBSW_DEBUG_PLOC_SUPERVISOR == 1 #include "mission/utility/Filenaming.h" #include "mission/utility/ProgressPrinter.h" #include "mission/utility/Timestamp.h" +#endif + #include "tas/crc.h" using namespace returnvalue; @@ -278,23 +283,6 @@ ReturnValue_t PlocSupvUartManager::initiateUpdateContinuation() { return returnvalue::OK; } -// ReturnValue_t PlocSupvHelper::startEventBufferRequest(std::string path) { -// #ifdef XIPHOS_Q7S -// ReturnValue_t result = FilesystemHelper::checkPath(path); -// if (result != returnvalue::OK) { -// return result; -// } -// #endif -// if (not std::filesystem::exists(path)) { -// return PATH_NOT_EXISTS; -// } -// eventBufferReq.path = path; -// request = Request::REQUEST_EVENT_BUFFER; -// //uartComIF->flushUartTxAndRxBuf(comCookie); -// semaphore->release(); -// return returnvalue::OK; -// } - void PlocSupvUartManager::stop() { MutexGuard mg(lock); if (state == InternalState::SLEEPING or state == InternalState::GO_TO_SLEEP) { @@ -450,11 +438,8 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() { update.bytesWritten); return result; } - result = handlePacketTransmissionNoReply(packet, COM_TIMEOUT_MS); + result = writeMemoryHandlingWithRetryLogic(packet, progPercent); if (result != returnvalue::OK) { - triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount), - update.bytesWritten); - // TODO: Retry logic, re-try up to three times. return result; } @@ -465,7 +450,22 @@ ReturnValue_t PlocSupvUartManager::writeUpdatePackets() { #if OBSW_DEBUG_PLOC_SUPERVISOR == 1 progressPrinter.print(update.bytesWritten); #endif /* OBSW_DEBUG_PLOC_SUPERVISOR == 1 */ - // TaskFactory::delayTask(1); + } + return result; +} + +ReturnValue_t PlocSupvUartManager::writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet, + unsigned progPercent) { + ReturnValue_t result = returnvalue::OK; + // Simple re-try logic in place to deal with communication unreliability in orbit. + for (uint8_t retryCount = 0; retryCount < MAX_RETRY_COUNT; retryCount++) { + result = handlePacketTransmissionNoReply(packet, COM_TIMEOUT_MS); + if (result == returnvalue::OK) { + return result; + } + triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount), + update.bytesWritten); + serial::flushTxRxBuf(serialPort); } return result; } diff --git a/linux/payload/PlocSupvUartMan.h b/linux/payload/PlocSupvUartMan.h index 7397729b..1cea2002 100644 --- a/linux/payload/PlocSupvUartMan.h +++ b/linux/payload/PlocSupvUartMan.h @@ -118,6 +118,7 @@ class PlocSupvUartManager : public DeviceCommunicationIF, static constexpr Event HDLC_FRAME_REMOVAL_ERROR = MAKE_EVENT(31, severity::INFO); static constexpr Event HDLC_CRC_ERROR = MAKE_EVENT(32, severity::INFO); + static constexpr unsigned MAX_RETRY_COUNT = 3; PlocSupvUartManager(object_id_t objectId); virtual ~PlocSupvUartManager(); /** @@ -371,6 +372,8 @@ class PlocSupvUartManager : public DeviceCommunicationIF, */ ReturnValue_t requestReceiveMessage(CookieIF* cookie, size_t requestLen) override; + ReturnValue_t writeMemoryHandlingWithRetryLogic(supv::WriteMemory& packet, unsigned progPercent); + void performUartShutdown(); void updateVtime(uint8_t vtime); }; diff --git a/linux/payload/plocSupvDefs.h b/linux/payload/plocSupvDefs.h index e6536713..340da81d 100644 --- a/linux/payload/plocSupvDefs.h +++ b/linux/payload/plocSupvDefs.h @@ -1146,14 +1146,14 @@ class WriteMemory : public TcBase { : TcBase(params, Apid::MEM_MAN, static_cast(tc::MemManId::WRITE), 1) {} ReturnValue_t buildPacket(ccsds::SequenceFlags seqFlags, uint16_t sequenceCount, uint8_t memoryId, - uint32_t startAddress, uint16_t length, uint8_t* updateData) { + uint32_t currentAddr, uint16_t length, uint8_t* updateData) { if (length > CHUNK_MAX) { sif::error << "WriteMemory::WriteMemory: Invalid length" << std::endl; return SerializeIF::BUFFER_TOO_SHORT; } spParams.creator.setSeqFlags(seqFlags); spParams.creator.setSeqCount(sequenceCount); - auto res = initPacket(memoryId, startAddress, length, updateData); + auto res = initPacket(memoryId, currentAddr, length, updateData); if (res != returnvalue::OK) { return res; } @@ -1171,7 +1171,7 @@ class WriteMemory : public TcBase { static const uint16_t META_DATA_LENGTH = 8; uint8_t n = 1; - ReturnValue_t initPacket(uint8_t memoryId, uint32_t startAddr, uint16_t updateDataLen, + ReturnValue_t initPacket(uint8_t memoryId, uint32_t currentAddr, uint16_t updateDataLen, uint8_t* updateData) { uint8_t* data = payloadStart; if (updateDataLen % 2 != 0) { @@ -1189,7 +1189,7 @@ class WriteMemory : public TcBase { SerializeIF::Endianness::BIG); SerializeAdapter::serialize(&n, &data, &serializedSize, spParams.maxSize, SerializeIF::Endianness::BIG); - SerializeAdapter::serialize(&startAddr, &data, &serializedSize, spParams.maxSize, + SerializeAdapter::serialize(¤tAddr, &data, &serializedSize, spParams.maxSize, SerializeIF::Endianness::BIG); SerializeAdapter::serialize(&updateDataLen, &data, &serializedSize, spParams.maxSize, SerializeIF::Endianness::BIG); From 5dd0c2a5cbf581747fa5f4b55e4b64a2e2d97715 Mon Sep 17 00:00:00 2001 From: Robin Mueller Date: Wed, 5 Jun 2024 12:47:17 +0200 Subject: [PATCH 3/3] let's not do that for now.. --- CHANGELOG.md | 9 +++++++++ linux/payload/PlocSupvUartMan.cpp | 10 +++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 784ec061..8f92ffec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,15 @@ will consitute of a breaking change warranting a new major release: # [unreleased] +# [v8.1.1] 2024-06-05 + +## Added + +- PLOC SUPV MPSoC update re-try logic for the `WRITE_MEMORY` command. These packets form > 98% + of all packets required for a software update, but the update mechanism is not tolerant against + occasional glitches on the RS485 communication to the PLOC SUPV. A simple re-try mechanism which + tries to re-attempt packet handling up to three times for those packets is introduced. + # [v8.1.0] 2024-05-29 ## Fixed diff --git a/linux/payload/PlocSupvUartMan.cpp b/linux/payload/PlocSupvUartMan.cpp index f785ee57..cb93b4ec 100644 --- a/linux/payload/PlocSupvUartMan.cpp +++ b/linux/payload/PlocSupvUartMan.cpp @@ -465,7 +465,10 @@ ReturnValue_t PlocSupvUartManager::writeMemoryHandlingWithRetryLogic(supv::Write } triggerEvent(WRITE_MEMORY_FAILED, buildProgParams1(progPercent, update.sequenceCount), update.bytesWritten); + // Clear data structures related to reply handling. serial::flushTxRxBuf(serialPort); + recRingBuf.clear(); + decodedRingBuf.clear(); } return result; } @@ -577,10 +580,11 @@ ReturnValue_t PlocSupvUartManager::handlePacketTransmissionNoReply( ReturnValue_t status = handleUartReception(); if (status != returnvalue::OK) { result = status; - // We probably missed a packet or the communication was temporarily corrupted.. - // It probably would be good to bail immediately for re-try logic. if (result == HDLC_ERROR) { - return result; + // We could bail here immediately.. but I prefer to wait for the timeout, because we should + // ensure that all packets which might be related to the transfer are still received and + // cleared from all data structures related to reply handling. + // return result; } } if (not decodedQueue.empty()) {