Skip to content

Commit

Permalink
masternode|net|rpc: Improve masternode sync process (dashpay#3690)
Browse files Browse the repository at this point in the history
* masternode: Replace sync states INITIAL and WAITING with BLOCKCHAIN

* masternode: Peer dependent "assume tip" timeout

I would say its enough to only wait 1 tick if we have more than 3
peers before we move over to governance sync.

* masternode: Notify the UI instantly if switched to governance sync

Without this it takes one iteration more for the UI to receive the
update.

* masternode: Notify the UI about CMasternodeSync::Reset calls

* masternode: Don't instantly reset the sync process

Give it MASTERNODE_SYNC_RESET_SECONDS (600) seconds time after the last
UpdateBlockTip call.

* rpc: Don't switch to next asset in "mnsync reset"

* rpc: Force the reset in "mnsync reset"

* net: Make sure the sync gets a reset if required after network changes

This will reset the sync process if its outdated in the following cases:

- If the connections dropped to zero
- If the connections went from zero to one
- If the network has been enabled or disabled

* Apply suggestions from code review

Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>

* net: Only open masternode connections if the blockchain is synced

In general it doesn't make sense to connect to masternodes before due to
MNAUTH requires blockchain sync. This could lead to failing quorum
connections/failing masternode
probing.. if a just restarted node/a out of sync node
would hit a dkg block.. Then they would not try to open those
llmq/probing connections for the next 60s (nLLMQConnectionRetryTimeout).
Thats basically what happens in tests right now and they fail without
this commit.

* test: Make sure nodes are synced when they get restored after isolation

Their sync might be out of date otherwise due to bigger mocktime bumps

Co-authored-by: UdjinM6 <UdjinM6@users.noreply.github.com>
  • Loading branch information
xdustinface and UdjinM6 committed Sep 12, 2020
1 parent aa81a06 commit 2bb94c7
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 44 deletions.
67 changes: 31 additions & 36 deletions src/masternode/masternode-sync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@
class CMasternodeSync;
CMasternodeSync masternodeSync;

void CMasternodeSync::Reset()
void CMasternodeSync::Reset(bool fForce, bool fNotifyReset)
{
nCurrentAsset = MASTERNODE_SYNC_INITIAL;
nTriedPeerCount = 0;
nTimeAssetSyncStarted = GetTime();
nTimeLastBumped = GetTime();
// Avoid resetting the sync process if we just "recently" received a new block
if (fForce || (GetTime() - nTimeLastUpdateBlockTip > MASTERNODE_SYNC_RESET_SECONDS)) {
nCurrentAsset = MASTERNODE_SYNC_BLOCKCHAIN;
nTriedPeerCount = 0;
nTimeAssetSyncStarted = GetTime();
nTimeLastBumped = GetTime();
nTimeLastUpdateBlockTip = 0;
fReachedBestHeader = false;
if (fNotifyReset) {
uiInterface.NotifyAdditionalDataSyncProgressChanged(-1);
}
}
}

void CMasternodeSync::BumpAssetLastTime(const std::string& strFuncName)
Expand All @@ -35,8 +43,7 @@ std::string CMasternodeSync::GetAssetName()
{
switch(nCurrentAsset)
{
case(MASTERNODE_SYNC_INITIAL): return "MASTERNODE_SYNC_INITIAL";
case(MASTERNODE_SYNC_WAITING): return "MASTERNODE_SYNC_WAITING";
case(MASTERNODE_SYNC_BLOCKCHAIN): return "MASTERNODE_SYNC_BLOCKCHAIN";
case(MASTERNODE_SYNC_GOVERNANCE): return "MASTERNODE_SYNC_GOVERNANCE";
case MASTERNODE_SYNC_FINISHED: return "MASTERNODE_SYNC_FINISHED";
default: return "UNKNOWN";
Expand All @@ -47,11 +54,7 @@ void CMasternodeSync::SwitchToNextAsset(CConnman& connman)
{
switch(nCurrentAsset)
{
case(MASTERNODE_SYNC_INITIAL):
nCurrentAsset = MASTERNODE_SYNC_WAITING;
LogPrintf("CMasternodeSync::SwitchToNextAsset -- Starting %s\n", GetAssetName());
break;
case(MASTERNODE_SYNC_WAITING):
case(MASTERNODE_SYNC_BLOCKCHAIN):
LogPrintf("CMasternodeSync::SwitchToNextAsset -- Completed %s in %llds\n", GetAssetName(), GetTime() - nTimeAssetSyncStarted);
nCurrentAsset = MASTERNODE_SYNC_GOVERNANCE;
LogPrintf("CMasternodeSync::SwitchToNextAsset -- Starting %s\n", GetAssetName());
Expand All @@ -76,8 +79,7 @@ void CMasternodeSync::SwitchToNextAsset(CConnman& connman)
std::string CMasternodeSync::GetSyncStatus()
{
switch (masternodeSync.nCurrentAsset) {
case MASTERNODE_SYNC_INITIAL: return _("Synchronizing blockchain...");
case MASTERNODE_SYNC_WAITING: return _("Synchronization pending...");
case MASTERNODE_SYNC_BLOCKCHAIN: return _("Synchronizing blockchain...");
case MASTERNODE_SYNC_GOVERNANCE: return _("Synchronizing governance objects...");
case MASTERNODE_SYNC_FINISHED: return _("Synchronization finished");
default: return "";
Expand Down Expand Up @@ -111,8 +113,7 @@ void CMasternodeSync::ProcessTick(CConnman& connman)
static int64_t nTimeLastProcess = GetTime();
if(GetTime() - nTimeLastProcess > 60*60 && !fMasternodeMode) {
LogPrintf("CMasternodeSync::ProcessTick -- WARNING: no actions for too long, restarting sync...\n");
Reset();
SwitchToNextAsset(connman);
Reset(true);
nTimeLastProcess = GetTime();
return;
}
Expand Down Expand Up @@ -152,7 +153,7 @@ void CMasternodeSync::ProcessTick(CConnman& connman)
// QUICK MODE (REGTEST ONLY!)
if(Params().NetworkIDString() == CBaseChainParams::REGTEST)
{
if (nCurrentAsset == MASTERNODE_SYNC_WAITING) {
if (nCurrentAsset == MASTERNODE_SYNC_BLOCKCHAIN) {
connman.PushMessage(pnode, msgMaker.Make(NetMsgType::GETSPORKS)); //get current network sporks
SwitchToNextAsset(connman);
} else if (nCurrentAsset == MASTERNODE_SYNC_GOVERNANCE) {
Expand Down Expand Up @@ -189,25 +190,26 @@ void CMasternodeSync::ProcessTick(CConnman& connman)
LogPrintf("CMasternodeSync::ProcessTick -- nTick %d nCurrentAsset %d -- requesting sporks from peer=%d\n", nTick, nCurrentAsset, pnode->GetId());
}

// INITIAL TIMEOUT

if(nCurrentAsset == MASTERNODE_SYNC_WAITING) {
if (nCurrentAsset == MASTERNODE_SYNC_BLOCKCHAIN) {
if(pnode->nVersion >= 70216 && !pnode->fInbound && gArgs.GetBoolArg("-syncmempool", DEFAULT_SYNC_MEMPOOL) && !netfulfilledman.HasFulfilledRequest(pnode->addr, "mempool-sync")) {
netfulfilledman.AddFulfilledRequest(pnode->addr, "mempool-sync");
connman.PushMessage(pnode, msgMaker.Make(NetMsgType::MEMPOOL));
LogPrintf("CMasternodeSync::ProcessTick -- nTick %d nCurrentAsset %d -- syncing mempool from peer=%d\n", nTick, nCurrentAsset, pnode->GetId());
}

if(GetTime() - nTimeLastBumped > MASTERNODE_SYNC_TIMEOUT_SECONDS) {
int64_t nTimeSyncTimeout = vNodesCopy.size() > 3 ? MASTERNODE_SYNC_TICK_SECONDS : MASTERNODE_SYNC_TIMEOUT_SECONDS;
if (fReachedBestHeader && (GetTime() - nTimeLastBumped > nTimeSyncTimeout)) {
// At this point we know that:
// a) there are peers (because we are looping on at least one of them);
// b) we waited for at least MASTERNODE_SYNC_TIMEOUT_SECONDS since we reached
// the headers tip the last time (i.e. since we switched from
// MASTERNODE_SYNC_INITIAL to MASTERNODE_SYNC_WAITING and bumped time);
// b) we waited for at least MASTERNODE_SYNC_TICK_SECONDS/MASTERNODE_SYNC_TIMEOUT_SECONDS
// (depending on the number of connected peers) since we reached the headers tip the last
// time (i.e. since fReachedBestHeader has been set to true);
// c) there were no blocks (UpdatedBlockTip, NotifyHeaderTip) or headers (AcceptedBlockHeader)
// for at least MASTERNODE_SYNC_TIMEOUT_SECONDS.
// for at least MASTERNODE_SYNC_TICK_SECONDS/MASTERNODE_SYNC_TIMEOUT_SECONDS (depending on
// the number of connected peers).
// We must be at the tip already, let's move to the next asset.
SwitchToNextAsset(connman);
uiInterface.NotifyAdditionalDataSyncProgressChanged(nSyncProgress);
}
}

Expand Down Expand Up @@ -324,6 +326,8 @@ void CMasternodeSync::UpdatedBlockTip(const CBlockIndex *pindexNew, bool fInitia
{
LogPrint(BCLog::MNSYNC, "CMasternodeSync::UpdatedBlockTip -- pindexNew->nHeight: %d fInitialDownload=%d\n", pindexNew->nHeight, fInitialDownload);

nTimeLastUpdateBlockTip = GetAdjustedTime();

if (IsSynced() || !pindexBestHeader)
return;

Expand All @@ -335,36 +339,27 @@ void CMasternodeSync::UpdatedBlockTip(const CBlockIndex *pindexNew, bool fInitia
if (fInitialDownload) {
// switched too early
if (IsBlockchainSynced()) {
Reset();
Reset(true);
}

// no need to check any further while still in IBD mode
return;
}

// Note: since we sync headers first, it should be ok to use this
static bool fReachedBestHeader = false;
bool fReachedBestHeaderNew = pindexNew->GetBlockHash() == pindexBestHeader->GetBlockHash();

if (fReachedBestHeader && !fReachedBestHeaderNew) {
// Switching from true to false means that we previousely stuck syncing headers for some reason,
// probably initial timeout was not enough,
// because there is no way we can update tip not having best header
Reset();
fReachedBestHeader = false;
return;
Reset(true);
}

fReachedBestHeader = fReachedBestHeaderNew;

LogPrint(BCLog::MNSYNC, "CMasternodeSync::UpdatedBlockTip -- pindexNew->nHeight: %d pindexBestHeader->nHeight: %d fInitialDownload=%d fReachedBestHeader=%d\n",
pindexNew->nHeight, pindexBestHeader->nHeight, fInitialDownload, fReachedBestHeader);

if (!IsBlockchainSynced() && fReachedBestHeader) {
// Reached best header while being in initial mode.
// We must be at the tip already, let's move to the next asset.
SwitchToNextAsset(connman);
}
}

void CMasternodeSync::DoMaintenance(CConnman &connman)
Expand Down
15 changes: 10 additions & 5 deletions src/masternode/masternode-sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@

class CMasternodeSync;

static const int MASTERNODE_SYNC_INITIAL = 0; // sync just started, was reset recently or still in IDB
static const int MASTERNODE_SYNC_WAITING = 1; // waiting after initial to see if we can get more headers/blocks
static const int MASTERNODE_SYNC_BLOCKCHAIN = 1;
static const int MASTERNODE_SYNC_GOVERNANCE = 4;
static const int MASTERNODE_SYNC_GOVOBJ = 10;
static const int MASTERNODE_SYNC_GOVOBJ_VOTE = 11;
static const int MASTERNODE_SYNC_FINISHED = 999;

static const int MASTERNODE_SYNC_TICK_SECONDS = 6;
static const int MASTERNODE_SYNC_TIMEOUT_SECONDS = 30; // our blocks are 2.5 minutes so 30 seconds should be fine
static const int MASTERNODE_SYNC_RESET_SECONDS = 600; // Reset fReachedBestHeader in CMasternodeSync::Reset if UpdateBlockTip hasn't been called for this seconds

extern CMasternodeSync masternodeSync;

Expand All @@ -38,13 +38,18 @@ class CMasternodeSync
// ... last bumped
int64_t nTimeLastBumped;

/// Set to true if best header is reached in CMasternodeSync::UpdatedBlockTip
bool fReachedBestHeader{false};
/// Last time UpdateBlockTip has been called
int64_t nTimeLastUpdateBlockTip{0};

public:
CMasternodeSync() { Reset(); }
CMasternodeSync() { Reset(true, false); }


void SendGovernanceSyncRequest(CNode* pnode, CConnman& connman);

bool IsBlockchainSynced() { return nCurrentAsset > MASTERNODE_SYNC_WAITING; }
bool IsBlockchainSynced() { return nCurrentAsset > MASTERNODE_SYNC_BLOCKCHAIN; }
bool IsSynced() { return nCurrentAsset == MASTERNODE_SYNC_FINISHED; }

int GetAssetID() { return nCurrentAsset; }
Expand All @@ -54,7 +59,7 @@ class CMasternodeSync
std::string GetAssetName();
std::string GetSyncStatus();

void Reset();
void Reset(bool fForce = false, bool fNotifyReset = true);
void SwitchToNextAsset(CConnman& connman);

void ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStream& vRecv);
Expand Down
13 changes: 12 additions & 1 deletion src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1392,6 +1392,13 @@ void CConnman::NotifyNumConnectionsChanged()
LOCK(cs_vNodes);
vNodesSize = vNodes.size();
}

// If we had zero connections before and new connections now or if we just dropped
// to zero connections reset the sync process if its outdated.
if ((vNodesSize > 0 && nPrevNodeCount == 0) || (vNodesSize == 0 && nPrevNodeCount > 0)) {
masternodeSync.Reset();
}

if(vNodesSize != nPrevNodeCount) {
nPrevNodeCount = vNodesSize;
if(clientInterface)
Expand Down Expand Up @@ -2496,7 +2503,7 @@ void CConnman::ThreadOpenMasternodeConnections()

didConnect = false;

if (!fNetworkActive)
if (!fNetworkActive || !masternodeSync.IsBlockchainSynced())
continue;

std::set<CService> connectedNodes;
Expand Down Expand Up @@ -2886,6 +2893,10 @@ void CConnman::SetNetworkActive(bool active)

fNetworkActive = active;

// Always call the Reset() if the network gets enabled/disabled to make sure the sync process
// gets a reset if its outdated..
masternodeSync.Reset();

uiInterface.NotifyNetworkActiveChanged(fNetworkActive);
}

Expand Down
3 changes: 1 addition & 2 deletions src/rpc/misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ UniValue mnsync(const JSONRPCRequest& request)

if(strMode == "reset")
{
masternodeSync.Reset();
masternodeSync.SwitchToNextAsset(*g_connman);
masternodeSync.Reset(true);
return "success";
}
return "failure";
Expand Down
1 change: 1 addition & 0 deletions test/functional/llmq-simplepose.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def repair_masternodes(self, restart):
mn.node.setnetworkactive(False)
wait_until(lambda: mn.node.getconnectioncount() == 0)
mn.node.setnetworkactive(True)
force_finish_mnsync(mn.node)
connect_nodes(mn.node, 0)

def reset_probe_timeouts(self):
Expand Down

0 comments on commit 2bb94c7

Please sign in to comment.