Permalink
Browse files

Reduce granularity of failure detector locking and do not destroy all…

… enqueued requests upon setUnavailable.

src/java/voldemort/cluster/failuredetector/ThresholdFailureDetector.java
- Reduce amount of work done within synchronized section to reduce lock granularity and so ensure "side effects" of node being marked (un)available are not w/in sync section.

src/java/voldemort/store/socket/clientrequest/ClientRequestExecutorPool.java
- Added TODO/comment to decide whether we want to actively destroy all connections upon node being marked unavailable
- Switched behavior to lazily destroying connections.
  • Loading branch information...
1 parent 3aaabcc commit 551b96df105d6babbe0d393aaccf19b561f8c242 @jayjwylie jayjwylie committed Nov 28, 2012
@@ -145,6 +145,10 @@ protected void update(Node node, int successDelta, UnreachableStoreException e)
String catastrophicError = getCatastrophicError(e);
NodeStatus nodeStatus = getNodeStatus(node);
+ boolean invokeSetAvailable = false;
+ boolean invokeSetUnavailable = false;
+ // Protect all logic to decide on available/unavailable w/in
+ // synchronized section
synchronized(nodeStatus) {
if(currentTime >= nodeStatus.getStartMillis() + getConfig().getThresholdInterval()) {
// We've passed into a new interval, so reset our counts
@@ -165,20 +169,28 @@ protected void update(Node node, int successDelta, UnreachableStoreException e)
logger.trace("Node " + node.getId() + " experienced catastrophic error: "
+ catastrophicError);
- setUnavailable(node, e);
+ invokeSetUnavailable = true;
} else if(nodeStatus.getFailure() >= getConfig().getThresholdCountMinimum()) {
long percentage = (nodeStatus.getSuccess() * 100) / nodeStatus.getTotal();
if(logger.isTraceEnabled())
logger.trace("Node " + node.getId() + " percentage: " + percentage + "%");
if(percentage >= getConfig().getThreshold())
- setAvailable(node);
+ invokeSetAvailable = true;
else
- setUnavailable(node, e);
+ invokeSetUnavailable = true;
}
}
}
+ // Actually call set(Un)Available outside of synchronized section. This
+ // ensures that side effects are not w/in a sync section (e.g., alerting
+ // all the failure detector listeners).
+ if(invokeSetAvailable) {
+ setAvailable(node);
+ } else if(invokeSetUnavailable) {
+ setUnavailable(node, e);
+ }
}
protected String getCatastrophicError(UnreachableStoreException e) {
@@ -55,7 +55,6 @@
* Upon successful construction of this object, a new Thread is started. It is
* terminated upon calling {@link #close()}.
*/
-
public class ClientRequestExecutorPool implements SocketStoreFactory {
private final QueuedKeyedResourcePool<SocketDestination, ClientRequestExecutor> queuedPool;
@@ -204,7 +203,12 @@ public void checkin(SocketDestination destination, ClientRequestExecutor clientR
@Override
public void close(SocketDestination destination) {
factory.setLastClosedTimestamp(destination);
- queuedPool.reset(destination);
+ // TODO: Lazily destroy connections instead of actively destroying
+ // connections. Commenting out the next line changes to the lazy
+ // behavior. The other option is to change this behavior w/in
+ // QueuedKeyedResourcePool (i.e., instead of over-riding reset, just let
+ // KeyedResourcePool.reset be invoked directly).
+ // queuedPool.reset(destination);
}
/**

0 comments on commit 551b96d

Please sign in to comment.