Skip to content

Commit

Permalink
Merge pull request #9813 from vespa-engine/freva/cfg-client-exception
Browse files Browse the repository at this point in the history
Node-Admin: cfg client exceptions
  • Loading branch information
freva committed Jun 16, 2019
2 parents 31280ec + 9c0424d commit c506af9
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 177 deletions.
Expand Up @@ -7,7 +7,6 @@
import com.yahoo.vespa.athenz.identity.ServiceIdentitySslSocketFactory;
import com.yahoo.vespa.athenz.identity.SiaIdentityProvider;
import com.yahoo.vespa.hosted.node.admin.component.ConfigServerInfo;
import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger;
import org.apache.http.HttpHeaders;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
Expand Down Expand Up @@ -38,6 +37,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.logging.Logger;

/**
* Retries request on config server a few times before giving up. Assumes that all requests should be sent with
Expand All @@ -47,7 +47,7 @@
* @author bjorncs
*/
public class ConfigServerApiImpl implements ConfigServerApi {
private static final PrefixLogger NODE_ADMIN_LOGGER = PrefixLogger.getNodeAdminLogger(ConfigServerApiImpl.class);
private static final Logger logger = Logger.getLogger(ConfigServerApiImpl.class.getName());

private final ObjectMapper mapper = new ObjectMapper();

Expand Down Expand Up @@ -106,7 +106,7 @@ private <T> T tryAllConfigServers(CreateRequest requestFactory, Class<T> wantedR
try {
return mapper.readValue(response.getEntity().getContent(), wantedReturnType);
} catch (IOException e) {
throw new RuntimeException("Failed parse response from config server", e);
throw new UncheckedIOException("Failed parse response from config server", e);
}
} catch (HttpException e) {
if (!e.isRetryable()) throw e;
Expand All @@ -117,15 +117,15 @@ private <T> T tryAllConfigServers(CreateRequest requestFactory, Class<T> wantedR

// Failure to communicate with a config server is not abnormal during upgrades
if (e.getMessage().contains("(Connection refused)")) {
NODE_ADMIN_LOGGER.info("Connection refused to " + configServer + " (upgrading?), will try next");
logger.info("Connection refused to " + configServer + " (upgrading?), will try next");
} else {
NODE_ADMIN_LOGGER.warning("Failed to communicate with " + configServer + ", will try next: " + e.getMessage());
logger.warning("Failed to communicate with " + configServer + ", will try next: " + e.getMessage());
}
}
}

throw new RuntimeException("All requests against the config servers ("
+ configServers + ") failed, last as follows:", lastException);
throw HttpException.handleException(
"All requests against the config servers (" + configServers + ") failed, last as follows:", lastException);
}

@Override
Expand Down
@@ -1,13 +1,19 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver;

import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;
import org.apache.http.NoHttpResponseException;

import javax.ws.rs.core.Response;
import java.io.EOFException;
import java.net.SocketException;
import java.net.SocketTimeoutException;

/**
* @author hakonhall
*/
@SuppressWarnings("serial")
public class HttpException extends RuntimeException {
public class HttpException extends ConvergenceException {

private final boolean isRetryable;

Expand All @@ -21,7 +27,12 @@ private HttpException(Response.Status status, String message, boolean isRetryabl
this.isRetryable = isRetryable;
}

public boolean isRetryable() {
private HttpException(String message) {
super(message);
this.isRetryable = false;
}

boolean isRetryable() {
return isRetryable;
}

Expand Down Expand Up @@ -55,6 +66,22 @@ static void handleStatusCode(int statusCode, String message) {
throw new HttpException(status, message, true);
}

/**
* Returns {@link HttpException} if the given Throwable is of a known and well understood error or
* a RuntimeException with the given exception as cause otherwise.
*/
public static RuntimeException handleException(String prefix, Throwable t) {
for (; t != null; t = t.getCause()) {
if (t instanceof SocketException ||
t instanceof SocketTimeoutException ||
t instanceof NoHttpResponseException ||
t instanceof EOFException)
return new HttpException(prefix + t.getMessage());
}

return new RuntimeException(prefix, t);
}

public static class NotFoundException extends HttpException {
public NotFoundException(String message) {
super(Response.Status.NOT_FOUND, message, false);
Expand Down
@@ -1,7 +1,9 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver.noderepository;

public class NodeRepositoryException extends RuntimeException {
import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;

public class NodeRepositoryException extends ConvergenceException {
public NodeRepositoryException(String message) {
super(message);
}
Expand Down
Expand Up @@ -91,6 +91,13 @@ public NodeSpec(
Set<String> additionalIpAddresses,
NodeReports reports,
Optional<String> parentHostname) {
if (state == NodeState.active) {
Objects.requireNonNull(wantedVespaVersion, "Unknown vespa version for active node");
Objects.requireNonNull(wantedDockerImage, "Unknown docker image for active node");
Objects.requireNonNull(wantedRestartGeneration, "Unknown restartGeneration for active node");
Objects.requireNonNull(currentRestartGeneration, "Unknown currentRestartGeneration for active node");
}

this.hostname = Objects.requireNonNull(hostname);
this.wantedDockerImage = Objects.requireNonNull(wantedDockerImage);
this.currentDockerImage = Objects.requireNonNull(currentDockerImage);
Expand Down
Expand Up @@ -12,25 +12,24 @@
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.GetNodesResponse;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.NodeMessageResponse;
import com.yahoo.vespa.hosted.node.admin.configserver.noderepository.bindings.NodeRepositoryNode;
import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger;

import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* @author stiankri, dybis
*/
public class RealNodeRepository implements NodeRepository {
private static final PrefixLogger NODE_ADMIN_LOGGER = PrefixLogger.getNodeAdminLogger(RealNodeRepository.class);
private static final Logger logger = Logger.getLogger(RealNodeRepository.class.getName());

private final ConfigServerApi configServerApi;

Expand All @@ -46,7 +45,7 @@ public void addNodes(List<AddNode> nodes) {

NodeMessageResponse response = configServerApi.post("/nodes/v2/node", nodesToPost, NodeMessageResponse.class);
if (Strings.isNullOrEmpty(response.errorCode)) return;
throw new NodeRepositoryException("Failed to add nodes to node-repo: " + response.message + " " + response.errorCode);
throw new NodeRepositoryException("Failed to add nodes: " + response.message + " " + response.errorCode);
}

@Override
Expand Down Expand Up @@ -80,43 +79,37 @@ public Optional<NodeSpec> getOptionalNode(String hostName) {
*/
@Override
public Map<String, Acl> getAcls(String hostName) {
try {
String path = String.format("/nodes/v2/acl/%s?children=true", hostName);
GetAclResponse response = configServerApi.get(path, GetAclResponse.class);

// Group ports by container hostname that trusts them
Map<String, Set<Integer>> trustedPorts = response.trustedPorts.stream()
.collect(Collectors.groupingBy(
GetAclResponse.Port::getTrustedBy,
Collectors.mapping(port -> port.port, Collectors.toSet())));

// Group node ip-addresses by container hostname that trusts them
Map<String, Set<Acl.Node>> trustedNodes = response.trustedNodes.stream()
.collect(Collectors.groupingBy(
GetAclResponse.Node::getTrustedBy,
Collectors.mapping(
node -> new Acl.Node(node.hostname, node.ipAddress),
Collectors.toSet())));

// Group trusted networks by container hostname that trusts them
Map<String, Set<String>> trustedNetworks = response.trustedNetworks.stream()
.collect(Collectors.groupingBy(GetAclResponse.Network::getTrustedBy,
Collectors.mapping(node -> node.network, Collectors.toSet())));


// For each hostname create an ACL
return Stream.of(trustedNodes.keySet(), trustedPorts.keySet(), trustedNetworks.keySet())
.flatMap(Set::stream)
.distinct()
.collect(Collectors.toMap(
Function.identity(),
hostname -> new Acl(trustedPorts.get(hostname), trustedNodes.get(hostname),
trustedNetworks.get(hostname))));
} catch (HttpException.NotFoundException e) {
NODE_ADMIN_LOGGER.warning("Failed to fetch ACLs for " + hostName + " No ACL will be applied");
}

return Collections.emptyMap();
String path = String.format("/nodes/v2/acl/%s?children=true", hostName);
GetAclResponse response = configServerApi.get(path, GetAclResponse.class);

// Group ports by container hostname that trusts them
Map<String, Set<Integer>> trustedPorts = response.trustedPorts.stream()
.collect(Collectors.groupingBy(
GetAclResponse.Port::getTrustedBy,
Collectors.mapping(port -> port.port, Collectors.toSet())));

// Group node ip-addresses by container hostname that trusts them
Map<String, Set<Acl.Node>> trustedNodes = response.trustedNodes.stream()
.collect(Collectors.groupingBy(
GetAclResponse.Node::getTrustedBy,
Collectors.mapping(
node -> new Acl.Node(node.hostname, node.ipAddress),
Collectors.toSet())));

// Group trusted networks by container hostname that trusts them
Map<String, Set<String>> trustedNetworks = response.trustedNetworks.stream()
.collect(Collectors.groupingBy(GetAclResponse.Network::getTrustedBy,
Collectors.mapping(node -> node.network, Collectors.toSet())));


// For each hostname create an ACL
return Stream.of(trustedNodes.keySet(), trustedPorts.keySet(), trustedNetworks.keySet())
.flatMap(Set::stream)
.distinct()
.collect(Collectors.toMap(
Function.identity(),
hostname -> new Acl(trustedPorts.get(hostname), trustedNodes.get(hostname),
trustedNetworks.get(hostname))));
}

@Override
Expand All @@ -127,7 +120,7 @@ public void updateNodeAttributes(String hostName, NodeAttributes nodeAttributes)
NodeMessageResponse.class);

if (Strings.isNullOrEmpty(response.errorCode)) return;
throw new NodeRepositoryException("Unexpected message " + response.message + " " + response.errorCode);
throw new NodeRepositoryException("Failed to update node attributes: " + response.message + " " + response.errorCode);
}

@Override
Expand All @@ -137,10 +130,10 @@ public void setNodeState(String hostName, NodeState nodeState) {
"/nodes/v2/state/" + state + "/" + hostName,
Optional.empty(), /* body */
NodeMessageResponse.class);
NODE_ADMIN_LOGGER.info(response.message);
logger.info(response.message);

if (Strings.isNullOrEmpty(response.errorCode)) return;
throw new NodeRepositoryException("Unexpected message " + response.message + " " + response.errorCode);
throw new NodeRepositoryException("Failed to set node state: " + response.message + " " + response.errorCode);
}

private static NodeSpec createNodeSpec(NodeRepositoryNode node) {
Expand All @@ -149,30 +142,13 @@ private static NodeSpec createNodeSpec(NodeRepositoryNode node) {

Objects.requireNonNull(node.state, "Unknown node state");
NodeState nodeState = NodeState.valueOf(node.state);
if (nodeState == NodeState.active) {
Objects.requireNonNull(node.wantedVespaVersion, "Unknown vespa version for active node");
Objects.requireNonNull(node.wantedDockerImage, "Unknown docker image for active node");
Objects.requireNonNull(node.restartGeneration, "Unknown restartGeneration for active node");
Objects.requireNonNull(node.currentRestartGeneration, "Unknown currentRestartGeneration for active node");
}

String hostName = Objects.requireNonNull(node.hostname, "hostname is null");

NodeOwner owner = null;
if (node.owner != null) {
owner = new NodeOwner(node.owner.tenant, node.owner.application, node.owner.instance);
}

NodeMembership membership = null;
if (node.membership != null) {
membership = new NodeMembership(node.membership.clusterType, node.membership.clusterId,
node.membership.group, node.membership.index, node.membership.retired);
}

NodeReports reports = NodeReports.fromMap(node.reports == null ? Collections.emptyMap() : node.reports);
Optional<NodeMembership> membership = Optional.ofNullable(node.membership)
.map(m -> new NodeMembership(m.clusterType, m.clusterId, m.group, m.index, m.retired));
NodeReports reports = NodeReports.fromMap(Optional.ofNullable(node.reports).orElseGet(Map::of));

return new NodeSpec(
hostName,
node.hostname,
Optional.ofNullable(node.wantedDockerImage).map(DockerImage::fromString),
Optional.ofNullable(node.currentDockerImage).map(DockerImage::fromString),
nodeState,
Expand All @@ -185,8 +161,8 @@ private static NodeSpec createNodeSpec(NodeRepositoryNode node) {
Optional.ofNullable(node.currentOsVersion).map(Version::fromString),
Optional.ofNullable(node.allowedToBeDown),
Optional.ofNullable(node.wantToDeprovision),
Optional.ofNullable(owner),
Optional.ofNullable(membership),
Optional.ofNullable(node.owner).map(o -> new NodeOwner(o.tenant, o.application, o.instance)),
membership,
Optional.ofNullable(node.restartGeneration),
Optional.ofNullable(node.currentRestartGeneration),
node.rebootGeneration,
Expand Down
@@ -1,8 +1,10 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.node.admin.configserver.orchestrator;

import com.yahoo.vespa.hosted.node.admin.nodeadmin.ConvergenceException;

@SuppressWarnings("serial")
public class OrchestratorException extends RuntimeException {
public class OrchestratorException extends ConvergenceException {
public OrchestratorException(String message) {
super(message);
}
Expand Down
Expand Up @@ -40,8 +40,7 @@ public void suspend(final String hostName) {
} catch (HttpException.NotFoundException n) {
throw new OrchestratorNotFoundException("Failed to suspend " + hostName + ", host not found");
} catch (HttpException e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " +
e.toString());
throw new OrchestratorException("Failed to suspend " + hostName + ": " + e.toString());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on suspend", e);
}
Expand All @@ -60,9 +59,8 @@ public void suspend(String parentHostName, List<String> hostNames) {
parentHostName, params);
batchOperationResult = configServerApi.put(url, Optional.empty(), BatchOperationResult.class);
} catch (HttpException e) {
throw new OrchestratorException("Failed to batch suspend for " +
parentHostName + ": " + e.toString());
} catch (Exception e) {
throw new OrchestratorException("Failed to batch suspend for " + parentHostName + ": " + e.toString());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on batch suspend for " + parentHostName + ", with nodes " + hostNames, e);
}

Expand All @@ -80,9 +78,8 @@ public void resume(final String hostName) {
} catch (HttpException.NotFoundException n) {
throw new OrchestratorNotFoundException("Failed to resume " + hostName + ", host not found");
} catch (HttpException e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " +
e.toString());
} catch (Exception e) {
throw new OrchestratorException("Failed to suspend " + hostName + ": " + e.toString());
} catch (RuntimeException e) {
throw new RuntimeException("Got error on resume", e);
}

Expand Down

0 comments on commit c506af9

Please sign in to comment.