Skip to content

Commit

Permalink
[diagnose] test service connectivity when no VPC endpoint exists (#12)
Browse files Browse the repository at this point in the history
* [.gitpod] add devcontainer.json and automations.yaml

* Fix

* Only require private DNS for execute-api, as per our docs

* [diagnose] test service connectivity when no VPC endpoint exists

This helps when centralized VPC endpoints are used, since those live in a separate VPC (not the one you are testing from)

* Add execute-api service check for main subnet

* Fix binding for api-endpoint and instance-ami

* Do not provide a default
  • Loading branch information
kylos101 authored Feb 3, 2025
1 parent 0a5b246 commit 087017c
Showing 5 changed files with 109 additions and 22 deletions.
21 changes: 21 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"name": "Enterprise Deployment Toolkit",
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
"features": {
"ghcr.io/devcontainers/features/go": {
"version": "1.22.0"
}
},
"remoteEnv": {
"GIT_EDITOR": "code --wait"
},
"customizations": {
"vscode": {
"extensions": [
"EditorConfig.EditorConfig",
"golang.go",
"zxh404.vscode-proto3"
]
}
}
}
14 changes: 14 additions & 0 deletions .gitpod/automations.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
tasks:
goBuild:
name: Build project
command: cd gitpod-network-check && go build ./...
triggeredBy:
- manual
- postEnvironmentStart

goTest:
name: Run tests
command: cd gitpod-network-check && go test -v ./...
triggeredBy:
- manual
- postEnvironmentStart
83 changes: 66 additions & 17 deletions gitpod-network-check/cmd/checks.go
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ import (
"encoding/base64"
"errors"
"fmt"
"net"
"net/url"
"slices"
"sort"
@@ -65,6 +66,10 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
log.Infof("ℹ️ Found duplicate subnets. We'll test each subnet '%v' only once.", distinctSubnets)
}

if networkConfig.ApiEndpoint == "" {
return fmt.Errorf("❌ API endpoint is required")
}

log.Infof("ℹ️ Launching EC2 instances in Main subnets")
mainInstanceIds, err := launchInstances(cmd.Context(), ec2Client, networkConfig.MainSubnets, instanceProfile.Arn)
if err != nil {
@@ -100,7 +105,7 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals

time.Sleep(time.Minute)

log.Infof("ℹ️ Checking if the required AWS Services can be reached from the ec2 instances")
log.Infof("ℹ️ Checking if the required AWS Services can be reached from the ec2 instances in the pod subnet")
serviceEndpoints := map[string]string{
"SSM": fmt.Sprintf("https://ssm.%s.amazonaws.com", networkConfig.AwsRegion),
"SSMmessages": fmt.Sprintf("https://ssmmessages.%s.amazonaws.com", networkConfig.AwsRegion),
@@ -122,8 +127,9 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals

log.Infof("ℹ️ Checking if certain AWS Services can be reached from ec2 instances in the main subnet")
serviceEndpointsForMain := map[string]string{
"S3": fmt.Sprintf("https://s3.%s.amazonaws.com", networkConfig.AwsRegion),
"DynamoDB": fmt.Sprintf("https://dynamodb.%s.amazonaws.com", networkConfig.AwsRegion),
"S3": fmt.Sprintf("https://s3.%s.amazonaws.com", networkConfig.AwsRegion),
"DynamoDB": fmt.Sprintf("https://dynamodb.%s.amazonaws.com", networkConfig.AwsRegion),
"ExecuteAPI": fmt.Sprintf("https://%s.execute-api.%s.amazonaws.com", networkConfig.ApiEndpoint, networkConfig.AwsRegion),
}
checkServicesAvailability(cmd.Context(), ssmClient, mainInstanceIds, serviceEndpointsForMain)

@@ -155,8 +161,9 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
}

type vpcEndpointsMap struct {
Endpoint string
Required bool
Endpoint string
PrivateDnsName string
PrivateDnsRequired bool
}

// the ssm-agent requires that ec2messages, ssm and ssmmessages are available
@@ -166,20 +173,24 @@ func checkSMPrerequisites(ctx context.Context, ec2Client *ec2.Client) error {
log.Infof("ℹ️ Checking prerequisites")
vpcEndpoints := []vpcEndpointsMap{
{
Endpoint: fmt.Sprintf("com.amazonaws.%s.ec2messages", networkConfig.AwsRegion),
Required: false,
Endpoint: fmt.Sprintf("com.amazonaws.%s.ec2messages", networkConfig.AwsRegion),
PrivateDnsName: fmt.Sprintf("ec2messages.%s.amazonaws.com", networkConfig.AwsRegion),
PrivateDnsRequired: false,
},
{
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssm", networkConfig.AwsRegion),
Required: false,
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssm", networkConfig.AwsRegion),
PrivateDnsName: fmt.Sprintf("ssm.%s.amazonaws.com", networkConfig.AwsRegion),
PrivateDnsRequired: false,
},
{
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssmmessages", networkConfig.AwsRegion),
Required: false,
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssmmessages", networkConfig.AwsRegion),
PrivateDnsName: fmt.Sprintf("ssmmessages.%s.amazonaws.com", networkConfig.AwsRegion),
PrivateDnsRequired: false,
},
{
Endpoint: fmt.Sprintf("com.amazonaws.%s.execute-api", networkConfig.AwsRegion),
Required: true,
Endpoint: fmt.Sprintf("com.amazonaws.%s.execute-api", networkConfig.AwsRegion),
PrivateDnsName: fmt.Sprintf("execute-api.%s.amazonaws.com", networkConfig.AwsRegion),
PrivateDnsRequired: true,
},
}

@@ -198,13 +209,22 @@ func checkSMPrerequisites(ctx context.Context, ec2Client *ec2.Client) error {
}

if len(response.VpcEndpoints) == 0 {
if endpoint.Required {
return fmt.Errorf("❌ VPC endpoint %s not configured: %w", endpoint.Endpoint, err)
if strings.Contains(endpoint.Endpoint, "execute-api") {
log.Infof("ℹ️ Deferring connectivity test for %s service until testing main subnet", endpoint.PrivateDnsName)
continue
}
log.Infof("ℹ️ VPC endpoint %s is not configured, testing service connectivity...", endpoint.Endpoint)
_, err := TestServiceConnectivity(ctx, endpoint.PrivateDnsName, 5*time.Second)
if err != nil {
log.Errorf("❌ Service %s connectivity test failed: %v\n", endpoint.PrivateDnsName, err)
} else if endpoint.PrivateDnsRequired {
log.Warnf("✅ Service %s has connectivity, ensure Private DNS is enabled 🙏", endpoint.PrivateDnsName)
} else if !endpoint.PrivateDnsRequired {
log.Infof("✅ Service %s has connectivity", endpoint.PrivateDnsName)
}
log.Infof("ℹ️ VPC endpoint %s is not configured", endpoint.Endpoint)
} else {
for _, e := range response.VpcEndpoints {
if e.PrivateDnsEnabled != nil && !*e.PrivateDnsEnabled {
if e.PrivateDnsEnabled != nil && !*e.PrivateDnsEnabled && endpoint.PrivateDnsRequired {
log.Errorf("❌ VPC endpoint '%s' has private DNS disabled, it must be enabled", *e.VpcEndpointId)
}
}
@@ -658,3 +678,32 @@ func instanceTypeExists(ctx context.Context, svc *ec2.Client, instanceType types

return len(resp.InstanceTypeOfferings) > 0, nil
}

// ConnectivityTestResult represents the results of DNS and network connectivity tests.
type ConnectivityTestResult struct {
	// IPAddresses holds every address the hostname resolved to, in resolver order.
	IPAddresses []string
}

// TestServiceConnectivity tests both DNS resolution and TCP connectivity given a hostname.
//
// The hostname is resolved with the default resolver, then a TCP connection to
// port 443 is attempted against each resolved address in order until one
// succeeds. timeout bounds each individual connection attempt; ctx bounds the
// whole operation, including the DNS lookup.
//
// The returned result is never nil and carries whatever addresses were resolved,
// even when an error is returned. The error is non-nil when resolution fails,
// when no addresses are returned, or when no address accepts a connection.
func TestServiceConnectivity(ctx context.Context, hostname string, timeout time.Duration) (*ConnectivityTestResult, error) {
	result := &ConnectivityTestResult{}

	ips, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
	if err != nil {
		return result, fmt.Errorf("DNS resolution failed: %w", err)
	}
	if len(ips) == 0 {
		return result, fmt.Errorf("no IP addresses found for hostname: %s", hostname)
	}

	result.IPAddresses = make([]string, 0, len(ips))
	for _, ip := range ips {
		result.IPAddresses = append(result.IPAddresses, ip.String())
	}

	dialer := net.Dialer{Timeout: timeout}
	var dialErrs []error
	for _, addr := range result.IPAddresses {
		// JoinHostPort brackets IPv6 literals ("[::1]:443"); plain "%s:443"
		// formatting produces an address the dialer cannot parse.
		conn, dialErr := dialer.DialContext(ctx, "tcp", net.JoinHostPort(addr, "443"))
		if dialErr != nil {
			dialErrs = append(dialErrs, dialErr)
			// Once the caller's context is done every further attempt would
			// fail the same way, so stop early.
			if ctx.Err() != nil {
				break
			}
			// A single unreachable address (e.g. an AAAA record on an
			// IPv4-only subnet) must not fail the whole check.
			continue
		}
		// The connection was only a reachability probe; a close error carries
		// no useful information here.
		_ = conn.Close()
		return result, nil
	}

	return result, fmt.Errorf("TCP connection failed: %w", errors.Join(dialErrs...))
}
6 changes: 4 additions & 2 deletions gitpod-network-check/cmd/root.go
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@ type NetworkConfig struct {
PodSubnets []string
HttpsHosts []string
InstanceAMI string
ApiEndpoint string
}

var networkConfig = NetworkConfig{LogLevel: "INFO"}
@@ -89,9 +90,10 @@ func init() {
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.MainSubnets, "main-subnets", []string{}, "List of main subnets")
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.PodSubnets, "pod-subnets", []string{}, "List of pod subnets")
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.HttpsHosts, "https-hosts", []string{}, "Hosts to test for outbound HTTPS connectivity")
bindFlags(networkCheckCmd, v)
networkCheckCmd.PersistentFlags().StringVar(&networkConfig.InstanceAMI, "instance-ami", "", "Custom ec2 instance AMI id, if not set will use latest ubuntu")
log.Infof("ℹ️ Running with region `%s`, main subnet `%v`, pod subnet `%v`, and hosts `%v`", networkConfig.AwsRegion, networkConfig.MainSubnets, networkConfig.PodSubnets, networkConfig.HttpsHosts)
networkCheckCmd.PersistentFlags().StringVar(&networkConfig.ApiEndpoint, "api-endpoint", "", "The Gitpod Enterprise control plane's regional API endpoint subdomain")
bindFlags(networkCheckCmd, v)
log.Infof("ℹ️ Running with region `%s`, main subnet `%v`, pod subnet `%v`, hosts `%v`, and api endpoint `%v`", networkConfig.AwsRegion, networkConfig.MainSubnets, networkConfig.PodSubnets, networkConfig.HttpsHosts, networkConfig.ApiEndpoint)
}

func readConfigFile() *viper.Viper {
7 changes: 4 additions & 3 deletions gitpod-network-check/gitpod-network-check.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
log-level: debug # Options: debug, info, warning, error
region: eu-central-1
main-subnets: subnet-017c6a80f4879d851, subnet-0215744d52cd1c01f
pod-subnets: subnet-00a118009d1d572a5, subnet-062288af00ba50d86
main-subnets: subnet-03ed4c7f3f10ee64a, subnet-03ae0d9e3ad063d83
pod-subnets: subnet-09704642a44a1ae9b, subnet-0fc43a731956656cd
https-hosts: accounts.google.com, https://github.com
# put your custom AMI ID here if you want to use one; otherwise the latest Ubuntu AMI from AWS is used
instance-ami:
instance-ami:
api-endpoint:

0 comments on commit 087017c

Please sign in to comment.