Skip to content

Commit 087017c

Browse files
authored
[diagnose] test service connectivity when no VPC endpoint exists (#12)
* [.gitpod] add devcontainer.json and automations.yaml * Fix * Only require private DNS for execute-api, as per our docs * [diagnose] test service connectivity when no VPC endpoint exists This helps for when central VPC endpoints are used, which exist in a separate VPC (not the one you are testing from) * Add execute-api service check for main subnet * Fix binding for api-endpoint and instance-ami * Do not provide a default
1 parent 0a5b246 commit 087017c

File tree

5 files changed

+109
-22
lines changed

5 files changed

+109
-22
lines changed

.devcontainer/devcontainer.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"name": "Enterprise Deployment Toolkit",
3+
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
4+
"features": {
5+
"ghcr.io/devcontainers/features/go": {
6+
"version": "1.22.0"
7+
}
8+
},
9+
"remoteEnv": {
10+
"GIT_EDITOR": "code --wait"
11+
},
12+
"customizations": {
13+
"vscode": {
14+
"extensions": [
15+
"EditorConfig.EditorConfig",
16+
"golang.go",
17+
"zxh404.vscode-proto3"
18+
]
19+
}
20+
}
21+
}

.gitpod/automations.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
tasks:
2+
goBuild:
3+
name: Build project
4+
command: cd gitpod-network-check && go build ./...
5+
triggeredBy:
6+
- manual
7+
- postEnvironmentStart
8+
9+
goTest:
10+
name: Run tests
11+
command: cd gitpod-network-check && go test -v ./...
12+
triggeredBy:
13+
- manual
14+
- postEnvironmentStart

gitpod-network-check/cmd/checks.go

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/base64"
66
"errors"
77
"fmt"
8+
"net"
89
"net/url"
910
"slices"
1011
"sort"
@@ -65,6 +66,10 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
6566
log.Infof("ℹ️ Found duplicate subnets. We'll test each subnet '%v' only once.", distinctSubnets)
6667
}
6768

69+
if networkConfig.ApiEndpoint == "" {
70+
return fmt.Errorf("❌ API endpoint is required")
71+
}
72+
6873
log.Infof("ℹ️ Launching EC2 instances in Main subnets")
6974
mainInstanceIds, err := launchInstances(cmd.Context(), ec2Client, networkConfig.MainSubnets, instanceProfile.Arn)
7075
if err != nil {
@@ -100,7 +105,7 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
100105

101106
time.Sleep(time.Minute)
102107

103-
log.Infof("ℹ️ Checking if the required AWS Services can be reached from the ec2 instances")
108+
log.Infof("ℹ️ Checking if the required AWS Services can be reached from the ec2 instances in the pod subnet")
104109
serviceEndpoints := map[string]string{
105110
"SSM": fmt.Sprintf("https://ssm.%s.amazonaws.com", networkConfig.AwsRegion),
106111
"SSMmessages": fmt.Sprintf("https://ssmmessages.%s.amazonaws.com", networkConfig.AwsRegion),
@@ -122,8 +127,9 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
122127

123128
log.Infof("ℹ️ Checking if certain AWS Services can be reached from ec2 instances in the main subnet")
124129
serviceEndpointsForMain := map[string]string{
125-
"S3": fmt.Sprintf("https://s3.%s.amazonaws.com", networkConfig.AwsRegion),
126-
"DynamoDB": fmt.Sprintf("https://dynamodb.%s.amazonaws.com", networkConfig.AwsRegion),
130+
"S3": fmt.Sprintf("https://s3.%s.amazonaws.com", networkConfig.AwsRegion),
131+
"DynamoDB": fmt.Sprintf("https://dynamodb.%s.amazonaws.com", networkConfig.AwsRegion),
132+
"ExecuteAPI": fmt.Sprintf("https://%s.execute-api.%s.amazonaws.com", networkConfig.ApiEndpoint, networkConfig.AwsRegion),
127133
}
128134
checkServicesAvailability(cmd.Context(), ssmClient, mainInstanceIds, serviceEndpointsForMain)
129135

@@ -155,8 +161,9 @@ var checkCommand = &cobra.Command{ // nolint:gochecknoglobals
155161
}
156162

157163
type vpcEndpointsMap struct {
158-
Endpoint string
159-
Required bool
164+
Endpoint string
165+
PrivateDnsName string
166+
PrivateDnsRequired bool
160167
}
161168

162169
// the ssm-agent requires that ec2messages, ssm and ssmmessages are available
@@ -166,20 +173,24 @@ func checkSMPrerequisites(ctx context.Context, ec2Client *ec2.Client) error {
166173
log.Infof("ℹ️ Checking prerequisites")
167174
vpcEndpoints := []vpcEndpointsMap{
168175
{
169-
Endpoint: fmt.Sprintf("com.amazonaws.%s.ec2messages", networkConfig.AwsRegion),
170-
Required: false,
176+
Endpoint: fmt.Sprintf("com.amazonaws.%s.ec2messages", networkConfig.AwsRegion),
177+
PrivateDnsName: fmt.Sprintf("ec2messages.%s.amazonaws.com", networkConfig.AwsRegion),
178+
PrivateDnsRequired: false,
171179
},
172180
{
173-
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssm", networkConfig.AwsRegion),
174-
Required: false,
181+
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssm", networkConfig.AwsRegion),
182+
PrivateDnsName: fmt.Sprintf("ssm.%s.amazonaws.com", networkConfig.AwsRegion),
183+
PrivateDnsRequired: false,
175184
},
176185
{
177-
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssmmessages", networkConfig.AwsRegion),
178-
Required: false,
186+
Endpoint: fmt.Sprintf("com.amazonaws.%s.ssmmessages", networkConfig.AwsRegion),
187+
PrivateDnsName: fmt.Sprintf("ssmmessages.%s.amazonaws.com", networkConfig.AwsRegion),
188+
PrivateDnsRequired: false,
179189
},
180190
{
181-
Endpoint: fmt.Sprintf("com.amazonaws.%s.execute-api", networkConfig.AwsRegion),
182-
Required: true,
191+
Endpoint: fmt.Sprintf("com.amazonaws.%s.execute-api", networkConfig.AwsRegion),
192+
PrivateDnsName: fmt.Sprintf("execute-api.%s.amazonaws.com", networkConfig.AwsRegion),
193+
PrivateDnsRequired: true,
183194
},
184195
}
185196

@@ -198,13 +209,22 @@ func checkSMPrerequisites(ctx context.Context, ec2Client *ec2.Client) error {
198209
}
199210

200211
if len(response.VpcEndpoints) == 0 {
201-
if endpoint.Required {
202-
return fmt.Errorf("❌ VPC endpoint %s not configured: %w", endpoint.Endpoint, err)
212+
if strings.Contains(endpoint.Endpoint, "execute-api") {
213+
log.Infof("ℹ️ Deferring connectivity test for %s service until testing main subnet", endpoint.PrivateDnsName)
214+
continue
215+
}
216+
log.Infof("ℹ️ VPC endpoint %s is not configured, testing service connectivity...", endpoint.Endpoint)
217+
_, err := TestServiceConnectivity(ctx, endpoint.PrivateDnsName, 5*time.Second)
218+
if err != nil {
219+
log.Errorf("❌ Service %s connectivity test failed: %v\n", endpoint.PrivateDnsName, err)
220+
} else if endpoint.PrivateDnsRequired {
221+
log.Warnf("✅ Service %s has connectivity, ensure Private DNS is enabled 🙏", endpoint.PrivateDnsName)
222+
} else if !endpoint.PrivateDnsRequired {
223+
log.Infof("✅ Service %s has connectivity", endpoint.PrivateDnsName)
203224
}
204-
log.Infof("ℹ️ VPC endpoint %s is not configured", endpoint.Endpoint)
205225
} else {
206226
for _, e := range response.VpcEndpoints {
207-
if e.PrivateDnsEnabled != nil && !*e.PrivateDnsEnabled {
227+
if e.PrivateDnsEnabled != nil && !*e.PrivateDnsEnabled && endpoint.PrivateDnsRequired {
208228
log.Errorf("❌ VPC endpoint '%s' has private DNS disabled, it must be enabled", *e.VpcEndpointId)
209229
}
210230
}
@@ -658,3 +678,32 @@ func instanceTypeExists(ctx context.Context, svc *ec2.Client, instanceType types
658678

659679
return len(resp.InstanceTypeOfferings) > 0, nil
660680
}
681+
682+
// ConnectivityTestResult represents the results of DNS and network connectivity tests.
type ConnectivityTestResult struct {
	// IPAddresses lists every address the tested hostname resolved to,
	// in the order returned by the resolver.
	IPAddresses []string
}

// TestServiceConnectivity tests both DNS resolution and TCP connectivity given a hostname.
//
// The hostname is resolved with the default resolver, then a TCP connection to
// port 443 is attempted against each resolved address in turn until one
// succeeds. timeout bounds each individual dial attempt; ctx bounds the whole
// operation, including the DNS lookup.
//
// The returned result is never nil: on failure it carries whatever addresses
// were resolved before the error occurred, alongside a non-nil error that
// wraps the underlying DNS or dial error(s).
func TestServiceConnectivity(ctx context.Context, hostname string, timeout time.Duration) (*ConnectivityTestResult, error) {
	result := &ConnectivityTestResult{}

	ips, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
	if err != nil {
		return result, fmt.Errorf("DNS resolution failed: %w", err)
	}
	if len(ips) == 0 {
		return result, fmt.Errorf("no IP addresses found for hostname: %s", hostname)
	}

	result.IPAddresses = make([]string, 0, len(ips))
	for _, ip := range ips {
		result.IPAddresses = append(result.IPAddresses, ip.String())
	}

	dialer := net.Dialer{Timeout: timeout}
	var dialErrs []error
	for _, ip := range result.IPAddresses {
		// JoinHostPort brackets IPv6 literals ("[::1]:443"); plain "%s:443"
		// formatting produces an unparsable address for them.
		conn, err := dialer.DialContext(ctx, "tcp", net.JoinHostPort(ip, "443"))
		if err != nil {
			dialErrs = append(dialErrs, err)
			if ctx.Err() != nil {
				// The caller gave up; further attempts would fail immediately.
				break
			}
			continue
		}
		// Only reachability matters here; a close error carries no signal.
		_ = conn.Close()
		return result, nil
	}

	return result, fmt.Errorf("TCP connection failed: %w", errors.Join(dialErrs...))
}

gitpod-network-check/cmd/root.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ type NetworkConfig struct {
2424
PodSubnets []string
2525
HttpsHosts []string
2626
InstanceAMI string
27+
ApiEndpoint string
2728
}
2829

2930
var networkConfig = NetworkConfig{LogLevel: "INFO"}
@@ -89,9 +90,10 @@ func init() {
8990
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.MainSubnets, "main-subnets", []string{}, "List of main subnets")
9091
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.PodSubnets, "pod-subnets", []string{}, "List of pod subnets")
9192
networkCheckCmd.PersistentFlags().StringSliceVar(&networkConfig.HttpsHosts, "https-hosts", []string{}, "Hosts to test for outbound HTTPS connectivity")
92-
bindFlags(networkCheckCmd, v)
9393
networkCheckCmd.PersistentFlags().StringVar(&networkConfig.InstanceAMI, "instance-ami", "", "Custom ec2 instance AMI id, if not set will use latest ubuntu")
94-
log.Infof("ℹ️ Running with region `%s`, main subnet `%v`, pod subnet `%v`, and hosts `%v`", networkConfig.AwsRegion, networkConfig.MainSubnets, networkConfig.PodSubnets, networkConfig.HttpsHosts)
94+
networkCheckCmd.PersistentFlags().StringVar(&networkConfig.ApiEndpoint, "api-endpoint", "", "The Gitpod Enterprise control plane's regional API endpoint subdomain")
95+
bindFlags(networkCheckCmd, v)
96+
log.Infof("ℹ️ Running with region `%s`, main subnet `%v`, pod subnet `%v`, hosts `%v`, and api endpoint `%v`", networkConfig.AwsRegion, networkConfig.MainSubnets, networkConfig.PodSubnets, networkConfig.HttpsHosts, networkConfig.ApiEndpoint)
9597
}
9698

9799
func readConfigFile() *viper.Viper {
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
log-level: debug # Options: debug, info, warning, error
22
region: eu-central-1
3-
main-subnets: subnet-017c6a80f4879d851, subnet-0215744d52cd1c01f
4-
pod-subnets: subnet-00a118009d1d572a5, subnet-062288af00ba50d86
3+
main-subnets: subnet-03ed4c7f3f10ee64a, subnet-03ae0d9e3ad063d83
4+
pod-subnets: subnet-09704642a44a1ae9b, subnet-0fc43a731956656cd
55
https-hosts: accounts.google.com, https://github.com
66
# Put your custom AMI ID here if you want to use one; otherwise the latest Ubuntu AMI from AWS will be used.
7-
instance-ami:
7+
instance-ami:
8+
api-endpoint:

0 commit comments

Comments
 (0)