diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml
new file mode 100644
index 0000000..3330320
--- /dev/null
+++ b/.github/workflows/shellcheck.yml
@@ -0,0 +1,15 @@
+name: 'Lint Jobs'
+
+on:
+  push:
+    branches:
+      - master
+
+jobs:
+  shellcheck:
+    name: Shellcheck
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Run ShellCheck
+        uses: ludeeus/action-shellcheck@master
diff --git a/clean.sh b/clean.sh
index 83bd25c..de6e5df 100755
--- a/clean.sh
+++ b/clean.sh
@@ -4,7 +4,7 @@
 # This is an unfinished script that will delete the cluster cloudformation along with any other loose ends
 # Execute it with a tag to identify which cluster to delete such as "./clean.sh tag"
 
-if grep '^[-0-9a-zA-Z]*$' <<<$1 && [ ! -z "$1" ];
+if grep '^[-0-9a-zA-Z]*$' <<< "$1" && [ -n "$1" ];
 then echo "Tag is valid";
 else echo "Tag must be alphanumeric." && exit 1;
 fi
@@ -13,14 +13,14 @@ TAG=$1
 PROJECT="umsi-easy-hub"
 
 aws ec2 delete-key-pair --key-name "$PROJECT-$TAG"
-rm "$PROJECT-$TAG.pem"
+rm -f "$PROJECT-$TAG.pem"
 
 rm -rf dist
 
-aws cloudformation delete-stack --stack-name "umsi-easy-hub-${TAG}-cluster"
-# aws cloudformation wait stack-delete-complete --stack-name "umsi-easy-hub-${TAG}-cluster"
-
-# Step 2: manually delete the loadbalancer that was automatically generated by the helm chart
-# otherwise, the control node cloudformation delete will fail because there are dependent resources still active
-# Step 3: manually delete the control node cloudformation from the AWS console.
+echo "Deleting cluster and waiting for deletion"
+aws cloudformation delete-stack --stack-name "umsi-easy-hub-${TAG}-cluster"
+aws cloudformation wait stack-delete-complete --stack-name "umsi-easy-hub-${TAG}-cluster"
+
+echo "Deleting control node and waiting for deletion"
+aws cloudformation delete-stack --stack-name "umsi-easy-hub-${TAG}-control-node"
+aws cloudformation wait stack-delete-complete --stack-name "umsi-easy-hub-${TAG}-control-node"
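The rewritten clean.sh now blocks on `aws cloudformation wait stack-delete-complete`, but a delete can still hang or fail on dependent resources (for example the load balancer the old comments said had to be removed by hand). A small boto3 sketch along these lines can surface the blocking resource before re-running the cleanup; the `tag` value and stack names simply mirror clean.sh and are otherwise illustrative.

    import boto3
    from botocore.exceptions import WaiterError

    cf = boto3.client("cloudformation")

    def delete_and_wait(stack_name):
        cf.delete_stack(StackName=stack_name)
        try:
            cf.get_waiter("stack_delete_complete").wait(StackName=stack_name)
        except WaiterError:
            # Print the resources that refused to delete before giving up.
            events = cf.describe_stack_events(StackName=stack_name)["StackEvents"]
            for event in events:
                if event["ResourceStatus"] == "DELETE_FAILED":
                    print(event["LogicalResourceId"], event.get("ResourceStatusReason", ""))
            raise

    tag = "test"  # hypothetical tag, i.e. the argument passed to ./clean.sh
    for suffix in ("cluster", "control-node"):
        delete_and_wait("umsi-easy-hub-{}-{}".format(tag, suffix))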
diff --git a/deploy.py b/deploy.py
old mode 100644
new mode 100755
index 86552aa..0db8a64
--- a/deploy.py
+++ b/deploy.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 # This script deploys the control node CloudFormation, which will then
 # automatically deploy and configure the cluster CloudFormation and kubernetes
 # deployment.
@@ -21,6 +23,7 @@ s3 = boto3.client('s3')
 cloudformation = boto3.client('cloudformation')
 
+
 def generate_ssh_key(config):
     """Generate an SSH key pair from EC2."""
     name = "{}-{}".format(config["project"], config["tag"])
@@ -74,7 +77,7 @@ def create_control_node(config):
         template_data = template_fileobj.read()
     cloudformation.validate_template(TemplateBody=template_data)
 
-    response = cloudformation.create_stack(
+    cloudformation.create_stack(
         TemplateBody=template_data,
         StackName=stack_name(config),
         Parameters=[{'ParameterKey': 'BillingTag',
@@ -87,6 +90,9 @@
                     {'ParameterKey': 'KeyName',
                      'ParameterValue': config['ssh_key_name'],
                      'UsePreviousValue': False},
+                    {'ParameterKey': 'Domain',
+                     'ParameterValue': config['domain'],
+                     'UsePreviousValue': False},
                     {'ParameterKey': 'Tag',
                      'ParameterValue': config['tag'],
                      'UsePreviousValue': False}],
@@ -125,6 +131,11 @@ def fail(msg):
         default="umsi-easy-hub",
         help="name of project, used in all AWS resources")
 
+    parser.add_argument(
+        "--domain",
+        required=True,
+        help="The FQDN which will host the hub")
+
     args = parser.parse_args()
 
     # We plan to allow different names, but this project name is hard coded all
@@ -145,6 +156,7 @@ def fail(msg):
     config = {}
     config['tag'] = args.tag
     config['project'] = args.project
+    config['domain'] = args.domain
    config['account_id'] = boto3.client(
        'sts').get_caller_identity().get('Account')
    config['ssh_key_name'] = generate_ssh_key(config)
diff --git a/src/cluster_cf.yaml b/src/cluster_cf.yaml
index 7691715..5a6d1fa 100644
--- a/src/cluster_cf.yaml
+++ b/src/cluster_cf.yaml
@@ -105,6 +105,14 @@ Parameters:
     Default: ""
     Type: String
 
+  Domain:
+    Description: FQDN which this hub will be hosted on
+    Type: String
+
+  HostedZoneId:
+    Description: ID of the hosted zone which contains the Domain
+    Type: String
+
 Resources:
 
   EFS:
@@ -148,6 +156,25 @@ Resources:
        - Ref: ControlNodeSecurityGroup
        - Ref: NodeSecurityGroup
 
+  DNSRecord:
+    Type: AWS::Route53::RecordSet
+    Properties:
+      Name: !Ref Domain
+      HostedZoneId: !Ref HostedZoneId
+      Type: A
+      AliasTarget:
+        HostedZoneId: !GetAtt Alb.CanonicalHostedZoneID
+        DNSName: !GetAtt Alb.DNSName
+
+  DomainCertificate:
+    Type: AWS::CertificateManager::Certificate
+    Properties:
+      DomainName: !Ref Domain
+      ValidationMethod: DNS
+      DomainValidationOptions:
+        - DomainName: !Ref Domain
+          HostedZoneId: !Ref HostedZoneId
+
   AlbSg:
     Type: AWS::EC2::SecurityGroup
     Properties:
@@ -168,28 +195,33 @@ Resources:
          ToPort: 65535
          CidrIp: 0.0.0.0/0
 
-  # AlbListenerHttps:
-  #   Type: AWS::ElasticLoadBalancingV2::Listener
-  #   Properties:
-  #     Certificates: [ CertificateArn: !Ref DomainCertificateArn]
-  #     DefaultActions:
-  #       - Type: forward
-  #         TargetGroupArn:
-  #           Ref: AlbTargetGroupHttps
-  #     LoadBalancerArn:
-  #       Ref: Alb
-  #     Port: 443
-  #     Protocol: HTTPS
-
-  AlbListenerHttp:
+  AlbListenerHttps:
     Type: AWS::ElasticLoadBalancingV2::Listener
     Properties:
+      Certificates: [ CertificateArn: !Ref DomainCertificate]
       DefaultActions:
        - Type: forward
          TargetGroupArn:
-            Ref: AlbTargetGroupHttp
+            Ref: AlbTargetGroupHttps
       LoadBalancerArn:
        Ref: Alb
+      Port: 443
+      Protocol: HTTPS
+
+  AlbListenerHttp:
+    Type: AWS::ElasticLoadBalancingV2::Listener
+    Properties:
+      DefaultActions:
+        - Type: redirect
+          RedirectConfig:
+            Protocol: "HTTPS"
+            Port: 443
+            Host: "#{host}"
+            Path: "/#{path}"
+            Query: "#{query}"
+            StatusCode: "HTTP_301"
+      LoadBalancerArn:
+        Ref: Alb
      Port: 80
      Protocol: HTTP
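The new DomainCertificate uses DNS validation, and because DomainValidationOptions points at the hosted zone, CloudFormation can create the validation CNAME on its own; the stack will still sit in CREATE_IN_PROGRESS until ACM issues the certificate. A hedged boto3 sketch like this can be used to check where validation stands while the stack is building. The FQDN is a placeholder for whatever was passed to --domain.

    import boto3

    acm = boto3.client("acm")

    def certificate_status(domain):
        certs = acm.list_certificates(
            CertificateStatuses=["PENDING_VALIDATION", "ISSUED", "VALIDATION_TIMED_OUT"]
        )["CertificateSummaryList"]
        for cert in certs:
            if cert["DomainName"] == domain:
                detail = acm.describe_certificate(
                    CertificateArn=cert["CertificateArn"])["Certificate"]
                print(cert["CertificateArn"], detail["Status"])
                for option in detail["DomainValidationOptions"]:
                    print("  validation for", option["DomainName"], "is", option["ValidationStatus"])
                return detail["Status"]
        return None

    certificate_status("hub.example.edu")  # hypothetical FQDN passed via --domain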
diff --git a/src/control_node_cf.yaml b/src/control_node_cf.yaml
index fbca351..1359880 100644
--- a/src/control_node_cf.yaml
+++ b/src/control_node_cf.yaml
@@ -20,6 +20,10 @@ Parameters:
     Description: The EC2 Key Pair to allow SSH access to the master and node instances
     Type: AWS::EC2::KeyPair::KeyName
 
+  Domain:
+    Description: FQDN which this hub will be hosted on
+    Type: String
+
 Resources:
   VPC:
     Type: AWS::EC2::VPC
@@ -256,3 +260,7 @@ Outputs:
   Instance:
     Description: The control node instance
     Value: !Ref ControlNode
+
+  Domain:
+    Description: FQDN which this hub will be hosted on
+    Value: !Ref Domain
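Domain is only threaded through the control node stack so it can be read back downstream. A minimal sketch of that read-back, following the same describe_stacks pattern that deploy_cluster_cf.py's get_cf_output() uses; the stack name assumes tag "test" and is otherwise illustrative.

    import boto3

    cloudformation = boto3.client("cloudformation")
    stack = cloudformation.describe_stacks(
        StackName="umsi-easy-hub-test-control-node")["Stacks"][0]
    outputs = {o["OutputKey"]: o["OutputValue"] for o in stack["Outputs"]}
    print(outputs["Domain"], outputs["Instance"])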
diff --git a/src/control_node_startup_script.sh b/src/control_node_startup_script.sh
index be31d93..12116d8 100644
--- a/src/control_node_startup_script.sh
+++ b/src/control_node_startup_script.sh
@@ -4,36 +4,51 @@ set -x
 exec > >(tee ~/user-data.log|logger -t user-data ) 2>&1
 
+# Quit on error
+set -e
+
 # Sanity check of args
-cd /home/ec2-user/
-for X in "$@"
-do
-echo $X >> args.txt
+for X in "$@"; do
+  echo "$X" >> args.txt
 done
 
 # Gather args passed to script
-STACK_NAME=$1
-TAG=$2
-SCRIPT_BUCKET=$3
+export STACK_NAME=$1
+export TAG=$2
+export SCRIPT_BUCKET=$3
 
 # Ensure you are in the home directory of ec2-user
-cd /home/ec2-user/
+cd /home/ec2-user/ || exit
 export HOME=/home/ec2-user/
 
 # Include the local binaries in the path (this is where we will put the aws and kubectl binaries)
 export PATH=/usr/local/bin/:$PATH && echo "export PATH=/usr/local/bin/:$PATH" >> ~/.bashrc
 
+# Install aws cli v2
+curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
+unzip awscliv2.zip
+sudo ./aws/install
+
+# Install kubectl binary which will expose control plane configuration options
+curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.21.2/2021-07-05/bin/linux/amd64/kubectl
+chmod +x ./kubectl
+sudo cp ./kubectl /usr/local/bin/kubectl
+
+# Install eksctl for AWS-specific operations inside an EKS cluster
+curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
+sudo mv /tmp/eksctl /usr/local/bin
+
 # Download files from s3
-aws s3 cp --recursive s3://${SCRIPT_BUCKET}/ .
+aws s3 cp --recursive "s3://$SCRIPT_BUCKET/" .
 
 # Fetch the SSH key from the secret store
-aws secretsmanager get-secret-value --secret-id umsi-easy-hub-${TAG}.pem \
-    --query SecretString --output text --region us-east-1 > umsi-easy-hub-${TAG}.pem
+# export KEY_NAME="umsi-easy-hub-$TAG.pem"
+# aws secretsmanager get-secret-value --secret-id $KEY_NAME \
+#     --query SecretString --output text > $KEY_NAME
 
 # Install packages
 sudo yum install python37 python37-pip -y
-sudo pip3 install boto3
-sudo pip3 install pyyaml
+sudo pip3 install boto3 pyyaml
 
 # Configure aws cli region
 mkdir /home/ec2-user/.aws
@@ -42,40 +57,45 @@ sudo chown -R 1000:100 /home/ec2-user/.aws/
 
 # Deploy cluster cloudformation stack. This includes the EKS, EFS, Autoscaler, and Loadbalancer
 # This script needs output from the control node cloudformation stack
-python3 deploy_cluster_cf.py --control-node-stackname ${STACK_NAME}
+python3 deploy_cluster_cf.py --control-node-stackname "$STACK_NAME"
 
 # Wait for the cluster cloudformation stack to complete before continuing...
-aws cloudformation wait stack-create-complete --stack-name "umsi-easy-hub-${TAG}-cluster"
+aws cloudformation wait stack-create-complete --stack-name "umsi-easy-hub-$TAG-cluster"
 
 # Get output of cloudformation stack
-output=($(python3 get_cluster_cf_output.py --cluster-stackname "umsi-easy-hub-${TAG}-cluster") )
+IFS=" " read -r -a output <<< "$(python3 get_cluster_cf_output.py --cluster-stackname "umsi-easy-hub-$TAG-cluster")"
+echo "${output[*]}"
+export EKS_NAME="${output[0]}"
+export NODE_ROLE_ARN="${output[1]}"
+export ASG_ARN="${output[2]}"
 
-# ${output[0]} = Tag
-# ${output[1]} = EksName
-# ${output[2]} = NodeRoleArn
-# ${output[3]} = Asg
-
-# Get kubectl binary which will expose control plane configuration options
-curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.21.2/2021-07-05/bin/linux/amd64/kubectl
-chmod +x ./kubectl
-sudo cp ./kubectl /usr/local/bin/kubectl
-
 # Get aws-iam-authenticator to authenticate kubectl binary with our EKS backplane
 curl -o aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.11.5/2018-12-06/bin/linux/amd64/aws-iam-authenticator
 chmod +x ./aws-iam-authenticator
 sudo cp ./aws-iam-authenticator /usr/local/bin/aws-iam-authenticator
 
-# Install aws cli
-# echo yes | sudo amazon-linux-extras install python3
-sudo rm /usr/bin/aws
-pip3 install --upgrade awscli --user
-sudo cp ~/.local/bin/aws /usr/bin/aws
-
 # Sync kubectl with the EKS we want
-aws eks update-kubeconfig --name ${output[1]}
+aws eks update-kubeconfig --name "$EKS_NAME"
 
 curl -O https://amazon-eks.s3-us-west-2.amazonaws.com/cloudformation/2019-01-09/aws-auth-cm.yaml
-sed -i -e "s;<ARN of instance role (not instance profile)>;${output[2]};g" aws-auth-cm.yaml
+sed -i -e "s;<ARN of instance role (not instance profile)>;$NODE_ROLE_ARN;g" aws-auth-cm.yaml
 kubectl apply -f aws-auth-cm.yaml
 
+# Upgrade some internal components of the cluster
+eksctl utils update-kube-proxy --cluster "$EKS_NAME" --approve
+eksctl utils update-aws-node --cluster "$EKS_NAME" --approve
+eksctl utils update-coredns --cluster "$EKS_NAME" --approve
+
+aws eks create-addon \
+  --cluster-name "$EKS_NAME" \
+  --addon-name vpc-cni \
+  --addon-version v1.9.0 \
+  --service-account-role-arn "$NODE_ROLE_ARN" \
+  --resolve-conflicts OVERWRITE
+
 # Install Helm per https://helm.sh/docs/intro/install/
 curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
@@ -90,7 +110,7 @@ helm repo add jupyterhub https://jupyterhub.github.io/helm-chart/
 helm repo update
 export RELEASE=jhub
 export NAMESPACE=jhub
-JUPYTERHUB_IMAGE="jupyterhub/jupyterhub"
+export JUPYTERHUB_IMAGE="jupyterhub/jupyterhub"
 
 # Create namespace because helm expects it to exist already.
 kubectl create namespace $NAMESPACE
@@ -99,8 +119,10 @@ helm upgrade --install $RELEASE $JUPYTERHUB_IMAGE --namespace $NAMESPACE --versi
 # Add in autoscaler
 sudo touch /etc/cron.d/autoscale_daemon
 sudo chmod 777 /etc/cron.d/autoscale_daemon
-sudo echo "* * * * * ec2-user python3 /home/ec2-user/autoscale_daemon.py --asg=${output[3]}" >> /etc/cron.d/autoscale_daemon
-sudo echo "* * * * * ec2-user sleep 15 && python3 /home/ec2-user/autoscale_daemon.py --asg=${output[3]}" >> /etc/cron.d/autoscale_daemon
-sudo echo "* * * * * ec2-user sleep 30 && python3 /home/ec2-user/autoscale_daemon.py --asg=${output[3]}" >> /etc/cron.d/autoscale_daemon
-sudo echo "* * * * * ec2-user sleep 45 && python3 /home/ec2-user/autoscale_daemon.py --asg=${output[3]}" >> /etc/cron.d/autoscale_daemon
+echo "* * * * * ec2-user python3 /home/ec2-user/autoscale_daemon.py --asg=$ASG_ARN
+* * * * * ec2-user sleep 15 && python3 /home/ec2-user/autoscale_daemon.py --asg=$ASG_ARN
+* * * * * ec2-user sleep 30 && python3 /home/ec2-user/autoscale_daemon.py --asg=$ASG_ARN
+* * * * * ec2-user sleep 45 && python3 /home/ec2-user/autoscale_daemon.py --asg=$ASG_ARN" | sudo tee -a /etc/cron.d/autoscale_daemon
 sudo chmod 644 /etc/cron.d/autoscale_daemon
+
+env
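The `aws eks create-addon` call above returns as soon as the request is accepted, and published vpc-cni addon versions normally carry an `-eksbuild` suffix, so the bare `v1.9.0` may need adjusting. A boto3 sketch along these lines, with the cluster name and role ARN assumed to come from the stack outputs the script already reads, can submit the addon and wait for it to become ACTIVE before the helm steps run:

    import time
    import boto3

    eks = boto3.client("eks")

    def create_addon_and_wait(cluster, role_arn, version):
        eks.create_addon(
            clusterName=cluster,
            addonName="vpc-cni",
            addonVersion=version,
            serviceAccountRoleArn=role_arn,
            resolveConflicts="OVERWRITE",
        )
        while True:
            status = eks.describe_addon(
                clusterName=cluster, addonName="vpc-cni")["addon"]["status"]
            print("vpc-cni addon status:", status)
            if status in ("ACTIVE", "DEGRADED", "CREATE_FAILED"):
                return status
            time.sleep(15)

    # Placeholder values; in the script these would be $EKS_NAME / $NODE_ROLE_ARN.
    create_addon_and_wait("umsi-easy-hub-test",
                          "arn:aws:iam::123456789012:role/example-node-role",
                          version="v1.9.0-eksbuild.1")  # example published version string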
diff --git a/src/deploy_cluster_cf.py b/src/deploy_cluster_cf.py
index c99e87b..63428f2 100644
--- a/src/deploy_cluster_cf.py
+++ b/src/deploy_cluster_cf.py
@@ -1,6 +1,7 @@
 import boto3
 import argparse
 import yaml
+import pprint
 
 # Still need to add logging!!!
 # Currently, the print statements should be caught by the control_node_startup_script.sh logger
@@ -30,17 +31,27 @@ def get_cf_output(config):
         StackName=config['ControlNodeStackname']
     )
 
-    print(response)
-
     output = {}
     for item in response['Stacks'][0]['Outputs']:
         output[item['OutputKey']] = item['OutputValue']
 
     config.update(output)
-    # config['tag'] = 'test'
 
     return config
 
+
+def find_hosted_zone(config):
+
+    name = '.'.join(config['Domain'].split('.')[-2:]) + '.'
+
+    for zone in boto3.client('route53').list_hosted_zones()['HostedZones']:
+        if name == zone['Name']:
+            config['HostedZoneId'] = zone['Id'].split('/')[-1]
+            break
+
+    return config
+
+
 # Deploy the cluster cloudformation using the boto client
 def create_cluster(config):
 
@@ -80,6 +91,12 @@ def create_cluster(config):
            }, {
                'ParameterKey': 'ControlNodeSecurityGroup', 'ParameterValue': config['ControlNodeSecurityGroup'], 'UsePreviousValue': False
+            },
+            {
+                'ParameterKey': 'Domain', 'ParameterValue': config['Domain'], 'UsePreviousValue': False
+            },
+            {
+                'ParameterKey': 'HostedZoneId', 'ParameterValue': config['HostedZoneId'], 'UsePreviousValue': False
            }
        ],
        Capabilities=[
@@ -108,7 +125,10 @@
 
 config = load_config(config)
 
-print(config)
+config = find_hosted_zone(config)
+
+print("Creating cluster with the following config:")
+pprint.pprint(config)
 
 # Now deploy the cluster cloudformation
 create_cluster(config)
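find_hosted_zone() assumes the zone is exactly the last two labels of --domain, reads only the first page of list_hosted_zones, and leaves HostedZoneId unset (a later KeyError in create_cluster) when nothing matches. A more defensive variant might look like the sketch below; it is only an illustration, not what the PR ships.

    import boto3

    def find_hosted_zone_id(domain):
        route53 = boto3.client("route53")
        zones = []
        for page in route53.get_paginator("list_hosted_zones").paginate():
            zones.extend(page["HostedZones"])

        labels = domain.rstrip(".").split(".")
        # Try "hub.cool.example.edu.", then "cool.example.edu.", and so on,
        # taking the longest public zone that actually exists.
        for i in range(len(labels) - 1):
            candidate = ".".join(labels[i:]) + "."
            for zone in zones:
                if zone["Name"] == candidate and not zone["Config"]["PrivateZone"]:
                    return zone["Id"].split("/")[-1]
        raise RuntimeError("no public hosted zone found for {}".format(domain))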
diff --git a/src/get_cluster_cf_output.py b/src/get_cluster_cf_output.py
index 6269be1..1563044 100644
--- a/src/get_cluster_cf_output.py
+++ b/src/get_cluster_cf_output.py
@@ -27,4 +27,4 @@
 
 # print(output)
 
-print(output['Tag'], output['EksName'], output['NodeRoleArn'], output['Asg'])
\ No newline at end of file
+print(output['EksName'], output['NodeRoleArn'], output['Asg'])
diff --git a/src/node_startup_script.sh b/src/node_startup_script.sh
index 6829278..a0be52e 100644
--- a/src/node_startup_script.sh
+++ b/src/node_startup_script.sh
@@ -4,9 +4,9 @@ set -x
 exec > >(tee ~/user-data.log|logger -t user-data ) 2>&1
 
-cd /home/ec2-user/
+cd /home/ec2-user/ || exit 1
 
-# You can manually change the value of this if you want a specific EFS 
+# You can manually change the value of this if you want a specific EFS
 # instead of the default EFS created in the Cloudformation
 EFSID="EFSIDREPLACE"
@@ -14,10 +14,8 @@ EFSID="EFSIDREPLACE"
 echo $EFSID > /home/ec2-user/efsid.txt
 
 # Sanity check of args
-cd /home/ec2-user/
-for X in "$@"
-do
-echo $X >> args.txt
+for X in "$@"; do
+  echo "$X" >> args.txt
 done
 
 AlbTargetGroupHttpArn=$1
@@ -34,8 +32,9 @@ yum install -y amazon-efs-utils
 # Configure AWS region and then sanity check
 mkdir ~/.aws/ && echo -e '[default]\nregion = us-east-1\n' > ~/.aws/config
 
-export INFO="$(aws efs describe-mount-targets --file-system-id $EFSID)"
-echo $INFO > info.txt
+INFO="$(aws efs describe-mount-targets --file-system-id $EFSID)"
+export INFO
+echo "$INFO" > info.txt
 
-# Mount all possible EFS endpoints (quick and dirty way of mounting proper endpoint). There's on in each subnet
+# Mount all possible EFS endpoints (quick and dirty way of mounting proper endpoint). There's one in each subnet
 # this is probably deprecated now that the cluster is only in one subnet
@@ -47,17 +46,18 @@ for mt in mountTargets:
     ipArray.append(mt['IpAddress'])
 ipStr = ' '.join(ipArray)
 print ipStr
-END)
+END
+)
 
-echo $OUTPUT > output.txt
-IFS=" " read -a IPARR <<< "$OUTPUT"
+echo "$OUTPUT" > output.txt
+IFS=" " read -r -a IPARR <<< "$OUTPUT"
 
 for IP in "${IPARR[@]}"; do
-    timeout 5 mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport $IP:/ mnt
+    timeout 5 mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport "$IP:/" mnt
 done
 
 chmod -R 777 mnt/shared
 
 # Register with aws application load balancer
-aws elbv2 register-targets --target-group-arn $AlbTargetGroupHttpArn --targets Id=$(curl http://169.254.169.254/latest/meta-data/instance-id),Port=30254 --region us-east-1
+aws elbv2 register-targets --target-group-arn "$AlbTargetGroupHttpArn" --targets Id="$(curl http://169.254.169.254/latest/meta-data/instance-id)",Port=30254 --region us-east-1
 
-aws elbv2 register-targets --target-group-arn $AlbTargetGroupHttpsArn --targets Id=$(curl http://169.254.169.254/latest/meta-data/instance-id),Port=30255 --region us-east-1
\ No newline at end of file
+aws elbv2 register-targets --target-group-arn "$AlbTargetGroupHttpsArn" --targets Id="$(curl http://169.254.169.254/latest/meta-data/instance-id)",Port=30255 --region us-east-1
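The mount loop above still tries every EFS mount target and relies on `timeout` to skip the unreachable ones. A narrower approach, sketched here with boto3 under the same assumption the script's curl calls already make (IMDSv1 metadata is reachable), would be to mount only the target in the node's own availability zone; the EFS id is a placeholder for whatever replaces EFSIDREPLACE.

    import urllib.request
    import boto3

    def mount_target_ip_for_this_az(efs_id):
        az = urllib.request.urlopen(
            "http://169.254.169.254/latest/meta-data/placement/availability-zone",
            timeout=2,
        ).read().decode()
        targets = boto3.client("efs").describe_mount_targets(
            FileSystemId=efs_id)["MountTargets"]
        for target in targets:
            if target.get("AvailabilityZoneName") == az and target["LifeCycleState"] == "available":
                return target["IpAddress"]
        raise RuntimeError("no available mount target in {}".format(az))

    print(mount_target_ip_for_this_az("fs-12345678"))  # placeholder for EFSIDREPLACE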