Skip to content

Commit

Permalink
pkg/steps/clusterinstall/template: Sync with openshift/release
Browse files Browse the repository at this point in the history
Bringing over a number of changes which have landed in
ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml
as of openshift/release@016eb4ed27 (Merge pull request
openshift/release#6505 from hongkailiu/clusterReaders, 2019-12-19).
One series improved the kill logic:

* openshift/release@9cd158adf3 (template: Use a more correct kill
  command, 2019-12-03, openshift/release#6223).
* openshift/release@d0744e520d (exit with 0 even if kill failed,
  2019-12-09, openshift/release#6295).

Another series was around AWS instance console logs:

* openshift/release@e102a16d89
  (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e:
  Gather node console logs on AWS, 2019-12-02,
  openshift/release#6189).
* openshift/release@26fde70045
  (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e:
  Set AWS_DEFAULT_REGION, 2019-12-04, openshift/release#6249).

And there was also:

* openshift/release@cdf97164aa (templates: Add large and xlarge
  variants, 2019-11-25, openshift/release#6081).
* openshift/release@8cbef5e4a7
  (ci-operator/templates/openshift/installer/cluster-launch-installer-e2e:
  Error-catching for Google OAuth pokes, 2019-12-02,
  openshift/release#6190).
* openshift/release@ad29eda8dd (template: Gather the prometheus target
  metadata during teardown, 2019-12-12, openshift/release#6379).
  • Loading branch information
wking committed Dec 20, 2019
1 parent fb6c435 commit 92631f1
Showing 1 changed file with 32 additions and 3 deletions.
35 changes: 32 additions & 3 deletions pkg/steps/clusterinstall/template.go
Expand Up @@ -156,7 +156,7 @@ objects:
export PATH=/usr/libexec/origin:$PATH
trap 'touch /tmp/shared/exit' EXIT
trap 'kill $(jobs -p); exit 0' TERM
trap 'jobs -p | xargs -r kill || true; exit 0' TERM
function fips_check() {
oc --insecure-skip-tls-verify --request-timeout=60s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodelist
Expand Down Expand Up @@ -425,6 +425,12 @@ objects:
workers=0
fi
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
master_type=null
if [[ "${CLUSTER_VARIANT}" =~ "xlarge" ]]; then
master_type=m5.8xlarge
elif [[ "${CLUSTER_VARIANT}" =~ "large" ]]; then
master_type=m5.4xlarge
fi
subnets="[]"
if [[ "${CLUSTER_VARIANT}" =~ "shared-vpc" ]]; then
case $((RANDOM % 4)) in
Expand All @@ -446,6 +452,7 @@ objects:
replicas: 3
platform:
aws:
type: ${master_type}
zones:
- us-east-1a
- us-east-1b
Expand Down Expand Up @@ -517,7 +524,7 @@ objects:
elif [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
# HACK: try to "poke" the token endpoint before the test starts
for i in $(seq 1 30); do
code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' )"
code="$( curl -s -o /dev/null -w "%{http_code}" https://oauth2.googleapis.com/token -X POST -d '' || echo "Failed to POST https://oauth2.googleapis.com/token with $?" 1>&2)"
if [[ "${code}" == "400" ]]; then
break
fi
Expand Down Expand Up @@ -706,6 +713,10 @@ objects:
value: /etc/openshift-installer/gce.json
- name: KUBECONFIG
value: /tmp/artifacts/installer/auth/kubeconfig
- name: USER
value: test
- name: HOME
value: /tmp
command:
- /bin/bash
- -c
Expand Down Expand Up @@ -773,6 +784,7 @@ objects:
fi
oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.spec.providerID}{"\n"}{end}' | sed 's|.*/||' > /tmp/node-provider-IDs
oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
Expand Down Expand Up @@ -813,6 +825,22 @@ objects:
queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap
done < /tmp/nodes
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
# FIXME: get epel-release or otherwise add awscli to our teardown image
export PATH="${HOME}/.local/bin:${PATH}"
easy_install --user pip # our Python 2.7.5 is even too old for ensurepip
pip install --user awscli
export AWS_DEFAULT_REGION="$(python -c 'import json; data = json.load(open("/tmp/artifacts/installer/metadata.json")); print(data["aws"]["region"])')"
echo "gathering node console output from ${AWS_DEFAULT_REGION}"
fi
while IFS= read -r i; do
mkdir -p "/tmp/artifacts/nodes/${i}"
if [[ "${CLUSTER_TYPE}" = "aws" ]]; then
queue /tmp/artifacts/nodes/$i/console aws ec2 get-console-output --instance-id "${i}"
fi
done < /tmp/node-provider-IDs
FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false
FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false
Expand All @@ -836,6 +864,7 @@ objects:
echo "Snapshotting prometheus (may take 15s) ..."
queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus .
FILTER=gzip queue /tmp/artifacts/metrics/prometheus-target-metadata.json.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- /bin/bash -c "curl -G http://localhost:9090/api/v1/targets/metadata --data-urlencode 'match_target={instance!=\"\"}'"
echo "Running must-gather..."
mkdir -p /tmp/artifacts/must-gather
Expand All @@ -849,7 +878,7 @@ objects:
}
trap 'teardown' EXIT
trap 'kill $(jobs -p); exit 0' TERM
trap 'jobs -p | xargs -r kill || true; exit 0' TERM
for i in $(seq 1 220); do
if [[ -f /tmp/shared/exit ]]; then
Expand Down

0 comments on commit 92631f1

Please sign in to comment.