forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yml
3638 lines (3208 loc) · 125 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# WARNING: DO NOT EDIT THIS FILE DIRECTLY!!!
# See the README.md in this directory.
# IMPORTANT: To update Docker image version, please first update
# https://github.com/pytorch/ossci-job-dsl/blob/master/src/main/groovy/ossci/pytorch/DockerVersion.groovy and
# https://github.com/pytorch/ossci-job-dsl/blob/master/src/main/groovy/ossci/caffe2/DockerVersion.groovy,
# and then update DOCKER_IMAGE_VERSION at the top of the following files:
# * cimodel/pytorch_build_definitions.py
# * cimodel/caffe2_build_definitions.py
docker_config_defaults: &docker_config_defaults
user: jenkins
aws_auth:
# This IAM user only allows read-write access to ECR
aws_access_key_id: ${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_WRITE_V3}
aws_secret_access_key: ${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_WRITE_V3}
# This system setup script is meant to run before the CI-related scripts, e.g.,
# installing Git client, checking out code, setting up CI env, and
# building/testing.
setup_linux_system_environment: &setup_linux_system_environment
name: Set Up System Environment
no_output_timeout: "1h"
command: |
set -e
# Set up CircleCI GPG keys for apt, if needed
curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add -
# NOTE: We only perform the merge in build step and not in test step, because
# all source files will be shared from build to test
install_official_git_client: &install_official_git_client
name: Install Official Git Client
no_output_timeout: "1h"
command: |
set -e
sudo apt-get -q -y update
sudo apt-get -q -y install openssh-client git
install_doc_push_script: &install_doc_push_script
name: Install the doc push script
no_output_timeout: "2m"
command: |
cat >/home/circleci/project/doc_push_script.sh <<EOL
# =================== The following code **should** be executed inside Docker container ===================
# This is where the local pytorch install in the docker image is located
pt_checkout="/var/lib/jenkins/workspace"
# Since we're cat-ing this file, we need to escape all $'s
echo "doc_push_script.sh: Invoked with \$*"
git clone https://yf225:${GITHUB_PYTORCHBOT_TOKEN}@github.com/pytorch/pytorch.github.io -b site
pushd pytorch.github.io
set -ex
# Argument 1: Where to copy the built documentation to
# (pytorch.github.io/$install_path)
install_path="\$1"
if [ -z "\$install_path" ]; then
echo "error: doc_push_script.sh: install_path (arg1) not specified"
exit 1
fi
# Argument 2: What version of the docs we are building.
version="\$2"
if [ -z "\$version" ]; then
echo "error: doc_push_script.sh: version (arg2) not specified"
exit 1
fi
is_master_doc=false
if [ "\$version" == "master" ]; then
is_master_doc=true
fi
# Argument 3: (optional) If present, we will NOT do any pushing. Used for testing.
dry_run=false
if [ "\$3" != "" ]; then
dry_run=true
fi
echo "install_path: \$install_path version: \$version dry_run: \$dry_run"
export LC_ALL=C
export PATH=/opt/conda/bin:$PATH
rm -rf pytorch || true
# Get all the documentation sources, put them in one place
pushd "\$pt_checkout"
git clone https://github.com/pytorch/vision
pushd vision
conda install -q pillow
time python setup.py install
popd
pushd docs
rm -rf source/torchvision
cp -a ../vision/docs/source source/torchvision
# Build the docs
pip -q install -r requirements.txt || true
if [ "\$is_master_doc" = true ]; then
make html
else
make html-stable
fi
# Move them into the docs repo
popd
popd
git rm -rf "\$install_path" || true
mv "\$pt_checkout/docs/build/html" "\$install_path"
# Add the version handler by search and replace.
# XXX: Consider moving this to the docs Makefile or site build
if [ "\$is_master_doc" = true ]; then
find "\$install_path" -name "*.html" -print0 | xargs -0 perl -pi -w -e "s@master\s+\((\d\.\d\.[A-Fa-f0-9]+\+[A-Fa-f0-9]+)\s+\)@<a href='http://pytorch.org/docs/versions.html'>\1 \▼</a>@g"
else
find "\$install_path" -name "*.html" -print0 | xargs -0 perl -pi -w -e "s@master\s+\((\d\.\d\.[A-Fa-f0-9]+\+[A-Fa-f0-9]+)\s+\)@<a href='http://pytorch.org/docs/versions.html'>\$version \▼</a>@g"
fi
git add "\$install_path" || true
git status
git config user.email "soumith+bot@pytorch.org"
git config user.name "pytorchbot"
# If there aren't changes, don't make a commit; push is no-op
git commit -m "auto-generating sphinx docs" || true
git status
if [ "\$dry_run" = false ]; then
echo "Pushing to pytorch.github.io:site"
git push origin site
else
echo "Skipping push due to dry_run"
fi
popd
# =================== The above code **should** be executed inside Docker container ===================
EOL
chmod +x /home/circleci/project/doc_push_script.sh
# `setup_ci_environment` has to be run **after** the ``checkout`` step because
# it writes into the checkout directory and otherwise CircleCI will complain
# that
# Directory (/home/circleci/project) you are trying to checkout to is not empty and not git repository
setup_ci_environment: &setup_ci_environment
name: Set Up CI Environment After Checkout
no_output_timeout: "1h"
command: |
set -e
# Set up NVIDIA docker repo
curl -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
echo "deb https://nvidia.github.io/libnvidia-container/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
echo "deb https://nvidia.github.io/nvidia-container-runtime/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
echo "deb https://nvidia.github.io/nvidia-docker/ubuntu14.04/amd64 /" | sudo tee -a /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get -q -y update
sudo apt-get -q -y remove linux-image-generic linux-headers-generic linux-generic docker-ce
# WARNING: Docker version is hardcoded here; you must update the
# version number below for docker-ce and nvidia-docker2 to get newer
# versions of Docker. We hardcode these numbers because we kept
# getting broken CI when Docker would update their docker version,
# and nvidia-docker2 would be out of date for a day until they
# released a newer version of their package.
sudo apt-get -q -y install \
linux-headers-$(uname -r) \
linux-image-generic \
moreutils \
docker-ce=18.06.2~ce~3-0~ubuntu \
nvidia-docker2=2.0.3+docker18.06.2-1 \
expect-dev
sudo pkill -SIGHUP dockerd
sudo pip -q install awscli==1.16.35
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
wget 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-410.79.run'
sudo /bin/bash ./NVIDIA-Linux-x86_64-410.79.run -s --no-drm
nvidia-smi
fi
if [[ "${BUILD_ENVIRONMENT}" == *-build ]]; then
echo "declare -x IN_CIRCLECI=1" > /home/circleci/project/env
echo "declare -x COMMIT_SOURCE=${CIRCLE_BRANCH}" >> /home/circleci/project/env
echo "declare -x PYTHON_VERSION=${PYTHON_VERSION}" >> /home/circleci/project/env
echo "declare -x SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2" >> /home/circleci/project/env
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
echo "declare -x TORCH_CUDA_ARCH_LIST=5.2" >> /home/circleci/project/env
fi
export SCCACHE_MAX_JOBS=`expr $(nproc) - 1`
export MEMORY_LIMIT_MAX_JOBS=8 # the "large" resource class on CircleCI has 32 CPU cores, if we use all of them we'll OOM
export MAX_JOBS=$(( ${SCCACHE_MAX_JOBS} > ${MEMORY_LIMIT_MAX_JOBS} ? ${MEMORY_LIMIT_MAX_JOBS} : ${SCCACHE_MAX_JOBS} ))
echo "declare -x MAX_JOBS=${MAX_JOBS}" >> /home/circleci/project/env
if [[ "${BUILD_ENVIRONMENT}" == *xla* ]]; then
# This IAM user allows write access to S3 bucket for sccache & bazels3cache
echo "declare -x AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V1}" >> /home/circleci/project/env
echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_AND_XLA_BAZEL_S3_BUCKET_V1}" >> /home/circleci/project/env
else
# This IAM user allows write access to S3 bucket for sccache
echo "declare -x AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V3}" >> /home/circleci/project/env
echo "declare -x AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V3}" >> /home/circleci/project/env
fi
fi
# This IAM user only allows read-write access to ECR
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_ECR_READ_WRITE_V3}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_ECR_READ_WRITE_V3}
eval $(aws ecr get-login --region us-east-1 --no-include-email)
macos_brew_update: &macos_brew_update
name: Brew update and install moreutils, expect and libomp
no_output_timeout: "1h"
command: |
set -ex
pwd
ls -lah
# moreutils installs a `parallel` executable by default, which conflicts
# with the executable from the GNU `parallel`, so we must unlink GNU
# `parallel` first, and relink it afterwards
brew update
brew unlink parallel
brew install moreutils
brew link parallel --overwrite
brew install expect
brew install libomp
##############################################################################
# Linux build defaults
##############################################################################
pytorch_linux_build_defaults: &pytorch_linux_build_defaults
resource_class: large
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *install_official_git_client
- checkout
- run:
<<: *setup_ci_environment
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
# Pull Docker image and run build
echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
docker pull ${DOCKER_IMAGE} >/dev/null
export id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
git submodule sync && git submodule update -q --init
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/build.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
# Push intermediate Docker image for next phase to use
if [ -z "${BUILD_ONLY}" ]; then
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
docker commit "$id" ${COMMIT_DOCKER_IMAGE}
docker push ${COMMIT_DOCKER_IMAGE}
fi
pytorch_linux_test_defaults: &pytorch_linux_test_defaults
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *setup_ci_environment
- run:
name: Test
no_output_timeout: "90m"
command: |
set -e
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
export id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
else
export id=$(docker run -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
fi
if [ -n "${MULTI_GPU}" ]; then
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/multigpu-test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
else
export COMMAND='((echo "export BUILD_ENVIRONMENT=${BUILD_ENVIRONMENT}" && echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && .jenkins/pytorch/test.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
fi
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
caffe2_linux_build_defaults: &caffe2_linux_build_defaults
resource_class: large
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *install_official_git_client
- checkout
- run:
<<: *setup_ci_environment
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
cat >/home/circleci/project/ci_build_script.sh <<EOL
# =================== The following code will be executed inside Docker container ===================
set -ex
export BUILD_ENVIRONMENT="$BUILD_ENVIRONMENT"
# Reinitialize submodules
git submodule sync && git submodule update -q --init --recursive
# conda must be added to the path for Anaconda builds (this location must be
# the same as that in install_anaconda.sh used to build the docker image)
if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
export PATH=/opt/conda/bin:$PATH
sudo chown -R jenkins:jenkins '/opt/conda'
fi
# Build
./.jenkins/caffe2/build.sh
# Show sccache stats if it is running
if pgrep sccache > /dev/null; then
sccache --show-stats
fi
# =================== The above code will be executed inside Docker container ===================
EOL
chmod +x /home/circleci/project/ci_build_script.sh
echo "DOCKER_IMAGE: "${DOCKER_IMAGE}
docker pull ${DOCKER_IMAGE} >/dev/null
export id=$(docker run -t -d -w /var/lib/jenkins ${DOCKER_IMAGE})
docker cp /home/circleci/project/. $id:/var/lib/jenkins/workspace
export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_build_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
# Push intermediate Docker image for next phase to use
if [ -z "${BUILD_ONLY}" ]; then
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-cmake-${CIRCLE_SHA1}
else
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
fi
docker commit "$id" ${COMMIT_DOCKER_IMAGE}
docker push ${COMMIT_DOCKER_IMAGE}
fi
caffe2_linux_test_defaults: &caffe2_linux_test_defaults
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *setup_ci_environment
- run:
name: Test
no_output_timeout: "1h"
command: |
set -e
# TODO: merge this into Caffe2 test.sh
cat >/home/circleci/project/ci_test_script.sh <<EOL
# =================== The following code will be executed inside Docker container ===================
set -ex
export BUILD_ENVIRONMENT="$BUILD_ENVIRONMENT"
# libdc1394 (dependency of OpenCV) expects /dev/raw1394 to exist...
sudo ln /dev/null /dev/raw1394
# conda must be added to the path for Anaconda builds (this location must be
# the same as that in install_anaconda.sh used to build the docker image)
if [[ "${BUILD_ENVIRONMENT}" == conda* ]]; then
export PATH=/opt/conda/bin:$PATH
fi
# Upgrade SSL module to avoid old SSL warnings
pip -q install --user --upgrade pyOpenSSL ndg-httpsclient pyasn1
pip -q install --user -b /tmp/pip_install_onnx "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
# Build
./.jenkins/caffe2/test.sh
# Remove benign core dumps.
# These are tests for signal handling (including SIGABRT).
rm -f ./crash/core.fatal_signal_as.*
rm -f ./crash/core.logging_test.*
# =================== The above code will be executed inside Docker container ===================
EOL
chmod +x /home/circleci/project/ci_test_script.sh
if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-cmake-${CIRCLE_SHA1}
else
export COMMIT_DOCKER_IMAGE=${DOCKER_IMAGE}-${CIRCLE_SHA1}
fi
echo "DOCKER_IMAGE: "${COMMIT_DOCKER_IMAGE}
docker pull ${COMMIT_DOCKER_IMAGE} >/dev/null
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
export id=$(docker run --runtime=nvidia -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
else
export id=$(docker run -t -d -w /var/lib/jenkins ${COMMIT_DOCKER_IMAGE})
fi
docker cp /home/circleci/project/. "$id:/var/lib/jenkins/workspace"
export COMMAND='((echo "source ./workspace/env" && echo "sudo chown -R jenkins workspace && cd workspace && ./ci_test_script.sh") | docker exec -u jenkins -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
##############################################################################
# Macos build defaults
##############################################################################
caffe2_macos_build_defaults: &caffe2_macos_build_defaults
macos:
xcode: "9.0"
steps:
- checkout
- run:
<<: *macos_brew_update
- run:
name: Build
no_output_timeout: "1h"
command: |
set -e
export IN_CIRCLECI=1
brew install cmake
# Reinitialize submodules
git submodule sync && git submodule update -q --init --recursive
# Reinitialize path (see man page for path_helper(8))
eval `/usr/libexec/path_helper -s`
# Use Homebrew Python if configured to do so
if [ "${PYTHON_INSTALLATION}" == "homebrew" ]; then
export PATH=/usr/local/opt/python/libexec/bin:/usr/local/bin:$PATH
fi
pip -q install numpy
# Install Anaconda if we need to
if [ -n "${CAFFE2_USE_ANACONDA}" ]; then
rm -rf ${TMPDIR}/anaconda
curl -o ${TMPDIR}/conda.sh https://repo.continuum.io/miniconda/Miniconda${ANACONDA_VERSION}-latest-MacOSX-x86_64.sh
chmod +x ${TMPDIR}/conda.sh
/bin/bash ${TMPDIR}/conda.sh -b -p ${TMPDIR}/anaconda
rm -f ${TMPDIR}/conda.sh
export PATH="${TMPDIR}/anaconda/bin:${PATH}"
source ${TMPDIR}/anaconda/bin/activate
fi
# Install sccache
sudo curl https://s3.amazonaws.com/ossci-macos/sccache --output /usr/local/bin/sccache
sudo chmod +x /usr/local/bin/sccache
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2
# This IAM user allows write access to S3 bucket for sccache
export AWS_ACCESS_KEY_ID=${CIRCLECI_AWS_ACCESS_KEY_FOR_SCCACHE_S3_BUCKET_V3}
export AWS_SECRET_ACCESS_KEY=${CIRCLECI_AWS_SECRET_KEY_FOR_SCCACHE_S3_BUCKET_V3}
export SCCACHE_BIN=${PWD}/sccache_bin
mkdir -p ${SCCACHE_BIN}
if which sccache > /dev/null; then
printf "#!/bin/sh\nexec sccache $(which clang++) \$*" > "${SCCACHE_BIN}/clang++"
chmod a+x "${SCCACHE_BIN}/clang++"
printf "#!/bin/sh\nexec sccache $(which clang) \$*" > "${SCCACHE_BIN}/clang"
chmod a+x "${SCCACHE_BIN}/clang"
export PATH="${SCCACHE_BIN}:$PATH"
fi
# Build
if [ "${BUILD_IOS:-0}" -eq 1 ]; then
unbuffer scripts/build_ios.sh 2>&1 | ts
elif [ -n "${CAFFE2_USE_ANACONDA}" ]; then
# All conda build logic should be in scripts/build_anaconda.sh
unbuffer scripts/build_anaconda.sh 2>&1 | ts
else
unbuffer scripts/build_local.sh 2>&1 | ts
fi
# Show sccache stats if it is running
if which sccache > /dev/null; then
sccache --show-stats
fi
##############################################################################
# Binary build (nightlies nightly build) defaults
# The binary builds use the docker executor b/c at time of writing the machine
# executor is limited to only two cores and is painfully slow (4.5+ hours per
# GPU build). But the docker executor cannot be run with --runtime=nvidia, and
# so the binary test/upload jobs must run on a machine executor. The package
# built in the build job is persisted to the workspace, which the test jobs
# expect. The test jobs just run a few quick smoke tests (very similar to the
# second-round-user-facing smoke tests above) and then upload the binaries to
# their final locations. The upload part requires credentials that should only
# be available to org-members.
##############################################################################
binary_populate_env: &binary_populate_env
name: Set up env
command: |
set -ex
export TZ=UTC
# We need to write an envfile to persist these variables to following
# steps, but the location of the envfile depends on the circleci executor
if [[ "$(uname)" == Darwin ]]; then
# macos executor (builds and tests)
workdir="/Users/distiller/project"
elif [[ -d "/home/circleci/project" ]]; then
# machine executor (binary tests)
workdir="/home/circleci/project"
else
# docker executor (binary builds)
workdir="/"
fi
envfile="$workdir/env"
touch "$envfile"
chmod +x "$envfile"
# Parse the BUILD_ENVIRONMENT to package type, python, and cuda
configs=($BUILD_ENVIRONMENT)
export PACKAGE_TYPE="${configs[0]}"
export DESIRED_PYTHON="${configs[1]}"
export DESIRED_CUDA="${configs[2]}"
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
export BUILD_PYTHONLESS=1
fi
# Pick docker image
if [[ "$PACKAGE_TYPE" == conda ]]; then
export DOCKER_IMAGE="soumith/conda-cuda"
elif [[ "$DESIRED_CUDA" == cpu ]]; then
export DOCKER_IMAGE="soumith/manylinux-cuda80"
else
export DOCKER_IMAGE="soumith/manylinux-cuda${DESIRED_CUDA:2}"
fi
# We put this here so that OVERRIDE_PACKAGE_VERSION below can read from it
export DATE="$(date -u +%Y%m%d)"
export PYTORCH_BUILD_VERSION="1.0.0.dev$DATE"
export PYTORCH_BUILD_NUMBER=1
cat >>"$envfile" <<EOL
# =================== The following code will be executed inside Docker container ===================
export TZ=UTC
echo "Running on $(uname -a) at $(date)"
export PACKAGE_TYPE="$PACKAGE_TYPE"
export DESIRED_PYTHON="$DESIRED_PYTHON"
export DESIRED_CUDA="$DESIRED_CUDA"
export LIBTORCH_VARIANT="$LIBTORCH_VARIANT"
export BUILD_PYTHONLESS="$BUILD_PYTHONLESS"
export DATE="$DATE"
export NIGHTLIES_DATE_PREAMBLE=1.0.0.dev
export PYTORCH_BUILD_VERSION="$PYTORCH_BUILD_VERSION"
export PYTORCH_BUILD_NUMBER="$PYTORCH_BUILD_NUMBER"
export OVERRIDE_PACKAGE_VERSION="$PYTORCH_BUILD_VERSION"
export TORCH_PACKAGE_NAME='torch-nightly'
export TORCH_CONDA_BUILD_FOLDER='pytorch-nightly'
export NO_FBGEMM=1
export PIP_UPLOAD_FOLDER='nightly/'
export DOCKER_IMAGE="$DOCKER_IMAGE"
export workdir="$workdir"
export MAC_PACKAGE_WORK_DIR="$workdir"
export PYTORCH_ROOT="$workdir/pytorch"
export BUILDER_ROOT="$workdir/builder"
export MINICONDA_ROOT="$workdir/miniconda"
export PYTORCH_FINAL_PACKAGE_DIR="$workdir/final_pkgs"
export CIRCLE_TAG="$CIRCLE_TAG"
export CIRCLE_SHA1="$CIRCLE_SHA1"
export CIRCLE_PR_NUMBER="$CIRCLE_PR_NUMBER"
export CIRCLE_BRANCH="$CIRCLE_BRANCH"
# =================== The above code will be executed inside Docker container ===================
EOL
echo 'retry () {' >> "$envfile"
echo ' $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)' >> "$envfile"
echo '}' >> "$envfile"
echo 'export -f retry' >> "$envfile"
cat "$envfile"
binary_checkout: &binary_checkout
name: Checkout
command: |
set -ex
# This step runs on multiple executors with different envfile locations
if [[ "$(uname)" == Darwin ]]; then
source "/Users/distiller/project/env"
elif [[ -d "/home/circleci/project" ]]; then
# machine executor (binary tests)
source "/home/circleci/project/env"
else
# docker executor (binary builds)
source "/env"
fi
# Clone the Pytorch branch
git clone https://github.com/pytorch/pytorch.git "$PYTORCH_ROOT"
pushd "$PYTORCH_ROOT"
if [[ -n "$CIRCLE_PR_NUMBER" ]]; then
# "smoke" binary build on PRs
git fetch --force origin "pull/${CIRCLE_PR_NUMBER}/head:remotes/origin/pull/${CIRCLE_PR_NUMBER}"
git reset --hard "$CIRCLE_SHA1"
git checkout -q -B "$CIRCLE_BRANCH"
git reset --hard "$CIRCLE_SHA1"
elif [[ -n "$CIRCLE_SHA1" ]]; then
# "smoke" binary build on master on PR merges
git reset --hard "$CIRCLE_SHA1"
git checkout -q -B master
else
# nightly binary builds. These run at 05:05 UTC every day.
last_commit="$(git rev-list --before "$(date -u +%Y-%m-%d) 05:00" --max-count 1 HEAD)"
git checkout "$last_commit"
fi
git submodule update --init --recursive --quiet
echo "Using Pytorch from "
git --no-pager log --max-count 1
popd
# Clone the Builder master repo
git clone -q https://github.com/pytorch/builder.git "$BUILDER_ROOT"
pushd "$BUILDER_ROOT"
git fetch origin
git reset origin/master --hard
echo "Using builder from "
git --no-pager log --max-count 1
popd
binary_install_miniconda: &binary_install_miniconda
name: Install miniconda
no_output_timeout: "1h"
command: |
set -ex
# This step runs on multiple executors with different envfile locations
if [[ "$(uname)" == Darwin ]]; then
source "/Users/distiller/project/env"
elif [[ -d "/home/circleci/project" ]]; then
# machine executor (binary tests)
source "/home/circleci/project/env"
else
# docker executor (binary builds)
source "/env"
fi
conda_sh="$workdir/install_miniconda.sh"
if [[ "$(uname)" == Darwin ]]; then
curl -o "$conda_sh" https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
else
curl -o "$conda_sh" https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
fi
chmod +x "$conda_sh"
"$conda_sh" -b -p "$MINICONDA_ROOT"
rm -f "$conda_sh"
export PATH="$MINICONDA_ROOT/bin:$PATH"
source "$MINICONDA_ROOT/bin/activate"
# We can't actually add miniconda to the PATH in the envfile, because that
# breaks 'unbuffer' in Mac jobs. This is probably because conda comes with
# a tclsh, which then gets inserted before the tclsh needed in /usr/bin
# This section is used in the binary_test and smoke_test jobs. It expects
# 'binary_populate_env' to have populated /home/circleci/project/env and it
# expects another section to populate /home/circleci/project/ci_test_script.sh
# with the code to run in the docker
binary_run_in_docker: &binary_run_in_docker
name: Run in docker
command: |
# Expect all needed environment variables to be written to this file
source /home/circleci/project/env
echo "Running the following code in Docker"
cat /home/circleci/project/ci_test_script.sh
set -ex
# Expect actual code to be written to this file
chmod +x /home/circleci/project/ci_test_script.sh
# Run the docker and copy pkgs/env/script into it
if [ -n "${USE_CUDA_DOCKER_RUNTIME}" ]; then
export id=$(docker run --runtime=nvidia -t -d "${DOCKER_IMAGE}")
else
export id=$(docker run -t -d "${DOCKER_IMAGE}")
fi
docker cp /home/circleci/project/. "$id:/circleci_stuff"
if [[ -d "/home/circleci/project/final_pkgs" ]]; then
docker cp /home/circleci/project/final_pkgs "$id:/final_pkgs"
fi
# Execute the test script that was populated by an earlier section
export COMMAND='((echo "source /circleci_stuff/env && /circleci_stuff/ci_test_script.sh") | docker exec -i "$id" bash) 2>&1'
echo ${COMMAND} > ./command.sh && unbuffer bash ./command.sh | ts
# binary linux build defaults
##############################################################################
binary_linux_build: &binary_linux_build
resource_class: 2xlarge+
steps:
- run:
<<: *binary_populate_env
- run:
<<: *binary_checkout
- run:
name: Install unbuffer and ts
command: |
set -ex
source /env
retry yum -q -y install epel-release
retry yum -q -y install expect moreutils
- run:
name: Build
no_output_timeout: "1h"
command: |
echo "RUNNING ON $(uname -a) WITH $(nproc) CPUS AND $(free -m)"
set -ex
source /env
# Defaults here so they can be changed in one place
export MAX_JOBS=12
# Parse the parameters
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
build_script='conda/build_pytorch.sh'
elif [[ "$DESIRED_CUDA" == cpu ]]; then
build_script='manywheel/build_cpu.sh'
else
build_script='manywheel/build.sh'
fi
# We want to call unbuffer, which calls tclsh which finds the expect
# package. The expect was installed by yum into /usr/bin so we want to
# find /usr/bin/tclsh, but this is shadowed by /opt/conda/bin/tclsh in
# the conda docker images.
if [[ "$PACKAGE_TYPE" == 'conda' ]]; then
mkdir /just_tclsh_bin
ln -s /usr/bin/tclsh /just_tclsh_bin/tclsh
export PATH=/just_tclsh_bin:$PATH
fi
# Build the package
SKIP_ALL_TESTS=1 unbuffer "/builder/$build_script" | ts
- persist_to_workspace:
root: /
paths: final_pkgs
# This should really just be another step of the binary_linux_build job above.
# This isn't possible right now b/c the build job uses the docker executor
# (otherwise they'd be really really slow) but this one uses the macine
# executor (b/c we have to run the docker with --runtime=nvidia and we can't do
# that on the docker executor)
binary_linux_test: &binary_linux_test
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *setup_ci_environment
- attach_workspace:
at: /home/circleci/project
- run:
<<: *binary_populate_env
- run:
name: Prepare test code
no_output_timeout: "1h"
command: |
source /home/circleci/project/env
cat >/home/circleci/project/ci_test_script.sh <<EOL
# =================== The following code will be executed inside Docker container ===================
set -ex
# Set up Python
if [[ "$PACKAGE_TYPE" == conda ]]; then
retry conda create -qyn testenv python="$DESIRED_PYTHON"
source activate testenv >/dev/null
elif [[ "$DESIRED_PYTHON" == 2.7mu ]]; then
export PATH="/opt/python/cp27-cp27mu/bin:\$PATH"
else
python_nodot="\$(echo $DESIRED_PYTHON | tr -d m.u)"
export PATH="/opt/python/cp\$python_nodot-cp\${python_nodot}m/bin:\$PATH"
fi
# Clone the Pytorch branch
git clone https://github.com/pytorch/pytorch.git /pytorch
pushd /pytorch
if [[ -n "$CIRCLE_PR_NUMBER" ]]; then
# "smoke" binary build on PRs
git fetch --force origin "pull/${CIRCLE_PR_NUMBER}/head:remotes/origin/pull/${CIRCLE_PR_NUMBER}"
git reset --hard "$CIRCLE_SHA1"
git checkout -q -B "$CIRCLE_BRANCH"
git reset --hard "$CIRCLE_SHA1"
fi
git submodule update --init --recursive
popd
# Clone the Builder master repo
git clone -q https://github.com/pytorch/builder.git /builder
# Install the package
pkg="/final_pkgs/\$(ls /final_pkgs)"
if [[ "$PACKAGE_TYPE" == conda ]]; then
conda install -y "\$pkg" --offline
else
pip install "\$pkg"
fi
# Test the package
pushd /pytorch
/builder/run_tests.sh "$PACKAGE_TYPE" "$DESIRED_PYTHON" "$DESIRED_CUDA"
# =================== The above code will be executed inside Docker container ===================
EOL
echo "Prepared script to run in next step"
cat /home/circleci/project/ci_test_script.sh
- run:
<<: *binary_run_in_docker
binary_linux_upload: &binary_linux_upload
machine:
image: default
steps:
- run:
<<: *setup_linux_system_environment
- run:
<<: *setup_ci_environment
- attach_workspace:
at: /home/circleci/project
- run:
<<: *binary_populate_env
- run:
<<: *binary_install_miniconda
- run:
name: Upload
no_output_timeout: "10m"
command: |
source /home/circleci/project/env
declare -x "AWS_ACCESS_KEY_ID=${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}"
declare -x "AWS_SECRET_ACCESS_KEY=${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}"
cat >/home/circleci/project/login_to_anaconda.sh <<EOL
set +x
echo "Trying to login to Anaconda"
yes | anaconda login \
--username "$PYTORCH_BINARY_PJH5_CONDA_USERNAME" \
--password "$PYTORCH_BINARY_PJH5_CONDA_PASSWORD"
set -x
EOL
chmod +x /home/circleci/project/login_to_anaconda.sh
set -ex
export PATH="$MINICONDA_ROOT/bin:$PATH"
# Upload the package to the final location
pushd /home/circleci/project/final_pkgs
if [[ "$PACKAGE_TYPE" == conda ]]; then
retry conda install -yq anaconda-client
retry timeout 30 /home/circleci/project/login_to_anaconda.sh
anaconda upload "$(ls)" -u pytorch --label main --no-progress --force
elif [[ "$PACKAGE_TYPE" == libtorch ]]; then
retry pip install -q awscli
s3_dir="s3://pytorch/libtorch/${PIP_UPLOAD_FOLDER}${DESIRED_CUDA}/"
for pkg in $(ls); do
retry aws s3 cp "$pkg" "$s3_dir" --acl public-read
done
else
retry pip install -q awscli
s3_dir="s3://pytorch/whl/${PIP_UPLOAD_FOLDER}${DESIRED_CUDA}/"
retry aws s3 cp "$(ls)" "$s3_dir" --acl public-read
fi
##############################################################################
# Macos binary build defaults
# The root of everything is /Users/distiller/pytorch-ci-env/workspace
##############################################################################
binary_mac_build: &binary_mac_build
macos:
xcode: "9.0"
steps:
- run:
<<: *binary_populate_env
- run:
<<: *macos_brew_update
- run:
<<: *binary_checkout
- run:
<<: *binary_install_miniconda
- run:
name: Build
no_output_timeout: "1h"
command: |
set -ex
source "/Users/distiller/project/env"
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR"
# For some reason `unbuffer` breaks if we change the PATH here, so we
# write a script with the PATH change in it and unbuffer the whole
# thing
build_script="$workdir/build_script.sh"
touch "$build_script"
chmod +x "$build_script"
# Build
cat >"$build_script" <<EOL
export PATH="$workdir/miniconda/bin:$PATH"
if [[ "$PACKAGE_TYPE" == conda ]]; then
"$workdir/builder/conda/build_pytorch.sh"
else
export TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')"
"$workdir/builder/wheel/build_wheel.sh"
fi
EOL
unbuffer "$build_script" | ts
- run:
name: Test
no_output_timeout: "1h"
command: |
set -ex
source "/Users/distiller/project/env"
export "PATH=$workdir/miniconda/bin:$PATH"
pkg="$workdir/final_pkgs/$(ls $workdir/final_pkgs)"
# Don't test libtorch TODO
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
exit 0
fi
# Create a new test env TODO cut all this out into a separate test
# job and have an entirely different miniconda
source deactivate || true
conda create -qyn test python="$DESIRED_PYTHON"
source activate test >/dev/null
# Install the package
if [[ "$PACKAGE_TYPE" == conda ]]; then
conda install -y "$pkg" --offline
else
pip install "$pkg" --no-index --no-dependencies -v
fi
# Test
pushd "$workdir/pytorch"
$workdir/builder/run_tests.sh "$PACKAGE_TYPE" "$DESIRED_PYTHON" "$DESIRED_CUDA"
popd
- persist_to_workspace:
root: /Users/distiller/project
paths: final_pkgs
binary_mac_upload: &binary_mac_upload
macos:
xcode: "9.0"
steps:
- run:
<<: *binary_populate_env
- run:
<<: *macos_brew_update
- run:
<<: *binary_install_miniconda
- attach_workspace:
at: /Users/distiller/project
- run:
name: Upload
no_output_timeout: "10m"
command: |
export AWS_ACCESS_KEY_ID="${PYTORCH_BINARY_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}"
cat >/Users/distiller/project/login_to_anaconda.sh <<EOL
set +x