Skip to content

Commit

Permalink
Merge tag 'v0.1.2' into migration
Browse files Browse the repository at this point in the history
  • Loading branch information
robbiezhang committed Jun 20, 2017
2 parents bae0a8b + 579e8b8 commit c9d0704
Show file tree
Hide file tree
Showing 386 changed files with 45,953 additions and 20,141 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

---

**Is this a BUG REPORT or FEATURE REQUEST?** (choose one):
**Is this an ISSUE or FEATURE REQUEST?** (choose one):

<!--
If this is a BUG REPORT, please:
If this is an ISSUE, please:
- Fill in as much of the template below as you can. If you leave out
information, we can't help you as well.
Expand Down
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ _input/
test/user.env
user.env
test/acs-engine-test/acs-engine-test

test/acs-engine-test/acs-engine-test.exe

test/acs-engine-test/acs-engine-test.exe
pkg/operations/junit.xml
pkg/operations/kubernetesupgrade/junit.xml
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM buildpack-deps:xenial

ENV GO_VERSION 1.8
ENV KUBECTL_VERSION 1.6.0
ENV AZURE_CLI_VERSION 2.0.3
ENV AZURE_CLI_VERSION 2.0.7

RUN apt-get update \
&& apt-get -y upgrade \
Expand Down
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ func NewRootCmd() *cobra.Command {

rootCmd.AddCommand(newVersionCmd())
rootCmd.AddCommand(newGenerateCmd())
rootCmd.AddCommand(newDeployCmd())

if val := os.Getenv("ACSENGINE_EXPERIMENTAL_FEATURES"); val == "1" {
rootCmd.AddCommand(newUpgradeCmd())
rootCmd.AddCommand(newDeployCmd())
}

return rootCmd
Expand Down
29 changes: 24 additions & 5 deletions cmd/upgrade.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package cmd

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path"

"github.com/Azure/acs-engine/pkg/api"
"github.com/Azure/acs-engine/pkg/armhelpers"
"github.com/Azure/acs-engine/pkg/operations"
"github.com/Azure/acs-engine/pkg/operations/kubernetesupgrade"

log "github.com/Sirupsen/logrus"
"github.com/spf13/cobra"
Expand All @@ -32,6 +35,7 @@ type upgradeCmd struct {
upgradeContainerService *api.UpgradeContainerService
upgradeAPIVersion string
client armhelpers.ACSEngineClient
nameSuffix string
}

// NewUpgradeCmd run a command to upgrade a Kubernetes cluster
Expand Down Expand Up @@ -107,19 +111,34 @@ func (uc *upgradeCmd) validate(cmd *cobra.Command, args []string) {
log.Fatalf("failed to get client") // TODO: cleanup
}

// TODO: Validate that downgrade is not allowed
// TODO: Validate noop case and return early
// Read name suffix to identify nodes in the resource group that belong
// to this cluster.
// TODO: Also update to read namesuffix from the parameters file as
// user could have specified a name suffix instead of using the default
// value generated by ACS Engine
templatePath := path.Join(uc.deploymentDirectory, "azuredeploy.json")
contents, _ := ioutil.ReadFile(templatePath)

var template interface{}
json.Unmarshal(contents, &template)

templateMap := template.(map[string]interface{})
templateParameters := templateMap["parameters"].(map[string]interface{})

nameSuffixParam := templateParameters["nameSuffix"].(map[string]interface{})
uc.nameSuffix = nameSuffixParam["defaultValue"].(string)
log.Infoln(fmt.Sprintf("Name suffix: %s", uc.nameSuffix))
}

func (uc *upgradeCmd) run(cmd *cobra.Command, args []string) error {
uc.validate(cmd, args)

upgradeCluster := operations.UpgradeCluster{
upgradeCluster := kubernetesupgrade.UpgradeCluster{
Client: uc.client,
}

if err := upgradeCluster.UpgradeCluster(uc.authArgs.SubscriptionID, uc.resourceGroupName,
uc.containerService, uc.upgradeContainerService); err != nil {
uc.containerService, uc.upgradeContainerService, uc.nameSuffix); err != nil {
log.Fatalf("Error upgrading cluster: %s \n", err.Error())
}

Expand Down
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ This cluster definition examples demonstrate how to create a customized Docker E
* [DC/OS Walkthrough](dcos.md) - shows how to create a DC/OS enabled Docker cluster on Azure
* [Kubernetes Walkthrough](kubernetes.md) - shows how to create a Kubernetes enabled Docker cluster on Azure
* [Kubernetes Windows Walkthrough](kubernetes.windows.md) - shows how to create a hybrid Kubernetes Windows enabled Docker cluster on Azure.
* [Kubernetes with GPU support Walkthrough](kubernetes.gpu.md) - shows how to create a Kubernetes cluster with GPU support.
* [Swarm Walkthrough](swarm.md) - shows how to create a Swarm enabled Docker cluster on Azure
* [Swarm Mode Walkthrough](swarmmode.md) - shows how to create a Swarm Mode cluster on Azure
* [Custom VNET](../examples/vnet) - shows how to use a custom VNET
Expand Down
65 changes: 65 additions & 0 deletions docs/kubernetes.gpu.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Microsoft Azure Container Service Engine - Kubernetes Multi-GPU support Walkthrough

## Deployment

Here are the steps to deploy a simple Kubernetes cluster with multi-GPU support:

1. [Install a Kubernetes cluster](kubernetes.md) - shows how to create a Kubernetes cluster.
> NOTE: Make sure to configure the agent nodes with vm size `Standard_NC12` or above to utilize the GPUs
2. Install drivers:
* SSH into each node and run the following scripts :
install-nvidia-driver.sh
```
curl -L -sf https://raw.githubusercontent.com/ritazh/acs-k8s-gpu/master/install-nvidia-driver.sh | sudo sh
```

To verify, when you run `kubectl describe node <node-name>`, you should get something like the following:

```
Capacity:
alpha.kubernetes.io/nvidia-gpu: 2
cpu: 12
memory: 115505744Ki
pods: 110
```

3. Scheduling a multi-GPU container

* You need to specify `alpha.kubernetes.io/nvidia-gpu: 2` as a limit
* You need to expose the drivers to the container as a volume. If you are using TF original docker image, it is based on ubuntu 16.04, just like your cluster's VM, so you can just mount `/usr/bin` and `/usr/lib/x86_64-linux-gnu`, it's a bit dirty but it works. Ideally, improve the previous script to install the driver in a specific directory and only expose this one.

``` yaml
apiVersion: v1
kind: Pod
metadata:
name: gpu-test
labels:
app: gpu-test
spec:
volumes:
- name: binaries
hostPath:
path: /usr/bin/
- name: libraries
hostPath:
path: /usr/lib/x86_64-linux-gnu
containers:
- name: tensorflow
image: gcr.io/tensorflow/tensorflow:latest-gpu
ports:
- containerPort: 8888
resources:
limits:
alpha.kubernetes.io/nvidia-gpu: 2
volumeMounts:
- mountPath: /usr/bin/
name: binaries
- mountPath: /usr/lib/x86_64-linux-gnu
name: libraries
```
To verify, when you run `kubectl describe pod <pod-name>`, you should see the following:

```
Successfully assigned gpu-test to k8s-agentpool1-10960440-1
```
10 changes: 2 additions & 8 deletions docs/kubernetes.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Microsoft Azure Container Service Engine - Kubernetes Walkthrough

* [Kubernetes Windows Walkthrough](kubernetes.windows.md) - shows how to create a Kubernetes cluster on Windows.
* [Kubernetes with GPU support Walkthrough](kubernetes.gpu.md) - shows how to create a Kubernetes cluster with GPU support.

## Deployment

Expand Down Expand Up @@ -71,14 +72,7 @@ All VMs are in the same private VNET and are fully accessible to each other.

Using the default configuration, Kubernetes allows communication between all
Pods within a cluster. To ensure that Pods can only be accessed by authorized
Pods, a policy enforcement is needed. To enable policy enforcement using Calico
`azuredeploy.parameters.json` needs to be modified like that:

```json
"networkPolicy": {
"value": "calico"
}
```
Pods, a policy enforcement is needed. To enable policy enforcement using Calico refer to the [cluster definition](https://github.com/Azure/acs-engine/blob/master/docs/clusterdefinition.md#kubernetesconfig) document under networkPolicy. There is also a reference cluster definition available [here](https://github.com/Azure/acs-engine/blob/master/examples/networkpolicy/kubernetes-calico.json).

This will deploy a Calico node controller to every instance of the cluster
using a Kubernetes DaemonSet. After a successful deployment you should be able
Expand Down
28 changes: 28 additions & 0 deletions docs/roadmap/planning-process.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Planning Process

acs-engine features a lightweight process that emphasizes openness and ensures every community member can see project goals for the future.

## The Role of Maintainers

[Maintainers][] lead the acs-engine project. Their duties include proposing the Roadmap, reviewing and integrating contributions and maintaining the vision of the project.

## Open Roadmap

The [acs-engine Roadmap](roadmap.md) is a community document. While Maintainers propose the Roadmap, it gets discussed and refined in Release Planning Meetings.

## Contributing to the Roadmap

Proposals and issues can be opened by anyone. Every member of the community is welcome to participate in the discussion by providing feedback and/or offering counter-proposals.

## Release Milestones

The Roadmap gets delivered progressively via the [Release Schedule][]. Releases are defined during Release Planning Meetings and managed using GitHub Milestones which track specific deliverables and work-in-progress.

## Release Planning Meetings

Major decisions affecting the Roadmap are discussed during Release Planning Meetings on the first Thursday of each month, aligned with the [Release Schedule][] and monthly objectives for the Microsoft ACS team.

Release Planning Meetings are not currently open to non-Microsoft contributors, but we may change this in the future.

[Maintainers]: https://github.com/Azure/acs-engine/blob/master/OWNERS
[Release Schedule]: releases.md
103 changes: 103 additions & 0 deletions docs/roadmap/releases.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Releases

acs-engine uses a [continuous delivery][] approach for creating releases. Every merged commit that passes
testing results in a deliverable that can be given a [semantic version][] tag and shipped.

## Release as Needed

The master `git` branch of a project should always work. Only changes considered ready to be
released publicly are merged.

acs-engine depends on components that release new versions as often as needed. Fixing
a high priority bug requires the project maintainer to create a new patch release.
Merging a backward-compatible feature implies a minor release.

By releasing often, each component release becomes a safe and routine event. This makes it faster
and easier for users to obtain specific fixes. Continuous delivery also reduces the work
necessary to release a product such as acs-engine, which depends on several external projects.

"Components" applies not just to ACS projects, but also to development and release
tools, orchestrator versions (Kubernetes, DC/OS, Swarm), to Docker base images, and to other Azure
projects that do [semantic version][] releases.

## acs-engine Releases Each Month

acs-engine has a regular, public release cadence. From v0.1.0 onward, new acs-engine feature
releases arrive on the first Thursday of each month. Patch releases are created at any time,
as needed. GitHub milestones are used to communicate the content and timing of major and minor
releases, and longer-term planning is visible at [the Roadmap](roadmap.md).

acs-engine release timing is not linked to specific features. If a feature is merged before the
release date, it is included in the next release.

See "[How to Release acs-engine](#how-to-release-acs-engine)" for more detail.

## Semantic Versioning

acs-engine releases comply with [semantic versioning][semantic version], with the "public API" broadly
defined as:

- REST, gRPC, or other API that is network-accessible
- Library or framework API intended for public use
- "Pluggable" socket-level protocols users can redirect
- CLI commands and output formats
- Integration with Azure public APIs such as ARM

In general, changes to anything a user might reasonably link to, customize, or integrate with should
be backward-compatible, or else require a major release. acs-engine users can be confident that upgrading
to a patch or to a minor release will not break anything.

## How to Release acs-engine

This section leads a maintainer through creating an acs-engine release.

### Step 1: Assemble Master Changelog
A change log is a file which contains a curated, chronologically ordered list of changes
for each version of acs-engine, which helps users and contributors see what notable changes
have been made between each version of the project.

The CHANGELOG should be driven by release milestones defined on Github, which track specific deliverables and
work-in-progress.

### Step 2: Manual Testing

Now it's time to go above and beyond current CI tests. Create a testing matrix spreadsheet (copying
from the previous document is a good start) and sign up testers to cover all permutations.

Testers should pay special attention to the overall user experience, make sure upgrading from
earlier versions is smooth, and cover various storage configurations and Kubernetes versions and
infrastructure providers.

When showstopper-level bugs are found, the process is as follows:

1. Create an issue that describes the bug.
1. Create a PR that fixes the bug.
- PRs should always include tests (unit or e2e as appropriate) to add
automated coverage for the bug.
1. Once the PR passes and is reviewed, merge it and update the CHANGELOG


### Step 3: Tag and Create a Release

TBD


### Step 4: Close GitHub Milestones

TBD

### Step 5: Let Everyone Know

Let the rest of the team know they can start blogging and tweeting about the new acs-engine release.
Post a message to the #company channel on Slack. Include a link to the released chart and to the
master CHANGELOG:

```
@here acs-engine 0.1.0 is here!
Master CHANGELOG: https://github.com/Azure/acs-engine/blob/master/CHANGELOG.md
```

You're done with the release. Nice job!

[continuous delivery]: https://en.wikipedia.org/wiki/Continuous_delivery
[semantic version]: http://semver.org
4 changes: 2 additions & 2 deletions examples/dcos.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
"agentPoolProfiles": [
{
"name": "agentprivate",
"count": 3,
"count": 1,
"vmSize": "Standard_D2_v2"
},
{
"name": "agentpublic",
"count": 3,
"count": 1,
"vmSize": "Standard_D2_v2",
"dnsPrefix": "",
"ports": [
Expand Down
12 changes: 9 additions & 3 deletions examples/disks-managed/dcos-preAttachedDisks-vmas.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,18 @@
},
"agentPoolProfiles": [
{
"name": "agent128",
"count": 10,
"name": "agentpublic",
"count": 6,
"vmSize": "Standard_D2_v2",
"availabilityProfile": "AvailabilitySet",
"storageProfile": "ManagedDisks",
"diskSizesGB": [128, 128, 128, 128]
"diskSizesGB": [128, 128, 128, 128],
"dnsPrefix": "",
"ports": [
80,
443,
8080
]
}
],
"linuxProfile": {
Expand Down
1 change: 1 addition & 0 deletions examples/disks-managed/dcos-preAttachedDisks-vmas.json.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
MARATHON_JSON=marathon-slave-public.json

0 comments on commit c9d0704

Please sign in to comment.