Skip to content

Commit

Permalink
Extend scheduler to periodically garbage collect stranded VMs
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwilkie committed Jul 28, 2015
1 parent e3f0565 commit 9779e06
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 9 deletions.
8 changes: 1 addition & 7 deletions test/scheduler/.gitignore
@@ -1,7 +1 @@
*.egg-info
*.dist-info
flask
jinja2
markupsafe
werkzeug
itsdangerous.*
lib
2 changes: 1 addition & 1 deletion test/scheduler/README.md
@@ -1,6 +1,6 @@
To upload a newer version:

```
pip install -r requirements.txt -t .
pip install -r requirements.txt -t lib
appcfg.py update .
```
4 changes: 3 additions & 1 deletion test/scheduler/app.yaml
Expand Up @@ -7,8 +7,10 @@ threadsafe: true
handlers:
# Handlers are matched top to bottom and the first match wins, so the
# admin-only task handler must come before the catch-all. The url is a
# regular expression: '/tasks/.*' matches '/tasks/gc', whereas '/tasks/*'
# would only match '/tasks' followed by zero or more slashes.
- url: /tasks/.*
  script: main.app
  login: admin
- url: .*
  script: main.app

libraries:
- name: webapp2
version: latest

3 changes: 3 additions & 0 deletions test/scheduler/appengine_config.py
@@ -0,0 +1,3 @@
from google.appengine.ext import vendor

# Register the 'lib' directory (where the README installs requirements.txt
# via `pip install -r requirements.txt -t lib`) with App Engine's vendor helper.
vendor.add('lib')
4 changes: 4 additions & 0 deletions test/scheduler/cron.yaml
@@ -0,0 +1,4 @@
cron:
- description: periodic gc
url: /tasks/gc
schedule: every 5 minutes
46 changes: 46 additions & 0 deletions test/scheduler/main.py
@@ -1,7 +1,14 @@
import collections
import json
import logging
import operator
import re

import flask
from oauth2client.client import GoogleCredentials
from googleapiclient import discovery

from google.appengine.api import urlfetch
from google.appengine.ext import ndb

app = flask.Flask('scheduler')
Expand All @@ -12,6 +19,9 @@
# observations faster.
alpha = 0.3

# Compute Engine project and zone passed to the instances list/delete calls
# made by the /tasks/gc handler.
PROJECT = 'positive-cocoa-90213'
ZONE = 'us-central1-a'

class Test(ndb.Model):
total_run_time = ndb.FloatProperty(default=0.) # Not total, but a EWMA
total_runs = ndb.IntegerProperty(default=0)
Expand Down Expand Up @@ -64,3 +74,39 @@ def avg(test):
# atomically insert or retrieve existing schedule
schedule = Schedule.get_or_insert(schedule_id, shards=shards)
return flask.json.jsonify(tests=schedule.shards[str(shard)])

# Test VM names look like "host<index>-<build>-<shard>"; only such VMs are
# ever considered for garbage collection.
NAME_RE = re.compile(r'^host(?P<index>\d+)-(?P<build>\d+)-(?P<shard>\d+)$')


def _hosts_by_build(compute):
    """Return {build number: [instance names]} for VMs matching NAME_RE.

    Lists every instance in PROJECT/ZONE through the given Compute API
    client; instances whose name does not match NAME_RE are ignored.
    """
    hosts = collections.defaultdict(list)
    instances_api = compute.instances()
    request = instances_api.list(project=PROJECT, zone=ZONE)
    # Follow nextPageToken so VMs beyond the first page are not missed;
    # list_next() returns None once there are no more pages.
    while request is not None:
        response = request.execute()
        # The API omits 'items' entirely when a page contains no instances.
        for instance in response.get('items', []):
            matches = NAME_RE.match(instance['name'])
            if matches is None:
                continue
            hosts[int(matches.group('build'))].append(instance['name'])
        request = instances_api.list_next(request, response)
    return hosts


@app.route('/tasks/gc')
def gc():
    """Delete test VMs whose CircleCI build is no longer running.

    Returns a JSON response listing the running build numbers, the VMs
    that were deleted and the VMs whose deletion failed. If the CircleCI
    build list cannot be fetched, nothing is deleted and a 502 is returned.
    """
    # Imported locally so this handler does not depend on edits to the
    # module-level import block.
    from googleapiclient.errors import HttpError

    # Get list of running VMs, pick build id out of VM name
    credentials = GoogleCredentials.get_application_default()
    compute = discovery.build('compute', 'v1', credentials=credentials)
    host_by_build = _hosts_by_build(compute)
    logging.info("Running VMs by build: %r", host_by_build)

    # Get list of builds, filter down to running builds
    result = urlfetch.fetch('https://circleci.com/api/v1/project/weaveworks/weave',
                            headers={'Accept': 'application/json'})
    if result.status_code != 200:
        # Without a trustworthy build list every VM would look stranded,
        # so bail out instead of deleting anything.
        logging.error("CircleCI build list request failed with status %s",
                      result.status_code)
        return (flask.json.jsonify(error='failed to fetch build list',
                                   status=result.status_code), 502)
    builds = json.loads(result.content)
    running = {build['build_num'] for build in builds if build['status'] == 'running'}
    logging.info("Runnings builds: %r", running)

    # Stop VMs for builds that aren't running
    stopped = []
    failed = []
    for build, names in host_by_build.items():
        if build in running:
            continue
        for name in names:
            logging.info("Stopping VM %s", name)
            try:
                compute.instances().delete(project=PROJECT, zone=ZONE, instance=name).execute()
            except HttpError:
                # Keep going: one VM that cannot be deleted (e.g. already
                # being removed) must not strand all the others.
                logging.exception("Failed to delete VM %s", name)
                failed.append(name)
                continue
            stopped.append(name)

    return (flask.json.jsonify(running=list(running), stopped=stopped, failed=failed), 200)
1 change: 1 addition & 0 deletions test/scheduler/requirements.txt
@@ -1 +1,2 @@
flask
google-api-python-client

0 comments on commit 9779e06

Please sign in to comment.