Skip to content

Commit

Permalink
Cleaning up getting started
Browse files Browse the repository at this point in the history
  • Loading branch information
itsderek23 committed May 8, 2020
1 parent 44c54e6 commit 340aa78
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 179 deletions.
3 changes: 2 additions & 1 deletion whisk/cli/commands/setup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import click
from whisk.project import Project
import whisk.git as git
import os
import subprocess

Expand Down Expand Up @@ -42,7 +43,7 @@ def exec_setup(nbenv):
# direnv will fail if not installed
os.system("cp .envrc.example .envrc")
os.system("direnv allow . > /dev/null 2>&1")
if has_unstaged_changes():
if git.has_unstaged_changes():
exec("Adding files to git", "git add .")
exec("Making initial Git commit", "git commit -m 'Initial project structure' --author=\"Whisk <whisk@whisk-ml.org>\" > /dev/null")

Expand Down
24 changes: 1 addition & 23 deletions ...{ cookiecutter.repo_name }}/core/utils.py → whisk/dvc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import os
from subprocess import check_output
from os.path import dirname, realpath
from pathlib import Path

def dvc_pull(dvc_file):
def pull(dvc_file):
"""
Pulls the output of the specified `dvc_file` into the repository.
This is useful when running outside the local environment (like a deployed web server)
Expand All @@ -17,23 +15,3 @@ def dvc_pull(dvc_file):
os.system("git init")
# Pull the training output (the serialized model) when running on a deployed server.
check_output(["dvc", "pull", dvc_file])

def project_dir():
    """
    Return the absolute path (as a string) of the root project directory,
    i.e. the parent of the directory containing this module.
    """
    # realpath resolves symlinks so the result is stable regardless of
    # how the package was imported.
    return dirname(dirname(realpath(__file__)))

def project_dir_name():
    """
    Return the name (final path component) of the project directory
    as a string.
    """
    return Path(project_dir()).name

def has_unstaged_changes():
    """
    Return True if the git repository in the current working directory
    reports any changes (staged, unstaged, or untracked files).

    NOTE(review): despite the name, `git status --porcelain` lists staged
    and untracked entries too, not only unstaged ones — confirm callers
    only need an "is the tree dirty?" check.
    """
    # Porcelain output emits one newline-terminated line per changed file,
    # so any newline in the output means at least one change exists.
    res=check_output("git status --porcelain",shell=True, universal_newlines=True)
    return ("\n" in res)
6 changes: 6 additions & 0 deletions whisk/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from subprocess import check_output

def has_unstaged_changes():
    """
    Return True when the git repo in the current directory has changes.

    Runs ``git status --porcelain``; that command prints one
    newline-terminated line per modified/untracked file, so a newline in
    the captured output indicates at least one pending change.
    """
    status = check_output("git status --porcelain", shell=True, universal_newlines=True)
    return "\n" in status
12 changes: 12 additions & 0 deletions whisk/model_stub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
class ModelStub:
    # DELETE ME - Remove this class when you load a real model. This only
    # exists so the ModelWrapper works when creating an initial project.
    """
    Stand-in for a real ML model. For each input row it "predicts" the
    number of features in that row.

    Example:

        ModelStub().predict([[1,2],[3,4]]) => [2,2]
    """
    def predict(self, X):
        # One prediction per row: simply the row's length.
        return [len(row) for row in X]
4 changes: 2 additions & 2 deletions whisk/template/{{ cookiecutter.repo_name }}/app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import flask
from {{cookiecutter.project_name}}.core.utils import dvc_pull
import whisk.dvc as dvc
from {{cookiecutter.project_name}}.models.model import Model
import sys
import os
Expand All @@ -8,7 +8,7 @@
app = flask.Flask(__name__)
# Pull the output of the DVC stage used to generate the serialized model when running on a
# deployed server. For example:
# dvc_pull("train.dvc")
# dvc.pull("train.dvc")
model = Model()

@app.route("/predict", methods=["POST"])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
"source": [
"After training a model you should save it to disk so you can invoke the model later. The method call for saving a model to disk is dependent on your ML framework (for example, Scikit-learn uses pickle while you just call `save` on a PyTorch model).\n",
"\n",
"Regardless of your ML framework, save your model and required artifacts for pre/post-processing to the artifacts directory. You can obtain the path to this directory like this:"
"Regardless of your ML framework, save your model and required artifacts for pre/post-processing to the artifacts directory. Saving a model looks like this:"
]
},
{
Expand All @@ -113,8 +113,15 @@
"metadata": {},
"outputs": [],
"source": [
"import {{cookiecutter.project_name}}\n",
"{{cookiecutter.project_name}}.project.artifacts_dir"
"# This example uses pickle to serialize a Python object. \n",
"# Use the preferred serialization approach for your ML framework.\n",
"import pickle\n",
"from whisk.model_stub import ModelStub # A fake model\n",
"from {{cookiecutter.project_name}} import project\n",
"\n",
"model = ModelStub()\n",
"file_path = project.artifacts_dir / \"model.pkl\"\n",
"pickle.dump(model, open(file_path,\"wb\"))"
]
},
{
Expand Down Expand Up @@ -148,46 +155,6 @@
"source": [
"Update `src/{{cookiecutter.project_name}}/models/model.py` to handle loading and pre/post-processing for your own model."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## HTTP Web Service\n",
"The `/app` directory contains a Flask app that's ready to serve a model. Start the web service from your terminal: `whisk app start`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Packaging your model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can share your model with the world as a plain-old Python Package. Just follow the standard Python packaging process. For example, to create a source distribution run the following in your terminal:\n",
"\n",
"```\n",
"python setup.py sdist\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This will create the package within the `dist/` directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
90 changes: 0 additions & 90 deletions whisk/template/{{ cookiecutter.repo_name }}/scripts/install.py

This file was deleted.

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from invoke import task
from {{cookiecutter.project_name}}.core.utils import has_unstaged_changes
import whisk.git as git

@task
def start(c):
Expand All @@ -13,7 +13,7 @@ def create(c,name):
"""
Create a Heroku app for the web service.
"""
if has_unstaged_changes():
if git.has_unstaged_changes():
print("This project has uncommitted changes.\nPlease add and commit the files to the Git repo, then retry:\n\ngit add .\ngit commit -m 'First Commit'")
exit(1)
c.run("heroku create -a {}".format(name))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
class DummyModel:
    # DELETE ME - Remove this class when you load a real model. This only
    # exists so the ModelWrapper works when creating an initial project.
    """
    Stand-in for a real ML model. For each input row it "predicts" the
    number of features in that row.

    Example:

        DummyModel().predict([[1,2],[3,4]]) => [2,2]
    """
    def predict(self, X):
        # One prediction per row: simply the row's length.
        return [len(row) for row in X]
import pickle
from whisk.model_stub import ModelStub
import {{cookiecutter.project_name}}

class Model:
"""
Expand All @@ -22,19 +13,18 @@ def __init__(self):
Load the model + required pre-processing artifacts from disk. Loading from disk is slow,
so this is done in `__init__` rather than loading from disk on every call to `predict`.
Use paths relative to the project root directory.
Tensorflow example:
self.model = load_model("models/model.h5")
self.model = load_model({{cookiecutter.project_name}}.project.artifacts_dir / "model.h5")
Pickle example:
with open('models/tokenizer.pickle', 'rb') as handle:
self.tokenizer = pickle.load(handle)
with open({{cookiecutter.project_name}}.project.artifacts_dir / 'tokenizer.pickle', 'rb') as file:
self.tokenizer = pickle.load(file)
"""
# REPLACE ME - add your loading logic
self.model = DummyModel()
with open({{cookiecutter.project_name}}.project.artifacts_dir / "model.pkl", 'rb') as file:
self.model = pickle.load(file)

def predict(self,data):
"""
Expand Down

0 comments on commit 340aa78

Please sign in to comment.