# Wind Farm predictive maintenance with SageMaker

## Prepare for development

In [None]:
import sagemaker
from sagemaker import get_execution_role

# S3 key prefix reserved for this project's objects.
prefix = "Scikit-wind"

# SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()

# Session object shared by the estimator and the S3 upload below.
sagemaker_session = sagemaker.Session()

## Train and deploy a model

### Create a SageMaker estimator

In [None]:
# Build the SageMaker scikit-learn estimator that runs the training script.
from sagemaker.sklearn.estimator import SKLearn

script_path = "windfarm_train.py"  # this is where the training code is
FRAMEWORK_VERSION = "0.23-1"  # scikit-learn framework version requested from SageMaker

# Collect the constructor arguments in one place so they are easy to scan.
estimator_settings = {
    "entry_point": script_path,
    "framework_version": FRAMEWORK_VERSION,
    "instance_type": "ml.m5.xlarge",
    "role": role,
    "sagemaker_session": sagemaker_session,
    # Hyperparameters handed to the training job.
    "hyperparameters": {"max_leaf_nodes": 30},
}
sklearn = SKLearn(**estimator_settings)

### Train a model

In [None]:
# Upload the local training CSV to S3 and keep the returned S3 URI for fit().
# The project prefix declared at the top of the notebook is passed explicitly;
# without it the file is stored under the SDK's generic default key prefix
# instead of this project's own prefix.
train_input = sagemaker_session.upload_data(
    "windfarm_train_data.csv",
    key_prefix=prefix,
)

In [None]:
# Run training, supplying the uploaded CSV as the "train" input.
sklearn.fit(inputs={"train": train_input})

### Deploy the model

In [None]:
# Deploy the trained model to an endpoint backed by one ml.m5.xlarge instance;
# the returned predictor is used for the test requests below.
predictor = sklearn.deploy(
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)

## Test the model

In [None]:
# Load the evaluation data into a single DataFrame.
import pandas as pd

# NOTE(review): this is the same file that was uploaded for training — confirm
# that evaluating on the training data is intended.
data_files = ["windfarm_train_data.csv"]

raw_data = []
for csv_path in data_files:
    # The file is read as pipe-delimited with no header row.
    raw_data.append(pd.read_csv(csv_path, engine="python", header=None, delimiter="|"))
test_data = pd.concat(raw_data)

# Only the first 5000 rows are used: every column except the last is a
# feature, and the last column holds the labels.
test_X = test_data.iloc[:5000, :-1]
test_y = test_data.iloc[:5000, -1]

In [None]:
# Create a batch transformer from the trained estimator.
# NOTE(review): `transformer` is not used by any later cell visible here, and
# it requests ml.m4.xlarge while training/hosting use ml.m5.xlarge — confirm.
transformer = sklearn.transformer(
    instance_type="ml.m4.xlarge",
    instance_count=1,
)

In [None]:
# Get predictions for the test dataset from the deployed endpoint.
preds = predictor.predict(test_X.values)

# `num_examples` was never defined anywhere in the notebook, so the print
# below raised NameError. The denominator is the number of evaluated rows.
num_examples = len(test_y)

# Mean absolute difference between predictions and labels, as a percentage.
print("Error percentage: ", (sum(abs(preds - test_y.values)) * 100) / num_examples)

## Clean up

In [None]:
# Delete the endpoint created by `sklearn.deploy(...)` once testing is done.
predictor.delete_endpoint()