# Deploy Sagemaker

## Importar dados

In [16]:
from io import BytesIO
from zipfile import ZipFile
import requests
import pandas as pd

url = 'https://archive-beta.ics.uci.edu/static/public/697/predict+students+dropout+and+academic+success.zip'
dataset_file = 'data.csv'
filename = requests.get(url).content
zip_file = ZipFile( BytesIO(filename), 'r' )
df = pd.read_csv(zip_file.open(dataset_file),sep=';')
df.columns = df.columns.str.lower().str.replace(' ', '_')
df.columns = df.columns.str.replace('[#,@,&,(,),\', ,/,\t]','')
df

  df.columns = df.columns.str.replace('[#,@,&,(,),\', ,/,\t]','')


# Preparar dados

In [17]:
import sagemaker
from sagemaker.image_uris import retrieve
from sklearn.model_selection import train_test_split
import boto3
import io, os
from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker import get_execution_role

bucket='educationai'

class_mapper = {'Dropout':0,'Graduate':1,'Enrolled':2}
df['class']=df['target'].replace(class_mapper)
df.drop('target', axis='columns', inplace=True)


cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols]

train, test_and_validate = train_test_split(df, test_size=0.2, random_state=42, stratify=df['class'])
test, validate = train_test_split(test_and_validate, test_size=0.5, random_state=42, stratify=test_and_validate['class'])

prefix='students_prediction'

train_file='students_train.csv'
test_file='students_test.csv'
validate_file='students_validate.csv'

s3_resource = boto3.Session().resource('s3')
def upload_s3_csv(filename, folder, dataframe):
    csv_buffer = io.StringIO()
    dataframe.to_csv(csv_buffer, header=False, index=False )
    s3_resource.Bucket(bucket).Object(os.path.join(prefix, folder, filename)).put(Body=csv_buffer.getvalue())

upload_s3_csv(train_file, 'train', train)
upload_s3_csv(test_file, 'test', test)
upload_s3_csv(validate_file, 'validate', validate)
df

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


Unnamed: 0,class,marital_status,application_mode,application_order,course,daytimeevening_attendance,previous_qualification,previous_qualification_grade,nacionality,mothers_qualification,...,curricular_units_1st_sem_without_evaluations,curricular_units_2nd_sem_credited,curricular_units_2nd_sem_enrolled,curricular_units_2nd_sem_evaluations,curricular_units_2nd_sem_approved,curricular_units_2nd_sem_grade,curricular_units_2nd_sem_without_evaluations,unemployment_rate,inflation_rate,gdp
0,0,1,17,5,171,1,1,122.0,1,19,...,0,0,0,0,0,0.000000,0,10.8,1.4,1.74
1,1,1,15,1,9254,1,1,160.0,1,1,...,0,0,6,6,6,13.666667,0,13.9,-0.3,0.79
2,0,1,1,5,9070,1,1,122.0,1,37,...,0,0,6,0,0,0.000000,0,10.8,1.4,1.74
3,1,1,17,2,9773,1,1,122.0,1,38,...,0,0,6,10,5,12.400000,0,9.4,-0.8,-3.12
4,1,2,39,1,8014,0,1,100.0,1,37,...,0,0,6,6,6,13.000000,0,13.9,-0.3,0.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,1,1,1,6,9773,1,1,125.0,1,1,...,0,0,6,8,5,12.666667,0,15.5,2.8,-4.06
4420,0,1,1,2,9773,1,1,120.0,105,1,...,0,0,6,6,2,11.000000,0,11.1,0.6,2.02
4421,0,1,1,1,9500,1,1,154.0,1,37,...,0,0,8,9,1,13.500000,0,13.9,-0.3,0.79
4422,1,1,1,1,9147,1,1,180.0,1,37,...,0,0,5,6,5,12.000000,0,9.4,-0.8,-3.12


# Criar modelo

In [18]:
# container = retrieve('xgboost',boto3.Session().region_name,'1.0-1')
s3_output_location="s3://{}/{}/output/".format(bucket,prefix)

hyperparams = {"feature_dim": 36, "k": 7, "sample_size": 2000, "predictor_type": "classifier"}

model = sagemaker.estimator.Estimator(
        get_image_uri(boto3.Session().region_name, "knn"),
        get_execution_role(),
        instance_count=1,
        instance_type="ml.m5.2xlarge",
        output_path=s3_output_location,
        hyperparameters=hyperparams,
        sagemaker_session=sagemaker.Session(),
    )
# model.set_hyperparameters(**hyperparams)
    
train_channel = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/train/".format(bucket,prefix,train_file),
    content_type='text/csv')

validate_channel = sagemaker.inputs.TrainingInput(
    "s3://{}/{}/validate/".format(bucket,prefix,validate_file),
    content_type='text/csv')

data_channels = {'train': train_channel, 'validation': validate_channel}

model.fit(inputs=data_channels, logs=False)

print('ready for hosting!')

See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker:Creating training-job with name: knn-2023-04-27-19-31-59-117



2023-04-27 19:31:59 Starting - Starting the training job..
2023-04-27 19:32:15 Starting - Preparing the instances for training........
2023-04-27 19:33:01 Downloading - Downloading input data....
2023-04-27 19:33:26 Training - Downloading the training image......................................
2023-04-27 19:36:42 Training - Training image download completed. Training in progress........
2023-04-27 19:37:23 Uploading - Uploading generated training model.
2023-04-27 19:37:34 Completed - Training job completed
ready for hosting!


## Deploy modelo

In [19]:
predictor = model.deploy(initial_instance_count=1,
                serializer = sagemaker.serializers.CSVSerializer(),
                instance_type='ml.m4.xlarge')

INFO:sagemaker:Creating model with name: knn-2023-04-27-19-37-37-677
INFO:sagemaker:Creating endpoint-config with name knn-2023-04-27-19-37-37-677
INFO:sagemaker:Creating endpoint with name knn-2023-04-27-19-37-37-677


-------------!

## Realizar previsões

In [20]:
test.shape

(442, 37)

In [21]:
test.head(5)

Unnamed: 0,class,marital_status,application_mode,application_order,course,daytimeevening_attendance,previous_qualification,previous_qualification_grade,nacionality,mothers_qualification,...,curricular_units_1st_sem_without_evaluations,curricular_units_2nd_sem_credited,curricular_units_2nd_sem_enrolled,curricular_units_2nd_sem_evaluations,curricular_units_2nd_sem_approved,curricular_units_2nd_sem_grade,curricular_units_2nd_sem_without_evaluations,unemployment_rate,inflation_rate,gdp
4204,0,1,1,1,9238,1,1,123.0,1,34,...,0,0,6,10,4,11.5,0,7.6,2.6,0.32
1279,1,1,17,1,9238,1,1,120.0,1,19,...,0,0,6,8,6,12.625,0,13.9,-0.3,0.79
2111,1,1,17,1,9003,1,1,131.0,1,19,...,0,0,6,8,6,14.333333,0,12.4,0.5,1.79
677,2,1,1,1,9853,1,1,133.0,1,37,...,0,0,6,6,4,12.0,0,15.5,2.8,-4.06
2579,1,1,1,1,9238,1,1,148.0,1,37,...,0,0,6,6,6,12.833333,0,10.8,1.4,1.74


# Remover classe

In [22]:
row = test.iloc[0:1,1:]
row.head()

Unnamed: 0,marital_status,application_mode,application_order,course,daytimeevening_attendance,previous_qualification,previous_qualification_grade,nacionality,mothers_qualification,fathers_qualification,...,curricular_units_1st_sem_without_evaluations,curricular_units_2nd_sem_credited,curricular_units_2nd_sem_enrolled,curricular_units_2nd_sem_evaluations,curricular_units_2nd_sem_approved,curricular_units_2nd_sem_grade,curricular_units_2nd_sem_without_evaluations,unemployment_rate,inflation_rate,gdp
4204,1,1,1,9238,1,1,123.0,1,34,38,...,0,0,6,10,4,11.5,0,7.6,2.6,0.32


### Preparar dados para previsão

In [23]:
batch_X_csv_buffer = io.StringIO()
row.to_csv(batch_X_csv_buffer, header=False, index=False)
test_row = batch_X_csv_buffer.getvalue()
print(test_row)

1,1,1,9238,1,1,123.0,1,34,38,0,7,120.2,1,0,0,1,0,0,20,0,0,6,9,5,11.8,0,0,6,10,4,11.5,0,7.6,2.6,0.32



### Realizar previsão

In [24]:
predictor.predict(test_row)

b'{"predictions": [{"predicted_label": 1.0}]}'

### Analisar resultados

In [25]:
test.head(5)

Unnamed: 0,class,marital_status,application_mode,application_order,course,daytimeevening_attendance,previous_qualification,previous_qualification_grade,nacionality,mothers_qualification,...,curricular_units_1st_sem_without_evaluations,curricular_units_2nd_sem_credited,curricular_units_2nd_sem_enrolled,curricular_units_2nd_sem_evaluations,curricular_units_2nd_sem_approved,curricular_units_2nd_sem_grade,curricular_units_2nd_sem_without_evaluations,unemployment_rate,inflation_rate,gdp
4204,0,1,1,1,9238,1,1,123.0,1,34,...,0,0,6,10,4,11.5,0,7.6,2.6,0.32
1279,1,1,17,1,9238,1,1,120.0,1,19,...,0,0,6,8,6,12.625,0,13.9,-0.3,0.79
2111,1,1,17,1,9003,1,1,131.0,1,19,...,0,0,6,8,6,14.333333,0,12.4,0.5,1.79
677,2,1,1,1,9853,1,1,133.0,1,37,...,0,0,6,6,4,12.0,0,15.5,2.8,-4.06
2579,1,1,1,1,9238,1,1,148.0,1,37,...,0,0,6,6,6,12.833333,0,10.8,1.4,1.74


## Encerrar deploy do modelo

In [26]:
# predictor.delete_endpoint(delete_endpoint_config=True)

# Fim de notebook