In [1]:
! pip install -qU sagemaker

In [2]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
import math
import pandas as pd
import sagemaker
from sagemaker.tensorflow import TensorFlow

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [3]:
# S3 location of the raw game-state CSV
path = "s3://nguyen-viet-ceu2023/pokemon_showdown_ml_data/raw_data/20231209_game_state.csv"

# Read the CSV at that location into a DataFrame
df = read_csv(filepath_or_buffer=path)

severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



In [4]:
# Separate the target column from the feature columns; `df` itself is left untouched
labels = df['p1_win'].copy()
features = df.drop(columns=['p1_win'])

In [5]:
# Cast both the feature frame and the label series to 32-bit floats
features, labels = (frame.astype('float32') for frame in (features, labels))

In [6]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=12,
)
# Show the shape of each of the four partitions
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(38607, 214) (9652, 214) (38607,) (9652,)


In [7]:
# Carve the last 200 training rows off as a validation set.
# Each right-hand side is evaluated in full before the names are rebound, so the
# validation slice is taken from the un-truncated training data.
# NOTE: re-running this cell trims another 200 rows from the training data.
X_val, X_train = X_train.iloc[-200:], X_train.iloc[:-200]
y_val, y_train = y_train.iloc[-200:], y_train.iloc[:-200]

In [8]:
# Write every split to a local CSV file (header row kept, index column dropped)
for csv_name, frame in (
    ('train_data.csv', X_train),
    ('train_label.csv', y_train),
    ('test_data.csv', X_test),
    ('test_label.csv', y_test),
    ('val_data.csv', X_val),
    ('val_label.csv', y_val),
):
    frame.to_csv(csv_name, index=False)

In [9]:
# Upload the train, test and validation CSV files to S3
import os  # no longer used in this cell; kept in case later cells rely on it
import boto3
import sagemaker

bucket = "nguyen-viet-ceu2023"
prefix = "pokemon_showdown_ml_data"

# One bucket handle is reused for every upload instead of building a new
# boto3 session and resource per file.
s3_bucket = boto3.Session().resource('s3').Bucket(bucket)

for csv_name in (
    'train_data.csv',
    'train_label.csv',
    'test_data.csv',
    'test_label.csv',
    'val_data.csv',
    'val_label.csv',
):
    # S3 keys always use "/" as the separator, so the key is built explicitly
    # rather than with os.path.join (which would emit "\\" on Windows).
    s3_bucket.Object(f"{prefix}/data/{csv_name}").upload_file(csv_name)

In [10]:
# Check that the CSV files were uploaded: recursively list everything under
# <bucket>/<prefix>/data with the AWS CLI ({bucket} and {prefix} are
# interpolated from the Python variables of the same names).
! aws s3 ls {bucket}/{prefix}/data --recursive

2023-12-16 01:00:09    8420477 pokemon_showdown_ml_data/data/test_data.csv
2023-12-16 01:00:09      38615 pokemon_showdown_ml_data/data/test_label.csv
2023-12-16 01:00:08   33494843 pokemon_showdown_ml_data/data/train_data.csv
2023-12-16 01:00:08     153635 pokemon_showdown_ml_data/data/train_label.csv
2023-12-16 01:00:10     177809 pokemon_showdown_ml_data/data/val_data.csv
2023-12-16 01:00:10        807 pokemon_showdown_ml_data/data/val_label.csv


In [11]:
! pip install -qU sagemaker

In [12]:
# Retrieve basic information from the current SageMaker session:
# the AWS region it is bound to and the execution role of this notebook.
import sagemaker

# Region of the boto session behind a default-constructed SageMaker Session
region = sagemaker.Session().boto_region_name
print("AWS Region: {}".format(region))

# Execution role; `role` is handed to the estimator in the training cell
role = sagemaker.get_execution_role()
print("RoleArn: {}".format(role))

AWS Region: eu-west-1
RoleArn: arn:aws:iam::870137400553:role/service-role/AmazonSageMaker-ExecutionRole-20231215T194361


In [14]:
# S3 locations, both derived from the same bucket/prefix used for the uploads
# so that changing either variable keeps input and output in sync.
s3_output_location = f's3://{bucket}/{prefix}/model'   # passed as output_path
s3_data_location = f's3://{bucket}/{prefix}/data/'     # train/test/val CSVs

# Create the estimator: one ml.c5.xlarge instance running the TensorFlow 2.13.0 /
# Python 3.10 image with ps_tensorflow_training.py as the entry point.
tf_estimator = TensorFlow(
    entry_point="ps_tensorflow_training.py",
    role=role,
    output_path=s3_output_location,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    framework_version="2.13.0",
    py_version="py310",
    # presumably consumed by the entry-point script as arguments — script not shown here
    hyperparameters={'epochs': 150, 'batch_size': 32},
    script_mode=True,
)

# Run the training job. The single 'train' channel points at the whole data/
# prefix, i.e. all six uploaded CSV files, not just the training split.
tf_estimator.fit({'train': s3_data_location})

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: tensorflow-training-2023-12-16-01-22-05-971


Using provided s3_resource
2023-12-16 01:22:06 Starting - Starting the training job...
2023-12-16 01:22:20 Starting - Preparing the instances for training......
2023-12-16 01:23:21 Downloading - Downloading input data...
2023-12-16 01:23:51 Downloading - Downloading the training image...
2023-12-16 01:24:31 Training - Training image download completed. Training in progress..[34m2023-12-16 01:24:36.322400: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.[0m
[34mTo enable the following instructions: AVX512F, in other operations, rebuild TensorFlow with the appropriate compiler flags.[0m
[34m2023-12-16 01:24:38,505 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2023-12-16 01:24:38,506 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2023-12-16 01:24:38,506 sagemaker-training-to