<a href="https://colab.research.google.com/github/verm024/bsd-capstone/blob/main/Capstone_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Code Steps**
<br>

1.   Load dataset CSV
2.   Split data into training, validation, and test sets
3.   Extract feature columns
4.   Initialize the model and its layers
5.   Add train function and train the model
6.   Export model into JSON format (or TFX format)


## Import Libraries

In [None]:
# Import
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

## Load Dataset

In [15]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime,
# which would explain why this import lives here instead of in the imports cell — confirm.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# Read the dataset CSV from the mounted Drive folder into a DataFrame
path = '/content/drive/MyDrive/Capstone/Dataset/dataset.csv'
df = pd.read_csv(filepath_or_buffer=path)
# Bare last expression so the notebook renders the frame as a rich table
df

Unnamed: 0,umur,gaji,tanggungan,pekerjaan,diterima
0,45,1.5,5,8,1
1,50,2.5,6,5,1
2,60,0.0,1,0,1
3,42,1.6,3,5,1
4,63,0.2,0,1,1
5,36,1.8,2,8,1
6,29,0.7,1,4,1
7,38,1.5,2,7,1
8,48,1.1,2,7,1
9,57,2.2,4,5,1


## Split Train, Validation, Test

In [17]:
# Fixed seed so the split, and everything computed from it, is identical on every
# Restart & Run All; without random_state each run shuffles the rows differently.
SEED = 42

# Hold out 20% of all rows for testing, then 20% of the remaining rows for
# validation (roughly 64% train / 16% validation / 20% test overall).
train, test = train_test_split(df, test_size=0.2, random_state=SEED)
train, val = train_test_split(train, test_size=0.2, random_state=SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

12 train examples
4 validation examples
4 test examples


## Convert dataframe to tf.data


In [18]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_column='diterima', seed=None):
  """Convert a pandas DataFrame into a batched ``tf.data.Dataset``.

  Args:
    dataframe: DataFrame holding the feature columns plus the label column.
      A copy is made first, so the caller's frame is not modified.
    shuffle: When True, shuffle the examples using a buffer as large as the frame.
    batch_size: Number of examples per batch.
    label_column: Name of the column to split off as the label. Defaults to
      'diterima', the target column used in this notebook.
    seed: Optional seed forwarded to ``Dataset.shuffle`` for a repeatable order.

  Returns:
    A dataset yielding ``(features, labels)`` batches, where ``features`` is a
    dict keyed by the remaining column names.

  Raises:
    KeyError: If ``label_column`` is not a column of ``dataframe``.
  """
  dataframe = dataframe.copy()
  # pop() removes the label from the copy, leaving only feature columns behind
  labels = dataframe.pop(label_column)
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  # Nothing to reorder in an empty frame, and a zero-sized shuffle buffer is
  # expected to be rejected by tf.data, so skip the shuffle in that case.
  if shuffle and len(dataframe) > 0:
    ds = ds.shuffle(buffer_size=len(dataframe), seed=seed)
  ds = ds.batch(batch_size)
  return ds

In [19]:
# A small batch size is used for demonstration purposes
batch_size = 5

# Only the training split is shuffled; validation and test keep their row order
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split, shuffle=False, batch_size=batch_size)
    for split in (val, test)
)
print(train_ds)

<BatchDataset shapes: ({umur: (None,), gaji: (None,), tanggungan: (None,), pekerjaan: (None,)}, (None,)), types: ({umur: tf.int64, gaji: tf.float64, tanggungan: tf.int64, pekerjaan: tf.int64}, tf.int64)>


In [21]:
# Peek at a single training batch to check the feature keys and label tensor
for feature_batch, label_batch in train_ds.take(1):
  # Iterating a dict yields its keys, so list(...) gives the feature names
  print('Every feature:', list(feature_batch))
  print('A batch of ages:', feature_batch['umur'])
  print('A batch of targets:', label_batch)

Every feature: ['umur', 'gaji', 'tanggungan', 'pekerjaan']
A batch of ages: tf.Tensor([44 48 60 29 50], shape=(5,), dtype=int64)
A batch of targets: tf.Tensor([0 1 0 1 1], shape=(5,), dtype=int64)


## Extract Feature Columns

In [23]:
# Features dict (element 0 of the (features, labels) pair) of one training batch,
# kept as sample input for the feature-column demos below
example_batch = next(iter(train_ds))[0]

def demo(feature_column, batch=None):
  """Print the dense values a feature column produces for a batch of features.

  Args:
    feature_column: Feature column handed to ``layers.DenseFeatures``.
      NOTE: inside this function the name shadows the imported
      ``feature_column`` module.
    batch: Optional features dict to transform. Defaults to the notebook-level
      ``example_batch`` so existing ``demo(column)`` calls behave as before.
  """
  # Resolve the default at call time so a re-assigned example_batch is picked up
  if batch is None:
    batch = example_batch
  feature_layer = layers.DenseFeatures(feature_column)
  print(feature_layer(batch).numpy())

In [25]:
# Numeric column over the 'umur' feature; demo prints its dense values for the example batch
umur = feature_column.numeric_column(key="umur")
demo(umur)

[[48.]
 [42.]
 [63.]
 [26.]
 [50.]]


In [28]:
# Bucketize 'umur' at these boundaries; demo prints the encoded rows for the example batch
umur_boundaries = [25, 35, 45, 55, 65]
umur_buckets = feature_column.bucketized_column(
    source_column=umur,
    boundaries=umur_boundaries,
)
demo(umur_buckets)

[[0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]]
