# Melakukan Training Terhadap Dataset

Pada notebook ini, kalian akan diarahkan untuk melakukan training terhadap dataset yang telah kalian buat sehingga menghasilkan sebuah model Machine Learning yang membuat komputer dapat menerjemahkan pose jari bahasa isyarat SIBI kalian. Adapun library yang harus kalian install adalah sebagai berikut:

<ul>
    <li>pandas</li>
    <li>numpy</li>
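    <li>opencv (paket pip: opencv-python)</li>
    <li>mediapipe</li>
    <li>csv (bawaan Python, tidak perlu di-install)</li>
    <li>pickle (bawaan Python, tidak perlu di-install)</li>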
    <li>scikit-learn</li>
</ul>


Di akhir sesi, kalian akan menyimpan sebuah file dengan format .sav yang merupakan model hasil training kalian, yang nantinya dapat digunakan untuk testing.

In [217]:
import pandas as pd
import numpy as np
import cv2
import mediapipe as mp
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import pickle

## 1. Mengecek label apa saja yang akan kalian training

In [218]:
path_dataset = './data'

# Keep only files that follow the `data_<label>.csv` naming convention so that
# stray directory entries (e.g. `.ipynb_checkpoints`) are not treated as labels.
# os.listdir() returns entries in arbitrary order, so sort them to get the same
# listing on every run and on every machine.
data_files = sorted(
    name for name in os.listdir(path_dataset)
    if name.startswith('data_') and name.endswith('.csv')
)

# Print the label encoded in each file name: the part between the `data_`
# prefix (5 characters) and the `.csv` extension (4 characters).
for file in data_files:
    print(file[5:-4])

A
B
C
D
E
F
Selesai


## 2. Menggabungkan File CSV pada Setiap label ke Sebuah Dataframe

In [219]:
# Number of feature columns every per-label CSV is renamed to.
N_FEATURES = 210

# Column names are identical for every file, so build them once.
feature_columns = ['fitur_' + str(k) for k in range(1, N_FEATURES + 1)]

# Load each per-label CSV, tag its rows with the label taken from the file
# name, and collect the frames so they can be concatenated in a single call
# (concatenating inside the loop copies the accumulated frame every time).
frames = []
for filename in sorted(data_files):  # sorted -> row order is reproducible
    # Ignore anything that does not follow the `data_<label>.csv` convention.
    if not (filename.startswith('data_') and filename.endswith('.csv')):
        continue
    label_name = filename[len('data_'):-len('.csv')]
    data = pd.read_csv(os.path.join(path_dataset, filename))
    # Raises ValueError if the file does not have exactly N_FEATURES columns.
    data.columns = feature_columns
    data['classes'] = label_name
    frames.append(data)

if not frames:
    raise FileNotFoundError(
        'No data_<label>.csv files found in ' + repr(path_dataset)
    )

# Each file keeps its own row index, exactly as in a file-by-file concat.
data_all = pd.concat(frames, axis=0, join='outer')

In [220]:
# Display the combined frame: the feature columns followed by the `classes` label column.
data_all

Unnamed: 0,fitur_1,fitur_2,fitur_3,fitur_4,fitur_5,fitur_6,fitur_7,fitur_8,fitur_9,fitur_10,...,fitur_202,fitur_203,fitur_204,fitur_205,fitur_206,fitur_207,fitur_208,fitur_209,fitur_210,classes
0,320.0,134.708301,34.392446,12.769665,0.000000,145.696174,117.066474,116.847531,139.331197,238.295912,...,535.759424,556.293177,636.938118,589.824337,539.342499,544.262461,619.052197,593.708081,557.538908,A
1,320.0,130.150883,30.815050,12.719490,0.000000,138.628910,125.406017,122.065132,132.449196,232.542191,...,521.415432,548.210228,637.132735,584.476236,528.805951,536.877390,613.959383,589.747087,552.753362,A
2,320.0,130.930720,29.465957,9.237091,0.000000,142.284451,123.571689,113.954804,124.763465,236.272879,...,495.812832,548.783390,640.000000,568.051696,510.059024,537.011808,613.228083,570.675923,525.993284,A
3,320.0,146.449547,52.762670,29.615589,0.000000,141.584634,136.098256,132.296266,140.982094,232.278022,...,498.012642,546.764211,640.000000,567.428109,510.135871,534.658145,612.455030,565.478866,518.710051,A
4,320.0,146.296694,51.539168,21.959509,0.000000,155.768251,138.514614,123.362193,129.759000,240.778083,...,498.542382,549.974288,640.000000,571.566510,511.950655,537.017565,615.673770,571.212285,522.407330,A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,320.0,283.039683,224.786252,181.248783,177.223181,181.980169,59.708881,86.181226,128.651810,178.692024,...,380.404808,358.196035,339.526864,325.499820,330.433550,291.103635,273.794624,253.788026,245.471797,Selesai
486,320.0,282.323211,224.342011,178.791559,173.742522,180.037423,63.256629,91.372216,133.108542,175.849939,...,379.473601,354.666636,332.414452,320.454258,328.028343,288.820181,267.729567,246.645145,237.288353,Selesai
487,320.0,281.943823,224.723974,182.015950,176.995096,177.149827,61.284962,87.674020,128.257116,173.777457,...,372.964863,350.387813,330.902474,316.800921,323.064649,283.502595,263.275591,241.266499,232.523491,Selesai
488,320.0,282.467046,224.763336,181.330650,176.179558,178.297931,62.327721,88.125179,128.670279,173.531271,...,370.095123,347.396270,328.276073,314.776120,321.080800,280.550887,259.778447,238.202034,229.847860,Selesai


In [221]:
# Count how many rows each label contributes to the combined frame.
data_all['classes'].value_counts()

C          1434
F           774
B           681
D           654
E           633
A           620
Selesai     490
Name: classes, dtype: int64

## 2. Membagi Data Untuk Label dan Feature-Feature dari Datasets

In [223]:
# Every column except the last one (`classes`) is a feature; take those
# columns as a NumPy array and cast them to float32.
feature_values = data_all.to_numpy()[:, :-1]
X = feature_values.astype('float32')
X

array([[320.      , 134.7083  ,  34.392445, ..., 619.0522  , 593.70807 ,
        557.5389  ],
       [320.      , 130.15088 ,  30.81505 , ..., 613.9594  , 589.7471  ,
        552.75336 ],
       [320.      , 130.93073 ,  29.465958, ..., 613.2281  , 570.6759  ,
        525.9933  ],
       ...,
       [320.      , 281.94382 , 224.72397 , ..., 263.2756  , 241.2665  ,
        232.5235  ],
       [320.      , 282.46704 , 224.76334 , ..., 259.77844 , 238.20203 ,
        229.84785 ],
       [320.      , 282.8415  , 225.56192 , ..., 264.07217 , 241.74173 ,
        232.66075 ]], dtype=float32)

In [224]:
# The last column of the combined frame holds the class label of each row.
Y = data_all.to_numpy()[:, -1]
Y

array(['A', 'A', 'A', ..., 'Selesai', 'Selesai', 'Selesai'], dtype=object)

In [227]:
# Convert the categorical class labels into numeric values: first learn the
# set of labels, then replace each label with its numeric code.
LE = LabelEncoder()
LE.fit(Y)
Y = LE.transform(Y)
Y

array([0, 0, 0, ..., 6, 6, 6])

## 3. Membagi Data untuk Data Training dan Data Testing

In [228]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, Y, test_size=0.20, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Show the shape of each part in the order: X_train, X_test, y_train, y_test.
print(*(part.shape for part in split_parts))

(4228, 210) (1058, 210) (4228,) (1058,)


## 4. Melakukan Modeling Menggunakan SVM untuk Classification

In [229]:
# RBF-kernel SVM classifier. probability=True makes the fitted model estimate
# class probabilities, which involves an internal randomised cross-validation;
# random_state pins that randomness so re-running the notebook yields the same
# probability estimates (same seed as the train/test split).
svm_model = SVC(kernel='rbf', C=100, probability=True, random_state=42)

# Fit the classifier on the training features and their encoded labels.
svm_model.fit(X_train, y_train)

SVC(C=100, probability=True)

## 5. Evaluasi Terhadap Model yang Telah Didapat

In [230]:
# Predict the held-out rows and report the fraction predicted correctly.
y_pred = svm_model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9716446124763705


In [231]:
# Per-class precision / recall / F1 on the held-out rows (classes appear as
# their encoded numeric ids).
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       133
           1       0.88      0.92      0.90       140
           2       0.96      0.93      0.95       282
           3       1.00      1.00      1.00       123
           4       1.00      1.00      1.00       126
           5       1.00      1.00      1.00       161
           6       1.00      1.00      1.00        93

    accuracy                           0.97      1058
   macro avg       0.98      0.98      0.98      1058
weighted avg       0.97      0.97      0.97      1058



## 6. Menyimpan Model

In [232]:
# Persist the trained SVM model with pickle so it can be loaded later.
# NOTE(review): the model was fitted on the encoded numeric labels, and only
# the model is written here -- the label encoder (`LE`) is not saved, so a
# consumer needs another way to map predictions back to label names.
path_saving_model = './model/mymodel.sav'

# open() does not create missing directories, so make sure the target folder
# exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(path_saving_model), exist_ok=True)

with open(path_saving_model, 'wb') as handle:
    pickle.dump(svm_model, handle)