# Prediction Using SVM

<p style="font-family: Arial; font-size:1.75em;color:purple; font-style:bold"><br>

Importing the Necessary Libraries<br></p>

In [28]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

## Plot Includes

In [2]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

<p style="font-family: Arial; font-size:1.75em;color:purple; font-style:bold"><br>

Creating a Pandas DataFrame from a CSV file<br></p>

In [39]:
def CreateDataFrame(types, level):
    data = pd.read_csv('../Extraction/'+types+"/"+level+'.csv')
    data["Class"] = level
    return data

In [40]:
def LoadData(types):
    frames = [CreateDataFrame(types,"Clean"), 
          CreateDataFrame(types,"Dirty (0-20)"), 
          CreateDataFrame(types,"Dirty (20-40)"), 
          CreateDataFrame(types,"Dirty (40-60)"),
          CreateDataFrame(types,"Dirty (60-80)"),
          CreateDataFrame(types,"Dirty (80-100)")]
    df = pd.concat(frames)
    y=df[['Class']].copy()
    X = df.copy()
    del X['Class']
    return X,y

# Classification using Support Vector Machine

## Optimize Classfiers

In [55]:
def optimize(X_train, y_train):
    param_grid = [
            {'svc__kernel': ['poly'],'svc__degree':[1,2,3,4,5],'svc__coef0':[1,5,10,15,20,30]},
            {'svc__kernel': ['rbf'],'svc__gamma': [0.01, 0.03, 0.1, 0.3, 1.0, 3.0]},
            {'svc__kernel': ['sigmoid'],'svc__coef0':[1,5,10,15,20,30]}
            ]

    svcpipeline = Pipeline([
        ('scale', StandardScaler()),
        ('svc', SVC())
    ])
    grid_search = GridSearchCV(svcpipeline, param_grid, cv=5, verbose=3, n_jobs=-1)
    grid_search.fit(X_train, y_train.values.ravel())
    return grid_search

## Cycle Work

In [56]:
def cycleWork(X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    gridsearch = optimize(X_train, y_train)
    print(gridsearch.best_params_)
    estimator = gridsearch.best_estimator_
    
    y_train_pred = estimator.predict(X_train)
    trnscore = accuracy_score(y_train, y_train_pred)
    
    y_tst_pred = estimator.predict(X_test)
    tstscore = accuracy_score(y_test, y_tst_pred)
    cm = confusion_matrix(y_test, y_tst_pred)
    true_pos = np.diag(cm) 
    precision = np.sum(true_pos / np.sum(cm, axis=0))
    recall = np.sum(true_pos / np.sum(cm, axis=1))
    f1 = 2 * ((precision* recall)/(precision + recall))
    #f1None = f1_score(y_test, y_tst_pred, average=None)
    #f1micro = f1_score(y_test, y_tst_pred, average='micro')
    return [trnscore, tstscore, f1]

In [57]:
def AllWork(types,cycles = 5):
    data = []
    X,y = LoadData(types)
    for i in range(0,cycles):
        print(i)
        acc = cycleWork(X,y)
        print("Train Accuracy ",acc[0],", Test Accuracy",acc[1], "F1 ",acc[2])
        data.append([acc[0],acc[1],acc[2]])
        numpyarray = np.array(data)
        df = pd.DataFrame(numpyarray, columns = ["Train Acc", "Test Acc", "F1 Score"])
        df.to_csv("SVC-"+types+'.csv',index = False)

In [59]:
types = "Solar Data"
AllWork(types,5)
types = "NoBG"
AllWork(types,5)

0
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.0s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   23.4s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   53.8s finished


{'svc__coef0': 15, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.985637342908 F1  5.92172016755
1
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   14.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   26.0s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   53.4s finished


{'svc__coef0': 5, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  0.998652896273 , Test Accuracy 0.989228007181 F1  5.92971121534
2
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   11.5s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   23.2s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   47.3s finished


{'svc__coef0': 30, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.980251346499 F1  5.8878877666
3
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   14.4s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   30.0s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   56.6s finished


{'svc__coef0': 5, 'svc__degree': 5, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.992818671454 F1  5.96282068871
4
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   22.5s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   45.0s finished


{'svc__coef0': 5, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  0.999550965424 , Test Accuracy 0.991023339318 F1  5.94523693602
0
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   19.0s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   42.7s finished


{'svc__coef0': 5, 'svc__degree': 5, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.982046678636 F1  5.89573575022
1
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   20.4s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   43.4s finished


{'svc__coef0': 15, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.983842010772 F1  5.9009426887
2
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   23.3s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   46.6s finished


{'svc__coef0': 10, 'svc__degree': 5, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.97486535009 F1  5.84653364315
3
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   20.9s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   44.5s finished


{'svc__coef0': 10, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.983842010772 F1  5.90113404484
4
Fitting 5 folds for each of 42 candidates, totalling 210 fits


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   13.2s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   24.8s
[Parallel(n_jobs=-1)]: Done 210 out of 210 | elapsed:   48.1s finished


{'svc__coef0': 10, 'svc__degree': 4, 'svc__kernel': 'poly'}
Train Accuracy  1.0 , Test Accuracy 0.989228007181 F1  5.94114680107
