In [None]:
!pip install --upgrade scikit-learn


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay



In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM at the path below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Read the CSV into a DataFrame, using its first column as the row index,
# then show (rows, columns) as the cell output.
csv_path = "/content/path/to/file.csv"
dataset_full = pd.read_csv(csv_path, index_col=0)
dataset_full.shape

In [None]:
# Shuffle the rows of the dataset.
# A fixed random_state gives the same row order on every fresh run, so the
# results computed from this frame are reproducible after a kernel restart.
# Note: re-running this cell without reloading the CSV shuffles the already
# shuffled frame again.
dataset_full = dataset_full.sample(frac=1, random_state=0)
dataset_full.head(5)

In [None]:
# Show the 'target' value of the first 133 rows.
dataset_full['target'].head(133)

Visualisation

In [None]:

# Seeded RandomState handed to train_test_split below.
random_state = np.random.RandomState(0)

# Features are the first 132 columns; labels come from the 'target' column.
X = dataset_full.iloc[:, :132].values
Y = dataset_full['target'].values

# Hold out 40% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.40,
    random_state=random_state,
)

# Three-stage model: standard scaling, then normalisation, then a
# polynomial-kernel support vector classifier.
pipeline = Pipeline(
    steps=[
        ("Standard Scaling", StandardScaler()),
        ("Normalizing", Normalizer()),
        ("SVC", SVC(kernel="poly")),
    ]
)

# Fit on the training split; the fitted pipeline is the cell output.
pipeline.fit(X_train, y_train)

In [None]:
from sklearn.metrics import PrecisionRecallDisplay

# Precision-recall curve computed directly from the fitted pipeline on the
# test split.
# The result is deliberately NOT stored in a variable called `display`: that
# name would shadow IPython's built-in display() for the rest of the session.
pr_display_estimator = PrecisionRecallDisplay.from_estimator(
    pipeline, X_test, y_test, name="SVC"
)
_ = pr_display_estimator.ax_.set_title("2-class Precision-Recall curve")

In [None]:
# Continuous decision scores of the fitted pipeline for the test samples.
y_score = pipeline.decision_function(X_test)

# Same precision-recall curve, this time built from the precomputed scores.
# The result is deliberately NOT stored in a variable called `display`: that
# name would shadow IPython's built-in display() for the rest of the session.
pr_display_scores = PrecisionRecallDisplay.from_predictions(
    y_test, y_score, name="SVC"
)
_ = pr_display_scores.ax_.set_title("2-class Precision-Recall curve")

In [None]:
# Predicted labels for the test split and the pipeline's score on it.
y_pred = pipeline.predict(X_test)
accuracy = pipeline.score(X_test, y_test)
print("Accuracy: " , accuracy)

# Root-mean-square difference between predicted and true labels.
# float(...) converts the NumPy scalar to a plain Python float: under
# NumPy >= 2 the %r of a NumPy scalar prints as "np.float64(...)" instead of
# the bare number.
rms = float(np.sqrt(np.mean((y_pred - y_test) ** 2)))
print("RMS: %r " % rms)

In [None]:
# Accuracy as a fraction of the test samples ...
correct_fraction = accuracy_score(y_test, y_pred)
# ... and as an absolute count of correctly classified samples.
correct_count = accuracy_score(y_test, y_pred, normalize=False)

print(correct_fraction)
print(correct_count)

In [None]:
# Confusion matrix of the fitted pipeline on the test split.
# sklearn.metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0 and
# removed in 1.2, so importing it fails on an up-to-date install.
# ConfusionMatrixDisplay.from_estimator (ConfusionMatrixDisplay is imported in
# the import cell at the top of the notebook) is its replacement.
ConfusionMatrixDisplay.from_estimator(pipeline, X_test, y_test)
plt.show()

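The confusion matrix above reads as follows (rows = true label, columns = predicted label):

| True label \ Predicted label | 0.0 | 1.0 |
|------------------------------|-----|-----|
| 0.0                          | 338 | 28  |
| 1.0                          | 29  | 363 |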

In [None]:
from sklearn.metrics import confusion_matrix

# Build the confusion matrix from the current predictions instead of copying
# the numbers by hand from an earlier plot, so it always matches the model and
# split that were actually run.
# Rows are true labels and columns are predicted labels, in sorted label order.
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix

In [None]:
# Tick labels used on BOTH axes, defined once so the two axes cannot drift
# apart (the y-axis previously read 'tust-building').
# NOTE(review): assumes label 0.0 is 'trust-breaking' and 1.0 is
# 'trust-building' - confirm against the dataset.
gesture_labels = ['trust-breaking', 'trust-building']

# Change figure size and increase dpi for better resolution
plt.figure(figsize=(6,4), dpi=80)
# Scale up the size of all text
sns.set(font_scale = 1.1)

# Plot Confusion Matrix using Seaborn heatmap(); fmt='d' prints the cell
# counts as integers.
ax = sns.heatmap(conf_matrix, annot=True, fmt='d')

# set x-axis label and ticks.
ax.set_xlabel("Predicted Gesture Category", fontsize=14, labelpad=20)
ax.xaxis.set_ticklabels(gesture_labels)

# set y-axis label and ticks
ax.set_ylabel("Actual Gesture Category", fontsize=14, labelpad=20)
ax.yaxis.set_ticklabels(gesture_labels)


plt.show()

In [None]:
from joblib import dump
import pickle

# Persist the fitted pipeline twice: as a joblib file in the working
# directory and as a pickle file under the Drive mount.
dump(pipeline, "gesture_model.joblib")

# Use a context manager so the file handle is flushed and closed even if
# pickling raises; the bare open(...) was never closed.
# NOTE(review): '/content/drive' is the mount point itself - confirm the file
# should not live in a sub-folder of the mounted drive.
with open('/content/drive/model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)
