In [1]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pandas as pd
import pickle

In [2]:
def train_and_eval(model, train_in, train_out, val_in, val_out):
    """Fit ``model`` on the training split and score it on the validation split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place, so the caller's object is trained afterwards.
        train_in: Training features passed to ``model.fit``.
        train_out: Training labels passed to ``model.fit``.
        val_in: Validation features passed to ``model.predict``.
        val_out: True validation labels the predictions are compared against.

    Returns:
        The value of ``accuracy_score(val_out, predictions)``.
    """
    model.fit(train_in, train_out)
    return accuracy_score(val_out, model.predict(val_in))

In [3]:
# Load the feature table and discard the "file" and "face" columns, which are
# not used as model inputs.
data = pd.read_csv("aus.csv")
data = data.drop(columns=["file", "face"])
print(data)

print("Unique classes", data["emotion"].unique(), "\n")

# Count the samples of each class by summing the boolean mask.
# The previous `.value_counts().iloc[1]` relied on the True count being the
# second entry; value_counts() orders by frequency, so a class holding more
# than half of the rows reported the count of the *other* rows, and a dataset
# with a single class raised IndexError.
for class0 in data["emotion"].unique():
    sample_count = (data["emotion"] == class0).sum()
    print(f"Found {sample_count} samples for class {class0}")

# Split the table into the target column and the remaining feature columns.
labels = data["emotion"]
inputs = data.drop(columns=["emotion"])

      emotion     AUS01     AUS02     AUS04     AUS05     AUS06  AUS07  \
0       angry  0.480870  0.221707  0.554721  0.341209  0.538130    1.0   
1       angry  0.368920  0.193828  0.238290  0.269536  0.075385    0.0   
2       angry  0.183939  0.150695  0.466302  0.274774  0.653131    1.0   
3       angry  0.537044  0.418116  0.481464  0.438458  0.144407    0.0   
4       angry  0.443417  0.432919  0.677055  0.334438  0.292933    0.0   
...       ...       ...       ...       ...       ...       ...    ...   
1280  neutral  0.532191  0.356289  0.667120  0.268087  0.183363    0.0   
1281  neutral  0.391457  0.269737  0.255740  0.254209  0.467601    1.0   
1282  neutral  0.315095  0.133612  0.501751  0.290585  0.489786    0.0   
1283  neutral  0.311539  0.211948  0.174492  0.283762  0.273198    1.0   
1284  neutral  0.417826  0.295563  0.422287  0.284784  0.207153    1.0   

         AUS09     AUS10  AUS12  ...     AUS15     AUS17     AUS18  AUS20  \
0     0.537321  0.023429    0.0  .

In [4]:
# Stage 1: hold out 10% of all samples as the test set. Stratifying on the
# labels balances the classes across the two parts.
data_in, test_in, data_out, test_out = train_test_split(
    inputs, labels, test_size=0.1, random_state=42, stratify=labels
)

# Stage 2: carve the validation set out of the remaining 90%.
# 0.2 / 0.9 of that remainder corresponds to 20% of the original data.
train_in, val_in, train_out, val_out = train_test_split(
    data_in, data_out, test_size=(0.2/0.9), random_state=42, stratify=data_out
)

In [5]:
# Train one SVC per kernel and remember the one with the highest validation
# accuracy. `model_svc` keeps every fitted model in kernel order;
# `best_model_svc` is a (model, accuracy) tuple for the current winner.
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

model_svc = []
best_model_svc = None
for index, kernel in enumerate(kernels):
    candidate = SVC(kernel=kernel)
    model_svc.append(candidate)

    accuracy = train_and_eval(candidate, train_in, train_out, val_in, val_out)
    print("accuracy for index %2d, kernel = %10s: %.4f" % (index, kernel, accuracy))

    # Strict ">" means that on a tie the earlier kernel stays the winner.
    if best_model_svc is not None and accuracy <= best_model_svc[1]:
        continue
    best_model_svc = (candidate, accuracy)

accuracy for index  0, kernel =     linear: 0.5759
accuracy for index  1, kernel =       poly: 0.6109
accuracy for index  2, kernel =        rbf: 0.6031
accuracy for index  3, kernel =    sigmoid: 0.1673


In [7]:
# Persist only the fitted estimator (element 0 of the (model, accuracy) tuple).
best_estimator = best_model_svc[0]
with open('model_svc.pkl', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)