In [1]:
import Foundation
import TensorFlow
import PythonKit

// Pull in the notebook display helper, then ask the IPython shell to use the "inline" matplotlib backend.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


In [2]:
// Python modules reached through PythonKit; later cells call into them via these bindings.
let pd = Python.import("pandas")
let metrics = Python.import("sklearn.metrics")
let plt = Python.import("matplotlib.pyplot")

In [3]:
// Read the predictions CSV into a pandas DataFrame and preview its first rows.
let dsURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/labels_ds_v1_preds.csv")
let df = pd.read_csv(dsURL.path)
df.head()

   sample_id  ...      prob
0          1  ...  0.960625
1          2  ...  0.846991
2          3  ...  0.948300
3          4  ...  0.895835
4          5  ...  0.606713

[5 rows x 5 columns]


In [4]:
// Column names of the loaded DataFrame.
df.columns

Index(['sample_id', 'text', 'label', 'pred', 'prob'], dtype='object')


In [5]:
// Column values as Python lists: the "label" column (ground truth) and the "pred" column (model output).
let y_true = df["label"].to_list()
let y_pred = df["pred"].to_list()

In [6]:
// Distinct values of the "label" column, sorted; reused below as the `labels:` argument of the metric calls.
let labels = df["label"].unique().sorted()
labels

▿ 5 elements
  - 0 : Doing something
  - 1 : Performing motions with hands
  - 2 : Walking and turning
  - 3 : Walking forward few steps
  - 4 : Walking or running


In [7]:
// scikit-learn's signature is confusion_matrix(y_true, y_pred, ...): ground truth comes first.
// With that order rows are true labels and columns are predictions, so normalize: "true"
// divides each row by its class support and the diagonal is the per-class recall.
metrics.confusion_matrix(y_true, y_pred, labels: labels, normalize: "true")

[[0.86187845 0.03867403 0.00368324 0.03130755 0.06445672]
 [0.05076142 0.82741117 0.01353638 0.06598985 0.04230118]
 [0.00510204 0.04846939 0.91581633 0.02040816 0.01020408]
 [0.01552795 0.05900621 0.01552795 0.88664596 0.02329193]
 [0.02494062 0.02494062 0.00831354 0.0415677  0.90023753]]


In [8]:
// scikit-learn's signature is confusion_matrix(y_true, y_pred, ...): ground truth comes first.
// Rows are true labels and columns are predictions, so each row sums to that class's support.
metrics.confusion_matrix(y_true, y_pred, labels: labels)

[[468  21   2  17  35]
 [ 30 489   8  39  25]
 [  2  19 359   8   4]
 [ 10  38  10 571  15]
 [ 21  21   7  35 758]]


In [9]:
// Per-class precision, recall, F1 and support, with the classes listed in `labels` order.
metrics.classification_report(y_true, y_pred, labels: labels)

                               precision    recall  f1-score   support

              Doing something       0.88      0.86      0.87       543
Performing motions with hands       0.83      0.83      0.83       591
          Walking and turning       0.93      0.92      0.92       392
    Walking forward few steps       0.85      0.89      0.87       644
           Walking or running       0.91      0.90      0.90       842

                     accuracy                           0.88      3012
                    macro avg       0.88      0.88      0.88      3012
                 weighted avg       0.88      0.88      0.88      3012



In [43]:
// One flag per DataFrame row: true when the predicted label differs from the "label" column.
// `iterrows()` yields (index, row) pairs, so element 1 of each pair is the row itself.
let badPreds = Python.list(df.iterrows()).map {
    (indexedRow) -> Bool in
    let row = indexedRow[1]
    return row["label"] != row["pred"]
}
// Preview the first flags; `prefix` avoids the out-of-range trap that `[0..<3]`
// hits when the frame has fewer than three rows.
badPreds.prefix(3)

▿ 3 elements
  - 0 : false
  - 1 : false
  - 2 : false


In [44]:
extension Bool {
    /// The receiver as an integer: `1` for `true`, `0` for `false`.
    var intValue: Int {
        if self {
            return 1
        }
        return 0
    }
}

In [45]:
// Count the flagged rows by adding each flag's 0/1 value to a running total.
badPreds.reduce(0) { $0 + $1.intValue }

367


In [48]:
// Select the rows flagged in `badPreds` (passed as a boolean mask) and write them to CSV with `index: false`.
df[badPreds].to_csv("/notebooks/language2motion.gt/data/labels_ds_v1_bad_preds.csv", index: false)

None


In [49]:
// Number of rows for each value of the "label" column.
df["label"].value_counts()

Walking or running               842
Walking forward few steps        644
Performing motions with hands    591
Doing something                  543
Walking and turning              392
Name: label, dtype: int64
