# Transformer-motion2label2

In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Datasets MotionModels ImageClassificationModels TextModels ModelSupport SummaryWriter

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Datasets
		MotionModels
		ImageClassificationModels
		TextModels
		ModelSupport
		SummaryWriter
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpu4usvsci/swift-install
[1/2] Compiling MotionModels MotionClassifier.swift
[2/3] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
[3/3] Linking libjupyterInstalledPackages.so
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import Datasets
import MotionModels
import ImageClassificationModels
import TextModels
import ModelSupport
import SummaryWriter

In [3]:
import PythonKit

let metrics = Python.import("sklearn.metrics")

In [4]:
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [5]:
let batchSize = 10
let maxSequenceLength =  300 //600
let runName = "run_9"

print("batchSize: \(batchSize)")
print("maxSequenceLength: \(maxSequenceLength)")
print("runName: \(runName)")

// let serializedDatasetURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset_v2.normalized.plist")
let serializedDatasetURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset.motion_flag.normalized.sampled.100.plist")
let labelsURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/labels_ds_v2.csv")

print("\nLoading dataset...")
let dataset = try! Motion2Label2(
    serializedDatasetURL: serializedDatasetURL,
    labelsURL: labelsURL,
    maxSequenceLength: maxSequenceLength,
    batchSize: batchSize
) { 
    // TODO: move this to dataset class
    (example: Motion2LabelExample) -> LabeledMotionBatch in
    let motionFrames = Tensor<Float>(example.motionSample.motionFramesArray)
    let motionFlag = Tensor<Int32>(motionFrames[0..., 44...44].squeezingShape(at: 1))
    let origMotionFramesCount = Tensor<Int32>(Int32(motionFrames.shape[0]))
    let motionBatch = MotionBatch(motionFrames: motionFrames, motionFlag: motionFlag, origMotionFramesCount: origMotionFramesCount)
    let label = Tensor<Int32>(Int32(example.label!.idx))
    return LabeledMotionBatch(data: motionBatch, label: label)
}

print("dataset.trainingExamples.count: \(dataset.trainingExamples.count)")
print("dataset.validationExamples.count: \(dataset.validationExamples.count)")

batchSize: 10
maxSequenceLength: 300
runName: run_9

Loading dataset...
MotionData(motionSamples: 98)
dataset.trainingExamples.count: 57
dataset.validationExamples.count: 14


In [6]:
// instantiate ResNet
var hiddenLayerCount: Int = 6 //12
var attentionHeadCount: Int = 6 //12
var hiddenSize = 64*attentionHeadCount // 64*12 = 768 // 32*6=192 // 64*6=384
let classCount = 5
var featureExtractor = ResNet(classCount: hiddenSize, depth: .resNet18, downsamplingInFirstStage: false, channelCount: 1)

In [7]:
// instantiate FeatureTransformerEncoder
var caseSensitive: Bool = false
var subDirectory: String = "uncased_L-12_H-768_A-12"
let directory = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let vocabularyURL = directory
    .appendingPathComponent(subDirectory)
    .appendingPathComponent("vocab.txt")

let vocabulary: Vocabulary = try! Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary,
    caseSensitive: caseSensitive, unknownToken: "[UNK]", maxTokenLength: nil)

var variant: BERT.Variant = .bert          
var intermediateSize: Int = hiddenSize*4 // 3072/768=4

var transformerEncoder = FeatureTransformerEncoder(
    variant: variant,
    vocabulary: vocabulary,
    tokenizer: tokenizer,
    caseSensitive: caseSensitive,
    hiddenSize: hiddenSize,
    hiddenLayerCount: hiddenLayerCount,
    attentionHeadCount: attentionHeadCount,
    intermediateSize: intermediateSize,
    intermediateActivation: gelu,
    hiddenDropoutProbability: 0.1,
    attentionDropoutProbability: 0.1,
    maxSequenceLength: 512,
    typeVocabularySize: 2,
    initializerStandardDeviation: 0.02,
    useOneHotEmbeddings: false)

In [8]:
// instantiate MotionClassifier
var motionClassifier = MotionClassifier(featureExtractor: featureExtractor, transformerEncoder: transformerEncoder, classCount: classCount, maxSequenceLength: maxSequenceLength)

# train

In [9]:
let optimizer = SGD(for: motionClassifier, learningRate: 1e-4)

In [10]:
let summaryWriter = SummaryWriter(logdir: URL(fileURLWithPath: "/notebooks/language2motion.gt/data/tboard/").appendingPathComponent(runName), flushMillis: 30*1000)

In [11]:
print("\nTraining MotionClassifier for the motion2Label task!")
var trainingStepCount = 0
time() {
    for (epoch, epochBatches) in dataset.trainingEpochs.prefix(5).enumerated() {
        print("[Epoch \(epoch + 1)]")
        Context.local.learningPhase = .training
        var trainingLossSum: Float = 0
        var trainingBatchCount = 0
        if epoch == 0 {
            print("epochBatches.count: \(epochBatches.count)")
        }

        for batch in epochBatches {
            let (documents, labels) = (batch.data, Tensor<Int32>(batch.label))
            // let (eagerDocuments, eagerLabels) = (batch.data, Tensor<Int32>(batch.label))
            // let documents = eagerDocuments.copyingTensorsToDevice(to: device)
            // let labels = Tensor(copying: eagerLabels, to: device)
            let (loss, gradients) = valueWithGradient(at: motionClassifier) { model -> Tensor<Float> in
                let logits = model(documents)
                return softmaxCrossEntropy(logits: logits, labels: labels)
            }

            trainingLossSum += loss.scalarized()
            trainingBatchCount += 1
            trainingStepCount += 1
            optimizer.update(&motionClassifier, along: gradients)
            // LazyTensorBarrier()
            summaryWriter.writeScalarSummary(tag: "TrainingLoss", step: trainingStepCount, value: trainingLossSum / Float(trainingBatchCount))
        }
        print(
            """
            Training loss: \(trainingLossSum / Float(trainingBatchCount))
            """
        )
        summaryWriter.writeScalarSummary(tag: "EpochTrainingLoss", step: epoch, value: trainingLossSum / Float(trainingBatchCount))

        if epoch == 0 {
            print("dataset.validationBatches.count: \(dataset.validationBatches.count)")
        }
        Context.local.learningPhase = .inference
        var devLossSum: Float = 0
        var devBatchCount = 0
        var correctGuessCount = 0
        var totalGuessCount = 0

        for batch in dataset.validationBatches {
            let valBatchSize = batch.data.motionFrames.shape[0]

            let (documents, labels) = (batch.data, Tensor<Int32>(batch.label))
            // let (eagerDocuments, eagerLabels) = (batch.data, Tensor<Int32>(batch.label))
            // let documents = eagerDocuments.copyingTensorsToDevice(to: device)
            // let labels = Tensor(copying: eagerLabels, to: device)

            let logits = motionClassifier(documents)
            let loss = softmaxCrossEntropy(logits: logits, labels: labels)
            // LazyTensorBarrier()
            devLossSum += loss.scalarized()
            devBatchCount += 1

            let correctPredictions = logits.argmax(squeezingAxis: 1) .== labels

            correctGuessCount += Int(Tensor<Int32>(correctPredictions).sum().scalarized())
            totalGuessCount += valBatchSize
        }
        
        let testAccuracy = Float(correctGuessCount) / Float(totalGuessCount)
        print(
            """
            Accuracy: \(correctGuessCount)/\(totalGuessCount) (\(testAccuracy)) \
            Eval loss: \(devLossSum / Float(devBatchCount))
            """
        )
        summaryWriter.writeScalarSummary(tag: "EpochTestLoss", step: epoch, value: devLossSum / Float(devBatchCount))
        summaryWriter.writeScalarSummary(tag: "EpochTestAccuracy", step: epoch, value: testAccuracy)

        let preds = motionClassifier.predict(motionSamples: dataset.testMotionSamples, labels: dataset.labels, batchSize: batchSize)
        let y_true = dataset.testMotionSamples.map { dataset.getLabel($0.sampleID)!.label }
        let y_pred = preds.map { $0.className }
        print(metrics.confusion_matrix(y_pred, y_true, labels: dataset.labels))
    }
}


Training MotionClassifier for the motion2Label task!
[Epoch 1]
epochBatches.count: 5
Training loss: 2.1325245
dataset.validationBatches.count: 2
Accuracy: 5/14 (0.35714287) Eval loss: 1.9059558
[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
[Epoch 2]
Training loss: 2.1881957
Accuracy: 5/14 (0.35714287) Eval loss: 1.5115287
[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
[Epoch 3]
Training loss: 1.8034923
Accuracy: 5/14 (0.35714287) Eval loss: 1.3328139
[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
[Epoch 4]
Training loss: 1.8023914
Accuracy: 5/14 (0.35714287) Eval loss: 1.3554273
[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
[Epoch 5]
Training loss: 1.7643535
Accuracy: 6/14 (0.42857143) Eval loss: 1.357915
[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
average: 78125.08847 ms,   min: 78125.08847 ms,   max: 78125.08847 ms


In [12]:
let preds = motionClassifier.predict(motionSamples: dataset.testMotionSamples, labels: dataset.labels, batchSize: batchSize)
let y_true = dataset.testMotionSamples.map { dataset.getLabel($0.sampleID)!.label }
let y_pred = preds.map { $0.className }
print(metrics.confusion_matrix(y_pred, y_true, labels: dataset.labels))
print(metrics.classification_report(y_true, y_pred, labels: dataset.labels, zero_division: false))

[[0 0 0 0 0]
 [5 5 1 0 3]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
                     precision    recall  f1-score   support

    Doing something       0.00      0.00      0.00         5
Walking and turning       0.36      1.00      0.53         5
  Walking backwards       0.00      0.00      0.00         1
  Walking few steps       0.00      0.00      0.00         0
 Walking or running       0.00      0.00      0.00         3

          micro avg       0.36      0.36      0.36        14
          macro avg       0.07      0.20      0.11        14
       weighted avg       0.13      0.36      0.19        14



# visualize training progress in tensorboard

# write text

doesn't work