# Transformer-motion2label2

In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Datasets MotionModels ImageClassificationModels TextModels ModelSupport

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Datasets
		MotionModels
		ImageClassificationModels
		TextModels
		ModelSupport
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmp3a0xdjz1/swift-install
[1/2] Compiling MotionModels MotionClassifier.swift
[2/3] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
[3/3] Linking libjupyterInstalledPackages.so
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import Datasets
import MotionModels
import ImageClassificationModels
import TextModels
import ModelSupport

In [3]:
// Pull in the kernel's IPython display helper, then select the "inline" matplotlib
// backend through it. The call returns the backend pair echoed as the cell's output.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [4]:
// Batching configuration handed to the dataset below (alternative maxSequenceLength: 600).
let batchSize = 25
let maxSequenceLength = 300

print("batchSize: \(batchSize)")
print("maxSequenceLength: \(maxSequenceLength)")

// Input files. The commented-out URL is the alternative serialized dataset.
// let serializedDatasetURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset_v2.normalized.plist")
let serializedDatasetURL = URL(
    fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset.motion_flag.normalized.100.plist"
)
let labelsURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/labels_ds_v2.csv")

print("\nLoading dataset...")
// try! aborts the cell if the dataset cannot be constructed.
// The trailing closure turns one labelled example into a LabeledMotionBatch.
let dataset = try! Motion2Label2(
    serializedDatasetURL: serializedDatasetURL,
    labelsURL: labelsURL,
    maxSequenceLength: maxSequenceLength,
    batchSize: batchSize
) { (sample: Motion2LabelExample) -> LabeledMotionBatch in
    // TODO: move this to dataset class
    let frames = Tensor<Float>(sample.motionSample.motionFramesArray)
    // Column 44 of every frame, with the singleton axis dropped, cast to Int32.
    // Presumably this column carries the motion flag — TODO confirm against the dataset layout.
    let flagColumn = frames[0..., 44...44].squeezingShape(at: 1)
    let flags = Tensor<Int32>(flagColumn)
    // Scalar tensor holding the number of frames in this sample.
    let frameCount = Tensor<Int32>(Int32(frames.shape[0]))
    // Force-unwrap: an example without a label aborts here.
    let labelIndex = Tensor<Int32>(Int32(sample.label!.idx))
    return LabeledMotionBatch(
        data: MotionBatch(motionFrames: frames, motionFlag: flags, origMotionFramesCount: frameCount),
        label: labelIndex
    )
}

print("dataset.trainingExamples.count: \(dataset.trainingExamples.count)")
print("dataset.validationExamples.count: \(dataset.validationExamples.count)")

batchSize: 25
maxSequenceLength: 300

Loading dataset...
MotionData(motionSamples: 99)
dataset.trainingExamples.count: 79
dataset.validationExamples.count: 20


In [5]:
// Model dimensions shared by the feature extractor and the transformer encoder.
var hiddenLayerCount = 6 // alternative: 12
var attentionHeadCount = 6 // alternative: 12
// 32 per attention head: 32 * 6 = 192 (alternative: 64 * 12 = 768).
var hiddenSize = 32 * attentionHeadCount
let classCount = 5

// ResNet-18 used as the feature extractor: its `classCount:` argument is hiddenSize,
// not the number of labels (`classCount` above goes to the MotionClassifier instead).
var featureExtractor = ResNet(
    classCount: hiddenSize,
    depth: .resNet18,
    downsamplingInFirstStage: false,
    channelCount: 1
)

In [6]:
// Build the FeatureTransformerEncoder, together with the BERT vocabulary and tokenizer it takes.
var caseSensitive = false
var subDirectory = "uncased_L-12_H-768_A-12"
let directory = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let vocabularyURL = directory.appendingPathComponent(subDirectory).appendingPathComponent("vocab.txt")

// try! aborts the cell if the vocabulary file cannot be loaded.
let vocabulary = try! Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(
    vocabulary: vocabulary,
    caseSensitive: caseSensitive,
    unknownToken: "[UNK]",
    maxTokenLength: nil
)

var variant = BERT.Variant.bert
// intermediateSize is 4x hiddenSize (same ratio as 3072 / 768).
var intermediateSize = hiddenSize * 4

// Note: `maxSequenceLength:` here is the literal 512, not the notebook-level
// `maxSequenceLength` variable.
var transformerEncoder = FeatureTransformerEncoder(
    variant: variant,
    vocabulary: vocabulary,
    tokenizer: tokenizer,
    caseSensitive: caseSensitive,
    hiddenSize: hiddenSize,
    hiddenLayerCount: hiddenLayerCount,
    attentionHeadCount: attentionHeadCount,
    intermediateSize: intermediateSize,
    intermediateActivation: gelu,
    hiddenDropoutProbability: 0.1,
    attentionDropoutProbability: 0.1,
    maxSequenceLength: 512,
    typeVocabularySize: 2,
    initializerStandardDeviation: 0.02,
    useOneHotEmbeddings: false
)

In [7]:
// Assemble the MotionClassifier from the ResNet feature extractor and the transformer
// encoder; it is built with `classCount` output classes.
var motionClassifier = MotionClassifier(
    featureExtractor: featureExtractor,
    transformerEncoder: transformerEncoder,
    classCount: classCount,
    maxSequenceLength: maxSequenceLength
)

# train

In [8]:
// Plain SGD over the classifier's parameters with a fixed learning rate of 1e-5.
let optimizer = SGD(
    for: motionClassifier,
    learningRate: 1e-5
)

In [9]:
// Train for five epochs; after each epoch's training batches, evaluate on the validation
// batches and print loss and accuracy. The whole run is wrapped in `time()`, which
// reports the elapsed time once the closure finishes.
// Device placement (copyingTensorsToDevice / LazyTensorBarrier) is not used in this loop.
print("\nTraining MotionClassifier for the motion2Label task!")
time() {
    for (epochIndex, trainingBatches) in dataset.trainingEpochs.prefix(5).enumerated() {
        print("[Epoch \(epochIndex + 1)]")

        // ---- training pass ----
        Context.local.learningPhase = .training
        var trainLossTotal: Float = 0
        var trainStepCount = 0
        print("epochBatches.count: \(trainingBatches.count)")

        for trainingBatch in trainingBatches {
            print("batch")
            let inputs = trainingBatch.data
            let targets = Tensor<Int32>(trainingBatch.label)

            // Loss and gradients with respect to the classifier for this batch.
            let (batchLoss, batchGradients) = valueWithGradient(at: motionClassifier) { classifier -> Tensor<Float> in
                return softmaxCrossEntropy(logits: classifier(inputs), labels: targets)
            }

            trainLossTotal += batchLoss.scalarized()
            trainStepCount += 1
            optimizer.update(&motionClassifier, along: batchGradients)
        }
        // Mean of the per-batch losses over the epoch.
        print("Training loss: \(trainLossTotal / Float(trainStepCount))")

        // ---- validation pass ----
        print("dataset.validationBatches.count: \(dataset.validationBatches.count)")
        Context.local.learningPhase = .inference
        var evalLossTotal: Float = 0
        var evalStepCount = 0
        var hitCount = 0
        var seenCount = 0

        for validationBatch in dataset.validationBatches {
            let inputs = validationBatch.data
            let targets = Tensor<Int32>(validationBatch.label)

            let logits = motionClassifier(inputs)
            evalLossTotal += softmaxCrossEntropy(logits: logits, labels: targets).scalarized()
            evalStepCount += 1

            // Count predictions whose arg-max class equals the target label.
            let matches = Tensor<Int32>(logits.argmax(squeezingAxis: 1) .== targets)
            hitCount += Int(matches.sum().scalarized())
            // The leading dimension of motionFrames is taken as the number of examples in the batch.
            seenCount += inputs.motionFrames.shape[0]
        }

        let accuracy = Float(hitCount) / Float(seenCount)
        print("Accuracy: \(hitCount)/\(seenCount) (\(accuracy)) Eval loss: \(evalLossTotal / Float(evalStepCount))")
    }
}


Training MotionClassifier for the motion2Label task!
[Epoch 1]
epochBatches.count: 3
batch
batch
batch
Training loss: 2.1620147
dataset.validationBatches.count: 1
Accuracy: 5/20 (0.25) Eval loss: 1.9549204
[Epoch 2]
epochBatches.count: 3
batch
batch
batch
Training loss: 2.4059618
dataset.validationBatches.count: 1
Accuracy: 5/20 (0.25) Eval loss: 1.7977912
[Epoch 3]
epochBatches.count: 3
batch
batch
batch
Training loss: 2.3656132
dataset.validationBatches.count: 1
Accuracy: 5/20 (0.25) Eval loss: 1.7993981
[Epoch 4]
epochBatches.count: 3
batch
batch
batch
Training loss: 2.2538278
dataset.validationBatches.count: 1
Accuracy: 5/20 (0.25) Eval loss: 1.7886127
[Epoch 5]
epochBatches.count: 3
batch
batch
batch
Training loss: 2.283948
dataset.validationBatches.count: 1
Accuracy: 5/20 (0.25) Eval loss: 1.7627157
average: 62885.5428 ms,   min: 62885.5428 ms,   max: 62885.5428 ms
