# Transformer-motion2label

In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Batcher ModelSupport Datasets ImageClassificationModels TextModels

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Batcher
		ModelSupport
		Datasets
		ImageClassificationModels
		TextModels
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpv4z76f9p/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [2]:
// + load Motion2Label dataset
// + create sliding resnet feature extractor
// TODO: create tiny transformer encoder
// TODO: copy relevant sources here
// TODO: feed sliced features to transformer
// TODO: make it train

In [3]:
import Foundation
import TensorFlow
import PythonKit

import Batcher
import ModelSupport
import Datasets
import ImageClassificationModels
import TextModels

In [4]:
// Pull in the IPython display helper and ask the shell to use the "inline"
// matplotlib backend for this notebook.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [5]:
// Input files: the serialized motion samples and the CSV holding their labels.
let labelsURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/labels_ds_v2.csv")
let serializedDatasetURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset.normalized.500.plist")

// Batching parameters handed to the dataset. With these values the first
// training batch in this notebook has shape [2, 60, 44, 1].
let tensorWidth = 60
let batchSize = 2

// Build the Motion2Label dataset from the files and parameters above.
let dataset = Motion2Label(
    batchSize: batchSize,
    serializedDatasetURL: serializedDatasetURL,
    labelsURL: labelsURL,
    tensorWidth: tensorWidth)

// Report the sizes of the training and test collections.
print("dataset.training.count: \(dataset.training.count)")
print("dataset.test.count: \(dataset.test.count)")

MotionData(motionSamples: 494)
trainTensorPairs.count = 395
testTensorPairs.count = 99
dataset.training.count: 198
dataset.test.count: 50


# 1-channel ResNet model

In [6]:
// Size of the ResNet output vector; passed as `classCount` when the network is
// created, so the classifier output is used as a feature vector of this length.
let nOutputs = 128

In [7]:
// ResNet-18 taking single-channel input (`channelCount: 1`) and producing
// `nOutputs` values per example. First-stage downsampling is disabled.
var resnet = ResNet(classCount: nOutputs, depth: .resNet18, downsamplingInFirstStage: false, channelCount: 1)
// Optimizer is not used yet; kept for the upcoming training step.
// let optimizer = SGD(for: resnet, learningRate: 0.001)

# Sliding ResNet feature extractor

In [8]:
// Iterator over the training batches; declared `var` because `next()` advances it.
var batchIterator = dataset.training.sequenced()

In [9]:
// Fetch one batch to experiment with. The result is optional (it is force-unwrapped
// where it is used), presumably nil once the iterator is exhausted.
let batch = batchIterator.next()

In [10]:
// Take the `first` member of the batch as the input tensor (presumably the motion
// data, with the labels in the other member; confirm against Motion2Label).
// NOTE(review): the force unwrap crashes if `batch` is nil.
let batchTensor = batch!.first
// Display the shape; for this dataset configuration it is [2, 60, 44, 1].
batchTensor.shape

▿ [2, 60, 44, 1]
  ▿ dimensions : 4 elements
    - 0 : 2
    - 1 : 60
    - 2 : 44
    - 3 : 1


In [11]:
// Sliding-window parameters used by extractMotionFeatures: a new window starts
// every `stride` positions along axis 1 and spans `tWidth` positions, so
// consecutive windows overlap by half.
let stride = 10
let tWidth = stride*2

In [12]:
/// Extracts a sequence of ResNet embeddings from a batch using a sliding window.
///
/// Windows of `tWidth` positions are cut along axis 1 every `stride` positions
/// (both are notebook-level constants). All windows are embedded by `resnet` in a
/// single forward pass and the embeddings are then regrouped per sample.
///
/// - Parameters:
///   - batchTensor: Input batch; the notebook feeds tensors of shape `[2, 60, 44, 1]`.
///     Axis 0 is treated as the batch axis and axis 1 as the axis being windowed.
///   - resnet: Network used to embed every window.
/// - Returns: A tensor of shape `[batch, windowCount, featureCount]`, where
///   `featureCount` is the size of the ResNet output and entry `[b, w]` is the
///   embedding of window `w` of sample `b`.
/// - Precondition: Axis 1 of `batchTensor` holds at least `tWidth` positions.
func extractMotionFeatures(_ batchTensor: Tensor<Float>, resnet: ResNet) -> Tensor<Float> {
    let origBatchSize = batchTensor.shape[0]
    // Derive the window count from the input itself rather than from the global
    // `tensorWidth`, so the function stays correct for inputs of other lengths.
    let frameCount = batchTensor.shape[1]
    precondition(
        frameCount >= tWidth,
        "extractMotionFeatures needs at least \(tWidth) positions along axis 1, got \(frameCount)")
    let nElements = (frameCount - tWidth) / stride + 1

    // Cut the overlapping windows; each one keeps the full batch axis.
    let windows = (0..<nElements).map { index -> Tensor<Float> in
        let start = index * stride
        return batchTensor[0..., start..<(start + tWidth)]
    }

    // Concatenating along axis 0 yields rows in window-major order:
    // window 0 of every sample, then window 1 of every sample, and so on.
    let stackedWindows = Tensor(concatenating: windows)
    let embeddings = resnet(stackedWindows)
    let featureCount = embeddings.shape[1]

    // Rows are (window, sample) ordered, so reshape to [window, batch, feature]
    // first and only then swap the leading axes. Reshaping directly to
    // [batch, window, feature] would mix embeddings of different samples.
    return embeddings
        .reshaped(to: TensorShape([nElements, origBatchSize, featureCount]))
        .transposed(permutation: 1, 0, 2)
}
// Time one pass of the sliding feature extractor, printing the input and output shapes.
time {
    print(batchTensor.shape)
    let motionFeatures = extractMotionFeatures(batchTensor, resnet: resnet)
    print(motionFeatures.shape)
}

[2, 60, 44, 1]
[2, 5, 128]
average: 391.6182 ms,   min: 391.6182 ms,   max: 391.6182 ms


# Tiny BERT/Transformer

In [13]:
// Select the uncased, non-multilingual BERT Base configuration.
// NOTE(review): the section is titled "Tiny BERT/Transformer", but this loads
// BERT Base (resource "uncased_L-12_H-768_A-12" per the cell output); confirm
// whether a smaller encoder is intended here.
let bertPretrained = BERT.PreTrainedModel.bertBase(cased: false, multilingual: false)
// Workspace directory "bert_models", resolved relative to the system temporary directory.
let workspaceURL = URL(
    fileURLWithPath: "bert_models", isDirectory: true,
    relativeTo: URL(
        fileURLWithPath: NSTemporaryDirectory(),
        isDirectory: true))
// Load the pre-trained model using the workspace directory; `try` at top level
// means a load failure aborts the cell.
let bert = try BERT.PreTrainedModel.load(bertPretrained)(from: workspaceURL)
// Wrap the encoder in a classifier with 5 output classes.
// NOTE(review): the class count is hard-coded; verify it matches the dataset's labels.
var bertClassifier = BERTClassifier(bert: bert, classCount: 5)

Loading BERT pre-trained model 'BERT Base Uncased'.
Loading resource: uncased_L-12_H-768_A-12


# train