In [None]:
// for local development
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter MotionLangModels TrainingLoop Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import ModelSupport
import Datasets
import SummaryWriter
import MotionLangModels
import TrainingLoop
import x10_optimizers_optimizer
import Checkpoints

In [None]:
import PythonKit

// Notebook directive: include the EnableIPythonDisplay.swift helper, then ask the
// IPython shell to enable matplotlib with the "inline" backend.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
// Run configuration: run identifier, batch size, sequence-length limits and epoch count.
let runName = "run_11"
let batchSize = 100
let maxMotionLength = 50
let maxTextSequenceLength = 40
let nEpochs = 10

// Optimizer hyper-parameters; nEpochs is forwarded from the constant above.
var optimizerOpts = OptimizerOpts(
    peakLearningRate: 5e-4,
    beta1: 0.9,
    beta2: 0.999,
    useBiasCorrection: false,
    lrSlopeMultiplier: 2,
    nEpochs: nEpochs
)

// Dataset variant; its rawValue is spliced into the dataset file name below.
// let datasetSize: DatasetSize = .multi_full
let datasetSize: DatasetSize = .multi_mini

// Echo the effective configuration.
print("runName: \(runName)")
print("batchSize: \(batchSize)")
print("maxMotionLength: \(maxMotionLength)")
print("maxTextSequenceLength: \(maxTextSequenceLength)")
print("nEpochs: \(nEpochs)")
print("peakLearningRate: \(optimizerOpts.peakLearningRate)")
print("datasetSize: \(datasetSize)")
// NOTE(review): stepsPerEpoch is never assigned in this notebook before being printed —
// presumably OptimizerOpts supplies a default; confirm.
print("stepsPerEpoch: \(optimizerOpts.stepsPerEpoch)")

// Input data locations.
// NOTE(review): there is no "." between rawValue and "plist", so rawValue presumably
// ends with one — confirm against DatasetSize.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

// Output locations: <dataURL>/runs/Motion2lang/<runName>/checkpoints
let logdirURL = dataURL.appendingPathComponent("runs/Motion2lang/", isDirectory: true)
let rundirURL = logdirURL.appendingPathComponent(runName, isDirectory: true)
let checkpointURL = rundirURL.appendingPathComponent("checkpoints", isDirectory: true)

In [None]:
// Select the device handed to the dataset below. The eager backend is active;
// swap the commented line in to use the XLA device instead.
// let device = Device.defaultXLA
let device = Device.defaultTFEager
print("backend: \(device)")

In [None]:
// instantiate text processor
// The vocabulary is read from vocab.txt under dataURL; `try!` aborts the notebook if loading throws.
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
let vocabulary: Vocabulary = try! Vocabulary(fromFile: vocabularyURL)
// Case-insensitive BERT tokenizer using "[UNK]" as the unknown token; maxTokenLength is left nil.
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary, caseSensitive: false, unknownToken: "[UNK]", maxTokenLength: nil)
// Used by later cells to preprocess MotionSample values via preprocess(motionSample:maxMotionLength:maxTextSequenceLength:).
let textProcessor = LegacyTextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

In [None]:
print("\nLoading dataset...")

// Build the Motion2Lang dataset. The trailing closure converts each MotionSample
// into a MotionLangBatch using the notebook-level text processor and length limits.
// NOTE(review): the maxMotionLength argument passed to Motion2Lang is the literal 100,
// whereas preprocessing uses the notebook-level maxMotionLength constant — confirm
// that the difference is intentional.
var dataset = try Motion2Lang(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    minMotionLength: 20,
    maxMotionLength: 100,
    trainTestSplit: 0.9,
    device: device
) { (sample: MotionSample) -> MotionLangBatch in
    textProcessor.preprocess(
        motionSample: sample,
        maxMotionLength: maxMotionLength,
        maxTextSequenceLength: maxTextSequenceLength
    )
}

print("Dataset acquired.")

In [None]:
// instantiate model
let modelSize = 128
let config = MotionLangTransformerConfig(
    vocabSize: vocabulary.count,
    nbJoints: 47,
    layerCount: 6,
    modelSize: modelSize,
    feedForwardSize: 512,
    headCount: 4,
    dropoutProbability: 0.1,
    sentenceMaxPositionalLength: 100,
    motionMaxPositionalLength: 500
)

In [None]:
/// Gathers every tensor reachable from `model` and computes two summary figures
/// that this notebook prints to compare models across a checkpoint round-trip.
///
/// - Parameter model: Value whose tensors are collected by `recursivelyObtainTensors`
///   under the "model" scope, using "/" as the separator.
/// - Returns: A tuple of
///   - `tensors`: the collected tensors, keyed by the names `recursivelyObtainTensors` assigns,
///   - `tt_sum`: the sum of every tensor's element sum, accumulated as `Double`,
///   - `tt_shape_sum`: the sum of every dimension size over all tensor shapes.
func getModelStats(model: Any) -> (tensors: [String: Tensor<Float>], tt_sum: Double, tt_shape_sum: Int) {
    var tensors = [String: Tensor<Float>]()
    recursivelyObtainTensors(model, scope: "model", tensors: &tensors, separator: "/")

    var tt_sum = 0.0
    var tt_shape_sum = 0
    // Only the values are needed here; the keys are returned untouched in `tensors`.
    for tensor in tensors.values {
        // sum() reduces over all elements, so the result holds exactly one scalar.
        tt_sum += Double(tensor.sum().scalarized())
        tt_shape_sum += tensor.shape.reduce(0, +)
    }

    return (tensors: tensors, tt_sum: tt_sum, tt_shape_sum: tt_shape_sum)
}

In [None]:
/// Runs the encoder of `model` on one dataset sample and returns its last-layer output.
///
/// A one-line summary of the sample is printed before it is preprocessed and encoded.
///
/// - Parameters:
///   - sample_id: Key looked up in `dataset.motionSampleDict`.
///   - model: Transformer whose `encode(input:)` is invoked.
/// - Returns: `lastLayerOutput` of the encoder for the preprocessed sample.
/// - Precondition: `sample_id` must be present in `dataset.motionSampleDict`;
///   otherwise execution traps with a message naming the missing ID.
func encoderForwardPass(_ sample_id: Int, model: MotionLangTransformer) -> Tensor<Float> {
    // Fail with a diagnostic instead of an anonymous force-unwrap trap.
    guard let motionSample = dataset.motionSampleDict[sample_id] else {
        fatalError("No motion sample with ID \(sample_id) in dataset.motionSampleDict")
    }
    print("\nsample: \(motionSample.sampleID), \"\(motionSample.annotations[0])\", motion: \(motionSample.timesteps[-1]) sec (\(motionSample.motion.shape[0]) frames)")

    // Same preprocessing limits as the dataset closure uses.
    let singleBatch = textProcessor.preprocess(motionSample: motionSample, maxMotionLength: maxMotionLength, maxTextSequenceLength: maxTextSequenceLength)
    return model.encode(input: singleBatch.source).lastLayerOutput
}

In [None]:
/// Runs the encoder and then the decoder of `model` on one dataset sample and
/// returns the decoder's last-layer output.
///
/// A one-line summary of the sample is printed before it is preprocessed.
///
/// - Parameters:
///   - sample_id: Key looked up in `dataset.motionSampleDict`.
///   - model: Transformer whose `encode(input:)` and `decode(input:memory:)` are invoked.
/// - Returns: `lastLayerOutput` of the decoder, which is given the encoder's
///   `lastLayerOutput` as its memory.
/// - Precondition: `sample_id` must be present in `dataset.motionSampleDict`;
///   otherwise execution traps with a message naming the missing ID.
func decoderForwardPass(_ sample_id: Int, model: MotionLangTransformer) -> Tensor<Float> {
    // Fail with a diagnostic instead of an anonymous force-unwrap trap.
    guard let motionSample = dataset.motionSampleDict[sample_id] else {
        fatalError("No motion sample with ID \(sample_id) in dataset.motionSampleDict")
    }
    print("\nsample: \(motionSample.sampleID), \"\(motionSample.annotations[0])\", motion: \(motionSample.timesteps[-1]) sec (\(motionSample.motion.shape[0]) frames)")

    // Same preprocessing limits as the dataset closure uses.
    let singleBatch = textProcessor.preprocess(motionSample: motionSample, maxMotionLength: maxMotionLength, maxTextSequenceLength: maxTextSequenceLength)
    let encoded = model.encode(input: singleBatch.source)
    // The decoder receives the same source batch plus the encoder output as memory.
    return model.decode(input: singleBatch.source, memory: encoded.lastLayerOutput).lastLayerOutput
}

## Test a new model against its saved checkpoint

In [None]:
/// Create a new model directly from `config`, without reading any checkpoint.
var newModel = MotionLangTransformer(config: config)

In [None]:
// Summary figures for the new model; the loadedModel cell further down prints the same figures for comparison.
let newModelStats = getModelStats(model: newModel)
print("tensor sum \(newModelStats.tt_sum)")
print("shape sum \(newModelStats.tt_shape_sum)")

In [None]:
// Sum of the encoder output of newModel for the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: newModel).sum()

In [None]:
// Sum of the decoder output of newModel for the first dataset sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: newModel).sum()

In [None]:
// Save the new model into this run's checkpoint directory...
try! newModel.writeCheckpoint(to: checkpointURL, name: "newModel.saved")

// ...and load it back from that same directory. Reading through checkpointURL,
// rather than a hard-coded "run_11/checkpoints" path, keeps the save/load
// round-trip pointing at one location when runName is changed.
var loadedModel = try! MotionLangTransformer(checkpoint: checkpointURL, config: config, name: "newModel.saved")

In [None]:
// Summary figures for the model loaded from "newModel.saved"; compare with the figures printed for newModel.
let loadedModelStats = getModelStats(model: loadedModel)
print("tensor sum \(loadedModelStats.tt_sum)")
print("shape sum \(loadedModelStats.tt_shape_sum)")

In [None]:
// Sum of the encoder output of loadedModel for the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: loadedModel).sum()

In [None]:
// Sum of the decoder output of loadedModel for the first dataset sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: loadedModel).sum()

In [None]:
// Write the loaded model back out under a separate checkpoint name; `try!` aborts the notebook if writing throws.
try! loadedModel.writeCheckpoint(to: checkpointURL, name: "loadedModel.saved")

## Test a trained model against its re-saved checkpoint

In [None]:
// Load the checkpoint named "model.e50" (presumably the state after epoch 50 — confirm).
// NOTE(review): the directory is hard-coded to "run_11/checkpoints" rather than derived
// from runName/checkpointURL; the two coincide only while runName is "run_11" — confirm
// whether the trained checkpoint should always come from run_11.
var model50 = try! MotionLangTransformer(checkpoint: logdirURL.appendingPathComponent("run_11/checkpoints"), config: config, name: "model.e50")

In [None]:
// Summary figures for the model loaded from "model.e50"; the modelResaved cell further down prints the same figures for comparison.
let model50Stats = getModelStats(model: model50)
print("tensor sum \(model50Stats.tt_sum)")
print("shape sum \(model50Stats.tt_shape_sum)")

In [None]:
// Sum of the encoder output of model50 for the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: model50).sum()

In [None]:
// Sum of the decoder output of model50 for the first dataset sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: model50).sum()

In [None]:
// Re-save the loaded model under a new checkpoint name in this run's checkpoint directory; `try!` aborts the notebook if writing throws.
try! model50.writeCheckpoint(to: checkpointURL, name: "model.e50.re-saved")

In [None]:
// Load the re-saved checkpoint from checkpointURL, the directory that
// "model.e50.re-saved" is written to, instead of a hard-coded "run_11/checkpoints"
// path that only matches while runName is "run_11".
var modelResaved = try! MotionLangTransformer(checkpoint: checkpointURL, config: config, name: "model.e50.re-saved")

In [None]:
// Summary figures for the model loaded from "model.e50.re-saved"; compare with the figures printed for model50.
let modelResavedStats = getModelStats(model: modelResaved)
print("tensor sum \(modelResavedStats.tt_sum)")
print("shape sum \(modelResavedStats.tt_shape_sum)")

In [None]:
// Sum of the encoder output of modelResaved for the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: modelResaved).sum()

In [None]:
// Sum of the decoder output of modelResaved for the first dataset sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: modelResaved).sum()