In [None]:
// for local development
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter MotionLangModels TrainingLoop Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import ModelSupport
import Datasets
import SummaryWriter
import MotionLangModels
import TrainingLoop
import x10_optimizers_optimizer
import Checkpoints

In [None]:
import PythonKit

%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
// Run configuration: run identifier, batching and sequence-length limits, epoch count.
let runName = "run_18"
let batchSize = 10
let maxMotionLength = 100
let maxTextSequenceLength = 40
let nEpochs = 1

// Optimizer hyper-parameters. Declared `var` because stepsPerEpoch is assigned
// in a later cell, once the dataset has been loaded.
var optimizerOpts = OptimizerOpts(
    peakLearningRate: 1e-3,
    beta1: 0.9,
    beta2: 0.999,
    useBiasCorrection: false,
    lrSlopeMultiplier: 2,
    nEpochs: nEpochs
)

// Dataset variant to load; swap the two lines below to switch variants.
// let datasetSize: DatasetSize = .multi_full
let datasetSize: DatasetSize = .micro

// Echo the configuration so it is recorded in the notebook output.
print("runName: \(runName)")
print("batchSize: \(batchSize)")
print("maxMotionLength: \(maxMotionLength)")
print("maxTextSequenceLength: \(maxTextSequenceLength)")
print("nEpochs: \(nEpochs)")
print("peakLearningRate: \(optimizerOpts.peakLearningRate)")
print("datasetSize: \(datasetSize)")
// NOTE(review): stepsPerEpoch is only assigned from the dataset in a later cell, so this
// prints whatever value OptimizerOpts was initialised with — confirm this is intended.
print("stepsPerEpoch: \(optimizerOpts.stepsPerEpoch)")

// Input data locations.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
// rawValue is interpolated directly in front of "plist"; presumably each raw value
// carries its own trailing "." — TODO confirm against DatasetSize.
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

// Output locations: <data>/runs/Motion2lang/<runName>/checkpoints
let logdirURL = dataURL.appendingPathComponent("runs/Motion2lang/", isDirectory: true)
let rundirURL = logdirURL.appendingPathComponent(runName, isDirectory: true)
let checkpointURL = rundirURL.appendingPathComponent("checkpoints", isDirectory: true)

In [None]:
// Backend selection: the eager TensorFlow device is active; the XLA device is
// kept commented out as the alternative.
// let device = Device.defaultXLA
let device = Device.defaultTFEager
print("backend: \(device)")

In [None]:
// instantiate text processor
// The vocabulary is read from vocab.txt in the data directory; `try!` aborts the
// cell if the file cannot be loaded.
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
let vocabulary: Vocabulary = try! Vocabulary(fromFile: vocabularyURL)
// BERT tokenizer configured as case-insensitive, with "[UNK]" as the unknown token
// and no maximum token length.
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary, caseSensitive: false, unknownToken: "[UNK]", maxTokenLength: nil)
// Shared by the dataset closure and the forward-pass helpers to turn a MotionSample into a batch.
let textProcessor = LegacyTextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

In [None]:
print("\nLoading dataset...")

// Build the Motion2Lang dataset. The motion-length cap is taken from the shared
// `maxMotionLength` constant (instead of a repeated literal) so the dataset and
// the per-sample preprocessing closure below cannot drift apart.
var dataset = try Motion2Lang(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    minMotionLength: 20,
    maxMotionLength: maxMotionLength,
    trainTestSplit: 0.9,
    device: device
) { (motionSample: MotionSample) -> MotionLangBatch in
    // Convert one raw sample into a single-sample batch.
    textProcessor.preprocess(
        motionSample: motionSample,
        maxMotionLength: maxMotionLength,
        maxTextSequenceLength: maxTextSequenceLength
    )
}

print("Dataset acquired.")

In [None]:
// Model configuration; the model itself is instantiated from `config` in a later cell
// and the same config is reused when reloading checkpoints.
let modelSize = 64
let config = MotionLangTransformerConfig(
    // Vocabulary size follows the loaded vocab.txt.
    vocabSize: vocabulary.count,
    // NOTE(review): 47 presumably matches the joint count of the motion dataset — confirm.
    nbJoints: 47,
    layerCount: 2,
    modelSize: modelSize,
    feedForwardSize: 128,
    headCount: 2,
    dropoutProbability: 0.1,
    sentenceMaxPositionalLength: 100,
    motionMaxPositionalLength: 500
)

## testing helpers

In [None]:
/// Collects the `Tensor<Float>` values that `recursivelyObtainTensors` finds in
/// `model` and reduces them to two coarse fingerprints.
///
/// - Parameter model: Object handed to `recursivelyObtainTensors` under the scope "model".
/// - Returns: The collected tensors keyed by path, the sum of all tensor elements
///   (`tt_sum`), and the sum of all tensor dimensions (`tt_shape_sum`).
func getModelStats(model: Any) -> (tensors: [String: Tensor<Float>], tt_sum: Double, tt_shape_sum: Int) {
    var tensors = [String: Tensor<Float>]()
    recursivelyObtainTensors(model, scope: "model", tensors: &tensors, separator: "/")

    var tt_sum = 0.0
    var tt_shape_sum = 0
    // Accumulate in sorted key order: floating-point addition is not associative,
    // so summing in dictionary order could yield different totals for identical weights.
    for (_, tensor) in tensors.sorted(by: { $0.key < $1.key }) {
        // sum() reduces to a single element, so scalarized() is always valid here.
        tt_sum += Double(tensor.sum().scalarized())
        tt_shape_sum += tensor.shape.reduce(0, +)
    }

    return (tensors: tensors, tt_sum: tt_sum, tt_shape_sum: tt_shape_sum)
}

In [None]:
/// Runs only the encoder of `model` on a single dataset sample.
///
/// - Parameters:
///   - sample_id: Key into `dataset.motionSampleDict`; the lookup is force-unwrapped,
///     so an unknown id traps.
///   - model: Model whose `encode` is invoked.
/// - Returns: The encoder's `lastLayerOutput` for the preprocessed sample.
func encoderForwardPass(_ sample_id: Int, model: MotionLangTransformer) -> Tensor<Float> {
    let motionSample = dataset.motionSampleDict[sample_id]!
    // Log which sample is being pushed through the model.
    print("\nsample: \(motionSample.sampleID), \"\(motionSample.annotations[0])\", motion: \(motionSample.timesteps[-1]) sec (\(motionSample.motion.shape[0]) frames)")

    // Same preprocessing as the dataset closure, applied to this one sample.
    let source = textProcessor.preprocess(
        motionSample: motionSample,
        maxMotionLength: maxMotionLength,
        maxTextSequenceLength: maxTextSequenceLength
    ).source
    return model.encode(input: source).lastLayerOutput
}

In [None]:
/// Runs the encoder and then the decoder of `model` on a single dataset sample.
///
/// - Parameters:
///   - sample_id: Key into `dataset.motionSampleDict`; the lookup is force-unwrapped,
///     so an unknown id traps.
///   - model: Model whose `encode` and `decode` are invoked.
/// - Returns: The decoder's `lastLayerOutput`, computed with the encoder output as memory.
func decoderForwardPass(_ sample_id: Int, model: MotionLangTransformer) -> Tensor<Float> {
    let motionSample = dataset.motionSampleDict[sample_id]!
    // Log which sample is being pushed through the model.
    print("\nsample: \(motionSample.sampleID), \"\(motionSample.annotations[0])\", motion: \(motionSample.timesteps[-1]) sec (\(motionSample.motion.shape[0]) frames)")

    // Same preprocessing as the dataset closure, applied to this one sample.
    let source = textProcessor.preprocess(
        motionSample: motionSample,
        maxMotionLength: maxMotionLength,
        maxTextSequenceLength: maxTextSequenceLength
    ).source
    // The decoder reads the same source batch and attends over the encoder output.
    let memory = model.encode(input: source).lastLayerOutput
    return model.decode(input: source, memory: memory).lastLayerOutput
}

## train new model

In [None]:
// First epoch index of this run; the epoch loop further down iterates from here.
var start_epoch = 0

In [None]:
/// create new model
var newModel = MotionLangTransformer(config: config)

In [None]:
// NOTE(review): integer division drops any partial final batch, and motionSamples.count
// may cover more than the training split — confirm this matches the real number of training steps.
optimizerOpts.stepsPerEpoch = dataset.motionSamples.count/batchSize // function of training set size and batching configuration
let optimizerWrapper = OptimizerWrapper(opts: optimizerOpts, model: newModel)

In [None]:
/// stats recorder
// Constructed with the run directory as its log directory; its writeStats method is
// registered as a training-loop callback in a later cell.
let statsRecorder = StatsRecorder(logdirURL: rundirURL)

/// Softmax cross-entropy between model outputs and target token ids, with both
/// flattened to one row per target position.
///
/// - Parameters:
///   - y_pred: Model outputs; reshaped to `[rowCount, -1]` before the loss is taken.
///   - y_true: Training target; only `targetTruth` is read.
/// - Returns: The value of `softmaxCrossEntropy` on the flattened logits and labels.
@differentiable(wrt: y_pred)
func embeddedSoftmaxCrossEntropy(y_pred: Tensor<Float>, y_true: MotionLangBatch.MLTarget) -> Tensor<Float> {
    let truth = y_true.targetTruth
    // Row count = first dimension × last dimension of the target tensor
    // (presumably batch × sequence length — TODO confirm targetTruth is rank 2).
    let rowCount = truth.shape.last! * truth.shape.first!
    let flatLabels = truth.reshaped(to: [-1])
    let flatLogits = y_pred.reshaped(to: [rowCount, -1])
    // TODO: ignore padded entries
    return softmaxCrossEntropy(logits: flatLogits, labels: flatLabels)
}

In [None]:
// TODO: fix epoch numbering
/// Training-loop callback that writes a checkpoint of `model` when an epoch ends.
///
/// Does nothing for events other than `.epochEnd`, or when the loop has no epoch index.
///
/// - Parameters:
///   - loop: The running training loop; only `epochIndex` is read.
///   - event: The event being dispatched.
///   - model: Model to serialise into `checkpointURL`.
/// - Throws: Any error raised by `writeCheckpoint`. The error is propagated to the
///   caller rather than trapping, since this function is already declared `throws`.
public func saveCheckpoint<L: TrainingLoopProtocol>(_ loop: inout L, event: TrainingLoopEvent, model: MotionLangTransformer) throws {
    guard event == .epochEnd, let epochIndex = loop.epochIndex else {
        return
    }
    // Checkpoint names are 1-based: epoch index 0 is saved as "e1".
    try model.writeCheckpoint(to: checkpointURL, name: "model.e\(epochIndex+1).in_fit.n")
}

In [None]:
// Training loop
print("\nSetting up the training loop")
let trainingProgress = TrainingProgress(metrics: [.loss])
var trainingLoop: TrainingLoop = TrainingLoop(
    training: dataset.trainEpochs,
    validation: dataset.testBatches,
    optimizer: optimizerWrapper.optimizer,
    lossFunction:  embeddedSoftmaxCrossEntropy,
    // Callbacks: progress display, stats recording, learning-rate updates,
    // and the in-fit checkpoint writer (saveCheckpoint).
    callbacks: [trainingProgress.update, statsRecorder.writeStats, optimizerWrapper.learningRateUpdater, saveCheckpoint]
)

print("\nTraining Transformer for the Motion2lang task!")
// FIXME: epoch loop workaround for checkpoint saving
// Each iteration runs fit() for exactly one epoch, then writes a second checkpoint
// ("out_of_fit") from outside fit() so it can be compared with the "in_fit" one
// written by the saveCheckpoint callback.
// NOTE(review): saveCheckpoint names its file from loop.epochIndex; with epochs: 1 per
// fit() call that index presumably restarts each time, so the in_fit file would always
// be "model.e1.in_fit.n" — confirm before running with nEpochs > 1.
for epochIndex in start_epoch..<start_epoch+nEpochs {
    print("epoch \(epochIndex+1)/\(start_epoch + nEpochs)")
    // Tell the recorder the global epoch number.
    statsRecorder.epochIndex = epochIndex
    try! trainingLoop.fit(&newModel, epochs: 1, on: device)
    try! newModel.writeCheckpoint(to: checkpointURL, name: "model.e\(epochIndex+1).out_of_fit.n")
}

In [None]:
// Fingerprint of the freshly trained in-memory model, for comparison with the
// checkpoints reloaded in the cells below.
let newModelStats = getModelStats(model: newModel)
print("tensor sum \(newModelStats.tt_sum)")
print("shape sum \(newModelStats.tt_shape_sum)")

## test in_fit saved model against out_of_fit saved

In [None]:
// Reload the checkpoint written from inside fit() by the saveCheckpoint callback.
// The "e1" in the name is hard-coded, i.e. it matches the first epoch only.
var inModel = try! MotionLangTransformer(checkpoint: checkpointURL, config: config, name: "model.e1.in_fit.n")

In [None]:
let inModelStats = getModelStats(model: inModel)
print("tensor sum \(inModelStats.tt_sum)")
print("shape sum \(inModelStats.tt_shape_sum)")

In [None]:
// Encoder output sum on the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: inModel).sum()

In [None]:
// Decoder output sum on the same sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: inModel).sum()

In [None]:
// Reload the checkpoint written after fit() returned, in the epoch loop.
var outModel = try! MotionLangTransformer(checkpoint: checkpointURL, config: config, name: "model.e1.out_of_fit.n")

In [None]:
let outModelStats = getModelStats(model: outModel)
print("tensor sum \(outModelStats.tt_sum)")
print("shape sum \(outModelStats.tt_shape_sum)")

In [None]:
// Same two forward-pass probes as for inModel, for side-by-side comparison.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: outModel).sum()

In [None]:
decoderForwardPass(dataset.motionSamples[0].sampleID, model: outModel).sum()

## compare per tensor values

In [None]:
// Print the path of every tensor whose element sum differs between the in_fit and
// out_of_fit models. Paths are visited in sorted order so the output is stable, and a
// path missing from the out_of_fit model is reported instead of trapping on a nil unwrap.
for (name, inTensor) in inModelStats.tensors.sorted(by: { $0.key < $1.key }) {
    guard let outTensor = outModelStats.tensors[name] else {
        print("\(name) (missing in outModelStats)")
        continue
    }
    // sum() reduces to a single element, so scalarized() is always valid here.
    let inSum = Double(inTensor.sum().scalarized())
    let outSum = Double(outTensor.sum().scalarized())
    if inSum != outSum {
        print(name)
    }
}


In [None]:
// Print the path of every tensor whose element sum differs between the in_fit and
// out_of_fit models. Paths are visited in sorted order so the output is stable, and a
// path missing from the out_of_fit model is reported instead of trapping on a nil unwrap.
for (name, inTensor) in inModelStats.tensors.sorted(by: { $0.key < $1.key }) {
    guard let outTensor = outModelStats.tensors[name] else {
        print("\(name) (missing in outModelStats)")
        continue
    }
    // sum() reduces to a single element, so scalarized() is always valid here.
    let inSum = Double(inTensor.sum().scalarized())
    let outSum = Double(outTensor.sum().scalarized())
    if inSum != outSum {
        print(name)
    }
}


In [None]:
// Sorted list of the tensor paths collected from the trained model.
newModelStats.tensors.keys.sorted()

In [None]:
// MARK: - ExportableLayer

extension MotionLangTransformer: ExportableLayer {
    /// Property labels that are exported under their own name (identity mapping).
    public var nameMappings: [String: String] {
        let exportedLabels = ["embedding", "motionNorm", "motionDense", "generator"]
        return Dictionary(uniqueKeysWithValues: exportedLabels.map { ($0, $0) })
    }
}

In [None]:
/// Reflection-based tensor collector with debug output (appears to be a debugging
/// copy of `recursivelyObtainTensors`).
///
/// Walks the children of `obj` with `Mirror`. A child whose label has an entry in the
/// current object's name mappings extends the path, and is stored in `tensors` if it is
/// a `Tensor<Float>`. Every child is then visited recursively: a mapped child under its
/// own path, an unmapped child under the most recently computed path (or `scope` if no
/// child has been mapped yet). For each `ExportableLayer` visited, its type and the
/// mappings in effect are printed.
///
/// - Parameters:
///   - obj: Object to inspect.
///   - scope: Path prefix for tensors found below `obj`; `nil` means no prefix.
///   - tensors: Accumulator receiving `path -> tensor` entries.
///   - separator: String inserted between path components.
public func recursivelyObtainTensors2(
    _ obj: Any, scope: String? = nil, tensors: inout [String: Tensor<Float>], separator: String
) {
    let m = Mirror(reflecting: obj)
    let nameMappings: [String: String]
    if let exportableLayer = obj as? ExportableLayer {
        if obj is MotionLangTransformer {
            // Mappings for the top-level model are spelled out here rather than
            // read from its ExportableLayer conformance.
            nameMappings = [
                "embedding": "embedding",
                "motionNorm": "motionNorm",
                "motionDense": "motionDense",
                "generator": "generator"
            ]
        } else {
            nameMappings = exportableLayer.nameMappings
        }
        print(type(of:exportableLayer), nameMappings)
    } else {
        // Non-exportable objects contribute no labels of their own. The former debug
        // branch here force-unwrapped `scope` (crashing for the default nil scope)
        // only to build a string that was never printed, so it has been dropped.
        nameMappings = [:]
    }

    // Unlabelled children (e.g. array elements) all share one label and are
    // told apart by a running numeric suffix per remapped label.
    var repeatedLabels: [String: Int] = [:]
    func suffix(for label: String) -> String {
        if let currentSuffix = repeatedLabels[label] {
            repeatedLabels[label] = currentSuffix + 1
            return "\(currentSuffix + 1)"
        } else {
            repeatedLabels[label] = 0
            return "0"
        }
    }

    let hasSuffix = (m.children.first?.label == nil)

    // `path` deliberately persists across iterations: an unmapped child is
    // visited under whatever path the last mapped sibling produced.
    var path = scope
    for child in m.children {
        let label = child.label ?? "h"

        if let remappedLabel = nameMappings[label] {
            let labelSuffix = hasSuffix ? suffix(for: remappedLabel) : ""
            let conditionalSeparator = remappedLabel.isEmpty ? "" : separator
            let prefix = scope.map { $0 + conditionalSeparator } ?? ""
            let childPath = prefix + remappedLabel + labelSuffix

            path = childPath
            if let tensor = child.value as? Tensor<Float> {
                tensors[childPath] = tensor
            }
        }
        recursivelyObtainTensors2(child.value, scope: path, tensors: &tensors, separator: separator)
    }
}

In [None]:
// Collect the trained model's tensors with the debug collector, using the same
// scope ("model") and separator ("/") that getModelStats passes to recursivelyObtainTensors.
var tensors = [String: Tensor<Float>]()
recursivelyObtainTensors2(newModel, scope: "model", tensors: &tensors, separator: "/")

In [None]:
// Sorted list of the tensor paths found by the debug collector.
tensors.keys.sorted()

## test trained model against resaved

In [None]:
// Load a checkpoint from a previous run (run_11) using the current `config`.
var model50 = try! MotionLangTransformer(checkpoint: logdirURL.appendingPathComponent("run_11/checkpoints"), config: config, name: "model.e50")

In [None]:
let model50Stats = getModelStats(model: model50)
print("tensor sum \(model50Stats.tt_sum)")
print("shape sum \(model50Stats.tt_shape_sum)")

In [None]:
// Encoder output sum on the first dataset sample.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: model50).sum()

In [None]:
// Decoder output sum on the same sample.
decoderForwardPass(dataset.motionSamples[0].sampleID, model: model50).sum()

In [None]:
// Write the loaded model back out under a new name. Note the destination is the
// current run's checkpoint directory (checkpointURL), not run_11.
try! model50.writeCheckpoint(to: checkpointURL, name: "model.e50.re-saved")

In [None]:
// Reload the re-saved checkpoint from checkpointURL, the directory that the
// "model.e50.re-saved" checkpoint is written to by the preceding writeCheckpoint cell.
var modelResaved = try! MotionLangTransformer(checkpoint: checkpointURL, config: config, name: "model.e50.re-saved")

In [None]:
// Fingerprint of the re-saved model; compare with the model50 figures printed earlier.
let modelResavedStats = getModelStats(model: modelResaved)
print("tensor sum \(modelResavedStats.tt_sum)")
print("shape sum \(modelResavedStats.tt_shape_sum)")

In [None]:
// Same forward-pass probes as for model50.
encoderForwardPass(dataset.motionSamples[0].sampleID, model: modelResaved).sum()

In [None]:
decoderForwardPass(dataset.motionSamples[0].sampleID, model: modelResaved).sum()