# Forward pass analysis

## visualize masks

In [None]:
// For local development: the three kernel directives below set the install
// location, pass `-c release` to SwiftPM, and install the listed products from
// the package checked out at /notebooks/language2motion.gt.
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter LangMotionModels Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import FoundationXML
import ModelSupport
import Datasets
import SummaryWriter
import LangMotionModels
import Checkpoints
import PythonKit

In [None]:
// Include the IPython display helper file, then enable inline matplotlib
// output through its shell object.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
// Python modules imported through PythonKit: matplotlib's pyplot and NumPy.
let plt = Python.import("matplotlib.pyplot")
let np = Python.import("numpy")

## Set training params

In [None]:
// Device passed to the dataset loader and to the loss arguments later in this notebook.
let device = Device.defaultTFEager

In [None]:
// Length limits forwarded to the sentence and motion preprocessing calls.
let maxTextSequenceLength =  20
let maxMotionLength =  100

In [None]:
// `datasetSize.rawValue` is interpolated into the dataset file name;
// `batchSize` is handed to the dataset loader.
let datasetSize: DatasetSize = .full
let batchSize = 2

In [None]:
// Root data directory and the motion dataset plist inside it.
// NOTE(review): there is no "." between the interpolated rawValue and "plist";
// presumably the raw value is empty or carries its own trailing dot — confirm
// against DatasetSize.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

In [None]:
// Text processing: vocabulary file + BERT tokenizer, wrapped in a TextProcessor.
// Plain `try` (not `try!`): a missing or unreadable vocab file is raised as a
// regular error, the same way the dataset-loading cell handles failures,
// instead of trapping the whole process.
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
let vocabulary: Vocabulary = try Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary, caseSensitive: false, unknownToken: "[UNK]", maxTokenLength: nil)
let textProcessor = TextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

// Model configuration.
// NOTE(review): presumably these values have to match the ones the loaded
// checkpoint was trained with — confirm against the training run.
let config = LangMotionTransformerConfig(
    vocabSize: vocabulary.count,
    nbJoints: 47, // TODO: get value from dataset
    nbMixtures: 20,
    layerCount: 6,
    modelSize: 256,
    feedForwardSize: 1024,
    headCount: 8,
    dropoutProbability: 0.1,
    sentenceMaxPositionalLength: 100,
    motionMaxPositionalLength: 500,
    doMotionDense: false
)

// Training run and epoch whose checkpoint is analysed.
let runName = "run_38"
let epoch = 26

// Directory layout: <data>/runs/Lang2motion/<runName>/{checkpoints, generated_motions}
let runURL = dataURL.appendingPathComponent("runs/Lang2motion/\(runName)", isDirectory: true)
let checkpointURL = runURL.appendingPathComponent("checkpoints", isDirectory: true)
let motionsURL = runURL.appendingPathComponent("generated_motions", isDirectory: true)
// Create the output directory (and any missing parents) through the shared
// file manager; a failure is raised as an error rather than a trap.
try FileManager.default.createDirectory(at: motionsURL, withIntermediateDirectories: true)

// Build the model from the checkpoint directory, using the name "model.e<epoch>".
let model = LangMotionTransformer(checkpoint: checkpointURL, config: config, name: "model.e\(epoch)")

In [None]:
// Load the motion dataset. The trailing closure converts each MotionSample
// into a LangMotionBatch holding that single sample.
print("\nLoading dataset...")

var dataset = try Lang2Motion(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    trainTestSplit: 1.0,
    demultiplyMotions: false,
    device: device
) { (motionSample: MotionSample) -> LangMotionBatch in
    // Only the first annotation of the sample is used as the sentence.
    let processedSentence = textProcessor.preprocess(
        sentence: motionSample.annotations[0],
        maxTextSequenceLength: maxTextSequenceLength
    )
    let (inputMotionPart, expectedTarget) = LangMotionBatch.preprocessTargetMotion(
        sampleID: motionSample.sampleID,
        motion: motionSample.motion,
        maxMotionLength: maxMotionLength
    )
    return LangMotionBatch(
        source: LangMotionBatch.Source(sentence: processedSentence, motionPart: inputMotionPart),
        target: expectedTarget
    )
}

print("Dataset acquired.")

# Forward pass

In [None]:
// Loss function
// Arguments passed to every loss evaluation in this notebook; the joint and
// mixture counts are read from the model config.
let args = LossArgs(
        nb_joints: config.nbJoints,
        nb_mixtures: config.nbMixtures,
        mixture_regularizer_type: "None",  // ["cv", "l2", "None"]
        mixture_regularizer: 0.0,
        device: device
)

/// Evaluates the mixture surrogate loss on the positions that are not marked as stops.
///
/// Both inputs are squeezed, the positions along axis 1 whose `stops` value
/// differs from 1 are selected, and `_normalMixtureSurrogateLoss` is run on
/// that selection only.
///
/// - Parameters:
///   - y_pred: Model predictions; the function is differentiable with respect to this argument.
///   - y_true: Target whose `stops` tensor provides the mask.
///   - args: Loss settings; `args.device` is used to place the helper tensors.
/// - Returns: `(mean, loss)` — the mean of the loss tensor, followed by the
///   loss tensor itself as returned by `_normalMixtureSurrogateLoss`.
@differentiable(wrt: y_pred)
public func normalMixtureSurrogateLoss2(y_pred: MixtureModelPreds, y_true: LangMotionBatch.Target, args: LossArgs) -> (Tensor<Float>, Tensor<Float>) {
    let squeezedPreds = y_pred.squeezed()
    let squeezedTarget = y_true.squeezed()

    // Candidate positions 0..<(size of axis 1 of `stops`).
    let positionCount = Int32(squeezedTarget.stops.shape[1])
    let allPositions = Tensor<Int32>(rangeFrom: 0, to: positionCount, stride: 1, on: args.device)

    // Keep only the positions where `stops` is not equal to 1.
    let keepMask = squeezedTarget.stops .!= Tensor(1, on: args.device)
    let keptPositions = allPositions.gathering(where: keepMask)

    let maskedPreds = squeezedPreds.gathering(atIndices: keptPositions, alongAxis: 1)
    let maskedTarget = squeezedTarget.gathering(atIndices: keptPositions, alongAxis: 1)

    let loss = _normalMixtureSurrogateLoss(y_true: maskedTarget, y_pred: maskedPreds, args: args)
    return (loss.mean(), loss)
}

## TODO

In [None]:
// TODO: visualize data:
// - mask(s)
// - signals that go through the decoder
// + check whether the loss changes when the text changes but the motion doesn't - yes, it changes
// + do we still have one step with a big loss? yes, but only the first one
// TODO: what would be the loss of a generated sequence?
// + is "learning to stop" working? no

## Single batch

In [None]:
// Take the first sample of the dataset and print its ID, description and annotations.
let motionSample = dataset.motionSamples[0]
print("sampleID: \(motionSample.sampleID)")
print(motionSample.description)
print(motionSample.annotations)

In [None]:
// Hand-written sentence; in the visible cells it is referenced only by a
// commented-out preprocess call.
let s = "A person plays the guitar, dances and kicks, then kneels down."

In [None]:
// Build a batch from `motionSample`: its first annotation becomes the sentence
// and its motion is preprocessed into the motion part and the target.
// The commented-out lines are alternatives: the hand-written sentence `s`, and
// the `preprocessTargetMotion2` variant.
// let sentence = textProcessor.preprocess(sentence: s, maxTextSequenceLength: maxTextSequenceLength)
let sentence = textProcessor.preprocess(sentence: motionSample.annotations[0], maxTextSequenceLength: maxTextSequenceLength)
// let (motionPart, target) = LangMotionBatch.preprocessTargetMotion2(sampleID: motionSample.sampleID, motion: motionSample.motion, maxMotionLength: maxMotionLength)
let (motionPart, target) = LangMotionBatch.preprocessTargetMotion(sampleID: motionSample.sampleID, motion: motionSample.motion, maxMotionLength: maxMotionLength)


let source = LangMotionBatch.Source(sentence: sentence, motionPart: motionPart)
let singleBatch = LangMotionBatch(source: source, target: target)

## transformerOutput, preds

In [None]:
// Run the model on the batch source and keep the predictions from its output.
let transformerOutput = model(singleBatch.source)
let singlePreds = transformerOutput.preds

In [None]:
// Print the predictions using their own printPreds() method.
singlePreds.printPreds()

In [None]:
/// Shows a tensor as an image in a new 5x5 matplotlib figure, using the "Spectral" colour map.
///
/// - Parameters:
///   - tensor: Values to plot; converted with `makeNumpyArray()` before being passed to `imshow`.
///   - cmapRange: Symmetric colour limit: the colour scale runs from `-cmapRange` to `cmapRange`.
func tensorShow(_ tensor: Tensor<Float>, cmapRange: Int = 6) {
    let colourFloor = -cmapRange
    let colourCeiling = cmapRange

    plt.figure(figsize: [5, 5])
    let image = tensor.makeNumpyArray()
    plt.imshow(image, aspect: "auto", cmap: "Spectral", vmin: colourFloor, vmax: colourCeiling)
    plt.show()
}

## loss

In [None]:
// Loss for the first batch: `avg_loss` is the mean, `loss` is the tensor it was averaged from.
let (avg_loss, loss) = normalMixtureSurrogateLoss2(y_pred: singlePreds, y_true: singleBatch.target, args: args)

## batched computation

## second batch: same sample, with old 1-dim attention mask

In [None]:
// Index 0 again: this is the same dataset sample as `motionSample`; only the
// preprocessing applied to it afterwards differs.
let motionSample2 = dataset.motionSamples[0]
print("sampleID: \(motionSample2.sampleID)")
print(motionSample2.description)
print(motionSample2.annotations)

In [None]:
// Second batch: uses `preprocessTargetMotion2` and passes `sentence2.mask`
// explicitly as the source attention mask, whereas `singleBatch` was built with
// `preprocessTargetMotion` and no explicit `sourceAttentionMask` argument.
let sentence2 = textProcessor.preprocess(sentence: motionSample2.annotations[0], maxTextSequenceLength: maxTextSequenceLength)
let (motionPart2, target2) = LangMotionBatch.preprocessTargetMotion2(sampleID: motionSample2.sampleID, motion: motionSample2.motion, maxMotionLength: maxMotionLength)
let source2 = LangMotionBatch.Source(sentence: sentence2, motionPart: motionPart2, sourceAttentionMask: sentence2.mask)
let singleBatch2 = LangMotionBatch(source: source2, target: target2)

In [None]:
// Shape of the source attention mask in the first batch.
singleBatch.source.sourceAttentionMask.shape

In [None]:
// Shape of the source attention mask in the second batch, for comparison with the first.
singleBatch2.source.sourceAttentionMask.shape

In [None]:
// Run the same model on the second batch source and keep its predictions.
let transformerOutput2 = model(singleBatch2.source)
let singlePreds2 = transformerOutput2.preds

In [None]:
// Loss for the second batch, computed with the same loss function and arguments as the first.
let (avg_loss2, loss2) = normalMixtureSurrogateLoss2(y_pred: singlePreds2, y_true: singleBatch2.target, args: args)

In [None]:
// Print the mean loss, then leave the full loss tensor as the cell's last expression.
print(avg_loss2)
loss2

In [None]:
// Mean, max and min of the decoder's last-layer output for the second batch.
(transformerOutput2.decoded.lastLayerOutput.mean(),
transformerOutput2.decoded.lastLayerOutput.max(),
transformerOutput2.decoded.lastLayerOutput.min())

In [None]:
// Mean, max and min of the decoder's last-layer output for the first batch.
(transformerOutput.decoded.lastLayerOutput.mean(),
transformerOutput.decoded.lastLayerOutput.max(),
transformerOutput.decoded.lastLayerOutput.min())

In [None]:
// Shape of the decoder's last-layer output for the second batch.
transformerOutput2.decoded.lastLayerOutput.shape

In [None]:
// Element-wise difference between the two decoder outputs (second minus first).
transformerOutput2.decoded.lastLayerOutput - transformerOutput.decoded.lastLayerOutput

In [None]:
// Plot the second batch's decoder output after squeezing axis 0.
tensorShow((transformerOutput2.decoded.lastLayerOutput).squeezingShape(at: 0))

In [None]:
// Plot the first batch's decoder output after squeezing axis 0.
tensorShow((transformerOutput.decoded.lastLayerOutput).squeezingShape(at: 0))

In [None]:
// Plot the difference between the two decoder outputs (second minus first) after squeezing axis 0.
tensorShow((transformerOutput2.decoded.lastLayerOutput - transformerOutput.decoded.lastLayerOutput).squeezingShape(at: 0))