# Forward pass analysis

In [None]:
// for local development
// Kernel install directives: build the local language2motion package with the
// release configuration and expose the listed modules to the cells below.
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter LangMotionModels Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import FoundationXML
import ModelSupport
import Datasets
import SummaryWriter
import LangMotionModels
import Checkpoints
import PythonKit

In [None]:
// Load the kernel's IPython display helper and ask matplotlib to render inline.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
// Python modules reached through PythonKit; `plt` drives every plotting cell below.
let plt = Python.import("matplotlib.pyplot")
let np = Python.import("numpy")

## Set training params

In [None]:
// Device handed to the dataset loader and to the loss arguments further down.
let device = Device.defaultTFEager

In [None]:
// Length limits passed to the sentence and target-motion preprocessing calls below.
let maxTextSequenceLength =  40
let maxMotionLength =  150

In [None]:
// Dataset variant (its raw value is interpolated into the dataset file name)
// and the batch size given to the dataset loader.
let datasetSize: DatasetSize = .midi
let batchSize = 2

In [None]:
// Root data directory and the motion dataset file inside it.
// NOTE(review): the literal has no "." between the raw value and "plist" —
// presumably `DatasetSize.rawValue` already ends with one; confirm.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

In [None]:
/// instantiate text processor
// Vocabulary is read from vocab.txt under the data directory; `try!` aborts the
// cell if that file cannot be loaded. The tokenizer is case-insensitive and maps
// unknown tokens to "[UNK]".
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
let vocabulary: Vocabulary = try! Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary, caseSensitive: false, unknownToken: "[UNK]", maxTokenLength: nil)
let textProcessor = TextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

In [None]:
// Load the motion dataset and describe how each sample is turned into a
// single-sample LangMotionBatch (first annotation + preprocessed target motion).
print("\nLoading dataset...")

// `maxMotionLength` is the same constant the preprocessing closure uses, so the
// loader's length filter and the per-sample preprocessing cannot drift apart.
var dataset = try Lang2Motion(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    minMotionLength: 20,
    maxMotionLength: maxMotionLength,
    trainTestSplit: 1.0,
    device: device
) { (motionSample: MotionSample) -> LangMotionBatch in
    // Only the first annotation of each sample is used as the input sentence.
    let sentence = textProcessor.preprocess(sentence: motionSample.annotations[0], maxTextSequenceLength: maxTextSequenceLength)
    let (motionPart, target) = LangMotionBatch.preprocessTargetMotion(sampleID: motionSample.sampleID, motion: motionSample.motion, maxMotionLength: maxMotionLength)
    let source = LangMotionBatch.Source(sentence: sentence, motionPart: motionPart)
    return LangMotionBatch(source: source, target: target)
}

print("Dataset acquired.")

In [None]:
/// instantiate model
// Transformer hyperparameters; the vocabulary size comes from the loaded vocabulary.
// NOTE(review): these presumably have to match the configuration the checkpoint
// was trained with — confirm against the run's training setup.
let config = LangMotionTransformerConfig(
    vocabSize: vocabulary.count,
    nbJoints: 47,
    nbMixtures: 20,
    layerCount: 6,
    encoderDepth: 256,
    decoderDepth: 512,
    feedForwardSize: 2048,
    headCount: 16,
    dropoutProbability:  0.1,
    sentenceMaxPositionalLength: 100,
    motionMaxPositionalLength: 500,
    encoderSelfAttentionTemp: 1,
    decoderSourceAttentionTemp: 1,
    decoderSelfAttentionTemp: 1
)

// Run and epoch that select which checkpoint is restored ("model.e<epoch>").
let runName = "run_74"
let epoch = 2

// Per-run directories under data/runs/Lang2motion/<runName>.
let runURL = dataURL.appendingPathComponent("runs/Lang2motion/\(runName)", isDirectory: true)
let checkpointURL = runURL.appendingPathComponent("checkpoints", isDirectory: true)
let motionsURL = runURL.appendingPathComponent("generated_motions", isDirectory: true)
// Creates generated_motions (and missing parents); `try!` aborts the cell on failure.
try! FileManager().createDirectory(at: motionsURL, withIntermediateDirectories: true)

// Restore the model from the run's checkpoint directory.
let model = LangMotionTransformer(checkpoint: checkpointURL, config: config, name: "model.e\(epoch)")

# Forward pass

In [None]:
// Notebook-level capture slots: `_normalMixtureSurrogateLoss2` overwrites them on
// every call so later cells can print/plot the intermediate terms.
// The [1.0] tensors are placeholders until the loss has been evaluated once.
var log_mixture_pdf2: Tensor<Float> = Tensor<Float>([1.0])
var log_bernoulli_pdf2: Tensor<Float> = Tensor<Float>([1.0])
var stops2: Tensor<Float> = Tensor<Float>([1.0])
// @differentiable(wrt: y_pred)
/// Notebook copy of the normal-mixture surrogate loss that also records its
/// intermediate terms for inspection.
///
/// The result is
/// `-(log_mixture_pdf + log_bernoulli_pdf) + args.mixture_regularizer * mixture_reg`,
/// where `log_mixture_pdf` is shaped like the first two axes of the mixture weights.
///
/// Side effects: overwrites the notebook-level `stops2`, `log_mixture_pdf2` and
/// `log_bernoulli_pdf2` (the latter two receive the negated log terms).
///
/// - Parameters:
///   - y_true: target motion and stop flags.
///     NOTE(review): assumed to be already squeezed/masked by the caller so it
///     broadcasts against the sliced means — confirm.
///   - y_pred: mixture means, variances, weights and predicted stops.
///   - args: joint/mixture counts, regularizer settings and target device.
/// - Returns: the unreduced loss tensor.
public func _normalMixtureSurrogateLoss2(y_true: LangMotionBatch.Target, y_pred: MixtureModelPreds, args: LossArgs) -> Tensor<Float> {
    let TINY: Float = 1e-8
    // Full-precision constant so the Gaussian normalizer is exact to Float precision.
    let pi = Float.pi
    let nb_mixtures = args.nb_mixtures
    let nb_joints = args.nb_joints

    let all_means = y_pred.mixtureMeans
    // TINY keeps the variances strictly positive before dividing by them.
    let all_variances = y_pred.mixtureVars + TINY
    let weights = y_pred.mixtureWeights
    let stops = y_pred.stops.squeezingShape(at: 2)
    stops2 = stops

    var log_mixture_pdf = Tensor<Float>(zeros: [weights.shape[0], weights.shape[1]], on: args.device)
    for mixture_idx in 0..<nb_mixtures {
        // Each mixture owns a contiguous run of `nb_joints` entries on the last axis.
        let start_idx = mixture_idx * nb_joints
        let means = all_means[0..., 0..., start_idx..<start_idx + nb_joints]
        let variances = all_variances[0..., 0..., start_idx..<start_idx + nb_joints]
        let diff = y_true.motion - means
        let pdf1 = 1.0 / sqrt(variances * 2.0 * pi)
        let pdf2a = diff.squared()
        let pdf2 = exp(-(pdf2a) / (2.0 * variances))
        let pdf = pdf1 * pdf2
        // Surrogate term: mixture weight times the log densities summed over axis 2.
        let weighted_pdf = weights[0..., 0..., mixture_idx] *
            log(pdf + TINY).sum(alongAxes: 2).squeezingShape(at: 2)

        log_mixture_pdf = log_mixture_pdf + weighted_pdf
    }

    // Drop the motion term wherever the target stop flag equals 1.
    let zeroTensor = Tensor<Float>(repeating: 0.0, shape: log_mixture_pdf.shape, on: args.device)
    log_mixture_pdf = log_mixture_pdf.replacing(with: zeroTensor, where: y_true.stops .== Tensor<Float>(1.0, on: args.device))

    // Bernoulli likelihood of the stop flag: y * p + (1 - y) * (1 - p).
    let b_pdf1 = Float(1.0) - y_true.stops
    let b_pdf2 = Float(1.0) - stops
    let bernoulli_pdf = y_true.stops * stops + b_pdf1 * b_pdf2
    let log_bernoulli_pdf = log(bernoulli_pdf + TINY)

    let mixture_reg: Float
    switch args.mixture_regularizer_type {
    case "cv":
        // We want to use (std / mean)^2 = std^2 / mean^2 = var / mean^2.
        mixture_reg = weights.variance().scalarized() /
            weights.mean().squared().scalarized()
    case "l2":
        mixture_reg = weights.squared().sum().scalarized()
    default:
        // Any other value (e.g. "None") disables the regularizer.
        mixture_reg = 0.0
    }
    let loss = -(log_mixture_pdf + log_bernoulli_pdf) +
        args.mixture_regularizer * mixture_reg
//     print("log_mixture_pdf: \(log_mixture_pdf)")
//     print("log_bernoulli_pdf: \(log_bernoulli_pdf)")
    // Expose the negated terms to the plotting cells.
    log_mixture_pdf2 = -log_mixture_pdf
    log_bernoulli_pdf2 = -log_bernoulli_pdf
    return loss
}

In [None]:
// Placeholder tensors used only to give the capture variables an initial value.
let t1 = Tensor<Float>([1.0])
let t2 = Tensor<Int32>([1])
// Capture slots overwritten by `normalMixtureSurrogateLoss2` with the masked
// predictions and targets, so later cells can inspect them.
var y_pred2: MixtureModelPreds = MixtureModelPreds(mixtureMeans: t1, mixtureVars: t1, mixtureWeights: t1, stops: t1)
var y_true2: LangMotionBatch.Target = LangMotionBatch.Target(sampleID: t2, motion: t1, stops:t1, segmentIDs: t2, origMotionFramesCount: t2)
// @differentiable(wrt: y_pred)
/// Masks predictions and targets to the positions whose segment ID is non-zero,
/// then evaluates the surrogate loss on what remains.
///
/// Side effects: stores the masked predictions/targets in the notebook-level
/// `y_pred2` / `y_true2`.
///
/// - Returns: `(mean of the loss tensor, loss tensor)`.
public func normalMixtureSurrogateLoss2(y_pred: MixtureModelPreds, y_true: LangMotionBatch.Target, args: LossArgs) -> (Tensor<Float>, Tensor<Float>) {
    // masking
    let squeezedPreds = y_pred.squeezed()
    let squeezedTarget = y_true.squeezed()

    // Candidate positions 0..<shape[1] of the squeezed stop flags.
    let positionCount = Int32(squeezedTarget.stops.shape[1])
    let positions = Tensor<Int32>(rangeFrom: 0, to: positionCount, stride: 1, on: args.device)
    let keptPositions = positions.gathering(where: squeezedTarget.segmentIDs .!= Tensor(0, on: args.device))

    let maskedPreds = squeezedPreds.gathering(atIndices: keptPositions, alongAxis: 1)
    let maskedTarget = squeezedTarget.gathering(atIndices: keptPositions, alongAxis: 1)

    // Keep the masked inputs around for the inspection cells.
    y_pred2 = maskedPreds
    y_true2 = maskedTarget

    let perPositionLoss = _normalMixtureSurrogateLoss2(y_true: maskedTarget, y_pred: maskedPreds, args: args)
    return (perPositionLoss.mean(), perPositionLoss)
}

In [None]:
// Loss function
// Loss settings shared by the wrapper below: joint/mixture counts mirror the model
// config, and the regularizer is switched off (type "None", coefficient 0.0).
let args = LossArgs(
        nb_joints: config.nbJoints,
        nb_mixtures: config.nbMixtures,
        mixture_regularizer_type: "None",  // ["cv", "l2", "None"]
        mixture_regularizer: 0.0,
        device: device
)

// @differentiable(wrt: y_pred)
/// Unwraps the mixture predictions from the transformer output and forwards them,
/// together with the notebook-level `args`, to `normalMixtureSurrogateLoss2`.
func embeddedNormalMixtureSurrogateLoss(y_pred: LangMotionTransformerOutput<Float>, y_true: LangMotionBatch.Target) -> (Tensor<Float>, Tensor<Float>) {
    let mixturePreds = y_pred.preds
    return normalMixtureSurrogateLoss2(y_pred: mixturePreds, y_true: y_true, args: args)
}

## Single batch

In [None]:
// Keep only the samples whose motion has fewer than 100 entries along axis 0.
let shortSamples = dataset.motionSamples.filter { $0.motion.shape[0] < 100 }
print(shortSamples.count)
// Show the lengths of up to the first 11 short samples. `prefix` clamps to the
// available count, so the cell does not trap when fewer than 11 samples qualify.
shortSamples.prefix(11).map { $0.motion.shape[0] }

In [None]:
// let motionSample = dataset.motionSamples[0]
// Pick the second short sample for the single-batch walkthrough
// (index 1 traps if fewer than two samples passed the filter).
let motionSample = shortSamples[1]
print("sampleID: \(motionSample.sampleID)")
print(motionSample.description)
print(motionSample.annotations)

In [None]:
// Build a one-sample batch by hand, using the same steps as the dataset's
// preprocessing closure: first annotation -> sentence, motion -> (motionPart, target).
let sentence = textProcessor.preprocess(sentence: motionSample.annotations[0], maxTextSequenceLength: maxTextSequenceLength)
let (motionPart, target) = LangMotionBatch.preprocessTargetMotion(sampleID: motionSample.sampleID, motion: motionSample.motion, maxMotionLength: maxMotionLength)
let source = LangMotionBatch.Source(sentence: sentence, motionPart: motionPart)
let singleBatch = LangMotionBatch(source: source, target: target)

## source motion part

In [None]:
// Print a summary of the source-side motion part.
singleBatch.source.motionPart.printMotionPart()

In [None]:
// Display index 0 along the last axis of the source motion, rounded to one decimal.
round(singleBatch.source.motionPart.motion[0..., 0..., 0]*1e1)/1e1

In [None]:
// Display the source segment IDs for the first batch entry (last-axis index 0).
singleBatch.source.motionPart.segmentIDs[0, 0..., 0]

## target motion

In [None]:
// Print a summary of the target side of the batch.
singleBatch.target.printTarget()

In [None]:
// Display index 0 along the last axis of the target motion, rounded to one decimal.
round(singleBatch.target.motion[0..., 0..., 0]*1e1)/1e1

In [None]:
// Display the target segment IDs (the loss keeps only positions where these are non-zero).
singleBatch.target.segmentIDs

In [None]:
// Display the target stop flags converted to Int32 for easier reading.
Tensor<Int32>(singleBatch.target.stops)

## run model(batch), transformerOutput, preds

In [None]:
// Forward pass on the hand-built batch; `preds` holds the mixture-model predictions
// consumed by the loss.
let transformerOutput = model(singleBatch.source)
let singlePreds = transformerOutput.preds

In [None]:
// Print a summary of the predictions.
singlePreds.printPreds()

## helpers

In [None]:
/// Renders `tensor` as a 5x5 matplotlib image using the "Spectral" colormap.
///
/// - Parameters:
///   - tensor: values to draw (converted with `makeNumpyArray()`).
///   - cmapRange: colour scale limit; the scale spans `-cmapRange...cmapRange`.
func tensorShow(_ tensor: Tensor<Float>, cmapRange: Int = 6) {
    let lowerLimit = -cmapRange
    let upperLimit = cmapRange

    plt.figure(figsize: [5, 5])
    let image = tensor.makeNumpyArray()
    plt.imshow(image, aspect: "auto", cmap: "Spectral", vmin: lowerLimit, vmax: upperLimit)
    plt.show()
}

## loss

In [None]:
// Evaluate the loss on the single batch; this call also refreshes the capture
// variables (y_pred2, y_true2, stops2, log_mixture_pdf2, log_bernoulli_pdf2).
let (mean_loss, loss) = embeddedNormalMixtureSurrogateLoss(y_pred: transformerOutput, y_true: singleBatch.target)

In [None]:
// Report the mean loss and the loss tensor's shape, then display the loss
// rounded to two decimals.
print("mean_loss: \(mean_loss)")
print("loss.shape: \(loss.shape)")
round(loss*1e2)/1e2

In [None]:
// Plot the flattened loss values.
plt.plot(loss.scalars)
plt.show()

# What about the stop signal?

In [None]:
// Print a summary of the masked target captured during the loss call.
y_true2.printTarget()

In [None]:
// Display index 0 along the last axis of the masked target motion, rounded to one decimal.
round(y_true2.motion[0..., 0..., 0]*1e1)/1e1

In [None]:
// Display the masked target stop flags.
y_true2.stops

## log_mixture_pdf

In [None]:
// Plot the captured mixture term (stored negated by the loss function).
plt.plot(log_mixture_pdf2.scalars)
plt.show()

## log_bernoulli_pdf

In [None]:
// Print and plot the captured Bernoulli stop term (stored negated by the loss function).
print(log_bernoulli_pdf2.scalars)
plt.plot(log_bernoulli_pdf2.scalars)
plt.show()

## stops

In [None]:
// Print and plot the predicted stop values captured during the loss call.
print(stops2)
plt.plot(stops2.scalars)
plt.show()