# Forward pass analysis

In [None]:
// for local development
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter LangMotionModels Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import FoundationXML
import ModelSupport
import Datasets
import SummaryWriter
import LangMotionModels
import Checkpoints
import PythonKit

In [None]:
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

## Set training params

In [None]:
// Default TensorFlow eager device; passed to the dataset loader and to the loss arguments below.
let device = Device.defaultTFEager

In [None]:
// Sequence-length limits handed to the text preprocessor and to the motion
// padding/cropping step, respectively.
let maxTextSequenceLength: Int = 20
let maxMotionLength: Int = 100

In [None]:
// Dataset variant to load (its raw value becomes part of the dataset file name)
// and the batch size given to the dataset loader.
let datasetSize: DatasetSize = .full
let batchSize = 2

In [None]:
// Root folder that holds the motion dataset, the vocabulary file and the training runs.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
// NOTE(review): the raw value is spliced in directly before "plist", so it presumably
// carries its own trailing dot (or is empty) — verify against DatasetSize.
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

In [None]:
/// instantiate text processor
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
// Plain `try` (as in the dataset-loading cell) propagates a load failure as an error
// instead of force-trapping with `try!`.
let vocabulary: Vocabulary = try Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(vocabulary: vocabulary, caseSensitive: false, unknownToken: "[UNK]", maxTokenLength: nil)
let textProcessor = TextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

/// instantiate model
let config = LangMotionTransformerConfig(
    vocabSize: vocabulary.count,
    nbJoints: 47, // TODO: get value from dataset
    nbMixtures: 20,
    layerCount: 6,
    modelSize: 256,
    feedForwardSize: 1024,
    headCount: 8,
    dropoutProbability:  0.1,
    sentenceMaxPositionalLength: 100,
    motionMaxPositionalLength: 500
)

// Training run and epoch whose checkpoint is analysed in this notebook.
let runName = "run_25"
let epoch = 40

// let runName = "run_16"
// let epoch = 5


// Folder layout of a run: <data>/runs/Lang2motion/<runName>/{checkpoints, generated_motions}
let runURL = dataURL.appendingPathComponent("runs/Lang2motion/\(runName)", isDirectory: true)
let checkpointURL = runURL.appendingPathComponent("checkpoints", isDirectory: true)
let motionsURL = runURL.appendingPathComponent("generated_motions", isDirectory: true)
// Make sure the output folder exists; a failure is thrown rather than trapped.
try FileManager.default.createDirectory(at: motionsURL, withIntermediateDirectories: true)

// Restore the model from the checkpoint named after the selected epoch.
let model = LangMotionTransformer(checkpoint: checkpointURL, config: config, name: "model.e\(epoch)")

In [None]:
/// load dataset
print("\nLoading dataset...")

// The trailing closure converts every motion sample into a single-sample batch:
// its first annotation becomes the sentence, its motion becomes the motion part and target.
var dataset = try Lang2Motion(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    trainTestSplit: 1.0,
    device: device
) { (motionSample: MotionSample) -> LangMotionBatch in
    let sentence = textProcessor.preprocess(
        sentence: motionSample.annotations[0],
        maxTextSequenceLength: maxTextSequenceLength
    )
    let (motionPart, target) = LangMotionBatch.preprocessTargetMotion(
        sampleID: motionSample.sampleID,
        motion: motionSample.motion,
        maxMotionLength: maxMotionLength
    )
    return LangMotionBatch(
        source: LangMotionBatch.Source(sentence: sentence, motionPart: motionPart),
        target: target
    )
}

print("Dataset acquired.")

# Forward pass

In [None]:
// + create batch
// + run forward pass
// + compute loss
// TODO: visualize data:
//       - ...

In [None]:
// Inspect the first motion sample of the loaded dataset: its ID, description and annotations.
let motionSample = dataset.motionSamples[0]
print("sampleID: \(motionSample.sampleID)")
print(motionSample.description)
print(motionSample.annotations)

In [None]:
extension LangMotionBatch {
    /// Builds the model input (`MotionPart`) and the training `Target` for one motion.
    ///
    /// The motion is padded/cropped to `maxMotionLength` and given a leading axis of size 1.
    /// The motion part holds every frame except the last one; the target holds every frame
    /// except the first one, i.e. the target is the input shifted by one frame.
    ///
    /// - Parameters:
    ///   - sampleID: stored in the target as a one-element `Int32` tensor.
    ///   - motion: motion tensor; the size of its axis 0 is recorded as the original frame count.
    ///   - maxMotionLength: length handed to `paddedAndCropped(to:)`.
    /// - Returns: the motion part (input frames plus mask) and the target
    ///   (frames, stop flags, original frame count).
    public static func preprocessTargetMotion2(sampleID: Int, motion: Tensor<Float>, maxMotionLength: Int) -> (motionPart: MotionPart, target: Target)
    {
        let frameCount = Tensor<Int32>([Int32(motion.shape[0])])

        // Pad/crop first, then prepend an axis of size 1 to both the motion and its flag.
        let (fittedMotion, fittedFlag) = motion.paddedAndCropped(to: maxMotionLength)
        let batchedMotion = fittedMotion.expandingShape(at: 0)
        let batchedFlag = fittedFlag.expandingShape(at: 0)

        // source (motionPart & motion flag): every frame but the last one
        let inputFrames = 0..<(batchedMotion.shape[1] - 1)
        let inputMotion = batchedMotion[0..., inputFrames, 0...]
        let inputFlag = batchedFlag[0..., inputFrames]
        let motionPart = MotionPart(
            motion: inputMotion,
            mask: makeStandardMask(target: inputFlag, pad: 0) // FIXME: fix target mask
        )

        // target (motion & stops): every frame but the first one
        // FIXME: should targetTruthStop encompass current motion frame?
        let shiftedMotion: Tensor<Float> = batchedMotion[0..., 1..., 0...]
        let shiftedFlag = batchedFlag[0..., 1...]
        // The stop signal is the complement of the motion flag.
        let stopSignal: Tensor<Float> = 1.0 - Tensor<Float>(shiftedFlag)

        let target = Target(
            sampleID: Tensor([Int32(sampleID)]),
            motion: shiftedMotion,
            stops: stopSignal,
            origMotionFramesCount: frameCount
        )
        return (motionPart: motionPart, target: target)
    }
}

In [None]:
// Rebuild a single-sample batch by hand from `motionSample`, using the notebook-local
// `preprocessTargetMotion2` rather than the `preprocessTargetMotion` call used in the dataset closure.
let sentence = textProcessor.preprocess(sentence: motionSample.annotations[0], maxTextSequenceLength: maxTextSequenceLength)
let (motionPart, target) = LangMotionBatch.preprocessTargetMotion2(sampleID: motionSample.sampleID, motion: motionSample.motion, maxMotionLength: maxMotionLength)
let source = LangMotionBatch.Source(sentence: sentence, motionPart: motionPart)
let singleBatch = LangMotionBatch(source: source, target: target)

In [None]:
/// Un-reduced surrogate loss for a normal-mixture motion model with a Bernoulli stop signal.
///
/// For every mixture component the per-joint normal density of `y_true.motion` is evaluated,
/// its log is summed over axis 2 and multiplied by that component's mixture weight; these
/// weighted terms are accumulated over all components. The log-probability of the stop flag
/// is added, the sum is negated, and the (optional) mixture-weight regularizer is added.
///
/// - Parameters:
///   - y_true: target providing `motion` and `stops`.
///   - y_pred: predictions providing `mixtureMeans`, `mixtureVars`, `mixtureWeights` and `stops`.
///   - args: number of joints and mixtures, regularizer type ("cv", "l2", anything else
///     disables it), regularizer weight and the device for the accumulator tensor.
/// - Returns: the loss before any averaging; the mixture term has the shape of the first two
///   axes of `mixtureWeights` (assumed to be batch × frames — TODO confirm).
@differentiable
public func normalMixtureSurrogateLoss2(y_true: LangMotionBatch.Target, y_pred: MixtureModelPreds, args: LossArgs) -> Tensor<Float> {
    // Small constant that keeps variances and log arguments away from zero.
    let TINY: Float = 1e-8
    // Full Float precision instead of the truncated literal 3.1415.
    let pi = Float.pi
    let nb_mixtures = args.nb_mixtures
    let nb_joints = args.nb_joints

    let all_means = y_pred.mixtureMeans
    let all_variances = y_pred.mixtureVars + TINY
    let weights = y_pred.mixtureWeights
    // Drop axis 2 of the predicted stops, presumably so they line up with `y_true.stops`.
    let stops = y_pred.stops.squeezingShape(at: 2)

    var log_mixture_pdf: Tensor<Float> = Tensor<Float>(zeros: [weights.shape[0], weights.shape[1]], on: args.device) 
    for mixture_idx in 0..<nb_mixtures {
        // Means/variances of all mixtures are laid out back to back along the last axis,
        // `nb_joints` values per mixture.
        let start_idx = mixture_idx * nb_joints
        let means = all_means[0..., 0..., start_idx..<start_idx + nb_joints]
        let variances = all_variances[0..., 0..., start_idx..<start_idx + nb_joints]
        let diff = y_true.motion - means
        // Normal density: 1 / sqrt(2 * pi * var) * exp(-diff^2 / (2 * var))
        let pdf1 = 1.0 / sqrt(variances * 2.0 * pi)
        let pdf2a = diff.squared()
        let pdf2 = exp(-(pdf2a) / (2.0 * variances))
        let pdf = pdf1 * pdf2
        // Surrogate: weight the summed per-joint log-density instead of taking the log of the weighted sum.
        let weighted_pdf = weights[0..., 0..., mixture_idx] * 
            log(pdf + TINY).sum(alongAxes:2).squeezingShape(at: 2)
        log_mixture_pdf = log_mixture_pdf + weighted_pdf
    }

    // Bernoulli probability of the true stop flag under the predicted stop probability.
    let b_pdf1 = Float(1.0) - y_true.stops
    let b_pdf2 = Float(1.0) - stops
    let bernoulli_pdf = y_true.stops * stops + b_pdf1 * b_pdf2
    let log_bernoulli_pdf = log(bernoulli_pdf + TINY)

    var mixture_reg: Float = 0.0
    if args.mixture_regularizer_type == "cv" {
        // We want to use (std / mean)^2 = std^2 / mean^2 = var / mean^2.
        mixture_reg = weights.variance().scalarized() / 
            weights.mean().squared().scalarized()
    } else if args.mixture_regularizer_type == "l2" {
        mixture_reg = weights.squared().sum().scalarized()
    } else {
        mixture_reg = 0.0
    }
    // TODO: divide loss (component?) by maxMotionLength
    // TODO: move loss averaging here

    // print("log_mixture_pdf: \(log_mixture_pdf)")

    // print("log_bernoulli_pdf: \(log_bernoulli_pdf)")

    let loss = -(log_mixture_pdf + log_bernoulli_pdf) +
        args.mixture_regularizer * mixture_reg
    return loss
}


In [None]:
// Loss function
// Arguments read as a global by the loss wrappers defined in this notebook.
// The regularizer is effectively off: the type string matches neither "cv" nor "l2",
// and its weight is 0.
let args = LossArgs(
        nb_joints: config.nbJoints,
        nb_mixtures: config.nbMixtures,
        mixture_regularizer_type: "None",  // ["cv", "l2", "None"]
        mixture_regularizer: 0.0,
        device: device
)

/// Averages the un-reduced surrogate loss over the entries of its first two axes.
///
/// Uses the notebook-global `args` and prints the raw loss, the item count and the
/// average as a debugging aid.
@differentiable
func embeddedNormalMixtureSurrogateLoss(y_pred: MixtureModelPreds, y_true: LangMotionBatch.Target) -> Tensor<Float> {
    let frameLoss = normalMixtureSurrogateLoss2(y_true: y_true, y_pred: y_pred, args: args)
    print("loss: \(frameLoss)")

    // Number of entries spanned by the first two axes of the loss.
    let rows = frameLoss.shape[0]
    let columns = frameLoss.shape[1]
    let n_items: Float = Float(rows * columns)
    print("n_items: \(n_items)")

    let meanLoss = frameLoss.sum() / n_items
    print("avg_loss: \(meanLoss)")
    return meanLoss
}

## source: sentence, motionPart

In [None]:
singleBatch.source.printSource()

## target: motion, stops

In [None]:
singleBatch.target.printTarget()

In [None]:
singleBatch.target.stops[0, 0..<38]

In [None]:
singleBatch.target.stops[0, 38...]

## figure out masking in loss function

In [None]:
// Two copies of the same single-sample batch, presumably merged into one two-sample batch — verify reduceDataBatches.
let batch = LangMotionBatch.reduceDataBatches([singleBatch, singleBatch])

### reshape

In [None]:
batch.target.printTarget()

In [None]:
let batchPreds = model(batch.source)
batchPreds.printPreds()

## preds

In [None]:
let singlePreds = model(singleBatch.source)
singlePreds.printPreds()

## gather

In [None]:
// let y_true = batch.target.squeezed()
// let y_pred = batchPreds.squeezed()
// Work on the single-sample target/prediction pair; the two-sample variant is commented out above.
let y_true = singleBatch.target.squeezed()
let y_pred = singlePreds.squeezed()
y_true.printTarget()
y_pred.printPreds()

### mask

In [None]:
// One index per position along axis 1 of the target stops: 0, 1, ..., shape[1] - 1.
let ids = Tensor<Int32>(rangeFrom: 0, to: Int32(y_true.stops.shape[1]), stride: 1)
ids

In [None]:
// Keep only the indices whose target stop value differs from 1.
let indices = ids.gathering(where: y_true.stops .!= Tensor(1))
indices

In [None]:
indices.shape

In [None]:
y_true.printTarget()

In [None]:
/// Masked variant of the averaged surrogate loss.
///
/// Both inputs are squeezed, then only the positions along axis 1 whose target stop value
/// differs from 1 are kept before the loss is computed and averaged over the entries of
/// its first two axes. Uses the notebook-global `args` and prints intermediate values.
@differentiable(wrt: y_pred)
func embeddedNormalMixtureSurrogateLoss2(y_pred: MixtureModelPreds, y_true: LangMotionBatch.Target) -> Tensor<Float> {
    let squeezedPreds = y_pred.squeezed()
    let squeezedTarget = y_true.squeezed()

    // Indices along axis 1 where the target stop flag is not 1.
    let positions = Tensor<Int32>(rangeFrom: 0, to: Int32(squeezedTarget.stops.shape[1]), stride: 1)
    let keptPositions = positions.gathering(where: squeezedTarget.stops .!= Tensor(1))

    let maskedPreds = squeezedPreds.gathering(atIndices: keptPositions, alongAxis: 1)
    let maskedTarget = squeezedTarget.gathering(atIndices: keptPositions, alongAxis: 1)

    let frameLoss = normalMixtureSurrogateLoss2(y_true: maskedTarget, y_pred: maskedPreds, args: args)
    print("loss: \(frameLoss)")

    let n_items: Float = Float(frameLoss.shape[0] * frameLoss.shape[1])
    print("n_items: \(n_items)")

    let meanLoss = frameLoss.sum() / n_items
    print("avg_loss: \(meanLoss)")
    return meanLoss
}
let loss = embeddedNormalMixtureSurrogateLoss2(y_pred: batchPreds, y_true: batch.target)
loss

In [None]:
let loss = embeddedNormalMixtureSurrogateLoss2(y_pred: singlePreds, y_true: singleBatch.target)
loss

## loss

In [None]:
let loss = embeddedNormalMixtureSurrogateLoss2(y_pred: singlePreds, y_true: singleBatch.target)
loss

In [None]:
let loss = embeddedNormalMixtureSurrogateLoss(y_pred: singlePreds, y_true: singleBatch.target)
loss

## frame losses

In [None]:
// Un-reduced losses for the single-sample prediction against its own target.
// `singlePreds` is the prediction computed from `singleBatch.source` earlier in the notebook.
let frameLosses = normalMixtureSurrogateLoss(y_true: singleBatch.target, y_pred: singlePreds, args: args)
frameLosses.shape

Losses for the active motion frames (indices 0–37):

In [None]:
frameLosses[0, 0..<38]

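Losses for the frames from index 38 onward (the stop/padding region of this sample). Should these be masked out of the loss?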

In [None]:
frameLosses[0, 38...]

## preds.stops

In [None]:
// Rounded predicted stop values for frames 0..<38 of the single-sample prediction.
round(singlePreds.stops[0, 0..<38].squeezingShape(at: 1))

In [None]:
// Rounded predicted stop values for frames 38 onward of the single-sample prediction.
round(singlePreds.stops[0, 38...].squeezingShape(at: 1))