# Generate motion

In [None]:
// for local development
// Jupyter install directives: build the listed modules from the local package
// checkout at /notebooks/language2motion.gt with SwiftPM flags `-c release`,
// using the install location given on the first directive.
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter LangMotionModels

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import ModelSupport
import Datasets
import SummaryWriter
import LangMotionModels

## Shell helpers (e.g. for checking the GPU)

In [None]:
import Foundation

/// Runs `command` through `/bin/bash -c` and returns what it wrote to standard output.
///
/// Only standard output is captured; standard error is left at the `Process` default.
///
/// - Parameter command: A bash command line. It is handed to the shell verbatim with no
///   escaping, so it must come from a trusted source.
/// - Returns: The captured output decoded as UTF-8. Invalid byte sequences are replaced
///   with U+FFFD instead of crashing. If bash cannot be started, a one-line diagnostic
///   beginning with `shell:` is returned.
func shell(_ command: String) -> String {
    let task = Process()
    let pipe = Pipe()

    task.executableURL = URL(fileURLWithPath: "/bin/bash")
    task.arguments = ["-c", command]
    task.standardOutput = pipe

    // `run()` reports launch failures as a Swift error, unlike the deprecated `launch()`.
    do {
        try task.run()
    } catch {
        return "shell: could not launch /bin/bash: \(error)"
    }

    let data = pipe.fileHandleForReading.readDataToEndOfFile()
    // Lossy decoding: a command that emits non-UTF-8 bytes must not take down the kernel.
    return String(decoding: data, as: UTF8.self)
}

/// Runs `command` via `shell(_:)` and prints the returned output.
func sh(_ command: String) {
    let output = shell(command)
    print(output)
}

## Select dataset

In [None]:
// Dataset variant to use; its raw value is appended to the base dataset name.
let datasetSize = DatasetSize.mini
// Read later when the motion dataset file name is built.
let dataset_name = "motion_dataset_v3.10Hz.\(datasetSize.rawValue)"

## Set training params

In [None]:
// Run configuration. These names are read by later cells, so they are kept as-is.
let runName: String = "run_1"
let batchSize: Int = 1
// let batchSize = 150
let maxTextSequenceLength: Int = 20
let maxMotionLength: Int = 100
let nEpochs: Int = 5
let learningRate: Float = 5e-4

// Echo the configuration so it is recorded in the notebook output.
print("runName: \(runName)")
print("batchSize: \(batchSize)")
print("maxTextSequenceLength: \(maxTextSequenceLength)")
print("maxMotionLength: \(maxMotionLength)")
print("nEpochs: \(nEpochs)")
print("learningRate: \(learningRate)")

// Root data directory (alternative location kept for reference):
// let dataURL = URL(fileURLWithPath: "/content/data/")
let dataURL: URL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
// NOTE(review): "plist" is appended without a dot, so dataset_name presumably already
// ends with one (via datasetSize.rawValue) — confirm against DatasetSize.
let motionDatasetURL: URL = dataURL.appendingPathComponent(dataset_name + "plist")
let langDatasetURL: URL = dataURL.appendingPathComponent("labels_ds_v2.csv")

## Select eager or X10 backend

In [None]:
// Backend selection: the eager backend is active, the XLA alternative is kept commented out.
// let device = Device.defaultXLA
let device: Device = .defaultTFEager
print(device)

## Instantiate model

In [None]:
/// instantiate text processor
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
// Plain `try` (as already used when loading the dataset) surfaces a missing or unreadable
// vocabulary file as a thrown error instead of trapping the whole process like `try!` does.
let vocabulary: Vocabulary = try Vocabulary(fromFile: vocabularyURL)
// Case-insensitive BERT tokenizer with "[UNK]" for unknown tokens and no token-length limit.
let tokenizer: Tokenizer = BERTTokenizer(
    vocabulary: vocabulary,
    caseSensitive: false,
    unknownToken: "[UNK]",
    maxTokenLength: nil
)
// Shared preprocessor used by both the dataset loader and the decoding function.
let textProcessor = TextProcessor2(
    vocabulary: vocabulary,
    tokenizer: tokenizer,
    maxTextSequenceLength: maxTextSequenceLength,
    maxMotionLength: maxMotionLength
)

/// instantiate model
// Transformer hyperparameters.
let vocabSize = vocabulary.count
let nbJoints = 47 // TODO: get value from dataset
let layerCount = 6
let modelSize = 256
let feedForwardSize = 1024
let headCount = 8
let dropoutProbability = 0.1

var transformer = LangMotionTransformer(
    vocabSize: vocabSize, nbJoints: nbJoints,
    layerCount: layerCount, modelSize: modelSize,
    feedForwardSize: feedForwardSize, headCount: headCount,
    dropoutProbability: dropoutProbability)

// Mixture model head; its input size and joint count are both nbJoints.
let nbMixtures = 20
// TODO: integrate MotionGaussianMixtureModel with Generator
var mixtureModel = MotionGaussianMixtureModel(
    inputSize: nbJoints,
    nbJoints: nbJoints,
    nbMixtures: nbMixtures
)
// mixtureModel.move(to: device)

// Combine both parts and move the combined model to the selected device.
var model = LangMotionModel(transformer: transformer, mixtureModel: mixtureModel)
model.move(to: device)

## Load dataset

In [None]:
print("\nLoading dataset...")

// Every example is turned into a LangMotionBatch by the shared textProcessor.
var dataset = try Lang2Motion(
    motionDatasetURL: motionDatasetURL,
    langDatasetURL: langDatasetURL,
    batchSize: batchSize
) { (example: Lang2Motion.Example) -> LangMotionBatch in
    textProcessor.preprocess(example: example)
}

print("Dataset acquired.")

## Set up decoding

In [None]:
/// Generates a motion for `sentence`, prints diagnostics and saves the result as a PNG.
///
/// Uses the notebook-level `dataset`, `textProcessor`, `model`, `device`, `nbJoints`,
/// `nbMixtures`, `maxMotionLength` and `dataURL`. The motion is obtained by passing the
/// output of `model.generate` to `MotionDecoder.performNormalMixtureSampling` and then
/// undoing the dataset scaling with `dataset.scaler.inverse_transform`.
///
/// Side effects: sets `Context.local.learningPhase` to `.inference` (and leaves it set),
/// prints to standard output, and writes `motion_images/<prefix>.png` under `dataURL`.
/// If the dataset has no training examples, a message is printed and nothing is generated.
///
/// - Parameters:
///   - sentence: Text description of the motion to generate.
///   - prefix: Base name of the output image; also used in the image description.
public func greedyDecodeMotion(sentence: String, prefix: String = "prefix") {
    // FIXME: for generation don't supply motion in a batch, maybe neutral motion frame only
    // The batch format needs a motion sample, so the first training example's motion is
    // borrowed as a placeholder. Guard instead of indexing so an empty split cannot trap.
    guard let firstExample = dataset.trainExamples.first else {
        print("\ngreedyDecodeMotion(sentence: \"\(sentence)\"): no training examples available, nothing generated")
        return
    }
    let example = Lang2Motion.Example(sampleID: -1, sentence: sentence, motionSample: firstExample.motionSample)
    print("\ngreedyDecodeMotion(sentence: \"\(sentence)\")")

    let singleBatch = textProcessor.preprocess(example: example)
    LangMotionBatch.printBatch(singleBatch)

    print("\nGenerate:")
    print("=========")
    Context.local.learningPhase = .inference
    let singlePreds = model.generate(input: LangMotionBatch(copying: singleBatch, to: device))//.squeezingShape(at: 0)
    singlePreds.printPreds()

    let (motion, logProbs, done) = MotionDecoder.performNormalMixtureSampling(
        preds: singlePreds, nb_joints: nbJoints, nb_mixtures: nbMixtures, maxMotionLength: maxMotionLength)

    let descaledMotion = dataset.scaler.inverse_transform(motion)

    print("\nmotion.shape: \(motion.shape)")
    print("log_probs.count: \(logProbs.count)")
    print("done.shape: \(done.shape)")
    print("done: \(done)")
    // print("log_probs: \(logProbs)")
    // print("descaled_motion: \(descaledMotion)")

    let imageURL = dataURL.appendingPathComponent("motion_images/\(prefix).png")
    // Make sure the output directory exists before writing.
    // NOTE(review): presumably motionToImg does not create it itself — confirm.
    let imageDirectoryURL = imageURL.deletingLastPathComponent()
    do {
        try FileManager.default.createDirectory(at: imageDirectoryURL, withIntermediateDirectories: true)
    } catch {
        print("Warning: could not create \(imageDirectoryURL.path): \(error)")
    }
    motionToImg(url: imageURL, motion: descaledMotion, motionFlag: done, padTo: maxMotionLength, descr: "\(prefix), \(example.sentence)")
    print("Saved image: \(imageURL.path)")
}

## Generate motion

In [None]:
// TODO: show motion inline
// The image is written to motion_images/foo9.png under dataURL.
greedyDecodeMotion(
    sentence: "human is walking",
    prefix: "foo9"
)