In [None]:
// for local development
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets TranslationModels TextModels ModelSupport SummaryWriter LangMotionModels Checkpoints

In [None]:
import TensorFlow
import TextModels
import TranslationModels
import Foundation
import FoundationXML
import ModelSupport
import Datasets
import SummaryWriter
import LangMotionModels
import Checkpoints
import PythonKit

In [None]:
// Handle to Python's numpy module, obtained through `Python.import`.
let np: PythonObject = Python.import("numpy")

In [None]:
// Include the notebook display helper file, then call `enable_matplotlib("inline")`
// on its IPython shell so matplotlib output is shown inline.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

In [None]:
// Device handed to the dataset loader: the default TensorFlow eager device.
let device: Device = .defaultTFEager

In [None]:
// Length limits passed to sentence and motion preprocessing.
let maxTextSequenceLength: Int = 40
let maxMotionLength: Int = 150

In [None]:
// Dataset variant to load, and the batch size handed to the dataset loader.
let datasetSize = DatasetSize.micro
let batchSize: Int = 150

In [None]:
// Root data directory, and the motion dataset plist for the selected `datasetSize`.
// NOTE(review): the file name has no "." between `rawValue` and "plist" —
// presumably `DatasetSize.rawValue` already ends with one; confirm.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.10Hz.\(datasetSize.rawValue)plist")

In [None]:
/// Build the text pipeline: vocabulary file -> vocabulary -> tokenizer -> text processor.
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
// `try!`: if the vocabulary cannot be loaded, execution traps on this line.
let vocabulary: Vocabulary = try! Vocabulary(fromFile: vocabularyURL)
let tokenizer: Tokenizer = BERTTokenizer(
    vocabulary: vocabulary,
    caseSensitive: false,
    unknownToken: "[UNK]",
    maxTokenLength: nil
)
let textProcessor = TextProcessor(vocabulary: vocabulary, tokenizer: tokenizer)

In [None]:
print("\nLoading dataset...")

// Load the motion dataset. The trailing closure converts each `MotionSample`
// into a `LangMotionBatch` built from its preprocessed first annotation and
// its preprocessed target motion.
//
// `maxMotionLength` is passed both to the dataset initializer and to the
// per-sample preprocessing, so the two limits cannot drift apart (the
// initializer previously received a separate literal 150).
var dataset = try Lang2Motion(
    motionDatasetURL: motionDatasetURL,
    batchSize: batchSize,
    minMotionLength: 10,
    maxMotionLength: maxMotionLength,
    trainTestSplit: 1.0,
    device: device
) { (motionSample: MotionSample) -> LangMotionBatch in
    // Only the first annotation of a sample is used as its sentence.
    let sentence = textProcessor.preprocess(
        sentence: motionSample.annotations[0],
        maxTextSequenceLength: maxTextSequenceLength
    )
    let (motionPart, target) = LangMotionBatch.preprocessTargetMotion(
        sampleID: motionSample.sampleID,
        motion: motionSample.motion,
        maxMotionLength: maxMotionLength
    )
    let source = LangMotionBatch.Source(sentence: sentence, motionPart: motionPart)
    return LangMotionBatch(data: source, label: target)
}

print("Dataset acquired.")

# coding

In [None]:
// TODO: do discretization of scaled joint values
// TODO: save & load scaling info

In [None]:
// TODO: get motion tensor

In [None]:
// let motion = dataset.motionSamples[0].motion
// motion.shape

In [None]:
// Display how many motion samples the loaded dataset holds.
dataset.motionSamples.count

In [None]:
// TODO: flatten without padding

In [None]:
// Pull the `motion` value out of every sample.
let motions1 = dataset.motionSamples.map { sample in sample.motion }

In [None]:
// Join all per-sample motions into a single tensor along axis 0.
let motions = Tensor(
    concatenating: motions1,
    alongAxis: 0
)

In [None]:
motions.shape

In [None]:
//pip install feature-engine

In [None]:
// from feature_engine.discretisers import EqualFrequencyDiscretiser
// discretizer = EqualFrequencyDiscretiser(q=10, variables = ['var1', 'var2'])

In [None]:
// let discretisers  = Python.import("feature_engine.discretisers")

In [None]:
// let discretizer = discretisers.EqualFrequencyDiscretiser(q: 10, variables: ["var1", "var2"])

In [None]:
// discretizer

In [None]:
// Flatten every motion value into one vector, then add a trailing axis of
// size 1 so the values form a single column.
// (Earlier hard-coded variant: motions.reshaped(to: [5907*50*47]))
let t1 = motions
    .flattened()
    .expandingShape(at: 1)
t1.shape

In [None]:
// Hand the column tensor over to Python as a numpy array.
let t1np: PythonObject = t1.makeNumpyArray()

In [None]:
t1np.shape

In [None]:
// discretizer.fit(t1np)

In [None]:
// Python equivalent: from sklearn.preprocessing import KBinsDiscretizer
let preprocessing: PythonObject = Python.import("sklearn.preprocessing")

In [None]:
// Python equivalent (shown with a different bin count):
//   discretizer = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy='quantile')
let discretizer2 = preprocessing.KBinsDiscretizer(
    n_bins: 200,
    encode: "ordinal",
    strategy: "quantile"
)
discretizer2

In [None]:
// Fit the discretizer on the column of all motion values.
discretizer2.fit(t1np)

In [None]:
// Discretize the same values with the fitted discretizer (ordinal encoding).
let t2np = discretizer2.transform(t1np)

In [None]:
// Display the first ten discretized rows.
t2np[0..<10]

In [None]:
// Run the inverse transform on a single encoded value (40) to see what it maps back to.
discretizer2.inverse_transform([[40]])

## roundtrip

In [None]:
// Round trip on one sample: start from the motion of the first dataset sample.
let motion = dataset.motionSamples[0].motion
motion.shape

In [None]:
// Same column layout as the data the discretizer was fitted on.
let t3np = motion
    .flattened()
    .expandingShape(at: 1)
    .makeNumpyArray()

In [None]:
// Discretize the single motion.
let t4np = discretizer2.transform(t3np)
t4np.shape

In [None]:
t4np[0..<10]

In [None]:
// Bring the discretized values back into Swift, reshape them like `motion`,
// and convert them to Int32.
// `!`: `Tensor<Float>(numpy:)` is failable; a nil result traps here.
let dMotion = Tensor<Int32>(
    Tensor<Float>(numpy: t4np)!.reshaped(like: motion)
)
dMotion.shape

In [None]:
dMotion[0, 0]

In [None]:
// Inverse direction: discretized tensor -> numpy column -> inverse transform.
let dMotion_np = dMotion
    .flattened()
    .expandingShape(at: 1)
    .makeNumpyArray()
let motion_np = discretizer2.inverse_transform(dMotion_np)
motion_np.shape

In [None]:
// Inspect the element type of the inverse-transformed array.
// `type(of:)` on a subscripted PythonObject only ever reports `PythonObject`,
// so ask the numpy array for its `dtype` instead.
print(motion_np.dtype)

In [None]:
// Convert the inverse-transformed numpy array to a Swift tensor.
// numpy arrays have no `asType` attribute (the method is `astype`), so the
// array itself is passed to the failable initializer; the displayed value is
// an Optional that is nil if the conversion is rejected.
Tensor<Double>(numpy: motion_np)

In [None]:
// Reconstructed motion: numpy -> Tensor<Double>, reshaped like `motion`,
// then converted to Float.
// `!`: a nil result from `Tensor<Double>(numpy:)` traps here.
let dMotion2 = Tensor<Float>(
    Tensor<Double>(numpy: motion_np)!.reshaped(like: motion)
)
dMotion2.shape

In [None]:
// Compare index 0 of the three tensors side by side: original motion first.
motion[0]

In [None]:
// Discretized (Int32) version.
dMotion[0]

In [None]:
// Version reconstructed through the inverse transform.
dMotion2[0]