In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Datasets ModelSupport

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Datasets
		ModelSupport
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpv80yq6c5/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import Datasets
import ModelSupport

In [3]:
// Pull in the notebook display helper and switch matplotlib to the inline backend
// (the recorded cell output below shows the 'inline' backend being selected).
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [10]:
// Base directory for data files; later cells append the dataset and label file names to it.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")

In [11]:
// Batching / sequence-length settings passed to the dataset initializer.
let batchSize = 10
let maxSequenceLength = 300 // alternative value noted by the author: 600

// Input files, both resolved relative to dataURL.
let serializedDatasetURL = dataURL.appendingPathComponent("motion_dataset.motion_flag.normalized.plist")
let labelsURL = dataURL.appendingPathComponent("labels_ds_v2.csv")

print("batchSize: \(batchSize)")
print("maxSequenceLength: \(maxSequenceLength)")

print("\nLoading dataset...")
// `try!` traps if the initializer throws; the trailing closure converts one
// example into a labeled batch of tensors.
let dataset = try! Motion2Label2(
    serializedDatasetURL: serializedDatasetURL,
    labelsURL: labelsURL,
    maxSequenceLength: maxSequenceLength,
    batchSize: batchSize
) { (example: Motion2LabelExample) -> LabeledMotionBatch in
    // TODO: move this to dataset class
    let frames = Tensor<Float>(example.motionSample.motionFramesArray)

    // The motion flag is read from column 44 of the frame tensor: slice it as a
    // one-column range, then squeeze axis 1 away.
    let flagColumn = 44
    let flags = Tensor<Int32>(frames[0..., flagColumn...flagColumn].squeezingShape(at: 1))

    // Size of the first dimension of the frame tensor, stored as a scalar tensor.
    let frameCount = Tensor<Int32>(Int32(frames.shape[0]))

    // Force-unwrap: traps if the example carries no label.
    let labelTensor = Tensor<Int32>(Int32(example.label!.idx))

    return LabeledMotionBatch(
        data: MotionBatch(motionFrames: frames, motionFlag: flags, origMotionFramesCount: frameCount),
        label: labelTensor
    )
}

print("dataset.trainingExamples.count: \(dataset.trainingExamples.count)")
print("dataset.validationExamples.count: \(dataset.validationExamples.count)")

batchSize: 10
maxSequenceLength: 300

Loading dataset...
MotionData(motionSamples: 3911)
dataset.trainingExamples.count: 2410
dataset.validationExamples.count: 602


# balance dataset

In [None]:
// + sample n samples per class
// + put together sampled motion samples
// + save plist

In [12]:
// Keep only the motion samples that have at least one annotation.
let motionSamplesWithAnnotations = dataset.motionData.motionSamples.filter { sample in
    !sample.annotations.isEmpty
}
motionSamplesWithAnnotations.count

3012


In [13]:
/// Returns the motion samples whose label index equals `classIdx`.
///
/// Labels are looked up through the notebook-level `dataset` using each
/// sample's `sampleID`. A sample for which `getLabel` returns `nil` is left
/// out of the result instead of trapping on a force-unwrap.
///
/// - Parameters:
///   - motionSamples: Samples to search.
///   - classIdx: Label index to match.
/// - Returns: The matching samples, in their original order.
func filterSamples(_ motionSamples: [MotionSample], classIdx: Int) -> [MotionSample] {
    return motionSamples.filter { (ms: MotionSample) -> Bool in
        // Optional chaining: a missing label yields nil, which never equals classIdx.
        dataset.getLabel(ms.sampleID)?.idx == classIdx
    }
}

In [14]:
// TODO: implement upsampling

In [15]:
// Number of samples requested per class. In the recorded run the smallest
// class has exactly 103 samples, so every class ends up with 103.
let maxPerClass = 103

// One array of chosen samples per label index, in label-index order.
let motionSampleArrays = (0..<dataset.labels.count).map { (classIdx: Int) -> [MotionSample] in
    let candidates = filterSamples(motionSamplesWithAnnotations, classIdx: classIdx)
    // `choose` is defined outside this notebook; presumably a random draw of
    // up to maxPerClass elements — confirm against its implementation.
    let picked = Array(candidates.choose(maxPerClass))
    // Log (available, chosen) counts for this class.
    print((candidates.count, picked.count))
    return picked
}

(1216, 103)
(644, 103)
(103, 103)
(400, 103)
(649, 103)


In [16]:
// Number of per-class sample arrays (one entry per label index).
motionSampleArrays.count

5


In [17]:
// Concatenate the per-class arrays into a single flat list, keeping class order.
let balancedMotionSamples = motionSampleArrays.flatMap { $0 }
balancedMotionSamples.count

515


In [18]:
// Swap the balanced samples into the motion data object and serialize it.
// NOTE(review): the property assignment goes through a `let` binding, so
// MotionData must be a reference type; if `dataset.motionData` returns the
// stored instance rather than a copy, `dataset` now also sees only the
// balanced samples — confirm this is intended.
let motionData = dataset.motionData
motionData.motionSamples = balancedMotionSamples

// The sample count is embedded in the file name. This rebinds
// `serializedDatasetURL` from the loading cell, which relies on the notebook
// allowing redeclaration across cells.
let balancedFileName = "motion_dataset.motion_flag.balanced.\(motionData.motionSamples.count).plist"
let serializedDatasetURL = dataURL.appendingPathComponent(balancedFileName)

motionData.write(to: serializedDatasetURL)
print(serializedDatasetURL.path)

/notebooks/language2motion.gt/data/motion_dataset.motion_flag.balanced.515.plist
