In [1]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt/code")' Datasets ModelSupport

Installing packages:
	.package(path: "/notebooks/language2motion.gt/code")
		Datasets
		ModelSupport
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmp7ba3j21x/swift-install
[1/2] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import Datasets
import ModelSupport

In [3]:
%include "EnableIPythonDisplay.swift"
// Ask the IPython shell to use the "inline" matplotlib backend for this notebook.
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


# load dataset

In [4]:
// Directory that the dataset file used below is resolved against.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")

In [5]:
// Full path of the serialized motion dataset (.plist) inside the data directory.
let serializedDatasetURL = dataURL.appendingPathComponent("motion_dataset.motion_flag.normalized.downsampled.sampled.4860.plist")

In [6]:
// Build the dataset from the serialized file and print its description as a quick sanity check.
let motionDataset = MotionDataset(from: serializedDatasetURL)
print(motionDataset.description)

MotionDataset(motionSamples: 4860)


# train-test split

In [None]:
// + load samples
// + create list of sampleIds
// + train-test split sampleIds
// + create train list of motion samples

In [7]:
extension Collection where Iterator.Element == MotionSample {
    /// Collects the motion samples that belong to the given sample ids.
    ///
    /// - Parameters:
    ///   - sampleIds: Sample ids to collect; the result follows this order.
    ///   - mapping: For each sample id, the zero-based offsets (counted from the
    ///     start of the collection) of the elements carrying that id.
    /// - Returns: The matching samples, grouped by id in `sampleIds` order. An id
    ///   that is absent from `mapping` contributes no samples.
    func motionSamplesWithIds(_ sampleIds: [Int], mapping: [Int: [Int]]) -> [MotionSample] {
        // The mapping stores offsets, not `Index` values. Materialising the
        // collection as an array makes offset lookup valid for every collection
        // type (including slices whose `startIndex` is not 0) without force-casting.
        let elements = Array(self)
        var motionSamples: [MotionSample] = []
        for sampleId in sampleIds {
            for offset in mapping[sampleId, default: []] {
                motionSamples.append(elements[offset])
            }
        }
        return motionSamples
    }


    /// Splits the samples into train/test buckets so that all samples sharing a
    /// `sampleID` end up in the same bucket.
    ///
    /// The split is performed on the sorted list of unique sample ids via
    /// `trainTestSplit(split:)`; each resulting id list is then expanded back into
    /// the samples that carry those ids.
    ///
    /// - Parameter split: Forwarded unchanged to `trainTestSplit(split:)` to decide
    ///   how the unique sample ids are divided between the two buckets.
    /// - Returns: The train samples and the test samples.
    public func trainTestSplitMotionSamples(split: Double) -> (train: Array<Element>, test: Array<Element>) {
        let allSampleIds = self.map { $0.sampleID }
        let sampleIds = Set(allSampleIds).sorted()
        // Honour the caller's ratio instead of a hard-coded value.
        let (trainSampleIds, testSampleIds) = sampleIds.trainTestSplit(split: split)

        // sampleId -> offsets of the elements with that id, in collection order.
        var mapping: [Int: [Int]] = [:]
        for (offset, sampleId) in allSampleIds.enumerated() {
            mapping[sampleId, default: []].append(offset)
        }

        let trainMotionSamples = motionSamplesWithIds(trainSampleIds, mapping: mapping)
        let testMotionSamples = motionSamplesWithIds(testSampleIds, mapping: mapping)
        return (trainMotionSamples, testMotionSamples)
    }
}

In [8]:
// Split the motion samples into train and test sets (split argument 0.8),
// then display how many samples landed in each set.
let (train, test) = motionDataset.motionSamples.trainTestSplitMotionSamples(split: 0.8)
(train.count, test.count)

▿ 2 elements
  - .0 : 3890
  - .1 : 970


In [11]:
// Show the sample ids of the first (at most) 20 training samples; `prefix`
// does not trap when fewer than 20 samples are available.
train.prefix(20).map { $0.sampleID }

▿ 20 elements
  - 0 : 882
  - 1 : 882
  - 2 : 882
  - 3 : 882
  - 4 : 882
  - 5 : 882
  - 6 : 882
  - 7 : 882
  - 8 : 882
  - 9 : 882
  - 10 : 363
  - 11 : 363
  - 12 : 363
  - 13 : 363
  - 14 : 363
  - 15 : 363
  - 16 : 363
  - 17 : 363
  - 18 : 363
  - 19 : 363


In [10]:
// Show the sample ids of the first (at most) 20 test samples; `prefix`
// does not trap when fewer than 20 samples are available.
test.prefix(20).map { $0.sampleID }

▿ 20 elements
  - 0 : 3375
  - 1 : 3375
  - 2 : 3375
  - 3 : 3375
  - 4 : 3375
  - 5 : 3375
  - 6 : 3375
  - 7 : 3375
  - 8 : 3375
  - 9 : 3375
  - 10 : 3117
  - 11 : 3117
  - 12 : 3117
  - 13 : 3117
  - 14 : 3117
  - 15 : 3117
  - 16 : 3117
  - 17 : 3117
  - 18 : 3117
  - 19 : 3117
