In [1]:
// Install directives: build the local language2motion.gt package with the
// SwiftPM flags `-c release` and make the Datasets, ModelSupport and
// TextModels products importable from this notebook.
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets ModelSupport TextModels

Installing packages:
	.package(path: "/notebooks/language2motion.gt")
		Datasets
		ModelSupport
		TextModels
With SwiftPM flags: ['-c', 'release']
Working in: /tmp/tmpw8cp3c0_/swift-install
[1/2] Compiling Datasets ArrayUtils.swift
[2/3] Compiling jupyterInstalledPackages jupyterInstalledPackages.swift
Initializing Swift...
Installation complete!


In [2]:
import Foundation
import TensorFlow
import PythonKit

import Datasets
import ModelSupport
import TextModels

In [3]:
// Pull in the IPython display helpers and request the "inline" matplotlib backend.
%include "EnableIPythonDisplay.swift"
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


In [4]:
// pandas, reached through PythonKit; used further down to build a DataFrame and write it to CSV.
let pd = Python.import("pandas")

In [5]:
// Directory that holds every data file referenced by this notebook.
let dataURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/")
// Alternative motion file (the ".mini" variant); uncomment this line and comment out the next to use it.
// let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.norm.10Hz.mini.plist")
let motionDatasetURL = dataURL.appendingPathComponent("motion_dataset_v3.norm.10Hz.plist")
// CSV file passed to Motion2Lang as its language dataset.
let langDatasetURL = dataURL.appendingPathComponent("labels_ds_v2.csv")

In [6]:
// Batching / sequence-length settings passed to the dataset and the text processor.
let batchSize = 100
let maxSequenceLength = 50

// Vocabulary file for the tokenizer. Plain `try` (matching the dataset-loading
// cell) lets a missing or unreadable vocab.txt surface as a reported error
// instead of trapping the kernel the way `try!` would.
let vocabularyURL = dataURL.appendingPathComponent("vocab.txt")
let vocabulary = try Vocabulary(fromFile: vocabularyURL)

// Case-insensitive BERT tokenizer with no per-token length limit.
let tokenizer: Tokenizer = BERTTokenizer(
    vocabulary: vocabulary,
    caseSensitive: false,
    unknownToken: "[UNK]",
    maxTokenLength: nil
)

// Text processor that turns dataset examples into model batches (see its use in the dataset-loading cell).
let textProcessor = TextProcessor(
    vocabulary: vocabulary,
    tokenizer: tokenizer,
    maxSequenceLength: maxSequenceLength
)

In [7]:
print("\nLoading dataset...")

// Construct the Motion2Lang dataset from the motion and language files.
// The trailing closure maps one example to a MotionLangBatch via the text processor.
var dataset = try Motion2Lang(
    motionDatasetURL: motionDatasetURL,
    langDatasetURL: langDatasetURL,
    maxSequenceLength: maxSequenceLength,
    batchSize: batchSize
) { (example: Motion2Lang.Example) -> MotionLangBatch in
    textProcessor.preprocess(example: example)
}

print("Dataset acquired.")


Loading dataset...
MotionDataset(motionSamples: 39102)
keeping 30120 annotatated motions
keeping 29970 longer motions, with minimum 10 frames
Dataset acquired.


In [8]:
// Number of motion samples held by the dataset's underlying motion dataset.
dataset.motionDataset.motionSamples.count

39102


In [9]:
// Collect the keys of the motion-sample dictionary into an array,
// then show how many there are together with the first three.
let sampleIDs = Array(dataset.motionSampleDict.keys)
(sampleIDs.count, sampleIDs[0..<3])

▿ 2 elements
  - .0 : 3911
  ▿ .1 : 3 elements
    - 0 : 1768
    - 1 : 968
    - 2 : 1899


In [10]:
// Number of entries in the language-record dictionary.
dataset.langRecsDict.count

3012


In [11]:
// Look up the language record for every sample ID and keep only the lookups
// that succeeded. Elements remain Optional; later cells unwrap them.
let matchedLangRecs = sampleIDs
    .map { sampleID in dataset.langRecsDict[sampleID] }
    .filter { langRec in langRec != nil }
matchedLangRecs.count

3012


In [12]:
// Inspect the first two matched language records.
matchedLangRecs[0..<2]

▿ 2 elements
  ▿ 0 : Optional<LangRec>
    ▿ some : LangRec
      - sampleID : 1768
      - text : "A person walks forward."
      - label : "Walking or running"
  ▿ 1 : Optional<LangRec>
    ▿ some : LangRec
      - sampleID : 968
      - text : "A person walks four fast steps forward."
      - label : "Walking or running"


In [13]:
// Assemble a Python dict with one column list per field, ready to be handed to pandas.
// The force-unwraps rely on matchedLangRecs containing no nil entries.
let pythonDict = Python.dict()

let sampleIDColumn = matchedLangRecs.map { langRec in Python.int(langRec!.sampleID) }
pythonDict["sample_id"] = Python.list(sampleIDColumn)

let textColumn = matchedLangRecs.map { langRec in Python.str(langRec!.text) }
pythonDict["text"] = Python.list(textColumn)

In [14]:
// Build the DataFrame from the column dict and order its rows by sample_id.
// Without `inplace`, sort_values returns the sorted frame, so the result is
// bound directly instead of mutating a previously created frame.
// `var` is retained so any cell that reassigns `df` keeps working.
var df = pd.DataFrame(pythonDict).sort_values(by: "sample_id")
df

      sample_id                                               text
463           1                      A person is walking forwards.
745           2                    A person walks 4 steps forward.
1786          3                                    A human walking
726           4            A person walks forward at medium speed.
2902          5              A human walks four slow steps forward
...         ...                                                ...
28         3961        a person running and stepipng over one step
732        3962                    A person runs straight forward.
2823       3964  A person is sprinting up and down a small hill...
2252       3965  A person runs forward, briefly touches the gro...
966        3966  a person stumbles on the ground but gets up an...

[3012 rows x 2 columns]


In [15]:
// Write the DataFrame to a CSV file in the data directory, without the index column.
// (Use the file name "annotations.mini.csv" instead when exporting a mini variant.)
let annotationsPath = dataURL.appendingPathComponent("annotations.csv").path
df.to_csv(annotationsPath, index: false)

None
