# Downsample dataset to 10Hz

In [None]:
%install-location /notebooks/language2motion.gt/swift-install
%install-swiftpm-flags -c release
%install '.package(path: "/notebooks/language2motion.gt")' Datasets ModelSupport

In [None]:
import TensorFlow
import Foundation
import Datasets

In [None]:
// TODO: kill motion flag

In [None]:
extension MotionSample {
    /// Loads one motion recording and splits it into several downsampled `MotionSample`s.
    ///
    /// The recording's original frequency is estimated as `frameCount / lastTimestep`, and the
    /// downsampling factor is that frequency (truncated to an integer) divided by `freq`.
    /// Frame `i` is assigned to bucket `i % factor`, so each bucket holds every `factor`-th frame
    /// at a different starting offset. One `MotionSample` is returned per non-empty bucket; all of
    /// them share `sampleID`, the annotations and the joint names.
    ///
    /// - Parameters:
    ///   - sampleID: Identifier stored in every returned sample.
    ///   - mmmURL: Location of the MMM motion file passed to `MotionSample.loadMMM`.
    ///   - annotationsURL: Location of the annotations file passed to `MotionSample.getAnnotations`.
    ///   - freq: Target frequency; must be positive.
    ///   - maxFrames: Upper bound on the number of frames kept in each returned sample.
    /// - Returns: Up to `factor` samples; an empty array when the recording has no frames.
    public static func downsampledMutlipliedMotionSamples(sampleID: Int, mmmURL: URL, annotationsURL: URL, freq: Int = 10, maxFrames: Int = 500) -> [MotionSample] {
        // A non-positive target frequency would otherwise surface as an opaque division-by-zero
        // or negative-count trap further down.
        precondition(freq > 0, "freq must be positive, got \(freq)")

        let mmm_doc = MotionSample.loadMMM(fileURL: mmmURL)
        let jointNames = MotionSample.getJointNames(mmm_doc: mmm_doc)

        let motionFrames = MotionSample.getMotionFrames(mmm_doc: mmm_doc, jointNames: jointNames)
        let annotations = MotionSample.getAnnotations(fileURL: annotationsURL)
        let timesteps: [Float] = motionFrames.map { $0.timestep }

        // Nothing to downsample when the recording has no frames.
        guard let lastTimestep = timesteps.last else {
            return []
        }

        // calculate factor
        let origFreq = Float(timesteps.count) / lastTimestep
        // `Int(exactly:)` yields nil for inf/NaN (last timestep of 0) or out-of-range values
        // instead of trapping; those cases fall back to "no downsampling".
        let wholeOrigFreq = Int(exactly: origFreq.rounded(.towardZero)) ?? 0
        // A recording slower than `freq` would give a factor of 0 and make `idx % factor` trap,
        // so keep at least one bucket (the frames are then passed through unchanged).
        let factor = max(1, wholeOrigFreq / freq)

        var motionFramesBuckets = [[MotionFrame]](repeating: [], count: factor)
        var timestepsBuckets = [[Float]](repeating: [], count: factor)

        for (idx, frame) in motionFrames.enumerated() {
            let bucket = idx % factor
            // Frames beyond `maxFrames` in a bucket are dropped.
            if motionFramesBuckets[bucket].count < maxFrames {
                motionFramesBuckets[bucket].append(frame)
                timestepsBuckets[bucket].append(timesteps[idx])
            }
        }
        // With fewer frames than buckets, only the first `motionFrames.count` buckets
        // received a frame; skip the rest.
        let nBuckets = min(factor, motionFrames.count)

        return (0..<nBuckets).map {
            MotionSample(
                sampleID: sampleID,
                motionFrames: motionFramesBuckets[$0],
                annotations: annotations,
                jointNames: jointNames,
                timesteps: timestepsBuckets[$0],
                grouppedJoints: false,
                normalized: false
            )
        }
    }
}

In [None]:
extension MotionDataset {
    /// Builds a dataset by reading per-sample MMM and annotation files from a folder.
    ///
    /// Sample IDs `0...maxSampleID` are considered (or a subset of them picked with `choose`
    /// when `sampled` is given). For each ID the files `NNNNN_mmm.xml` and
    /// `NNNNN_annotations.json` are looked up in `datasetFolderURL`; IDs whose MMM file is
    /// missing are reported and skipped.
    ///
    /// - Parameters:
    ///   - datasetFolderURL: Folder containing the sample files.
    ///   - sampled: When non-nil, the value passed to `choose` to pick a subset of sample IDs.
    ///   - freq: When non-nil, each recording is split with
    ///     `MotionSample.downsampledMutlipliedMotionSamples` at this frequency; when nil, one
    ///     `MotionSample` is created per recording.
    ///   - maxFrames: Frame limit forwarded to the sample constructors.
    ///   - maxSampleID: Highest sample ID to consider (inclusive).
    public convenience init(datasetFolderURL: URL, sampled: Int? = nil, freq: Int? = 10, maxFrames: Int = 500, maxSampleID: Int = 3966) {
        let fileManager = FileManager()

        var candidateIDs: [Int] = Array(0...maxSampleID)
        if let sampleCount = sampled {
            candidateIDs = Array(candidateIDs.choose(sampleCount))
        }

        var motionSamples: [MotionSample] = []
        for sampleID in candidateIDs {
            let mmmFilename = String(format: "%05d_mmm.xml", sampleID)
            let annotationsFilename = String(format: "%05d_annotations.json", sampleID)
            print("Sample \(sampleID), \(mmmFilename), \(annotationsFilename)")

            let mmmURL = datasetFolderURL.appendingPathComponent(mmmFilename)
            let annotationsURL = datasetFolderURL.appendingPathComponent(annotationsFilename)

            // Skip IDs that have no motion file on disk.
            guard fileManager.fileExists(atPath: mmmURL.path) else {
                print("** Sample \(sampleID) doesn't exist.")
                continue
            }

            if let targetFreq = freq {
                // Downsampling may yield several samples per recording.
                motionSamples.append(
                    contentsOf: MotionSample.downsampledMutlipliedMotionSamples(
                        sampleID: sampleID,
                        mmmURL: mmmURL,
                        annotationsURL: annotationsURL,
                        freq: targetFreq,
                        maxFrames: maxFrames
                    )
                )
            } else {
                motionSamples.append(
                    MotionSample(sampleID: sampleID, mmmURL: mmmURL, annotationsURL: annotationsURL, grouppedJoints: false, normalized: false, maxFrames: maxFrames)
                )
            }
        }
        print("motionSamples.count: \(motionSamples.count)")
        self.init(datasetFolderURL: datasetFolderURL, motionSamples: motionSamples)
    }
}

In [None]:
// Preprocessing configuration.
let sampled: Int? = 100 // nil
let freq: Int? = 10
let maxFrames = 500

let datasetFolderURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/2017-06-22/")
// Fragments of the output file name; each is empty when the matching option is nil.
let sampledStr = (sampled == nil) ? "" : "sampled."
let freqStr = freq.map { "\($0)Hz." } ?? ""

print("Running MotionData preprocessing (\(String(describing:sampled)))...")

// Build the dataset and print the elapsed time in seconds.
var date = Date()
let motionDataset = MotionDataset(
    datasetFolderURL: datasetFolderURL,
    sampled: sampled,
    freq: freq,
    maxFrames: maxFrames
)
print(abs(date.timeIntervalSinceNow))

// The number of produced samples is embedded in the output file name.
let numberStr = "\(motionDataset.motionSamples.count)."
let serializedDatasetURL = URL(fileURLWithPath: "/notebooks/language2motion.gt/data/motion_dataset.motion_flag.\(freqStr)\(sampledStr)\(numberStr)plist")

// Serialize the dataset and report how long the write took.
date = Date()
print("Encoding to property list..., writing to file '\(serializedDatasetURL.path)'")
motionDataset.write(to: serializedDatasetURL)
print("Done in \(abs(date.timeIntervalSinceNow)) sec.")