In [0]:
import Foundation
import TensorFlow
import Python

In [0]:
let subprocess = Python.import("subprocess")

## Download Data and Labels

In [3]:
let urllib = Python.import("urllib.request")
let fileBaseURL = "https://raw.githubusercontent.com/tensorflow/swift-models/master/Datasets/MNIST/"
let files = ["train-images-idx3-ubyte", "train-labels-idx1-ubyte"]

for file in files {
  let command = "wget "+fileBaseURL+file
  //print(fileBaseURL+files[1])
  subprocess.call(command, shell: true)
}

--2019-06-13 15:58:54--  https://raw.githubusercontent.com/tensorflow/swift-models/master/Datasets/MNIST/train-images-idx3-ubyte
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 47040016 (45M) [application/octet-stream]
Saving to: ‘train-images-idx3-ubyte’


2019-06-13 15:59:00 (194 MB/s) - ‘train-images-idx3-ubyte’ saved [47040016/47040016]

--2019-06-13 15:59:00--  https://raw.githubusercontent.com/tensorflow/swift-models/master/MNIST/train-labels-idx1-ubyte
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60008 (59K) [application/octet-str

## Process Data 

In [0]:
var batchSize:Int = 32 

/// Reads a file into an array of bytes.
func readFile(_ path: String) -> [UInt8] {
    let url = URL(fileURLWithPath: path)
    let data = try! Data(contentsOf: url, options: [])
    return [UInt8](data)
}

/// Reads MNIST images and labels from specified file paths.
func readMNIST(imagesFile: String, labelsFile: String) -> (images: Tensor<Float>,
                                                           labels: Tensor<Int32>) {
    print("Reading data.")
    let images = readFile(imagesFile).dropFirst(16).map(Float.init)
    let labels = readFile(labelsFile).dropFirst(8).map(Int32.init)
    let rowCount = Int(labels.count)
    let imageHeight: Int = 28, imageWidth: Int = 28

    print("Constructing data tensors.")
    return (
        images: Tensor(shape: [rowCount, 1, imageHeight, imageWidth], scalars: images)
            .transposed(withPermutations: [0, 2, 3, 1]) / 255, // NHWC
        labels: Tensor(labels)
    )
}

/// Split data into training and test
func splitTrainTest(data: Tensor<Float>, labels: Tensor<Int32>) -> (Tensor<Float>, Tensor<Int32>, Tensor<Float> , Tensor<Int32>) {
  
  let N = Int(data.shape[0])
  let split = Int(0.8 * Float(N))
  
  let trainX = data[0..<split]
  let trainY = labels[0..<split]
  
  let testX = data[split..<N]
  let testY = labels[split..<N]
  
  return (trainX, trainY, testX, testY)
}

/// Extract a batch of certain size 
func minibatch<Scalar>(in x: Tensor<Scalar>, at index: Int) -> Tensor<Scalar> {
    let start = Int(index * batchSize)
    return x[start..<start+Int(batchSize)]
}

In [5]:
// convert into tensors
let (data, trainNumericLabels) = readMNIST(imagesFile: files[0], labelsFile: files[1])
let labels = Tensor<Int32>(trainNumericLabels)

// split into training and testing 
let (trainX, trainY, testX, testY) = splitTrainTest(data: data, labels: labels)

Reading data.
Constructing data tensors.


## CNN Model

In [0]:
struct CNN: Layer {
  
    typealias Input = Tensor<Float>
    typealias Output = Tensor<Float>
  
    var conv1 = Conv2D<Float>(filterShape: (3, 3, 1, 16), activation: relu) 
    var conv2 = Conv2D<Float>(filterShape: (3, 3, 16, 32), activation: relu) 
 
    var pool = MaxPool2D<Float>(poolSize: (2, 2), strides: (2, 2))
  
    var flatten = Flatten<Float>()
  
    var dense1 = Dense<Float>(inputSize: 5*5*32 , outputSize: 128, activation: tanh)
    var dense2 = Dense<Float>(inputSize: 128 , outputSize: 10)

    @differentiable
    func call(_ input: Input) -> Output {
        let convolved = input.sequenced(through: conv1, pool, conv2, pool)
        return convolved.sequenced(through:flatten, dense1, dense2)
    }
}

In [0]:
// report accuracy of a batch 
func getAccuracy(y:Tensor<Int32>, logits:Tensor<Float>) -> Float{
  let out  = Tensor<Int32>(logits.argmax(squeezingAxis: 1) .== y).sum().scalarized()
  return Float(out) / Float(y.shape[0])
}

//round two decimal places 
func roundTwo(_ input:Float) -> Float{
  return (input*100).rounded()/100
}

In [8]:
var model = CNN()
let optimizer = Adam(for: model)

//warmup 
let tensor = Tensor<Float>(zeros: [1, 28, 28, 1])
print(model(tensor))

[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]


## Training

In [10]:
let stepsInEpoch:Int = Int(Float(testX.shape[0]) / Float(batchSize))
var trainLoss:Float = 0.0
var trainAcc :Float = 0.0
var testLoss:Float = 0.0
var testAcc:Float = 0.0 

var batchCount: Float = 0.0
for epoch in 0...4{
  
  //evaluate metrics
  trainLoss = 0.0
  trainAcc  = 0.0
  batchCount = 0.0
    
  for i in 0..<stepsInEpoch {
  
    //get batches
    let X = minibatch(in: trainX, at: i)
    let y = minibatch(in: trainY, at: i)

    //calculate the loss and gradient
    let (loss, grads) = valueWithGradient(at: model) { model -> Tensor<Float> in
            let logits = model(X)
            return softmaxCrossEntropy(logits: logits, labels: y)
    }

    //make an optimizer step 
    optimizer.update(&model.allDifferentiableVariables, along: grads)    
    
    let logits = model(X) //this is slowing down ? 
    let acc = getAccuracy(y:y, logits:logits)
    
    trainLoss += Float(loss.scalarized())
    trainAcc  += acc
    batchCount += 1
  }
  
  trainLoss /= batchCount
  trainAcc  /= batchCount
 
  //training
  testLoss = 0.0
  testAcc  = 0.0
  
  let logits = model(testX)
  let loss = softmaxCrossEntropy(logits: logits, labels: testY)
  let acc = getAccuracy(y:testY, logits:logits)

  testLoss += Float(loss.scalarized())
  testAcc  += acc
  print("epoch: \(epoch+1), train_loss: \(roundTwo(trainLoss)), test_loss: \(roundTwo(testLoss)), train_acc: \(roundTwo(trainAcc)), test_acc: \(roundTwo(testAcc))" )
}

epoch: 1, train_loss: 0.35, test_loss: 0.16, train_acc: 0.92, test_acc: 0.95
epoch: 2, train_loss: 0.1, test_loss: 0.12, train_acc: 0.97, test_acc: 0.96
epoch: 3, train_loss: 0.06, test_loss: 0.09, train_acc: 0.99, test_acc: 0.97
epoch: 4, train_loss: 0.04, test_loss: 0.08, train_acc: 0.99, test_acc: 0.98
epoch: 5, train_loss: 0.03, test_loss: 0.07, train_acc: 1.0, test_acc: 0.98


License https://github.com/tensorflow/swift-models/blob/stable/LICENSE