In [1]:
using Flux, Statistics, ProgressMeter

In [2]:
# Input data: 1000 samples stored as columns, each with 2 uniform Float32 features.
noisy = rand(Float32, (2, 1000))

2×1000 Matrix{Float32}:
 0.132733   0.165176  0.0542322  0.204965  …  0.851867  0.0567533  0.99939
 0.0827045  0.473151  0.393471   0.505635     0.29307   0.0958287  0.849575

In [3]:
# Show the input matrix again (the last expression of a cell is displayed as its result).
noisy

2×1000 Matrix{Float32}:
 0.132733   0.165176  0.0542322  0.204965  …  0.851867  0.0567533  0.99939
 0.0827045  0.473151  0.393471   0.505635     0.29307   0.0958287  0.849575

In [4]:
# Label for each sample (column): true when exactly one of its two features exceeds 0.5.
truth = map(eachcol(noisy)) do sample
    xor(sample[1] > 0.5, sample[2] > 0.5)
end

1000-element Vector{Bool}:
 0
 0
 0
 1
 1
 1
 0
 1
 1
 1
 1
 1
 1
 ⋮
 0
 1
 1
 1
 1
 0
 0
 1
 0
 1
 0
 0

In [5]:
# Small classifier: 2 inputs -> 3 hidden units -> 2 outputs, finished with softmax.
# `gpu` moves the model to the GPU when one is accessible and otherwise leaves it on the CPU.
model = gpu(
    Chain(
        Dense(2 => 3, tanh),  # the activation is applied by the layer itself
        BatchNorm(3),
        Dense(3 => 2),
        softmax,
    ),
)

┌ Info: The GPU function is being called but the GPU is not accessible. 
│ Defaulting back to the CPU. (No action is required if you want to run on the CPU).
└ @ Flux /Users/suyash/.julia/packages/Flux/kq9Et/src/functor.jl:201


Chain(
  Dense(2 => 3, tanh),                  # 9 parameters
  BatchNorm(3),                         # 6 parameters, plus 6
  Dense(3 => 2),                        # 8 parameters
  NNlib.softmax,
)         # Total: 6 trainable arrays, 23 parameters,
          # plus 2 non-trainable, 6 parameters, summarysize 604 bytes.

In [6]:
# Output of the still-untrained model on all samples, copied back to the CPU.
out1 = cpu(model(gpu(noisy)))

2×1000 Matrix{Float32}:
 0.447656  0.532731  0.568654  0.522085  …  0.227936  0.493412  0.368437
 0.552344  0.467269  0.431346  0.477915     0.772064  0.506588  0.631563

In [7]:
# One-hot encode the labels against the classes [true, false]:
# row 1 marks samples whose label is `true`, row 2 those whose label is `false`.
target = Flux.onehotbatch(truth, [true, false])    

2×1000 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 ⋅  ⋅  ⋅  1  1  1  ⋅  1  1  1  1  1  1  …  ⋅  1  1  1  1  ⋅  ⋅  1  ⋅  1  ⋅  ⋅
 1  1  1  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     1  ⋅  ⋅  ⋅  ⋅  1  1  ⋅  1  ⋅  1  1

In [8]:
# Mini-batch iterator over (inputs, one-hot targets): batches of 64, shuffled.
# The trailing `;` suppresses the cell output.
loader = Flux.DataLoader(gpu((noisy, target)); batchsize=64, shuffle=true);

In [9]:
# Set up the optimiser state for `model` using the Adam rule constructed with 0.01.
# The result mirrors the model's structure, holding one state entry per trainable array.
optim = Flux.setup(Flux.Adam(0.01), model) 

(layers = ((weight = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0 0.0; 0.0 0.0; 0.0 0.0], Float32[0.0 0.0; 0.0 0.0; 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0], (0.9, 0.999))), σ = ()), (λ = (), β = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0], (0.9, 0.999))), γ = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0], (0.9, 0.999))), μ = (), σ² = (), ϵ = (), momentum = (), affine = (), track_stats = (), active = (), chs = ()), (weight = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0 0.0 0.0; 0.0 0.0 0.0], Float32[0.0 0.0 0.0; 0.0 0.0 0.0], (0.9, 0.999))), bias = Leaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), (Float32[0.0, 0.0], Float32[0.0, 0.0], (0.9, 0.999))), 

In [None]:
# Training loop: 1000 passes over `loader`, recording the loss of every mini-batch.
#
# `losses` is given a concrete element type instead of the untyped `[]` (a `Vector{Any}`),
# so the log is stored unboxed; `push!` converts each recorded loss to Float32.
losses = Float32[]
@showprogress for epoch in 1:1_000
    for (x, y) in loader
        # `withgradient` returns the loss value together with a tuple of gradients,
        # one per argument; `model` is the only argument here.
        loss, grads = Flux.withgradient(model) do m
            # Evaluate model and loss inside gradient context:
            y_hat = m(x)
            Flux.crossentropy(y_hat, y)
        end
        # Apply the gradient for `model`, updating both the model and the optimiser state.
        Flux.update!(optim, model, grads[1])
        push!(losses, loss)  # logging, outside gradient context
    end
end

In [11]:
optim  # parameters, momenta and output have all changed

# Output of the trained model, back on the CPU:
# row 1 holds the probability of `true`, row 2 the probability of `false`.
out2 = cpu(model(gpu(noisy)))

# Accuracy: fraction of samples whose thresholded p(true) agrees with the label.
mean(truth .== (out2[1, :] .> 0.5))

0.944

In [None]:
using Plots  # needed for the scatter panels below

# Every panel places the samples at (feature 1, feature 2) and colours them via `zcolor`:
# the true labels, the untrained model's row-1 output, and the trained model's row-1 output.
p_true = scatter(
    noisy[1, :], noisy[2, :];
    zcolor=truth, title="True classification", legend=false,
)
p_raw = scatter(
    noisy[1, :], noisy[2, :];
    zcolor=out1[1, :], title="Untrained network", label="", clims=(0, 1),
)
p_done = scatter(
    noisy[1, :], noisy[2, :];
    zcolor=out2[1, :], title="Trained network", legend=false,
)

# Arrange the three panels side by side in a single figure.
plot(p_true, p_raw, p_done; layout=(1, 3), size=(1000, 330))