In [1]:
using CSV, DataFrames, MarkdownTables

wifi = CSV.read(
    "../src/data/wifi.tsv",
    DataFrame,
    header=["w1", "w2", "w3", "w4", "w5", "w6", "w7", "room"]
)
wifi = wifi[(wifi.room .== 1) .| (wifi.room .== 2), [:w5, :w7, :room]]


Row,w5,w7,room
Unnamed: 0_level_1,Int64,Int64,Int64
1,-71,-81,1
2,-71,-85,1
3,-76,-84,1
4,-77,-80,1
5,-77,-87,1
6,-76,-83,1
7,-69,-84,1
8,-74,-82,1
9,-76,-82,1
10,-80,-91,1


In [3]:
distance(v1, v2) = sqrt(sum((v1 - v2).^2))

struct Point
    xn::Vector{Float64}
    label::String
end

function knn(X::Array{Point}, v::Vector{Float64}, k::Int)
    ds = [distance(x.xn, v) for x in X]
    return X[sortperm(ds)[1:k]]
end

knn (generic function with 1 method)

In [4]:
X = [
    Point(collect(row[[:w5, :w7]]), string(row[:room]))
    for row in eachrow(wifi)
]

using MLUtils

X_test, X_train = splitobs(X, at=0.15)
X_train = collect(X_train)
X_test = collect(X_test)

"Return the element that occurs most frequently in an array"
function majority(items::Vector{T})::T where T
    c = Dict{T, Int}()
    for it in items
        if !haskey(c, it)
            c[it] = 1
        else
            c[it] += 1
        end
    end
    return sort(collect(c), by=x->x[2], rev=true)[1][1]
end

# Compute the accuracy score
total = 0
correct = 0

for p in X_test
    neighbors = knn(X_train, p.xn, 7)
    label = majority([x.label for x in neighbors])
    if label == p.label
        correct += 1
    end
    total += 1
end

println("Accuracy: $(correct / total * 100.0)%")

Accuracy: 96.0%


In [5]:
using PlotlyJS

plot(scatter(
    x = [p.xn[1] for p in X_train],
    y = [p.xn[2] for p in X_train],
    mode = "markers",
))

[33m[1m│ [22m[39m  path = "/Users/vikasprasad/.jlassetregistry.lock"
[33m[1m└ [22m[39m[90m@ Pidfile ~/.julia/packages/Pidfile/DDu3M/src/Pidfile.jl:260[39m


In [4]:
using Embeddings
ft = load_embeddings(FastText_Text{:en})

const get_word_index = Dict(word=>ii for (ii,word) in enumerate(ft.vocab))

function get_embedding(word)
    ind = get_word_index[word]
    emb = ft.embeddings[:,ind]
    return emb
end



300-element Vector{Float32}:
 -0.4461
 -0.057
  0.097
 -0.0307
 -0.1286
  0.2365
 -0.4935
  0.1958
  0.2297
 -0.1422
 -0.116
  0.6392
  0.2673
  ⋮
 -0.0749
  0.2017
 -0.4288
 -0.41
  0.0268
  0.3526
  0.0606
  0.6332
  0.0938
 -0.2404
 -0.2717
  0.1998

In [9]:
get_embedding("red")

300-element Vector{Float32}:
  0.0161
  0.2165
 -0.1969
  0.0378
  0.1469
 -0.2578
  0.0562
  0.1028
  0.2476
  0.0067
  0.1997
  0.0255
 -0.1383
  ⋮
  0.1356
  0.1344
  0.0168
 -0.0745
  0.1535
  0.0663
 -0.328
 -0.052
 -0.1002
 -0.3015
  0.0973
 -0.0093