In [None]:
using AutoRisk
using BayesNets
using DataFrames
using Discretizers
using Distributions
using HDF5
using Interact
using JLD
using PGFPlots
using TikzPictures

In [None]:
# Load the raw feature and target arrays stored under "risk" in the dataset.
# Alternative dataset: "../../data/datasets/april/risk_mc_32.h5"
input_filepath = "../../data/datasets/risk_bn.h5"
raw_features, raw_targets = h5open(input_filepath, "r") do h5file
    (read(h5file, "risk/features"), read(h5file, "risk/targets"))
end;

In [None]:
# Restrict the working set: index the second dimension of the features at its
# last position (named `timestep` here) and keep at most 200000 entries along
# the last dimension (presumably one entry per sample). Both results are views
# into the raw arrays, not copies.
timestep = size(raw_features, 2)
debug_size = min(200000, size(raw_features, 3))
sample_range = 1:debug_size
features = view(raw_features, :, timestep, sample_range);
targets = view(raw_targets, :, sample_range);

In [None]:
# Read the attributes attached to "risk" in the HDF5 file and pull out the
# "feature_names" entry; other cells compare against it to find the row of a
# feature by name.
attributes = h5readattr(input_filepath, "risk")
feature_names = attributes["feature_names"];

In [None]:
# sanitize the data
# Drop samples whose first three target values sum to roughly one (within
# 1e-2); these are taken to be deterministically associated with a collision.
valid_target_inds = find(abs(sum(targets[1:3,:], 1) .- 1.) .> 1e-2)

# keep only samples whose velocity lies strictly inside (0, 30)
vel_ind = find(feature_names .== "velocity")[1]
valid_vel_inds = find(0. .< features[vel_ind, :] .< 30.)

# keep only samples whose fore distance lies strictly inside (0, 100)
dist_ind = find(feature_names .== "fore_m_dist")[1]
valid_dist_inds = find(0. .< features[dist_ind, :] .< 100.)

# A sample survives only if it passes all three filters. The same selection
# is applied to `targets` so that its columns stay aligned with the columns
# of `features` (filtering only `features` would silently break the pairing
# and make this cell inconsistent when re-run).
valid_inds = intersect(valid_target_inds, valid_vel_inds, valid_dist_inds)
features = features[:, valid_inds];
targets = targets[:, valid_inds];
println(size(valid_inds))
println(size(features))

In [None]:
# Select the rows of `features` that feed the Bayes net, locating each one by
# its name in `feature_names`.
bn_feature_names = ["velocity", "fore_m_vel", "fore_m_dist"]
inds = map(name -> find(feature_names .== name)[1], bn_feature_names)
base_data = features[inds,:];

In [None]:
# Add aggressiveness by inferring it from politeness, then stack it beneath the
# base features as one extra row.
politeness_index = find(feature_names .== "lane_politeness")[1]
politness_values = features[politeness_index,:];
aggressiveness_values = infer_correlated_aggressiveness(politness_values);
# turn the inferred values into a 1 x N row so they can be concatenated
aggressiveness_values = reshape(aggressiveness_values, 1, length(aggressiveness_values))
push!(bn_feature_names, "aggressiveness")
data = vcat(base_data, aggressiveness_values)

In [None]:
# is_attentive is handled separately because it is already discrete: values
# above .5 are mapped to class 2 and everything else to class 1.
is_attentive_index = find(feature_names .== "is_attentive")[1]
num_samples = size(data, 2)
is_attentive_values = Int[
    features[is_attentive_index, sidx] > .5 ? 2 : 1 for sidx in 1:num_samples
];

In [None]:
# discretize the data
# Each row of `data` is one continuous variable. It is split into
# `num_bins[vidx]` bins by `algo`; `disc_data` receives the 1-based bin index
# of every sample and `cutpoints[vidx]` the bin edges used for that variable.
num_variables, num_samples = size(data)
num_bins = [6,6,10,4]
disc_data = zeros(Int, num_variables, num_samples)
cutpoints = Vector{Float64}[]
discs = []  # not filled in this cell; kept in case another cell refers to it
algo = DiscretizeUniformWidth # DiscretizeUniformCount
for vidx in 1:num_variables
    disc = LinearDiscretizer(binedges(algo(num_bins[vidx]), data[vidx,:]))
    edges = disc.binedges
    push!(cutpoints, edges)
    last_bin = length(edges) - 1
    for sidx in 1:num_samples
        # Bin k covers [edges[k], edges[k+1]). `searchsortedlast` returns the
        # index of the last edge <= the value, which is exactly k. The clamp
        # puts a value equal to the top edge into the last bin (and anything
        # below the first edge into bin 1). Computing the index explicitly
        # avoids depending on a loop variable leaking out of an inner `for`,
        # which stopped working in Julia 1.0 and would leave every entry at 0.
        bin = searchsortedlast(edges, data[vidx, sidx])
        disc_data[vidx, sidx] = clamp(bin, 1, last_bin)
    end
end
println(cutpoints)

In [None]:
# convert data to dataframe
# One column per Bayes net variable. Rows 1-4 of `disc_data` follow the order
# in which the continuous features were assembled (velocity, fore velocity,
# fore distance, aggressiveness); attentiveness comes from its own vector.
# The column names are the node symbols used when the network is fitted.
training_data = DataFrame(
        velocity = disc_data[1,:], 
        forevelocity = disc_data[2,:],
        foredistance = disc_data[3,:], 
        aggressiveness = disc_data[4,:],
        isattentive = is_attentive_values
);

In [None]:
# Structure of the base network, given as directed edges (source => target):
# fore distance depends on attentiveness and aggressiveness, and velocity
# depends on every other variable.
bn_edges = (
    :isattentive => :foredistance,
    :isattentive => :velocity,
    :aggressiveness => :foredistance,
    :aggressiveness => :velocity,
    :foredistance => :velocity,
    :forevelocity => :velocity,
)
bn = fit(DiscreteBayesNet, training_data, bn_edges)

In [None]:
# Report CPDs: print the table of every node in the fitted network.
cpd_nodes = (:isattentive, :aggressiveness, :foredistance, :forevelocity, :velocity)
for node in cpd_nodes
    println(table(bn, node))
end

In [None]:
# Map each continuous node symbol to the bin edges of its row in `cutpoints`
# (row order: velocity, fore velocity, fore distance, aggressiveness).
var_edges = Dict{Symbol,Vector{Float64}}()
for (row, node) in enumerate((:velocity, :forevelocity, :foredistance, :aggressiveness))
    var_edges[node] = cutpoints[row]
end
var_edges

In [None]:
# Persist the fitted base network together with its bin edges.
base_output_filepath = "../../data/bayesnets/base_test.jld"
JLD.save(base_output_filepath, "bn", bn, "var_edges", var_edges)

In [None]:
# proposal bn
# Build a skewed training set in which the leading samples are forced to be
# inattentive, close to the fore vehicle and aggressive.
#
# The overrides are applied to copies so that `disc_data` and
# `is_attentive_values` keep the real data and the earlier cells can be re-run
# safely. Every range is capped at the number of available samples, so the
# cell also works when fewer than 120000 samples survived filtering. The
# ranges are written without overlap; the resulting values are the same as
# with overlapping bounds where the later assignment wins.
prop_is_attentive = copy(is_attentive_values)
prop_disc_data = copy(disc_data)
n_prop = length(prop_is_attentive)

# inattentive
prop_is_attentive[1:min(100000, n_prop)] .= 1
# close proximity (row 3 holds the fore-distance bins)
prop_disc_data[3, 1:min(19999, n_prop)] .= 1
prop_disc_data[3, 20000:min(79999, n_prop)] .= 2
prop_disc_data[3, 80000:min(120000, n_prop)] .= 3
# aggressive (row 4 holds the aggressiveness bins)
prop_disc_data[4, 1:min(19999, n_prop)] .= 4
prop_disc_data[4, 20000:min(40000, n_prop)] .= 3
training_data = DataFrame(
        velocity = prop_disc_data[1,:],
        forevelocity = prop_disc_data[2,:],
        foredistance = prop_disc_data[3,:],
        aggressiveness = prop_disc_data[4,:],
        isattentive = prop_is_attentive
);

In [None]:
# Fit the proposal network on the skewed training data, using these directed
# edges (source => target).
prop_bn_edges = (
    :isattentive => :foredistance,
    :isattentive => :velocity,
    :aggressiveness => :foredistance,
    :aggressiveness => :velocity,
    :foredistance => :velocity,
    :forevelocity => :velocity,
)
bn = fit(DiscreteBayesNet, training_data, prop_bn_edges)

In [None]:
# Report CPDs: print the table of every node in the current network `bn`.
prop_cpd_nodes = (:isattentive, :aggressiveness, :foredistance, :forevelocity, :velocity)
for node in prop_cpd_nodes
    println(table(bn, node))
end

In [None]:
# Persist the proposal network together with the bin edges.
prop_output_filepath = "../../data/bayesnets/prop_test.jld"
JLD.save(prop_output_filepath, "bn", bn, "var_edges", var_edges)