In [None]:
:dep plotters = { version = "^0.3.4", default_features = false, features = ["evcxr", "all_series", "all_elements"] }
extern crate plotters;
use plotters::prelude::*;

In [2]:
:dep cluster-rs = { path = "." }
use cluster_rs::cmeans::{CMeans, CMeansParams};
use cluster_rs::kmeans::{KMeans, KMeansParams};
use cluster_rs::metrics::silhouette_score_km

In [3]:
:dep serde = {features = ["derive"]}
use serde::Deserialize;

In [4]:
:dep csv

In [5]:
use std::time::Instant;

In [17]:
#[derive(Debug, Deserialize)]
struct IrisRecord {
    col1: f64,
    col2: f64,
    col3: f64,
    col4: f64,
    label: String,
}

In [66]:
let mut data: Vec<Vec<f64>> = vec![];
let mut reader = csv::ReaderBuilder::new()
    .has_headers(false)
    .from_path("data/BDA/Dataset 1/Iris-150.txt")
    .expect("Failed to open csv");
for record in reader.deserialize() {
    let record: IrisRecord = record.unwrap();
    let mut row = vec![vec![record.col1, record.col2, record.col3, record.col4]];
    data.append(&mut row);
}
println!("{}", data.len());

150


In [78]:
let params = KMeansParams::default()
    .n_clusters(3)
    .parallelized(true)
    .verbose(true);

println!("{params:?}");
let km = KMeans::new(params).fit(&data);
let t = Instant::now();
let centroids = km.centroids();
let inertia = km.inertia();
let labels = km.labels();
let duration = t.elapsed();
println!("{centroids:?}");
println!("{inertia:?}");
println!("{:?}", labels);
// println!("{:?}");

println!("{}", duration.as_millis());

KMeansParams { n_clusters: 3, max_iter: 100, n_init: 10, tol: 0.001, parallelized: true, verbose: true }
Total time 6ms
Total time to assign centroids 2ms
Total time to compute new centroids: 4ms
[[6.67889187136852, 3.010706322424035, 5.519696119682865, 1.9773052592676041], [5.795624360993876, 2.7162356286401184, 4.220685520215054, 1.3392104380747418], [5.006000041494139, 3.418000091615167, 1.4640000134206839, 0.24399999541785422]]
97.60634555210095
[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
0


In [68]:
let params = CMeansParams::default()
    .n_clusters(3)
    .parallelized(true)
    .verbose(true)
    .max_iter(100);

println!("{params:?}");
let t = Instant::now();
let cm = CMeans::new(params).fit(&data);
let duration = t.elapsed();
println!("{:?}", cm.centroids());
println!("{}", duration.as_millis());

CMeansParams { n_clusters: 3, max_iter: 100, tol: 0.001, fuzzyness: 2, parallelized: true, verbose: true }
Total time 1ms
Total time to assign centroids 1ms
Total time to compute new centroids: 0ms
[[5.888847867340344, 2.7611082228618096, 4.3637403193799456, 1.3971810484675422], [5.0035603012197924, 3.4030524850478825, 1.48497301964637, 0.2515282131208023], [6.774691851267954, 3.052306639216175, 5.646382456952857, 2.053403389919318]]
2


In [84]:
let colx = 2;
let coly = 3;
// let x_min = data.iter().max_by(|row1, row2| row1[colx].total_cmp(&row2[colx]));
let x_min: f32 = data.iter().fold(f32::NAN, |acc, row| {f32::min(acc, row[colx] as f32)});
let x_max: f32 = data.iter().fold(f32::NAN, |acc, row| {f32::max(acc, row[colx] as f32)});
let y_min: f32 = data.iter().fold(f32::NAN, |acc, row| {f32::min(acc, row[coly] as f32)});
let y_max: f32 = data.iter().fold(f32::NAN, |acc, row| {f32::max(acc, row[coly] as f32)});
// let x_max = data.iter().map(|v| v[colx]).max();
// let y_min = data.iter().map(|v| v[coly]).min();
// let y_max = data.iter().map(|v| v[coly]).max();

In [85]:
// Plotting
let x_lim = x_min..x_max;
let y_lim = y_min..y_max;
// let root = BitMapBackend::new("kmeans.png", (600, 400)).into_drawing_area();

let figure = evcxr_figure((640, 480), |root|{
    let mut ctx = ChartBuilder::on(&root)
        .set_label_area_size(LabelAreaPosition::Left, 40) // Put in some margins
        .set_label_area_size(LabelAreaPosition::Right, 40)
        .set_label_area_size(LabelAreaPosition::Bottom, 40)
        .caption("Iris KMeans", ("sans-serif", 25)) // Set a caption and font
        .build_cartesian_2d(x_lim, y_lim)
        .expect("Couldn't build our ChartBuilder");

    root.fill(&WHITE).unwrap();
    ctx.configure_mesh().draw().unwrap();
    let root_area = ctx.plotting_area();

    
    
    let colors = [RED, BLUE, GREEN];
    for (record, label) in data.iter().zip(&labels) {
        let coords = (record[colx] as f32, record[coly] as f32);
        let point = Circle::new(coords, 3, ShapeStyle::from(&colors[*label]).filled());
        root_area
            .draw(&point)
            .expect("An error occurred while drawing the point!");
    }
    Ok(())
});

In [86]:
figure