## Jupyter Kernel for Rust Programming Language

[evcxr] is an evaluation context for Rust. It is an unofficial Google project that offers several related crates, one of which is [evcxr_jupyter], a Jupyter kernel for the Rust programming language.

BTW, if you're wondering how to pronounce `EvCxR`; it's pronounced *"Evic-ser"* cos it's an **EV**aluation Conte**X**t for **R**ust.

You can take a [tour of the Jupyter Kernel][tour] to get a feel for using Rust in Jupyter Notebooks.

## Setup

You can set up `evcxr_jupyter` by running the following commands:

```sh
cargo install evcxr_jupyter
evcxr_jupyter --install
```

Check full installation instructions [here][install].

### Plotly Jupyter Support

To show plots in Jupyter notebook, install the jupyterlab extension by executing the following command:

```sh
jupyter labextension install jupyterlab-plotly
```

Visit plotly's [Jupyter Support] guide for more installation help.

[evcxr]: https://github.com/google/evcxr
[evcxr_jupyter]: https://github.com/google/evcxr/blob/main/evcxr_jupyter/README.md
[install]: https://github.com/google/evcxr/blob/main/evcxr_jupyter/README.md#installation
[tour]: https://github.com/google/evcxr/blob/main/evcxr_jupyter/samples/evcxr_jupyter_tour.ipynb
[Jupyter Support]: https://igiagkiozis.github.io/plotly/content/fundamentals/jupyter_support.html

# Iris Dataset

## Install and add dependencies

In [2]:
// Install dependencies: equivalent to adding them to "Cargo.toml".
:dep eyre = "0.6.8"
:dep ndarray = "0.15.6"
:dep reqwest = { version = "0.11.13", features = ["blocking"] }
:dep polars = { version = "0.26.1", features = ["ndarray", "lazy", "describe", "dtype-full"] }
:dep plotly = { version = "0.6", features=["plotly_ndarray"] }

In [None]:
use std::{io::Cursor, fs::{self, File}, path::Path};

use eyre::Result;
use ndarray::prelude::*;
use polars::prelude::*;
use reqwest::blocking::Client;
use plotly::{Scatter, Plot};
use plotly::common::Mode;

## Load data into `DataFrame` or `LazyFrame`

In [None]:
/// Save a dataframe to disk as CSV, unless something already exists at `path`.
///
/// When `path` does not exist, any missing parent directories are created
/// first, then the file is created and `df` is written to it with
/// `CsvWriter`. When `path` already exists nothing is written and a notice is
/// printed instead.
///
/// # Errors
///
/// Returns an error if the parent directory or the file cannot be created, or
/// if writing the CSV fails.
pub fn save_df(df: &mut DataFrame, path: &Path) -> Result<()> {
  if path.exists() {
    println!("File already exists.");
    return Ok(());
  }

  // Some paths have no parent at all; then there is no directory to create.
  if let Some(parent) = path.parent() {
    if !parent.is_dir() {
      // Propagate I/O failures to the caller instead of panicking.
      std::fs::create_dir_all(parent)?;
    }
  }

  // Create file.
  let mut file = File::create(path)?;

  // Save dataframe.
  CsvWriter::new(&mut file).finish(df)?;
  println!("File saved to:  {}", path.display());

  Ok(())
}

/// Load the Iris dataset into a `LazyFrame`.
///
/// If `path` is given and points to an existing file, the CSV is read from
/// that file with `LazyCsvReader`. Otherwise (no path, or the path is not a
/// file) the dataset is downloaded and parsed from memory. In both cases the
/// "species" column's dtype is overwritten with a categorical type.
///
/// # Errors
///
/// Returns an error if the file cannot be read, if the download fails or the
/// server answers with a non-success HTTP status, or if the CSV cannot be
/// parsed.
pub fn load_data(path: Option<&Path>) -> Result<LazyFrame> {
  // Overwrite the "species" schema.
  let fields = [Field::new("species", DataType::Categorical(None))];
  let schema = Schema::from(fields.into_iter());

  let df = match path {
    // Load data from file (if it exists).
    Some(p) if p.is_file() => {
      println!("Loading data from {}", p.display());

      LazyCsvReader::new(p)
        .has_header(true)
        .with_dtype_overwrite(Some(&schema))
        .finish()?
    }
    // Download data.
    _ => {
      println!("Downloading data...");

      let data: Vec<u8> = Client::new()
        .get("https://j.mp/iriscsv")
        .send()?
        // Fail early on 4xx/5xx responses rather than feeding an error page
        // to the CSV parser.
        .error_for_status()?
        .text()?
        .into_bytes();

      CsvReader::new(Cursor::new(data))
        .has_header(true)
        .with_dtypes(Some(&schema))
        .finish()?
        .lazy()
    }
  };

  Ok(df)
}

In [None]:
// Alternative: download & load the iris dataset without reading from disk.
// let df = load_data(None).unwrap();

// Load iris dataset from file into a dataframe.
// NOTE: If the path doesn't exist, the file is downloaded instead.
let path = Path::new("../data/iris.csv");
let df_lazy = load_data(Some(path))?;

// `df_lazy` is reused by later cells, so collect a clone into an eager frame.
let mut df = df_lazy.clone().collect()?;
// Save dataframe to path if it doesn't exist. Propagate a failure with `?`
// rather than silently dropping the returned `Result`.
save_df(&mut df, path)?;

// Display dataframe.
// println!("{}", df);
df

In [None]:
// convert species into categorical values.
// df_lazy.clone()
//   .with_column(
//     col("species")
//       .cast(DataType::Categorical(None))
//   )
//   .collect()?

## Data exploration

In [None]:
// Count the rows per species: group by "species" and aggregate every
// column with `count()`, then collect the lazy query into a dataframe.
df_lazy.clone()
  .groupby([col("species")])
  .agg([col("*").count()])
  .collect()?

In [None]:
// Earlier experiments, kept for reference:
// df.clone().unique(Some(&["species".to_string()]), UniqueKeepStrategy::First)?
// df.clone().is_unique()?.cast(&DataType::UInt8)?.sum::<u8>()

// Print the dataframe's dimensions (height = rows, width = columns), then
// evaluate to the result of `describe`.
println!("Number of rows: {}", df.height());
println!("Number of cols: {}", df.width());
df.describe(None)

In [None]:
// Get the list of column names; `columns` is reused by the cells below to
// split the dataframe and to label plot traces.
let columns = df.get_column_names_owned();
columns

In [None]:
// Feature columns: every column name except the last one.
&columns[..columns.len() - 1]

In [None]:
// Target column: the last column name.
&columns[columns.len() - 1..]

In [None]:
// Copyright (c) 2023 Victor I. Afolabi
// 
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

// Owned copy of every column name except the last one.
let feature_names = columns[..columns.len() - 1].to_owned();
feature_names

## Split dataframe into features & target

In [None]:
// Features dataframe: all columns except the last one.
// `select` is called on `df` directly, so no up-front clone of the whole
// dataframe is needed.
let feature_df = df.select(&columns[..columns.len() - 1])?;

feature_df

In [None]:
// Targets dataframe: only the last column.
// `select` is called on `df` directly, so no up-front clone of the whole
// dataframe is needed.
let target_df = df.select(&columns[columns.len() - 1..])?;

target_df

In [None]:
// Show the shape of the targets dataframe.
target_df.shape()

## Converting features and targets into `ndarray`

In [None]:
// Convert target into ndarray.
// `to_ndarray` is called on `target_df` directly; cloning the dataframe
// first is unnecessary.
let target = target_df.to_ndarray::<Float64Type>()?;
target.shape()

In [None]:
// Convert features into ndarray.
// `to_ndarray` is called on `feature_df` directly; cloning the dataframe
// first is unnecessary.
let features = feature_df.to_ndarray::<Float64Type>()?;
features

## Alternate loading of a dataframe

In [None]:
// List fields with their datatypes.
// The sepal columns are spelled "sepal_*", matching the field names used for
// `expected_schema` in a later cell.
let fields: [Field; 5] = [
  Field::new("sepal_length", DataType::Float64),
  Field::new("sepal_width", DataType::Float64),
  Field::new("petal_length", DataType::Float64),
  Field::new("petal_width", DataType::Float64),
  Field::new("species", DataType::Categorical(None)),
];

// Create dataframe schema.
let schema = Schema::from(fields.into_iter());
println!("{:?}", schema);

// Load dataframe with a given schema.
let df_lazy = LazyCsvReader::new(&path)
  .has_header(true)
  .with_schema(schema.into())
  .finish()?;

df_lazy.collect()?

In [None]:
// Schema holding a single field: "species" as a categorical. It is passed as
// a dtype overwrite when reading the CSV.
let species_schema = Schema::from(std::iter::once(Field::new(
  "species",
  DataType::Categorical(None),
)));

// Read the CSV (with header) using the dtype overwrite above.
let df_lazy = LazyCsvReader::new(&path)
  .with_dtype_overwrite(Some(&species_schema))
  .has_header(true)
  .finish()?;

df_lazy.collect()?

In [None]:
// Expected schema: four Float64 measurement columns followed by the
// categorical "species" column, in that order.
let expected_schema = Schema::from(
  ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    .into_iter()
    .map(|name| Field::new(name, DataType::Float64))
    .chain(std::iter::once(Field::new("species", DataType::Categorical(None)))),
);
expected_schema

In [None]:
// Check that the schema of `df` equals `expected_schema`; `assert_eq!`
// panics if they differ.
assert_eq!(expected_schema, df.schema())

In [None]:
// Show the features array.
features

In [None]:
// Copyright (c) 2023 Victor I. Afolabi
// 
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

// features.shape(); // [150, 4]
// features.nrows(); // 150
// features.ncols(); // 4
// features.len(); // 600

// target.shape(); // [150, 1]
// target.nrows(); // 150
// target.ncols(); // 1
// target.len(); // 150

In [None]:
// Scatter-plot every feature column against the sample index.
// The x axis has one entry per row of `features`, the array that is actually
// plotted.
let x = Array::from_iter(0..features.nrows());

let mut plot = Plot::new();
for (i, column) in features.axis_iter(Axis(1)).enumerate() {
  // One trace per feature column, labelled with the matching column name.
  let t = Scatter::from_array(x.clone(), column.to_owned())
    .name(&columns[i])
    .mode(Mode::Markers);
  plot.add_trace(t);
}
plot

In [None]:
// Same scatter plot with the arguments swapped: the feature values are
// passed first and the sample index second.
let x = Array::from_iter(0..features.nrows());

let mut plot = Plot::new();
for (i, column) in features.axis_iter(Axis(1)).enumerate() {
  let values = column.to_owned();
  let trace = Scatter::from_array(values, x.clone())
    .name(&columns[i])
    .mode(Mode::Markers);
  plot.add_trace(trace);
}

plot

In [None]:
// Show the features dataframe.
feature_df

In [None]:
// View of all rows of the column at index 2 of `features`.
features.slice(s![.., 2])

In [None]:
// use plotly::ndarray::ArrayTraces;

// let x = Array::from_iter(0..features.nrows());

// let traces = Scatter::default()
//   .mode(Mode::Markers)
//   .to_traces(x.clone(), features.clone(), ArrayTraces::OverColumns);

// let mut plot = Plot::new();
// plot.add_traces(traces);
// plot

In [None]:
// Copyright (c) 2023 Victor I. Afolabi
// 
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT

// Sample points in [0, 10) with a step of 10 / n, and their squares.
let n: usize = 11;
let step = 10. / n as f64;
let t: Array<f64, Ix1> = Array::range(0., 10., step);
let ys: Array<f64, Ix1> = t.mapv(|v| v.powf(2.));

// Plot the squares against the sample points as a single trace.
let mut plot = Plot::new();
plot.add_trace(Scatter::from_array(t, ys).mode(Mode::LinesMarkers));
plot

In [None]:
// Copyright (c) 2023 Victor I. Afolabi
// 
// This software is released under the MIT License.
// https://opensource.org/licenses/MIT
// use plotly::ndarray::ArrayTraces;

// let n: usize = 11;
// let t: Array<f64, Ix1> = Array::range(0., 10., 10. / n as f64);
// let mut ys: Array<f64, Ix2> = Array::zeros((11, 11));
// let mut count = 0.;
// for mut row in ys.columns_mut() {
//    for index in 0..row.len() {
//       row[index] = count + (index as f64).powf(2.);
//    }
//    count += 1.;
// }

// let traces = Scatter::default()
//             .mode(Mode::LinesMarkers)
//             .to_traces(t, ys, ArrayTraces::OverColumns);

// let mut plot = Plot::new();
// plot.add_traces(traces);
// plot