## Google Colab Rust Setup

The following cell is used to set up a Rust environment on Colab. Don't execute it locally!

Many thanks to [`mateusvmv`](https://github.com/mateusvmv) for this hack in [`gist.github.com/korakot/ae95315ea6a3a3b33ee26203998a59a3`](https://gist.github.com/korakot/ae95315ea6a3a3b33ee26203998a59a3?permalink_comment_id=4715636#gistcomment-4715636).

In [None]:
# This script sets up and spins up a Jupyter Notebook environment with a Rust kernel using Nix and IPC Proxy. 
!wget -qO- https://gist.github.com/wiseaidev/2af6bef753d48565d11bcd478728c979/archive/3f6df40db09f3517ade41997b541b81f0976c12e.tar.gz | tar xvz --strip-components=1
!bash setup_evcxr_kernel.sh

## Install Dependencies

In [2]:
:dep polars = { version = "0.35.0", features = ["describe", "lazy", "ndarray", "object", "dtype-struct", "concat_str", "mode"] }
// or, to build polars straight from its git repository:
// :dep polars = { git = "https://github.com/pola-rs/polars", features = ["describe", "lazy", "ndarray", "object", "dtype-struct", "concat_str", "mode"]}

In [64]:
:dep either = { version = "1.9.0" }
// or, to build either straight from its git repository:
// :dep either = { git = "https://github.com/bluss/either" }

---

## Import Libraries

In [66]:
use std::path::Path;
use polars::prelude::*;
use either::Either;

---

## DataFrame Object

### DataFrame Initialization

In [4]:
let df = DataFrame::default();
df

shape: (0, 0)
┌┐
╞╡
└┘

In [5]:
let s1 = Series::new("Name", &["Mahmoud", "Ali"]);
let s2 = Series::new("Age", &[23, 27]);
let s3 = Series::new("Height", &[1.84, 1.78]);

let df: DataFrame = DataFrame::new(vec![s1, s2, s3]).unwrap();
df

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Ali     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

### Describe

In [6]:
let df1: DataFrame = df!("categorical" => &["d","e","f"],
                         "numeric" => &[1, 2, 3],
                         "object" => &["a", "b", "c"]).unwrap();
df1

shape: (3, 3)
┌─────────────┬─────────┬────────┐
│ categorical ┆ numeric ┆ object │
│ ---         ┆ ---     ┆ ---    │
│ str         ┆ i32     ┆ str    │
╞═════════════╪═════════╪════════╡
│ d           ┆ 1       ┆ a      │
│ e           ┆ 2       ┆ b      │
│ f           ┆ 3       ┆ c      │
└─────────────┴─────────┴────────┘

In [7]:
let df2: DataFrame = df1.describe(None).unwrap();
df2

shape: (9, 4)
┌────────────┬─────────────┬─────────┬────────┐
│ describe   ┆ categorical ┆ numeric ┆ object │
│ ---        ┆ ---         ┆ ---     ┆ ---    │
│ str        ┆ str         ┆ f64     ┆ str    │
╞════════════╪═════════════╪═════════╪════════╡
│ count      ┆ 3           ┆ 3.0     ┆ 3      │
│ null_count ┆ 0           ┆ 0.0     ┆ 0      │
│ mean       ┆ null        ┆ 2.0     ┆ null   │
│ std        ┆ null        ┆ 1.0     ┆ null   │
│ min        ┆ d           ┆ 1.0     ┆ a      │
│ 25%        ┆ null        ┆ 1.5     ┆ null   │
│ 50%        ┆ null        ┆ 2.0     ┆ null   │
│ 75%        ┆ null        ┆ 2.5     ┆ null   │
│ max        ┆ f           ┆ 3.0     ┆ c      │
└────────────┴─────────────┴─────────┴────────┘

### Head

In [8]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// First 10 rows
df.head(None)

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Bob     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

In [9]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// First row
df.head(Some(1))

shape: (1, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
└─────────┴─────┴────────┘

### Tail

In [10]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// Last 10 rows
df.tail(None)

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Bob     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

In [11]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// Last row
df.tail(Some(1))

shape: (1, 3)
┌──────┬─────┬────────┐
│ Name ┆ Age ┆ Height │
│ ---  ┆ --- ┆ ---    │
│ str  ┆ i32 ┆ f64    │
╞══════╪═════╪════════╡
│ Bob  ┆ 27  ┆ 1.78   │
└──────┴─────┴────────┘

### Indexing & Slicing

In [12]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();
{
    let name_col = &df["Name"];
    println!("{}", name_col);
}
// or
// let name_col1 = &df[0];

shape: (3,)
Series: 'Name' [str]
[
	"Mahmoud"
	"Ali"
	"ThePrimeagen"
]


()

In [13]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

let name_age_cols = df.select(["Name", "Age"]).unwrap();
name_age_cols

shape: (3, 2)
┌──────────────┬─────┐
│ Name         ┆ Age │
│ ---          ┆ --- │
│ str          ┆ i32 │
╞══════════════╪═════╡
│ Mahmoud      ┆ 22  │
│ Ali          ┆ 25  │
│ ThePrimeagen ┆ 29  │
└──────────────┴─────┘

In [14]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

{
    let name_col = df.column("Name");
    println!("{:?}", name_col);
}

Ok(shape: (3,)
Series: 'Name' [str]
[
	"Mahmoud"
	"Ali"
	"ThePrimeagen"
])


()

In [80]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();


let mask = df.column("Age").expect("Age must exist!").gt(25).unwrap();
let filtered_data = df.filter(&mask).unwrap();
println!("{:?}", filtered_data);

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘


In [81]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

df.slice(2, 3)

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘

In [89]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

df.transpose(Some("columns"), Some(Either::Right(vec!["Person 1".to_string(), "Person 2".to_string(), "Person 3".to_string()]))).unwrap()

shape: (4, 4)
┌─────────┬──────────┬──────────┬──────────────┐
│ columns ┆ Person 1 ┆ Person 2 ┆ Person 3     │
│ ---     ┆ ---      ┆ ---      ┆ ---          │
│ str     ┆ str      ┆ str      ┆ str          │
╞═════════╪══════════╪══════════╪══════════════╡
│ Name    ┆ Mahmoud  ┆ Ali      ┆ ThePrimeagen │
│ Age     ┆ 22       ┆ 25       ┆ 36           │
│ Gender  ┆ M        ┆ M        ┆ M            │
│ Salary  ┆ 50000    ┆ 60000    ┆ 250000       │
└─────────┴──────────┴──────────┴──────────────┘

## Data Cleaning

### Nulls Count

In [18]:
let df = df!("Name" => &[Some("Mahmoud"),  None, None],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

 df.null_count()

shape: (1, 4)
┌──────┬─────┬────────┬────────┐
│ Name ┆ Age ┆ Gender ┆ Salary │
│ ---  ┆ --- ┆ ---    ┆ ---    │
│ u32  ┆ u32 ┆ u32    ┆ u32    │
╞══════╪═════╪════════╪════════╡
│ 2    ┆ 0   ┆ 0      ┆ 0      │
└──────┴─────┴────────┴────────┘

### Duplicates

In [19]:
let df = df!("Name" => &["Mahmoud",  "Mahmoud", "ThePrimeagen"],
             "Age" => &[22, 22, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 50000, 250000]).unwrap();
let mask = df.is_duplicated().unwrap();
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (2, 4)
┌─────────┬─────┬────────┬────────┐
│ Name    ┆ Age ┆ Gender ┆ Salary │
│ ---     ┆ --- ┆ ---    ┆ ---    │
│ str     ┆ i32 ┆ str    ┆ i32    │
╞═════════╪═════╪════════╪════════╡
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
└─────────┴─────┴────────┴────────┘

### Unique Values

In [20]:
let df = df!("Name" => &["Mahmoud",  "Mahmoud", "ThePrimeagen"],
             "Age" => &[22, 22, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 50000, 250000]).unwrap();
let mask = df.is_unique().unwrap();
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘

### Drop

In [90]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                        "Color" => &["Red", "Yellow", "Green"]).unwrap();
let df_remain = df.drop("Color").unwrap(); 
println!("{}", df_remain);
println!("{}", df); // the original DataFrame

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘
shape: (3, 2)
┌───────┬────────┐
│ Fruit ┆ Color  │
│ ---   ┆ ---    │
│ str   ┆ str    │
╞═══════╪════════╡
│ Apple ┆ Red    │
│ Apple ┆ Yellow │
│ Pear  ┆ Green  │
└───────┴────────┘


In [91]:
let mut df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                            "Color" => &["Red", "Yellow", "Green"]).unwrap();
// Remove the "Color" column from `df` itself (no copy is made, hence `mut`).
// The removed column is returned; it is not needed here, but a missing column
// is surfaced instead of being silently ignored.
df.drop_in_place("Color").expect("Color must exist!");
df

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘

In [23]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                      "Color" => &["Red", "Yellow", "Green"]).unwrap();
// `drop_many` takes a list of column names and returns a new DataFrame without them.
// Judging by the output below, the "" entry matches no column and has no visible
// effect, so only "Color" is removed.
let df_dropped_col = df.drop_many(&["Color", ""]);
df_dropped_col

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘

In [24]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                      "Color" => &[Some("Red"), None, None]).unwrap();
let df_clean = df.drop_nulls::<String>(None).unwrap();
df_clean

shape: (1, 2)
┌───────┬───────┐
│ Fruit ┆ Color │
│ ---   ┆ ---   │
│ str   ┆ str   │
╞═══════╪═══════╡
│ Apple ┆ Red   │
└───────┴───────┘

In [25]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();
let mask = df.column("Salary").expect("Salary must exist!").is_not_null();
mask.head(None)

shape: (3,)
ChunkedArray: 'Salary' [bool]
[
	true
	true
	false
]

In [26]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();
// Recompute the non-null mask here so this cell runs on its own instead of
// depending on a `mask` variable left behind by an earlier cell.
let mask = df.column("Salary").expect("Salary must exist!").is_not_null();
// Keep only the rows whose Salary is present.
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (2, 4)
┌─────────┬─────┬────────┬────────┐
│ Name    ┆ Age ┆ Gender ┆ Salary │
│ ---     ┆ --- ┆ ---    ┆ ---    │
│ str     ┆ i32 ┆ str    ┆ i32    │
╞═════════╪═════╪════════╪════════╡
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
│ Ali     ┆ 25  ┆ M      ┆ 60000  │
└─────────┴─────┴────────┴────────┘

### Fill

In [27]:
// `fill_null` returns a new DataFrame, so `df` does not need to be mutable.
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

// Forward fill: each null takes the last non-null value above it in the same column
// (the missing Salary becomes 60000 in the output below).
let filtered_nulls = df.fill_null(FillNullStrategy::Forward(None)).unwrap();
filtered_nulls

shape: (3, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ Mahmoud      ┆ 22  ┆ M      ┆ 50000  │
│ Ali          ┆ 25  ┆ M      ┆ 60000  │
│ ThePrimeagen ┆ 36  ┆ M      ┆ 60000  │
└──────────────┴─────┴────────┴────────┘

## Measures of central tendency

### Mean

In [28]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.mean()

shape: (1, 4)
┌──────┬───────────┬────────┬─────────┐
│ Name ┆ Age       ┆ Gender ┆ Salary  │
│ ---  ┆ ---       ┆ ---    ┆ ---     │
│ str  ┆ f64       ┆ str    ┆ f64     │
╞══════╪═══════════╪════════╪═════════╡
│ null ┆ 27.666667 ┆ null   ┆ 55000.0 │
└──────┴───────────┴────────┴─────────┘

### Median

In [29]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.median()

shape: (1, 4)
┌──────┬──────┬────────┬─────────┐
│ Name ┆ Age  ┆ Gender ┆ Salary  │
│ ---  ┆ ---  ┆ ---    ┆ ---     │
│ str  ┆ f64  ┆ str    ┆ f64     │
╞══════╪══════╪════════╪═════════╡
│ null ┆ 25.0 ┆ null   ┆ 55000.0 │
└──────┴──────┴────────┴─────────┘

## Measures of spread

### std

In [30]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.std(1)

shape: (1, 4)
┌──────┬──────────┬────────┬─────────────┐
│ Name ┆ Age      ┆ Gender ┆ Salary      │
│ ---  ┆ ---      ┆ ---    ┆ ---         │
│ str  ┆ f64      ┆ str    ┆ f64         │
╞══════╪══════════╪════════╪═════════════╡
│ null ┆ 7.371115 ┆ null   ┆ 7071.067812 │
└──────┴──────────┴────────┴─────────────┘

### var

In [31]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.var(1)

shape: (1, 4)
┌──────┬───────────┬────────┬────────┐
│ Name ┆ Age       ┆ Gender ┆ Salary │
│ ---  ┆ ---       ┆ ---    ┆ ---    │
│ str  ┆ f64       ┆ str    ┆ f64    │
╞══════╪═══════════╪════════╪════════╡
│ null ┆ 54.333333 ┆ null   ┆ 5e7    │
└──────┴───────────┴────────┴────────┘

## Ndarray

In [41]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.to_ndarray::<Float64Type>(IndexOrder::Fortran).unwrap()

[[NaN, 22.0, NaN, 50000.0],
 [NaN, 25.0, NaN, 60000.0],
 [NaN, 36.0, NaN, NaN]], shape=[3, 4], strides=[1, 3], layout=Ff (0xa), const ndim=2

<hr />

## Aggregation Functions

Download [the flights dataset](https://www.kaggle.com/datasets/deepak007chaubey/flight-on-time-dataset?select=Flight_on_time_HIX.csv) and move it to the `dataset` directory.

In [43]:
/// Reads the CSV file at `csv_file_path` into a `DataFrame`, treating the first
/// row as the header.
///
/// # Panics
///
/// Panics if the file cannot be opened or if its contents cannot be read as CSV.
/// The panic message names the offending path so a missing dataset is easy to spot.
fn read_data_frame_from_csv(
    csv_file_path: &Path,
) -> DataFrame {
    CsvReader::from_path(csv_file_path)
        .unwrap_or_else(|e| panic!("Cannot open file {}: {e}", csv_file_path.display()))
        .has_header(true)
        .finish()
        .unwrap_or_else(|e| panic!("Cannot read CSV file {}: {e}", csv_file_path.display()))
}


let flights_file_path: &Path = Path::new("dataset/Flight_on_time_HIX.csv");
let columns = ["Airline", "Origin_Airport", "Destination_Airport", "Departure_Delay_Minutes", "Arrival_Delay_Minutes"];
let flights_df: DataFrame = read_data_frame_from_csv(flights_file_path).select(columns).unwrap();
flights_df.head(Some(5))

shape: (5, 5)
┌─────────┬────────────────┬─────────────────────┬─────────────────────────┬───────────────────────┐
│ Airline ┆ Origin_Airport ┆ Destination_Airport ┆ Departure_Delay_Minutes ┆ Arrival_Delay_Minutes │
│ ---     ┆ ---            ┆ ---                 ┆ ---                     ┆ ---                   │
│ str     ┆ str            ┆ str                 ┆ i64                     ┆ i64                   │
╞═════════╪════════════════╪═════════════════════╪═════════════════════════╪═══════════════════════╡
│ TR      ┆ IYF            ┆ HIX                 ┆ 62                      ┆ 52                    │
│ TR      ┆ HEN            ┆ HIX                 ┆ 15                      ┆ 8                     │
│ RO      ┆ HIX            ┆ IZN                 ┆ 0                       ┆ 0                     │
│ XM      ┆ HIX            ┆ IZU                 ┆ 34                      ┆ 44                    │
│ XM      ┆ HIX            ┆ LKF                 ┆ 144                     ┆ 

In [46]:
// Mean arrival delay per airline: group by "Airline", keep only the
// "Arrival_Delay_Minutes" column and average it within each group.
let arr_delay_mean_df: DataFrame = flights_df.group_by(["Airline"]).expect("Airline Column must exist!").select(["Arrival_Delay_Minutes"]).mean().unwrap();
// Show only the first five groups.
arr_delay_mean_df.head(Some(5))

shape: (5, 2)
┌─────────┬────────────────────────────┐
│ Airline ┆ Arrival_Delay_Minutes_mean │
│ ---     ┆ ---                        │
│ str     ┆ f64                        │
╞═════════╪════════════════════════════╡
│ TR      ┆ 281.309919                 │
│ GB      ┆ 897.5625                   │
│ TJ      ┆ 902.961255                 │
│ YJ      ┆ 11.839243                  │
│ LV      ┆ 10.266467                  │
└─────────┴────────────────────────────┘

In [47]:
// Mean departure delay for every (Airline, Origin_Airport) pair: group by both
// columns, keep only "Departure_Delay_Minutes" and average it within each group.
// NOTE(review): `dep_delay_mean_def` looks like a typo for `dep_delay_mean_df` — confirm
// nothing else refers to the current name before renaming.
let dep_delay_mean_def: DataFrame = flights_df.group_by(["Airline", "Origin_Airport"]).expect("Airline and Origin_Airport Columns must exist!").select(["Departure_Delay_Minutes"]).mean().unwrap();
// Show only the first five groups.
dep_delay_mean_def.head(Some(5))

shape: (5, 3)
┌─────────┬────────────────┬──────────────────────────────┐
│ Airline ┆ Origin_Airport ┆ Departure_Delay_Minutes_mean │
│ ---     ┆ ---            ┆ ---                          │
│ str     ┆ str            ┆ f64                          │
╞═════════╪════════════════╪══════════════════════════════╡
│ TR      ┆ IYF            ┆ 10.634921                    │
│ TR      ┆ HEN            ┆ 4.0                          │
│ CA      ┆ UKP            ┆ 1.176471                     │
│ TR      ┆ ERR            ┆ 14.526667                    │
│ CA      ┆ EJY            ┆ 15.742857                    │
└─────────┴────────────────┴──────────────────────────────┘

<hr />

## Merging DataFrames

### Inner Join

In [50]:
let df1: DataFrame = df!("Carrier" => &["HA", "EV", "VX", "DL"],
                         "ArrDelay" => &[-3, 28, 0, 1]).unwrap();
let df2: DataFrame = df!("Airline" => &["HA", "EV", "OO", "VX"],
                         "DepDelay" => &[21, -8, 11, -4]).unwrap();

let df3: DataFrame = df1.join(&df2, ["Carrier"], ["Airline"], JoinType::Inner.into()).unwrap();
// or: let df3: DataFrame = df1.inner_join(&df2, ["Carrier"], ["Airline"]).unwrap();
df3.head(Some(5))

shape: (3, 3)
┌─────────┬──────────┬──────────┐
│ Carrier ┆ ArrDelay ┆ DepDelay │
│ ---     ┆ ---      ┆ ---      │
│ str     ┆ i32      ┆ i32      │
╞═════════╪══════════╪══════════╡
│ HA      ┆ -3       ┆ 21       │
│ EV      ┆ 28       ┆ -8       │
│ VX      ┆ 0        ┆ -4       │
└─────────┴──────────┴──────────┘

### Left Join

In [51]:
let df3: DataFrame = df1.left_join(&df2, ["Carrier"], ["Airline"]).unwrap();
df3.head(Some(5))

shape: (4, 3)
┌─────────┬──────────┬──────────┐
│ Carrier ┆ ArrDelay ┆ DepDelay │
│ ---     ┆ ---      ┆ ---      │
│ str     ┆ i32      ┆ i32      │
╞═════════╪══════════╪══════════╡
│ HA      ┆ -3       ┆ 21       │
│ EV      ┆ 28       ┆ -8       │
│ VX      ┆ 0        ┆ -4       │
│ DL      ┆ 1        ┆ null     │
└─────────┴──────────┴──────────┘

### Right Join

In [52]:
// A right join of df1 with df2 is written here as a left join with the operands
// swapped: every row of df2 is kept, and rows without a matching "Carrier" in df1
// get a null ArrDelay (see "OO" in the output below).
let df4: DataFrame = df2.left_join(&df1, ["Airline"], ["Carrier"]).unwrap();
df4.head(Some(5))

shape: (4, 3)
┌─────────┬──────────┬──────────┐
│ Airline ┆ DepDelay ┆ ArrDelay │
│ ---     ┆ ---      ┆ ---      │
│ str     ┆ i32      ┆ i32      │
╞═════════╪══════════╪══════════╡
│ HA      ┆ 21       ┆ -3       │
│ EV      ┆ -8       ┆ 28       │
│ OO      ┆ 11       ┆ null     │
│ VX      ┆ -4       ┆ 0        │
└─────────┴──────────┴──────────┘

### Outer Join

In [53]:
let df5: DataFrame = df1.outer_join(&df2, ["Carrier"], ["Airline"]).unwrap();
df5.head(Some(5))

shape: (5, 3)
┌─────────┬──────────┬──────────┐
│ Carrier ┆ ArrDelay ┆ DepDelay │
│ ---     ┆ ---      ┆ ---      │
│ str     ┆ i32      ┆ i32      │
╞═════════╪══════════╪══════════╡
│ HA      ┆ -3       ┆ 21       │
│ EV      ┆ 28       ┆ -8       │
│ OO      ┆ null     ┆ 11       │
│ VX      ┆ 0        ┆ -4       │
│ DL      ┆ 1        ┆ null     │
└─────────┴──────────┴──────────┘

<hr />