## Install Dependencies

In [2]:
:dep chrono = { version = "0.4.24"}
:dep polars = { version = "0.28.0", features = ["describe", "lazy", "ndarray", "object", "dtype-struct", "concat_str", "mode"] }

<hr />

## Import Libraries

In [3]:
use polars::datatypes::DataType;
use polars::chunked_array::object::{FillNullStrategy, QuantileInterpolOptions};
use polars::prelude::*;
use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
use std::sync::Arc;

<hr />

## Series Object

In [4]:
let series1: Series = [1, 2, 3].iter().collect();
// or
// let series1: Series = Series::new("", &[1, 2, 3]);
series1

shape: (3,)
Series: '' [i32]
[
	1
	2
	3
]

In [5]:
let seasons_ser: Series = Series::new("seasons", &["Winter", "Spring", "Summer", "Fall"]);
seasons_ser

shape: (4,)
Series: 'seasons' [str]
[
	"Winter"
	"Spring"
	"Summer"
	"Fall"
]

In [6]:
let s: Series = Series::new("seasons", &[None, Some(1), Some(2)]);
s

shape: (3,)
Series: 'seasons' [i32]
[
	null
	1
	2
]

In [7]:
Some(f64::NAN)==None

false

In [8]:
f64::NAN==f64::NAN

false

In [9]:
let s: Series = Series::new("seasons", &[None, Some(1), Some(2)]);
s.null_count()

1

In [10]:
s.drop_nulls()

shape: (2,)
Series: 'seasons' [i32]
[
	1
	2
]

In [11]:
let s: Series = Series::new("numbers", &[Some(f64::NAN), Some(1.), Some(2.)]);
s.cast(&DataType::Int64).unwrap()

shape: (3,)
Series: 'numbers' [i64]
[
	null
	1
	2
]

## Series Creation

In [12]:
let s = Series::new_empty("Height", &DataType::Float32);
s

shape: (0,)
Series: 'Height' [f32]
[
]

In [13]:
let s: Series = Series::new("employees", &["Mahmoud", "Ferris"]);
s

shape: (2,)
Series: 'employees' [str]
[
	"Mahmoud"
	"Ferris"
]

In [14]:
let s: Series = Series::new("employees", &["Mahmoud", "Ferris"]);
s.name()

"employees"

In [15]:
let s: Series = Series::new("employees", &vec!["Mahmoud", "Ferris"]);
s

shape: (2,)
Series: 'employees' [str]
[
	"Mahmoud"
	"Ferris"
]

In [16]:
let s = Float64Chunked::new("b", &[1., 2., 3.]).into_series();
s

shape: (3,)
Series: 'b' [f64]
[
	1.0
	2.0
	3.0
]

## Datetime Type

In [17]:
let date: DateTime<Utc> = Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap();
let s = Series::new("b", &[date.date_naive()]);
s

shape: (1,)
Series: 'b' [date]
[
	2020-01-01
]

## Indexing & Slicing

In [18]:
let s: Series = Series::new("employees", &vec!["Ferris", "Mahmoud"]);
s

shape: (2,)
Series: 'employees' [str]
[
	"Ferris"
	"Mahmoud"
]

In [19]:
let s: Series = Series::new("employees", &vec!["Mahmoud", "Ferris"]);
s.slice(0, 1)

shape: (1,)
Series: 'employees' [str]
[
	"Mahmoud"
]

## Length

In [20]:
let s: Series = Series::new("employees", &vec!["Mahmoud", "Ferris"]);
s.len()

2

## Reverse

In [21]:
let s: Series = Series::new("employees", &vec!["Mahmoud", "Ferris"]);
s.reverse()

shape: (2,)
Series: 'employees' [str]
[
	"Ferris"
	"Mahmoud"
]

## Empty

In [22]:
let s: Series = Series::new("employees", &vec!["Mahmoud", "Ferris"]);
s.is_empty()

false

## Drop nulls

In [23]:
let s: Series = Series::new("employees", &vec![Some("Ferris"), None]);
s.drop_nulls()

shape: (1,)
Series: 'employees' [str]
[
	"Ferris"
]

## Summarizing Series

In [40]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04, 0.86, 0.16, 0.26, 0.81]);

// First 10 rows
s.head(None)

shape: (10,)
Series: 'Measurements' [f64]
[
	-1.01
	0.86
	-4.6
	3.98
	0.53
	-7.04
	3.98
	0.53
	-7.04
	0.86
]

In [41]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04, 0.86, 0.16, 0.26, 0.81]);

// First 5 rows
s.head(Some(5))

shape: (5,)
Series: 'Measurements' [f64]
[
	-1.01
	0.86
	-4.6
	3.98
	0.53
]

In [42]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04, 0.86, 0.16, 0.26, 0.81]);

// Last 10 rows
s.tail(None)

shape: (10,)
Series: 'Measurements' [f64]
[
	3.98
	0.53
	-7.04
	3.98
	0.53
	-7.04
	0.86
	0.16
	0.26
	0.81
]

## Append

In [26]:
let mut s1 = Series::new("Age", &[23., 27.]);
let s2 = Series::new("Height", &[1.84, 1.78]);
s1.append(&s2)

Ok(shape: (4,)
Series: 'Age' [f64]
[
	23.0
	27.0
	1.84
	1.78
])

## Cast

In [27]:
// `cast` borrows the Series and returns a new one, so no `mut` binding is needed.
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
// Float -> Int32: the fractional part is dropped (e.g. -4.60 becomes -4, 0.86 becomes 0).
s.cast(&DataType::Int32).unwrap()

shape: (9,)
Series: 'Measurements' [i32]
[
	-1
	0
	-4
	3
	0
	-7
	3
	0
	-7
]

## Fill Nulls

### 1. Forward fill

In [28]:
let s = Series::new("some_missing", &[Some(1), None, Some(3), Some(4), None, Some(6)]);
let filled = s.fill_null(FillNullStrategy::Forward(None)).unwrap();
filled

shape: (6,)
Series: 'some_missing' [i32]
[
	1
	1
	3
	4
	4
	6
]

### 2. Backward fill

In [29]:
let s = Series::new("some_missing", &[Some(1), None, Some(3), Some(4), None, Some(6)]);
// Backward fill: every null is replaced by the next non-null value that follows it.
// The `None` argument means no limit on how many consecutive nulls are filled.
let filled = s.fill_null(FillNullStrategy::Backward(None)).unwrap();
filled

shape: (6,)
Series: 'some_missing' [i32]
[
	1
	3
	3
	4
	6
	6
]

### 3. Mean fill

In [30]:
let s = Series::new("some_missing", &[Some(1), None, Some(3), Some(4), None, Some(6)]);
let filled = s.fill_null(FillNullStrategy::Mean).unwrap();
filled

shape: (6,)
Series: 'some_missing' [i32]
[
	1
	3
	3
	4
	3
	6
]

### 4. Min fill

In [31]:
let s = Series::new("some_missing", &[Some(1), None, Some(3), Some(4), None, Some(6)]);
let filled = s.fill_null(FillNullStrategy::Min).unwrap();
filled

shape: (6,)
Series: 'some_missing' [i32]
[
	1
	1
	3
	4
	1
	6
]

### 5. Max fill

In [32]:
let s = Series::new("some_missing", &[Some(1), None, Some(3), Some(4), None, Some(6)]);
// Max fill: every null is replaced by the largest non-null value in the Series (6 here).
let filled = s.fill_null(FillNullStrategy::Max).unwrap();
filled

shape: (6,)
Series: 'some_missing' [i32]
[
	1
	6
	3
	4
	6
	6
]

## Sampling

In [33]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
// Take 4 samples with replacement and with shuffle (we are talkin' probability here).
s.sample_n(4, true, true, Some(9999))

Ok(shape: (4,)
Series: 'Measurements' [f64]
[
	0.53
	-1.01
	3.98
	0.53
])

## Descriptive statistics

### Measures of central tendency

### 1. Mean

In [34]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
s.mean().unwrap()

-1.09

### 2. Median

In [35]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
s.median().unwrap()

0.53

### 3. Mode

In [36]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
s.mode().unwrap()

shape: (3,)
Series: 'Measurements' [f64]
[
	3.98
	0.53
	-7.04
]

### Measures of spread

### 1. Interquartile range

In [37]:
let s = Series::new("Measurements", &[-1.01,  0.86, -4.60, 3.98,  0.53, -7.04, 3.98,  0.53, -7.04]);
// NOTE: this computes only the 0.75 quantile (third quartile, Q3), returned as a one-element Series.
// The interquartile range itself would be Q3 - Q1, with Q1 obtained the same way using 0.25.
s.quantile_as_series(0.75, QuantileInterpolOptions::Nearest).unwrap()

shape: (1,)
Series: 'Measurements' [f64]
[
	0.86
]

<hr />

## DataFrame Object

### DataFrame Initialization

In [43]:
let df = DataFrame::default();
df

shape: (0, 0)
┌┐
╞╡
└┘

In [51]:
let s1 = Series::new("Name", &["Mahmoud", "Ali"]);
let s2 = Series::new("Age", &[23, 27]);
let s3 = Series::new("Height", &[1.84, 1.78]);

let df: DataFrame = DataFrame::new(vec![s1, s2, s3]).unwrap();
df

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Ali     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

### Describe

In [45]:
let df1: DataFrame = df!("categorical" => &["d","e","f"],
                         "numeric" => &[1, 2, 3],
                         "object" => &["a", "b", "c"]).unwrap();
df1

shape: (3, 3)
┌─────────────┬─────────┬────────┐
│ categorical ┆ numeric ┆ object │
│ ---         ┆ ---     ┆ ---    │
│ str         ┆ i32     ┆ str    │
╞═════════════╪═════════╪════════╡
│ d           ┆ 1       ┆ a      │
│ e           ┆ 2       ┆ b      │
│ f           ┆ 3       ┆ c      │
└─────────────┴─────────┴────────┘

In [46]:
let df2: DataFrame = df1.describe(None).unwrap();
df2

shape: (9, 4)
┌────────────┬─────────────┬─────────┬────────┐
│ describe   ┆ categorical ┆ numeric ┆ object │
│ ---        ┆ ---         ┆ ---     ┆ ---    │
│ str        ┆ str         ┆ f64     ┆ str    │
╞════════════╪═════════════╪═════════╪════════╡
│ count      ┆ 3           ┆ 3.0     ┆ 3      │
│ null_count ┆ 0           ┆ 0.0     ┆ 0      │
│ mean       ┆ null        ┆ 2.0     ┆ null   │
│ std        ┆ null        ┆ 1.0     ┆ null   │
│ …          ┆ …           ┆ …       ┆ …      │
│ 25%        ┆ null        ┆ 1.5     ┆ null   │
│ 50%        ┆ null        ┆ 2.0     ┆ null   │
│ 75%        ┆ null        ┆ 2.5     ┆ null   │
│ max        ┆ f           ┆ 3.0     ┆ c      │
└────────────┴─────────────┴─────────┴────────┘

### Head

In [47]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// First 10 rows
df.head(None)

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Bob     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

In [48]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// First row
df.head(Some(1))

shape: (1, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
└─────────┴─────┴────────┘

### Tail

In [49]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// Last 10 rows
df.tail(None)

shape: (2, 3)
┌─────────┬─────┬────────┐
│ Name    ┆ Age ┆ Height │
│ ---     ┆ --- ┆ ---    │
│ str     ┆ i32 ┆ f64    │
╞═════════╪═════╪════════╡
│ Mahmoud ┆ 23  ┆ 1.84   │
│ Bob     ┆ 27  ┆ 1.78   │
└─────────┴─────┴────────┘

In [50]:
let df: DataFrame = df!("Name" => &["Mahmoud", "Bob"],
                                      "Age" => &[23, 27],
                                      "Height" => &[1.84, 1.78]).unwrap();
// Last row
df.tail(Some(1))

shape: (1, 3)
┌──────┬─────┬────────┐
│ Name ┆ Age ┆ Height │
│ ---  ┆ --- ┆ ---    │
│ str  ┆ i32 ┆ f64    │
╞══════╪═════╪════════╡
│ Bob  ┆ 27  ┆ 1.78   │
└──────┴─────┴────────┘

### Indexing & Slicing

In [56]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();
{
    let name_col = &df["Name"];
    println!("{}", name_col);
}
// or
// let name_col1 = &df[0];

shape: (3,)
Series: 'Name' [str]
[
	"Mahmoud"
	"Ali"
	"ThePrimeagen"
]


In [69]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

let name_age_cols = df.select(["Name", "Age"]).unwrap();
name_age_cols

shape: (3, 2)
┌──────────────┬─────┐
│ Name         ┆ Age │
│ ---          ┆ --- │
│ str          ┆ i32 │
╞══════════════╪═════╡
│ Mahmoud      ┆ 22  │
│ Ali          ┆ 25  │
│ ThePrimeagen ┆ 29  │
└──────────────┴─────┘

In [73]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 29],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

{
    let name_col = df.column("Name");
    println!("{:?}", name_col);
}

Ok(shape: (3,)
Series: 'Name' [str]
[
	"Mahmoud"
	"Ali"
	"ThePrimeagen"
])


()

In [72]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();


let mask = df.column("Age").expect("Age must exist!").gt(25).unwrap();
let filtered_data = df.filter(&mask).unwrap();
println!("{:?}", filtered_data);

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘


In [74]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

df.slice(2, 3)

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘

In [75]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

df.transpose().unwrap()[0]

shape: (4,)
Series: 'column_0' [str]
[
	"Mahmoud"
	"22"
	"M"
	"50000"
]

## Data Cleaning

### Nulls Count

In [76]:
let df = df!("Name" => &[Some("Mahmoud"),  None, None],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 60000, 250000]).unwrap();

 df.null_count()

shape: (1, 4)
┌──────┬─────┬────────┬────────┐
│ Name ┆ Age ┆ Gender ┆ Salary │
│ ---  ┆ --- ┆ ---    ┆ ---    │
│ u32  ┆ u32 ┆ u32    ┆ u32    │
╞══════╪═════╪════════╪════════╡
│ 2    ┆ 0   ┆ 0      ┆ 0      │
└──────┴─────┴────────┴────────┘

### Duplicates

In [77]:
let df = df!("Name" => &["Mahmoud",  "Mahmoud", "ThePrimeagen"],
             "Age" => &[22, 22, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 50000, 250000]).unwrap();
let mask = df.is_duplicated().unwrap();
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (2, 4)
┌─────────┬─────┬────────┬────────┐
│ Name    ┆ Age ┆ Gender ┆ Salary │
│ ---     ┆ --- ┆ ---    ┆ ---    │
│ str     ┆ i32 ┆ str    ┆ i32    │
╞═════════╪═════╪════════╪════════╡
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
└─────────┴─────┴────────┴────────┘

### Unique Values

In [78]:
let df = df!("Name" => &["Mahmoud",  "Mahmoud", "ThePrimeagen"],
             "Age" => &[22, 22, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[50000, 50000, 250000]).unwrap();
let mask = df.is_unique().unwrap();
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (1, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ ThePrimeagen ┆ 36  ┆ M      ┆ 250000 │
└──────────────┴─────┴────────┴────────┘

### Drop

In [80]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                        "Color" => &["Red", "Yellow", "Green"]).unwrap();
let df_remain = df.drop("Color").unwrap(); 
println!("{}", df_remain);
println!("{}", df); // the original DataFrame

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘
shape: (3, 2)
┌───────┬────────┐
│ Fruit ┆ Color  │
│ ---   ┆ ---    │
│ str   ┆ str    │
╞═══════╪════════╡
│ Apple ┆ Red    │
│ Apple ┆ Yellow │
│ Pear  ┆ Green  │
└───────┴────────┘


In [81]:
// `df` must be `mut` because `drop_in_place` modifies the DataFrame itself.
let mut df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                      "Color" => &["Red", "Yellow", "Green"]).unwrap();
df.drop_in_place("Color"); // removes the "Color" column from df in place; the call's return value is discarded here
df

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘

In [84]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                      "Color" => &["Red", "Yellow", "Green"]).unwrap();
// Drop several columns by name in one call, producing a new DataFrame.
// NOTE(review): the empty-string name matches no column in `df`; the recorded output shows
// that only "Color" was removed. Presumably a placeholder for a second column name — confirm.
let df_dropped_col = df.drop_many(&["Color", ""]);
df_dropped_col

shape: (3, 1)
┌───────┐
│ Fruit │
│ ---   │
│ str   │
╞═══════╡
│ Apple │
│ Apple │
│ Pear  │
└───────┘

In [85]:
let df: DataFrame = df!("Fruit" => &["Apple", "Apple", "Pear"],
                                      "Color" => &[Some("Red"), None, None]).unwrap();
let df_clean = df.drop_nulls::<String>(None).unwrap();
df_clean

shape: (1, 2)
┌───────┬───────┐
│ Fruit ┆ Color │
│ ---   ┆ ---   │
│ str   ┆ str   │
╞═══════╪═══════╡
│ Apple ┆ Red   │
└───────┴───────┘

In [86]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();
let mask = df.column("Salary").expect("Salary must exist!").is_not_null();
mask.head(None)

shape: (3,)
ChunkedArray: 'Salary' [bool]
[
	true
	true
	false
]

In [87]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();
// Build the boolean mask in this cell instead of relying on a `mask` variable
// left over from a previously executed cell, so the cell runs on its own.
let mask = df.column("Salary").expect("Salary must exist!").is_not_null();
// Keep only the rows whose mask entry is true, i.e. rows with a non-null Salary.
let filtered_data = df.filter(&mask).unwrap();
filtered_data

shape: (2, 4)
┌─────────┬─────┬────────┬────────┐
│ Name    ┆ Age ┆ Gender ┆ Salary │
│ ---     ┆ --- ┆ ---    ┆ ---    │
│ str     ┆ i32 ┆ str    ┆ i32    │
╞═════════╪═════╪════════╪════════╡
│ Mahmoud ┆ 22  ┆ M      ┆ 50000  │
│ Ali     ┆ 25  ┆ M      ┆ 60000  │
└─────────┴─────┴────────┴────────┘

### Fill

In [88]:
// `fill_null` returns a new DataFrame and leaves `df` untouched, so `df` does not need `mut`.
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

// Forward fill: the null Salary in the last row takes the previous non-null value (60000).
let filtered_nulls = df.fill_null(FillNullStrategy::Forward(None)).unwrap();
filtered_nulls

shape: (3, 4)
┌──────────────┬─────┬────────┬────────┐
│ Name         ┆ Age ┆ Gender ┆ Salary │
│ ---          ┆ --- ┆ ---    ┆ ---    │
│ str          ┆ i32 ┆ str    ┆ i32    │
╞══════════════╪═════╪════════╪════════╡
│ Mahmoud      ┆ 22  ┆ M      ┆ 50000  │
│ Ali          ┆ 25  ┆ M      ┆ 60000  │
│ ThePrimeagen ┆ 36  ┆ M      ┆ 60000  │
└──────────────┴─────┴────────┴────────┘

## Measures of central tendency

### Mean

In [89]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.mean()

shape: (1, 4)
┌──────┬───────────┬────────┬─────────┐
│ Name ┆ Age       ┆ Gender ┆ Salary  │
│ ---  ┆ ---       ┆ ---    ┆ ---     │
│ str  ┆ f64       ┆ str    ┆ f64     │
╞══════╪═══════════╪════════╪═════════╡
│ null ┆ 27.666667 ┆ null   ┆ 55000.0 │
└──────┴───────────┴────────┴─────────┘

### Median

In [90]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.median()

shape: (1, 4)
┌──────┬──────┬────────┬─────────┐
│ Name ┆ Age  ┆ Gender ┆ Salary  │
│ ---  ┆ ---  ┆ ---    ┆ ---     │
│ str  ┆ f64  ┆ str    ┆ f64     │
╞══════╪══════╪════════╪═════════╡
│ null ┆ 25.0 ┆ null   ┆ 55000.0 │
└──────┴──────┴────────┴─────────┘

## Measures of spread

### std

In [91]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.std(1)

shape: (1, 4)
┌──────┬──────────┬────────┬─────────────┐
│ Name ┆ Age      ┆ Gender ┆ Salary      │
│ ---  ┆ ---      ┆ ---    ┆ ---         │
│ str  ┆ f64      ┆ str    ┆ f64         │
╞══════╪══════════╪════════╪═════════════╡
│ null ┆ 7.371115 ┆ null   ┆ 7071.067812 │
└──────┴──────────┴────────┴─────────────┘

### var

In [93]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.var(1)

shape: (1, 4)
┌──────┬───────────┬────────┬────────┐
│ Name ┆ Age       ┆ Gender ┆ Salary │
│ ---  ┆ ---       ┆ ---    ┆ ---    │
│ str  ┆ f64       ┆ str    ┆ f64    │
╞══════╪═══════════╪════════╪════════╡
│ null ┆ 54.333333 ┆ null   ┆ 5e7    │
└──────┴───────────┴────────┴────────┘

## Ndarray

In [94]:
let df = df!("Name" => &["Mahmoud", "Ali", "ThePrimeagen"],
             "Age" => &[22, 25, 36],
             "Gender" => &["M", "M", "M"],
             "Salary" => &[Some(50000), Some(60000), None]).unwrap();

df.to_ndarray::<Float64Type>().unwrap()

[[NaN, 22.0, NaN, 50000.0],
 [NaN, 25.0, NaN, 60000.0],
 [NaN, 36.0, NaN, NaN]], shape=[3, 4], strides=[1, 3], layout=Ff (0xa), const ndim=2