### Imports

In [17]:
import pandas as pd
import numpy as np

# Load the dog records from dogs.csv and preview the first five rows.
dogs = pd.read_csv("dogs.csv")
print(dogs.head())

     name        breed  color  height_cm  weight_kg date_of_birth
0   Bella    Chihuahua  Brown         18          2    2018-02-05
1   Amigo     Labrador  Black         59         35    2016-08-12
2  Trevis  St. Bernard  Brown         77         73    2019-07-24
3   Golin        Husky  White         55         30    2015-06-18
4    Lucy     Labrador  White         51         26    2020-04-29


# Basic statistics

####  Most common:
- <code><b>.mean() </b></code> - arithmetic mean<br>
- <code>.median() , .mode() </code> - middle value, most frequent value<br>
- <code>.min() , .max() </code>  - minimum, maximum<br> 
- <code>.var() , .std() </code>  - variance, standard deviation<br>
- <code>.sum() </code> - total of all values<br>

In [3]:
# mean() = the arithmetic average, i.e. where the center of the data is
dogs["height_cm"].mean()

50.625

In [5]:
# oldest dog = earliest date of birth
print(dogs["date_of_birth"].min())
# youngest dog = latest date of birth
print(dogs["date_of_birth"].max())

2013-06-27
2020-04-29


## Calculate percentiles of columns

In [9]:
# The .agg() method lets you compute custom summary statistics
def pct30(column):
    """Return the 30th percentile of ``column``.

    ``column`` is any object exposing a ``quantile`` method; in this
    notebook the function is handed to ``.agg()`` as a custom statistic.
    """
    thirtieth_percentile = column.quantile(0.3)
    return thirtieth_percentile

def pct40(column):
    """Return the 40th percentile of ``column``.

    ``column`` is any object exposing a ``quantile`` method; in this
    notebook the function is handed to ``.agg()`` as a custom statistic.
    """
    fortieth_percentile = column.quantile(0.4)
    return fortieth_percentile

In [10]:
# Several custom functions on one column -> one value per function, labelled by function name
dogs["weight_kg"].agg([pct30, pct40])

pct30    21.3
pct40    23.4
Name: weight_kg, dtype: float64

In [11]:
# One custom function on several columns -> one value per column
dogs[["weight_kg", "height_cm"]].agg(pct30)

weight_kg    21.3
height_cm    49.2
dtype: float64

In [20]:
def iqr(column):
    """Return the interquartile range of ``column``.

    The IQR is the distance between the 75th and the 25th percentile.
    ``column`` is any object exposing a ``quantile`` method.
    """
    upper_quartile = column.quantile(0.75)
    lower_quartile = column.quantile(0.25)
    return upper_quartile - lower_quartile

# Print the IQR and the median of the weight and height columns.
# The median is requested by name ("median") rather than as np.median:
# recent pandas versions emit a FutureWarning when a NumPy reduction is
# passed to .agg(), and recommend the string alias instead. The row label
# in the result is "median" either way.
print(dogs[["weight_kg", "height_cm"]].agg([iqr, "median"]))

        weight_kg  height_cm
iqr          10.5       8.75
median       25.0      52.50


## Cumulative statistics of a column

#### Cumulative statistics:
- <code><b> .cumsum() </b> </code> <br>
- <code> .cummax() </code> <br>
- <code> .cummin()</code> <br>
- <code> .cumprod()</code> <br>

In [13]:
# Raw weights, shown so the cumulative results can be compared against them
dogs["weight_kg"]

0     2
1    35
2    73
3    30
4    26
5    21
6    20
7    24
Name: weight_kg, dtype: int64

In [12]:
# Running total: each row is the sum of all weights up to and including that row
dogs["weight_kg"].cumsum()

0      2
1     37
2    110
3    140
4    166
5    187
6    207
7    231
Name: weight_kg, dtype: int64

In [29]:
# Add a new column from a plain Python list; the list needs one entry per row of `dogs`.
# NOTE(review): the unit of these amounts is not stated anywhere in this notebook — confirm.
weekly_food = [5, 14, 28, 15, 20, 16, 13, 22]
# This assignment modifies `dogs` in place, so later cells see the extra column.
# NOTE(review): the recorded output shows this column as int32 while the CSV-loaded
# integer columns are int64 — presumably NumPy's platform-default integer; confirm,
# and pass an explicit dtype if consistency matters.
dogs["weekly_food"] = np.array(weekly_food)
print(dogs)

     name             breed  color  height_cm  weight_kg date_of_birth  \
0   Bella         Chihuahua  Brown         18          2    2018-02-05   
1   Amigo          Labrador  Black         59         35    2016-08-12   
2  Trevis       St. Bernard  Brown         77         73    2019-07-24   
3   Golin             Husky  White         55         30    2015-06-18   
4    Lucy          Labrador  White         51         26    2020-04-29   
5     Max  Golden Retriever  Brown         49         21    2014-01-20   
6    Otto            Poodle  Brown         42         20    2013-06-27   
7    Rexo   German Shepherd  Brown         54         24    2018-05-21   

   weekly_food  
0            5  
1           14  
2           28  
3           15  
4           20  
5           16  
6           13  
7           22  


In [30]:
# Running maximum: each row is the largest weekly_food value seen so far
dogs["weekly_food"].cummax()

0     5
1    14
2    28
3    28
4    28
5    28
6    28
7    28
Name: weekly_food, dtype: int32