# Slicing lists

In [6]:
breeds = ["Labrador", "Poodle", "Chow Chow", "Labrador", "Chihuahua", "St. Bernard"]
breeds

['Labrador', 'Poodle', 'Chow Chow', 'Labrador', 'Chihuahua', 'St. Bernard']

In [7]:
# return elements 3 to 5
breeds[2:5]

['Chow Chow', 'Labrador', 'Chihuahua']

In [8]:
# return first 3 elements
breeds[:3]

['Labrador', 'Poodle', 'Chow Chow']

In [9]:
# return whole list
breeds[:]

['Labrador', 'Poodle', 'Chow Chow', 'Labrador', 'Chihuahua', 'St. Bernard']

# Slicing dataframes

In [10]:
import pandas as pd

dogs = pd.read_csv("dogs_formatted.csv")
print(dogs)

     name             breed  color  height_cm  weight_kg date_of_birth
0   Bella         Chihuahua  Brown         18          2    2018-02-05
1   Amigo          Labrador  Black         59         35    2016-08-12
2  Trevis       St. Bernard  Brown         77         73    2019-07-24
3   Golin             Husky  White         55         30    2015-06-18
4    Lucy          Labrador  White         51         26    2020-04-29
5     Max  Golden Retriever  Brown         49         21    2014-01-20
6    Otto            Poodle  Brown         42         20    2013-06-27
7    Rexo   German Shepherd  Brown         54         24    2018-05-21


## Slicing index

### Before slicing we need to sort the index

In [12]:
# set index, sort index for breed and color
dogs_sort = dogs.set_index(["breed", "color"]).sort_index()
dogs_sort

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Brown,Bella,18,2,2018-02-05
German Shepherd,Brown,Rexo,54,24,2018-05-21
Golden Retriever,Brown,Max,49,21,2014-01-20
Husky,White,Golin,55,30,2015-06-18
Labrador,Black,Amigo,59,35,2016-08-12
Labrador,White,Lucy,51,26,2020-04-29
Poodle,Brown,Otto,42,20,2013-06-27
St. Bernard,Brown,Trevis,77,73,2019-07-24


### Slicing the outer index

In [13]:
# be aware - last value is included!
dogs_sort.loc["Husky":"St. Bernard"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Husky,White,Golin,55,30,2015-06-18
Labrador,Black,Amigo,59,35,2016-08-12
Labrador,White,Lucy,51,26,2020-04-29
Poodle,Brown,Otto,42,20,2013-06-27
St. Bernard,Brown,Trevis,77,73,2019-07-24


### Slicing the inner index

In [14]:
# correct way = include first and last position as tuples
dogs_sort.loc[("Chihuahua", "Brown"):("Labrador", "Black")]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Brown,Bella,18,2,2018-02-05
German Shepherd,Brown,Rexo,54,24,2018-05-21
Golden Retriever,Brown,Max,49,21,2014-01-20
Husky,White,Golin,55,30,2015-06-18
Labrador,Black,Amigo,59,35,2016-08-12


### Slicing columns

In [16]:
# return all rows, but slice columns
dogs_sort.loc[:, "name": "weight_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chihuahua,Brown,Bella,18,2
German Shepherd,Brown,Rexo,54,24
Golden Retriever,Brown,Max,49,21
Husky,White,Golin,55,30
Labrador,Black,Amigo,59,35
Labrador,White,Lucy,51,26
Poodle,Brown,Otto,42,20
St. Bernard,Brown,Trevis,77,73


#### Slicing twice

In [20]:
dogs_sort.loc[("Husky", "White"):("Labrador", "Black"), 
              "name": "height_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1
Husky,White,Golin,55
Labrador,Black,Amigo,59
Labrador,White,Lucy,51


### Date slicing

In [24]:
dogs = pd.read_csv("dogs_formatted.csv")

dogs_dt = dogs.set_index("date_of_birth").sort_index()
dogs_dt

Unnamed: 0_level_0,name,breed,color,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-06-27,Otto,Poodle,Brown,42,20
2014-01-20,Max,Golden Retriever,Brown,49,21
2015-06-18,Golin,Husky,White,55,30
2016-08-12,Amigo,Labrador,Black,59,35
2018-02-05,Bella,Chihuahua,Brown,18,2
2018-05-21,Rexo,German Shepherd,Brown,54,24
2019-07-24,Trevis,St. Bernard,Brown,77,73
2020-04-29,Lucy,Labrador,White,51,26


In [28]:
# return dogs born between 
dogs_dt.loc["2018-01-01":"2020-04-29"]

Unnamed: 0_level_0,name,breed,color,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-02-05,Bella,Chihuahua,Brown,18,2
2018-05-21,Rexo,German Shepherd,Brown,54,24
2019-07-24,Trevis,St. Bernard,Brown,77,73
2020-04-29,Lucy,Labrador,White,51,26


In [29]:
dogs_dt.loc["2015":"2020"]

Unnamed: 0_level_0,name,breed,color,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-06-18,Golin,Husky,White,55,30
2016-08-12,Amigo,Labrador,Black,59,35
2018-02-05,Bella,Chihuahua,Brown,18,2
2018-05-21,Rexo,German Shepherd,Brown,54,24
2019-07-24,Trevis,St. Bernard,Brown,77,73


### Subsetting by row/column number

In [33]:
# be aware - the final values are not included 
print(dogs.iloc[2:5, 1:4])

         breed  color  height_cm
2  St. Bernard  Brown         77
3        Husky  White         55
4     Labrador  White         51
