## Imports and data loading

In [3]:
import numpy as np
import pandas as pd

# Load the dog records from dogs.csv (path is relative to the working directory)
# into a DataFrame, then print the whole table as plain text.
dogs = pd.read_csv("dogs.csv")
print(dogs)

     name             breed  color  height_cm  weight_kg date_of_birth
0   Bella         Chihuahua  Brown         18          2    2018-02-05
1   Amigo          Labrador  Black         59         35    2016-08-12
2  Trevis       St. Bernard  Brown         77         73    2019-07-24
3   Golin             Husky  White         55         30    2015-06-18
4    Lucy          Labrador  White         51         26    2020-04-29
5     Max  Golden Retriever  Brown         49         21    2014-01-20
6    Otto            Poodle  Brown         42         20    2013-06-27
7    Rexo   German Shepherd  Brown         54         24    2018-05-21


## Sorting

In [5]:
# Order the rows from the lightest dog to the heaviest (ascending is the default direction).
dogs.sort_values(by="weight_kg")

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Chihuahua,Brown,18,2,2018-02-05
6,Otto,Poodle,Brown,42,20,2013-06-27
5,Max,Golden Retriever,Brown,49,21,2014-01-20
7,Rexo,German Shepherd,Brown,54,24,2018-05-21
4,Lucy,Labrador,White,51,26,2020-04-29
3,Golin,Husky,White,55,30,2015-06-18
1,Amigo,Labrador,Black,59,35,2016-08-12
2,Trevis,St. Bernard,Brown,77,73,2019-07-24


In [6]:
# Order the rows from the heaviest dog to the lightest.
dogs.sort_values(by="weight_kg", ascending=False)

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
2,Trevis,St. Bernard,Brown,77,73,2019-07-24
1,Amigo,Labrador,Black,59,35,2016-08-12
3,Golin,Husky,White,55,30,2015-06-18
4,Lucy,Labrador,White,51,26,2020-04-29
7,Rexo,German Shepherd,Brown,54,24,2018-05-21
5,Max,Golden Retriever,Brown,49,21,2014-01-20
6,Otto,Poodle,Brown,42,20,2013-06-27
0,Bella,Chihuahua,Brown,18,2,2018-02-05


### Sorting by multiple variables

In [8]:
# Sort by weight first; height is only used to order dogs that share the same weight.
weight_then_height = ["weight_kg", "height_cm"]
dogs.sort_values(by=weight_then_height)

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Chihuahua,Brown,18,2,2018-02-05
6,Otto,Poodle,Brown,42,20,2013-06-27
5,Max,Golden Retriever,Brown,49,21,2014-01-20
7,Rexo,German Shepherd,Brown,54,24,2018-05-21
4,Lucy,Labrador,White,51,26,2020-04-29
3,Golin,Husky,White,55,30,2015-06-18
1,Amigo,Labrador,Black,59,35,2016-08-12
2,Trevis,St. Bernard,Brown,77,73,2019-07-24


In [9]:
# Give one direction flag per sort key: weight ascending, then height descending
# among dogs of equal weight. No two dogs in this data share a weight, so the
# height direction does not change the row order here.
sort_columns = ["weight_kg", "height_cm"]
sort_ascending = [True, False]
dogs.sort_values(by=sort_columns, ascending=sort_ascending)

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Chihuahua,Brown,18,2,2018-02-05
6,Otto,Poodle,Brown,42,20,2013-06-27
5,Max,Golden Retriever,Brown,49,21,2014-01-20
7,Rexo,German Shepherd,Brown,54,24,2018-05-21
4,Lucy,Labrador,White,51,26,2020-04-29
3,Golin,Husky,White,55,30,2015-06-18
1,Amigo,Labrador,Black,59,35,2016-08-12
2,Trevis,St. Bernard,Brown,77,73,2019-07-24


## Subsetting columns

In [13]:
# Single brackets with one column name return that column as a Series.
dogs["name"]

0     Bella
1     Amigo
2    Trevis
3     Golin
4      Lucy
5       Max
6      Otto
7      Rexo
Name: name, dtype: object

In [14]:
# Double brackets (a list holding one column name) return a one-column DataFrame.
dogs[["name"]]

Unnamed: 0,name
0,Bella
1,Amigo
2,Trevis
3,Golin
4,Lucy
5,Max
6,Otto
7,Rexo


In [15]:
# Select several columns at once by passing a list of column names.
breed_and_height = ["breed", "height_cm"]
dogs[breed_and_height]

Unnamed: 0,breed,height_cm
0,Chihuahua,18
1,Labrador,59
2,St. Bernard,77
3,Husky,55
4,Labrador,51
5,Golden Retriever,49
6,Poodle,42
7,German Shepherd,54


## Subsetting rows

In [16]:
# Boolean mask: True for each dog taller than 50 cm, False otherwise.
dogs["height_cm"] > 50

0    False
1     True
2     True
3     True
4     True
5    False
6    False
7     True
Name: height_cm, dtype: bool

In [17]:
# Use the boolean mask to keep only the rows for dogs taller than 50 cm.
is_taller_than_50cm = dogs["height_cm"] > 50
dogs[is_taller_than_50cm]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
1,Amigo,Labrador,Black,59,35,2016-08-12
2,Trevis,St. Bernard,Brown,77,73,2019-07-24
3,Golin,Husky,White,55,30,2015-06-18
4,Lucy,Labrador,White,51,26,2020-04-29
7,Rexo,German Shepherd,Brown,54,24,2018-05-21


## Subsetting based on text data

In [18]:
# Keep only the rows whose breed is exactly "Labrador" (the match is case-sensitive).
dogs.loc[dogs["breed"].eq("Labrador")]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
1,Amigo,Labrador,Black,59,35,2016-08-12
4,Lucy,Labrador,White,51,26,2020-04-29


## Subsetting based on dates

In [19]:
# Dogs born before 2016.
# date_of_birth is loaded from the CSV as text, so convert it to datetimes before
# comparing. This compares real dates instead of relying on the alphabetical
# ordering of the strings, which only works while every value is zero-padded
# YYYY-MM-DD.
birth_dates = pd.to_datetime(dogs["date_of_birth"])
dogs[birth_dates < pd.Timestamp("2016-01-01")]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
3,Golin,Husky,White,55,30,2015-06-18
5,Max,Golden Retriever,Brown,49,21,2014-01-20
6,Otto,Poodle,Brown,42,20,2013-06-27


## Subsetting based on multiple conditions

In [21]:
# Combine two boolean masks with & so that only rows satisfying both are kept:
# brown dogs that are taller than 45 cm.
is_brown = dogs["color"] == "Brown"
is_taller_than_45cm = dogs["height_cm"] > 45
dogs[is_brown & is_taller_than_45cm]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
2,Trevis,St. Bernard,Brown,77,73,2019-07-24
5,Max,Golden Retriever,Brown,49,21,2014-01-20
7,Rexo,German Shepherd,Brown,54,24,2018-05-21


## Subsetting using .isin()

In [24]:
# Keep the rows whose color is any of the listed values.
is_black_or_white = dogs["color"].isin(["Black", "White"])
dogs[is_black_or_white]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
1,Amigo,Labrador,Black,59,35,2016-08-12
3,Golin,Husky,White,55,30,2015-06-18
4,Lucy,Labrador,White,51,26,2020-04-29


In [26]:
# Keep only the dogs whose name appears in a predefined list of names.
my_dogs = ["Lucy", "Max", "Rexo"]

is_my_dog = dogs["name"].isin(my_dogs)
selection = dogs[is_my_dog]
print(selection)

   name             breed  color  height_cm  weight_kg date_of_birth
4  Lucy          Labrador  White         51         26    2020-04-29
5   Max  Golden Retriever  Brown         49         21    2014-01-20
7  Rexo   German Shepherd  Brown         54         24    2018-05-21
