In [2]:
# list comprehension

list_of_squares = []

for x in range(1, 11):
    list_of_squares.append(x**2)

print(list_of_squares)

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]


In [21]:
list_of_squares = tuple(x**2 for x in range(1, 11))
list_of_squares

(1, 4, 9, 16, 25, 36, 49, 64, 81, 100)

In [20]:
next(list_of_squares)

25

In [22]:
squares_dict = {x: x**2 for x in range(1, 11)}

In [23]:
squares_dict.items()

dict_items([(1, 1), (2, 4), (3, 9), (4, 16), (5, 25), (6, 36), (7, 49), (8, 64), (9, 81), (10, 100)])

In [25]:
[val ** .5 for val in squares_dict.values()]

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]

In [14]:
# with an if statement
list_of_odd_squares = [x**2 for x in range(1, 11) if (x % 3 == 0 or x % 2 == 0)]
list_of_odd_squares

[4, 9, 16, 36, 64, 81, 100]

In [9]:
# nested list comprehension
matrix = [[1, 2, 3], [4, 5], [6, 7, 8, 9]] 
  
# Nested List Comprehension to flatten a given 2-D matrix 
flatten_matrix = [val for sublist in matrix for val in sublist] 
  
print(flatten_matrix) 

[1, 2, 3, 4, 5, 6, 7, 8, 9]


## Introduction

#### Our goals today are to be able to: <br/>

- Investigate table data in Pandas
- Manipulate Pandas DataFrames

### Activation:

![excel2](img/excelpic2.jpg)

Most people have used Microsoft Excel or Google sheets. But what are the limitations of excel?

- [Take a minute to read this article](https://www.bbc.com/news/magazine-22223190)
- make a list of problems excel presents

How is using python different?

Python
- create documentation of processes as you code
- reduces chances for human error
- do "drag and drop"
- repeatable
- transparent

## Pandas

<img src="https://cdn-images-1.medium.com/max/1600/1*9IU5fBzJisilYjRAi-f55Q.png" width=600>  




- The data manipulation capabilities of Pandas are built on top of the numpy library.
- Pandas dataframe object represents a spreadsheet with cell values, column names, and row index labels.

### 1. Importing and reading data with Pandas!

#### Let's use pandas to read some csv files so we can interact with them.



In [26]:
# First, let's check which directory we are in so the files we expect to see are there.
!pwd
!ls -la

/Users/enkeboll/code/fis/hbs-ds-060121/module-1/day-5-pandas-1
total 48
drwxr-xr-x   6 enkeboll  staff    192 Jun  5 10:52 [1m[34m.[m[m
drwxr-xr-x   8 enkeboll  staff    256 Jun  5 10:24 [1m[34m..[m[m
drwxr-xr-x   3 enkeboll  staff     96 Jun  5 10:36 [1m[34m.ipynb_checkpoints[m[m
drwxr-xr-x   4 enkeboll  staff    128 Aug 30  2019 [1m[34mdata[m[m
drwxr-xr-x@ 12 enkeboll  staff    384 Jul 23  2019 [1m[34mimg[m[m
-rwxr-xr-x   1 enkeboll  staff  23548 Jun  5 10:52 [35mpandas-1.ipynb[m[m


In [27]:
!ls -la data

total 16
drwxr-xr-x  4 enkeboll  staff  128 Aug 30  2019 [1m[34m.[m[m
drwxr-xr-x  6 enkeboll  staff  192 Jun  5 10:52 [1m[34m..[m[m
-rw-r--r--@ 1 enkeboll  staff   62 Jun  5  2019 example1.csv
-rw-r--r--@ 1 enkeboll  staff  238 Jun  5  2019 made_up_jobs.csv


In [28]:
import pandas as pd

example_csv = pd.read_csv('data/example1.csv')

In [30]:
!cat data/example1.csv

Title1,Title2,Title3
one,two,three
example1,example2,example3


In [29]:
example_csv

Unnamed: 0,Title1,Title2,Title3
0,one,two,three
1,example1,example2,example3


In [31]:
type(example_csv)

pandas.core.frame.DataFrame

In [35]:
type(example_csv.Title3)

pandas.core.series.Series

In [37]:
example_csv.Title1.values

array(['one', 'example1'], dtype=object)

There is also `read_excel`, `read_html`, and many other pandas `read_` functions.  
http://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

In [38]:
example_csv.describe()

Unnamed: 0,Title1,Title2,Title3
count,2,2,2
unique,2,2,2
top,one,two,example3
freq,1,1,1


In [39]:
example_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
Title1    2 non-null object
Title2    2 non-null object
Title3    2 non-null object
dtypes: object(3)
memory usage: 176.0+ bytes


Try loading in the example file in the `data` directory called `made_up_jobs.csv` using pandas.

In [74]:
!cat data/made_up_jobs.csv

ID,Name,Job,Years Employed
0,Bob Bobberty,Underwater Basket Weaver,13
1,Susan Smells,Salad Spinner,5
2,Alex Lastname,Productivity Manager,2
3,Rudy P.,Being cool,55
4,Rudy G.,Being compared to Rudy P,50
5,Sir Wellington,Cheese Stacker, 10


In [75]:
# read in your csv here!
muj = pd.read_csv('data/made_up_jobs.csv', index_col='ID')

# remember that it's nice to be able to look at your data, so let's do that here, too.
muj

Unnamed: 0_level_0,Name,Job,Years Employed
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Bob Bobberty,Underwater Basket Weaver,13
1,Susan Smells,Salad Spinner,5
2,Alex Lastname,Productivity Manager,2
3,Rudy P.,Being cool,55
4,Rudy G.,Being compared to Rudy P,50
5,Sir Wellington,Cheese Stacker,10


In [117]:
muj.Name
# equal to
muj['Name']

ID
0      Bob Bobberty
1      Susan Smells
2     Alex Lastname
3           Rudy P.
4           Rudy G.
5    Sir Wellington
Name: Name, dtype: object

In [118]:
muj['Years Employed'] = muj['Years Employed'].astype(float)
muj.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 5
Data columns (total 3 columns):
Name              6 non-null object
Job               6 non-null object
Years Employed    6 non-null float64
dtypes: float64(1), object(2)
memory usage: 192.0+ bytes


In [44]:
import numpy as np

In [47]:
random_df = pd.DataFrame(np.random.random((10000, 10)))

In [48]:
random_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.262970,0.795147,0.627058,0.047432,0.163821,0.778130,0.045621,0.675131,0.529808,0.595938
1,0.038863,0.192199,0.816843,0.434590,0.317065,0.687343,0.446269,0.229637,0.156577,0.952067
2,0.321275,0.876173,0.977773,0.883401,0.655774,0.374917,0.815883,0.660498,0.819442,0.488484
3,0.414978,0.771785,0.513682,0.211683,0.190344,0.475309,0.104562,0.902778,0.900178,0.712408
4,0.854507,0.635518,0.542003,0.456779,0.827180,0.684076,0.399691,0.568243,0.740362,0.121907
...,...,...,...,...,...,...,...,...,...,...
9995,0.148498,0.772295,0.559093,0.430505,0.985291,0.113317,0.108912,0.821007,0.315096,0.085142
9996,0.198771,0.814615,0.406057,0.793394,0.449674,0.621126,0.931155,0.925804,0.813737,0.676221
9997,0.734184,0.481456,0.027673,0.447181,0.149031,0.378362,0.530358,0.662224,0.603779,0.455470
9998,0.483615,0.359299,0.236002,0.612880,0.675588,0.928716,0.616660,0.509620,0.032966,0.983777


In [54]:
random_df.size

100000

In [56]:
pd.DataFrame([[1, 2, 3], [4, None, 6]])

Unnamed: 0,0,1,2
0,1,2.0,3
1,4,,6


### 2. Utilizing and identifying Pandas objects

- What is a DataFrame object and what is a Series object? 
- How are they different from Python lists?

These are questions we will cover in this section. To start, let's start with this list of fruits.

In [57]:
fruits = ['Apple', 'Orange', 'Watermelon', 'Lemon', 'Mango']

print(fruits)

['Apple', 'Orange', 'Watermelon', 'Lemon', 'Mango']


In [59]:
[x.upper() for x in fruits]

['APPLE', 'ORANGE', 'WATERMELON', 'LEMON', 'MANGO']

In [60]:
fruits

['Apple', 'Orange', 'Watermelon', 'Lemon', 'Mango']

Using our list of fruits, we can create a pandas object called a 'series' which is much like an array or a vector.

In [61]:
fruits_series = pd.Series(fruits)

print(fruits_series)
type(fruits_series)

0         Apple
1        Orange
2    Watermelon
3         Lemon
4         Mango
dtype: object


pandas.core.series.Series

One difference between python **list objects** and pandas **series objects** is the fact that you can define the index manually for a **series objects**.

In [62]:
ind = ['a', 'b', 'c', 'd', 'e']

fruits_series = pd.Series(fruits, index=ind)

print(fruits_series)

a         Apple
b        Orange
c    Watermelon
d         Lemon
e         Mango
dtype: object


With a partner, create your own custom series from a list of lists.

In [70]:
list_of_lists = [['cat'], ['dog'], ['horse'], ['cow'], ['macaw']]

# create custom indices for your series
ind = ['a', 'b', 'c', 'd', 'e']

elements = [x[0].lower() for x in list_of_lists]

# create the series using your list objects
# You can use either a for loop or also pd.Series
list_of_lists_series = pd.Series(elements)

# print your series
print(list_of_lists_series)
type(list_of_lists_series)

0      cat
1      dog
2    horse
3      cow
4    macaw
dtype: object


pandas.core.series.Series

In [64]:
df = pd.DataFrame()

In [66]:
df['fruits'] = fruits_series

In [68]:
df['animals'] = list_of_lists_series

In [69]:
df

Unnamed: 0,fruits,animals
a,Apple,cat
b,Orange,dog
c,Watermelon,horse
d,Lemon,cow
e,Mango,macaw


In [72]:
df['new_animals'] = list_of_lists_series

In [179]:
df

Unnamed: 0,fruits,animals,new_animals
a,Apple,cat,
b,Orange,dog,
c,Watermelon,horse,
d,Lemon,cow,
e,Mango,macaw,


In [182]:
df.loc['a':'d', 'animals']

a      cat
b      dog
c    horse
d      cow
Name: animals, dtype: object

We can do a simliar thing with Python dictionaries. This time, however, we will create a DataFrame object from a python dictionary.

In [111]:
# Dictionary with list object in values
student_dict = {
    'name': ['Samantha', 'Alex', 'Dante'],
    'age': ['35', '17', '26'],
    'city': ['Houston', 'Seattle', 'New york']
}

student_dict.values()

dict_values([['Samantha', 'Alex', 'Dante'], ['35', '17', '26'], ['Houston', 'Seattle', 'New york']])

In [112]:
student_list = [
    {'name': 'Samantha', 'age': '35', 'city': 'Houston'},
    {'name': 'Alex', 'age': '17', 'city': 'Seattle'},
]

pd.DataFrame(student_list)

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle


In [146]:
students_df = pd.DataFrame(student_dict)

students_df

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle
2,Dante,26,New york


In [114]:
students_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
name    3 non-null object
age     3 non-null object
city    3 non-null object
dtypes: object(3)
memory usage: 200.0+ bytes


In [84]:
#to find data types of columns
students_df.dtypes

name    object
age     object
city    object
dtype: object

Let's change the data type of ages to int.

In [115]:
# We can also change a columns type but the change has to make sense.
students_df.age = students_df.age.astype(int)

#Uncomment line below and observe what happens when trying to convert student's name to int or float
# students_df.name = students_df.name.astype(int)

#How about what happens converting numeric to string
students_df.age = students_df.age.astype(int)

students_df.dtypes

name    object
age      int64
city    object
dtype: object

In [101]:
int('12345')

12345

In [93]:
students_df

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle
2,Dante,26,New york


We can also use a custom index for these items. For example, we might want them to be the individual student ID numbers.

In [119]:
school_ids = ['1111', '1145', '0096']

# Notice here we use pd.DataFrame not pd.Series as we did for a pandas series.
students_df = pd.DataFrame(student_dict, index=school_ids)

students_df.head()

Unnamed: 0,name,age,city
1111,Samantha,35,Houston
1145,Alex,17,Seattle
96,Dante,26,New york


In [127]:
students_df.index = students_df.index.rename('id')

In [128]:
students_df

Unnamed: 0_level_0,name,age,city
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1111,Samantha,35,Houston
1145,Alex,17,Seattle
96,Dante,26,New york


Using Pandas, we can also rename column names.

In [129]:
students_df.columns

Index(['name', 'age', 'city'], dtype='object')

In [136]:
students_df.columns = [x.replace(' ', '').upper() for x in students_df.columns]
students_df.head()

Unnamed: 0_level_0,NAME,AGE,HOME
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1111,Samantha,35,Houston
1145,Alex,17,Seattle
96,Dante,26,New york


Or, we can also change the column names using the rename function.

In [139]:
students_df.rename(columns={"AGE": "YEARS"}, inplace=True)

In [140]:
# Notice what happens when we print students_df
students_df

Unnamed: 0_level_0,NAME,YEARS,HOME
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1111,Samantha,35,Houston
1145,Alex,17,Seattle
96,Dante,26,New york


In [None]:
# If you want the file to save over itself, use the option `inplace = True`.
students_df = students_df.rename(columns={'AGE': 'YEARS'})
students_df.head()

Similarly, there is a tool to remove rows and columns from your DataFrame

In [143]:
skinny_df = students_df.drop(columns=['YEARS', 'HOME'])
skinny_df

Unnamed: 0_level_0,NAME
id,Unnamed: 1_level_1
1111,Samantha
1145,Alex
96,Dante


In [144]:
#Notice again what happens if we print students_df 
students_df

Unnamed: 0_level_0,NAME,YEARS,HOME
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1111,Samantha,35,Houston
1145,Alex,17,Seattle
96,Dante,26,New york


In [145]:
students_df.drop(columns=['YEARS', 'HOME'], inplace=True)
students_df

Unnamed: 0_level_0,NAME
id,Unnamed: 1_level_1
1111,Samantha
1145,Alex
96,Dante


If you want the file to save over itself, use the option `inplace = True`.

Every function has options. Let's read more about `drop` [here](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html)

In [153]:
students_df = students_df.drop([1], axis=0)

In [159]:
students_df.index = students_df.index.rename('Andy\'s Index')

In [160]:
students_df

Unnamed: 0_level_0,name,age
Andy's Index,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Samantha,35
2,Dante,26


In [162]:
students_df.drop([2], axis=0)

Unnamed: 0_level_0,name,age
Andy's Index,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Samantha,35


In [163]:
my_df = pd.DataFrame([[1, 2, 3], [4, None, 6]])
my_df

Unnamed: 0,0,1,2
0,1,2.0,3
1,4,,6


In [164]:
my_df.dropna()

Unnamed: 0,0,1,2
0,1,2.0,3


In [167]:
my_df = my_df.dropna()
my_df

Unnamed: 0,0,1,2
0,1,2.0,3


### 3. Filtering Data Using Pandas
There are several ways to grab particular data from a DataFrame. 
- Python lists allow for selection of data only through integer location. 
- You can use a single integer or slice notation to make the selection but NOT a list of integers.
- Dictionaries only allow selection with a single label. Slices and lists of labels are not allowed.

In [168]:
l = [1, 2, 3, 4, 5]
l[[0, 3]]

TypeError: list indices must be integers or slices, not list

### DataFrames can be indexed by column name (label) or row name (index) or by position.   
#### The `.loc` method is used for indexing by name.  
#### While `.iloc` is used for indexing by number.

In [225]:
student_dict = {
    'name': ['Samantha', 'Alex', 'Dante'],
    'age': ['35', '17', '26'],
    'city': ['Houston', 'Seattle', 'New york']
}

students_df = pd.DataFrame(student_dict)

In [178]:
students_df.loc[0:1, 'name':'city']

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle


### Let's take a look at `.iloc`
#### `.iloc` takes slices based on index position.
#### `.iloc` stands for integer location so that should help with remember what it does
#### `.iloc`[row , column]

In [183]:
# returns the first row
students_df.iloc[0]

name    Samantha
age           35
city     Houston
Name: 0, dtype: object

In [184]:
# returns the first column
students_df.iloc[:, 0]

0    Samantha
1        Alex
2       Dante
Name: name, dtype: object

In [188]:
my_list = [1,2,3,4,5]
my_list[0:2]

[1, 2]

In [187]:
students_df.loc[0:2]

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle
2,Dante,26,New york


In [185]:
# returns first two rows notice that ILOC performs regular python slicing.
students_df.iloc[0:2]

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle


In [189]:
# returns the first two columns
students_df.iloc[:, 0:2]

Unnamed: 0,name,age
0,Samantha,35
1,Alex,17
2,Dante,26


In [190]:
# returns first row and columns 1 and 2
students_df.iloc[0:1, 0:2]

Unnamed: 0,name,age
0,Samantha,35


### How would we use `.iloc` to return the last item in the last row?


In [194]:
# return the last item in the last row using iloc
students_df.iloc[-1, -1]

'New york'

### How would we use `.iloc` to return the last item in the last column?


In [195]:
# return the last item in the last column using iloc
students_df.iloc[-1, -1]

'New york'

### What if we only want certain columns or rows?

In [196]:
# Don't do students_df.iloc[0, 2]
students_df.iloc[[0, 2]]

Unnamed: 0,name,age,city
0,Samantha,35,Houston
2,Dante,26,New york


In [197]:
students_df.iloc[[0, 2], [0, 2]]

Unnamed: 0,name,city
0,Samantha,Houston
2,Dante,New york


### Let's take a look at `.loc`
#### Label based method. 
#### Names or labels of the index is used when taking slices.
#### Also supports boolean subsetting.

In [198]:
# We will use loc to return rows and columns based on labels. Let's look at the students_df DataFrame again.
students_df

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle
2,Dante,26,New york


In [199]:
# returns the student information associated with index 0
students_df.loc[0]

name    Samantha
age           35
city     Houston
Name: 0, dtype: object

In [201]:
# returns the student information for row index 0 to 2 inclusive.
# note iloc would return normal python slicing not including 2 as demonstrated above.
students_df.loc[:]

Unnamed: 0,name,age,city
0,Samantha,35,Houston
1,Alex,17,Seattle
2,Dante,26,New york


In [202]:
# returns the column labeled 'age'
students_df.loc[:, 'age']

0    35
1    17
2    26
Name: age, dtype: object

In [203]:
# returns the column labeled 'age' and index values 1 to 2.
# gives us the values of the rows with index from 1 to 2 (inclusive)
# and columns labeled age"
students_df.loc[1:2, 'age']

1    17
2    26
Name: age, dtype: object

In [204]:
# returns the column labeled 'age' and index values 1 to 2.
# gives us the values of the rows with index from 1 to 2 (inclusive)
# and columns labeled age to city (inclusive)"
students_df.loc[1:2, 'age':'city']

Unnamed: 0,age,city
1,17,Seattle
2,26,New york


In [205]:
# What should we get?
students_df.loc[1:2, ['name', 'city']]

Unnamed: 0,name,city
1,Alex,Seattle
2,Dante,New york


In [206]:
# How about?
students_df.loc[[0, 2], ['name', 'city']]

Unnamed: 0,name,city
0,Samantha,Houston
2,Dante,New york


In [207]:
# if index rearranged
school_ids = ['5', '11', '3']
students_df = pd.DataFrame(student_dict, index=school_ids)

In [208]:
students_df

Unnamed: 0,name,age,city
5,Samantha,35,Houston
11,Alex,17,Seattle
3,Dante,26,New york


In [209]:
# What should we get now?
students_df.loc[[0, 2], ['name', 'city']]

KeyError: "None of [Int64Index([0, 2], dtype='int64')] are in the [index]"

In [218]:
# What should we get now?
students_df.loc['5':'3', ['name', 'city']]

Unnamed: 0,name,city
5,Samantha,Houston
11,Alex,Seattle
3,Dante,New york


In [219]:
students_df.iloc[0:2]

Unnamed: 0,name,age,city
5,Samantha,35,Houston
11,Alex,17,Seattle


In [232]:
new_df = students_df.reset_index().set_index('name').loc[:, 'index':'age']

In [233]:
new_df

Unnamed: 0_level_0,index,age
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Samantha,0,35
Alex,1,17
Dante,2,26


In [226]:
students_df.set_index("name")

Unnamed: 0_level_0,age,city
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Samantha,35,Houston
Alex,17,Seattle
Dante,26,New york


In [220]:
students_df.set_index("name", inplace=True)
students_df

Unnamed: 0_level_0,age,city
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Samantha,35,Houston
Alex,17,Seattle
Dante,26,New york


In [221]:
students_df.loc[['Samantha']]

Unnamed: 0_level_0,age,city
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Samantha,35,Houston


In [222]:
# Subsetting nonconsecutive rows
students_df.loc[['Samantha', 'Dante']]

Unnamed: 0_level_0,age,city
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Samantha,35,Houston
Dante,26,New york


In [224]:
# Samantha to the end
students_df.loc['Alex':]

Unnamed: 0_level_0,age,city
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alex,17,Seattle
Dante,26,New york


In [None]:
# return the first and last rows using one loc command

### Boolean Subsetting

In [234]:
student_dict = {
    'name': ['Samantha', 'Alex', 'Dante', 'Samantha'],
    'age': ['35', '17', '26', '21'],
    'city': ['Houston', 'Seattle', 'New york', 'Atlanta'],
    'state': ['Texas', 'Washington', 'New York', 'Georgia']
}

students_df = pd.DataFrame(student_dict)

In [235]:
# The statement data[‘name’] == ‘Samantha’] produces a Pandas Series with a True/False value for every row
# in the ‘data’ DataFrame, where there are “True” values for the rows where the name is “Samantha”.
# These type of boolean arrays can be passed directly to the .loc indexer.
students_df.loc[students_df['name'] == 'Samantha']

Unnamed: 0,name,age,city,state
0,Samantha,35,Houston,Texas
3,Samantha,21,Atlanta,Georgia


In [236]:
students_df[students_df['name'] == 'Samantha']

Unnamed: 0,name,age,city,state
0,Samantha,35,Houston,Texas
3,Samantha,21,Atlanta,Georgia


In [237]:
# What about if we only want the city and state of the selected students with the name Samantha?
students_df.loc[students_df['name'] == 'Samantha', ['city', 'state']]

Unnamed: 0,city,state
0,Houston,Texas
3,Atlanta,Georgia


In [239]:
# What amount if we want to select a student of a specific age?
students_df.loc[students_df.age == '21']

Unnamed: 0,name,age,city,state
3,Samantha,21,Atlanta,Georgia


In [240]:
# What amount if we want to select a student of a specific age?
students_df.loc[(students_df['age'] == '21') &
                (students_df['city'] == 'Atlanta')]

Unnamed: 0,name,age,city,state
3,Samantha,21,Atlanta,Georgia
