# Selecting Data with Pandas

In [12]:
import pandas as pd

# load the sample data; thousands=',' tells pandas to treat ',' as a
# thousands separator, so a value written like "1,000" is parsed as a number
df = pd.read_csv('people_data.csv', thousands=',')

# preview the first rows (head() shows 5 by default)
df.head()

Unnamed: 0,first,last,age,height_cm,weight_kg,income
0,James,Smith,18,75.7,66.9,1000
1,Jane,Watson,18,163.0,56.7,800
2,Adam,Miller,19,176.5,68.9,350
3,Sara,Thompson,19,163.3,58.0,980
4,Tom,Piper,20,168.5,57.5,2500


### Selecting a Column

In [3]:
# by name - method one: bracket notation
# works for any column name and returns the column as a Series
df['age']

0    18
1    18
2    19
3    19
4    20
5    20
Name: age, dtype: int64

In [5]:
# by name - method two: attribute access
# only works when the column name is a valid Python identifier and does not
# clash with an existing DataFrame attribute or method; otherwise use df['name']
df.age

0    18
1    18
2    19
3    19
4    20
5    20
Name: age, dtype: int64

### Selecting multiple columns

In [29]:
# pass a list of column names (note the double brackets); the result is a DataFrame
df[['first', 'last']]

Unnamed: 0,first,last
0,James,Smith
1,Jane,Watson
2,Adam,Miller
3,Sara,Thompson
4,Tom,Piper
5,Carol,Winters


### Selecting first 3 rows of a column

In [21]:
# select the column first, then slice the resulting Series to its first 3 entries
df['last'][:3]

0     Smith
1    Watson
2    Miller
Name: last, dtype: object

### Selecting on multiple axes by label

In [27]:
# .loc selects by label: rows first, then columns
# unlike positional slicing, the label slice 2:3 includes BOTH end labels
df.loc[2:3, ['first', 'last']]

Unnamed: 0,first,last
2,Adam,Miller
3,Sara,Thompson


### Selecting rows

- Slicing with []
- <code>iloc</code>: integer-location (position-based) indexing

In [26]:
# slicing with [] selects rows by position;
# the stop position (4) is excluded, so this returns rows 2 and 3

df[2:4]

Unnamed: 0,first,last,age,height_cm,weight_kg,income
2,Adam,Miller,19,176.5,68.9,350
3,Sara,Thompson,19,163.3,58.0,980


In [25]:
# select the first 3 rows by integer position
# (iloc is position-based and ignores the index labels)

df.iloc[:3]

Unnamed: 0,first,last,age,height_cm,weight_kg,income
0,James,Smith,18,75.7,66.9,1000
1,Jane,Watson,18,163.0,56.7,800
2,Adam,Miller,19,176.5,68.9,350


### Selecting rows based on conditions
- Boolean Indexing

In [15]:
# df['first'] == 'Adam' produces a boolean Series (a mask);
# indexing the dataframe with that mask returns a dataframe
# containing only the rows where the mask is True

df[df['first'] == 'Adam']

Unnamed: 0,first,last,age,height_cm,weight_kg,income
2,Adam,Miller,19,176.5,68.9,350


In [13]:
# Select rows where income is greater than 900

df[df['income'] > 900]

Unnamed: 0,first,last,age,height_cm,weight_kg,income
0,James,Smith,18,75.7,66.9,1000
3,Sara,Thompson,19,163.3,58.0,980
4,Tom,Piper,20,168.5,57.5,2500
5,Carol,Winters,20,177.5,63.4,2950


### Subset rows from a list of values

In [16]:
# first names we want to match against the 'first' column
value_list = ['Tom', 'Carol', 'Jane']

In [19]:
# isin() builds a boolean mask that is True where 'first' appears in value_list
df[df['first'].isin(value_list)]

Unnamed: 0,first,last,age,height_cm,weight_kg,income
1,Jane,Watson,18,163.0,56.7,800
4,Tom,Piper,20,168.5,57.5,2500
5,Carol,Winters,20,177.5,63.4,2950


Grab rows that are NOT in the value list
- just add a tilde **~** to the expression
- ~ means *NOT*

In [20]:
# ~ inverts the boolean mask, keeping rows whose 'first' is NOT in value_list
df[~df['first'].isin(value_list)]

Unnamed: 0,first,last,age,height_cm,weight_kg,income
0,James,Smith,18,75.7,66.9,1000
2,Adam,Miller,19,176.5,68.9,350
3,Sara,Thompson,19,163.3,58.0,980
