# 2. Indexing, Selecting & Assigning

## Accessing columns
- df.column
- df['column']

## 1) Accessor operators
- iloc: index-based selection (by integer position)
- loc: label-based selection

In [7]:
import pandas as pd

# Demo data: three integer columns of ten consecutive values each.
contents = dict(
    a=list(range(10)),
    b=list(range(10, 20)),
    c=list(range(20, 30)),
)

# No index is passed, so the rows get the default integer labels.
df = pd.DataFrame(contents)

In [8]:
# Select the first row. Both forms give the same result here because the
# frame uses the default integer labels, so position 0 and label 0 coincide.
# Only the last expression of the cell is displayed.
df.iloc[0]    # by integer position
df.loc[0]     # by index label

a     0
b    10
c    20
Name: 0, dtype: int64

In [9]:
# Select the first column: by integer position, then by column label.
df.iloc[:, 0]
df.loc[:, 'a']


0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
Name: a, dtype: int64

In [12]:
# Select a range of rows (3 through 6).
df.iloc[3:7]    # positional slice: the end position (7) is excluded
df.loc[3:6]     # label slice: the end label (6) is included

Unnamed: 0,a,b,c
3,3,13,23
4,4,14,24
5,5,15,25
6,6,16,26


In [22]:
# Select several columns (the 2nd and 3rd, i.e. 'b' through 'c').
df.iloc[:, 1:3]          # positional slice: end position excluded
df.loc[:, 'b':'c']       # label slice: end label included
df.loc[:, ['b', 'c']]    # explicit list of column labels

Unnamed: 0,b,c
0,10,20
1,11,21
2,12,22
3,13,23
4,14,24
5,15,25
6,16,26
7,17,27
8,18,28
9,19,29


## 2) Conditional selection

In [56]:
# Rebuild the demo frame with overlapping value ranges so that the
# conditional filters below select different subsets of rows.
contents = dict(
    a=list(range(10)),
    b=list(range(3, 13)),
    c=list(range(5, 15)),
)

df = pd.DataFrame(contents)

In [27]:
# Select the rows where column 'a' equals 3.
# The comparison yields a boolean mask that loc uses to keep matching rows.
df.loc[df['a'] == 3]

Unnamed: 0,a,b,c
3,3,6,8


In [33]:
# Select the rows where 'a' is greater than 5 AND 'b' is greater than 7.
# Each comparison is parenthesized because & binds tighter than >.
df.loc[(df['a'] > 5) & (df['b'] > 7)]

Unnamed: 0,a,b,c
6,6,9,11
7,7,10,12
8,8,11,13
9,9,12,14


In [35]:
# Select the rows where 'a' is less than 5 OR 'c' is greater than 12.
# Each comparison is parenthesized because | binds tighter than < and >.
df.loc[(df['a'] < 5) | (df['c'] > 12)]

Unnamed: 0,a,b,c
0,0,3,5
1,1,4,6
2,2,5,7
3,3,6,8
4,4,7,9
8,8,11,13
9,9,12,14


In [36]:
# Built-in conditional selector: isin keeps the rows whose 'a' value
# appears in the given list (here 3 or 5).
df.loc[df['a'].isin([3,5])]

Unnamed: 0,a,b,c
3,3,6,8
5,5,8,10


In [57]:
# Built-in conditional selectors: isnull (notnull is its complement).
# Append a row with a missing 'a' value at the next integer label so that
# there is something for isnull to find.
# NOTE: this mutates df in place; re-running the cell appends another row.
df.loc[df.index[-1]+1] = [None, 2, 3]

# Keep only the rows whose 'a' value is missing.
df.loc[df.a.isnull()]

Unnamed: 0,a,b,c
10,,2.0,3.0


In [58]:
# Add a column: assigning a scalar fills every row with that value.
df['d'] = 0
print(df)

      a     b     c  d
0   0.0   3.0   5.0  0
1   1.0   4.0   6.0  0
2   2.0   5.0   7.0  0
3   3.0   6.0   8.0  0
4   4.0   7.0   9.0  0
5   5.0   8.0  10.0  0
6   6.0   9.0  11.0  0
7   7.0  10.0  12.0  0
8   8.0  11.0  13.0  0
9   9.0  12.0  14.0  0
10  NaN   2.0   3.0  0
