# Load packages

In [None]:
import pandas as pd
import numpy as np

# Pandas Series

## Construction

In [None]:
# Raw ingredients for a Series: three index labels and the integers 5, 6, 7.
my_index = list('abc')
values = np.arange(5, 5 + len(my_index))

In [None]:
# Pair each value with its label to build the Series.
series_a = pd.Series(data=values, index=my_index)

In [None]:
# A plain dict mapping string keys to integers, used as Series input.
my_dic = dict(one=20, two=5, three=3)

In [None]:
# Build a Series from a dict: the keys become the index labels, the values the data.
pd.Series(my_dic)

one      20
two       5
three     3
dtype: int64

## Selection

In [None]:
# Show the full Series before selecting parts of it.
series_a

a    5
b    6
c    7
dtype: int64

In [None]:
# A slice inside [] selects by position: `:1` keeps only the first element (label 'a').
series_a[:1]

a    5
dtype: int64

In [None]:
# A list of labels inside [] selects exactly those entries.
series_a[['a', 'c']]

a    5
c    7
dtype: int64

## Operations

In [None]:
# Arithmetic with a scalar is applied to every element; the index is unchanged.
series_a + 2

a    7
b    8
c    9
dtype: int64

In [None]:
# Multiplication by a scalar is element-wise as well.
series_a * 2

a    10
b    12
c    14
dtype: int64

In [None]:
# Second Series whose labels only partly overlap with series_a:
# 'b' and 'c' are shared, 'k' is new.
series_b = pd.Series(
    data=np.arange(2, 5),
    index=list('cbk'),
)

In [None]:
# Display the second Series.
series_b

c    2
b    3
k    4
dtype: int64

In [None]:
# Adding two Series aligns them on index labels, not on position.
# Labels present in only one Series ('a', 'k') give NaN, and the result is float64.
series_a + series_b

a    NaN
b    9.0
c    9.0
k    NaN
dtype: float64

# DataFrame

## Construction

In [None]:
# Recall the Series that is turned into a DataFrame next.
series_a

a    5
b    6
c    7
dtype: int64

In [None]:
# Wrap a Series in a DataFrame: the index is kept and the data becomes
# a single column labelled 0.
pd.DataFrame(series_a)

Unnamed: 0,0
a,5
b,6
c,7


In [None]:
# Seed the generator so the random values -- and every table built from them --
# are identical on each Restart & Run All.
rng = np.random.default_rng(42)
arr_data = rng.random((4, 3))  # 4 rows x 3 columns of floats in [0, 1)

# Row and column labels matching the array's shape.
idx = ['a', 'b', 'c', 'd']
colname = ['A', 'B', 'C']

In [None]:
# Assemble the frame: the array is the body, idx labels the rows, colname the columns.
df_simple = pd.DataFrame(
    arr_data,
    columns=colname,
    index=idx,
)

In [None]:
# Folder holding this week's data files (the default is a Google Drive mount path).
# Kept in one named place so the notebook can be pointed at another location
# by editing a single line.
DATA_DIR = "/content/drive/My Drive/gv918-data/data_week04"

# Read the example data set into a DataFrame.
df_ex = pd.read_csv(f"{DATA_DIR}/example_data.csv")

In [None]:
# Preview the first rows (five are shown) to check the columns: id, var_a, var_b.
df_ex.head()

Unnamed: 0,id,var_a,var_b
0,a,-0.022315,-2
1,b,-0.438391,-17
2,c,-1.141937,-16
3,d,-0.544511,29
4,e,0.885416,21


## Indexing

In [None]:
# Use the 'id' column as the row index. inplace=True modifies df_ex itself
# instead of returning a new frame, so run this cell only once per load.
df_ex.set_index('id', inplace=True)

In [None]:
# Display the frame: 'id' is now the row index rather than a column.
df_ex

Unnamed: 0_level_0,var_a,var_b
id,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.022315,-2
b,-0.438391,-17
c,-1.141937,-16
d,-0.544511,29
e,0.885416,21
f,-0.234791,-1
g,0.789671,13
h,-0.586962,-19
i,0.030706,-12
j,-0.803223,19


In [None]:
# Independent copy, so relabelling its index leaves df_simple untouched.
# deep=True is the default and is spelled out here for clarity.
df_simple_copy = df_simple.copy(deep=True)

In [None]:
# Inspect the current row labels of the copy.
df_simple_copy.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [None]:
# Overwrite the row labels by assignment; one new label per existing row.
df_simple_copy.index = list('efgh')

In [None]:
# The copy now carries the labels e-h; the data values are unchanged.
df_simple_copy

Unnamed: 0,A,B,C
e,0.960451,0.298013,0.549111
f,0.031482,0.093877,0.859992
g,0.645312,0.049133,0.834062
h,0.616809,0.804307,0.803905


### Reset index

In [None]:
# Without inplace, reset_index() returns a new frame with 'id' back as a column
# and a default integer index; df_ex itself is not modified.
df_ex.reset_index()

Unnamed: 0,id,var_a,var_b
0,a,-0.022315,-2
1,b,-0.438391,-17
2,c,-1.141937,-16
3,d,-0.544511,29
4,e,0.885416,21
5,f,-0.234791,-1
6,g,0.789671,13
7,h,-0.586962,-19
8,i,0.030706,-12
9,j,-0.803223,19


In [None]:
# Confirm that df_ex still has 'id' as its index.
df_ex

Unnamed: 0_level_0,var_a,var_b
id,Unnamed: 1_level_1,Unnamed: 2_level_1
a,-0.022315,-2
b,-0.438391,-17
c,-1.141937,-16
d,-0.544511,29
e,0.885416,21
f,-0.234791,-1
g,0.789671,13
h,-0.586962,-19
i,0.030706,-12
j,-0.803223,19


In [None]:
# When the index has no name, reset_index() stores the old labels
# in a column called 'index'.
df_simple.reset_index()

Unnamed: 0,index,A,B,C
0,a,0.960451,0.298013,0.549111
1,b,0.031482,0.093877,0.859992
2,c,0.645312,0.049133,0.834062
3,d,0.616809,0.804307,0.803905


## Reindexing

In [None]:
# Target labels for reindexing: the four existing row labels plus a new one, 'e'.
idx_new = list('abcde')

In [None]:
# Show the frame as it is before reindexing.
df_simple

Unnamed: 0,A,B,C
a,0.960451,0.298013,0.549111
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


In [None]:
# Reindex to the labels in idx_new: existing rows are kept, and the label
# with no matching row ('e') gets a row of missing values.
df_simple.reindex(idx_new)

Unnamed: 0,A,B,C
a,0.960451,0.298013,0.549111
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905
e,,,


In [None]:
# Reindexing to a subset of the labels (a, b, c) keeps only those rows.
df_simple.reindex(idx_new[:3])

Unnamed: 0,A,B,C
a,0.960451,0.298013,0.549111
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062


## Selecting rows and columns

### Using `[]`

In [None]:
# Undo the earlier set_index: with inplace=True df_ex is modified directly,
# and 'id' becomes a regular column again.
df_ex.reset_index(inplace=True)

#### Row selection

In [None]:
# A slice inside [] selects rows by position: here the first three.
df_ex[:3]

Unnamed: 0,id,var_a,var_b
0,a,-0.022315,-2
1,b,-0.438391,-17
2,c,-1.141937,-16


In [None]:
# Rows at positions 4 through 7; the end of the slice (8) is excluded.
df_ex[4:8]

Unnamed: 0,id,var_a,var_b
4,e,0.885416,21
5,f,-0.234791,-1
6,g,0.789671,13
7,h,-0.586962,-19


In [None]:
# Boolean Series, one entry per row: True where var_a is strictly positive.
bool_cond = df_ex['var_a'].gt(0)

In [None]:
# Indexing with a boolean Series keeps only the rows where it is True.
df_ex[bool_cond]

Unnamed: 0,id,var_a,var_b
4,e,0.885416,21
6,g,0.789671,13
8,i,0.030706,-12


In [None]:
# Same filter with the condition written inline.
df_ex[df_ex['var_a'] > 0]

Unnamed: 0,id,var_a,var_b
4,e,0.885416,21
6,g,0.789671,13
8,i,0.030706,-12


In [None]:
# Keep the filtered rows (var_a > 0) under their own name.
df_ex_sub = df_ex[df_ex['var_a'] > 0]

In [None]:
# Display the stored subset.
df_ex_sub

Unnamed: 0,id,var_a,var_b
4,e,0.885416,21
6,g,0.789671,13
8,i,0.030706,-12


#### Column selection

In [None]:
# A list of column names inside [] selects those columns, in the order given.
df_ex[['var_a', 'id']]

Unnamed: 0,var_a,id
0,-0.022315,a
1,-0.438391,b
2,-1.141937,c
3,-0.544511,d
4,0.885416,e
5,-0.234791,f
6,0.789671,g
7,-0.586962,h
8,0.030706,i
9,-0.803223,j


### `.drop()`

In [None]:
# Show the frame that the drop() examples start from.
df_simple

Unnamed: 0,A,B,C
a,0.960451,0.298013,0.549111
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


In [None]:
# By default drop() removes rows by label and returns a new frame;
# df_simple itself is unchanged.
df_simple.drop(['a', 'b'])

Unnamed: 0,A,B,C
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


In [None]:
# axis=1 makes drop() remove a column instead of rows.
df_simple.drop('A', axis=1)

Unnamed: 0,B,C
a,0.298013,0.549111
b,0.093877,0.859992
c,0.049133,0.834062
d,0.804307,0.803905


In [None]:
# Work on a copy so that the in-place drop below does not modify df_simple.
df_simple_copy2 = df_simple.copy()

In [None]:
# inplace=True removes row 'a' from df_simple_copy2 itself
# rather than producing a new frame.
df_simple_copy2.drop('a', inplace=True)

In [None]:
# Row 'a' is gone from the copy.
df_simple_copy2

Unnamed: 0,A,B,C
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


### Using `.loc[]`, `.iloc[]`

In [None]:
# .loc selects by label: all rows (:), columns 'A' and 'B'.
df_simple.loc[:, ['A', 'B']]

Unnamed: 0,A,B
a,0.960451,0.298013
b,0.031482,0.093877
c,0.645312,0.049133
d,0.616809,0.804307


In [None]:
# Rows 'a' and 'b', columns 'A' and 'B', all selected by label.
df_simple.loc[['a', 'b'], ['A', 'B']]

Unnamed: 0,A,B
a,0.960451,0.298013
b,0.031482,0.093877


In [None]:
# .iloc selects by integer position: rows 1 and 3 ('b', 'd'),
# columns 1 and 2 ('B', 'C').
df_simple.iloc[[1, 3], [1, 2]]

Unnamed: 0,B,C
b,0.093877,0.859992
d,0.804307,0.803905


### Creating and manipulating columns

In [83]:
# Assigning to a new column name creates the column: var_c = var_a + 2 for every row.
df_ex['var_c'] = df_ex['var_a'] + 2

In [84]:
# Columns combine element-wise: var_d is the row-by-row product of var_a and var_b.
df_ex['var_d'] = df_ex['var_a'] * df_ex['var_b']

In [85]:
# Display the frame with the two new columns.
df_ex

Unnamed: 0,id,var_a,var_b,var_c,var_d
0,a,-0.022315,-2,1.977685,0.04463
1,b,-0.438391,-17,1.561609,7.452642
2,c,-1.141937,-16,0.858063,18.270987
3,d,-0.544511,29,1.455489,-15.790811
4,e,0.885416,21,2.885416,18.593729
5,f,-0.234791,-1,1.765209,0.234791
6,g,0.789671,13,2.789671,10.265729
7,h,-0.586962,-19,1.413038,11.152287
8,i,0.030706,-12,2.030706,-0.368471
9,j,-0.803223,19,1.196777,-15.261229


In [86]:
# Assigning a scalar to an existing column overwrites every row with that value.
df_ex['var_d'] = 0

In [87]:
# var_d is now 0 in every row.
df_ex

Unnamed: 0,id,var_a,var_b,var_c,var_d
0,a,-0.022315,-2,1.977685,0
1,b,-0.438391,-17,1.561609,0
2,c,-1.141937,-16,0.858063,0
3,d,-0.544511,29,1.455489,0
4,e,0.885416,21,2.885416,0
5,f,-0.234791,-1,1.765209,0
6,g,0.789671,13,2.789671,0
7,h,-0.586962,-19,1.413038,0
8,i,0.030706,-12,2.030706,0
9,j,-0.803223,19,1.196777,0


In [88]:
# Show the copy before individual cells are overwritten.
df_simple_copy2

Unnamed: 0,A,B,C
b,0.031482,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


In [90]:
# .loc with a row label and a column label targets a single cell for assignment.
df_simple_copy2.loc['b', 'A'] = 100

In [91]:
# The cell at row 'b', column 'A' now holds 100.
df_simple_copy2

Unnamed: 0,A,B,C
b,100.0,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,0.804307,0.803905


In [93]:
# .iloc does the same by position: row 2, column 1 (the 'd'/'B' cell)
# is set to a missing value.
df_simple_copy2.iloc[2, 1] = np.nan

In [94]:
# The 'd'/'B' cell is now missing.
df_simple_copy2

Unnamed: 0,A,B,C
b,100.0,0.093877,0.859992
c,0.645312,0.049133,0.834062
d,0.616809,,0.803905
