# Setting, Deleting, and Handling Missing Data

In [138]:
import numpy as np
import pandas as pd

In [139]:
# 6 rows ("a".."f") x 5 columns ("col1".."col5") holding the integers 0..29
# in row-major order.
df = pd.DataFrame(
    data=np.arange(6 * 5).reshape(6, 5),
    index=["a", "b", "c", "d", "e", "f"],
    columns=["col" + str(n) for n in range(1, 6)],
)
df

Unnamed: 0,col1,col2,col3,col4,col5
a,0,1,2,3,4
b,5,6,7,8,9
c,10,11,12,13,14
d,15,16,17,18,19
e,20,21,22,23,24
f,25,26,27,28,29


## Setting

### Modify Multiple Values

In [140]:
# Scale an entire column.
df["col1"] = df["col1"] * 10
# Overwrite two columns at once with seeded (reproducible) random integers
# drawn from [1000, 9999); the array shape must match the selection (6 rows x 2 columns).
df.loc[:, ["col2", "col5"]] = np.random.default_rng(seed=42).integers(low=1000, high=9999, size=(6, 2))
# Assign one value per row to a single column.
df.loc[:, "col3"] = list(range(10000, 10006))
df

Unnamed: 0,col1,col2,col3,col4,col5
a,0,1803,10000,3,7964
b,50,6890,10001,8,4949
c,100,4896,10002,13,8726
d,150,1773,10003,18,7275
e,200,2813,10004,23,1847
f,250,5737,10005,28,9779


### Modify Single Value

In [141]:
# Four accessors that each overwrite a single cell; every line targets a
# different row of "col4".
df.loc["a", "col4"] = 999       # label-based: (row label, column label)
df.iloc[1, 3] = 8888            # position-based: row 1 ("b"), column 3 ("col4")
df.at["c", "col4"] = 77777      # label-based scalar accessor
df.iat[3, 3] = 666666           # position-based scalar accessor: row 3 ("d"), column 3 ("col4")
df

Unnamed: 0,col1,col2,col3,col4,col5
a,0,1803,10000,999,7964
b,50,6890,10001,8888,4949
c,100,4896,10002,77777,8726
d,150,1773,10003,666666,7275
e,200,2813,10004,23,1847
f,250,5737,10005,28,9779


### Modify by Boolean Indexing

In [142]:
# `df < 100` is a boolean frame of the same shape as `df`. Assigning through it
# replaces only the cells where the mask is True with the matching cells of
# `-df`, i.e. every value below 100 has its sign flipped (0 stays 0).
df[df < 100] = -df
df

Unnamed: 0,col1,col2,col3,col4,col5
a,0,1803,10000,999,7964
b,-50,6890,10001,8888,4949
c,100,4896,10002,77777,8726
d,150,1773,10003,666666,7275
e,200,2813,10004,-23,1847
f,250,5737,10005,-28,9779


### Append

In [143]:
# Assigning to a key that does not exist yet appends a new column on the right.
df["total"] = df.sum(axis="columns").to_numpy()  # row-wise sum of the existing columns
df["gt50000"] = df["total"].gt(50000)            # boolean column derived from another column
df["foo"] = "bar"                                # a scalar is broadcast to every row
df

Unnamed: 0,col1,col2,col3,col4,col5,total,gt50000,foo
a,0,1803,10000,999,7964,20766,False,bar
b,-50,6890,10001,8888,4949,30678,False,bar
c,100,4896,10002,77777,8726,101501,True,bar
d,150,1773,10003,666666,7275,685867,True,bar
e,200,2813,10004,-23,1847,14841,False,bar
f,250,5737,10005,-28,9779,25743,False,bar


### Insert

In [144]:
# insert(loc, column, value): place a new column named "col0" at position 0.
# Only the first two rows of "col2" are supplied. The value is aligned on the
# index, so the rows without a match ("c".."f") become NaN and the new column
# is stored as float.
df.insert(loc=0, column="col0", value=df["col2"].iloc[:2])
df

Unnamed: 0,col0,col1,col2,col3,col4,col5,total,gt50000,foo
a,1803.0,0,1803,10000,999,7964,20766,False,bar
b,6890.0,-50,6890,10001,8888,4949,30678,False,bar
c,,100,4896,10002,77777,8726,101501,True,bar
d,,150,1773,10003,666666,7275,685867,True,bar
e,,200,2813,10004,-23,1847,14841,False,bar
f,,250,5737,10005,-28,9779,25743,False,bar


## Deleting

### Delete Column

In [145]:
# Three ways to remove a column; each of them modifies `df` itself.
del df["total"]                          # plain `del` statement
df.drop(["foo"], axis=1, inplace=True)   # same as `df.drop(columns=["foo"], inplace=True)`
gt50000 = df.pop("gt50000")              # `pop` removes the column and returns it
df

Unnamed: 0,col0,col1,col2,col3,col4,col5
a,1803.0,0,1803,10000,999,7964
b,6890.0,-50,6890,10001,8888,4949
c,,100,4896,10002,77777,8726
d,,150,1773,10003,666666,7275
e,,200,2813,10004,-23,1847
f,,250,5737,10005,-28,9779


### Delete Row

In [146]:
# Without `inplace=True`, `drop` returns a new frame and leaves `df` untouched.
# The result is discarded here, so row "f" is still present in `df` afterwards.
df.drop("f")
# With `inplace=True`, rows "e" and "d" are removed from `df` itself.
df.drop(["e", "d"], inplace=True)
df

Unnamed: 0,col0,col1,col2,col3,col4,col5
a,1803.0,0,1803,10000,999,7964
b,6890.0,-50,6890,10001,8888,4949
c,,100,4896,10002,77777,8726
f,,250,5737,10005,-28,9779


## Handling Missing Data (NaN)

### Dropna

`df.dropna(how=...)` returns a new DataFrame with the rows (or, with `axis=1`, the columns) that contain missing data removed.

- `how="any"` (default): if any NA values are present, drop that row or column.
- `how="all"`: if all values are NA, drop that row or column.

In [158]:
# Work on a copy so `df` stays as it is. `dropna` returns a new frame without
# the rows that contain at least one NaN; `miss_df` itself is not changed.
miss_df = df.copy()
miss_df.dropna(axis=0, how="any")

Unnamed: 0,col0,col1,col2,col3,col4,col5
a,1803.0,0,1803,10000,999,7964
b,6890.0,-50,6890,10001,8888,4949


### Fillna

`df.fillna(value)` returns a new DataFrame in which every NaN element is replaced with the given value.

In [160]:
# Replace every NaN with a placeholder value; a new frame is returned and
# `miss_df` keeps its missing entries.
miss_df.fillna(10_000_000)

Unnamed: 0,col0,col1,col2,col3,col4,col5
a,1803.0,0,1803,10000,999,7964
b,6890.0,-50,6890,10001,8888,4949
c,10000000.0,100,4896,10002,77777,8726
f,10000000.0,250,5737,10005,-28,9779


# References

- https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html#setting
- https://pandas.pydata.org/pandas-docs/stable/user_guide/10min.html#missing-data
- https://pandas.pydata.org/pandas-docs/stable/user_guide/dsintro.html#column-selection-addition-deletion
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html
- https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.fillna.html