# Belajar pandas

## #01: Menyertakan prefix dan suffix pada kolom data frame

In [1]:
import pandas as pd
import numpy as np

# Report the installed pandas and NumPy versions, one per line
print(pd.__version__, np.__version__, sep='\n')

1.3.3
1.21.2


In [2]:
# Build a 5x5 data frame of random integers drawn from 1..9, with columns A-E
n_rows, n_cols = 5, 5
cols = ('A', 'B', 'C', 'D', 'E')

df = pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,6,9,3,8,3
1,5,3,3,5,2
2,9,7,5,2,8
3,3,6,8,2,1
4,4,2,3,3,8


In [3]:
# tuple() iterates over the string, producing one element per character
tuple('ABCDE')

('A', 'B', 'C', 'D', 'E')

In [4]:
# Prepend 'kolom_' to every column label; the result is only displayed,
# df itself is not reassigned
df.add_prefix('kolom_')

Unnamed: 0,kolom_A,kolom_B,kolom_C,kolom_D,kolom_E
0,6,9,3,8,3
1,5,3,3,5,2
2,9,7,5,2,8
3,3,6,8,2,1
4,4,2,3,3,8


In [5]:
# Append the suffix '_field' to every column label; the result is only
# displayed, df itself is not reassigned
df.add_suffix('_field')

Unnamed: 0,A_field,B_field,C_field,D_field,E_field
0,6,9,3,8,3
1,5,3,3,5,2
2,9,7,5,2,8
3,3,6,8,2,1
4,4,2,3,3,8


## #02: Pemilihan baris (rows selection) pada data frame

In [6]:
# Build a 10x5 data frame of random integers drawn from 1..4, with columns A-E
n_rows, n_cols = 10, 5
cols = ('A', 'B', 'C', 'D', 'E')

df = pd.DataFrame(
    data=np.random.randint(low=1, high=5, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,4,1,2,4,1
1,3,1,2,1,2
2,1,3,4,4,3
3,4,2,1,1,1
4,3,3,4,4,2
5,1,4,2,4,1
6,2,2,3,2,2
7,4,1,1,2,4
8,3,4,2,2,1
9,3,4,4,2,1


In [7]:
# Row selection with the element-wise OR operator |: keep rows where A is 1 or 3.
# Each comparison is parenthesized because | binds more tightly than ==.
df[(df['A'] == 1) | (df['A'] == 3)]

Unnamed: 0,A,B,C,D,E
1,3,1,2,1,2
2,1,3,4,4,3
4,3,3,4,4,2
5,1,4,2,4,1
8,3,4,2,2,1
9,3,4,4,2,1


In [8]:
# Row selection with isin(): keep rows whose A value appears in the list [1, 3]
df[df['A'].isin([1,3])]

Unnamed: 0,A,B,C,D,E
1,3,1,2,1,2
2,1,3,4,4,3
4,3,3,4,4,2
5,1,4,2,4,1
8,3,4,2,2,1
9,3,4,4,2,1


In [9]:
# The negation operator ~ inverts the boolean mask: keep rows whose A value
# is NOT in the list [1, 3]
df[~df['A'].isin([1,3])]

Unnamed: 0,A,B,C,D,E
0,4,1,2,4,1
3,4,2,1,1,1
6,2,2,3,2,2
7,4,1,1,2,4


## #03: Konversi tipe data string ke numerik pada kolom data frame

In [10]:
# Build a data frame whose columns hold numbers stored as strings;
# col1 additionally contains one non-numeric entry
data = dict(
    col1=['1', '2', '3', 'teks'],
    col2=['1', '2', '3', '4'],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,col1,col2
0,1,1
1,2,2
2,3,3
3,teks,4


In [11]:
# Inspect the dtype of every column; both columns hold strings at this point
df.dtypes

col1    object
col2    object
dtype: object

In [12]:
# Convert dtypes with astype(): the dict maps column name -> target dtype,
# so only col2 is converted and the result is stored in a new frame df_x
df_x = df.astype({'col2':'int64'})
df_x

Unnamed: 0,col1,col2
0,1,1
1,2,2
2,3,3
3,teks,4


In [13]:
# Confirm the conversion: col2 is now int64 while col1 is still object
df_x.dtypes

col1    object
col2     int64
dtype: object

In [14]:
# Convert every column to a numeric dtype with to_numeric().
# errors='coerce' replaces values that cannot be parsed (the 'teks' entry in
# col1) with NaN, which is why col1 comes back as floats.
df.apply(pd.to_numeric, errors ='coerce')

Unnamed: 0,col1,col2
0,1.0,1
1,2.0,2
2,3.0,3
3,,4


## #04: Pemilihan kolom (columns selection) pada pandas data frame berdasarkan tipe data

In [16]:
# Build a data frame with one column each of datetime, float, integer and
# string data, for use in dtype-based column selection
n_rows = 5
n_cols = 2
cols = ['bil_pecahan', 'bil_bulat']

df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)), columns=cols)
# Cast the first column to float so the frame holds both float and integer data
df['bil_pecahan'] = df['bil_pecahan'].astype('float')

# Hourly timestamps starting at 2000-01-01 00:00. pd.date_range is the public
# replacement for pd.util.testing.makeDateIndex, which was deprecated in
# pandas 1.0 and removed in pandas 2.0. The lowercase 'h' alias is used
# because the uppercase 'H' spelling is deprecated since pandas 2.2.
df.index = pd.date_range('2000-01-01', periods=n_rows, freq='h')
# reset_index() moves the timestamps into a regular column named 'index'
df = df.reset_index()

df['teks'] = list('ABCDE')

df

Unnamed: 0,index,bil_pecahan,bil_bulat,teks
0,2000-01-01 00:00:00,17.0,17,A
1,2000-01-01 01:00:00,2.0,13,B
2,2000-01-01 02:00:00,12.0,7,C
3,2000-01-01 03:00:00,5.0,4,D
4,2000-01-01 04:00:00,14.0,13,E


In [17]:
# Inspect the dtype of every column (datetime, float, integer and object)
df.dtypes

index          datetime64[ns]
bil_pecahan           float64
bil_bulat               int32
teks                   object
dtype: object

In [18]:
# Select the numeric columns; 'number' matches both the float and the integer column
df.select_dtypes(include='number')

Unnamed: 0,bil_pecahan,bil_bulat
0,17.0,17
1,2.0,13
2,12.0,7
3,5.0,4
4,14.0,13


In [19]:
# Select only the floating-point columns
df.select_dtypes(include='float')

Unnamed: 0,bil_pecahan
0,17.0
1,2.0
2,12.0
3,5.0
4,14.0


In [20]:
# Select only the integer columns
df.select_dtypes(include='int')

Unnamed: 0,bil_bulat
0,17
1,13
2,7
3,4
4,13


In [21]:
# Select the columns whose dtype is object (here, the string column 'teks')
df.select_dtypes(include='object')

Unnamed: 0,teks
0,A
1,B
2,C
3,D
4,E


In [22]:
# Select the datetime columns (here, the 'index' column of timestamps)
df.select_dtypes(include='datetime')

Unnamed: 0,index
0,2000-01-01 00:00:00
1,2000-01-01 01:00:00
2,2000-01-01 02:00:00
3,2000-01-01 03:00:00
4,2000-01-01 04:00:00


In [23]:
# Select columns matching any of several dtypes by passing a list to include
df.select_dtypes(include=['number','object'])

Unnamed: 0,bil_pecahan,bil_bulat,teks
0,17.0,17,A
1,2.0,13,B
2,12.0,7,C
3,5.0,4,D
4,14.0,13,E


## #05: Membalik urutan baris dan kolom pada data frame

In [24]:
# Build a 5x5 data frame of random integers drawn from 1..9, with columns A-E
n_rows, n_cols = 5, 5
cols = ('A', 'B', 'C', 'D', 'E')

df = pd.DataFrame(
    data=np.random.randint(low=1, high=10, size=(n_rows, n_cols)),
    columns=cols,
)
df

Unnamed: 0,A,B,C,D,E
0,9,6,3,1,5
1,4,3,9,3,3
2,3,8,9,2,5
3,8,2,3,4,3
4,8,2,9,2,7


In [25]:
# Reverse the column order: keep all rows (:) and step backwards through
# the columns (::-1)
df.loc[:, ::-1]

Unnamed: 0,E,D,C,B,A
0,5,1,3,6,9
1,3,3,9,3,4
2,5,2,9,8,3
3,3,4,3,2,8
4,7,2,9,2,8


In [26]:
# Reverse the row order: step backwards through the rows (::-1) and keep
# all columns (:); the original index labels travel with their rows
df.loc[::-1, :]

Unnamed: 0,A,B,C,D,E
4,8,2,9,2,7
3,8,2,3,4,3
2,3,8,9,2,5
1,4,3,9,3,3
0,9,6,3,1,5
