In [1]:
import pandas as pd
import numpy as np

In [31]:
# Merging
biodata = pd.DataFrame({'id': ['id0', 'id1', 'id2', 'id3', 'id4'],
                     'JK': ['P', 'P', 'L', 'L', 'L'], # JK=Jenis Kelamin
                     'Usia': ['18', '20', '21', '17', '19']})

nilai = pd.DataFrame({'id': ['id0', 'id1', 'id2', 'id3', 'id4'],
                      'IPK': ['3.96', '3.44', '3.65', '3.10', '2.80'],
                      'Nilai': ['A', 'AB', 'AB', 'B', 'BC']})

result1 = pd.merge(biodata, nilai, on='id')
result1

Unnamed: 0,id,JK,Usia,IPK,Nilai
0,id0,P,18,3.96,A
1,id1,P,20,3.44,AB
2,id2,L,21,3.65,AB
3,id3,L,17,3.1,B
4,id4,L,19,2.8,BC


In [3]:
# Merging different key id
biodata = pd.DataFrame({'id': ['id0', 'id1', 'id2', 'id3','id4','id5'],
                     'JK': ['P', 'P', 'L', 'L','L','P'], # JK=Jenis Kelamin
                     'Usia': ['18', '20', '21', '17', '19', '20']})

nilai = pd.DataFrame({'id': ['id0', 'id1', 'id2', 'id3'],
                      'IPK': ['3.96', '3.44', '3.65', '3.10'],
                      'Nilai': ['A', 'AB', 'AB', 'B']})

result2 = pd.merge(biodata, nilai, on='id')
result2

Unnamed: 0,id,JK,Usia,IPK,Nilai
0,id0,P,18,3.96,A
1,id1,P,20,3.44,AB
2,id2,L,21,3.65,AB
3,id3,L,17,3.1,B


In [4]:
# Ordering variable/column
result1 = result1[['id', 'JK', 'Usia', 'Nilai','IPK']]
result1

Unnamed: 0,id,JK,Usia,Nilai,IPK
0,id0,P,18,A,3.96
1,id1,P,20,AB,3.44
2,id2,L,21,AB,3.65
3,id3,L,17,B,3.1
4,id4,L,19,B,3.05


In [32]:
# Multiple key
biodata = pd.DataFrame({'id': ['id0', 'id0', 'id1', 'id1'],
                        'Kelas': ['METPEN', 'AD', 'MO', 'ADW'],
                        'JK': ['P', 'P', 'L', 'L'], 
                      'Usia': ['19', '21', '18', '23']})

nilai = pd.DataFrame({'id': ['id0', 'id0', 'id1', 'id1'],
                      'Kelas': ['METPEN', 'AD', 'MO', 'ADW'],
                      'IPK': ['3.96', '3.44', '3.65', '3.10'],
                      'Nilai': ['A', 'AB', 'AB', 'B']})

result3 = pd.merge(biodata, nilai, on=['id', 'Kelas'])
result3

Unnamed: 0,id,Kelas,JK,Usia,IPK,Nilai
0,id0,METPEN,P,19,3.96,A
1,id0,AD,P,21,3.44,AB
2,id1,MO,L,18,3.65,AB
3,id1,ADW,L,23,3.1,B


In [33]:
# Merge methods (default how="inner")
result_left = pd.merge(biodata, nilai, how='left', on=['id', 'Kelas'])
result_left

Unnamed: 0,id,Kelas,JK,Usia,IPK,Nilai
0,id0,METPEN,P,19,3.96,A
1,id0,AD,P,21,3.44,AB
2,id1,MO,L,18,3.65,AB
3,id1,ADW,L,23,3.1,B


In [7]:
# Merge methods (default how="inner")
result_right = pd.merge(biodata, nilai, how='right', on=['id', 'Kelas'])
result_right

Unnamed: 0,id,Kelas,JK,Usia,IPK,Nilai
0,id1,ADW,L,23.0,3.1,B
1,id0,METPEN,,,3.96,A
2,id0,ADK,,,3.44,AB
3,id1,MO,,,3.65,AB


In [8]:
result_outer = pd.merge(biodata, nilai, how='outer', on=['id', 'Kelas'])
result_outer

Unnamed: 0,id,Kelas,JK,Usia,IPK,Nilai
0,id0,Kalkulus,P,19.0,,
1,id0,ADE,P,21.0,,
2,id1,Kalkulus,L,18.0,,
3,id1,ADW,L,23.0,3.1,B
4,id0,METPEN,,,3.96,A
5,id0,ADK,,,3.44,AB
6,id1,MO,,,3.65,AB


In [9]:
result_inner = pd.merge(biodata, nilai, how='inner', on=['id', 'Kelas'])
result_inner

Unnamed: 0,id,Kelas,JK,Usia,IPK,Nilai
0,id1,ADW,L,23,3.1,B


In [10]:
# Joining 
biodata = pd.DataFrame({'JK': ['P', 'P', 'L'], 
                      'Usia': ['19', '21', '18']},
                    index=['id0', 'id1', 'id2'])
nilai = pd.DataFrame({'Kelas': ['METPEN', 'ADK', 'MO'],
                      'IPK': ['3.96', '3.44', '3.65']},
                    index=['id0', 'id2', 'id3'])
result = biodata.join(nilai) #Based Biodata ditambahkan nilai
result

Unnamed: 0,JK,Usia,Kelas,IPK
id0,P,19,METPEN,3.96
id1,P,21,,
id2,L,18,ADK,3.44


In [11]:
result = nilai.join(biodata, how='outer') #Seluruh data masuk
result

Unnamed: 0,Kelas,IPK,JK,Usia
id0,METPEN,3.96,P,19.0
id1,,,P,21.0
id2,ADK,3.44,L,18.0
id3,MO,3.65,,


In [12]:
result = nilai.join(biodata, how='inner') # Hanya data yang lengkap
result

Unnamed: 0,Kelas,IPK,JK,Usia
id0,METPEN,3.96,P,19
id2,ADK,3.44,L,18


In [13]:
result = pd.merge(biodata, nilai, left_index=True, right_index=True, how='outer')
result

Unnamed: 0,JK,Usia,Kelas,IPK
id0,P,19.0,METPEN,3.96
id1,P,21.0,,
id2,L,18.0,ADK,3.44
id3,,,MO,3.65


In [14]:
result = pd.merge(biodata, nilai, left_index=True, right_index=True, how='inner')
result

Unnamed: 0,JK,Usia,Kelas,IPK
id0,P,19,METPEN,3.96
id2,L,18,ADK,3.44


In [15]:
biodata = pd.DataFrame({'JK': ['P', 'L', 'P', 'L'], 
                      'Usia': ['21', '20', '19', '22'],
                   'key': ['id0', 'id1', 'id0', 'id1']})
nilai = pd.DataFrame({'Nilai': ['A', 'B'],
                       'IPK': ['3.40', '3.75']},
                      index=['id0', 'id1'])
result = biodata.join(nilai, on='key')
result

Unnamed: 0,JK,Usia,key,Nilai,IPK
0,P,21,id0,A,3.4
1,L,20,id1,B,3.75
2,P,19,id0,A,3.4
3,L,22,id1,B,3.75


In [16]:
# Concatenating
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                'B': ['B0', 'B1', 'B2', 'B3'],
                'C': ['C0', 'C1', 'C2', 'C3'],
                'D': ['D0', 'D1', 'D2', 'D3']})
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                'B': ['B4', 'B5', 'B6', 'B7'],
                'C': ['C4', 'C5', 'C6', 'C7'],
                'D': ['D4', 'D5', 'D6', 'D7']})
df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                'B': ['B8', 'B9', 'B10', 'B11'],
                'C': ['C8', 'C9', 'C10', 'C11'],
                'D': ['D8', 'D9', 'D10', 'D11']})

frames = [df1, df2, df3]
result = pd.concat(frames)
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7
0,A8,B8,C8,D8
1,A9,B9,C9,D9


In [17]:
# Add category
result = pd.concat(frames, keys=['A', 'B', 'C'])
result

Unnamed: 0,Unnamed: 1,A,B,C,D
A,0,A0,B0,C0,D0
A,1,A1,B1,C1,D1
A,2,A2,B2,C2,D2
A,3,A3,B3,C3,D3
B,0,A4,B4,C4,D4
B,1,A5,B5,C5,D5
B,2,A6,B6,C6,D6
B,3,A7,B7,C7,D7
C,0,A8,B8,C8,D8
C,1,A9,B9,C9,D9


In [18]:
# Default join=outer
df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],
                    'D': ['D2', 'D3', 'D6', 'D7'],
                    'F': ['F2', 'F3', 'F6', 'F7']},
                    index=[2, 3, 6, 7])

result = pd.concat([df1, df4], axis=0) #axis=1 is for concated by y-axis; default axis=0
result

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  import sys


Unnamed: 0,A,B,C,D,F
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
2,,B2,,D2,F2
3,,B3,,D3,F3
6,,B6,,D6,F6
7,,B7,,D7,F7


In [21]:
# hanya yang ada di df2 dan df4
result = pd.concat([df2, df4], axis=1, join='inner')
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
2,A6,B6,C6,D6,B2,D2,F2
3,A7,B7,C7,D7,B3,D3,F3


In [22]:
#Menggabungkan berdasarkan df1
result = pd.concat([df1, df4], axis=1, join_axes=[df1.index])
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3


In [24]:
result = df1.append(df3) #default axis=0
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A8,B8,C8,D8
1,A9,B9,C9,D9
2,A10,B10,C10,D10
3,A11,B11,C11,D11


In [25]:
result = df1.append(df4)
result

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,A,B,C,D,F
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
2,,B2,,D2,F2
3,,B3,,D3,F3
6,,B6,,D6,F6
7,,B7,,D7,F7


In [26]:
result = df1.append([df2, df3]) #multiple datasets to concatenate
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7
0,A8,B8,C8,D8
1,A9,B9,C9,D9


In [27]:
result = pd.concat([df1, df4], ignore_index=True)
result

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,A,B,C,D,F
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
4,,B2,,D2,F2
5,,B3,,D3,F3
6,,B6,,D6,F6
7,,B7,,D7,F7


In [28]:
result = df1.append(df4, ignore_index=True)
result

Unnamed: 0,A,B,C,D,F
0,A0,B0,C0,D0,
1,A1,B1,C1,D1,
2,A2,B2,C2,D2,
3,A3,B3,C3,D3,
4,,B2,,D2,F2
5,,B3,,D3,F3
6,,B6,,D6,F6
7,,B7,,D7,F7


In [29]:
s2 = pd.Series(['X0', 'X1', 'X2', 'X3'], index=['A', 'B', 'C', 'D'])
result = df1.append(s2, ignore_index=True)
result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,X0,X1,X2,X3


In [30]:
dicts = [{'A': 1, 'B': 2, 'C': 3, 'X': 4}, #dictionary
         {'A': 5, 'B': 6, 'C': 7, 'Y': 8}]
result = df1.append(dicts, ignore_index=True)
result

Unnamed: 0,A,B,C,D,X,Y
0,A0,B0,C0,D0,,
1,A1,B1,C1,D1,,
2,A2,B2,C2,D2,,
3,A3,B3,C3,D3,,
4,1,2,3,,4.0,
5,5,6,7,,,8.0
