In [1]:
import pandas as pd
import numpy as np

# Two small demo tables sharing a 'key' column. The numeric columns hold
# random integers drawn from [0, 10), so the values differ between runs.
df_obj1 = pd.DataFrame({
    'key': list('bbacaab'),
    'data1': np.random.randint(low=0, high=10, size=7),
})
df_obj2 = pd.DataFrame({
    'key': list('abd'),
    'data2': np.random.randint(low=0, high=10, size=3),
})

# Show both inputs, one after the other, in plain-text form.
print(df_obj1, df_obj2, sep='\n')

  key  data1
0   b      4
1   b      5
2   a      3
3   c      7
4   a      7
5   a      5
6   b      4
  key  data2
0   a      7
1   b      0
2   d      3


In [3]:
# Inner join (the default) on the shared 'key' column: only keys that occur
# in both frames appear in the result.
df_obj1.merge(df_obj2, on='key')

Unnamed: 0,key,data1,data2
0,b,4,0
1,b,5,0
2,a,3,7
3,a,7,7
4,a,5,7
5,b,4,0


In [4]:
# Rename the join column differently in each frame so that the two key
# columns no longer share a name.
df_obj1 = df_obj1.rename(columns=dict(key='key1'))
df_obj2 = df_obj2.rename(columns=dict(key='key2'))

In [5]:
# Show both frames after the key columns were renamed.
print(df_obj1, df_obj2, sep='\n')

  key1  data1
0    b      4
1    b      5
2    a      3
3    c      7
4    a      7
5    a      5
6    b      4
  key2  data2
0    a      7
1    b      0
2    d      3


In [6]:
# The key columns have different names, so name each side explicitly.
# No `how` is given, so this is the default inner join.
df_obj1.merge(df_obj2, left_on='key1', right_on='key2')

Unnamed: 0,key1,data1,key2,data2
0,b,4,b,0
1,b,5,b,0
2,a,3,a,7
3,a,7,a,7
4,a,5,a,7
5,b,4,b,0


In [7]:
# Outer join: keep keys from either frame; cells with no match on the other
# side come back as missing values.
df_obj1.merge(df_obj2, how='outer', left_on='key1', right_on='key2')

Unnamed: 0,key1,data1,key2,data2
0,a,3.0,a,7.0
1,a,7.0,a,7.0
2,a,5.0,a,7.0
3,b,4.0,b,0.0
4,b,5.0,b,0.0
5,b,4.0,b,0.0
6,c,7.0,,
7,,,d,3.0


In [8]:
# Left join: keep every row of df_obj1; rows without a matching 'key2' get
# missing values in the right-hand columns.
df_obj1.merge(df_obj2, how='left', left_on='key1', right_on='key2')

Unnamed: 0,key1,data1,key2,data2
0,b,4,b,0.0
1,b,5,b,0.0
2,a,3,a,7.0
3,c,7,,
4,a,7,a,7.0
5,a,5,a,7.0
6,b,4,b,0.0


In [9]:
# Right join: keep every row of df_obj2; rows without a matching 'key1' get
# missing values in the left-hand columns.
df_obj1.merge(df_obj2, how='right', left_on='key1', right_on='key2')

Unnamed: 0,key1,data1,key2,data2
0,a,3.0,a,7
1,a,7.0,a,7
2,a,5.0,a,7
3,b,4.0,b,0
4,b,5.0,b,0
5,b,4.0,b,0
6,,,d,3


In [10]:
# Handle overlapping column names: both frames carry a 'data' column, so the
# suffixes distinguish the left copy from the right copy in the merged result.
df_obj1 = pd.DataFrame({
    'key': list('bbacaab'),
    'data': np.random.randint(low=0, high=10, size=7),
})
df_obj2 = pd.DataFrame({
    'key': list('abd'),
    'data': np.random.randint(low=0, high=10, size=3),
})

print(df_obj1.merge(df_obj2, on='key', suffixes=('_left', '_right')))

  key  data_left  data_right
0   b          4           4
1   b          9           4
2   a          2           9
3   a          3           9
4   a          7           9
5   b          5           4


In [11]:
# Join against an index: match df_obj1's 'key' column with the row labels of
# df_obj2 instead of with one of its columns.
df_obj1 = pd.DataFrame({
    'key': list('bbacaab'),
    'data1': np.random.randint(low=0, high=10, size=7),
})
df_obj2 = pd.DataFrame(
    {'data2': np.random.randint(low=0, high=10, size=3)},
    index=list('abd'),
)

# Show the two inputs first, then the joined result.
print(df_obj1, df_obj2, sep='\n')
print(df_obj1.merge(df_obj2, left_on='key', right_index=True))

  key  data1
0   b      5
1   b      4
2   a      4
3   c      6
4   a      1
5   a      9
6   b      8
   data2
a      2
b      4
d      6
  key  data1  data2
0   b      5      4
1   b      4      4
2   a      4      2
4   a      1      2
5   a      9      2
6   b      8      4


In [12]:
# Same join with the roles swapped: df_obj2's row labels on the left are
# matched against df_obj1's 'key' column on the right.
print(df_obj2.merge(df_obj1, left_index=True, right_on='key'))

   data2 key  data1
2      2   a      4
4      2   a      1
5      2   a      9
0      4   b      5
1      4   b      4
6      4   b      8


# concat 合并

In [14]:
# Three Series of random integers in [0, 10) whose integer indexes do not
# overlap: labels 0-4, 5-8 and 9-11.
ser_obj1 = pd.Series(np.random.randint(low=0, high=10, size=5), index=range(5))
ser_obj2 = pd.Series(np.random.randint(low=0, high=10, size=4), index=range(5, 9))
ser_obj3 = pd.Series(np.random.randint(low=0, high=10, size=3), index=range(9, 12))

# Show each input, a separator line, then the Series stacked end to end
# (pd.concat joins along the rows when no axis is given).
print(ser_obj1, ser_obj2, ser_obj3, sep='\n')
print('-' * 50)
print(pd.concat((ser_obj1, ser_obj2, ser_obj3)))

0    3
1    3
2    1
3    7
4    2
dtype: int32
5    6
6    5
7    0
8    8
dtype: int32
9     9
10    9
11    7
dtype: int32
--------------------------------------------------
0     3
1     3
2     1
3     7
4     2
5     6
6     5
7     0
8     8
9     9
10    9
11    7
dtype: int32


In [15]:
# Column-wise concat: each Series becomes one column and the row labels are
# the union of all indexes, so a label missing from a Series shows as NaN.
print(pd.concat((ser_obj1, ser_obj2, ser_obj3), axis=1))

      0    1    2
0   3.0  NaN  NaN
1   3.0  NaN  NaN
2   1.0  NaN  NaN
3   7.0  NaN  NaN
4   2.0  NaN  NaN
5   NaN  6.0  NaN
6   NaN  5.0  NaN
7   NaN  0.0  NaN
8   NaN  8.0  NaN
9   NaN  NaN  9.0
10  NaN  NaN  9.0
11  NaN  NaN  7.0


In [18]:
# Rebuild the Series with overlapping indexes (0-4, 0-3, 0-2) so that the
# column-wise concat lines values up by label; shorter Series get NaN in the
# rows they lack.
ser_obj1 = pd.Series(np.random.randint(low=0, high=10, size=5), index=range(0, 5))
ser_obj2 = pd.Series(np.random.randint(low=0, high=10, size=4), index=range(0, 4))
ser_obj3 = pd.Series(np.random.randint(low=0, high=10, size=3), index=range(0, 3))
print(pd.concat((ser_obj1, ser_obj2, ser_obj3), axis=1))

   0    1    2
0  4  1.0  1.0
1  9  5.0  1.0
2  5  2.0  7.0
3  4  8.0  NaN
4  3  NaN  NaN
