In [46]:
import pandas as pd
import numpy as np

# Merge, Drop and Extend

### Create `DataFrame`

In [47]:
# Sample forum data: one entry per column, five users each.
# The last user's age is unknown (None), which pandas shows as a missing value.
joined_dates = pd.to_datetime(
    ['2032-01-01', '2032-02-15', '2032-04-26', '2032-06-21', '2032-09-15']
)

forum_users = {
    'User ID': np.array([1, 2, 3, 4, 5]),
    'Username': ['bill', 'john', 'elly', 'fred', 'any'],
    'Age': [18, 35, 25, 38, None],
    'Joined date': joined_dates,
    'Total posts': [150, 230, 80, 420, 310],
    'Reputation': [500, 720, 200, 940, 500],
}

# Dict keys become column names; rows get the default 0..4 index.
df1 = pd.DataFrame(data=forum_users)
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation
0,1,bill,18.0,2032-01-01,150,500
1,2,john,35.0,2032-02-15,230,720
2,3,elly,25.0,2032-04-26,80,200
3,4,fred,38.0,2032-06-21,420,940
4,5,any,,2032-09-15,310,500


### Add Column

In [48]:
# Assigning a single scalar fills the new column with that value in every row.
default_language = 'English'
df1['Language'] = default_language
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language
0,1,bill,18.0,2032-01-01,150,500,English
1,2,john,35.0,2032-02-15,230,720,English
2,3,elly,25.0,2032-04-26,80,200,English
3,4,fred,38.0,2032-06-21,420,940,English
4,5,any,,2032-09-15,310,500,English


In [49]:
# Assigning a list sets one value per row; it overwrites the existing
# 'Language' column. The list length must match the number of rows.
# NOTE(review): 'Ukraine', 'France' and 'Portugal' are country names rather
# than languages; the values are kept as-is so the output below still matches.
language_per_user = ['English', 'Ukraine', 'France', 'Portugal', 'English']
df1['Language'] = language_per_user
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language
0,1,bill,18.0,2032-01-01,150,500,English
1,2,john,35.0,2032-02-15,230,720,Ukraine
2,3,elly,25.0,2032-04-26,80,200,France
3,4,fred,38.0,2032-06-21,420,940,Portugal
4,5,any,,2032-09-15,310,500,English


In [50]:
# A NumPy array works as column data too (one element per row).
active_flags = np.array([True, False, False, True, True])
df1['Active'] = active_flags
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language,Active
0,1,bill,18.0,2032-01-01,150,500,English,True
1,2,john,35.0,2032-02-15,230,720,Ukraine,False
2,3,elly,25.0,2032-04-26,80,200,France,False
3,4,fred,38.0,2032-06-21,420,940,Portugal,True
4,5,any,,2032-09-15,310,500,English,True


### Concat DataFrames

In [51]:
total_comments = {'Total Comments': [70, 30, 45, 55, 80]}

# Built with the default 0..4 index so its rows line up with df1's.
# Alternative with a custom string index:
# df2 = pd.DataFrame(total_comments, index=['a', 'b', 'c', 'd', 'e'])
df2 = pd.DataFrame(data=total_comments)
df2

Unnamed: 0,Total Comments
0,70
1,30
2,45
3,55
4,80


In [52]:
# Join the two frames side by side: rows are matched on the index and df2's
# column is appended after df1's columns.
# With the default axis (0 / 'index') the frames would be stacked row-wise:
# pd.concat([df1, df2], axis=0)
pd.concat([df1, df2], axis='columns')

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language,Active,Total Comments
0,1,bill,18.0,2032-01-01,150,500,English,True,70
1,2,john,35.0,2032-02-15,230,720,Ukraine,False,30
2,3,elly,25.0,2032-04-26,80,200,France,False,45
3,4,fred,38.0,2032-06-21,420,940,Portugal,True,55
4,5,any,,2032-09-15,310,500,English,True,80


### Drop column

In [53]:
# drop() returns a new DataFrame without the 'Language' column; df1 itself
# is left unchanged. To remove the column from df1, pass inplace=True
# (or reassign the result).
df1.drop(columns=['Language'])

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Active
0,1,bill,18.0,2032-01-01,150,500,True
1,2,john,35.0,2032-02-15,230,720,False
2,3,elly,25.0,2032-04-26,80,200,False
3,4,fred,38.0,2032-06-21,420,940,True
4,5,any,,2032-09-15,310,500,True


In [54]:
# df1 still has the 'Language' column: the drop() call above was made
# without inplace=True, so it only returned a modified copy.
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language,Active
0,1,bill,18.0,2032-01-01,150,500,English,True
1,2,john,35.0,2032-02-15,230,720,Ukraine,False
2,3,elly,25.0,2032-04-26,80,200,France,False
3,4,fred,38.0,2032-06-21,420,940,Portugal,True
4,5,any,,2032-09-15,310,500,English,True


In [55]:
# Uncomment the next line to remove 'Language' from df1 itself (in place).
# It is left commented out here, so df1 below still shows the column.
# df1.drop(['Language'], axis=1, inplace=True)
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language,Active
0,1,bill,18.0,2032-01-01,150,500,English,True
1,2,john,35.0,2032-02-15,230,720,Ukraine,False
2,3,elly,25.0,2032-04-26,80,200,France,False
3,4,fred,38.0,2032-06-21,420,940,Portugal,True
4,5,any,,2032-09-15,310,500,English,True


### Drop row

In [56]:
# Remove the rows whose index labels are 1 and 3, modifying df1 itself.
# The remaining rows keep their original labels (0, 2, 4).
# NOTE: this cell is not idempotent; running it a second time raises
# KeyError because labels 1 and 3 no longer exist.
df1.drop(index=[1, 3], inplace=True)
df1

Unnamed: 0,User ID,Username,Age,Joined date,Total posts,Reputation,Language,Active
0,1,bill,18.0,2032-01-01,150,500,English,True
2,3,elly,25.0,2032-04-26,80,200,France,False
4,5,any,,2032-09-15,310,500,English,True
