In [1]:
import pandas as pd

## Creating a df

In [2]:
# Build the frame in one step from a column -> values mapping; the dict's
# insertion order gives the column order age, name, id.
df = pd.DataFrame(
    {
        "age": [32, 26, 20, 24, 19, 31],
        "name": ["Joey", "Mark", "Raj", "Priya", "Sheldon", "Tom"],
        "id": [2, 5, 8, 1, 25, 3],
    }
)

# Adding a new column: assigning to a label that does not exist yet appends it.
# NOTE(review): '02-14-1994' cannot be day-month-year (14 is not a month) and
# '31-12-1996' cannot be month-day-year, so these strings mix two formats.
# Confirm the intended format before parsing them as dates.
df["dob"] = [
    "01-01-1998",
    "02-14-1994",
    "04-10-2000",
    "31-12-1996",
    "02-06-2001",
    "07-03-1989",
]

df

Unnamed: 0,age,name,id,dob
0,32,Joey,2,01-01-1998
1,26,Mark,5,02-14-1994
2,20,Raj,8,04-10-2000
3,24,Priya,1,31-12-1996
4,19,Sheldon,25,02-06-2001
5,31,Tom,3,07-03-1989


## Selecting only a few columns

In [3]:
# Indexing with a list of labels returns a frame holding just those columns,
# in the order they are listed.
wanted_columns = ["id", "age"]
df_new = df[wanted_columns]
df_new

Unnamed: 0,id,age
0,2,32
1,5,26
2,8,20
3,1,24
4,25,19
5,3,31


## Renaming columns

In [4]:
# Rename selected columns through an old-name -> new-name mapping; labels that
# are not keys of the mapping are left untouched.
# rename() returns a new frame, so rebind `df` to it instead of passing
# inplace=True: this keeps the call chainable and avoids in-place mutation.
df = df.rename(columns={"id": "col_1", "name": "col_2"})

df

Unnamed: 0,age,col_2,col_1,dob
0,32,Joey,2,01-01-1998
1,26,Mark,5,02-14-1994
2,20,Raj,8,04-10-2000
3,24,Priya,1,31-12-1996
4,19,Sheldon,25,02-06-2001
5,31,Tom,3,07-03-1989


In [6]:
# Restore the original labels by overwriting the whole column index.
# The assignment is positional: the list must contain exactly one label per
# existing column, given in the frame's current column order.
original_labels = ["age", "name", "id", "dob"]
df.columns = original_labels
df

Unnamed: 0,age,name,id,dob
0,32,Joey,2,01-01-1998
1,26,Mark,5,02-14-1994
2,20,Raj,8,04-10-2000
3,24,Priya,1,31-12-1996
4,19,Sheldon,25,02-06-2001
5,31,Tom,3,07-03-1989


## Splitting a string column 

In [7]:
# Break each "dob" string apart at the hyphens; expand=True returns the pieces
# as separate columns rather than a single column of lists.
dob_parts = df["dob"].str.split("-", expand=True)

# The first, second and third piece are stored as "day", "month" and "year".
df[["day", "month", "year"]] = dob_parts
df

Unnamed: 0,age,name,id,dob,day,month,year
0,32,Joey,2,01-01-1998,1,1,1998
1,26,Mark,5,02-14-1994,2,14,1994
2,20,Raj,8,04-10-2000,4,10,2000
3,24,Priya,1,31-12-1996,31,12,1996
4,19,Sheldon,25,02-06-2001,2,6,2001
5,31,Tom,3,07-03-1989,7,3,1989


## Concatenating string columns

In [8]:
# Join the string columns in month-day-year order, with "-" between the pieces.
df["mod_date"] = df["month"].str.cat([df["day"], df["year"]], sep="-")
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
0,32,Joey,2,01-01-1998,1,1,1998,01-01-1998
1,26,Mark,5,02-14-1994,2,14,1994,14-02-1994
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000
3,24,Priya,1,31-12-1996,31,12,1996,12-31-1996
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989


## Filtering with multiple criteria

In [9]:
# Build one boolean mask per criterion, then keep only the rows where both hold.
is_under_25 = df["age"] < 25
has_id_3_or_more = df["id"] >= 3

df_new = df[is_under_25 & has_id_3_or_more]

df_new

Unnamed: 0,age,name,id,dob,day,month,year,mod_date
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001


## Continuous variable to categorical

In [10]:
# Bucket the continuous "age" values into labelled categories.
# right=False makes every bin closed on the left and open on the right:
# [0, 30) -> "20's" and [30, 60) -> "30's". An age of exactly 30 is therefore
# labelled "30's"; with the default right=True it would land in "20's".
# Values outside [0, 60) become NaN.
# The Series itself is passed (not .values) so the result stays aligned on the
# frame's index.
# NOTE(review): the first bin starts at 0, so ages below 20 are also labelled
# "20's" - confirm whether that is intended.
df["cat_age"] = pd.cut(
    df["age"],
    bins=[0, 30, 60],
    labels=["20's", "30's"],
    right=False,
)

df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,Joey,2,01-01-1998,1,1,1998,01-01-1998,30's
1,26,Mark,5,02-14-1994,2,14,1994,14-02-1994,20's
2,20,Raj,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,Priya,1,31-12-1996,31,12,1996,12-31-1996,20's
4,19,Sheldon,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,Tom,3,07-03-1989,7,3,1989,03-07-1989,30's


## apply() method

In [11]:
def to_lowercase(text):
    """Return ``text`` converted to lower case."""
    return text.lower()


# apply() calls the function once per element of the column and collects the
# results into a new Series, which then replaces the "name" column.
df["name"] = df["name"].apply(to_lowercase)
df

Unnamed: 0,age,name,id,dob,day,month,year,mod_date,cat_age
0,32,joey,2,01-01-1998,1,1,1998,01-01-1998,30's
1,26,mark,5,02-14-1994,2,14,1994,14-02-1994,20's
2,20,raj,8,04-10-2000,4,10,2000,10-04-2000,20's
3,24,priya,1,31-12-1996,31,12,1996,12-31-1996,20's
4,19,sheldon,25,02-06-2001,2,6,2001,06-02-2001,20's
5,31,tom,3,07-03-1989,7,3,1989,03-07-1989,30's
