In [1]:
import pandas as pd


In [2]:
# Sample records, stored column-wise: each key becomes a column label and
# each list holds that column's values, one entry per person.
people = dict(
    first=['Vaishnao', 'Avyukt', 'Reyansh'],
    last=['Wankar', 'Harjai', 'Lamba'],
    email=[
        'vaishnaowankar@gmail.com',
        'avyuktharjai@gmail.com',
        'reyanshlamba@gmail.com',
    ],
)

In [3]:
# Build the working DataFrame from the column-wise dict defined above.
df = pd.DataFrame(data=people)

In [4]:
# Display the freshly built frame: three rows, columns first / last / email.
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


# Updating Columns

In [5]:
# Inspect the current column labels before changing them.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# Assigning a list to df.columns relabels every column at once, positionally:
# the list supplies one new name per existing column.
df.columns = ['first_name','last_name','email']

In [7]:
# Display the frame: the labels are now first_name / last_name / email, the data is untouched.
df

Unnamed: 0,first_name,last_name,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


In [8]:
# Transform every existing label with a list comprehension (here: upper-case
# each one) and assign the resulting list back as the new column labels.
df.columns = [label.upper() for label in df.columns]

In [9]:
# Display the frame: every column label is now upper-case.
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


In [10]:
# Replace "_" with a space in every column label.
# The .str accessor applies the string method to all labels in one call.
df.columns = df.columns.str.replace('_',' ')
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


In [11]:
# Rename columns with DataFrame.rename: the dict maps each old label to its new label.
# The returned frame is assigned back to df instead of using inplace=True, so the
# update is an explicit reassignment and the call stays chainable.
df = df.rename(columns={'FIRST NAME': 'first', 'LAST NAME': 'last', 'EMAIL': 'email'})

In [12]:
# Display the frame: the labels are back to first / last / email.
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


# Updating Rows

In [13]:
# Method 1: overwrite a whole row.
# df.loc[2] selects the row labelled 2; the list supplies one value per column, in column order.
df.loc[2] = ['Reyansh','Oberoi','reyanshoberoi@gmail.com']
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Oberoi,reyanshoberoi@gmail.com


In [14]:
# Method 2: overwrite only some cells of a row.
# The second indexer lists the columns to touch; the right-hand list gives their new values in the same order.
df.loc[2,['last','email']] = ['Lamba','reyanshlamba@gmail.com']
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


In [15]:
# To change a single cell, pass one column label and a plain scalar; no list is needed.
df.loc[2,'last'] = "Oberoi"
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Oberoi,reyanshlamba@gmail.com


In [16]:
# .at can be used instead of .loc when exactly one cell (row label, column label) is addressed.
df.at[2,'last'] = "Lamba"
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


## Error alert


In [17]:
# Deliberate anti-pattern: updating values through a filter with chained indexing.
# df[filt] is evaluated first and, as the warning printed below reports, the value is
# then set on a copy of a slice rather than on df itself, so df is left unchanged.
filt = (df['email'] == 'vaishnaowankar@gmail.com')
df[filt]['last'] = 'sharma'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'sharma'


In [18]:
# The message above is actually a warning, not an error, but the assignment
# still did not change our DataFrame: row 0 still has last == 'Wankar'.
df

Unnamed: 0,first,last,email
0,Vaishnao,Wankar,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


In [19]:
# Correct way: pass the boolean filter and the column label to a single .loc call,
# so the assignment is applied to df itself rather than to a temporary copy.
filt = (df['email'] == 'vaishnaowankar@gmail.com')
df.loc[filt,'last'] = 'Sharma'

In [20]:
# Display the frame: the row matching the filter now has last == 'Sharma'.
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


# Changing multiple rows at once

In [21]:
# Assigning to df['email'] replaces the whole column, i.e. updates every row at once.
# .str.lower() lower-cases each address; they are already lower-case here, so the output looks the same.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


### 🌟🌟🌟 Different methods to update multiple rows at once

# 1. Apply


In [22]:
# On a Series, apply calls the function once per element: here, the length of each email string.
df['email'].apply(len)

0    24
1    22
2    22
Name: email, dtype: int64

In [25]:
def update_email(email: str) -> str:
    """Return ``email`` converted to upper case.

    Written as a plain function so it can be handed to ``Series.apply``,
    which calls it once per element.
    """
    uppercased = email.upper()
    return uppercased

In [26]:
# apply returns a new Series of upper-cased addresses; nothing is assigned here, so df is not modified by this cell.
df['email'].apply(update_email)

0    VAISHNAOWANKAR@GMAIL.COM
1      AVYUKTHARJAI@GMAIL.COM
2      REYANSHLAMBA@GMAIL.COM
Name: email, dtype: object

In [28]:
# Assign the result back to the column to actually store the upper-cased addresses in df.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,VAISHNAOWANKAR@GMAIL.COM
1,Avyukt,Harjai,AVYUKTHARJAI@GMAIL.COM
2,Reyansh,Lamba,REYANSHLAMBA@GMAIL.COM


In [29]:
# Same pattern with an anonymous function instead of a named one:
# the lambda receives one email string at a time and returns it lower-cased.
df['email'] = df['email'].apply(lambda address: address.lower())

In [30]:
# Display the frame: the email column is lower-case again.
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com


# How len behaves with Series and DataFrames

In [31]:
# Continuing with the len example: applied to a Series, len runs on each element,
# so the result is the character count of every email.
df['email'].apply(len)

0    24
1    22
2    22
Name: email, dtype: int64

In [33]:
# Applied to the whole DataFrame, the function receives each column as a Series,
# so len returns the number of rows in each column (3 here), not string lengths.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [34]:
# We can also change the axis: with axis='columns' the function receives one row at a time,
# so len returns the number of values in each row (3 here, one per column).
df.apply(len , axis='columns')

0    3
1    3
2    3
dtype: int64

In [37]:
# Series has a min method that returns the smallest value in the Series.
# Passing pd.Series.min to df.apply runs it on each column in turn.
# It makes more sense with numbers, but for this example we work with strings.
df.apply(pd.Series.min)

first                    Avyukt
last                     Harjai
email    avyuktharjai@gmail.com
dtype: object

### It returned the alphabetically smallest value from each column

In [38]:
# The same result using a lambda: x is bound to each column in turn and x.min() is returned for it.
df.apply(lambda x : x.min())

first                    Avyukt
last                     Harjai
email    avyuktharjai@gmail.com
dtype: object

## In the above case, x is a Series (one column of the DataFrame), so we can call its min method directly

# 2. ApplyMap

### applymap works on every element of a DataFrame (deprecated since pandas 2.1; DataFrame.map, shown below, is its replacement)

In [39]:
# applymap calls the function on each individual cell: here, the length of every string in the frame.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# which is why this cell emits a warning; the df.map cells further down are the replacement.
df.applymap(len)

  df.applymap(len)


Unnamed: 0,first,last,email
0,8,6,24
1,6,6,22
2,7,5,22


In [40]:
# Lower-case every cell; the result is a new frame and df itself is not modified.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
df.applymap(str.lower)

  df.applymap(str.lower)


Unnamed: 0,first,last,email
0,vaishnao,sharma,vaishnaowankar@gmail.com
1,avyukt,harjai,avyuktharjai@gmail.com
2,reyansh,lamba,reyanshlamba@gmail.com


In [41]:
# DataFrame.map also works element by element and gives the same result as applymap(len) above.
df.map(len)

Unnamed: 0,first,last,email
0,8,6,24
1,6,6,22
2,7,5,22


In [42]:
# Upper-case every cell with DataFrame.map; the result is only displayed, not assigned back to df.
df.map(str.upper)

Unnamed: 0,first,last,email
0,VAISHNAO,SHARMA,VAISHNAOWANKAR@GMAIL.COM
1,AVYUKT,HARJAI,AVYUKTHARJAI@GMAIL.COM
2,REYANSH,LAMBA,REYANSHLAMBA@GMAIL.COM


# 3. Map and Replace

In [62]:
# Overwrite the 'first' column with a list (one value per row) so that the
# map / replace examples below have names to substitute.
original_first_names = ['Vaishnao', 'Rudra', 'Shubh']
df['first'] = original_first_names

In [63]:
# Display the frame: rows 1 and 2 now carry the first names 'Rudra' and 'Shubh'.
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Rudra,Harjai,avyuktharjai@gmail.com
2,Shubh,Lamba,reyanshlamba@gmail.com


In [66]:
# Series.map with a dict substitutes each value by its dict entry.
# Note: values that have no key in the dict ('Vaishnao' here) come back as NaN.
df['first'].map({'Rudra':'Avyukt','Shubh':'Reyansh'})

0        NaN
1     Avyukt
2    Reyansh
Name: first, dtype: object

In [67]:
# Use replace instead when values without a matching key should be kept as they are.
df['first'].replace({'Rudra':'Avyukt','Shubh':'Reyansh'})

0    Vaishnao
1      Avyukt
2     Reyansh
Name: first, dtype: object

In [68]:
# Note: map and replace returned new Series; our original df has not changed.
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Rudra,Harjai,avyuktharjai@gmail.com
2,Shubh,Lamba,reyanshlamba@gmail.com


In [69]:
# To store the change, assign the replaced Series back to the column.
df['first'] = df['first'].replace({'Rudra':'Avyukt','Shubh':'Reyansh'})
df

Unnamed: 0,first,last,email
0,Vaishnao,Sharma,vaishnaowankar@gmail.com
1,Avyukt,Harjai,avyuktharjai@gmail.com
2,Reyansh,Lamba,reyanshlamba@gmail.com
