In [1]:
# Sample records stored column-wise: each key becomes a DataFrame column
# and each list holds that column's values, one entry per person.
people = dict(
    first=['Vishwas', 'Anurag', 'Ankit', 'Ritik'],
    last=['Sharma', 'Sharma', 'Dubey', 'Sharma'],
    email=[
        'sharma.vishwas@gmail.com',
        'anurag123@gmail.com',
        'dubey.ankit456@gmail.com',
        'ritik23sharma@gmail.com',
    ],
)

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [5]:
df.set_index('first')

Unnamed: 0_level_0,last,email
first,Unnamed: 1_level_1,Unnamed: 2_level_1
Vishwas,Sharma,sharma.vishwas@gmail.com
Anurag,Sharma,anurag123@gmail.com
Ankit,Dubey,dubey.ankit456@gmail.com
Ritik,Sharma,ritik23sharma@gmail.com


In [6]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [7]:
df.set_index('first', inplace=True)

In [8]:
df

Unnamed: 0_level_0,last,email
first,Unnamed: 1_level_1,Unnamed: 2_level_1
Vishwas,Sharma,sharma.vishwas@gmail.com
Anurag,Sharma,anurag123@gmail.com
Ankit,Dubey,dubey.ankit456@gmail.com
Ritik,Sharma,ritik23sharma@gmail.com


In [9]:
df.iloc[0]

last                       Sharma
email    sharma.vishwas@gmail.com
Name: Vishwas, dtype: object

In [10]:
df.index

Index(['Vishwas', 'Anurag', 'Ankit', 'Ritik'], dtype='object', name='first')

In [11]:
df.loc['Ankit', 'last']

'Dubey'

In [12]:
df.reset_index(inplace=True)

In [13]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Filtering Data

In [14]:
filt = (df['last'] == 'Sharma')

In [15]:
df[filt]

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [16]:
# Another way to apply the filter is through loc[]
# loc[] is preferable because we can also specify the column name(s) to return

In [17]:
df.loc[filt]

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [18]:
df.loc[filt, 'email']

0    sharma.vishwas@gmail.com
1         anurag123@gmail.com
3     ritik23sharma@gmail.com
Name: email, dtype: object

## Use of And(&) and Or(|) operator:

In [19]:
filt = (df['last'] == 'Sharma') & (df['first'] == 'Vishhy')

In [20]:
df.loc[filt, 'email']

Series([], Name: email, dtype: object)

In [21]:
filt1 = (df['last'] == 'Sharma') | (df['first'] == 'Vishhy')

In [22]:
df.loc[filt1, 'email']

0    sharma.vishwas@gmail.com
1         anurag123@gmail.com
3     ritik23sharma@gmail.com
Name: email, dtype: object

In [23]:
df.loc[~filt1, 'email']

2    dubey.ankit456@gmail.com
Name: email, dtype: object

## Changing the column names

In [24]:
df.columns = ['first_name', 'last_name', 'email']

In [25]:
df

Unnamed: 0,first_name,last_name,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [26]:
df.columns = [x.upper() for x in df.columns] # list comprehension
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [27]:
df.columns = df.columns.str.replace(' ', '_')

In [28]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [29]:
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## to change only a few values

In [30]:
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Dubey,dubey.ankit456@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Updating data

In [31]:
df.iloc[2] = ['Ankit', 'Trivedi', 'tri.ankit@gmail.com']

In [32]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anurag123@gmail.com
2,Ankit,Trivedi,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [33]:
df.loc[1 ,['first', 'email']] = ['Anu', 'anu234@gmail.com']

In [34]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Trivedi,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Important

In [35]:
filt = (df['email'] == 'tri.ankit@gmail.com')
df[filt]

Unnamed: 0,first,last,email
2,Ankit,Trivedi,tri.ankit@gmail.com


In [36]:
# Intentional anti-pattern (kept to show the warning below): df[filt] yields a copy,
# so this chained assignment targets that temporary copy rather than df itself.
df[filt]['last'] = 'Dubey'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Dubey'


In [37]:
df.loc[filt, 'last'] = 'Dubey'  # since previous one was returning a copy

In [38]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [39]:
df['email'] = df['email'].str.lower()

In [40]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## advanced methods to update rows
## apply
## map
## applymap
## replace

## 1. Apply (for series objects)

In [41]:
df['email'].str.len()

0    24
1    16
2    19
3    23
Name: email, dtype: int64

In [42]:
df['email'].apply(len)

0    24
1    16
2    19
3    23
Name: email, dtype: int64

In [43]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Written to be passed to ``Series.apply``, which calls it once per value.
    """
    uppercased = email.upper()
    return uppercased

In [44]:
df['email'].apply(update_email)

0    SHARMA.VISHWAS@GMAIL.COM
1            ANU234@GMAIL.COM
2         TRI.ANKIT@GMAIL.COM
3     RITIK23SHARMA@GMAIL.COM
Name: email, dtype: object

In [45]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


In [46]:
df['email'] = df['email'].apply(update_email)

In [47]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,SHARMA.VISHWAS@GMAIL.COM
1,Anu,Sharma,ANU234@GMAIL.COM
2,Ankit,Dubey,TRI.ANKIT@GMAIL.COM
3,Ritik,Sharma,RITIK23SHARMA@GMAIL.COM


In [48]:
df['email'] = df['email'].apply(lambda x: x.lower())

In [49]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Apply (for DF)

In [50]:
df.apply(len)

first    4
last     4
email    4
dtype: int64

In [51]:
df.apply(len, axis='columns')

0    3
1    3
2    3
3    3
dtype: int64

In [52]:
df.apply(pd.Series.min)  # will return the minimum value from each column, here in alpha order since these are strings.

first               Ankit
last                Dubey
email    anu234@gmail.com
dtype: object

In [53]:
df.apply(lambda x: x.min())  # here x is a series not a value

first               Ankit
last                Dubey
email    anu234@gmail.com
dtype: object

## Running apply on a series runs a func on each value of the series
## Running apply on a DF runs a func on each series of the DF

## So to run the func on each individual value of the DF we'll use applymap (a DataFrame-only method; a Series has no applymap)

## 2. Apply Map

In [54]:
df.applymap(len)

Unnamed: 0,first,last,email
0,7,6,24
1,3,6,16
2,5,5,19
3,5,6,23


In [55]:
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,vishwas,sharma,sharma.vishwas@gmail.com
1,anu,sharma,anu234@gmail.com
2,ankit,dubey,tri.ankit@gmail.com
3,ritik,sharma,ritik23sharma@gmail.com


In [56]:
df

Unnamed: 0,first,last,email
0,Vishwas,Sharma,sharma.vishwas@gmail.com
1,Anu,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## 3. Map (Works only for Series)

### used to substitute each value with a diff. value in a series

In [57]:
df['first'].map({'Vishwas':'Vishhy', 'Anu':'Anurag'})  # values that are not keys of the mapping come back as NaN (Not a Number)

0    Vishhy
1    Anurag
2       NaN
3       NaN
Name: first, dtype: object

## 4. Replace

In [58]:
# Unlike map(), replace() leaves values that are not in the mapping untouched.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df['first']: an in-place call on a column
# selection is chained assignment, which pandas warns about and which leaves
# df unchanged once copy-on-write is enabled.
df['first'] = df['first'].replace({'Vishwas':'Vishhy', 'Anu':'Anurag'})

In [59]:
# df['first'] = df['first'].replace({'Vishwas':'Vishhy', 'Anu':'Anurag'})

In [60]:
df

Unnamed: 0,first,last,email
0,Vishhy,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Add/Remove Rows/Columns in a DF

In [61]:
df

Unnamed: 0,first,last,email
0,Vishhy,Sharma,sharma.vishwas@gmail.com
1,Anurag,Sharma,anu234@gmail.com
2,Ankit,Dubey,tri.ankit@gmail.com
3,Ritik,Sharma,ritik23sharma@gmail.com


## Adding a column

In [62]:
df['first'] + ' ' + df['last']

0    Vishhy Sharma
1    Anurag Sharma
2      Ankit Dubey
3     Ritik Sharma
dtype: object

In [63]:
df['full_name'] = df['first'] + ' ' + df['last'] # apply() would also work here, but a new column must be created with [] rather than dot notation,
#  because df.full_name = ... would be treated by Python as setting a plain attribute on the df object, not as adding a column.

In [64]:
df['full_name']

0    Vishhy Sharma
1    Anurag Sharma
2      Ankit Dubey
3     Ritik Sharma
Name: full_name, dtype: object

### Removing a column

In [65]:
df.drop(columns = ['first', 'last'], inplace=True)  # inplace = True

In [66]:
df

Unnamed: 0,email,full_name
0,sharma.vishwas@gmail.com,Vishhy Sharma
1,anu234@gmail.com,Anurag Sharma
2,tri.ankit@gmail.com,Ankit Dubey
3,ritik23sharma@gmail.com,Ritik Sharma


### Now getting those two columns back

In [67]:
df['full_name'].str.split(' ')

0    [Vishhy, Sharma]
1    [Anurag, Sharma]
2      [Ankit, Dubey]
3     [Ritik, Sharma]
Name: full_name, dtype: object

In [68]:
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Vishhy,Sharma
1,Anurag,Sharma
2,Ankit,Dubey
3,Ritik,Sharma


In [69]:
# Reassigning

In [70]:
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)

In [71]:
df

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma


## Adding/Removing Rows

In [72]:
# df.append({'first':'Tony'})

In [73]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to
# add rows and behaves the same on older versions. The dict is wrapped in a
# one-row DataFrame; columns it lacks (here 'full_name') are filled with NaN.
# The result is only displayed, not assigned, so df itself is unchanged.
pd.concat(
    [df, pd.DataFrame([{'first':'Tony', 'last':'Stark', 'email':'Stark@jarvis.com'}])],
    ignore_index=True,
)

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
4,Stark@jarvis.com,,Tony,Stark


In [74]:
# Second batch of sample records, laid out column-wise like `people`.
people2 = dict(
    first=['Mark', 'Rishabh', 'Shivam', 'Steve'],
    last=['Ruffalo', 'Sharma', 'Dubey', 'Smith'],
    email=[
        'hulk.smash@gmail.com',
        'pant.power@gmail.com',
        'dubey.shiv@gmail.com',
        'steve_cap@gmail.com',
    ],
)

In [75]:
df2 = pd.DataFrame(people2)

In [76]:
df2

Unnamed: 0,first,last,email
0,Mark,Ruffalo,hulk.smash@gmail.com
1,Rishabh,Sharma,pant.power@gmail.com
2,Shivam,Dubey,dubey.shiv@gmail.com
3,Steve,Smith,steve_cap@gmail.com


## Appending DF

In [77]:
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Without ignore_index=True each frame keeps its own labels, so the index
# values 0-3 appear twice in the result.
pd.concat([df, df2])

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
0,hulk.smash@gmail.com,,Mark,Ruffalo
1,pant.power@gmail.com,,Rishabh,Sharma
2,dubey.shiv@gmail.com,,Shivam,Dubey
3,steve_cap@gmail.com,,Steve,Smith


In [78]:
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# ignore_index=True renumbers the combined rows 0..n-1; the result is assigned
# back because concat returns a new DataFrame.
df = pd.concat([df, df2], ignore_index=True)

In [79]:
df

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
5,pant.power@gmail.com,,Rishabh,Sharma
6,dubey.shiv@gmail.com,,Shivam,Dubey
7,steve_cap@gmail.com,,Steve,Smith


## Removing a Row

In [80]:
df = df.drop(index=6)

In [81]:
df

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
5,pant.power@gmail.com,,Rishabh,Sharma
7,steve_cap@gmail.com,,Steve,Smith


#### Through a Conditional

In [82]:
filt4 = df['last'] == 'Smith'
df = df.drop(index = df[filt4].index)

In [83]:
df

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
5,pant.power@gmail.com,,Rishabh,Sharma


## Sorting

In [84]:
df.sort_values(by='last', ascending=False)

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
5,pant.power@gmail.com,,Rishabh,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey


In [85]:
df.sort_values(by=['last', 'first'], ascending=False)

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
5,pant.power@gmail.com,,Rishabh,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey


#### Suppose we want to sort last names in descending order and first names in ascending order

In [86]:
df.sort_values(by=['last', 'first'], ascending=[False, True], inplace=True)

In [87]:
df

Unnamed: 0,email,full_name,first,last
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
5,pant.power@gmail.com,,Rishabh,Sharma
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey


#### To revert this back

In [88]:
df = df.sort_index()

In [89]:
df

Unnamed: 0,email,full_name,first,last
0,sharma.vishwas@gmail.com,Vishhy Sharma,Vishhy,Sharma
1,anu234@gmail.com,Anurag Sharma,Anurag,Sharma
2,tri.ankit@gmail.com,Ankit Dubey,Ankit,Dubey
3,ritik23sharma@gmail.com,Ritik Sharma,Ritik,Sharma
4,hulk.smash@gmail.com,,Mark,Ruffalo
5,pant.power@gmail.com,,Rishabh,Sharma


#### To sort a series not the whole DF

In [90]:
df['last'].sort_values()

2      Dubey
4    Ruffalo
0     Sharma
1     Sharma
3     Sharma
5     Sharma
Name: last, dtype: object

## Grouping and Aggregating

## Cleaning Data: Casting Datatypes and handling missing values

In [91]:
import numpy as np

In [92]:
 people = {'first':['Corey', 'Jane', 'John', 'Chris', np.nan, None, 'NA'],
          'last': ['Schafer', 'Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'],
          'email': ['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com', np.nan, 'Missing', 'Anonymous@email.com', 'NA'],
          'age': ['33', '55', '63', '36', None, None, 'Missing']
          }

In [93]:
# Build the frame, then turn the placeholder strings 'NA' and 'Missing'
# into real missing values (NaN) so pandas' NA-aware methods recognise them.
df = pd.DataFrame(people)
df = df.replace('NA', np.nan).replace('Missing', np.nan)

In [94]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [95]:
df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [96]:
df.dropna(axis='index', how='any') # same as df.dropna(): these are the default arguments
# axis='index' drops rows; axis='columns' drops columns instead
# how sets the criterion for dropping a row/column: 'any' drops it if at least one value is missing,
# 'all' drops it only if every value is missing

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [97]:
df.dropna(axis='index', how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [98]:
df.dropna(axis='columns', how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [99]:
df.dropna(axis='columns', how='any')

0
1
2
3
4
5
6


### Suppose we want to drop only the rows that are missing values in some specific columns; for that we use the subset argument

In [100]:
df.dropna(axis='index', how='any', subset=['email']) # every row that has no email is dropped

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
5,,,Anonymous@email.com,


In [101]:
df.dropna(axis='index', how='any', subset=['last', 'email']) # drops a row when either the last name or the email is missing

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [102]:
df.dropna(axis='index', how='all', subset=['last', 'email']) # drops a row only if both the last name and the email are missing

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [103]:
df.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,True,True


### To fill all the None and nan values with some particular value

In [104]:
df.fillna(0) # returns a new DataFrame; df itself keeps its missing values unless the result is assigned back or inplace=True is passed

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,0,36
4,0,0,0,0
5,0,0,Anonymous@email.com,0
6,0,0,0,0


In [105]:
df.dtypes

first    object
last     object
email    object
age      object
dtype: object

In [106]:
#df['age'].mean()  # this will give an error since the age column has string dtype not int

### So we'll cast the age column to float 

In [107]:
#df['age'] = df['age'].astype(int)  # we can't do this since np.nan has a dtype of float, so we've to cast it into float

In [108]:
type(np.nan)

float

In [109]:
df['age'] = df['age'].astype(float)

In [110]:
df.dtypes

first     object
last      object
email     object
age      float64
dtype: object

In [111]:
df['age'].mean()

46.75

#### We can also convert the whole df to a particular type using df.astype()