# Filter pandas dataframe by column value

In [27]:
import pandas as pd
# Sample records, stored column-wise: one list per column, one entry per student.
# The mapping is named `data` rather than `dict` so the built-in `dict` type
# is not shadowed for the rest of the notebook session.
data = {
    'name': ["Chaitu", "pankaj", "sudhir", "Zeelan", "Zeelans", "Rahul", "Sam", "Hayat"],
    'degree': ["MBA", "MCA", "M.Tech", "MBA", "MCA", "BCA", "M.Tech", "MBA"],
    'score': [90, 45, 80, 98, 95, 67, 90, 87],
}

# Build the frame used by every filtering example below and display it.
df = pd.DataFrame(data)
df

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
1,pankaj,MCA,45
2,sudhir,M.Tech,80
3,Zeelan,MBA,98
4,Zeelans,MCA,95
5,Rahul,BCA,67
6,Sam,M.Tech,90
7,Hayat,MBA,87


# 1 : DataFrame Way

In [29]:
# Boolean-mask filter: keep rows whose degree is "MBA" and whose score is at least 90.
newdf = df[df["degree"].eq("MBA") & df["score"].ge(90)]
# Per-column count of the values in the filtered frame.
newdf.count()


name      2
degree    2
score     2
dtype: int64

In [30]:
# Display the filtered frame built in the previous cell.
newdf

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
3,Zeelan,MBA,98


# 2 : Query Function

In [31]:
# Same filter written as a query expression that refers to the columns by name.
newdf = df.query(expr='degree == "MBA" & score >=  90')
newdf

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
3,Zeelan,MBA,98


# 3 : loc function

In [32]:
# Same filter through the loc indexer, which also accepts a boolean mask:
# rows with degree "MBA" and a score of at least 90.
newdf = df.loc[(df["degree"] == "MBA") & (df["score"] >= 90)]
newdf

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
3,Zeelan,MBA,98


# Filter Pandas Dataframe by Row and Column Position
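Suppose you want to select specific rows by their position (say, the second through fifth rows).
You can use the `df.iloc[]` indexer for this. Positions are zero-based and the end of a slice is excluded, so `df.iloc[1:5]` returns the second through fifth rows.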

In [33]:
# Position-based selections with iloc (zero-based; the end of a slice is excluded).
# Only the value of the final expression is rendered as the cell output.
df.iloc[:5]        # first five rows, all columns
df.iloc[1:5]       # second through fifth row, all columns
df.iloc[5, 0]      # single value: sixth row, first column
df.iloc[1:5, 0]    # second through fifth row of the first column
df.iloc[1:5, :5]   # second through fifth row, up to the first five columns
df.iloc[2:7, 1:3]  # third through seventh row, second and third columns

Unnamed: 0,degree,score
2,M.Tech,80
3,MBA,98
4,MCA,95
5,BCA,67
6,M.Tech,90


In [34]:
# Rows at positions 1 through 4 of the first column ("name"); the result is a Series.
df.iloc[1:5,0] #Second to Fifth row, first column

1     pankaj
2     sudhir
3     Zeelan
4    Zeelans
Name: name, dtype: object

# Difference between loc and iloc function

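`loc` selects rows based on index labels, and a label slice includes both of its endpoints.

`iloc`, in contrast, selects rows based on their position in the index, so it only takes integer positions, and a slice excludes its end position.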

In [38]:
import numpy as np
# Ten odd sales figures (1, 3, ..., 19) under a deliberately unordered integer
# index, so that label-based and position-based selection give different rows.
x = pd.DataFrame(
    data={"Sales": np.arange(1, 20, 2)},
    index=[9, 8, 7, 6] + list(range(6)),
)
x

Unnamed: 0,Sales
9,1
8,3
7,5
6,7
0,9
1,11
2,13
3,15
4,17
5,19


# iloc - Index Position

In [39]:
# First five rows by position (positions 0 through 4), whatever their index labels are.
x.iloc[0:5]

Unnamed: 0,Sales
9,1
8,3
7,5
6,7
0,9


# loc - Index Label

In [40]:
# Rows from index label 0 through index label 5, both endpoints included.
x.loc[0:5]

Unnamed: 0,Sales
0,9
1,11
2,13
3,15
4,17
5,19


# Selecting multiple values of a column

In [42]:
# Long way: compare the column against each accepted value and OR the two masks.
newdf = df.loc[(df["degree"] == "MBA") | (df["degree"] == "MCA")]
newdf


Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
1,pankaj,MCA,45
3,Zeelan,MBA,98
4,Zeelans,MCA,95
7,Hayat,MBA,87


In [43]:
# Smart way: one membership test against the list of accepted degrees.
newdf = df[df["degree"].isin(["MBA", "MCA"])]
# End the cell with the bare name so the notebook renders the frame as a table,
# like the other cells, instead of printing its plain-text representation.
newdf

      name degree  score
0   Chaitu    MBA     90
1   pankaj    MCA     45
3   Zeelan    MBA     98
4  Zeelans    MCA     95
7    Hayat    MBA     87


# Lambda Method for Filtering

In [45]:
# Row-wise predicate: apply() hands each row to the lambda, which is true for
# MBA rows scoring at least 80; the resulting boolean Series is used as the mask.
newdf = df[
    df.apply(
        lambda row: row["degree"] == 'MBA' and row["score"] >= 80,
        axis="columns",
    )
]
newdf

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
3,Zeelan,MBA,98
7,Hayat,MBA,87


# List Comprehension Method for Filtering

In [46]:
# iterrows() yields (index label, row) pairs, whereas iloc expects integer
# positions. Enumerate the rows and collect positions so the selection stays
# correct even when the index is not the default 0..n-1 range.
newdf = df.iloc[
    [
        position
        for position, (_, row) in enumerate(df.iterrows())
        if row['degree'] == 'MBA' and row['score'] >= 80
    ]
]

In [47]:
# Display the frame selected by the list-comprehension filter.
newdf

Unnamed: 0,name,degree,score
0,Chaitu,MBA,90
3,Zeelan,MBA,98
7,Hayat,MBA,87
