# pd_dropna().py
# -*- coding: utf-8 -*-
"""
#Handling Missing Values in Pandas
* Tutorial: https://news.towardsai.net/hmv
* Github: https://github.com/towardsai/tutorials/tree/master/pandas
"""
#Import Required Libraries:
import pandas as pd
# Sample records, one list per column; pd.NA marks a missing entry.
info = {
    # Name of person.
    "Person": ["Alan", "Berta", "Charlie", "Danielle", "Euler", pd.NA],
    # Age of person.
    "Age": [32, 45, 35, 28, 30, pd.NA],
    # Major.
    "Degree": ["CS", "Biology", "Physics", pd.NA, "Physics", "CS"],
    # Country of study.
    "Country": ["USA", "Mexico", "USA", "Canada", "USA", "Canada"],
    # Books owned.
    "Books": [10, pd.NA, 30, 40, 50, 60],
    # Batch size.
    "Batch Size": [200, 100, 50, 200, 50, pd.NA],
}

# Build the DataFrame from the column dictionary.
data = pd.DataFrame(data=info)

# Bare expression: shows the DataFrame in an interactive/notebook session.
data

# None of the calls below pass inplace=True, so each one returns a new
# DataFrame and `data` itself keeps all of its rows and columns.

# Default behaviour: drop every row containing at least one missing element.
data.dropna()

# Same row-wise drop, with the axis given as an integer.
data.dropna(axis=0)

# Same row-wise drop, with the axis given by name.
data.dropna(axis="rows")

# Column-wise: drop every column containing at least one missing element.
data.dropna(axis=1)

# Same column-wise drop, with the axis given by name.
data.dropna(axis="columns")

# how="any" spells out the default: one missing element is enough to drop a row.
data.dropna(how="any")
#Import Required Libraries:
import pandas as pd
# Sample records, one list per column; the third record is missing entirely.
info = {
    # Name of person.
    "Person": ["Alan", "Berta", pd.NA, "Charlie", "Danielle", "Euler"],
    # Age of person.
    "Age": [32, 45, pd.NA, 35, 28, 30],
    # Major.
    "Degree": ["CS", "Biology", pd.NA, "Physics", pd.NA, "Physics"],
    # Country of study.
    "Country": ["USA", "Mexico", pd.NA, "USA", "Canada", "USA"],
    # Books owned.
    "Books": [10, pd.NA, pd.NA, 30, 40, 50],
    # Batch size.
    "Batch Size": [200, 100, pd.NA, 50, 200, 50],
}

# Build the DataFrame from the column dictionary.
data = pd.DataFrame(data=info)

# Bare expression: shows the DataFrame in an interactive/notebook session.
data

# how="all": a row is dropped only when every one of its elements is missing.
data.dropna(how="all")

# thresh=5: a row is kept only if it has at least 5 non-missing elements.
data.dropna(thresh=5)
#Import Required Libraries:
import pandas as pd
# Sample records, one list per column; pd.NA marks a missing entry.
info = {
    # Name of person.
    "Person": ["Alan", "Berta", pd.NA, "Charlie", "Danielle", "Euler"],
    # Age of person.
    "Age": [32, pd.NA, pd.NA, 35, pd.NA, 30],
    # Major.
    "Degree": ["CS", "Biology", pd.NA, "Physics", pd.NA, "Physics"],
    # Country of study.
    "Country": ["USA", pd.NA, pd.NA, "USA", "Canada", "USA"],
    # Books owned.
    "Books": [10, pd.NA, pd.NA, 30, 40, 50],
    # Batch size.
    "Batch Size": [200, 100, pd.NA, 50, 200, 50],
}

# Build the DataFrame from the column dictionary.
data = pd.DataFrame(data=info)

# Bare expression: shows the DataFrame in an interactive/notebook session.
data

# subset limits the missing-value check to the listed columns; gaps in any
# other column (e.g. "Age", "Books") do not cause a row to be dropped.
data.dropna(subset=["Person", "Degree", "Country"])
#Import Required Libraries:
import pandas as pd
# Sample records, one list per column; pd.NA marks a missing entry.
info = {
    # Name of person.
    "Person": ["Alan", "Berta", "Charlie", "Danielle", "Euler", pd.NA],
    # Age of person.
    "Age": [32, 45, 35, 28, 30, pd.NA],
    # Major.
    "Degree": ["CS", "Biology", "Physics", pd.NA, "Physics", "CS"],
    # Country of study.
    "Country": ["USA", "Mexico", "USA", "Canada", "USA", "Canada"],
    # Books owned.
    "Books": [10, pd.NA, 30, 40, 50, 60],
    # Batch size.
    "Batch Size": [200, 100, 50, 200, 50, pd.NA],
}

# Build the DataFrame from the column dictionary.
data = pd.DataFrame(data=info)

# Bare expression: shows the DataFrame in an interactive/notebook session.
data

# inplace=True removes the incomplete rows from `data` itself rather than
# producing a new DataFrame; the call returns None.
data.dropna(inplace=True)

# `data` now holds only the rows that had no missing element.
data