## Handling Missing Files

In [1]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("Emp_Records.csv")
df.head()

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
0,677509,Lois,36.36,60.0,13.68,168251,Denver
1,940761,Brenda,47.02,60.0,9.01,51063,Stonewall
2,428945,Joe,54.15,68.0,0.98,50155,Michigantown
3,408351,,39.67,51.0,18.3,180294,Hydetown
4,193819,Benjamin,40.31,58.0,4.01,117642,Fremont


In [4]:
df.isna()

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,True,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
95,False,False,False,False,False,False,False
96,False,False,False,False,False,False,False
97,False,False,False,False,False,False,False
98,False,False,False,False,False,False,False


In [5]:
df.isna().sum()

Emp ID             0
First Name         6
Age in Yrs        10
Weight in Kgs     10
Age in Company     9
Salary             0
City               6
dtype: int64

In [30]:
df.isnull().sum()

Emp ID             0
First Name         6
Age in Yrs        10
Weight in Kgs     10
Age in Company     9
Salary             0
City               6
dtype: int64

In [46]:
series = df["First Name"]
series.head()


0        Lois
1      Brenda
2         Joe
3         NaN
4    Benjamin
Name: First Name, dtype: object

In [45]:
type(series)

pandas.core.series.Series

In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Emp ID          100 non-null    int64  
 1   First Name      94 non-null     object 
 2   Age in Yrs      90 non-null     float64
 3   Weight in Kgs   90 non-null     float64
 4   Age in Company  91 non-null     float64
 5   Salary          100 non-null    int64  
 6   City            94 non-null     object 
dtypes: float64(3), int64(2), object(2)
memory usage: 5.6+ KB


In [49]:
my_data = pd.DataFrame(np.random.randint(10,20,[5,5]), columns=list("PQRST"), index=list("ABCDE"))

my_data

Unnamed: 0,P,Q,R,S,T
A,14,14,12,15,11
B,12,17,17,13,18
C,16,14,17,12,14
D,16,17,14,13,19
E,11,12,15,11,18


In [50]:
my_data["P"] == 14

A     True
B    False
C    False
D    False
E    False
Name: P, dtype: bool

In [51]:
my_data.isnull().sum()

P    0
Q    0
R    0
S    0
T    0
dtype: int64

In [53]:
my_data.isna().sum()

P    0
Q    0
R    0
S    0
T    0
dtype: int64

In [94]:
my_data.iloc[2:4, 0:1]

Unnamed: 0,P
C,16
D,16


In [95]:
my_data[2:4]

Unnamed: 0,P,Q,R,S,T
C,16,14,17,,
D,16,17,14,,


In [87]:
my_data.loc["C":"D","S":"T"] = np.nan

In [88]:
my_data

Unnamed: 0,P,Q,R,S,T
A,14,14,12,15.0,11.0
B,12,17,17,13.0,18.0
C,16,14,17,,
D,16,17,14,,
E,11,12,15,11.0,18.0


In [92]:
my_data[my_data.S == 15.0]

Unnamed: 0,P,Q,R,S,T
A,14,14,12,15.0,11.0


In [97]:
my_data.isna().sum()

P    0
Q    0
R    0
S    2
T    2
dtype: int64

In [103]:
my_data.iloc[0:1,0:1] = 15

In [104]:
my_data

Unnamed: 0,P,Q,R,S,T
A,15,14,12,15.0,11.0
B,12,17,17,13.0,18.0
C,16,14,17,,
D,16,17,14,,
E,11,12,15,11.0,18.0


In [105]:
df = pd.read_csv("Emp_Records.csv")
df.head()

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
0,677509,Lois,36.36,60.0,13.68,168251,Denver
1,940761,Brenda,47.02,60.0,9.01,51063,Stonewall
2,428945,Joe,54.15,68.0,0.98,50155,Michigantown
3,408351,,39.67,51.0,18.3,180294,Hydetown
4,193819,Benjamin,40.31,58.0,4.01,117642,Fremont


In [110]:
df.isna().axis= 1

In [113]:
df[df.isna().any(axis=1)]

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
3,408351,,39.67,51.0,18.3,180294,Hydetown
7,380086,,59.12,40.0,34.52,60918,Blanchester
10,231469,,42.5,80.0,8.29,118457,Sabetha
14,441771,,59.47,47.0,26.69,92220,Quecreek
18,890290,Julia,,56.0,12.43,141518,Primm Springs
19,622406,Thomas,,73.0,19.15,73862,Dutchtown
20,979607,,22.64,56.0,1.26,93967,Shreveport
24,560455,Carolyn,,53.0,16.08,42005,Saint Cloud
27,683826,Roger,,77.0,8.5,129625,Mount Vernon
28,474599,Maria,42.39,,,48944,Lawrenceburg


In [116]:
df[df.isna().any(axis=1)]

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
3,408351,,39.67,51.0,18.3,180294,Hydetown
7,380086,,59.12,40.0,34.52,60918,Blanchester
10,231469,,42.5,80.0,8.29,118457,Sabetha
14,441771,,59.47,47.0,26.69,92220,Quecreek
18,890290,Julia,,56.0,12.43,141518,Primm Springs
19,622406,Thomas,,73.0,19.15,73862,Dutchtown
20,979607,,22.64,56.0,1.26,93967,Shreveport
24,560455,Carolyn,,53.0,16.08,42005,Saint Cloud
27,683826,Roger,,77.0,8.5,129625,Mount Vernon
28,474599,Maria,42.39,,,48944,Lawrenceburg


In [117]:
df.describe()

Unnamed: 0,Emp ID,Age in Yrs,Weight in Kgs,Age in Company,Salary
count,100.0,90.0,90.0,91.0,100.0
mean,547652.1,38.303222,58.533333,8.801868,119738.09
std,257664.16679,11.843545,12.232945,8.367442,46185.278194
min,134841.0,21.1,40.0,0.02,42005.0
25%,328643.75,27.8025,51.0,2.3,83979.75
50%,497414.0,36.3,56.5,6.94,118049.5
75%,766040.0,48.5375,61.0,13.305,162509.25
max,979607.0,59.47,90.0,34.52,197537.0


In [119]:
df[df["First Name"].isna()]

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
3,408351,,39.67,51.0,18.3,180294,Hydetown
7,380086,,59.12,40.0,34.52,60918,Blanchester
10,231469,,42.5,80.0,8.29,118457,Sabetha
14,441771,,59.47,47.0,26.69,92220,Quecreek
20,979607,,22.64,56.0,1.26,93967,Shreveport
32,621833,,23.92,,1.83,169245,Bonanza


In [121]:
df.isna().sum()

Emp ID             0
First Name         6
Age in Yrs        10
Weight in Kgs     10
Age in Company     9
Salary             0
City               6
dtype: int64

In [122]:
df[df["Weight in Kgs"].isnull()]

Unnamed: 0,Emp ID,First Name,Age in Yrs,Weight in Kgs,Age in Company,Salary,City
28,474599,Maria,42.39,,,48944,Lawrenceburg
29,335732,Brenda,53.68,,,60508,Mesa
30,329752,Lillian,36.24,,,67251,Panacea
31,893212,Amy,36.14,,,112715,Kline
32,621833,,23.92,,1.83,169245,Bonanza
33,456747,Roy,26.24,,4.93,170895,Liberty
34,278556,Richard,,,0.94,122226,Ohatchee
35,333476,Mary,49.69,,25.62,109394,Nashville
36,218791,Aaron,,,20.08,54402,Eckerty
41,227922,Amanda,35.02,,10.28,114257,
