In [1]:
# Pandas Tutorial

# A Pandas is a powerful data analysis toolkit. It is an open-source. A fast and efficient Dataframe Object for data manipulation. It is used fir reading data structures in different formats csv,tsv,txt,XML,JSON,ZIP etc.

In [2]:
import pandas as pd

# To check pandas version

In [3]:
pd.__version__

'2.2.3'

In [4]:
l1 = [1,1.1,"data values"]
print(l1)

[1, 1.1, 'data values']


# A series is like a column in a table. It is a one-dimensional array holding data of any type. In other words, it is a one-dimensional labelled homogenous array.

In [5]:
s1 = pd.Series(l1)
print(s1)

0              1
1            1.1
2    data values
dtype: object


# To identify the datatype of Series

In [6]:
type(s1)

pandas.core.series.Series

In [7]:
s2 = pd.Series([1,2,1.1,"data values"])
print(s2)

0              1
1              2
2            1.1
3    data values
dtype: object


# Creating an Empty Series

In [8]:
empty_series = pd.Series([])
print(empty_series)

Series([], dtype: object)


# If the data values is in list format, then data values should be equal to index values

In [9]:
s3 = pd.Series([1,2,3,4],index=['a','b','c','d'])
print(s3)

a    1
b    2
c    3
d    4
dtype: int64


# To change the datatype into float

In [10]:
s4 = pd.Series([1,2,3,4],index=['a','b','c','d'],dtype=float,name='data values')
print(s4)

a    1.0
b    2.0
c    3.0
d    4.0
Name: data values, dtype: float64


# Printing Scalar Series

In [11]:
scalar_series = pd.Series(0.5)
print(scalar_series)

0    0.5
dtype: float64


In [12]:
scalar_series = pd.Series(0.5,index=[0,1,2,3])
print(scalar_series)

0    0.5
1    0.5
2    0.5
3    0.5
dtype: float64


In [13]:
dict_series = pd.Series({'a':1,'b':2})
print(dict_series)

a    1
b    2
dtype: int64


# Pandas Series also supports numpy array functions and operators

In [14]:
s5 = pd.Series([1,2,3,4,5])
print(s5)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [15]:
s5[0]

np.int64(1)

In [16]:
s5[3]

np.int64(4)

In [17]:
s5[0:3]

0    1
1    2
2    3
dtype: int64

In [18]:
max(s5)

5

In [19]:
min(s5)

1

In [20]:
print(s5[s5 > 3])

3    4
4    5
dtype: int64


In [21]:
print(s5)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [22]:
s6 = pd.Series([1,2,3,4,5])
print(s6)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [23]:
print(s5+s6)

0     2
1     4
2     6
3     8
4    10
dtype: int64


In [24]:
print(s5-s6)

0    0
1    0
2    0
3    0
4    0
dtype: int64


In [25]:
print(s5*s6)

0     1
1     4
2     9
3    16
4    25
dtype: int64


In [26]:
print(s5/s6)

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
dtype: float64


# Handles Missing Values because of pandas module features. NaN stands for Not a Number.

In [27]:
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([1,2,3])
print(s1+s2)

0    2.0
1    4.0
2    6.0
3    NaN
4    NaN
dtype: float64


# A DataFrame is a 2-dimensional data structure, or table with rows and columns or labelled axes. In other words, it is a 2-dimensional labelled heterogenous tabular data structure and also it is size-mutuable.

# Creating an Empty DataFrame

In [28]:
emp_df = pd.DataFrame([])
print(emp_df)

Empty DataFrame
Columns: []
Index: []


# Creating an DataFrame using List

In [29]:
l = [1,2,3]
print(l)

[1, 2, 3]


In [30]:
df = pd.DataFrame(l)
df

Unnamed: 0,0
0,1
1,2
2,3


In [31]:
d2 = pd.DataFrame([[1,2,3],[4,5,6]])
d2

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


# Creating an DataFrame using Dictionary

In [32]:
dict1 = {'ID':[1,2,3,4]}
print(dict1)

{'ID': [1, 2, 3, 4]}


In [33]:
df1 = pd.DataFrame(dict1)
df1

Unnamed: 0,ID
0,1
1,2
2,3
3,4


In [34]:
dict2 = {'ID':[1,2,3,4],'SN':[1,2,3,4]}
print(dict2)

{'ID': [1, 2, 3, 4], 'SN': [1, 2, 3, 4]}


In [35]:
df2 = pd.DataFrame(dict2)
df2

Unnamed: 0,ID,SN
0,1,1
1,2,2
2,3,3
3,4,4


# Creating an DataFrame from List of Dictionary

In [36]:
l1 = [{'a':1,'b':2,'c':3}]
print(l1)

[{'a': 1, 'b': 2, 'c': 3}]


In [37]:
df3 = pd.DataFrame(l1)
df3

Unnamed: 0,a,b,c
0,1,2,3


In [38]:
l2 = [{'a':1,'b':2,'c':3},{'a':1,'b':2,'c':3}]
print(l2)

[{'a': 1, 'b': 2, 'c': 3}, {'a': 1, 'b': 2, 'c': 3}]


In [39]:
df4 = pd.DataFrame(l2)
df4

Unnamed: 0,a,b,c
0,1,2,3
1,1,2,3


In [40]:
l3 = [{'a':1,'b':2},{'a':1,'b':2,'c':3}]
print(l3)

[{'a': 1, 'b': 2}, {'a': 1, 'b': 2, 'c': 3}]


# Handles Missing Values because of pandas module features. NaN stands for Not a Number.

In [41]:
df5 = pd.DataFrame(l3)
df5

Unnamed: 0,a,b,c
0,1,2,
1,1,2,3.0


# Creating an DataFrame from Dictionary of Series

In [42]:
ds1 = {'ID':pd.Series([1,2,3,4]),'SN':pd.Series([1,2,3,4])}
print(ds1)

{'ID': 0    1
1    2
2    3
3    4
dtype: int64, 'SN': 0    1
1    2
2    3
3    4
dtype: int64}


In [43]:
df6 = pd.DataFrame(ds1)
df6

Unnamed: 0,ID,SN
0,1,1
1,2,2
2,3,3
3,4,4


# The read_csv() function in pandas is used to read data from a CSV (Comma-Separated Values) file into a DataFrame. CSV Format is also used to store tabular data in the form of numbers,plain texts.

In [44]:
#pd.read_csv()

In [45]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book1.csv")

Unnamed: 0,Seat No,Student Name,Age,Percentage
0,101,Sanket Shivde,22,82.85
1,102,Ayush Jain,22,81.38


In [46]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv")

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No
2,Japan,Tokyo,Japanese,Yes
3,Canada,Ottawa,Canadian,Yes


# To check the file stored in which directory, we have to import the 'os' module.

In [47]:
import os

# In Python, os.getcwd() is a function from the os module that returns the current working directory.

In [48]:
print(os.getcwd())

G:\Jupyter Notebook


# Writing CSV Files is used to make some changes in the CSV Files or to do some modifications in the CSV Files. Pandas Library is generally used for Data Preprocessing and Data Cleaning, but mainly used for Data Preprocessing. Data Preprocessing is mainly used for raw data cleaning and to find useful insights.

In [49]:
df = pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv")
df

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No
2,Japan,Tokyo,Japanese,Yes
3,Canada,Ottawa,Canadian,Yes


# CSV File is in DataFrame Format

In [50]:
type(df) 

pandas.core.frame.DataFrame

# To identify the column names in the table

In [51]:
df.columns

Index(['Countries Name', 'Capital', 'Language Spoken', 'Yes or No'], dtype='object')

# nrows parameter in read_csv() function:-
# The nrows parameter in the pandas.read_csv() function allows you to specify the number of rows to read from the CSV file.

In [52]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",nrows=1)

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes


In [53]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv", nrows = 2)

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No


# usecols parameter in read_csv() function:-
# The usecols parameter in the pd.read_csv() function allows you to specify which columns from a CSV file you want to read into your DataFrame.

In [54]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv", usecols = [0,1,2])

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [55]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv", usecols = [1,2])

Unnamed: 0,Capital,Language Spoken
0,Washington DC,English
1,Paris,English
2,Tokyo,Japanese
3,Ottawa,Canadian


In [56]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv")

Unnamed: 0,0,1,2
0,Countries Name,Capital,Language Spoken
1,USA,Washington DC,English
2,France,Paris,French
3,Japan,Tokyo,Japanese
4,Canada,Ottawa,Canadian


# skiprows parameter in read_csv() function:-
# The skiprows parameter in pandas.read_csv() function allows you to specify the number of rows to skip from the top of the CSV file.

In [57]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",skiprows = 1)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [58]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",skiprows = [0])

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [59]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",skiprows = 3)

Unnamed: 0,France,Paris,French
0,Japan,Tokyo,Japanese
1,Canada,Ottawa,Canadian


In [60]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",skiprows = [0,1,2])

Unnamed: 0,France,Paris,French
0,Japan,Tokyo,Japanese
1,Canada,Ottawa,Canadian


In [61]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv")

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No
2,Japan,Tokyo,Japanese,Yes
3,Canada,Ottawa,Canadian,Yes


# index_col parameter in read_csv() function:-
# The index_col parameter in read_csv() function allows you to specify which column to use as the index for the DataFrame.

In [62]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 'Countries Name')

Unnamed: 0_level_0,Capital,Language Spoken,Yes or No
Countries Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
USA,Washington DC,English,Yes
France,Paris,English,No
Japan,Tokyo,Japanese,Yes
Canada,Ottawa,Canadian,Yes


In [63]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 0)

Unnamed: 0_level_0,Capital,Language Spoken,Yes or No
Countries Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
USA,Washington DC,English,Yes
France,Paris,English,No
Japan,Tokyo,Japanese,Yes
Canada,Ottawa,Canadian,Yes


In [64]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 'Capital')

Unnamed: 0_level_0,Countries Name,Language Spoken,Yes or No
Capital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Washington DC,USA,English,Yes
Paris,France,English,No
Tokyo,Japan,Japanese,Yes
Ottawa,Canada,Canadian,Yes


In [65]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 1)

Unnamed: 0_level_0,Countries Name,Language Spoken,Yes or No
Capital,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Washington DC,USA,English,Yes
Paris,France,English,No
Tokyo,Japan,Japanese,Yes
Ottawa,Canada,Canadian,Yes


In [66]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 'Language Spoken')

Unnamed: 0_level_0,Countries Name,Capital,Yes or No
Language Spoken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
English,USA,Washington DC,Yes
English,France,Paris,No
Japanese,Japan,Tokyo,Yes
Canadian,Canada,Ottawa,Yes


In [67]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",index_col = 2)

Unnamed: 0_level_0,Countries Name,Capital,Yes or No
Language Spoken,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
English,USA,Washington DC,Yes
English,France,Paris,No
Japanese,Japan,Tokyo,Yes
Canadian,Canada,Ottawa,Yes


In [68]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv")

Unnamed: 0,0,1,2
0,Countries Name,Capital,Language Spoken
1,USA,Washington DC,English
2,France,Paris,French
3,Japan,Tokyo,Japanese
4,Canada,Ottawa,Canadian


# header parameter in read_csv() function:-
# The header parameter in the pandas.read_csv() function is used to specify which row (or rows) in your CSV file should be used as the header (column names) for the resulting DataFrame.

In [69]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",header = 1)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [70]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book3.csv",header = 1)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [71]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book4.csv")

Unnamed: 0.1,Unnamed: 0,Data,Unnamed: 2
0,0,1,2
1,Countries Name,Capital,Language Spoken
2,USA,Washington DC,English
3,France,Paris,French
4,Japan,Tokyo,Japanese
5,Canada,Ottawa,Canadian


In [72]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book4.csv",header = 2)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [73]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book5.csv")

Unnamed: 0,USA,Washington DC,English
0,France,Paris,French
1,Japan,Tokyo,Japanese
2,Canada,Ottawa,Canadian


In [74]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book5.csv",header = None)

Unnamed: 0,0,1,2
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


# prefix parameter in read_csv() function:-
# The prefix parameter in the pandas.read_csv() function is used to add a prefix to the column names when the CSV file does not have a header row.

In [75]:
#pd.read_csv("C:\\Users\\User\\Downloads\\Book5.csv",header = None, prefix = 'Columns')

# names parameter in read_csv() function:-
# The names parameter in the pd.read_csv() function is used to specify the column names for the DataFrame that is created.

In [76]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book5.csv",names = ['Country Name','Capital','Language Spoken'])

Unnamed: 0,Country Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,French
2,Japan,Tokyo,Japanese
3,Canada,Ottawa,Canadian


In [77]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv")

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No
2,Japan,Tokyo,Japanese,Yes
3,Canada,Ottawa,Canadian,Yes


# In pandas, the head() function is used to return the first few rows of a DataFrame or Series. By default, it returns first 5 rows, if a number is not specified.

In [78]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").head()

Unnamed: 0,Round,Date,Team 1,FT,Team 2
0,Journée 1,Fri Aug 21 2020,Girondins de Bordeaux,0-0,FC Nantes
1,Journée 1,Sat Aug 22 2020,Dijon FCO,0-1,Angers SCO
2,Journée 1,Sat Aug 22 2020,Lille OSC,1-1,Stade Rennais FC
3,Journée 1,Sun Aug 23 2020,AS Monaco,2-2,Stade de Reims
4,Journée 1,Sun Aug 23 2020,FC Lorient,3-1,RC Strasbourg


In [79]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").head(1)

Unnamed: 0,Round,Date,Team 1,FT,Team 2
0,Journée 1,Fri Aug 21 2020,Girondins de Bordeaux,0-0,FC Nantes


In [80]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").head(6)

Unnamed: 0,Round,Date,Team 1,FT,Team 2
0,Journée 1,Fri Aug 21 2020,Girondins de Bordeaux,0-0,FC Nantes
1,Journée 1,Sat Aug 22 2020,Dijon FCO,0-1,Angers SCO
2,Journée 1,Sat Aug 22 2020,Lille OSC,1-1,Stade Rennais FC
3,Journée 1,Sun Aug 23 2020,AS Monaco,2-2,Stade de Reims
4,Journée 1,Sun Aug 23 2020,FC Lorient,3-1,RC Strasbourg
5,Journée 1,Sun Aug 23 2020,Nîmes Olympique,4-0,Stade Brestois 29


# In pandas, the tail() function is used to return the last few rows of a DataFrame or Series. By default, it returns last 5 rows, if a number is not specified.

In [81]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").tail()

Unnamed: 0,Round,Date,Team 1,FT,Team 2
375,Journée 38,Sun May 23 2021,FC Nantes,,Montpellier HSC
376,Journée 38,Sun May 23 2021,Stade de Reims,,Girondins de Bordeaux
377,Journée 38,Sun May 23 2021,Stade Rennais FC,,Nîmes Olympique
378,Journée 38,Sun May 23 2021,AS Saint-Étienne,,Dijon FCO
379,Journée 38,Sun May 23 2021,RC Strasbourg,,FC Lorient


In [82]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").tail(1)

Unnamed: 0,Round,Date,Team 1,FT,Team 2
379,Journée 38,Sun May 23 2021,RC Strasbourg,,FC Lorient


In [83]:
pd.read_csv("C:\\Users\\User\\Downloads\\fr.1.csv").tail(5)

Unnamed: 0,Round,Date,Team 1,FT,Team 2
375,Journée 38,Sun May 23 2021,FC Nantes,,Montpellier HSC
376,Journée 38,Sun May 23 2021,Stade de Reims,,Girondins de Bordeaux
377,Journée 38,Sun May 23 2021,Stade Rennais FC,,Nîmes Olympique
378,Journée 38,Sun May 23 2021,AS Saint-Étienne,,Dijon FCO
379,Journée 38,Sun May 23 2021,RC Strasbourg,,FC Lorient


In [84]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book1.csv")

Unnamed: 0,Seat No,Student Name,Age,Percentage
0,101,Sanket Shivde,22,82.85
1,102,Ayush Jain,22,81.38


# dtype parameter in read_csv() function:-
# The dtype parameter in the read_csv() function in pandas allows you to specify the data type of each column in the CSV file.

In [85]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book1.csv",dtype = {"Seat No":"float64","Age":"float64"})

Unnamed: 0,Seat No,Student Name,Age,Percentage
0,101.0,Sanket Shivde,22.0,82.85
1,102.0,Ayush Jain,22.0,81.38


In [86]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv")

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,Yes
1,France,Paris,English,No
2,Japan,Tokyo,Japanese,Yes
3,Canada,Ottawa,Canadian,Yes


# true_values parameter in read_csv() function:-
# The true_values parameter in the pandas.read_csv() function allows you to specify a list of values that should be interpreted as True when reading the CSV file.
# false_values parameter in read_csv() function:-
# The false_values parameter in the pandas.read_csv() function allows you to specify a list of values that should be interpreted as False when reading the CSV file.

In [87]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book2(1).csv",true_values=['Yes'],false_values=['No'])

Unnamed: 0,Countries Name,Capital,Language Spoken,Yes or No
0,USA,Washington DC,English,True
1,France,Paris,English,False
2,Japan,Tokyo,Japanese,True
3,Canada,Ottawa,Canadian,True


In [88]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv")

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,no values,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,not available,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,not available,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,not available,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


# na_values parameter in read_csv() function:-
# The na_values parameter in the pandas.read_csv() function allows you to specify a list of values that should be treated as missing values (NaN) when reading in a CSV file.

In [89]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv",na_values ='not available') 

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500.0,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,no values,3581655,28748.0,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740.0,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,,57794,199.0,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468.0,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700.0,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800.0,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [90]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv",na_values = ['not available','no values'])

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500.0,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,,3581655,28748.0,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740.0,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,,57794,199.0,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468.0,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700.0,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800.0,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [91]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv")

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,no values,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,not available,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,not available,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,not available,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [92]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv",na_values={'Region':'no values','Area':'not available'})

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500.0,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,,3581655,28748.0,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740.0,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,not available,57794,199.0,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468.0,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700.0,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800.0,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


# keep_default_na parameter in read_csv() function:-
# The keep_default_na parameter in the pandas.read_csv() function controls how missing values are handled during the reading process.

In [93]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv",keep_default_na = False)

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,no values,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,not available,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,not available,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,not available,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [94]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv",keep_default_na = True)

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,no values,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,not available,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,not available,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,not available,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


# na_filter parameter in read_csv() function:-
# The na_filter parameter in the pandas.read_csv() function controls whether pandas should attempt to convert empty cells to NaN values.

In [95]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 1.csv",na_filter = False)

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,WESTERN EUROPE,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,WESTERN EUROPE,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,WESTERN EUROPE,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,1246700,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,1246700,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,1246700,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [96]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 1.csv",na_filter = True)

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,WESTERN EUROPE,3581655,28748,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,WESTERN EUROPE,32930091,2381740,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,WESTERN EUROPE,57794,199,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,WESTERN EUROPE,71201,468,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,1246700,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,1246700,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,1246700,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,1246700,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


In [97]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv")

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,Afghanistan,ASIA (EX. NEAR EAST),31056997,647500.0,480,0,2306,16307,700,360,32,1213,22,8765,1.0,466,2034,38.0,24.0,38.0
1,Albania,,3581655,28748.0,1246,126,-493,2152,4500,865,712,2109,442,7449,3.0,1511,522,232.0,188.0,579.0
2,Algeria,,32930091,2381740.0,138,4,-39,31,6000,700,781,322,25,9653,1.0,1714,461,101.0,6.0,298.0
3,American Samoa,,57794,199.0,2904,5829,-2071,927,8000,970,2595,10,15,75,2.0,2246,327,,,
4,Andorra,,71201,468.0,1521,0,66,405,19000,1000,4972,222,0,9778,3.0,871,625,,,
5,Angola,SUB-SAHARAN AFRICA,12127071,,97,13,0,19119,1900,420,78,241,24,9735,,4511,242,96.0,658.0,246.0
6,Anguilla,LATIN AMER. & CARIB,13477,,1321,5980,1076,2103,8600,950,4600,0,0,100,2.0,1417,534,4.0,18.0,78.0
7,Antigua & Barbuda,LATIN AMER. & CARIB,69108,,1560,3454,-615,1946,11000,890,5499,1818,455,7727,2.0,1693,537,38.0,22.0,743.0
8,Argentina,LATIN AMER. & CARIB,39921833,,144,18,61,1518,11200,971,2204,1231,48,8721,3.0,1673,755,95.0,358.0,547.0
9,Armenia,C.W. OF IND. STATES,2976372,29800.0,999,0,-647,2328,3500,986,1957,1755,23,8015,4.0,1207,823,239.0,343.0,418.0


# In pandas, isnull() is a function (and also a method of DataFrames and Series) used to detect missing values in your data. It returns a boolean mask where True indicates a missing value and False indicates a non-missing value.

In [98]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world.csv").isnull()

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [99]:
df = pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").isnull()

# To count the total number of null values in each column of a DataFrame

In [100]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").isnull().sum()

Country             0
Region              4
Population          0
Area                4
Pop. Density        0
Coastline           0
Net migration       0
Infant mortality    0
GDP                 0
Literacy            0
Phones              0
Arable              0
Crops               0
Other               0
Climate             1
Birthrate           0
Deathrate           0
Agriculture         2
Industry            2
Service             2
dtype: int64

# To count the total number of null values in a DataFrame

In [101]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").isnull().sum().sum()

np.int64(15)

# In pandas, notnull() is a function (and also a method of DataFrames and Series) used to detect non-missing values in your data. It returns a boolean mask where True indicates a non-missing value and False indicates a missing value.

In [102]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").notnull()

Unnamed: 0,Country,Region,Population,Area,Pop. Density,Coastline,Net migration,Infant mortality,GDP,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
0,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
1,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
3,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False
4,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False
5,True,True,True,False,True,True,True,True,True,True,True,True,True,True,False,True,True,True,True,True
6,True,True,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
7,True,True,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
8,True,True,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
9,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True


# To count the total number of notnull values in each column of a DataFrame

In [103]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").notnull().sum()

Country             19
Region              15
Population          19
Area                15
Pop. Density        19
Coastline           19
Net migration       19
Infant mortality    19
GDP                 19
Literacy            19
Phones              19
Arable              19
Crops               19
Other               19
Climate             18
Birthrate           19
Deathrate           19
Agriculture         17
Industry            17
Service             17
dtype: int64

# To count the total number of notnull values in a DataFrame

In [104]:
pd.read_csv("C:\\Users\\User\\Downloads\\countries of the world 2.csv").notnull().sum().sum()

np.int64(365)

In [105]:
import numpy as np
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50])

0     1.0
1     2.0
2     3.0
3     NaN
4    10.0
5    30.0
6    40.0
7     NaN
8    50.0
dtype: float64

In [106]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).isnull()

0    False
1    False
2    False
3     True
4    False
5    False
6    False
7     True
8    False
dtype: bool

# To count the total number of null values in each column of a Series

In [107]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).isnull().sum()

np.int64(2)

# To count the total number of null values in a Series

In [108]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).isnull().sum().sum()

np.int64(2)

In [109]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).notnull()

0     True
1     True
2     True
3    False
4     True
5     True
6     True
7    False
8     True
dtype: bool

# To count the total number of notnull values in each column of a Series

In [110]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).notnull().sum()

np.int64(7)

# To count the total number of notnull values in a Series

In [111]:
pd.Series([1,2,3,np.nan,10,30,40,np.nan,50]).notnull().sum().sum()

np.int64(7)

In [112]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [113]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# In Pandas, the dropna() function is used to remove rows or columns containing missing values (NaN) from a DataFrame. 

In [114]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna()

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English


In [115]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# Parameters in dropna():-

# axis:
# 0 or 'index': Drop rows with missing values.
# 1 or 'columns': Drop columns with missing values.

In [116]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna(axis=1)

Unnamed: 0,Countries Name,Capital
0,USA,Washington DC
1,France,Paris
2,Japan,Tokyo
3,Canada,Ottawa


# how parameter in drop_na() function consists of two inputs which are 'any' and 'all'. 
# how:
# 'any': Drop the row/column if any value is missing. 
# 'all': Drop the row/column only if all values are missing.

In [117]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna(how = 'any')

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English


In [118]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna(axis = 1,how = 'any')

Unnamed: 0,Countries Name,Capital
0,USA,Washington DC
1,France,Paris
2,Japan,Tokyo
3,Canada,Ottawa


In [119]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna(how = 'all')

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [120]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [121]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").dropna(axis = 1,how = 'all')

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [122]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [123]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(how = 'all')

Unnamed: 0,Countries Name,Capital,Language Spoken
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [124]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(axis = 1,how = 'all')

Unnamed: 0,Countries Name,Capital
0,,
1,France,Paris
2,Japan,Tokyo
3,Canada,Ottawa


# thresh:
# An integer value specifying the minimum number of non-NA values required to keep the row/column.

In [125]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [126]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(thresh=1)

Unnamed: 0,Countries Name,Capital,Language Spoken
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [127]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(thresh=4)

Unnamed: 0,Countries Name,Capital,Language Spoken


In [128]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(axis=1,thresh=1)

Unnamed: 0,Countries Name,Capital
0,,
1,France,Paris
2,Japan,Tokyo
3,Canada,Ottawa


In [129]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(axis=1,thresh=4)

0
1
2
3


In [130]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# subset:
# A list of column labels to consider for missing values.

In [131]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(subset = 'Countries Name')

Unnamed: 0,Countries Name,Capital,Language Spoken
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [132]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(subset = 'Capital')

Unnamed: 0,Countries Name,Capital,Language Spoken
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [133]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").dropna(subset = 'Language Spoken')

Unnamed: 0,Countries Name,Capital,Language Spoken


In [134]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# inplace:
# If True, modifies the DataFrame directly. If False (default), returns a new DataFrame.

In [135]:
df = pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")
df.dropna(inplace=False)
df

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [136]:
df = pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")
df.dropna(inplace=True)
df

Unnamed: 0,Countries Name,Capital,Language Spoken


In [137]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# In Pandas, the fillna() method is used to fill missing values (represented by NaN) in a DataFrame or Series with specified values.

# Parameters in fillna():-

# value: The value to use to fill the missing data. This can be a scalar value, a dictionary, or a Series.

In [138]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(0)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,0,0,0.0
1,France,Paris,0.0
2,Japan,Tokyo,0.0
3,Canada,Ottawa,0.0


In [139]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(2)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,2,2,2.0
1,France,Paris,2.0
2,Japan,Tokyo,2.0
3,Canada,Ottawa,2.0


In [140]:
 pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna({'Countries Name':'None','Capital':1,'Language Spoken':0})

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,1,0.0
1,France,Paris,0.0
2,Japan,Tokyo,0.0
3,Canada,Ottawa,0.0


In [141]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# method: The method to use for filling missing values. Options are 'ffill' (forward fill), 'bfill' (backward fill), or None (default, which uses the value parameter).

In [142]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill')


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,English
3,Canada,Ottawa,English


In [143]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='pad')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='pad')


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,English
3,Canada,Ottawa,English


In [144]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(method='bfill')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(method='bfill')


Unnamed: 0,Countries Name,Capital,Language Spoken
0,France,Paris,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# axis: The axis along which to fill missing values (0 for rows, 1 for columns).

In [145]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',axis=0)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',axis=0)


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,English
3,Canada,Ottawa,English


In [146]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',axis=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',axis=1)


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,Paris
2,Japan,Tokyo,Tokyo
3,Canada,Ottawa,Ottawa


In [147]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book8.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,,English
1,France,,English
2,Japan,,Japanese
3,Canada,,Canadian


In [148]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book8.csv").fillna(method='bfill',axis=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book8.csv").fillna(method='bfill',axis=1)


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,English,English
1,France,English,English
2,Japan,Japanese,Japanese
3,Canada,Canadian,Canadian


In [149]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv")

Unnamed: 0,Countries Name,Capital,Language Spoken
0,,,
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


# limit: The maximum number of consecutive missing values to fill.

In [150]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(0,limit=1)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,0,0,0.0
1,France,Paris,
2,Japan,Tokyo,
3,Canada,Ottawa,


In [151]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book7.csv").fillna(0,limit=2)

Unnamed: 0,Countries Name,Capital,Language Spoken
0,0,0,0.0
1,France,Paris,0.0
2,Japan,Tokyo,
3,Canada,Ottawa,


In [152]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',limit=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',limit=1)


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,
3,Canada,Ottawa,


In [153]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',limit=2)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv").fillna(method='ffill',limit=2)


Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,English
2,Japan,Tokyo,English
3,Canada,Ottawa,


In [154]:
dataframe = pd.read_csv("C:\\Users\\User\\Downloads\\Book6.csv")

# inplace: If True, the DataFrame is modified in place. If False (default), a new DataFrame is returned.

In [155]:
dataframe.fillna(1,inplace=True)
dataframe

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,1
2,Japan,Tokyo,1
3,Canada,Ottawa,1


In [156]:
dataframe

Unnamed: 0,Countries Name,Capital,Language Spoken
0,USA,Washington DC,English
1,France,Paris,1
2,Japan,Tokyo,1
3,Canada,Ottawa,1


In [157]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


# In Pandas, the replace() method can be used to replace values in a DataFrame or Series.

# Parameters in replace():-

# to_replace:
# The value or values to be replaced. Can be a single value, a list, a dictionary, or a regular expression.

# value:
# The new value to replace with. Can be a single value or a dictionary.

In [158]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace(to_replace='DTDC',value='Carrier')

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,Carrier
2,3,Polycab Ceiling Fan,Carrier


In [159]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('DTDC','Carrier')

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,Carrier
2,3,Polycab Ceiling Fan,Carrier


In [160]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace(to_replace=1,value=101)

Unnamed: 0,ID,Product Name,Shipper Name
0,101,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


In [161]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace(to_replace=[1,2,3],value=0)

Unnamed: 0,ID,Product Name,Shipper Name
0,0,Croma Stand Fan,FedEx
1,0,Daikin AC 1 tonne,DTDC
2,0,Polycab Ceiling Fan,DTDC


In [162]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace([1,2,3],0)

Unnamed: 0,ID,Product Name,Shipper Name
0,0,Croma Stand Fan,FedEx
1,0,Daikin AC 1 tonne,DTDC
2,0,Polycab Ceiling Fan,DTDC


In [163]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace(to_replace=[1,2,3],value=[101,102,103])

Unnamed: 0,ID,Product Name,Shipper Name
0,101,Croma Stand Fan,FedEx
1,102,Daikin AC 1 tonne,DTDC
2,103,Polycab Ceiling Fan,DTDC


In [164]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace([1,2,3],[101,102,103])

Unnamed: 0,ID,Product Name,Shipper Name
0,101,Croma Stand Fan,FedEx
1,102,Daikin AC 1 tonne,DTDC
2,103,Polycab Ceiling Fan,DTDC


In [165]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace({'Shipper Name':'DTDC'},'none')

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,none
2,3,Polycab Ceiling Fan,none


In [166]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace({'Shipper Name':'DTDC','Product Name':'Daikin AC 1 tonne'},'none')

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,none,none
2,3,Polycab Ceiling Fan,none


In [167]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


# regex: If True, to_replace is interpreted as a regular expression.

In [168]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('[A-Za-z]',0,regex=True)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('[A-Za-z]',0,regex=True)


Unnamed: 0,ID,Product Name,Shipper Name
0,1,0,0
1,2,0,0
2,3,0,0


In [169]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace({'Shipper Name':'[A-Za-z]'},0,regex=True)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace({'Shipper Name':'[A-Za-z]'},0,regex=True)


Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,0
1,2,Daikin AC 1 tonne,0
2,3,Polycab Ceiling Fan,0


In [170]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


# method: It is used for forward or backward filling if to_replace is a list and value is None.

In [171]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('DTDC',method='ffill')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('DTDC',method='ffill')


Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,FedEx
2,3,Polycab Ceiling Fan,FedEx


In [172]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


In [173]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('FedEx',method='bfill')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace('FedEx',method='bfill')


Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,DTDC
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


In [174]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


# limit: The maximum number of replacements to make in each column.

In [175]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace("DTDC",method='ffill',limit=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace("DTDC",method='ffill',limit=1)


Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,FedEx
2,3,Polycab Ceiling Fan,DTDC


In [176]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace("FedEx",method='bfill',limit=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv").replace("FedEx",method='bfill',limit=1)


Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,DTDC
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


In [177]:
data = pd.read_csv("C:\\Users\\User\\Downloads\\Book9.csv")
data

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,DTDC
2,3,Polycab Ceiling Fan,DTDC


# inplace: If True, the DataFrame is modified in place. If False (default), a new DataFrame is returned.

In [178]:
data.replace('DTDC','FedEx',inplace=True)
data

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,FedEx
2,3,Polycab Ceiling Fan,FedEx


In [179]:
data

Unnamed: 0,ID,Product Name,Shipper Name
0,1,Croma Stand Fan,FedEx
1,2,Daikin AC 1 tonne,FedEx
2,3,Polycab Ceiling Fan,FedEx


In [180]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv")

Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,600.0
2,3.0,,
3,4.0,Express Train,200.0


# In Pandas, interpolate() function is basically used to fill NaN values in dataframe or series.

In [181]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate()
df.interpolate()

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate()


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


# Parameters in interpolate():-

# Method: The method parameter specifies the interpolation technique to use. The default is 'linear', which performs linear interpolation between the known values.

In [182]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='linear')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='linear')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


In [183]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='index')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='index')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


In [184]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='nearest')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='nearest')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,1.0,Bullet Train,600.0
2,3.0,,600.0
3,4.0,Express Train,200.0


In [185]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='polynomial',order=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='polynomial',order=1)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


In [186]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='polynomial',order=2)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='polynomial',order=2)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,266.666667
3,4.0,Express Train,200.0


In [187]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='spline',order=2)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(method='spline',order=2)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,266.666667
3,4.0,Express Train,200.0


In [188]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv")

Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,600.0
2,3.0,,
3,4.0,Express Train,200.0


In [189]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").dtypes

Rank             float64
Type of Train     object
Velocity         float64
dtype: object

# axis: The axis along which to interpolate (0 for columns, 1 for rows). Default is 0.

In [190]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(axis=0)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(axis=0)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


# limit: The limit parameter specifies the maximum number of consecutive NaN values to fill.

In [191]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(limit=1)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(limit=1)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


In [192]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(limit=2)

  pd.read_csv("C:\\Users\\User\\Downloads\\Book10.csv").interpolate(limit=2)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,600.0
2,3.0,,400.0
3,4.0,Express Train,200.0


In [193]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv")

Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,
2,,Maglev Train,
3,4.0,Express Train,200.0


# Limit Direction: The limit_direction parameter determines the direction in which to fill the NaN values. It can be 'forward', 'backward', or 'both'.

In [194]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=1,limit_direction='backward')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=1,limit_direction='backward')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,
2,3.0,Maglev Train,533.333333
3,4.0,Express Train,200.0


In [195]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=2,limit_direction='backward')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=2,limit_direction='backward')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,866.666667
2,3.0,Maglev Train,533.333333
3,4.0,Express Train,200.0


In [196]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv")

Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,
2,,Maglev Train,
3,4.0,Express Train,200.0


In [197]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=1,limit_direction="both")

  pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit=1,limit_direction="both")


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,866.666667
2,3.0,Maglev Train,533.333333
3,4.0,Express Train,200.0


# limit_area: If limit is specified, consecutive NaNs will be filled with this restriction.

# inside: Only fill NaNs surrounded by valid values.

In [None]:
df.interpolate(limit_area='inside')

In [199]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit_area='inside')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit_area='inside')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,866.666667
2,3.0,Maglev Train,533.333333
3,4.0,Express Train,200.0


# outside: Only fill NaNs outside valid values.

In [200]:
pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit_area='outside')

  pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv").interpolate(limit_area='outside')


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,,Bullet Train,
2,,Maglev Train,
3,4.0,Express Train,200.0


# inplace: If True, the DataFrame is modified in place. If False (default), a new DataFrame is returned.

In [201]:
df = pd.read_csv("C:\\Users\\User\\Downloads\\Book11.csv")
df.interpolate(inplace=True)
df

  df.interpolate(inplace=True)


Unnamed: 0,Rank,Type of Train,Velocity
0,1.0,Aerospace Train,1200.0
1,2.0,Bullet Train,866.666667
2,3.0,Maglev Train,533.333333
3,4.0,Express Train,200.0
