## Pandas
* Pandas is a data-manipulation library.

### Importing pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Sanity check: `pd` is the alias bound to the imported pandas module.
type(pd)

module

## Series (1-d)

### Creating a Series

In [3]:
# A deliberately mixed list: strings, an integer and a missing value (NaN).
animals = [
    "Dog",
    "Lion",
    "Tiger",
    "Fox",
    "Elephant",
    3,
    np.nan,
]

# Wrap the list in a Series. No index is given, so pandas numbers the
# elements 0..n-1; the mixed element types give the Series dtype `object`.
animals_series = pd.Series(animals)
animals_series

0         Dog
1        Lion
2       Tiger
3         Fox
4    Elephant
5           3
6         NaN
dtype: object

#### Shape of the series

In [4]:
# Shape: a Series is one-dimensional, so this is a 1-tuple holding the element count.
animals_series.shape

(7,)

#### Datatype of the series

In [5]:
# Element dtype of the Series; 'O' stands for generic Python objects.
animals_series.dtype

dtype('O')

#### Null values

In [6]:
# Boolean mask: True where the element is missing (NaN), False otherwise.
animals_series.isna()

0    False
1    False
2    False
3    False
4    False
5    False
6     True
dtype: bool

In [7]:
# values: the underlying data as a NumPy array (the index is not included).
animals_series.values

array(['Dog', 'Lion', 'Tiger', 'Fox', 'Elephant', 3, nan], dtype=object)

In [8]:
# index: the row labels; none were supplied when the Series was built,
# so pandas generated a RangeIndex.
animals_series.index

RangeIndex(start=0, stop=7, step=1)

In [9]:
# how many null values in the series:
# isna() yields a boolean mask; summing it counts the True entries.
print(animals_series.isna().sum())

1


#### Describe the series

In [10]:
# Summary of the Series; for object dtype this is count, unique, top and freq.
animals_series.describe()

count       6
unique      6
top       Dog
freq        1
dtype: object

### Creating Series with custom index

In [11]:
# Day labels become the index; the matching breakfast items are the values.
days = ["mon", "tue", "wed", "thu", "fri"]
breakfast = ["pongal", "chappathi", "idly", "dosai", "pongal"]

# Data goes in positionally; `index=` attaches one label per element.
breakfast_series = pd.Series(breakfast, index=days)
breakfast_series

mon       pongal
tue    chappathi
wed         idly
thu        dosai
fri       pongal
dtype: object

#### Adding a string to all the data

In [12]:
# Adding a single string is broadcast: it is appended to every element.
breakfast_series_1 = breakfast_series + " Panimalar"
breakfast_series_1

mon       pongal Panimalar
tue    chappathi Panimalar
wed         idly Panimalar
thu        dosai Panimalar
fri       pongal Panimalar
dtype: object

#### Adding a list of strings to the data element-wise

In [13]:
# One side dish per day, in the same order as the Series.
sidemenu = ["sambar", "kurma", "sambar", "sambar", "sambar"]

# The list is combined with the Series position by position:
# first element with first element, and so on.
breakfast_series_2 = breakfast_series + " " + sidemenu
breakfast_series_2

mon      pongal sambar
tue    chappathi kurma
wed        idly sambar
thu       dosai sambar
fri      pongal sambar
dtype: object

### Creating a series using dictionary

In [14]:
# Dictionary keys become the index labels and the values become the data.
# The last key is the integer 0 (not a string), so the index is mixed-type.
sports = {
    "Football": "Spain",
    "NBA": "USA",
    "Cricket": "India",
    "Kabadi": "Bangladesh",
    0: "Pakistan",
}
sports_series = pd.Series(sports)
sports_series

Football         Spain
NBA                USA
Cricket          India
Kabadi      Bangladesh
0             Pakistan
dtype: object

In [15]:
# index of the series: the dictionary keys, in insertion order.
sports_series.index

Index(['Football', 'NBA', 'Cricket', 'Kabadi', 0], dtype='object')

In [16]:
# values of the series: the dictionary values as a NumPy array.
sports_series.values

array(['Spain', 'USA', 'India', 'Bangladesh', 'Pakistan'], dtype=object)

### Data slicing (loc & iloc)

In [17]:
# index position: iloc is purely positional, so 0 means "the first element".
sports_series.iloc[0]

'Spain'

In [18]:
# Positional slice: the stop position (4) is excluded, giving positions 1, 2 and 3.
sports_series.iloc[1:4]

NBA               USA
Cricket         India
Kabadi     Bangladesh
dtype: object

In [19]:
# index name: loc looks up by label, so 0 here is the label 0
# (the last element), not the first position.
sports_series.loc[0]

'Pakistan'

In [20]:
# Label slice: unlike iloc, both end labels ("NBA" and 0) are included.
sports_series.loc["NBA":0]

NBA               USA
Cricket         India
Kabadi     Bangladesh
0            Pakistan
dtype: object

## DataFrame (2-d)

### Creating a dataframe:

In [21]:
# Row labels (days) and column labels (meals of the day).
meals_index = ["mon", "tue", "wed", "thu", "fri"]
meals_columns = ["breakfast", "lunch"]

# One inner list per day, with its items ordered like `meals_columns`.
meals = [
    ["pongal", "sambar_rice"],
    ["chappathi", "chicken_rice"],
    ["idly", "briyani"],
    ["dosai", "egg_rice"],
    ["pongal", "rasam_rice"],
]

# Build the 2-d table from the nested list plus explicit row/column labels.
panimalar_df = pd.DataFrame(meals, index=meals_index, columns=meals_columns)
panimalar_df

Unnamed: 0,breakfast,lunch
mon,pongal,sambar_rice
tue,chappathi,chicken_rice
wed,idly,briyani
thu,dosai,egg_rice
fri,pongal,rasam_rice


### Creating a new column

In [22]:
# One snack entry per row of the frame, in row order.
snacks = ["coffee_cake", "tea_biscut", "coffee_cake", "tea_biscut", "coffee_biscut"]

# creating a new column:
# assigning to a column name that does not exist yet appends it to the frame.
panimalar_df["snacks"] = snacks
panimalar_df

Unnamed: 0,breakfast,lunch,snacks
mon,pongal,sambar_rice,coffee_cake
tue,chappathi,chicken_rice,tea_biscut
wed,idly,briyani,coffee_cake
thu,dosai,egg_rice,tea_biscut
fri,pongal,rasam_rice,coffee_biscut


### Creating a dataframe using dictionary

In [23]:
# Row labels: each list in `dic` holds breakfast, lunch and snacks in this order.
index = ["breakfast", "lunch", "snacks"]

# Each key becomes a column (one per day); each list fills that column top to bottom.
dic = {
    "mon": ["pongal", "rice", "coffee"],
    "tue": ["chappathi", "chicken", "tea"],
    "wed": ["idly", "briyani", "coffee"],
    "thu": ["dosai", "egg", "tea"],
    "fri": ["pongal", "sambar", "coffee"],
    "sat": ["idly", "rasam", "tea"],
}

df2 = pd.DataFrame(dic, index=index)
df2

Unnamed: 0,mon,tue,wed,thu,fri,sat
breakfast,pongal,chappathi,idly,dosai,pongal,idly
lunch,rice,chicken,briyani,egg,sambar,rasam
snacks,coffee,tea,coffee,tea,coffee,tea


In [24]:
# Swap rows and columns: days become the row labels, meals become the columns.
df3 = df2.transpose()
df3

Unnamed: 0,breakfast,lunch,snacks
mon,pongal,rice,coffee
tue,chappathi,chicken,tea
wed,idly,briyani,coffee
thu,dosai,egg,tea
fri,pongal,sambar,coffee
sat,idly,rasam,tea


### Importing a csv file:

In [25]:
# Load the automobile dataset from a comma-separated file.
# NOTE(review): this is an absolute path on the author's machine; point it at
# your own copy of Automobile_data.csv before running.
data = pd.read_csv(
    "D:/03. Python/datasets/Automobile_data.csv",
    sep=",",
)

# Preview the first three rows.
data.head(3)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500


In [26]:
# bottom n rows: here the last three rows of the frame.
data.tail(3)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
202,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485
203,-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.1,...,145,idi,3.01,3.4,23.0,106,4800,26,27,22470
204,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,19,25,22625


### Shape of the data

In [27]:
# shape of the dataframe: (number of rows, number of columns).
data.shape

(205, 26)

### Column names

In [28]:
# columns: the column labels, returned as an Index object.
data.columns

Index(['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
       'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
       'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
       'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
       'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',
       'highway-mpg', 'price'],
      dtype='object')

### Index names

In [29]:
# index: the row labels; no index column was specified when reading the
# file, so this is the default RangeIndex.
data.index

RangeIndex(start=0, stop=205, step=1)

### Size of the data

In [30]:
# size: total number of cells, i.e. rows * columns (205 * 26 = 5330 here).
data.size

5330

### Basic information about the data

In [31]:
# information: index range plus, for every column, its non-null count and dtype.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  205 non-null    object 
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   aspiration         205 non-null    object 
 5   num-of-doors       205 non-null    object 
 6   body-style         205 non-null    object 
 7   drive-wheels       205 non-null    object 
 8   engine-location    205 non-null    object 
 9   wheel-base         205 non-null    float64
 10  length             205 non-null    float64
 11  width              205 non-null    float64
 12  height             205 non-null    float64
 13  curb-weight        205 non-null    int64  
 14  engine-type        205 non-null    object 
 15  num-of-cylinders   205 non-null    object 
 16  engine-size        205 non

### Accessing columns

In [32]:
# taking a single column:
# double brackets (a list with one name) return a one-column DataFrame;
# data["length"] with single brackets would return a Series instead.
data[["length"]]

Unnamed: 0,length
0,168.8
1,168.8
2,171.2
3,176.6
4,176.6
...,...
200,188.8
201,188.8
202,188.8
203,188.8


In [33]:
# taking multiple columns: pass a list of column names to get a sub-DataFrame.
data[['length', 'width', 'height']]

Unnamed: 0,length,width,height
0,168.8,64.1,48.8
1,168.8,64.1,48.8
2,171.2,65.5,52.4
3,176.6,66.2,54.3
4,176.6,66.4,54.3
...,...,...,...
200,188.8,68.9,55.5
201,188.8,68.8,55.5
202,188.8,68.9,55.5
203,188.8,68.9,55.5


### Datatypes of columns

In [34]:
# datatypes of each column:
# NOTE(review): several numeric-looking columns (e.g. price, horsepower) show
# up as object — presumably because the file marks missing entries with "?".
data.dtypes

symboling              int64
normalized-losses     object
make                  object
fuel-type             object
aspiration            object
num-of-doors          object
body-style            object
drive-wheels          object
engine-location       object
wheel-base           float64
length               float64
width                float64
height               float64
curb-weight            int64
engine-type           object
num-of-cylinders      object
engine-size            int64
fuel-system           object
bore                  object
stroke                object
compression-ratio    float64
horsepower            object
peak-rpm              object
city-mpg               int64
highway-mpg            int64
price                 object
dtype: object

In [35]:
# datatype of a particular column: a single column is a Series, so it has one .dtype.
data["highway-mpg"].dtype

dtype('int64')

In [36]:
# datatypes of multiple columns: a sub-DataFrame exposes .dtypes (one entry per column).
data[['length', 'width', 'height']].dtypes

length    float64
width     float64
height    float64
dtype: object

### Splitting columns into numeric and categorical:

In [37]:
# Partition the column names by dtype.
# select_dtypes replaces the manual loop that compared each dtype with the
# string "object": that comparison would misclassify text columns as numeric
# whenever pandas stores them in a dedicated string dtype instead of `object`.
# NOTE(review): columns such as `price` end up in the categoric list —
# presumably because the raw file writes missing entries as "?".
num_col = data.select_dtypes(include="number").columns.tolist()    #->int and float
cate_col = data.select_dtypes(exclude="number").columns.tolist()   #->everything else (string/object)

print(f"Numeric columns: {num_col}\nCategoric columns: {cate_col}")

Numeric columns: ['symboling', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-size', 'compression-ratio', 'city-mpg', 'highway-mpg']
Categoric columns: ['normalized-losses', 'make', 'fuel-type', 'aspiration', 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'engine-type', 'num-of-cylinders', 'fuel-system', 'bore', 'stroke', 'horsepower', 'peak-rpm', 'price']


In [38]:
# Split the frame into two column subsets using the name lists
# `num_col` and `cate_col`.

# num_df: the numeric columns only
num_df = data.loc[:, num_col]

# cate_df: the object (text) columns only
cate_df = data.loc[:, cate_col]

In [39]:
# Display the numeric subset (the notebook abbreviates the middle rows with "...").
num_df

Unnamed: 0,symboling,wheel-base,length,width,height,curb-weight,engine-size,compression-ratio,city-mpg,highway-mpg
0,3,88.6,168.8,64.1,48.8,2548,130,9.0,21,27
1,3,88.6,168.8,64.1,48.8,2548,130,9.0,21,27
2,1,94.5,171.2,65.5,52.4,2823,152,9.0,19,26
3,2,99.8,176.6,66.2,54.3,2337,109,10.0,24,30
4,2,99.4,176.6,66.4,54.3,2824,136,8.0,18,22
...,...,...,...,...,...,...,...,...,...,...
200,-1,109.1,188.8,68.9,55.5,2952,141,9.5,23,28
201,-1,109.1,188.8,68.8,55.5,3049,141,8.7,19,25
202,-1,109.1,188.8,68.9,55.5,3012,173,8.8,18,23
203,-1,109.1,188.8,68.9,55.5,3217,145,23.0,26,27


### describe() method

In [40]:
# Numeric summary per column: count, mean, std, min, quartiles and max.
num_df.describe()

Unnamed: 0,symboling,wheel-base,length,width,height,curb-weight,engine-size,compression-ratio,city-mpg,highway-mpg
count,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0
mean,0.834146,98.756585,174.049268,65.907805,53.724878,2555.565854,126.907317,10.142537,25.219512,30.75122
std,1.245307,6.021776,12.337289,2.145204,2.443522,520.680204,41.642693,3.97204,6.542142,6.886443
min,-2.0,86.6,141.1,60.3,47.8,1488.0,61.0,7.0,13.0,16.0
25%,0.0,94.5,166.3,64.1,52.0,2145.0,97.0,8.6,19.0,25.0
50%,1.0,97.0,173.2,65.5,54.1,2414.0,120.0,9.0,24.0,30.0
75%,2.0,102.4,183.1,66.9,55.5,2935.0,141.0,9.4,30.0,34.0
max,3.0,120.9,208.1,72.3,59.8,4066.0,326.0,23.0,49.0,54.0


In [41]:
# For object columns describe() reports count, number of unique values,
# the most frequent value (top) and how often it occurs (freq).
cate_df.describe()

Unnamed: 0,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,engine-type,num-of-cylinders,fuel-system,bore,stroke,horsepower,peak-rpm,price
count,205,205,205,205,205,205,205,205,205,205,205,205.0,205.0,205,205,205
unique,52,22,2,2,3,5,3,2,7,7,8,39.0,37.0,60,24,187
top,?,toyota,gas,std,four,sedan,fwd,front,ohc,four,mpfi,3.62,3.4,68,5500,?
freq,41,32,185,168,114,96,120,202,148,159,94,23.0,20.0,19,37,4


### Dropping columns and rows

In [42]:
# Look at the first five rows before anything is dropped.
data.head(5)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
4,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450


#### columns

In [43]:
# dropping a particular column:
# Rebind `data` to the returned frame instead of mutating it with
# inplace=True. errors="ignore" skips a label that is already gone, so
# re-running this cell is a no-op rather than a KeyError.
data = data.drop(columns=["normalized-losses"], errors="ignore")
data.head()

Unnamed: 0,symboling,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,171.2,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,2,audi,gas,std,four,sedan,fwd,front,99.8,176.6,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
4,2,audi,gas,std,four,sedan,4wd,front,99.4,176.6,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450


In [44]:
# dropping multiple columns:
# Pass every unwanted label in one list. The result is rebound to `data`
# (no inplace mutation) and already-missing labels are ignored, so the
# cell can be re-run without raising KeyError.
data = data.drop(columns=["symboling", "make"], errors="ignore")
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
4,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450


#### rows

In [45]:
# dropping a particular row:
# `index=` targets row labels (the equivalent of axis=0). The result is
# rebound to `data` instead of using inplace=True, and a label that has
# already been removed is ignored so an immediate re-run does not raise.
data = data.drop(index=[0], errors="ignore")
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
1,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
4,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
5,gas,std,two,sedan,fwd,front,99.8,177.3,66.3,53.1,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250


In [46]:
# dropping multiple rows:
# Rows are removed by label, not by position. The remaining rows keep
# their original labels. Rebinding plus errors="ignore" keeps the cell
# safe to re-run.
data = data.drop(index=[1, 2, 3, 4, 5], errors="ignore")
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
6,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
7,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
8,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
9,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
10,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


### loc and iloc

In [47]:
# The row labels now start at 6: rows 0-5 were dropped and the labels were not renumbered.
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
6,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
7,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
8,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
9,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
10,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


#### loc[row, col]

In [48]:
# loc selects by label: a list of row labels and a list of column names.
data.loc[[6,7,8,9,10], ["fuel-type","aspiration","num-of-doors"]]

Unnamed: 0,fuel-type,aspiration,num-of-doors
6,gas,std,four
7,gas,std,four
8,gas,turbo,four
9,gas,turbo,two
10,gas,std,two


In [49]:
# Label slices in loc include both endpoints: rows 7 through 12 and
# columns "length" through "engine-size".
data.loc[7:12, "length":"engine-size"]

Unnamed: 0,length,width,height,curb-weight,engine-type,num-of-cylinders,engine-size
7,192.7,71.4,55.7,2954,ohc,five,136
8,192.7,71.4,55.9,3086,ohc,five,131
9,178.2,67.9,52.0,3053,ohc,five,131
10,176.8,64.8,54.3,2395,ohc,four,108
11,176.8,64.8,54.3,2395,ohc,four,108
12,176.8,64.8,54.3,2710,ohc,six,164


#### iloc[row, col]

In [50]:
# iloc selects by position and excludes the stop value:
# the first five rows and the columns at positions 7 and 8.
data.iloc[0:5, 7:9]

Unnamed: 0,length,width
6,192.7,71.4
7,192.7,71.4
8,192.7,71.4
9,178.2,67.9
10,176.8,64.8


In [51]:
# Lists of positions may be given in any order; the result follows that order.
data.iloc[[0,5,9,7,6], [0,8,6,4,5]]

Unnamed: 0,fuel-type,width,wheel-base,drive-wheels,engine-location
6,gas,71.4,105.8,fwd,front
11,gas,64.8,101.2,rwd,front
15,gas,66.9,103.5,rwd,front
13,gas,64.8,101.2,rwd,front
12,gas,64.8,101.2,rwd,front


### Reset index

In [52]:
# Before resetting: the row labels still start at 6 because of the earlier drops.
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
6,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
7,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
8,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
9,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
10,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


In [53]:
# Renumber the rows 0..n-1. drop=True discards the old labels instead of
# keeping them as a new column. The result is rebound to `data` rather
# than mutating the frame with inplace=True.
data = data.reset_index(drop=True)
data.head()

Unnamed: 0,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
1,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
2,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
3,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
4,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


### Renaming the column and index

#### Column

In [54]:
# Rename selected columns with an {old_name: new_name} mapping; columns
# not mentioned keep their names. The renamed frame is rebound to `data`
# instead of using inplace=True.
data = data.rename(columns={"fuel-type": "FUEL TYPE", "aspiration": "ASPIRATION"})
data.head()

Unnamed: 0,FUEL TYPE,ASPIRATION,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
1,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
2,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
3,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
4,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


#### index

In [55]:
# Rename selected row labels with an {old_label: new_label} mapping; the
# remaining rows keep their integer labels, so the index becomes a mix of
# strings and integers. The result is rebound to `data` instead of using
# inplace=True.
data = data.rename(index={0: "first_index", 1: "second_index"})
data.head()

Unnamed: 0,FUEL TYPE,ASPIRATION,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
first_index,gas,std,four,sedan,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
second_index,gas,std,four,wagon,fwd,front,105.8,192.7,71.4,55.7,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
2,gas,turbo,four,sedan,fwd,front,105.8,192.7,71.4,55.9,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
3,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
4,gas,std,two,sedan,rwd,front,101.2,176.8,64.8,54.3,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


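### Check missing values:

**Note:** `isna()` only detects real `NaN` values. This dataset marks missing entries with the string `"?"` (see the `price` column above), so the checks below report no missing values unless the `"?"` placeholders are first converted to `NaN`.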

In [56]:
# Element-wise mask with the same shape as the frame: True where a cell is NaN.
data.isna()

Unnamed: 0,FUEL TYPE,ASPIRATION,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
first_index,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
second_index,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
195,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
196,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
197,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [57]:
# any() reduces the mask column by column: True if the column holds at least one NaN.
data.isna().any()

FUEL TYPE            False
ASPIRATION           False
num-of-doors         False
body-style           False
drive-wheels         False
engine-location      False
wheel-base           False
length               False
width                False
height               False
curb-weight          False
engine-type          False
num-of-cylinders     False
engine-size          False
fuel-system          False
bore                 False
stroke               False
compression-ratio    False
horsepower           False
peak-rpm             False
city-mpg             False
highway-mpg          False
price                False
dtype: bool

In [58]:
# sum() counts the True entries per column, i.e. the number of NaN cells in each.
# NOTE: "?" entries (visible in `price`) are ordinary strings, so they are not counted here.
data.isna().sum()

FUEL TYPE            0
ASPIRATION           0
num-of-doors         0
body-style           0
drive-wheels         0
engine-location      0
wheel-base           0
length               0
width                0
height               0
curb-weight          0
engine-type          0
num-of-cylinders     0
engine-size          0
fuel-system          0
bore                 0
stroke               0
compression-ratio    0
horsepower           0
peak-rpm             0
city-mpg             0
highway-mpg          0
price                0
dtype: int64