# Import Dependencies

In [1]:
import pandas as pd
from path import Path

# Import Data

In [2]:
# Point at the MPG dataset on disk, then load it into a DataFrame
data = Path("../Resources/mpg.csv")
df = pd.read_csv(filepath_or_buffer=data)

# Explore Data

In [3]:
# Report the (rows, columns) dimensions, then display the first five records
print(df.shape)
df.head(n=5)

(398, 9)


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [4]:
# Check data types: list the dtype pandas assigned to each column on import.
# An `object` dtype on a column expected to be numeric is a hint that it
# contains at least one non-numeric value.
df.dtypes

mpg             float64
cylinders         int64
displacement    float64
horsepower       object
weight            int64
acceleration    float64
model year        int64
origin            int64
car name         object
dtype: object

In [5]:
# Inspect the horsepower column: the dtype it was loaded with and every
# distinct value it contains
print("Horsepower data type: ", df["horsepower"].dtype)
print("Horsepower values: ", pd.unique(df["horsepower"]))

Horsepower data type:  object
Horsepower values:  ['130' '165' '150' '140' '198' '220' '215' '225' '190' '170' '160' '95'
 '97' '85' '88' '46' '87' '90' '113' '200' '210' '193' '?' '100' '105'
 '175' '153' '180' '110' '72' '86' '70' '76' '65' '69' '60' '80' '54'
 '208' '155' '112' '92' '145' '137' '158' '167' '94' '107' '230' '49' '75'
 '91' '122' '67' '83' '78' '52' '61' '93' '148' '129' '96' '71' '98' '115'
 '53' '81' '79' '120' '152' '102' '108' '68' '58' '149' '89' '63' '48'
 '66' '139' '103' '125' '133' '138' '135' '142' '77' '62' '132' '84' '64'
 '74' '116' '82']


In [6]:
# If the dataset were too large to inspect with unique(), attempting the cast
# and catching the failure reveals the first value that prevents horsepower
# from being treated as an integer
try:
    # The converted Series is discarded on purpose; only a failure matters here
    df["horsepower"].astype(int)
except ValueError as conversion_error:
    # Show the message, which names the offending value
    print(conversion_error)

invalid literal for int() with base 10: '?'


In [7]:
# Show every row whose horsepower entry is the '?' placeholder
df.loc[df["horsepower"].eq("?")]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
32,25.0,4,98.0,?,2046,19.0,71,1,ford pinto
126,21.0,6,200.0,?,2875,17.0,74,1,ford maverick
330,40.9,4,85.0,?,1835,17.3,80,2,renault lecar deluxe
336,23.6,4,140.0,?,2905,14.3,80,1,ford mustang cobra
354,34.5,4,100.0,?,2320,15.8,81,2,renault 18i
374,23.0,4,151.0,?,3035,20.5,82,1,amc concord dl


In [8]:
# Inspect the origin column: the dtype it was loaded with and every distinct
# value it contains
print("Origin data type: ", df["origin"].dtype)
print("Origin values: ", pd.unique(df["origin"]))

Origin data type:  int64
Origin values:  [1 3 2]
