# Pandas Basics


In [2]:
# Pandas is a high-level data manipulation tool developed by Wes McKinney. It is built on the NumPy package and its key 
# data structure is called the DataFrame. DataFrames allow you to store and manipulate tabular data in rows of 
# observations and columns of variables.

In [1]:
import pandas as pd

# Column-oriented data for the BRICS example: each key becomes a DataFrame column
# and each list holds that column's values, one entry per country.
# The variable is called `brics_data` (not `dict`) so that Python's built-in
# `dict` type is not shadowed for the rest of the notebook session.
# NOTE(review): "New Dehli" is a misspelling of "New Delhi"; the value is left
# unchanged here so it keeps matching the recorded outputs.
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Dehli", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

# Build the DataFrame; with no index given, pandas labels the rows 0 through 4.
brics = pd.DataFrame(brics_data)
print(brics)

        country    capital    area  population
0        Brazil   Brasilia   8.516      200.40
1        Russia     Moscow  17.100      143.50
2         India  New Dehli   3.286     1252.00
3         China    Beijing   9.597     1357.00
4  South Africa   Pretoria   1.221       52.98


In [2]:
# A bare expression on the last line of a cell is shown as the cell's rich output
brics

Unnamed: 0,country,capital,area,population
0,Brazil,Brasilia,8.516,200.4
1,Russia,Moscow,17.1,143.5
2,India,New Dehli,3.286,1252.0
3,China,Beijing,9.597,1357.0
4,South Africa,Pretoria,1.221,52.98


In [5]:
# As you can see with the new brics DataFrame, pandas has labelled the rows (one per country) with the default integer
# index 0 through 4. If you would like to have different index values, say, a two-letter country code, you can set
# them easily as well.

In [3]:
# Replace the default 0..4 row labels of brics with two-letter country labels.
# NOTE(review): "CH" and "SA" are the ISO 3166 codes for Switzerland and Saudi Arabia;
# China and South Africa are "CN" and "ZA". The labels are kept as-is so the recorded output still matches.
brics.index = [
    "BR",  # Brazil
    "RU",  # Russia
    "IN",  # India
    "CH",  # China
    "SA",  # South Africa
]

# Show the relabelled frame twice: as plain text via print, then as the cell's rich output
print(brics)
brics

         country    capital    area  population
BR        Brazil   Brasilia   8.516      200.40
RU        Russia     Moscow  17.100      143.50
IN         India  New Dehli   3.286     1252.00
CH         China    Beijing   9.597     1357.00
SA  South Africa   Pretoria   1.221       52.98


Unnamed: 0,country,capital,area,population
BR,Brazil,Brasilia,8.516,200.4
RU,Russia,Moscow,17.1,143.5
IN,India,New Dehli,3.286,1252.0
CH,China,Beijing,9.597,1357.0
SA,South Africa,Pretoria,1.221,52.98


# Create DataFrame by importing .csv files

In [7]:
# Another way to create a DataFrame is by importing a CSV file using pandas. The cars data is stored in the file
# 'Cars Data1.csv' and can be imported using pd.read_csv:

In [4]:
# pandas under its conventional alias (repeating an earlier import is harmless)
import pandas as pd

# Read the file 'Cars Data1.csv' into a DataFrame named cars (default integer row index)
cars = pd.read_csv("Cars Data1.csv")

# Preview only the first five rows rather than the whole table
cars.head()

Unnamed: 0,Make,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
0,Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
1,Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
2,Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
3,Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0
4,Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0


# Indexing DataFrame

In [9]:
# Import pandas and reload 'Cars Data1.csv', this time using the first CSV column (Make) as the row index
import pandas as pd
cars = pd.read_csv("Cars Data1.csv", index_col=0)

# Single brackets with one column name: the Model column as a pandas Series
print(cars["Model"])

# Double brackets (a list with one name): the Model column as a one-column DataFrame
print(cars[["Model"]])

# A list with several names: a DataFrame holding the Model and Origin columns
print(cars[["Model", "Origin"]])

Make
Acura                        MDX
Acura             RSX Type S 2dr
Acura                    TSX 4dr
Acura                     TL 4dr
Acura                 3.5 RL 4dr
                  ...           
Volvo    C70 LPT convertible 2dr
Volvo    C70 HPT convertible 2dr
Volvo                 S80 T6 4dr
Volvo                        V40
Volvo                       XC70
Name: Model, Length: 432, dtype: object
                         Model
Make                          
Acura                      MDX
Acura           RSX Type S 2dr
Acura                  TSX 4dr
Acura                   TL 4dr
Acura               3.5 RL 4dr
...                        ...
Volvo  C70 LPT convertible 2dr
Volvo  C70 HPT convertible 2dr
Volvo               S80 T6 4dr
Volvo                      V40
Volvo                     XC70

[432 rows x 1 columns]
                         Model  Origin
Make                                  
Acura                      MDX    Asia
Acura           RSX Type S 2dr    Asia
Acura    

In [10]:
# Reload the CSV with its first column (Make) as the row index, then show the frame as the cell output
cars = pd.read_csv("Cars Data1.csv", index_col=0)
cars

Unnamed: 0_level_0,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0
Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Volvo,C70 LPT convertible 2dr,Sedan,Europe,Front,"$40,565","$38,203",2.4,5.0,197.0,21.0,28.0,3450.0,105.0,186.0
Volvo,C70 HPT convertible 2dr,Sedan,Europe,Front,"$42,565","$40,083",2.3,5.0,242.0,20.0,26.0,3450.0,105.0,186.0
Volvo,S80 T6 4dr,Sedan,Europe,Front,"$45,210","$42,573",2.9,6.0,268.0,19.0,26.0,3653.0,110.0,190.0
Volvo,V40,Wagon,Europe,Front,"$26,135","$24,641",1.9,4.0,170.0,22.0,29.0,2822.0,101.0,180.0


In [11]:
# Print the Model column as a pandas Series (single brackets, one column name)
print(cars["Model"])

Make
Acura                        MDX
Acura             RSX Type S 2dr
Acura                    TSX 4dr
Acura                     TL 4dr
Acura                 3.5 RL 4dr
                  ...           
Volvo    C70 LPT convertible 2dr
Volvo    C70 HPT convertible 2dr
Volvo                 S80 T6 4dr
Volvo                        V40
Volvo                       XC70
Name: Model, Length: 432, dtype: object


In [12]:
# First five entries of the Model column, still a pandas Series
cars["Model"].head()

Make
Acura               MDX
Acura    RSX Type S 2dr
Acura           TSX 4dr
Acura            TL 4dr
Acura        3.5 RL 4dr
Name: Model, dtype: object

In [14]:
# First five rows of the Model column as a one-column DataFrame (double brackets)
cars[["Model"]].head()

Unnamed: 0_level_0,Model
Make,Unnamed: 1_level_1
Acura,MDX
Acura,RSX Type S 2dr
Acura,TSX 4dr
Acura,TL 4dr
Acura,3.5 RL 4dr


In [15]:
# First five rows of a DataFrame restricted to the Model and Origin columns
cars[["Model", "Origin"]].head()

Unnamed: 0_level_0,Model,Origin
Make,Unnamed: 1_level_1,Unnamed: 2_level_1
Acura,MDX,Asia
Acura,RSX Type S 2dr,Asia
Acura,TSX 4dr,Asia
Acura,TL 4dr,Asia
Acura,3.5 RL 4dr,Asia


In [16]:
# Preview the first five rows of cars with all of its columns (rows are labelled by Make)
cars.head()

Unnamed: 0_level_0,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0
Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0


In [17]:
# Square brackets with a slice select observations (rows) of a DataFrame by integer position. For example:

# First 4 observations: positions 0 through 3 (the slice stop is excluded)
print(cars[:4])

# Fifth and sixth observations: positions 4 and 5
print(cars[4:6])

                Model   Type Origin DriveTrain      MSRP   Invoice  \
Make                                                                 
Acura             MDX    SUV   Asia        All  $36,945   $33,337    
Acura  RSX Type S 2dr  Sedan   Asia      Front  $23,820   $21,761    
Acura         TSX 4dr  Sedan   Asia      Front  $26,990   $24,647    
Acura          TL 4dr  Sedan   Asia      Front  $33,195   $30,299    

       EngineSize  Cylinders  Horsepower  MPG_City  MPG_Highway  Weight  \
Make                                                                      
Acura         3.5        6.0       265.0      17.0         23.0  4451.0   
Acura         2.0        4.0       200.0      24.0         31.0  2778.0   
Acura         2.4        4.0       200.0      22.0         29.0  3230.0   
Acura         3.2        6.0       270.0      20.0         28.0  3575.0   

       Wheelbase  Length  
Make                      
Acura      106.0   189.0  
Acura      101.0   172.0  
Acura      105.0   1

In [18]:
# First 4 observations (positions 0 through 3), shown as the cell's rich output
cars[:4]

Unnamed: 0_level_0,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Acura,MDX,SUV,Asia,All,"$36,945","$33,337",3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
Acura,RSX Type S 2dr,Sedan,Asia,Front,"$23,820","$21,761",2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
Acura,TSX 4dr,Sedan,Asia,Front,"$26,990","$24,647",2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
Acura,TL 4dr,Sedan,Asia,Front,"$33,195","$30,299",3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0


In [19]:
# Fifth and sixth observations: the slice covers integer positions 4 and 5 (the stop value 6 is excluded)
cars[4:6]

Unnamed: 0_level_0,Model,Type,Origin,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Acura,3.5 RL 4dr,Sedan,Asia,Front,"$43,755","$39,014",3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0
Acura,3.5 RL w/Navigation 4dr,Sedan,Asia,Front,"$46,100","$41,100",3.5,6.0,225.0,18.0,24.0,3893.0,115.0,197.0


# "loc" and "iloc"

In [13]:
# You can also use loc and iloc to perform just about any data selection operation. loc is label-based, which means that 
# you have to specify rows and columns based on their row and column labels. iloc is integer index based, so you have to 
# specify rows and columns by their integer index like you did in the previous exercise.

In [20]:
# Select one observation by integer position with iloc:
# position 2 is the third row (the Acura TSX 4dr), returned as a Series
print(cars.iloc[2])    # integer based selection

Model           TSX 4dr
Type              Sedan
Origin             Asia
DriveTrain        Front
MSRP           $26,990 
Invoice        $24,647 
EngineSize          2.4
Cylinders           4.0
Horsepower        200.0
MPG_City           22.0
MPG_Highway        29.0
Weight           3230.0
Wheelbase         105.0
Length            183.0
Name: Acura, dtype: object


In [25]:
# Reload the CSV with the column at position 3 (Origin) as the row index so rows can be picked by region label
cars = pd.read_csv("Cars Data1.csv", index_col=3)

# Label-based selection: all rows labelled Asia or Europe, limited to the first 10
cars.loc[["Asia", "Europe"]].head(10)

Unnamed: 0_level_0,Make,Model,Type,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Asia,Acura,MDX,SUV,All,"$36,945","$33,337",3.5,6.0,265.0,17.0,23.0,4451.0,106.0,189.0
Asia,Acura,RSX Type S 2dr,Sedan,Front,"$23,820","$21,761",2.0,4.0,200.0,24.0,31.0,2778.0,101.0,172.0
Asia,Acura,TSX 4dr,Sedan,Front,"$26,990","$24,647",2.4,4.0,200.0,22.0,29.0,3230.0,105.0,183.0
Asia,Acura,TL 4dr,Sedan,Front,"$33,195","$30,299",3.2,6.0,270.0,20.0,28.0,3575.0,108.0,186.0
Asia,Acura,3.5 RL 4dr,Sedan,Front,"$43,755","$39,014",3.5,6.0,225.0,18.0,24.0,3880.0,115.0,197.0
Asia,Acura,3.5 RL w/Navigation 4dr,Sedan,Front,"$46,100","$41,100",3.5,6.0,225.0,18.0,24.0,3893.0,115.0,197.0
Asia,Acura,NSX coupe 2dr manual S,Sports,Rear,"$89,765","$79,978",3.2,6.0,290.0,17.0,24.0,3153.0,100.0,174.0
Asia,Honda,Civic Hybrid 4dr manual (gas/electric),Hybrid,Front,"$20,140","$18,451",1.4,4.0,93.0,46.0,51.0,2732.0,103.0,175.0
Asia,Honda,Insight 2dr (gas/electric),Hybrid,Front,"$19,110","$17,911",2.0,3.0,73.0,60.0,66.0,1850.0,95.0,155.0
Asia,Honda,Pilot LX,SUV,All,"$27,560","$24,843",3.5,6.0,240.0,17.0,22.0,4387.0,106.0,188.0


In [26]:
# Label-based selection: the last 10 of the rows labelled Asia or Europe
cars.loc[["Asia", "Europe"]].tail(10)

Unnamed: 0_level_0,Make,Model,Type,DriveTrain,MSRP,Invoice,EngineSize,Cylinders,Horsepower,MPG_City,MPG_Highway,Weight,Wheelbase,Length
Origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Europe,Volvo,S60 2.5 4dr,Sedan,All,"$31,745","$29,916",2.5,5.0,208.0,20.0,27.0,3903.0,107.0,180.0
Europe,Volvo,S60 T5 4dr,Sedan,Front,"$34,845","$32,902",2.3,5.0,247.0,20.0,28.0,3766.0,107.0,180.0
Europe,Volvo,S60 R 4dr,Sedan,All,"$37,560","$35,382",2.5,5.0,300.0,18.0,25.0,3571.0,107.0,181.0
Europe,Volvo,S80 2.9 4dr,Sedan,Front,"$37,730","$35,542",2.9,6.0,208.0,20.0,28.0,3576.0,110.0,190.0
Europe,Volvo,S80 2.5T 4dr,Sedan,All,"$37,885","$35,688",2.5,5.0,194.0,20.0,27.0,3691.0,110.0,190.0
Europe,Volvo,C70 LPT convertible 2dr,Sedan,Front,"$40,565","$38,203",2.4,5.0,197.0,21.0,28.0,3450.0,105.0,186.0
Europe,Volvo,C70 HPT convertible 2dr,Sedan,Front,"$42,565","$40,083",2.3,5.0,242.0,20.0,26.0,3450.0,105.0,186.0
Europe,Volvo,S80 T6 4dr,Sedan,Front,"$45,210","$42,573",2.9,6.0,268.0,19.0,26.0,3653.0,110.0,190.0
Europe,Volvo,V40,Wagon,Front,"$26,135","$24,641",1.9,4.0,170.0,22.0,29.0,2822.0,101.0,180.0
Europe,Volvo,XC70,Wagon,All,"$35,145","$33,112",2.5,5.0,208.0,20.0,27.0,3823.0,109.0,186.0


# Q?
