# <span style="color:darkblue"> Lecture 15 - Concat </span>

<font size = "5">

In this lecture you will get a chance to practice <br>
the main dataset operations


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Load the three raw tables, one DataFrame per CSV file.
# The paths are relative to the notebook's working directory.
results_raw = pd.read_csv(filepath_or_buffer="data_raw/results.csv")
races_raw = pd.read_csv(filepath_or_buffer="data_raw/races.csv")
circuits_raw = pd.read_csv(filepath_or_buffer="data_raw/circuits.csv")

<font size = "5">

Use ".query()" to split data into different parts

In [5]:
# List each distinct value that appears in the "country" column
circuits_raw.loc[:, "country"].unique()

array(['Australia', 'Malaysia', 'Bahrain', 'Spain', 'Turkey', 'Monaco',
       'Canada', 'France', 'UK', 'Germany', 'Hungary', 'Belgium', 'Italy',
       'Singapore', 'Japan', 'China', 'Brazil', 'USA', 'United States',
       'UAE', 'Argentina', 'Portugal', 'South Africa', 'Mexico', 'Korea',
       'Netherlands', 'Sweden', 'Austria', 'Morocco', 'Switzerland',
       'India', 'Russia', 'Azerbaijan', 'Saudi Arabia', 'Qatar'],
      dtype=object)

In [10]:
# Split the circuits table into one subset per country.
# The raw data labels American circuits with two spellings
# ("USA" and "United States"), so the USA subset matches both.
# Inside .query(), "in [...]" means "equal to any of these values";
# it avoids chaining "==" with "|", which only works because
# .query() gives "|" the same precedence as "or".
circuits_spain = circuits_raw.query('country == "Spain"')
circuits_usa = circuits_raw.query('country in ["United States", "USA"]')
circuits_malaysia = circuits_raw.query('country == "Malaysia"')

# Display the Spain subset
circuits_spain

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
11,12,valencia,Valencia Street Circuit,Valencia,Spain,39.4589,-0.331667,4,http://en.wikipedia.org/wiki/Valencia_Street_C...
25,26,jerez,Circuito de Jerez,Jerez de la Frontera,Spain,36.7083,-6.03417,37,http://en.wikipedia.org/wiki/Circuito_Permanen...
44,45,jarama,Jarama,Madrid,Spain,40.6171,-3.58558,609,http://en.wikipedia.org/wiki/Circuito_Permanen...
48,49,montjuic,Montjuïc,Barcelona,Spain,41.3664,2.15167,79,http://en.wikipedia.org/wiki/Montju%C3%AFc_cir...
66,67,pedralbes,Circuit de Pedralbes,Barcelona,Spain,41.3903,2.11667,85,http://en.wikipedia.org/wiki/Pedralbes_Circuit


<font size = "5">

Concatenate data back together

- Useful when a dataset has been split by geography,
- year, or another subgroup

In [8]:
# pd.concat() stacks the rows of the listed datasets, in list order.
# This is simplest when every dataset has the same columns; the
# documentation describes further options for when they do not:
# https://pandas.pydata.org/docs/reference/api/pandas.concat.html

circuits_concat = pd.concat(
    objs=[circuits_spain, circuits_usa, circuits_malaysia],
)
circuits_concat

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
11,12,valencia,Valencia Street Circuit,Valencia,Spain,39.4589,-0.331667,4,http://en.wikipedia.org/wiki/Valencia_Street_C...
25,26,jerez,Circuito de Jerez,Jerez de la Frontera,Spain,36.7083,-6.03417,37,http://en.wikipedia.org/wiki/Circuito_Permanen...
44,45,jarama,Jarama,Madrid,Spain,40.6171,-3.58558,609,http://en.wikipedia.org/wiki/Circuito_Permanen...
48,49,montjuic,Montjuïc,Barcelona,Spain,41.3664,2.15167,79,http://en.wikipedia.org/wiki/Montju%C3%AFc_cir...
66,67,pedralbes,Circuit de Pedralbes,Barcelona,Spain,41.3903,2.11667,85,http://en.wikipedia.org/wiki/Pedralbes_Circuit
18,19,indianapolis,Indianapolis Motor Speedway,Indianapolis,USA,39.795,-86.2347,223,http://en.wikipedia.org/wiki/Indianapolis_Moto...
22,80,vegas,Las Vegas Strip Street Circuit,Las Vegas,United States,36.1147,-115.173,\N,https://en.wikipedia.org/wiki/Las_Vegas_Grand_...
32,33,phoenix,Phoenix street circuit,Phoenix,USA,33.4479,-112.075,345,http://en.wikipedia.org/wiki/Phoenix_street_ci...
36,37,detroit,Detroit Street Circuit,Detroit,USA,42.3298,-83.0401,177,http://en.wikipedia.org/wiki/Detroit_street_ci...


In [11]:
# Build a version of the Spain subset without two of its columns.
# .drop() returns a new DataFrame, so circuits_spain is left unchanged.
circuits_spain_drop = circuits_spain.drop(['circuitRef', 'location'], axis="columns")

circuits_spain_drop

Unnamed: 0,circuitId,name,country,lat,lng,alt,url
3,4,Circuit de Barcelona-Catalunya,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
11,12,Valencia Street Circuit,Spain,39.4589,-0.331667,4,http://en.wikipedia.org/wiki/Valencia_Street_C...
25,26,Circuito de Jerez,Spain,36.7083,-6.03417,37,http://en.wikipedia.org/wiki/Circuito_Permanen...
44,45,Jarama,Spain,40.6171,-3.58558,609,http://en.wikipedia.org/wiki/Circuito_Permanen...
48,49,Montjuïc,Spain,41.3664,2.15167,79,http://en.wikipedia.org/wiki/Montju%C3%AFc_cir...
66,67,Circuit de Pedralbes,Spain,41.3903,2.11667,85,http://en.wikipedia.org/wiki/Pedralbes_Circuit


In [12]:
# Concatenating datasets whose columns differ keeps every column;
# rows from the dataset that lacks a column get missing values there
# (here "circuitRef" and "location" for the Spain rows).
pd.concat(objs=[circuits_spain_drop, circuits_usa])

Unnamed: 0,circuitId,name,country,lat,lng,alt,url,circuitRef,location
3,4,Circuit de Barcelona-Catalunya,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...,,
11,12,Valencia Street Circuit,Spain,39.4589,-0.331667,4,http://en.wikipedia.org/wiki/Valencia_Street_C...,,
25,26,Circuito de Jerez,Spain,36.7083,-6.03417,37,http://en.wikipedia.org/wiki/Circuito_Permanen...,,
44,45,Jarama,Spain,40.6171,-3.58558,609,http://en.wikipedia.org/wiki/Circuito_Permanen...,,
48,49,Montjuïc,Spain,41.3664,2.15167,79,http://en.wikipedia.org/wiki/Montju%C3%AFc_cir...,,
66,67,Circuit de Pedralbes,Spain,41.3903,2.11667,85,http://en.wikipedia.org/wiki/Pedralbes_Circuit,,
18,19,Indianapolis Motor Speedway,USA,39.795,-86.2347,223,http://en.wikipedia.org/wiki/Indianapolis_Moto...,indianapolis,Indianapolis
22,80,Las Vegas Strip Street Circuit,United States,36.1147,-115.173,\N,https://en.wikipedia.org/wiki/Las_Vegas_Grand_...,vegas,Las Vegas
32,33,Phoenix street circuit,USA,33.4479,-112.075,345,http://en.wikipedia.org/wiki/Phoenix_street_ci...,phoenix,Phoenix
36,37,Detroit Street Circuit,USA,42.3298,-83.0401,177,http://en.wikipedia.org/wiki/Detroit_street_ci...,detroit,Detroit


<font size = "5">

Try it yourself!

- Concatenate the USA and Malaysia datasets



In [13]:
# Write your own code

# Stack the USA rows on top of the Malaysia rows
pd.concat(
    objs=[circuits_usa, circuits_malaysia],
)

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
18,19,indianapolis,Indianapolis Motor Speedway,Indianapolis,USA,39.795,-86.2347,223,http://en.wikipedia.org/wiki/Indianapolis_Moto...
22,80,vegas,Las Vegas Strip Street Circuit,Las Vegas,United States,36.1147,-115.173,\N,https://en.wikipedia.org/wiki/Las_Vegas_Grand_...
32,33,phoenix,Phoenix street circuit,Phoenix,USA,33.4479,-112.075,345,http://en.wikipedia.org/wiki/Phoenix_street_ci...
36,37,detroit,Detroit Street Circuit,Detroit,USA,42.3298,-83.0401,177,http://en.wikipedia.org/wiki/Detroit_street_ci...
41,42,dallas,Fair Park,Dallas,USA,32.7774,-96.7587,139,http://en.wikipedia.org/wiki/Fair_Park
42,43,long_beach,Long Beach,California,USA,33.7651,-118.189,12,"http://en.wikipedia.org/wiki/Long_Beach,_Calif..."
43,44,las_vegas,Las Vegas Street Circuit,Nevada,USA,36.1162,-115.174,639,http://en.wikipedia.org/wiki/Las_Vegas_Street_...
45,46,watkins_glen,Watkins Glen,New York State,USA,42.3369,-76.9272,485,http://en.wikipedia.org/wiki/Watkins_Glen_Inte...
59,60,riverside,Riverside International Raceway,California,USA,33.937,-117.273,470,http://en.wikipedia.org/wiki/Riverside_Interna...
62,63,sebring,Sebring International Raceway,Florida,USA,27.4547,-81.3483,18,http://en.wikipedia.org/wiki/Sebring_Raceway
