In [2]:
! pip install requests
# Requests allows us to make HTTP requests which we will use to get data from an API



In [None]:
import json 
import numpy as np 
import pandas as pd 
import requests
import datetime 


In [4]:
url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(url)

In [5]:
response.content
response.status_code

200

In [6]:
# Method to convert the json result into a dataframe
data = pd.DataFrame(response.json())
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

In [7]:
data = data[data['cores'].map(len) == 1]
data = data[data['payloads'].map(len) == 1]

In [8]:
# data cleaning and extraction getting and setting the data based on out need 
data['cores'] = data['cores'].map(lambda x:x[0])
data['payloads'] = data['payloads'].map(lambda x:x[0])


In [9]:
data['date'] = pd.to_datetime(data['date_utc']).dt.date

In [10]:
data = data[data['date'] <= datetime.date(2020,11,13)]

In [11]:
# Extract booster name from rocket 
# Extracting payload mass and orbit from payload 
# Extracting launchpad side, longitude and latitude from launchpad 

data['rocket'].head(1)

0    5e9d0d95eda69955f709d1eb
Name: rocket, dtype: object

In [12]:
# making the induvial columns to obtain the required dataset
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []

In [13]:
url1 = "https://api.spacexdata.com/v4/rockets/"
def getBoosterVersion(data):
    for x in data['rocket']:
        if x:
            BoosterVersion.append(requests.get("https://api.spacexdata.com/v4/rockets/"+str(x)).json()['name'])

getBoosterVersion(data)
BoosterVersion[0:5]

['Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 9']

In [14]:
def getLaunchSite(data):
    for i in data['launchpad']:
        if i :
            url = 'https://api.spacexdata.com/v4/launchpads/' + str(i)
            response = requests.get(url).json()
            Longitude.append(response['latitude'])
            Latitude.append(response['latitude'])
            LaunchSite.append(response['name'])

In [15]:
getLaunchSite(data)

In [16]:
def  getPayloadData(data):
    for i in data['payloads']:
        if i:
            url = 'https://api.spacexdata.com/v4/payloads/' + str(i)
            response = requests.get(url).json()
            PayloadMass.append(response['mass_kg'])
            Orbit.append(response['orbit'])  

getPayloadData(data)

In [17]:
def getCoreData(data):
    for i in data['cores']:
        if i['core']:
            url = "https://api.spacexdata.com/v4/cores/"+i['core']
            response = requests.get(url).json()
            Block.append(response['block'])
            ReusedCount.append(response['reuse_count'])
            Serial.append(response['serial'])
            Outcome.append(str(i['landing_success'])+' '+str(i['landing_type']))
            Flights.append(i['flight'])
            GridFins.append(i['gridfins'])
            Reused.append(i['reused'])
            Legs.append(i['legs'])
            LandingPad.append(i['landpad'])

getCoreData(data)

In [18]:
BoosterVersion = BoosterVersion[:94]

In [19]:
launch_dict = {'FlightNumber': list(data['flight_number']),
'Date': list(data['date']),
'BoosterVersion':BoosterVersion,
'PayloadMass':PayloadMass,
'Orbit':Orbit,
'LaunchSite':LaunchSite,
'Outcome':Outcome,
'Flights':Flights,
'GridFins':GridFins,
'Reused':Reused,
'Legs':Legs,
'LandingPad':LandingPad,
'Block':Block,
'ReusedCount':ReusedCount,
'Serial':Serial,
'Longitude': Longitude,
'Latitude': Latitude}




In [20]:
for i in launch_dict:
    print(len(launch_dict[i]))

94
94
94
94
94
94
94
94
94
94
94
94
94
94
94
94
94


In [21]:
# DataFrame
df = pd.DataFrame.from_dict(launch_dict)

In [22]:
# Filtering only Falcon 9
df = df[df['BoosterVersion'] == 'Falcon 9']

In [23]:
# Arranging the based on FlightNumber
df.loc[:,'FlightNumber'] = list(range(1,df.shape[0]+1))

In [24]:
df.isnull().sum()

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass        5
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
dtype: int64

In [25]:
# Removing all null values
df['PayloadMass'] = df['PayloadMass'].fillna(df['PayloadMass'].mean())

In [26]:
df.isnull().sum()

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass        0
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
dtype: int64

In [29]:
df.to_csv('API_dataset.csv',index = False)

In [28]:
df.head(5)

Unnamed: 0,FlightNumber,Date,BoosterVersion,PayloadMass,Orbit,LaunchSite,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
4,1,2010-06-04,Falcon 9,6123.547647,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0003,28.561857,28.561857
5,2,2012-05-22,Falcon 9,525.0,LEO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0005,28.561857,28.561857
6,3,2013-03-01,Falcon 9,677.0,ISS,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B0007,28.561857,28.561857
7,4,2013-09-29,Falcon 9,500.0,PO,VAFB SLC 4E,False Ocean,1,False,False,False,,1.0,0,B1003,34.632093,34.632093
8,5,2013-12-03,Falcon 9,3170.0,GTO,CCSFS SLC 40,None None,1,False,False,False,,1.0,0,B1004,28.561857,28.561857
