In [1]:
import pandas as pd 
import numpy as np
from unidecode import unidecode

In [2]:
# Load the cleaned space-mission CSV, decoding the file as ISO-8859-1.
encoding = "ISO-8859-1"
file_path = "Resources/SpaceMission_cleanedLocFinal.csv"
space_missions_df = pd.read_csv(filepath_or_buffer=file_path, encoding=encoding)

In [3]:
# Preview the first five rows of the loaded missions table.
space_missions_df.head(n=5)


Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch
0,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1957-10-04,19:28:00,Sputnik 8K71PS,Sputnik-1,Retired,Success,Night
1,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1957-11-03,02:30:00,Sputnik 8K71PS,Sputnik-2,Retired,Success,Night
2,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA",1957-12-06,16:44:00,Vanguard,Vanguard TV3,Retired,Failure,Day
3,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA",1958-02-01,03:48:00,Juno I,Explorer 1,Retired,Success,Night
4,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA",1958-02-05,07:33:00,Vanguard,Vanguard TV3BU,Retired,Failure,Day


In [4]:
# Separate launches into two DataFrames.
# This one holds the US space missions; the rest of the world's missions are
# handled in a later cell.
# regex=False matches "USA" as a literal substring, and na=False treats a missing
# Location as "not a US launch" instead of producing a NaN entry in the mask
# (a mask containing NaN cannot be used for boolean indexing).
is_us_launch = space_missions_df['Location'].str.contains('USA', regex=False, na=False)
us_launches = space_missions_df[is_us_launch]
# Display the US DataFrame
us_launches

Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch
2,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA",1957-12-06,16:44:00,Vanguard,Vanguard TV3,Retired,Failure,Day
3,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA",1958-02-01,03:48:00,Juno I,Explorer 1,Retired,Success,Night
4,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA",1958-02-05,07:33:00,Vanguard,Vanguard TV3BU,Retired,Failure,Day
5,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA",1958-03-05,18:27:00,Juno I,Explorer 2,Retired,Failure,Night
6,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA",1958-03-17,12:15:00,Vanguard,Vanguard 1,Retired,Success,Day
...,...,...,...,...,...,...,...,...,...
4619,Astra,"SLC-46, Cape Canaveral SFS, Florida, USA",2022-07-12,17:43:00,Rocket 3,TROPICS Flight 1,Active,Failure,Day
4622,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2022-07-15,00:44:00,Falcon 9 Block 5,CRS SpX-25,Active,Success,Night
4624,SpaceX,"SLC-40, Cape Canaveral SFS, Florida, USA",2022-07-17,14:20:00,Falcon 9 Block 5,Starlink Group 4-22,Active,Success,Day
4625,SpaceX,"SLC-4E, Vandenberg SFB, California, USA",2022-07-22,17:39:00,Falcon 9 Block 5,Starlink Group 3-2,Active,Success,Day


In [5]:
# Split the "Location" column into its four components.
# Splitting from the right with n=3 yields at most four parts, so a location
# containing an extra ", " keeps that text inside "Complex ID" instead of
# producing a fifth column that would not match the four names below.
location_parts = us_launches['Location'].str.rsplit(', ', n=3, expand=True)
# Name the resulting columns (US locations carry a state/territory part)
new_columns = ['Complex ID', 'Launch Site', 'Territory', 'Country']
location_parts.columns = new_columns
# Concatenate the new columns with the original DataFrame (aligned on the row index)
cleaned_US_df = pd.concat([us_launches, location_parts], axis=1)
# Drop the original "Location" column now that it has been split
cleaned_US_df = cleaned_US_df.drop(columns=['Location'])
# Display the updated DataFrame
cleaned_US_df

Unnamed: 0,Company,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch,Complex ID,Launch Site,Territory,Country
2,US Navy,1957-12-06,16:44:00,Vanguard,Vanguard TV3,Retired,Failure,Day,LC-18A,Cape Canaveral AFS,Florida,USA
3,AMBA,1958-02-01,03:48:00,Juno I,Explorer 1,Retired,Success,Night,LC-26A,Cape Canaveral AFS,Florida,USA
4,US Navy,1958-02-05,07:33:00,Vanguard,Vanguard TV3BU,Retired,Failure,Day,LC-18A,Cape Canaveral AFS,Florida,USA
5,AMBA,1958-03-05,18:27:00,Juno I,Explorer 2,Retired,Failure,Night,LC-26A,Cape Canaveral AFS,Florida,USA
6,US Navy,1958-03-17,12:15:00,Vanguard,Vanguard 1,Retired,Success,Day,LC-18A,Cape Canaveral AFS,Florida,USA
...,...,...,...,...,...,...,...,...,...,...,...,...
4619,Astra,2022-07-12,17:43:00,Rocket 3,TROPICS Flight 1,Active,Failure,Day,SLC-46,Cape Canaveral SFS,Florida,USA
4622,SpaceX,2022-07-15,00:44:00,Falcon 9 Block 5,CRS SpX-25,Active,Success,Night,LC-39A,Kennedy Space Center,Florida,USA
4624,SpaceX,2022-07-17,14:20:00,Falcon 9 Block 5,Starlink Group 4-22,Active,Success,Day,SLC-40,Cape Canaveral SFS,Florida,USA
4625,SpaceX,2022-07-22,17:39:00,Falcon 9 Block 5,Starlink Group 3-2,Active,Success,Day,SLC-4E,Vandenberg SFB,California,USA


In [6]:
# Every launch whose Location does not mention "USA".
# The mask is built with a literal match (regex=False) and na=False so that it is
# purely boolean and can be negated; rows with a missing Location therefore end
# up in this "rest of the world" frame.
is_us_launch = space_missions_df['Location'].str.contains('USA', regex=False, na=False)
rest_of_world_launches = space_missions_df[~is_us_launch]
# Display the rest-of-world DataFrame
rest_of_world_launches

Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch
0,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1957-10-04,19:28:00,Sputnik 8K71PS,Sputnik-1,Retired,Success,Night
1,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1957-11-03,02:30:00,Sputnik 8K71PS,Sputnik-2,Retired,Success,Night
8,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1958-04-27,09:01:00,Sputnik 8A91,Sputnik-3 #1,Retired,Failure,Day
10,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1958-05-15,07:12:00,Sputnik 8A91,Sputnik-3 #2,Retired,Success,Day
22,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan",1958-09-23,07:40:00,Vostok,E-1 na A1 (Luna-1),Retired,Failure,Day
...,...,...,...,...,...,...,...,...,...
4621,ESA,"ELV-1, Guiana Space Centre - French Guiana, Fr...",2022-07-13,13:13:00,Vega C,LARES 2 & Cubesats,Active,Success,Day
4623,CASC,"LC-9, Taiyuan Satellite Launch Center, China",2022-07-15,22:57:00,Long March 2C,SuperView Neo 2-01 & 02,Active,Success,Night
4626,CASC,"LC-101, Wenchang Satellite Launch Center, China",2022-07-24,06:22:00,Long March 5B,Wentian,Active,Success,Day
4628,CAS Space,", Jiuquan Satellite Launch Center, China",2022-07-27,04:12:00,Zhongke-1A,Demo Flight,Active,Success,Night


In [7]:
# Split the "Location" column into its three components.
# Splitting from the right with n=2 yields at most three parts, so a location
# containing an extra ", " keeps that text inside "Complex ID" instead of
# producing a fourth column that would not match the three names below.
# NOTE: `new_worldcolumns` is the DataFrame of split parts, while
# `new_world_columns` (with the extra underscore) is the list of column names.
new_worldcolumns = rest_of_world_launches['Location'].str.rsplit(', ', n=2, expand=True)
# Name the resulting columns (non-US locations have no territory part)
new_world_columns = ['Complex ID', 'Launch Site', 'Country']
new_worldcolumns.columns = new_world_columns
# Concatenate the new columns with the original DataFrame for the rest of the world
cleaned_world_df = pd.concat([rest_of_world_launches, new_worldcolumns], axis=1)
# Drop the original "Location" column now that it has been split
cleaned_world_df = cleaned_world_df.drop(columns=['Location'])
# Display the updated DataFrame
cleaned_world_df

Unnamed: 0,Company,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch,Complex ID,Launch Site,Country
0,RVSN USSR,1957-10-04,19:28:00,Sputnik 8K71PS,Sputnik-1,Retired,Success,Night,Site 1/5,Baikonur Cosmodrome,Kazakhstan
1,RVSN USSR,1957-11-03,02:30:00,Sputnik 8K71PS,Sputnik-2,Retired,Success,Night,Site 1/5,Baikonur Cosmodrome,Kazakhstan
8,RVSN USSR,1958-04-27,09:01:00,Sputnik 8A91,Sputnik-3 #1,Retired,Failure,Day,Site 1/5,Baikonur Cosmodrome,Kazakhstan
10,RVSN USSR,1958-05-15,07:12:00,Sputnik 8A91,Sputnik-3 #2,Retired,Success,Day,Site 1/5,Baikonur Cosmodrome,Kazakhstan
22,RVSN USSR,1958-09-23,07:40:00,Vostok,E-1 na A1 (Luna-1),Retired,Failure,Day,Site 1/5,Baikonur Cosmodrome,Kazakhstan
...,...,...,...,...,...,...,...,...,...,...,...
4621,ESA,2022-07-13,13:13:00,Vega C,LARES 2 & Cubesats,Active,Success,Day,ELV-1,Guiana Space Centre - French Guiana,France
4623,CASC,2022-07-15,22:57:00,Long March 2C,SuperView Neo 2-01 & 02,Active,Success,Night,LC-9,Taiyuan Satellite Launch Center,China
4626,CASC,2022-07-24,06:22:00,Long March 5B,Wentian,Active,Success,Day,LC-101,Wenchang Satellite Launch Center,China
4628,CAS Space,2022-07-27,04:12:00,Zhongke-1A,Demo Flight,Active,Success,Night,,Jiuquan Satellite Launch Center,China


In [8]:
# Combine cleaned_US_df and cleaned_world_df into a single DataFrame.
# Columns both frames are expected to share ("Territory" exists only on the US frame)
common_columns = ['Company', 'Date', 'Time', 'Rocket', 'Mission', 'RocketStatus', 'MissionStatus', 'Day/Night Launch', 'Complex ID', 'Launch Site', 'Country']

# Fail early if either frame lacks a shared column (KeyError, as merge() raised for a missing key)
missing_columns = sorted(
    (set(common_columns) - set(cleaned_US_df.columns))
    | (set(common_columns) - set(cleaned_world_df.columns))
)
if missing_columns:
    raise KeyError(f"Columns missing from one of the frames: {missing_columns}")

# Stack the rows of both frames: US launches first, then the rest of the world.
# An outer merge on every shared column only behaves like a stack when no row of
# one frame matches a row of the other; it pairs rows whose keys coincide and,
# on newer pandas releases (2.2+), sorts the result by the key columns.
# concat keeps each frame's row order, fills "Territory" with NaN for non-US
# rows, and ignore_index=True gives a fresh 0..n-1 index.
merged_df = pd.concat([cleaned_US_df, cleaned_world_df], ignore_index=True)

# Show merged dataframe
merged_df

Unnamed: 0,Company,Date,Time,Rocket,Mission,RocketStatus,MissionStatus,Day/Night Launch,Complex ID,Launch Site,Territory,Country
0,US Navy,1957-12-06,16:44:00,Vanguard,Vanguard TV3,Retired,Failure,Day,LC-18A,Cape Canaveral AFS,Florida,USA
1,AMBA,1958-02-01,03:48:00,Juno I,Explorer 1,Retired,Success,Night,LC-26A,Cape Canaveral AFS,Florida,USA
2,US Navy,1958-02-05,07:33:00,Vanguard,Vanguard TV3BU,Retired,Failure,Day,LC-18A,Cape Canaveral AFS,Florida,USA
3,AMBA,1958-03-05,18:27:00,Juno I,Explorer 2,Retired,Failure,Night,LC-26A,Cape Canaveral AFS,Florida,USA
4,US Navy,1958-03-17,12:15:00,Vanguard,Vanguard 1,Retired,Success,Day,LC-18A,Cape Canaveral AFS,Florida,USA
...,...,...,...,...,...,...,...,...,...,...,...,...
4625,ESA,2022-07-13,13:13:00,Vega C,LARES 2 & Cubesats,Active,Success,Day,ELV-1,Guiana Space Centre - French Guiana,,France
4626,CASC,2022-07-15,22:57:00,Long March 2C,SuperView Neo 2-01 & 02,Active,Success,Night,LC-9,Taiyuan Satellite Launch Center,,China
4627,CASC,2022-07-24,06:22:00,Long March 5B,Wentian,Active,Success,Day,LC-101,Wenchang Satellite Launch Center,,China
4628,CAS Space,2022-07-27,04:12:00,Zhongke-1A,Demo Flight,Active,Success,Night,,Jiuquan Satellite Launch Center,,China


In [9]:
# Write the merged DataFrame to CSV (without the index) for the question analysis
merged_output_path = 'Resources/SpaceMission_Merged.csv'
merged_df.to_csv(merged_output_path, index=False)