In [1]:
import pandas as pd
import plotly.express as px




Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the processed launch records. The path is relative to this notebook's
# directory; it is a plain string because nothing is interpolated into it.
spaceflight_data = pd.read_csv('../../../data/processed/global_involvement/space_missions/next_spaceflight_processed.csv')

# Preview the frame (pandas truncates the display to the first and last rows).
spaceflight_data

Unnamed: 0,company_name,date,detail,status_rocket,Rocket,status_mission,address,country
0,SpaceX,2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50,Success,"LC-39A, Kennedy Space Center, Florida",USA
1,CASC,2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",China
2,SpaceX,2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,,Success,"Pad A, Boca Chica, Texas",USA
3,Roscosmos,2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65,Success,"Site 200/39, Baikonur Cosmodrome",Kazakhstan
4,ULA,2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145,Success,"SLC-41, Cape Canaveral AFS, Florida",USA
...,...,...,...,...,...,...,...,...
4319,US Navy,1958-02-05 07:33:00+00:00,Vanguard | Vanguard TV3BU,StatusRetired,,Failure,"LC-18A, Cape Canaveral AFS, Florida",USA
4320,AMBA,1958-02-01 03:48:00+00:00,Juno I | Explorer 1,StatusRetired,,Success,"LC-26A, Cape Canaveral AFS, Florida",USA
4321,US Navy,1957-12-06 16:44:00+00:00,Vanguard | Vanguard TV3,StatusRetired,,Failure,"LC-18A, Cape Canaveral AFS, Florida",USA
4322,RVSN USSR,1957-11-03 02:30:00+00:00,Sputnik 8K71PS | Sputnik-2,StatusRetired,,Success,"Site 1/5, Baikonur Cosmodrome",Kazakhstan


### Launch Countries

In [3]:
# distinct values of the launch-site country column, in order of first appearance
spaceflight_data.loc[:, 'country'].unique()

array(['USA', 'China', 'Kazakhstan', 'Japan', 'Israel', 'New Zealand',
       'Russia', 'Shahrud Missile Test Site', 'France', 'Iran', 'India',
       'New Mexico', 'Yellow Sea', 'North Korea',
       'Pacific Missile Range Facility', 'Pacific Ocean', 'South Korea',
       'Barents Sea', 'Brazil', 'Gran Canaria', 'Kenya', 'Australia'],
      dtype=object)

In [4]:

# create year col (the launch year is the grouping key used below)
spaceflight_data['date'] = pd.to_datetime(spaceflight_data['date'])
spaceflight_data['year'] = spaceflight_data['date'].dt.year

# list of specified countries
specified_countries = ['USA', 'China', 'Kazakhstan', 'Japan', 'Russia', 'France', 'India']

# categorize anything else as other
# (vectorized; values not in the list, including missing ones, become 'Other')
spaceflight_data['country'] = spaceflight_data['country'].where(
    spaceflight_data['country'].isin(specified_countries), 'Other'
)

# build every (year, country) pair so that pairs without any launch still
# show up in the result with a zero count
all_years = sorted(spaceflight_data['year'].unique())
all_countries = sorted(spaceflight_data['country'].unique())  # ensure countries are alphabetically sorted
all_combinations = pd.MultiIndex.from_product(
    [all_years, all_countries], names=['year', 'country']
).to_frame(index=False)

# aggregate
country_distribution_per_year = (
    spaceflight_data.groupby(['year', 'country']).size().reset_index(name='counts')
)

# The left merge leaves NaN for pairs with no launches, which turns `counts`
# into floats; fill only that column and cast back so counts stay integers.
full_distribution = (
    all_combinations
    .merge(country_distribution_per_year, on=['year', 'country'], how='left')
    .fillna({'counts': 0})
    .astype({'counts': 'int64'})
)

full_distribution

Unnamed: 0,year,country,counts
0,1957,China,0.0
1,1957,France,0.0
2,1957,India,0.0
3,1957,Japan,0.0
4,1957,Kazakhstan,2.0
...,...,...,...
507,2020,Japan,3.0
508,2020,Kazakhstan,6.0
509,2020,Other,6.0
510,2020,Russia,3.0


In [5]:

# create chart: one bar per launch-site country, animated year by year

# Use one fixed y-range for every animation frame; the +10 leaves headroom
# above the tallest bar.
y_axis_max = full_distribution['counts'].max() + 10

fig = px.bar(
    full_distribution,
    x='country',
    y='counts',
    color='country',
    animation_frame='year',
    animation_group='country',
    range_y=[0, y_axis_max],
    category_orders={"country": all_countries},
)

# format
fig.update_layout(
    # keep the x-axis in the same (alphabetical) country order in every frame
    xaxis={'categoryorder': 'array', 'categoryarray': all_countries},
    title='Launches Per Country of Launch Site',
    yaxis_title='Launches',
    xaxis_title='Launch Site Country',
    height=550,
    width=650,
    showlegend=False,
)

# save and show (plain string path: nothing is interpolated into it)
fig.write_html('../../docs_source/images/visualization/launches_per_country_of_launch_site.html')

fig.show()

### SpaceX

In [6]:
# categorize: 'SpaceX' stays as is, every other company (or missing name) becomes 'Other'
spaceflight_data['company_category'] = spaceflight_data['company_name'].where(
    spaceflight_data['company_name'] == 'SpaceX', 'Other'
)

# aggregate
launch_counts_per_year = (
    spaceflight_data.groupby(['year', 'company_category']).size().reset_index(name='counts')
)

# generate combinations
# NOTE: `all_years`, `all_combinations` and `full_distribution` reuse the names
# from the launch-country section, so the country chart cell cannot be re-run
# after this cell without first re-running the country aggregation cell.
all_years = sorted(spaceflight_data['year'].unique())
all_categories = ['SpaceX', 'Other']  # Only two categories
all_combinations = pd.MultiIndex.from_product(
    [all_years, all_categories], names=['year', 'company_category']
).to_frame(index=False)

# make distribution df
# Fill only `counts` and cast back to int: the left merge introduces NaN for
# (year, category) pairs without launches, which would otherwise leave floats.
full_distribution = (
    all_combinations
    .merge(launch_counts_per_year, on=['year', 'company_category'], how='left')
    .fillna({'counts': 0})
    .astype({'counts': 'int64'})
    .sort_values(by=['year', 'company_category'])
)

# running total per category; computed after the sort so that it explicitly
# accumulates in year order instead of relying on the merge's row order
full_distribution['cumulative_counts'] = (
    full_distribution.groupby('company_category')['counts'].cumsum()
)




In [7]:

# plot: launches per year as an area chart, split into SpaceX vs all other companies
fig = px.area(
    full_distribution,
    x='year',
    y='counts',
    color='company_category',
    line_group='company_category',
    category_orders={"company_category": ['SpaceX', 'Other']},
    title='Yearly Launches: SpaceX vs Others',
)

# format
fig.update_layout(
    yaxis_title='Number of Launches',
    xaxis_title='Year',
    height=550,
    width=650,
    showlegend=True,
    legend_title_text='Company Category',
)

# save and show
# NOTE(review): the output file name says "cumulative", but the chart plots the
# per-year `counts` column (not `cumulative_counts`) and is titled "Yearly
# Launches" — confirm which one is intended before changing either.
fig.write_html('../../docs_source/images/visualization/cumulative_launches_spacex_vs_others.html')

fig.show()
