In [1]:
!pip install --upgrade plotly

Requirement already up-to-date: plotly in /usr/local/lib/python3.6/dist-packages (4.14.3)


In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

## Data Preprocessing

In [3]:
# Source of the launch records: a CSV file pinned to a specific gist revision.
DATA_URL = (
    "https://gist.githubusercontent.com/unexpectedjourney/"
    "5034d17d85a1d0f46ac254051b5e025a/raw/"
    "494f63dd46767c12857df7919eec6c9ab6542797/space_corrected.csv"
)
df = pd.read_csv(DATA_URL)

In [4]:
# Replace the original headers with short snake_case names.
# NOTE(review): the assignment is positional, so it assumes the CSV keeps
# exactly these seven columns in this order - confirm if the source changes.
df.columns = [
    "company",
    "location",
    "datum",
    "detail",
    "status_rocket",
    "cost",
    "status_mission",
]

In [5]:
# Preview the first rows to check the renamed columns.
df.head()

Unnamed: 0,company,location,datum,detail,status_rocket,cost,status_mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [6]:
def clear_cost(value):
    """Strip spaces and thousands separators from a textual cost value.

    Strings are returned with every space and comma removed (for example
    "5,000.0 " becomes "5000.0"); they are not converted to a number here.
    Any non-string value, such as a float or NaN, is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    # Delete both characters in a single pass.
    return value.translate(str.maketrans("", "", " ,"))

In [7]:
# Normalise every cost entry; the values stay strings (or NaN) at this point.
df["cost"] = df["cost"].map(clear_cost)

In [8]:
def get_country(value):
    """Return the text after the last comma of a location string, trimmed.

    If the string contains no comma, the whole string (trimmed) is returned.
    """
    _head, _sep, tail = value.rpartition(",")
    return tail.strip()

# Manual overrides for location strings whose last comma-separated part is
# not a country name. Keys are the raw trailing parts, values are the country
# each one is counted under.
# NOTE(review): Gran Canaria is a Spanish island; mapping it to "USA"
# presumably reflects the launch operator rather than geography - confirm.
countries = {
    "Barents Sea": "Russia",
    "Gran Canaria": "USA",
    "New Mexico": "USA",
    "Pacific Missile Range Facility": "USA",
    "Shahrud Missile Test Site": "Iran",
    "Yellow Sea": "China",
}

In [9]:
# Derive the country from the tail of each location string, then apply the
# manual overrides for tails that are not country names.
df["country"] = df["location"].map(get_country).replace(countries)

In [10]:
# Parse the launch timestamps. `utc=True` converts every value to UTC, so the
# column gets a single timezone-aware datetime dtype even if some strings
# carry a "UTC" suffix and others have no time zone at all (naive values are
# interpreted as UTC). With a proper datetime dtype the calendar parts can be
# taken with the vectorised `.dt` accessor instead of a per-row lambda.
df["datum"] = pd.to_datetime(df.datum, utc=True)
df["year"] = df.datum.dt.year
df["month"] = df.datum.dt.month
df["day"] = df.datum.dt.day

### Rocket cost by years *

In [11]:
# Yearly cost totals: cast cost to float, drop rows with missing values, then
# sum the cost and count the launches (via "company") per year.
rocket_df = (
    df[["year", "cost", "company"]]
    .assign(cost=lambda frame: frame["cost"].astype(np.float32))
    .dropna()
    .groupby(["year"])
    .agg({"cost": "sum", "company": "count"})
    .reset_index()
)
# Scale down by 1000; the chart below labels the result as billions.
rocket_df["cost"] = rocket_df["cost"] / 1000

In [12]:
# Area chart of the summed launch cost per year.
fig = px.area(rocket_df, x="year", y="cost")

# Call-outs for the peaks as (x, y, text). The y values are hard-coded chart
# coordinates, not looked up from rocket_df.
peak_notes = [
    (1969, 4.873, "\"Apollo\" program"),
    (1988, 5.9658, "\"Buran\" orbital flight"),
    (2009, 5.5889, "Six NASA space missions"),
]
for note_x, note_y, note_text in peak_notes:
    fig.add_annotation(
        x=note_x,
        y=note_y,
        text=note_text,
        showarrow=True,
        arrowhead=1,
    )

fig.update_layout(
    width=900,
    title="Rocket launches cost(in billions) per years",
    yaxis_ticksuffix="B",
    # One tick every five years, anchored at 1960.
    xaxis={"tickmode": "linear", "tick0": 1960, "dtick": 5},
)
fig.show()

This chart shows the total amount of money spent per year by all space companies from 1964 to 2020, expressed in billions. The peaks are marked with annotations: the annotated events accounted for most of the spending in those years. An area chart (a filled line chart) fits best here because it makes the year-to-year change in spending easy to follow.

As you can see, the most prominent operations were carried out despite economic crises, such as the one in 2007-2008.


### Launches by year and country *

In [13]:
# Number of launches per (country, year): give every row a weight of 1 and
# sum the weights within each group.
country_df = (
    df[["country", "year"]]
    .assign(launches=1)
    .groupby(["country", "year"])
    .agg({"launches": "sum"})
    .reset_index()
)

In [14]:
# Interactive bar chart: one hidden bar trace per calendar year plus a slider
# that makes exactly one trace visible at a time.
years = list(range(df.year.min(), df.year.max() + 1))
# Shared by the initial layout and every slider step, so the title shown on
# load always matches the first slider position instead of a hard-coded year.
title_template = "Amount of launches by countries in {} year"

fig = go.Figure()
for p_year in years:
    # Countries with launches in this year, busiest first.
    inner_df = country_df[country_df.year == p_year].sort_values("launches", ascending=False)
    fig.add_trace(
        go.Bar(
            visible=False,
            x=inner_df.country,
            y=inner_df.launches,
            name="",
        )
    )

# Show the first year until the slider is moved.
fig.data[0].visible = True

steps = []
for i, p_year in enumerate(years):
    # Visibility mask with a single True at this year's trace.
    visibility = [False] * len(years)
    visibility[i] = True
    steps.append(
        dict(
            label=str(p_year),  # slider labels are strings
            method="update",
            args=[
                {"visible": visibility},
                {"title": title_template.format(p_year)},
            ],
        )
    )

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Selected year: "},
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
    title=title_template.format(years[0]),
    sliders=sliders
)

fig.show()

Here I present an interactive chart showing the number of launches per country in a selected year. The bars are sorted in descending order, which makes the chart's main idea easier to grasp. I chose a bar chart because it lets the launch counts of individual countries be compared directly.  
As this chart shows, the leading countries by number of launches are the USA, China, and Kazakhstan (launches are attributed to the country of the launch site, so flights from the Baikonur Cosmodrome count towards Kazakhstan).

### Amount of launches by companies *

In [15]:
# Launch totals per company: weight every row with 1 and sum per company.
company_df = (
    df[["company"]]
    .assign(launches=1)
    .groupby(["company"])
    .agg({"launches": "sum"})
    .reset_index()
)
# Companies with fewer than 50 launches are relabelled and merged into a
# single "Other companies" row by the second aggregation.
company_df["company"] = company_df["company"].mask(
    company_df["launches"] < 50, "Other companies"
)
company_df = company_df.groupby("company").agg({"launches": "sum"}).reset_index()
# Text version of the count, e.g. "123 launches".
company_df["launches_str"] = company_df["launches"].astype(str) + " launches"

In [16]:
# Treemap with one tile per company, sized by its number of launches.
fig = px.treemap(
    company_df,
    path=["company"],
    values="launches",
    title="Amount of launches by companies",
)
# Print the company name and launch count in the top-left corner of each tile.
fig.update_traces(textposition="top left", textinfo="label+value")
# Fixed width; hover tooltips are switched off.
fig.update_layout(width=900, hovermode=False)
fig.show()

This chart shows the total number of launches per company over the whole period covered by the data. I chose a treemap because it encodes the number of launches as area, so readers can intuitively spot the company with the most flights. I did not use a bar chart because "RVSN USSR" has so many launches that its bar would dwarf all the others; in a treemap the differences look more moderate.  
As you can see in this chart, the top 3 companies by number of launches are:
1. RVSN USSR
2. Arianespace
3. CASC   

Many companies have very few launches (fewer than 50), so I merged them into a single "Other companies" block.