In [7]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

In [8]:
# Prepare datasets: split the datapoints between lower secondary and primary completion rate
df = pd.read_csv("datasets/normalized_le_and_comp.csv")

# The indicator labels are matched as plain substrings (regex=False), and rows
# without an indicator name count as "no match" (na=False). Without na=False a
# missing name yields NaN in the mask, which boolean indexing rejects.
indicator_name = df['Indicator Name']
df_lower = df[indicator_name.str.contains('Lower secondary completion rate', regex=False, na=False)]
df_primary = df[indicator_name.str.contains('Primary completion rate', regex=False, na=False)]

In [9]:
fig = go.Figure()


def _fit_trend(points):
    """Fit a straight line of 'Life expectancy' against 'Value'.

    Rows missing either column are dropped before fitting, because
    ``np.polyfit`` cannot produce a usable fit from NaN input. The remaining
    rows are ordered by 'Value' so that a line drawn through the fitted
    values runs left to right once instead of retracing itself.

    Parameters
    ----------
    points : pandas.DataFrame
        Frame with at least the columns 'Value' and 'Life expectancy'.

    Returns
    -------
    tuple
        ``(x, y, slope, intercept)`` where ``x`` and ``y`` are the cleaned,
        sorted 'Value' and 'Life expectancy' columns and ``slope`` /
        ``intercept`` are the degree-1 coefficients from ``np.polyfit``.
    """
    usable = points[['Value', 'Life expectancy']].dropna().sort_values('Value')
    x, y = usable['Value'], usable['Life expectancy']
    slope, intercept = np.polyfit(x, y, 1)
    return x, y, slope, intercept


# Create linear regression line for lower secondary completion rate
x_lower, y_lower, slope_lower, intercept_lower = _fit_trend(df_lower)
regression_line_lower = slope_lower * x_lower + intercept_lower

# Create linear regression line for primary completion rate
x_primary, y_primary, slope_primary, intercept_primary = _fit_trend(df_primary)
regression_line_primary = slope_primary * x_primary + intercept_primary

# Add scatter plot: one semi-transparent marker trace per education level.
# The 'Country' column is attached as customdata on each marker trace.
scatter_layers = (
    (df_lower, 'Lower secondary completion rate', '#004529'),
    (df_primary, 'Primary completion rate', '#ad5b9c'),
)
for level_df, level_name, level_color in scatter_layers:
    fig.add_trace(go.Scatter(
        x=level_df['Value'],
        y=level_df['Life expectancy'],
        mode='markers',
        name=level_name,
        marker=dict(color=level_color, opacity=0.45),
        customdata=level_df['Country'],
    ))

# Add regression lines: added after both marker traces so the trace (and
# legend) order stays markers first, lines second. Each line reuses the
# colour of its marker trace.
trend_layers = (
    (x_lower, regression_line_lower, 'Regression Line Secondary', '#004529'),
    (x_primary, regression_line_primary, 'Regression Line Primary', '#ad5b9c'),
)
for trend_x, trend_y, trend_name, trend_color in trend_layers:
    fig.add_trace(go.Scatter(
        x=trend_x,
        y=trend_y,
        mode='lines',
        name=trend_name,
        line=dict(color=trend_color),
    ))

# Add hovermenu
# Every trace shows the x/y readout; marker traces additionally lead with the
# bolded customdata value. The empty <extra></extra> tag is part of both
# templates.
hover_values = (
    "Completion Rate: %{x}<br>"
    "Life Expectancy: %{y}<br>"
    "<extra></extra>"
)
hover_country = "<b>%{customdata}</b><br>"

for trace in fig.data:
    is_scatter_dots = trace.mode == 'markers'
    trace.hovertemplate = hover_country + hover_values if is_scatter_dots else hover_values

# Grid styling shared by the x- and y-axis
shared_axis_style = dict(gridcolor='darkgrey', gridwidth=1, zeroline=False)

fig.update_layout(
    # Centered title with a smaller subtitle line
    title='Education vs. Life expectancy<br><sup>Increasing education completion rates show association with higher life expectancy</sup>',
    title_x=0.5,
    # Plot area and surrounding paper use the same background colour
    plot_bgcolor='#cff8d6',
    paper_bgcolor='#cff8d6',
    margin=dict(l=80, b=120, r=50, t=100),
    # x ticks are rendered as whole percentages
    xaxis=dict(
        shared_axis_style,
        title='Completion rate (normalized)',
        tickformat='.0%',
    ),
    # y ticks are pinned to fixed positions
    yaxis=dict(
        shared_axis_style,
        title='Life expectancy (normalized)',
        tickvals=[0.2, 0.4, 0.6, 0.8, 1.0],
    ),
    # Boxed legend positioned near the top-left corner of the plot
    legend=dict(
        x=0.04,
        y=0.96,
        bordercolor='black',
        borderwidth=1,
        font=dict(size=10),
    ),
    width=780,
    height=520,
)

# Caption explaining the colour coding and the trendlines. It is positioned in
# paper coordinates with a negative y, i.e. below the plotting area, and is
# left-aligned with no arrow.
caption_text = (
    "The purple scatter dots represent a country's completion rate of primary school, while the green dots represent<br>"
    "that of lower secondary education. Through these scatter dots, two trendlines have been plotted, showcasing the trend<br>"
    "between the two factors"
)

fig.add_annotation(
    text=caption_text,
    x=-0.07,
    y=-0.35,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    align='left',
    showarrow=False,
    font=dict(size=12),
)


fig.show()