# Some functions

In [None]:
def run_regression(models, economies, df, x, y):
    """
    Fit one regression model per economy.

    models = dictionary of economy-model pairs, for example
        {economy: LinearRegression() for economy in economies}
    economies = list of economies (not read by this function; the keys
        of ``models`` decide which economies are fitted)
    df = dataframe whose index labels select the rows of an economy
        through df.loc[economy, ...]
    x = column label(s) passed to fit() as explanatory data
    y = column label(s) passed to fit() as target data

    Every model is fitted in place and the same dictionary is returned,
    so each economy ends up with its own set of coefficients.
    """
    for economy in models:
        explanatory = df.loc[economy, x]
        target = df.loc[economy, y]
        models[economy].fit(explanatory, target)
    return models

In [None]:
def run_prediction(models, economies, df, ResultsColumn):
    """
    Use the fitted models from run_regression to generate predictions.

    models = dictionary of economy-model pairs; each model must expose
        predict()
    economies = list of economies (not read by this function; the keys
        of ``models`` decide which economies are predicted)
    df = dataframe whose index labels select the rows of an economy and
        which has a 'Year' column. Every column of an economy's rows
        (including 'Year') is handed to model.predict().
    ResultsColumn = name of prediction results, either a single string
        or a list-like with one name per prediction column

    Predictions are passed through np.exp before being stored
    (presumably the models were fitted on log-transformed targets --
    TODO confirm against the caller).

    Returns a single dataframe with the columns 'Economy', 'Year' and
    the prediction column(s); the per-economy results are stacked on
    top of each other. ``df`` itself is left unmodified.
    """
    # pd.DataFrame(columns=...) requires a list-like, so wrap a bare name.
    if isinstance(ResultsColumn, str):
        result_columns = [ResultsColumn]
    else:
        result_columns = ResultsColumn

    df_list = []
    # run predictions
    for economy, model in models.items():
        # Selecting with a list always yields a DataFrame, even when the
        # economy has a single row.
        economy_rows = df.loc[[economy]]
        prediction = model.predict(economy_rows)
        df_name = pd.DataFrame(np.exp(prediction), columns=result_columns)
        # Label the predictions with this economy's own years. Plain
        # values are inserted positionally, so no index alignment against
        # the full dataframe takes place and the caller's data is not
        # touched.
        df_name.insert(loc=0, column='Year',
                       value=economy_rows['Year'].to_numpy())
        df_name.insert(loc=0, column='Economy', value=economy)
        df_list.append(df_name)
    # combine individual economy dataframes to one dataframe
    dfResults = pd.concat(df_list, sort=True)
    return dfResults

In [None]:
def cagr(start_value, end_value, num_periods):
    """
    Calculate compound annual growth rate.

    start_value = first value of the series
    end_value = last value of the series
    num_periods = number of observations in the series; growth is
        compounded over num_periods - 1 intervals
    Returns the growth rate as a fraction (0.1 means 10 %).
    """
    growth_ratio = end_value / start_value
    exponent = 1 / (num_periods - 1)
    return growth_ratio ** exponent - 1

In [None]:
def calcCAGR(df,economies):
    """
    Calculate CAGR for all economies.

    df = dataframe with an 'Economy' column, a 'Year' column and one
        column per data series. For every economy the first and last
        row of each series are used as start and end value, so the rows
        are presumably expected in chronological order (verify against
        the caller).
    economies = list or array of economies; nested lists and
        multi-dimensional arrays are flattened

    Returns a dataframe with one row per economy and data column:
    'A' holds the economy, 'B' the column name and 'C' the CAGR.
    """
    records = []
    # np.ravel accepts plain lists as well as arrays of any shape,
    # whereas .flatten() only exists on arrays.
    for economy in np.ravel(economies):
        economy_rows = df[df['Economy'] == economy]
        # Every column of the slice has the same length.
        num_periods = len(economy_rows)
        for col in economy_rows.drop(['Economy', 'Year'], axis=1):
            start_value = float(economy_rows[col].iloc[0])
            end_value = float(economy_rows[col].iloc[-1])
            cagr_result = cagr(start_value, end_value, num_periods)
            records.append((economy, col, cagr_result))
    return pd.DataFrame(records, columns=['A', 'B', 'C'])

In [None]:
def calcYOY(df,economies):
    """
    Calculate year-over-year for all economies.

    df = dataframe with columns of data for growth rates whose index
        selects an economy through df.loc[economy]. The remaining index
        of that selection is turned back into a column by reset_index()
        (presumably the year level of an (Economy, Year) MultiIndex --
        verify against the caller).
    economies = list or array of economies; nested lists and
        multi-dimensional arrays are flattened

    Returns one dataframe with an 'Economy' column in first position,
    followed by the reset index and the fractional change of every data
    column relative to the previous row. The first row of each economy
    has no predecessor and is therefore NaN.
    """
    df_list = []
    # np.ravel accepts plain lists as well as arrays of any shape,
    # whereas .flatten() only exists on arrays.
    for economy in np.ravel(economies):
        yoy = df.loc[economy].pct_change().reset_index()
        yoy.insert(loc=0, column='Economy', value=economy)
        df_list.append(yoy)
    dfPC = pd.concat(df_list)
    return dfPC