In [1]:
import pandas as pd
import pandas_datareader as pdr
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
import datetime

matplotlib.style.use('ggplot')

  from pandas.util.testing import assert_frame_equal


In [2]:
# Directories
OUTPUT_DIR = '../Outputs/country_clusters/'
INPUT_DIR = '../Data/country_data/'

In [3]:
# Load ETF returns for Equities
equity_etfs = pd.read_csv(INPUT_DIR + 'ETF_adj_close.csv', index_col=0)
equity_etfs.index = pd.to_datetime(equity_etfs.index)

equity_etfs_w = equity_etfs.resample('W').last()
equity_rets_w = np.log(equity_etfs_w).diff()
equity_rets_w = equity_rets_w.dropna()

In [5]:
# Load market cap data for countries
country_mkt_cap = pd.read_csv(INPUT_DIR + 'country_market_caps.csv',skiprows=4)
country_mkt_cap = country_mkt_cap[country_mkt_cap.columns[~country_mkt_cap.columns.isin(['Country Code', 'Indicator Name', 'Indicator Code'])]]
country_mkt_cap = country_mkt_cap.T.rename_axis('Year')
country_mkt_cap = country_mkt_cap.rename(columns=country_mkt_cap.iloc[0]).drop(country_mkt_cap.index[0])

# Only select countries in EM universe
# Remove Taiwan since it doesn't exist in the market cap data; change names for Korea, Russia, Egypt for better search
em_countries = equity_rets_w.columns.values
em_countries = np.where(em_countries=='Korea', 'Korea, Rep.', em_countries) 
em_countries = np.where(em_countries=='Russia', 'Russian Federation', em_countries) 
em_countries = np.where(em_countries=='Egypt', 'Egypt, Arab Rep.', em_countries)
em_countries = em_countries[~np.isin(em_countries, 'Taiwan')]

start_year = 2015
country_mkt_cap = country_mkt_cap[em_countries]
country_mkt_cap.index = pd.to_numeric(country_mkt_cap.index)
country_mkt_cap = country_mkt_cap[country_mkt_cap.index>=start_year]
country_mkt_cap.rename(columns={'Korea, Rep.': 'Korea', 'Russian Federation': 'Russia', 'Egypt, Arab Rep.': 'Egypt'}, inplace=True)

In [6]:
# Market portfolio: Uses iShares MSCI EM ETF
market_etfs = pd.read_csv(INPUT_DIR + 'etf_msci_em_agg_indices.csv',skiprows=5, index_col=0)
market_etfs.columns = ['EEM ETF', 'MSCI EM']
market_etfs.index = pd.to_datetime(market_etfs.index)

# Market portfolio returns
market_rets_w = market_etfs.resample('W').last()
market_rets_w = np.log(market_rets_w).diff()
market_rets_w = market_rets_w.dropna()

# Filter by year
start_date = '2015-09-27'
end_date = '2021-01-31'
market_rets_w = market_rets_w[(market_rets_w.index>=start_date) & (market_rets_w.index<=end_date)]

# Purpose of the Notebook
1. Construct country factor portfolios
2. Construct different weighting schemes
3. Backtest and construct performance metrics

# 1. Construct country factor portfolios

## Small cap portfolio
Small cap portfolio contains a third of all countries with the lowest capitalization and uses a 12-months rebalancing rule.

In [7]:
def calc_small_cap(df):
    '''
    Takes input a dataframe and computes small cap countries by year
    + Input: df
    + Output: small_cap_df: a df containing list of small cap EM countries by year
    '''
    small_cap_dict = {}
    num_countries = int(len(df.columns)/3)
    
    for year in df.index.values:
        new_df = df[df.index==year]
        new_df = new_df.apply(pd.to_numeric)
        new_dfT = new_df.T
        new_dfT.columns = ['Market Cap']
        smallest_market_cap = new_dfT.nsmallest(num_countries, 'Market Cap')
        
        # Store
        if smallest_market_cap.empty:
            small_cap_dict[year] = small_cap_dict[year-1]
        else:
            small_cap_dict[year] = list(smallest_market_cap.index.values)
        
    # Convert dictionary to dataframe
    small_cap_df = pd.DataFrame(small_cap_dict.items(), columns = ['Date', 'DateValue'])
    small_cap_df.index = pd.to_datetime(small_cap_df.Date, format='%Y')
    del small_cap_df['Date']
    
    return small_cap_df

In [8]:
small_cap_df = calc_small_cap(country_mkt_cap)

## Momentum portfolio
We calculate every week the cumulative trailing weekly returns for $t$-2 to $t$-52 and form three portfolios containing in equal numbers the highest, medium, and lowest momentum countries.

In [9]:
def calc_momentum(df, window=52):
    rolling_sum = df.rolling(window).sum().dropna()
    rolling_ret_mom = np.exp(rolling_sum) - 1
    num_countries = int(len(df.columns)/3)
    
    mom_df = rolling_ret_mom.apply(lambda x: pd.Series(x.nlargest(num_countries).index), axis=1)
    # Convert columns to list
    mom_df['countries'] = mom_df[mom_df.columns].values.tolist()
    mom_df = mom_df[['countries']]
    
    return mom_df

In [10]:
mom_df = calc_momentum(equity_rets_w)

## Low Country Beta portfolio
We estimate every week the country beta against the ETF EM index (EEM) using a rolling sample of 52 weekly observations and create portfolios of lowest beta countries.

In [11]:
def calc_country_beta(df, market_port_ret, window=52):
    # Find country beta's through rolling regression
    y = market_port_ret
    rolling_betas = {}
    for c in df.columns:
        X = sm.add_constant(df[c])
        model = RollingOLS(y, X, window)
        rolling_res = model.fit(params_only=True)
        rolling_betas[c] = rolling_res.params.dropna()
    
    # Put all beta's for every country and every date in a dataframe
    out_df = pd.DataFrame()
    for key, value in rolling_betas.items():
        col = pd.DataFrame(value[key])
        if out_df.empty:
            out_df = out_df.append(col)
        else:
            out_df = pd.concat([out_df, col], axis=1)
        
    # Sort for lowest betas
    num_countries = int(len(df.columns)/3)
    low_beta_df = out_df.apply(lambda x: pd.Series(x.nlargest(num_countries).index), axis=1)
    # Convert columns to list
    low_beta_df['countries'] = low_beta_df[low_beta_df.columns].values.tolist()
    low_beta_df = low_beta_df[['countries']]
        
    return low_beta_df

In [12]:
low_beta_df = calc_country_beta(equity_rets_w, market_rets_w['EEM ETF'])

## Value portfolio

# 2. Construct weighting methodologies
* Equal weighting (EW)
* Inverse variance (IV)
* Minimum variance (MinVar)

## Equal Weighting (EW)-Size only

In [13]:
# Equal weighting
def calc_EW(ret_df, country_factor_df, month = 9, day = 1):
    out_df = pd.DataFrame()
    for idx, year in enumerate(list(country_factor_df.index.year)):
        # Countries to include in portfolio        
        port_countries = country_factor_df[country_factor_df.index.year==year].iloc[0][0]
        ret_port = ret_df[port_countries]
        
        # Subset return data for next 12 months (Sep-Sep of each year)
        if idx == 0:
            start = datetime.datetime(year, month, day)
        else:
            start = end
        end = datetime.datetime(year+1, month+1, day)
        ret_port = ret_port[(ret_port.index >= start) & (ret_port.index <= end)]
        
        # Create equal weights on every country for the next 12 months
        w = 1/len(port_countries)
        port_ret = pd.DataFrame((w*ret_port).sum(axis=1))
        
        # Store
        out_df = out_df.append(port_ret)
    
    out_df.columns = ['EW returns']
    out_df = out_df[~out_df.index.duplicated(keep='first')]
    
    return out_df

In [14]:
size_ew_rets = calc_EW(equity_rets_w, small_cap_df)

## Equal Weighting (EW)

In [15]:
def calc_EW_w(ret_df, country_factor_df):
    out_df = pd.DataFrame()
    for idx, row in country_factor_df.iterrows():
        country_lst = row['countries']
        ret_df_sub = ret_df[ret_df.index==idx][country_lst]
        
        # Construct weightings
        w = 1/len(country_lst)
        port_ret = pd.DataFrame((w*ret_df_sub).sum(axis=1))
        
        # Store
        out_df = out_df.append(port_ret)
    
    out_df.columns = ['EW returns']
    out_df = out_df[~out_df.index.duplicated(keep='first')]
    
    return out_df

In [16]:
mom_ew_rets = calc_EW_w(equity_rets_w, mom_df)
low_beta_ew_rets = calc_EW_w(equity_rets_w, low_beta_df)

## Inverse Variance (IV)

In [17]:
# Estimate volatilities using GARCH(1,1)
#def calc_est_vol(df)

# 3. Performance

In [18]:
# Generate performance assessment statistics
def calc_performance(rets_series, time = 52):
    '''
    Generates performance metrics for portfolio or market
    + Input: return series (in log returns)
    + Output: mean annualized returns, SR, volatility
    '''
    perf_df = pd.DataFrame(rets_series)
    perf_df.columns = ['returns']
    
    # Compute mean return, annualized
    def calc_mean_return(df, column):
        mean_log_return = np.mean(df[column]) 
        # Convert back to simple returns
        mean_return_annualized = (np.exp(mean_log_return) - 1) * time
        return mean_return_annualized
    
    # Compute volatility, annualized
    def calc_vol(df, column):
        std_annualized = df[column].std() * np.sqrt(time)
        return std_annualized
    
    # Compute all metrics for portfolio
    mean_return_ann = calc_mean_return(perf_df, 'returns')
    std_annualized = calc_vol(perf_df, 'returns')
    
    all_ = [round(mean_return_ann*100,2), round(std_annualized*100,2)]
    
    return all_

# Generate graphs of returns
#def plot

In [19]:
# Country factor portfolios
size_ew_perf = calc_performance(size_ew_rets['EW returns'])
mom_ew_perf = calc_performance(mom_ew_rets['EW returns'])
low_beta_ew_perf = calc_performance(low_beta_ew_rets['EW returns'])

# Market
market_perf = calc_performance(market_rets_w['EEM ETF'])



In [20]:
size_ew_perf

[0.63, 22.91]

In [21]:
mom_ew_perf

[21.82, 17.05]

In [22]:
low_beta_ew_perf

[8.67, 18.1]

In [23]:
market_perf

[8.45, 20.52]