In [1]:
# Import the required libraries and dependencies
import pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans, AgglomerativeClustering, Birch
from sklearn.preprocessing import StandardScaler
import hvplot.pandas

In [2]:
# Load the global carry-trade data set from the Resources folder.
# No index_col is passed to read_csv, so no column is explicitly
# promoted to the index here.
carry_trades_csv = Path("../Resources/global_carry_trades.csv")
rate_df = pd.read_csv(carry_trades_csv)

# Preview the first rows of the loaded data
rate_df.head()

Unnamed: 0,interest_differential,next_month_currency_return,IMF Country Code
0,0.001414,-0.061174,GBR
1,-0.00057,-0.05812,BEL
2,0.001478,-0.056031,DNK
3,0.000655,-0.056991,FRA
4,-0.002928,-0.067056,DEU


## Prepare the Data 

In [5]:
# Standardize the two numeric columns with StandardScaler.
# fit_transform learns the scaling from these columns and returns the
# transformed values as a NumPy array (no column labels).
numeric_columns = ["interest_differential", "next_month_currency_return"]
scaler = StandardScaler()
scaled_data = scaler.fit_transform(rate_df[numeric_columns])

# Display the first three rows of the scaled data
scaled_data[:3]

array([[-0.24270991, -1.93608838],
       [-0.8539933 , -1.84109498],
       [-0.22308154, -1.77613322]])

In [6]:
# Wrap the scaled NumPy array in a DataFrame.
# The column names match those passed to the StandardScaler step.
# Reusing rate_df's index keeps each scaled row labelled like the row it
# was computed from, so the later axis=1 concat with the dummy columns
# (which inherit rate_df's index) always lines up, even if rate_df is
# ever given a non-default index.
scaled_df = pd.DataFrame(
    scaled_data,
    columns=["interest_differential", "next_month_currency_return"],
    index=rate_df.index,
)

scaled_df


Unnamed: 0,interest_differential,next_month_currency_return
0,-0.242710,-1.936088
1,-0.853993,-1.841095
2,-0.223082,-1.776133
3,-0.476617,-1.805994
4,-1.580459,-2.119073
...,...,...
994,0.122649,-0.846237
995,-0.038476,-0.722418
996,-2.065714,-0.113693
997,-0.283230,-1.169689


In [25]:
# One-hot encode the "IMF Country Code" column: one 0/1 integer column
# per distinct country code.
country_codes = rate_df["IMF Country Code"]
IMF_code_dummies = pd.get_dummies(country_codes, dtype=int)

# Preview the encoded columns
IMF_code_dummies.head()


Unnamed: 0,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [12]:
# Place the scaled numeric columns and the country-code dummy columns
# side by side in one DataFrame (rows are matched on index).
frames_to_combine = [scaled_df, IMF_code_dummies]
combined_df = pd.concat(frames_to_combine, axis="columns")

# Preview the combined DataFrame
combined_df.head()


Unnamed: 0,interest_differential,next_month_currency_return,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE
0,-0.24271,-1.936088,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1,-0.853993,-1.841095,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,-0.223082,-1.776133,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
3,-0.476617,-1.805994,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4,-1.580459,-2.119073,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


## Fit and Predict with KMeans


In [13]:
# Initialize the K-Means model with n_clusters=3.
# n_init is set explicitly: the default changed from 10 to "auto" in
# scikit-learn 1.4, and leaving it unset emits a FutureWarning on 1.2/1.3
# and makes the clustering depend on the installed version. 10 keeps the
# historical behavior.
k_model = KMeans(n_clusters=3, n_init=10, random_state=1)

# Fit the model on the scaled numeric features (scaled_df)
k_model.fit(scaled_df)

# Predict the cluster label for every row of scaled_df
k_model_clusters = k_model.predict(scaled_df)

# View the predicted cluster labels
k_model_clusters


  super()._check_params_vs_input(X, default_n_init=10)


array([1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 0, 2, 1, 0, 2, 0, 2, 0, 2, 2, 0, 2,
       0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 1,
       2, 2, 2, 1, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 0, 2, 2, 0, 2, 0, 0, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 0, 2, 1,
       1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 1, 2, 2, 2, 2,
       2, 2, 1, 2, 2, 2, 2, 0, 1, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 0, 2,
       1, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 2,
       2, 0, 2, 0, 0, 2, 0, 2, 0, 0, 2, 1, 2, 2, 2, 2, 1, 2, 0, 2, 2, 2,
       2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 0, 2, 0, 1, 2, 0, 0, 0,
       2, 0, 0, 0, 0, 0, 2, 2, 2, 1, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 1, 2,
       2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 0,
       2, 0, 2, 0, 1, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 1,
       2, 2, 2, 2, 1, 2, 1, 1, 2, 0, 2, 0, 0, 2, 0,

In [16]:
# Build a new DataFrame from the concatenated one with the K-Means labels
# attached. assign() returns a new object, leaving combined_df untouched.
# The column name "county_clusters" is kept exactly as-is because the
# groupby and plotting cells refer to it by that name.
combined_df_copy = combined_df.assign(county_clusters=k_model_clusters)

# Preview the labelled DataFrame
combined_df_copy.head()


Unnamed: 0,interest_differential,next_month_currency_return,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE,county_clusters
0,-0.24271,-1.936088,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1
1,-0.853993,-1.841095,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
2,-0.223082,-1.776133,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1
3,-0.476617,-1.805994,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
4,-1.580459,-2.119073,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2


## Plot and Analyze the Results

In [27]:
# Average next-month currency return per K-Means cluster.
# The values are the standardized returns held in combined_df_copy.
returns_by_cluster = combined_df_copy.groupby("county_clusters")[
    "next_month_currency_return"
]
returns_by_cluster.mean()

county_clusters
0    0.623489
1   -1.188580
2   -0.308021
Name: next_month_currency_return, dtype: float64

In [29]:
# Plot the K-Means clusters.
# combined_df_copy only holds the one-hot country columns, so the original
# "IMF Country Code" labels are re-attached (aligned on index) to make the
# hover tooltip column actually exist in the plotted data.
kmeans_plot_df = combined_df_copy.assign(
    **{"IMF Country Code": rate_df["IMF Country Code"]}
)
kmeans_plot_df.hvplot.scatter(
    x="interest_differential",
    y="next_month_currency_return",
    by="county_clusters",
    hover_cols=["IMF Country Code"],
)


* Based on this plot, which cluster of countries appears to provide both the highest interest spread and the highest currency return?

## Bonus

In [23]:
# Initialize a Birch model with n_clusters=5
birch_model = Birch(n_clusters=5)

# Fit the model on the scaled numeric features (scaled_df)
birch_model.fit(scaled_df)

# Predict the cluster label for every row of scaled_df
birch_model_clusters = birch_model.predict(scaled_df)

# Attach the Birch labels to a fresh copy of the concatenated DataFrame
birch_combined_df_copy = combined_df.copy()
birch_combined_df_copy["county_clusters"] = birch_model_clusters

# View the predicted cluster labels
# (only the last expression in a cell is displayed)
birch_model_clusters


array([0, 1, 0, 0, 1, 0, 1, 2, 0, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1,
       2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2,
       1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1,
       1, 2, 1, 1, 2, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 2, 1, 2,
       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1,
       2, 1, 2, 1, 1, 1, 1, 2, 0, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1,
       2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2,
       1, 2, 0, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 1, 2, 1, 2, 0, 1, 2, 2, 2,
       1, 1, 2, 1, 2, 2, 1, 1, 1, 0, 2, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 0, 1,
       1, 1, 1, 0, 1, 0, 0, 1, 2, 1, 0, 0, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2,
       1, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 2, 1, 2, 2, 1, 2,

In [24]:
# Plot the Birch clusters.
# birch_combined_df_copy only holds the one-hot country columns, so the
# original "IMF Country Code" labels are re-attached (aligned on index) to
# make the hover tooltip column actually exist in the plotted data.
birch_plot_df = birch_combined_df_copy.assign(
    **{"IMF Country Code": rate_df["IMF Country Code"]}
)
birch_plot_df.hvplot.scatter(
    x="interest_differential",
    y="next_month_currency_return",
    by="county_clusters",
    hover_cols=["IMF Country Code"],
)