In [1]:
import wandb
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import datetime
pio.templates.default = 'plotly_dark'

In [2]:
def score_metrics(y_true, y_pred):
    return {
        'mae': mean_absolute_error(y_true, y_pred),
        'mse': mean_squared_error(y_true, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        'r2': r2_score(y_true, y_pred)
    }

In [3]:
df = pd.read_csv('../data/processed/eth_hourly.csv')
df['TimeStamp'] = pd.to_datetime(df['TimeStamp'])
df.sort_values(by='TimeStamp', ascending=True, inplace=True)
final_test = df[df['TimeStamp'] > datetime.datetime(year=2021, month=5, day=21)]
df.drop(final_test.index, axis=0, inplace=True)
df.drop('TimeStamp', axis=1, inplace=True)
df.head()

Unnamed: 0,open,high,low,CurrentClose,Volume_USD,NextClose
0,733.12,736.48,731.19,733.04,4246576.84,734.64
1,733.04,735.99,731.7,734.64,2044880.32,731.32
2,734.64,734.65,722.0,731.32,7891317.14,728.44
3,731.32,732.0,728.44,728.44,2111099.12,735.21
4,728.44,739.3,725.52,735.21,7197617.75,732.1


In [4]:
X = df.drop('NextClose', axis=1)
y = df['NextClose']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=43)

In [6]:
wandb.init(project="ETH-Price")
model = LinearRegression()
model.fit(X_train, y_train)
wandb.sklearn.plot_summary_metrics(model=model, X=X_train, y=y_train, X_test=X_test, y_test=y_test)
wandb.sklearn.plot_regressor(model, X_train, X_test, y_train, y_test,  model_name='LinearRegression')
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

train_metrics = score_metrics(y_train, train_preds)
test_metrics = score_metrics(y_test, test_preds)

print(f'train_metrics: {train_metrics}\ntest_metrics: {test_metrics}')
wandb.log({'model_name': 'LinearRegression'})
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mzbloss[0m (use `wandb login --relogin` to force relogin)


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting LinearRegression.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged learning curve.
[34m[1mwandb[0m: Logged outlier candidates.
[34m[1mwandb[0m: Logged residuals.


train_metrics: {'mae': 3.8481437846378106, 'mse': 129.8626299829561, 'rmse': 11.395728584998682, 'r2': 0.9996922473677077}
test_metrics: {'mae': 3.5992873848833566, 'mse': 106.72907175175999, 'rmse': 10.330976321324137, 'r2': 0.9997325303565244}


VBox(children=(Label(value=' 0.16MB of 0.16MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,12
_timestamp,1624366661
_step,5
model_name,LinearRegression


0,1
_runtime,▁▃▃▅▆█
_timestamp,▁▃▃▅▆█
_step,▁▂▄▅▇█


In [7]:
wandb.init(project="ETH-Price")
model = GradientBoostingRegressor()
model.fit(X_train, y_train)
wandb.sklearn.plot_summary_metrics(model=model, X=X_train, y=y_train, X_test=X_test, y_test=y_test)
wandb.sklearn.plot_regressor(model, X_train, X_test, y_train, y_test,  model_name='GradientBoostingRegressor')
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

train_metrics = score_metrics(y_train, train_preds)
test_metrics = score_metrics(y_test, test_preds)

print(f'train_metrics: {train_metrics}\ntest_metrics: {test_metrics}')
wandb.log({'model_name': 'GradientBoostingRegressor'})
wandb.finish()

[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting GradientBoostingRegressor.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged learning curve.
[34m[1mwandb[0m: Logged outlier candidates.
[34m[1mwandb[0m: Logged residuals.


train_metrics: {'mae': 4.450938684124185, 'mse': 97.83224624146709, 'rmse': 9.891018463306349, 'r2': 0.999768154000055}
test_metrics: {'mae': 4.892725023231083, 'mse': 152.28385863923634, 'rmse': 12.340334624281319, 'r2': 0.9996183672479411}


VBox(children=(Label(value=' 0.16MB of 0.16MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,45
_timestamp,1624366710
_step,5
model_name,GradientBoostingRegr...


0,1
_runtime,▁▂▆▇██
_timestamp,▁▂▆▇██
_step,▁▂▄▅▇█


In [8]:
wandb.init(project="ETH-Price")
model = RandomForestRegressor()
model.fit(X_train, y_train)
wandb.sklearn.plot_summary_metrics(model=model, X=X_train, y=y_train, X_test=X_test, y_test=y_test)
wandb.sklearn.plot_regressor(model, X_train, X_test, y_train, y_test,  model_name='RandomForestRegressor')

train_preds = model.predict(X_train)
test_preds = model.predict(X_test)

train_metrics = score_metrics(y_train, train_preds)
test_metrics = score_metrics(y_test, test_preds)

print(f'train_metrics: {train_metrics}\ntest_metrics: {test_metrics}')
wandb.log({'model_name': 'RandomForestRegressor'})
wandb.finish()

[34m[1mwandb[0m: 
[34m[1mwandb[0m: Plotting RandomForestRegressor.
[34m[1mwandb[0m: Logged summary metrics.
[34m[1mwandb[0m: Logged learning curve.
[34m[1mwandb[0m: Logged outlier candidates.
[34m[1mwandb[0m: Logged residuals.


train_metrics: {'mae': 2.1116402394524796, 'mse': 45.639834568879216, 'rmse': 6.755726057862264, 'r2': 0.9998918412538865}
test_metrics: {'mae': 4.001829284339044, 'mse': 126.36999356619424, 'rmse': 11.241440902579804, 'r2': 0.9996833089937878}


VBox(children=(Label(value=' 0.16MB of 0.16MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
_runtime,94
_timestamp,1624366808
_step,5
model_name,RandomForestRegresso...


0,1
_runtime,▁▁▆▇██
_timestamp,▁▁▆▇██
_step,▁▂▄▅▇█


## Choose best model

In [10]:
model = LinearRegression()
model.fit(X_train, y_train)
train_preds = model.predict(X_train)
test_preds = model.predict(X_test)
train_metrics = score_metrics(y_true=y_train, y_pred=train_preds)
test_metrics = score_metrics(y_true=y_test, y_pred=test_preds)

print(train_metrics)
print(test_metrics)

{'mae': 3.816305427095081, 'mse': 129.6920853281712, 'rmse': 11.388243294212291, 'r2': 0.9996926515299093}
{'mae': 3.5687386027484904, 'mse': 105.44880409152111, 'rmse': 10.26882681183791, 'r2': 0.9997357387863273}


actuals minus model prediction means that when the result is negative, we overpredicted the price. When it is positive, we under predicted.


So on highly negative residuals, the model believed the price be higher than it actually was. In these scenarios, since the model believed the price would go up, we would have bought with the intention of selling at the higher price.

In [11]:
train_residuals = y_train - train_preds
test_residuals = y_test - test_preds
residuals = pd.concat(
    (train_residuals.reset_index(drop=True), test_residuals.reset_index(drop=True)), 
    axis=1, 
    ignore_index=True
)
residuals.columns = ['train', 'test']

In [12]:
px.box(pd.melt(residuals), x='variable', y='value')

In [13]:
test = X_test
test['NextClose'] = y_test
test['ModelPrediction'] = test_preds
test['Residuals'] = test['NextClose'] - test['ModelPrediction']
test['action'] = test['ModelPrediction'] < test['CurrentClose']
test['action'] = test['action'].apply(lambda x: 'buy' if x == False else 'sell')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [14]:
final_preds = model.predict(final_test.drop(['NextClose', 'TimeStamp'], axis=1))
final_test['ModelPrediction'] = final_preds
final_test['Residuals'] = final_test['NextClose'] - final_test['ModelPrediction']
final_test['action'] = final_test['ModelPrediction'] < final_test['CurrentClose']
final_test['action'] = final_test['action'].apply(lambda x: 'buy' if x == False else 'sell')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [15]:
px.line(final_test, y='Residuals', color='action')

## Simulating

Say we start with a certain amount of money, and we buy or sell in increments of 10% of our current sum of money. How much money would we net after our final test?

In [16]:
initial_money = 100
percent_of_total_money_to_move = 0.10
eth_wallet_balance = 0.0
total_assets = 0.0
amount_of_eth_to_exchange = 0.0
amount_of_usd_to_exchange = 0.0

trading_history = []
for step, (idx, values) in enumerate(final_test.iterrows()):
    
    if step == 0:
        total_money = initial_money
    
    current_close = values['CurrentClose']
    next_close = values['NextClose']
    model_prediction = values['ModelPrediction']
    residuals = values['Residuals']
    action = values['action']
    
    amount_of_usd_to_exchange = percent_of_total_money_to_move * total_money
    amount_of_eth_to_exchange = amount_of_usd_to_exchange / current_close
    
    if total_money <= 0:
        break
    else:
        
        if action == 'sell':
            if amount_of_eth_to_exchange > eth_wallet_balance:
                amount_of_eth_to_exchange = eth_wallet_balance

            total_money += amount_of_eth_to_exchange * current_close
            eth_wallet_balance -= amount_of_eth_to_exchange
            assert eth_wallet_balance >= 0, f'eth_wallet_balance is negative: {eth_wallet_balance}'

        elif action == 'buy':
            if amount_of_usd_to_exchange > total_money:
                amount_of_usd_to_exchange = total_money

            total_money -= amount_of_usd_to_exchange
            eth_wallet_balance += amount_of_eth_to_exchange

        else:
            pass
        
        total_assets = eth_wallet_balance * current_close + total_money
    
        actions_this_turn = {
            'action': action,
            'amount_of_eth_to_exchange': amount_of_eth_to_exchange,
            'amount_of_usd_to_exchange': amount_of_usd_to_exchange,
            'eth_wallet_balance': eth_wallet_balance,
            'total_money': total_money,
            'total_assets': total_assets
        }
        trading_history.append(actions_this_turn)
        
trading_history = pd.DataFrame(trading_history)
trading_history.head()

Unnamed: 0,action,amount_of_eth_to_exchange,amount_of_usd_to_exchange,eth_wallet_balance,total_money,total_assets
0,buy,0.003446,10.0,0.003446,90.0,100.0
1,sell,0.003143,9.0,0.000303,99.0,99.868031
2,sell,0.000303,9.9,0.0,99.848148,99.848148
3,sell,0.0,9.984815,0.0,99.848148,99.848148
4,sell,0.0,9.984815,0.0,99.848148,99.848148


In [17]:
trading_history['TimeStamp'] = final_test['TimeStamp'].values

In [18]:
fig = px.line(
    data_frame=trading_history, 
    x='TimeStamp',
    y='total_assets', 
    #color='action', 
    title='Total Assets [In USD]'
)
fig.update_layout(
    yaxis_title="Total Assets [$USD]",
    legend_title="Buy or Sell Action", 
    yaxis_tickprefix = '$', 
    yaxis_tickformat = ',.'
)

fig.show()

In [19]:
fig = px.line(
    data_frame=trading_history, 
    x='TimeStamp',
    y='eth_wallet_balance', 
    title='Total ETH '
)
fig.update_layout(
    yaxis_title="Total ETH",
)

fig.show()

In [20]:
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(
        x=trading_history['TimeStamp'], 
        y=trading_history['eth_wallet_balance'], 
        name="ETH Wallet Balance"
    ),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(
        x=trading_history['TimeStamp'], 
        y=final_test['CurrentClose'].values, 
        name="ETH Price"
    ),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="ETH Wallet vs Price of ETH",
)

# Set x-axis title
fig.update_xaxes(title_text="TimeStamp")

# Set y-axes titles
fig.update_yaxes(title_text="ETH Wallet Balance", secondary_y=False)
fig.update_yaxes(title_text="Price of ETH [In $USD]", secondary_y=True, )

fig.show()


In [21]:
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(
        x=trading_history['TimeStamp'], 
        y=trading_history['eth_wallet_balance'], 
        name="ETH Wallet Balance"
    ),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(
        x=trading_history['TimeStamp'], 
        y=trading_history['total_assets'], 
        name="Total Assets"
    ),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="ETH Wallet vs Total Assets",
)

# Set x-axis title
fig.update_xaxes(title_text="TimeStamp")

# Set y-axes titles
fig.update_yaxes(title_text="ETH Wallet Balance", secondary_y=False)
fig.update_yaxes(title_text="Total Assets [In $USD]", secondary_y=True, )

fig.show()
