In [37]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [3]:
# load data: one CSV per coin, read from the working directory as '<ticker>.csv'
bnb, btc, bch, eos, eth, etc, ltc, ada, trx = (
    pd.read_csv(f'{ticker}.csv')
    for ticker in ('bnb', 'btc', 'bch', 'eos', 'eth', 'etc', 'ltc', 'ada', 'trx')
)
# the objective is to predict btc and ada

### btc

In [None]:
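# columns considered as predictors of btc_target, listed by coin ticker
# (the close-price and volume rows match the `features` list used below; the target row is not used there)
# close price: bch, eos, etc, trx
# volume: bnb, bch, eos, eth, etc, ltc, ada, trx
# target: bnb, bch, eos, eth, etc, ltc, ada, trx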

In [4]:
# combine into one dataframe
# Each coin keeps only Close / Volume / Target, renamed with its ticker as prefix.
# bnb is the only frame that carries `timestamp` into the combined table.
bnb = bnb[['timestamp', 'Close', 'Volume', 'Target']].rename(
    columns={'Close': 'bnb_close', 'Volume': 'bnb_volume', 'Target': 'bnb_target'})

btc = btc[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'btc_close', 'Volume': 'btc_volume', 'Target': 'btc_target'})

bch = bch[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'bch_close', 'Volume': 'bch_volume', 'Target': 'bch_target'})

eos = eos[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'eos_close', 'Volume': 'eos_volume', 'Target': 'eos_target'})

eth = eth[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'eth_close', 'Volume': 'eth_volume', 'Target': 'eth_target'})

etc = etc[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'etc_close', 'Volume': 'etc_volume', 'Target': 'etc_target'})

ltc = ltc[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'ltc_close', 'Volume': 'ltc_volume', 'Target': 'ltc_target'})

ada = ada[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'ada_close', 'Volume': 'ada_volume', 'Target': 'ada_target'})

trx = trx[['Close', 'Volume', 'Target']].rename(
    columns={'Close': 'trx_close', 'Volume': 'trx_volume', 'Target': 'trx_target'})

# Side-by-side concat: rows are aligned on the DataFrame index, not on `timestamp`.
# NOTE(review): this assumes all nine CSVs hold the same rows in the same order — confirm.
df = pd.concat(
    objs=[bnb, btc, bch, eos, eth, etc, ltc, ada, trx],
    axis='columns',
)

In [16]:
# check correctness: list every row of the combined frame that contains a NaN
# df[df.isna().any(axis=1)]

In [38]:
def model_training(data, features, target, split_ratio, scaler, model_list,
                   random_state=42, shuffle=True):
    """Fit each model on a scaled train split and report hold-out regression metrics.

    Parameters
    ----------
    data : DataFrame-like
        Table that supports ``data[features]`` (list of columns) and
        ``data[target]`` (single column).
    features : list of str
        Names of the predictor columns.
    target : str
        Name of the column to predict.
    split_ratio : float or int
        Passed to ``train_test_split`` as ``test_size``.
    scaler : object with ``fit_transform`` / ``transform``
        Fitted on the training features only and then applied to the test
        features. The passed object is fitted in place.
    model_list : dict
        Maps a display name to an estimator exposing ``fit`` / ``predict``.
        The passed estimators are fitted in place.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.
    shuffle : bool, default True
        Forwarded to ``train_test_split``. Pass ``False`` to keep row order
        (first rows train, last rows test), e.g. for time-ordered data.

    Returns
    -------
    dict
        ``{model_name: {'r2': ..., 'mae': ..., 'mse': ..., 'rmse': ...}}``,
        one entry per model in ``model_list`` (empty if ``model_list`` is
        empty). The same numbers are also printed.
    """
    x, y = data[features], data[target]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=split_ratio, random_state=random_state, shuffle=shuffle)

    # The scaler only ever sees training rows when it is fitted.
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    results = {}
    for model_name, clf in model_list.items():
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)

        # MSE is computed once and reused for the RMSE.
        mse = mean_squared_error(y_test, y_pred)
        scores = {
            'r2': r2_score(y_test, y_pred),
            'mae': mean_absolute_error(y_test, y_pred),
            'mse': mse,
            'rmse': np.sqrt(mse),
        }
        results[model_name] = scores

        print(model_name)
        print('R^2 Score:', scores['r2'])
        print('Mean Absolute Error:', scores['mae'])
        print('Mean Squared Error:', scores['mse'])
        print('Root Mean Squared Error', scores['rmse'])
        print()

    return results

In [42]:
# experiment setup for the btc target
target = 'btc_target'
features = [
    'timestamp',
    'bnb_volume',
    'bch_close',
    'bch_volume',
    'eos_close',
    'eos_volume',
    'eth_volume',
    'etc_close',
    'etc_volume',
    'ltc_volume',
    'ada_volume',
    'trx_close',
    'trx_volume',
]

split_ratio = 0.2  # handed to model_training, which uses it as the test-set size
scaler = StandardScaler()

# NOTE(review): ElasticNet() runs with its default penalty strength; the recorded
# R^2 of ~0 suggests the coefficients may be shrunk to zero — confirm, and consider tuning alpha.
model_list = {
    'linear regression': LinearRegression(),
    'elastic net': ElasticNet(),
}

model_training(
    data=df,
    features=features,
    target=target,
    split_ratio=split_ratio,
    scaler=scaler,
    model_list=model_list,
)

linear regression
R^2 Score: 0.000455060665171203
Mean Absolute Error: 0.0012006421044132643
Mean Squared Error: 3.8908477421002225e-06
Root Mean Squared Error 0.001972523191777532

elastic net
R^2 Score: -1.6999412588702967e-07
Mean Absolute Error: 0.0012008426210859068
Mean Squared Error: 3.892619781668589e-06
Root Mean Squared Error 0.001972972321566775



In [44]:
# write the combined frame to 'df.csv', leaving out the index column
df.to_csv(path_or_buf='df.csv', index=False)