In [6]:
# Mount Google Drive inside the Colab runtime at /content/drive so the data
# files stored there can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Copy the four per-pair CSV files (BTC, BCH, LTC, ETH) into the current
# working directory, where the later cells read them by bare file name.
# NOTE(review): the source paths are relative, so this presumably relies on the
# working directory being the parent of the `drive` mount point — confirm.
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/BTC-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/BCH-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/LTC-USD.csv .
!cp drive/'My Drive'/'Colab Notebooks'/Data/crypto_data/ETH-USD.csv .

In [28]:
import pandas as pd
import numpy as np
import random
from sklearn import preprocessing
from collections import deque

# Length of the input window: the last 60 minutes of data are used to make
# each prediction.
SEQ_LEN = 60
# How far ahead to predict, in minutes.
FUTURE_PERIOD_PREDICT = 3
# Pair whose close price is used to build the `future` and `target` columns.
RATIO_TO_PREDICT = 'LTC-USD'

def classify(current, future):
    """Label a price move: 1 means buy, 0 means sell.

    Both arguments are converted with ``float`` before being compared.

    Returns:
        int: 1 when ``future`` is strictly greater than ``current``;
        0 otherwise. Equal prices give 0, and so does a NaN on either side,
        because every comparison involving NaN is false.
    """
    price_rises = float(future) > float(current)
    return 1 if price_rises else 0

# preparing our data for recurrent neural network
def preprocess_df(df):
    """Turn a price/volume frame into shuffled fixed-length sequences.

    ``df`` must contain a ``future`` column and a ``target`` column.
    ``future`` is dropped first so it cannot end up among the features.
    ``target`` has to be the last column, because every row is split
    positionally into features (all values but the last) and label (the last
    value).

    Each non-target column is replaced by its percent change and then
    standardised with ``sklearn.preprocessing.scale``.

    Args:
        df: DataFrame with the feature columns plus ``future`` and ``target``.
            The caller's frame is left untouched: all work happens on the new
            frame returned by ``drop``.

    Returns:
        list: ``[window, target]`` pairs in random order. ``window`` is a NumPy
        array built from the ``SEQ_LEN`` most recent feature rows and
        ``target`` is the label of the newest row in that window. The list is
        empty when fewer than ``SEQ_LEN`` rows survive the NaN filtering.
    """
    # Keyword form on purpose: passing the axis positionally (``drop('future', 1)``)
    # was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on.
    df = df.drop(columns='future')

    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            # pct_change leaves NaN in the first remaining row; remove it
            # (and any other incomplete row) before scaling the column.
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    sequential_data = []
    # Sliding window: once the deque holds SEQ_LEN rows, appending a new row
    # pushes the oldest one out.
    prev_days = deque(maxlen=SEQ_LEN)

    for row in df.values:
        prev_days.append(list(row[:-1]))
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequential_data)

    # The original fell off the end here and returned None, discarding all of
    # the work above.
    return sequential_data


# Build one frame with the close price and volume of every pair, aligned on
# the `time` index. DataFrame.join defaults to a left join, so only timestamps
# present in the first pair's file are kept.
# (A stand-alone read of LTC-USD.csv used to sit here; its result was
# overwritten by the loop before being used, so it has been removed.)
ratios = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']

main_df = pd.DataFrame()
for ratio in ratios:
    dataset = f'{ratio}.csv'

    # Column names are supplied explicitly; with `names=` and no `header=`
    # pandas reads the first line of the file as data, not as a header.
    df = pd.read_csv(dataset,
                     names=['time', 'low', 'high', 'open', 'close', 'volume'])

    # Prefix the two columns we keep with the pair name so they stay unique
    # after joining, and index by timestamp so the join aligns on it.
    df = (
        df.rename(columns={'close': f'{ratio}_close',
                           'volume': f'{ratio}_volume'})
          .set_index('time')
          [[f'{ratio}_close', f'{ratio}_volume']]
    )

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df)

close_col = f'{RATIO_TO_PREDICT}_close'

# Price of the pair being predicted, FUTURE_PERIOD_PREDICT rows ahead.
main_df['future'] = main_df[close_col].shift(-FUTURE_PERIOD_PREDICT)

# The last FUTURE_PERIOD_PREDICT rows have no future price (NaN after the
# shift). classify() would label them 0, because any comparison with NaN is
# false, so drop every row whose current or future price is missing instead of
# keeping fabricated "sell" labels.
main_df = main_df.dropna(subset=[close_col, 'future'])

main_df['target'] = list(map(classify, main_df[close_col], main_df['future']))

# Hold out the most recent 5% of timestamps for validation and train on
# everything before the cut-off.
times = sorted(main_df.index.values)
# Hold out at least one timestamp: with fewer than 20 rows int(0.05 * len) is 0
# and times[-0] is times[0], which would send every row to validation.
n_validation = max(1, int(0.05 * len(times)))
last_5pct = times[-n_validation]

validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df = main_df[(main_df.index < last_5pct)]

# Bind the result to a name instead of leaving a bare trailing call: Jupyter
# echoes the value of a cell's last expression, and a named result can be
# reused by later cells.
train_data = preprocess_df(main_df)


            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0
