In [1]:
import torch
import pandas as pd
import csv

In [8]:
# Dataset configuration
TIME_BEG = 1_451_606_400       # earliest Unix timestamp (seconds) to keep: start of 2016
TIME_INTERVAL = 60             # 1-minute rows merged into one condensed time step
NUM_ENCODE_TIME_STEPS = 48     # condensed steps per input (X) window
NUM_DECODE_TIME_STEPS = 5      # condensed steps per target (y) window

In [9]:
# Read the raw 1-minute CSV into a list of rows (row 0 is the header row,
# every value is still a string at this point).
# newline='' is what the csv module documents for files handed to csv.reader,
# so that newlines embedded in quoted fields are interpreted correctly.
with open('kaggle_1min.csv', 'r', newline='') as f:
    prices = list(csv.reader(f))

In [10]:
# Build a pandas DataFrame from the raw rows: the first row supplies the
# column names, the remaining rows are the data.
header, rows = prices[0], prices[1:]
prices_df = pd.DataFrame(rows, columns=header)

print("Original dataframe: ")
print(prices_df)

# Only the Volume Weighted Average Price (VWAP) inputs since TIME_BEG are
# needed, so everything except Timestamp, Volume_(BTC) and Weighted_Price
# is dropped.
unused_columns = ["Open", "High", "Low", "Close", "Volume_(Currency)"]
prices_df = (
    prices_df
    .drop(columns=unused_columns)
    .astype(float)
    .fillna(0)  # NaN rows (time where trade volume = 0) become 0
    .loc[lambda frame: frame["Timestamp"] >= TIME_BEG]
    .reset_index(drop=True)
)

print("Processed dataframe: ")
print(prices_df)

Original dataframe: 
          Timestamp      Open      High       Low     Close Volume_(BTC)  \
0        1325317920      4.39      4.39      4.39      4.39   0.45558087   
1        1325317980       NaN       NaN       NaN       NaN          NaN   
2        1325318040       NaN       NaN       NaN       NaN          NaN   
3        1325318100       NaN       NaN       NaN       NaN          NaN   
4        1325318160       NaN       NaN       NaN       NaN          NaN   
...             ...       ...       ...       ...       ...          ...   
4727772  1609372560  28801.47  28829.42  28785.64  28829.42   0.96522104   
4727773  1609372620  28829.42   28863.9  28829.42  28857.06   2.36883117   
4727774  1609372680  28850.49  28900.52  28850.49  28882.82   2.46658976   
4727775  1609372740  28910.54  28911.52   28867.6   28881.3     7.332773   
4727776  1609372800  28893.21  28928.49  28893.21  28928.49   5.75767938   

        Volume_(Currency) Weighted_Price  
0            2.00000001

In [31]:
# Summarize prices by each TIME_INTERVAL using VWAP calculations.
# Column layout of prices_df after the earlier drop():
#   0 = Timestamp, 1 = Volume_(BTC), 2 = Weighted_Price
prices_tensor = torch.from_numpy(prices_df.values)
num_windows = prices_tensor.shape[0] // TIME_INTERVAL
shape = (num_windows, 2)
# Allocate with the source dtype instead of torch's default float32:
# Unix timestamps around 1.45e9 are not exactly representable in float32.
condensed_prices_tensor = torch.empty(shape, dtype=prices_tensor.dtype)

print("Original price data shape: ", prices_tensor.shape)
print("Time condensed price data shape: ", condensed_prices_tensor.shape)

# Visit every complete window. Deriving the loop from num_windows (rather
# than from shape[0] - TIME_INTERVAL) guarantees that each allocated row is
# written, including the last one when the row count is an exact multiple of
# TIME_INTERVAL; otherwise that row would keep torch.empty's garbage values.
for j in range(num_windows):
    i = j * TIME_INTERVAL
    window = prices_tensor[i:i + TIME_INTERVAL]

    # The condensed step is stamped with the first timestamp of its window.
    condensed_prices_tensor[j, 0] = window[0, 0]

    volume_weighted_sum = torch.sum(window[:, 1] * window[:, 2])
    total_volume = torch.sum(window[:, 1])
    # NOTE(review): a window with zero total volume gives 0 / 0 = NaN here,
    # and that NaN is carried into any data built from this tensor. Decide
    # whether such windows should be imputed or dropped.
    condensed_prices_tensor[j, 1] = volume_weighted_sum / total_volume

print("Condensed price data: \n", condensed_prices_tensor)

Original price data shape:  torch.Size([2629441, 3])
Time condensed price data shape:  torch.Size([43824, 2])
Condensed price data: 
 tensor([[1.4516e+09, 4.3146e+02],
        [1.4516e+09, 4.3027e+02],
        [1.4516e+09, 4.3068e+02],
        ...,
        [1.6094e+09, 2.8804e+04],
        [1.6094e+09, 2.8800e+04],
        [1.6094e+09, 2.8817e+04]])


In [35]:
# Build the input (X) and target (y) time-series tensors with a sliding
# window over the condensed price column: each sample is
# NUM_ENCODE_TIME_STEPS prices followed by the next NUM_DECODE_TIME_STEPS.
price_series = condensed_prices_tensor[:, 1]
window_span = NUM_ENCODE_TIME_STEPS + NUM_DECODE_TIME_STEPS
num_samples = condensed_prices_tensor.shape[0] - window_span + 1

X_shape = (num_samples, NUM_ENCODE_TIME_STEPS)
y_shape = (num_samples, NUM_DECODE_TIME_STEPS)

print(X_shape)
print(y_shape)

X = torch.empty(X_shape)
y = torch.empty(y_shape)

for start in range(num_samples):
    # `split` is where the encoder window ends and the decoder window begins.
    split = start + NUM_ENCODE_TIME_STEPS
    X[start] = price_series[start:split]
    y[start] = price_series[split:split + NUM_DECODE_TIME_STEPS]

print("Training X:\n", X)
print("Training y:\n", y)

(43772, 48)
(43772, 5)
Training X:
 tensor([[  431.4624,   430.2702,   430.6817,  ...,   433.9706,   433.3792,
           433.0172],
        [  430.2702,   430.6817,   432.4590,  ...,   433.3792,   433.0172,
           433.4024],
        [  430.6817,   432.4590,   434.3698,  ...,   433.0172,   433.4024,
           431.0793],
        ...,
        [27086.8359, 27063.0703, 27063.4414,  ..., 28040.3770, 28151.8555,
         28090.8438],
        [27063.0703, 27063.4414, 26852.5723,  ..., 28151.8555, 28090.8438,
         28208.1328],
        [27063.4414, 26852.5723, 26799.5469,  ..., 28090.8438, 28208.1328,
         28395.8477]])
Training y:
 tensor([[  433.4024,   431.0793,   430.3223,   431.5531,   431.4103],
        [  431.0793,   430.3223,   431.5531,   431.4103,   431.1282],
        [  430.3223,   431.5531,   431.4103,   431.1282,   430.4728],
        ...,
        [28208.1328, 28395.8477, 28751.2402, 28812.7051, 28803.8555],
        [28395.8477, 28751.2402, 28812.7051, 28803.8555, 28800

In [36]:

# Persist the input and target tensors to the parent of the working directory.
torch.save(obj=X, f="./../X.pt")
torch.save(obj=y, f="./../y.pt")

In [41]:
# Read the saved tensors back from disk and print them as a round-trip check.
X, y = (torch.load(path) for path in ("./../X.pt", "./../y.pt"))
print(X)
print(y)

tensor([[  431.4624,   430.2702,   430.6817,  ...,   433.9706,   433.3792,
           433.0172],
        [  430.2702,   430.6817,   432.4590,  ...,   433.3792,   433.0172,
           433.4024],
        [  430.6817,   432.4590,   434.3698,  ...,   433.0172,   433.4024,
           431.0793],
        ...,
        [27086.8359, 27063.0703, 27063.4414,  ..., 28040.3770, 28151.8555,
         28090.8438],
        [27063.0703, 27063.4414, 26852.5723,  ..., 28151.8555, 28090.8438,
         28208.1328],
        [27063.4414, 26852.5723, 26799.5469,  ..., 28090.8438, 28208.1328,
         28395.8477]])
tensor([[  433.4024,   431.0793,   430.3223,   431.5531,   431.4103],
        [  431.0793,   430.3223,   431.5531,   431.4103,   431.1282],
        [  430.3223,   431.5531,   431.4103,   431.1282,   430.4728],
        ...,
        [28208.1328, 28395.8477, 28751.2402, 28812.7051, 28803.8555],
        [28395.8477, 28751.2402, 28812.7051, 28803.8555, 28800.2773],
        [28751.2402, 28812.7051, 28803.85