# Data_Preparation

In [1]:
import numpy as np

## 1. Load the Data

In [2]:
# Build a synthetic two-column series with n rows:
# column 0 counts 1..n and column 1 holds ten times that count.
n = 5000
data = np.array([[step, step * 10] for step in range(1, n + 1)])
# Preview the first five rows, then confirm the (rows, columns) shape.
print(data[:5, :])
print(data.shape)

[[ 1 10]
 [ 2 20]
 [ 3 30]
 [ 4 40]
 [ 5 50]]
(5000, 2)


## 2. Drop the time column

In [3]:
# Keep only column 1 of the 2-D array; column 0 (the "time" column) is discarded.
# NOTE: this rebinds `data` to a 1-D array, so running this cell a second time
# fails because a 1-D array cannot be indexed with two subscripts.
VALUE_COLUMN = 1
data = data[:, VALUE_COLUMN]
print(data.shape)

(5000,)


## 3. Split into samples
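A single sequence of 5,000 time steps is too long; LSTMs generally work better with sequences of 200 to 400 time steps, so we split the series into shorter, non-overlapping subsequences.

For example, with 200 time steps per sample we get 5,000 / 200 = 25 samples.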

In [4]:
# Split the series into consecutive, non-overlapping windows of `length` steps.
length = 200
# Measure the array that is actually being sliced instead of relying on a
# counter defined in another cell.
n_steps = len(data)
# Fail early with a clear message: a shorter trailing window would make the
# list ragged, so it could not be stacked into a rectangular array and
# reshaped to [samples, timesteps, features] afterwards.
if n_steps % length != 0:
    raise ValueError(
        "series length {} is not a multiple of the window length {}".format(
            n_steps, length
        )
    )
# Each window covers the half-open range [start, start + length).
samples = [data[start:start + length] for start in range(0, n_steps, length)]
print(len(samples))

25


## 4. Reshape Subsequences
The LSTM expects its input in the format **[samples, timesteps, features]**. Here we have 25
samples, 200 time steps per sample, and 1 feature.

### 4.1 Convert list of arrays into 2d array

In [5]:
# Stack the list of equal-length 1-D windows into one 2-D array,
# one row per sample and one column per time step.
data = np.asarray(samples)
print(data.shape)

(25, 200)


### 4.2 Reshape into [samples, timesteps, features]

In [6]:
# Name the three target axes explicitly: [samples, timesteps, features].
n_samples, n_timesteps, n_features = len(samples), length, 1
# Add a trailing feature axis of size 1 to the 2-D array.
data = data.reshape(n_samples, n_timesteps, n_features)
print(data.shape)

(25, 200, 1)
