-
Notifications
You must be signed in to change notification settings - Fork 228
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(timeseries): Add time-series stock example.
- Loading branch information
Showing
13 changed files
with
63 additions
and
0 deletions.
There are no files selected for viewing
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
""" | ||
Get the GOOG daily stock data from Yahoo Finance | ||
Data from the period 01 January 2017 - 24 January 2021 | ||
""" | ||
import os | ||
import requests as req | ||
import pandas as pd | ||
|
||
from ydata_synthetic.preprocessing.timeseries.utils import real_data_loading | ||
|
||
def transformations(seq_len: int):
    """Load the GOOG stock data and preprocess it for the synthesizer model.

    The CSV is first looked up at ``../data/stock.csv`` (relative to the
    current working directory). If it cannot be read, the data is downloaded
    from Yahoo Finance, stored as ``stock.csv`` inside a ``data`` directory
    derived from this module's location, and loaded from there.

    Args:
        - seq_len: sequence length forwarded to ``real_data_loading``

    Returns:
        - processed_data: the value returned by ``real_data_loading`` for the
          stock data.

    Raises:
        - requests.HTTPError: if the download answers with an error status.
    """
    try:
        stock_df = pd.read_csv('../data/stock.csv')
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Missing, unreadable or corrupt local copy: fetch a fresh one.
        stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1483228800&period2=1611446400&interval=1d&events=history&includeAdjustedClose=true'
        request = req.get(stock_url)
        # Fail loudly instead of caching an HTTP error page as if it were CSV.
        request.raise_for_status()

        file_path = os.path.join(os.path.dirname(os.path.join('..', os.path.dirname(__file__))), 'data')
        os.makedirs(file_path, exist_ok=True)
        csv_path = os.path.join(file_path, 'stock.csv')
        # The context manager flushes and closes the file before it is read back.
        with open(csv_path, 'wb') as stock_csv:
            stock_csv.write(request.content)

        # Read the stock data from the exact location it was written to;
        # the CWD-relative path above may point somewhere else.
        stock_df = pd.read_csv(csv_path)

    # Data transformations to be applied prior to be used with the synthesizer model
    # NOTE(review): the whole DataFrame is forwarded, including any
    # non-numeric column the CSV may contain (e.g. a date column) - confirm
    # that real_data_loading can scale it, or select numeric columns first.
    processed_data = real_data_loading(stock_df, seq_len=seq_len)

    return processed_data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
""" | ||
Utility functions to be shared by the time-series preprocessing required to feed the data into the synthesizers | ||
""" | ||
import numpy as np | ||
from sklearn.preprocessing import MinMaxScaler | ||
|
||
# Method implemented here: https://github.com/jsyoon0823/TimeGAN/blob/master/data_loading.py | ||
# Originally used in TimeGAN research | ||
def real_data_loading(data: np.ndarray, seq_len: int) -> list:
    """Load and preprocess real-world datasets.

    The rows are reversed, min-max normalized per column, cut into
    overlapping windows of ``seq_len`` consecutive rows and returned in
    random order.

    Args:
      - data: 2-D array with the values from a dataset (rows are time steps;
        presumably stored newest-first, since they are flipped before use)
      - seq_len: sequence length

    Returns:
      - data: preprocessed data, as a shuffled list of windows of
        ``seq_len`` rows each. Empty when ``len(data) <= seq_len``.
    """
    # Flip the data to make chronological data
    ori_data = data[::-1]
    # Normalize the data. The scaler has to be instantiated and fitted:
    # passing the data to the constructor would only set ``feature_range``
    # and leave ``ori_data`` bound to the scaler object itself.
    ori_data = MinMaxScaler().fit_transform(ori_data)

    # Cut data by sequence length. The upper bound mirrors the original
    # TimeGAN loader, which stops one window short of the end of the data.
    windows = [ori_data[i:i + seq_len] for i in range(len(ori_data) - seq_len)]

    # Mix the datasets (to make it similar to i.i.d)
    idx = np.random.permutation(len(windows))
    return [windows[i] for i in idx]