Skip to content

Commit

Permalink
feat(timeseries): Add time-series stock example.
Browse files Browse the repository at this point in the history
  • Loading branch information
fabclmnt committed Jan 24, 2021
1 parent 343eeea commit b3d6df9
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 0 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
Empty file.
File renamed without changes.
Empty file.
28 changes: 28 additions & 0 deletions src/ydata_synthetic/preprocessing/timeseries/stock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
Get the GOOG stock data from Yahoo Finance.
Data from the period 01 January 2017 - 24 January 2021
"""
import os
import requests as req
import pandas as pd

from ydata_synthetic.preprocessing.timeseries.utils import real_data_loading

def transformations(seq_len: int):
    """Load the GOOG daily stock data and cut it into sequences.

    The CSV is cached as ``data/stock.csv`` inside the parent of the
    directory that contains this module. When the cached file is missing,
    it is downloaded from Yahoo Finance and stored there first.

    Args:
        seq_len: Length of each sequence passed on to ``real_data_loading``.

    Returns:
        Whatever ``real_data_loading`` returns for the loaded stock values.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    # Resolve the cache location once, from this module's own location, so
    # that reading and writing always target the same file regardless of the
    # current working directory.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(os.path.dirname(module_dir), 'data')
    csv_path = os.path.join(data_dir, 'stock.csv')

    try:
        stock_df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Only a missing cache triggers the download; any other failure
        # (e.g. a corrupt file) is allowed to surface to the caller.
        stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1483228800&period2=1611446400&interval=1d&events=history&includeAdjustedClose=true'
        response = req.get(stock_url)
        # Do not cache an HTTP error page as if it were the CSV.
        response.raise_for_status()

        os.makedirs(data_dir, exist_ok=True)
        # The context manager flushes and closes the file before re-reading.
        with open(csv_path, 'wb') as stock_csv:
            stock_csv.write(response.content)

        # Reading the stock data
        stock_df = pd.read_csv(csv_path)

    # NOTE(review): the Yahoo export is expected to carry a textual 'Date'
    # column, which cannot be min-max scaled -- confirm against the CSV.
    if 'Date' in stock_df.columns:
        stock_df = stock_df.set_index('Date')

    # Data transformations to be applied prior to be used with the
    # synthesizer model; the loader is annotated as taking a numpy array.
    processed_data = real_data_loading(stock_df.values, seq_len=seq_len)

    return processed_data
35 changes: 35 additions & 0 deletions src/ydata_synthetic/preprocessing/timeseries/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
Utility functions to be shared by the time-series preprocessing required to feed the data into the synthesizers
"""
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Method implemented here: https://github.com/jsyoon0823/TimeGAN/blob/master/data_loading.py
# Originally used in TimeGAN research
def real_data_loading(data: np.array, seq_len):
    """Load and preprocess real-world datasets.

    The rows are reversed, every column is min-max scaled, the result is cut
    into overlapping windows of ``seq_len`` rows and the windows are returned
    in random order.

    Args:
        - data: Numpy array (rows x features) with the values from a dataset
        - seq_len: sequence length

    Returns:
        - data: list of preprocessed windows, each holding ``seq_len`` rows.
    """
    # Flip the data to make chronological data
    ori_data = np.asarray(data)[::-1]
    # Normalize the data: the scaler has to be fitted and applied;
    # passing the data to the constructor would only set ``feature_range``.
    ori_data = MinMaxScaler().fit_transform(ori_data)

    # Cut data by sequence length. The bound mirrors the reference TimeGAN
    # loader: it produces ``len(ori_data) - seq_len`` windows.
    windows = [ori_data[start:start + seq_len]
               for start in range(0, len(ori_data) - seq_len)]

    # Mix the datasets (to make it similar to i.i.d)
    order = np.random.permutation(len(windows))
    return [windows[position] for position in order]

0 comments on commit b3d6df9

Please sign in to comment.