feat(timeseries): TimeGAN stock data example.

ydataai · Jan 25, 2021 · 6f832f7 · 6f832f7
1 parent 6ad8132
commit 6f832f7
Show file tree

Hide file tree

Showing 9 changed files with 296 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -18,22 +18,27 @@ It consists of a set of different GAN architectures developed using TensorFlow
 
 # Quickstart
 ```
-pip install ydata-synthetic
+pip install git+https://github.com/ydataai/ydata-synthetic.git
 ```
 
 ## Examples
 Here you can find usage examples of the package and models to synthesize tabular data.
 
-**Credit Fraud dataset** 
+**Credit Fraud dataset**   [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/regular/gan_example.ipynb)
 
-[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/gan_example.ipynb)
+**Stock dataset** [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb)
 
 # Project Resources
 - Synthetic GitHub: https://github.com/ydataai/ydata-synthetic
 - Synthetic Data Community Slack: [click here to join](http://slack.ydata.ai/)
 
 ### In this repo you can find the following GAN architectures:
+
+#### Tabular data
 - [GAN](https://arxiv.org/abs/1406.2661)
 - [CGAN (Conditional GAN)](https://arxiv.org/abs/1411.1784)
 - [WGAN (Wasserstein GAN)](https://arxiv.org/abs/1701.07875)
 - [WGAN-GP (Wasserstein GAN with Gradient Penalty)](https://arxiv.org/abs/1704.00028)
+
+#### Sequential data
+- [TimeGAN](https://papers.nips.cc/paper/2019/file/c9efe5f26cd17ba6216bbe2a7d26d490-Paper.pdf)
diff --git a/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb b/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ pandas==1.2.*
 numpy==1.18.*
 scikit-learn==0.22.*
 matplotlib==3.3.2
+seaborn==0.11.*
 tensorflow==2.3.*
 tensorflow-privacy==0.5.1
 easydict==1.9

diff --git a/src/ydata_synthetic/preprocessing/timeseries/__init__.py b/src/ydata_synthetic/preprocessing/timeseries/__init__.py
@@ -0,0 +1,5 @@
+from ydata_synthetic.preprocessing.timeseries.stock import transformations as processed_stock
+
+__all__ = [
+    "processed_stock",
+]
diff --git a/src/ydata_synthetic/preprocessing/timeseries/stock.py b/src/ydata_synthetic/preprocessing/timeseries/stock.py
@@ -10,7 +10,8 @@
 
 def transformations(seq_len: int):
     try:
-        stock_df = pd.read_csv('../data/stock.csv')
+        file_path = os.path.join(os.path.dirname(os.path.join('..', os.path.dirname(__file__))), 'data')
+        stock_df = pd.read_csv(os.path.join(file_path, 'stock.csv'))
     except:
         stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1483228800&period2=1611446400&interval=1d&events=history&includeAdjustedClose=true'
         request = req.get(stock_url)

diff --git a/src/ydata_synthetic/synthesizers/time_series/timegan/__init__.py b/src/ydata_synthetic/synthesizers/time_series/timegan/__init__.py
diff --git a/src/ydata_synthetic/synthesizers/timeseries/__init__.py b/src/ydata_synthetic/synthesizers/timeseries/__init__.py
@@ -0,0 +1,5 @@
+from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN
+
+__all__ = [
+    'TimeGAN',
+]
diff --git a/...etic/synthesizers/time_series/__init__.py → ...nthesizers/timeseries/timegan/__init__.py b/...etic/synthesizers/time_series/__init__.py → ...nthesizers/timeseries/timegan/__init__.py
diff --git a/...synthesizers/time_series/timegan/model.py → .../synthesizers/timeseries/timegan/model.py b/...synthesizers/time_series/timegan/model.py → .../synthesizers/timeseries/timegan/model.py
@@ -2,9 +2,8 @@
 TimeGAN class, implemented following the original reference code.
 Original code can be found here: https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/
 """
-from tensorflow import function, GradientTape, sqrt, abs, reduce_mean, ones_like, zeros_like, random, float32
+from tensorflow import function, GradientTape, sqrt, abs, reduce_mean, ones_like, zeros_like, convert_to_tensor,float32
 from tensorflow import data as tfdata
-from tensorflow import train as tftrain
 from tensorflow import config as tfconfig
 from tensorflow import nn
 from tensorflow.keras import Model, Sequential, Input
@@ -222,6 +221,7 @@ def train_discriminator(self, x, z):
         return discriminator_loss
 
     def get_batch_data(self, data, n_windows):
+        data = convert_to_tensor(data, dtype=float32)
         return iter(tfdata.Dataset.from_tensor_slices(data)
                                 .shuffle(buffer_size=n_windows)
                                 .batch(self.batch_size).repeat())