From 6f832f71bf92746e8cb83235799891ee9912120f Mon Sep 17 00:00:00 2001 From: fabclmnt Date: Mon, 25 Jan 2021 00:16:19 +0000 Subject: [PATCH] feat(timeseries): TimeGAN stock data example. --- README.md | 11 +- .../TimeGAN_Synthetic_stock_data.ipynb | 273 ++++++++++++++++++ requirements.txt | 1 + .../preprocessing/timeseries/__init__.py | 5 + .../preprocessing/timeseries/stock.py | 3 +- .../time_series/timegan/__init__.py | 0 .../synthesizers/timeseries/__init__.py | 5 + .../timegan}/__init__.py | 0 .../timegan/model.py | 4 +- 9 files changed, 296 insertions(+), 6 deletions(-) create mode 100644 examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb delete mode 100644 src/ydata_synthetic/synthesizers/time_series/timegan/__init__.py create mode 100644 src/ydata_synthetic/synthesizers/timeseries/__init__.py rename src/ydata_synthetic/synthesizers/{time_series => timeseries/timegan}/__init__.py (100%) rename src/ydata_synthetic/synthesizers/{time_series => timeseries}/timegan/model.py (99%) diff --git a/README.md b/README.md index 0da26209..1da074de 100644 --- a/README.md +++ b/README.md @@ -18,22 +18,27 @@ It consists in a set of different GANs architectures developed ussing Tensorflow # Quickstart ``` -pip install ydata-synthetic +pip install git+https://github.com/ydataai/ydata-synthetic.git ``` ## Examples Here you can find usage examples of the package and models to synthesize tabular data. 
-**Credit Fraud dataset** +**Credit Fraud dataset** [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/regular/gan_example.ipynb) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/gan_example.ipynb) +**Stock dataset** [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ydataai/ydata-synthetic/blob/master/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb) # Project Resources - Synthetic GitHub: https://github.com/ydataai/ydata-synthetic - Synthetic Data Community Slack: [click here to join](http://slack.ydata.ai/) ### In this repo you can find the following GAN architectures: + +#### Tabular data - [GAN](https://arxiv.org/abs/1406.2661) - [CGAN (Conditional GAN)](https://arxiv.org/abs/1411.1784) - [WGAN (Wasserstein GAN)](https://arxiv.org/abs/1701.07875) - [WGAN-GP (Wassertein GAN with Gradient Penalty)](https://arxiv.org/abs/1704.00028) + +#### Sequential data +- [TimeGAN](https://papers.nips.cc/paper/2019/file/c9efe5f26cd17ba6216bbe2a7d26d490-Paper.pdf) diff --git a/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb b/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb new file mode 100644 index 00000000..404db628 --- /dev/null +++ b/examples/timeseries/TimeGAN_Synthetic_stock_data.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "J9XZAsnQjCVz" + }, + "outputs": [], + "source": [ + "# Note: You can select between running the Notebook on \"CPU\" or \"GPU\"\n", + "# Click \"Runtime > Change runtime type\" and set \"GPU\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "FGzo4LZqjOWA" + }, + "outputs": [], + "source": [ + "#Uncomment to install ydata-synthetic lib\n", 
+ "#! pip install git+https://github.com/ydataai/ydata-synthetic.git" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Time Series synthetic data generation with TimeGAN\n", + "\n", + "- TimeGAN - Implemented according to the [paper](https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks)\n", + "- This notebook is an example of how TimeGAN can be used to generate synthetic time-series data.\n", + "\n", + "## Dataset and imports\n", + "\n", + "- The data used in this notebook was downloaded from [Yahoo finance](https://finance.yahoo.com/quote/GOOG/history?p=GOOG) and includes:\n", + " - **6 variables** - Open, High, Low, Close, Adj Close, Volume\n", + " - **1022 events** registered between the period of 1 January 2017 - 24 January 2021.\n", + " - The data was processed using a MinMaxScaler (all the variables were numeric)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "aEIlLGWpjtWL" + }, + "outputs": [], + "source": [ + "#Importing the required libs for the exercise\n", + "\n", + "from os import path\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from ydata_synthetic.preprocessing.timeseries import processed_stock\n", + "from ydata_synthetic.synthesizers.timeseries import TimeGAN" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Define Model hyperparameters\n", + "\n", + "**Networks:**\n", + "- Generator\n", + "- Discriminator\n", + "- Embedder\n", + "- Recovery Network\n", + "\n", + "TimeGAN is a Generative model based on RNN networks. In this package the implemented version follows a very simple architecture that is shared by the four elements of the GAN.\n", + "\n", + "Similarly to other parameters, the architectures of each element should be optimized and tailored to the data." 
+ ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "#Specific to TimeGANs\n", + "seq_len=24\n", + "n_seq = 6\n", + "hidden_dim=24\n", + "gamma=1\n", + "\n", + "noise_dim = 32\n", + "dim = 128\n", + "batch_size = 128\n", + "\n", + "log_step = 100\n", + "learning_rate = 5e-4\n", + "\n", + "gan_args = [batch_size, learning_rate, noise_dim, 24, 2, (0, 1), dim]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## The data" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "997 (24, 6)\n" + ] + } + ], + "source": [ + "stock_data = processed_stock(seq_len=seq_len)\n", + "print(len(stock_data),stock_data[0].shape)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Training the TimeGAN synthesizer" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "if path.exists('synthesizer_stock.pkl'):\n", + " synth = TimeGAN.load('synthesizer_stock.pkl')\n", + "else:\n", + " synth = TimeGAN(model_parameters=gan_args, hidden_dim=24, seq_len=seq_len, n_seq=n_seq, gamma=1)\n", + " synth.train(stock_data, train_steps=10000)\n", + " synth.save('synthesizer_stock.pkl')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### The generated synthetic stock data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "Synthetic data generation: 0%| | 0/8 [00:00 87\u001B[0;31m _result = pywrap_tfe.TFE_Py_FastPathExecute(\n\u001B[0m\u001B[1;32m 88\u001B[0m \u001B[0m_ctx\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_context_handle\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtld\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdevice_name\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m\"CudnnRNN\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mname\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31m_FallbackException\u001B[0m: Expecting float value for attr dropout, got int", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001B[0;31mUnknownError\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0msynth_data\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0msynth\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0msample\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;36m1000\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 2\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/Documents/YData/github/ydata-synthetic/src/ydata_synthetic/synthesizers/time_series/timegan/model.py\u001B[0m in \u001B[0;36msample\u001B[0;34m(self, n_samples)\u001B[0m\n\u001B[1;32m 277\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0m_\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mtrange\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msteps\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdesc\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m'Synthetic data generation'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 278\u001B[0m \u001B[0mZ_\u001B[0m \u001B[0;34m=\u001B[0m 
\u001B[0mnext\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mget_batch_noise\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 279\u001B[0;31m \u001B[0mrecords\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mgenerator\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mZ_\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 280\u001B[0m \u001B[0mdata\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mappend\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mrecords\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 281\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mnp\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0marray\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mnp\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvstack\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 966\u001B[0m with base_layer_utils.autocast_context_manager(\n\u001B[1;32m 967\u001B[0m self._compute_dtype):\n\u001B[0;32m--> 968\u001B[0;31m \u001B[0moutputs\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcall\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcast_inputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 969\u001B[0m 
\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_handle_activity_regularization\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 970\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_set_mask_metadata\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_masks\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py\u001B[0m in \u001B[0;36mcall\u001B[0;34m(self, inputs, training, mask)\u001B[0m\n\u001B[1;32m 715\u001B[0m ' implement a `call` method.')\n\u001B[1;32m 716\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 717\u001B[0;31m return self._run_internal_graph(\n\u001B[0m\u001B[1;32m 718\u001B[0m \u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtraining\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mtraining\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mmask\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mmask\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 719\u001B[0m convert_kwargs_to_constants=base_layer_utils.call_context().saving)\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py\u001B[0m in \u001B[0;36m_run_internal_graph\u001B[0;34m(self, inputs, training, mask, convert_kwargs_to_constants)\u001B[0m\n\u001B[1;32m 886\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 887\u001B[0m \u001B[0;31m# Compute outputs.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 888\u001B[0;31m \u001B[0moutput_tensors\u001B[0m \u001B[0;34m=\u001B[0m 
\u001B[0mlayer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcomputed_tensors\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 889\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 890\u001B[0m \u001B[0;31m# Update tensor_dict.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 966\u001B[0m with base_layer_utils.autocast_context_manager(\n\u001B[1;32m 967\u001B[0m self._compute_dtype):\n\u001B[0;32m--> 968\u001B[0;31m \u001B[0moutputs\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcall\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcast_inputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 969\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_handle_activity_regularization\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 970\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_set_mask_metadata\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_masks\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py\u001B[0m in \u001B[0;36mcall\u001B[0;34m(self, inputs, training, 
mask)\u001B[0m\n\u001B[1;32m 275\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbuilt\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 276\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_init_graph_network\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mname\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mname\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 277\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0msuper\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mSequential\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcall\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtraining\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mtraining\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mmask\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mmask\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 278\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 279\u001B[0m \u001B[0moutputs\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0minputs\u001B[0m \u001B[0;31m# handle the corner case where self.layers is empty\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py\u001B[0m in \u001B[0;36mcall\u001B[0;34m(self, inputs, training, mask)\u001B[0m\n\u001B[1;32m 715\u001B[0m ' implement a `call` method.')\n\u001B[1;32m 716\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 717\u001B[0;31m return self._run_internal_graph(\n\u001B[0m\u001B[1;32m 718\u001B[0m 
\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtraining\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mtraining\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mmask\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mmask\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 719\u001B[0m convert_kwargs_to_constants=base_layer_utils.call_context().saving)\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py\u001B[0m in \u001B[0;36m_run_internal_graph\u001B[0;34m(self, inputs, training, mask, convert_kwargs_to_constants)\u001B[0m\n\u001B[1;32m 886\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 887\u001B[0m \u001B[0;31m# Compute outputs.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 888\u001B[0;31m \u001B[0moutput_tensors\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mlayer\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcomputed_tensors\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 889\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 890\u001B[0m \u001B[0;31m# Update tensor_dict.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, inputs, initial_state, constants, **kwargs)\u001B[0m\n\u001B[1;32m 652\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 653\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0minitial_state\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m \u001B[0;32mand\u001B[0m \u001B[0mconstants\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 654\u001B[0;31m \u001B[0;32mreturn\u001B[0m 
\u001B[0msuper\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mRNN\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__call__\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 655\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 656\u001B[0m \u001B[0;31m# If any of `initial_state` or `constants` are specified and are Keras\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, *args, **kwargs)\u001B[0m\n\u001B[1;32m 966\u001B[0m with base_layer_utils.autocast_context_manager(\n\u001B[1;32m 967\u001B[0m self._compute_dtype):\n\u001B[0;32m--> 968\u001B[0;31m \u001B[0moutputs\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcall\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcast_inputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m*\u001B[0m\u001B[0margs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 969\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_handle_activity_regularization\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 970\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_set_mask_metadata\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_masks\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + 
"\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py\u001B[0m in \u001B[0;36mcall\u001B[0;34m(self, inputs, mask, training, initial_state)\u001B[0m\n\u001B[1;32m 435\u001B[0m \u001B[0mruntime\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_runtime\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0m_RUNTIME_UNKNOWN\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 436\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 437\u001B[0;31m last_output, outputs, runtime, states = self._defun_gru_call(\n\u001B[0m\u001B[1;32m 438\u001B[0m inputs, initial_state, training, mask, row_lengths)\n\u001B[1;32m 439\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py\u001B[0m in \u001B[0;36m_defun_gru_call\u001B[0;34m(self, inputs, initial_state, training, mask, sequence_lengths)\u001B[0m\n\u001B[1;32m 493\u001B[0m \u001B[0;31m# Under eager context, check the device placement and prefer the\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 494\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mcan_use_gpu\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 495\u001B[0;31m \u001B[0mlast_output\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mnew_h\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mruntime\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mgpu_gru\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m**\u001B[0m\u001B[0mgpu_gru_kwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 496\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 497\u001B[0m 
\u001B[0mlast_output\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0moutputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mnew_h\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mruntime\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mstandard_gru\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m**\u001B[0m\u001B[0mnormal_gru_kwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py\u001B[0m in \u001B[0;36mgpu_gru\u001B[0;34m(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major, go_backwards, sequence_lengths)\u001B[0m\n\u001B[1;32m 653\u001B[0m \u001B[0;31m# Reverse axis 0 since the input is already convert to time major.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 654\u001B[0m \u001B[0minputs\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0marray_ops\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mreverse\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0maxis\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 655\u001B[0;31m outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn(\n\u001B[0m\u001B[1;32m 656\u001B[0m \u001B[0minputs\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_h\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0minit_h\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_c\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mparams\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mparams\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mis_training\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 657\u001B[0m rnn_mode='gru')\n", + 
"\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py\u001B[0m in \u001B[0;36mcudnn_rnn\u001B[0;34m(input, input_h, input_c, params, rnn_mode, input_mode, direction, dropout, seed, seed2, is_training, name)\u001B[0m\n\u001B[1;32m 94\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0m_core\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_FallbackException\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 95\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 96\u001B[0;31m return cudnn_rnn_eager_fallback(\n\u001B[0m\u001B[1;32m 97\u001B[0m \u001B[0minput\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_h\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minput_c\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mparams\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mrnn_mode\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mrnn_mode\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 98\u001B[0m \u001B[0minput_mode\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0minput_mode\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdirection\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mdirection\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdropout\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mdropout\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py\u001B[0m in \u001B[0;36mcudnn_rnn_eager_fallback\u001B[0;34m(input, input_h, input_c, params, rnn_mode, input_mode, direction, dropout, seed, seed2, is_training, name, ctx)\u001B[0m\n\u001B[1;32m 176\u001B[0m \u001B[0;34m\"direction\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdirection\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m\"dropout\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdropout\u001B[0m\u001B[0;34m,\u001B[0m 
\u001B[0;34m\"seed\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mseed\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m\"seed2\"\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mseed2\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 177\u001B[0m \"is_training\", is_training)\n\u001B[0;32m--> 178\u001B[0;31m _result = _execute.execute(b\"CudnnRNN\", 4, inputs=_inputs_flat,\n\u001B[0m\u001B[1;32m 179\u001B[0m attrs=_attrs, ctx=ctx, name=name)\n\u001B[1;32m 180\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0m_execute\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmust_record_gradient\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/miniconda3/envs/ydata_synth/lib/python3.8/site-packages/tensorflow/python/eager/execute.py\u001B[0m in \u001B[0;36mquick_execute\u001B[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001B[0m\n\u001B[1;32m 57\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 58\u001B[0m \u001B[0mctx\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mensure_initialized\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 59\u001B[0;31m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001B[0m\u001B[1;32m 60\u001B[0m inputs, attrs, num_outputs)\n\u001B[1;32m 61\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mcore\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_NotOkStatusException\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mUnknownError\u001B[0m: Fail to find the dnn implementation. 
[Op:CudnnRNN]" + ] + } + ], + "source": [ + "synth_data = synth.sample(1000)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "TimeGAN - Synthetic stock data.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "ydata_synth", + "language": "python", + "name": "ydata_synth" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 3546b07b..43eaa867 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ pandas==1.2.* numpy==1.18.* scikit-learn==0.22.* matplotlib==3.3.2 +seaborn==0.11.* tensorflow==2.3.* tensorflow-privacy==0.5.1 easydict==1.9 diff --git a/src/ydata_synthetic/preprocessing/timeseries/__init__.py b/src/ydata_synthetic/preprocessing/timeseries/__init__.py index e69de29b..e8eff6c2 100644 --- a/src/ydata_synthetic/preprocessing/timeseries/__init__.py +++ b/src/ydata_synthetic/preprocessing/timeseries/__init__.py @@ -0,0 +1,5 @@ +from ydata_synthetic.preprocessing.timeseries.stock import transformations as processed_stock + +__all__ = [ + "processed_stock", +] diff --git a/src/ydata_synthetic/preprocessing/timeseries/stock.py b/src/ydata_synthetic/preprocessing/timeseries/stock.py index b2707d64..a378887a 100644 --- a/src/ydata_synthetic/preprocessing/timeseries/stock.py +++ b/src/ydata_synthetic/preprocessing/timeseries/stock.py @@ -10,7 +10,8 @@ def transformations(seq_len: int): try: - stock_df = pd.read_csv('../data/stock.csv') + file_path = os.path.join(os.path.dirname(os.path.join('..', os.path.dirname(__file__))), 'data') + stock_df = pd.read_csv(os.path.join(file_path, 'stock.csv')) 
except: stock_url = 'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1483228800&period2=1611446400&interval=1d&events=history&includeAdjustedClose=true' request = req.get(stock_url) diff --git a/src/ydata_synthetic/synthesizers/time_series/timegan/__init__.py b/src/ydata_synthetic/synthesizers/time_series/timegan/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/ydata_synthetic/synthesizers/timeseries/__init__.py b/src/ydata_synthetic/synthesizers/timeseries/__init__.py new file mode 100644 index 00000000..a3523536 --- /dev/null +++ b/src/ydata_synthetic/synthesizers/timeseries/__init__.py @@ -0,0 +1,5 @@ +from ydata_synthetic.synthesizers.timeseries.timegan.model import TimeGAN + +__all__ = [ + 'TimeGAN', +] diff --git a/src/ydata_synthetic/synthesizers/time_series/__init__.py b/src/ydata_synthetic/synthesizers/timeseries/timegan/__init__.py similarity index 100% rename from src/ydata_synthetic/synthesizers/time_series/__init__.py rename to src/ydata_synthetic/synthesizers/timeseries/timegan/__init__.py diff --git a/src/ydata_synthetic/synthesizers/time_series/timegan/model.py b/src/ydata_synthetic/synthesizers/timeseries/timegan/model.py similarity index 99% rename from src/ydata_synthetic/synthesizers/time_series/timegan/model.py rename to src/ydata_synthetic/synthesizers/timeseries/timegan/model.py index 9bf8eee9..94538f8e 100644 --- a/src/ydata_synthetic/synthesizers/time_series/timegan/model.py +++ b/src/ydata_synthetic/synthesizers/timeseries/timegan/model.py @@ -2,9 +2,8 @@ TimeGAN class implemented accordingly with: Original code can be found here: https://bitbucket.org/mvdschaar/mlforhealthlabpub/src/master/alg/timegan/ """ -from tensorflow import function, GradientTape, sqrt, abs, reduce_mean, ones_like, zeros_like, random, float32 +from tensorflow import function, GradientTape, sqrt, abs, reduce_mean, ones_like, zeros_like, convert_to_tensor,float32 from tensorflow import data as tfdata -from tensorflow 
import train as tftrain from tensorflow import config as tfconfig from tensorflow import nn from tensorflow.keras import Model, Sequential, Input @@ -222,6 +221,7 @@ def train_discriminator(self, x, z): return discriminator_loss def get_batch_data(self, data, n_windows): + data = convert_to_tensor(data, dtype=float32) return iter(tfdata.Dataset.from_tensor_slices(data) .shuffle(buffer_size=n_windows) .batch(self.batch_size).repeat())