# Generate Stocks Demo Data

Run the code below to generate the KV and time-series tables and the stream used in the demo.<br>
You need to set the following environment variable with real credentials before running this notebook.<br>

In [None]:
# Placeholder: replace <V3IO-Password> with the real password before running this cell.
%env V3IO_PASSWORD = <V3IO-Password>

In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import v3io_frames as v3f
# Connect to the frames service, authenticating with the password taken from
# the V3IO_PASSWORD environment variable.
v3io_password = os.getenv('V3IO_PASSWORD')
client = v3f.Client('v3io-framesd:8081', password=v3io_password)

## Create TSDB, KV, and Stream tables

In [5]:
# Create the 'stock_metrics' time-series table.
# NOTE(review): 'rate' is presumably the expected ingestion rate (one sample
# per minute) - confirm against the frames TSDB backend documentation.
tsdb_attrs = {'rate': '1/m'}
client.create(backend='tsdb', table='stock_metrics', attrs=tsdb_attrs)

In [6]:
# Create the 'stock_stream' stream with a single shard and a 48-hour retention setting.
stream_attrs = {'retention_hours': 48, 'shards': 1}
client.create(backend='stream', table='stock_stream', attrs=stream_attrs)

In [None]:
import io

# Seed data for the 'stocks' KV table, as column-oriented JSON:
# {column: {symbol: value}}. The symbols become the DataFrame index.
kvtbl = '{"price":{"GOOG":1039.55,"AMZN":1641.03,"AAPL":169.6,"MSFT":107.59,"INTC":47.21},"volume":{"GOOG":1807725,"AMZN":7494808,"AAPL":62025994,"MSFT":40801525,"INTC":23289000},"symbol":{"GOOG":"GOOG","AMZN":"AMZN","AAPL":"AAPL","MSFT":"MSFT","INTC":"INTC"},"exchange":{"GOOG":"NASDAQ","AMZN":"NASDAQ","AAPL":"NASDAQ","MSFT":"NASDAQ","INTC":"NASDAQ"},"last_trade":{"GOOG":"2018-12-10 16:00:01","AMZN":"2018-12-10 16:00:02","AAPL":"2018-12-10 16:00:02","MSFT":"2018-12-10 16:00:02","INTC":"2018-12-10 16:00:02"},"name":{"GOOG":"Alphabet Inc Class C","AMZN":"Amazon.com, Inc.","AAPL":"Apple Inc.","MSFT":"Microsoft Corporation","INTC":"Intel Corporation"},"currency":{"GOOG":"USD","AMZN":"USD","AAPL":"USD","MSFT":"USD","INTC":"USD"},"timezone":{"GOOG":"EST","AMZN":"EST","AAPL":"EST","MSFT":"EST","INTC":"EST"}}'

# Wrap the literal in StringIO: passing a raw JSON string directly to
# pd.read_json is deprecated in recent pandas versions.
stocks_kv = pd.read_json(io.StringIO(kvtbl))
client.write(backend='kv', table='stocks', dfs=stocks_kv)

## Fill the time-series table with one day of simulated historical data

In [None]:
from pandas_datareader import data
# Import the module under an alias: a plain `import datetime` would rebind the
# `datetime` class imported in the setup cell and break later cells that call
# `datetime.now()`.
import datetime as dt

# read the stocks kv table
sdf = client.read(backend='kv', table='stocks')
stocklist = sdf.index.tolist()

# Fetch 8*60 calendar days of daily quotes; each daily row is replayed below
# as a one-minute tick.
today = dt.datetime.now()
past = today - dt.timedelta(days=8*60)

# The simulated ticks start yesterday at 09:00. This is the same for every
# symbol, so it is computed once outside the loop.
start = today.replace(hour=9, minute=0, second=0, microsecond=0) - dt.timedelta(days=1)

for sym in stocklist:
    # read historical daily data and replace index to emulate 1 minute tick data
    stocks = data.DataReader(sym, 'yahoo', past, today)
    stocks.index = pd.date_range(start=start, periods=len(stocks.index), freq='60s', tz='EST')

    # adjust columns to fit the demo fields
    stocks = stocks.drop(columns=['High', 'Low', 'Close', 'Adj Close'])
    stocks = stocks.rename(columns={'Open': 'price', 'Volume': 'volume'})
    # Random sentiment values in [-1, 1).
    # NOTE(review): this column is named 'sentiments' while the WorldTradingData
    # cell writes 'sentiment' - confirm which metric name the demo expects.
    stocks['sentiments'] = np.random.uniform(low=0.0, high=2, size=(len(stocks),)) - 1
    stocks['symbol'] = sym
    stocks['exchange'] = sdf.loc[sym].exchange

    # set multi-index columns so the TSDB will assign correct labels per series
    # (non time index fields are converted to labels)
    stocks.index.name = 'time'
    stocks = stocks.reset_index().set_index(['time', 'symbol', 'exchange'])

    client.write(backend='tsdb', table='stock_metrics', dfs=stocks)

## 2nd option: Fill the time-series table with a week's worth of historical data from the WorldTradingData API
Requires obtaining a (free) API token from [World Trading Data](https://www.worldtradingdata.com) and setting the environment variable below.

In [None]:
%env API_TOKEN = <WorldTradingData API Token>

# read the stocks kv table
sdf = client.read(backend='kv', table='stocks') 
stocklist = sdf.index.tolist()

# create all stocks data based on stocks table & WTD history API
# need the symbol & exchange name from stocks table 
urlt = 'https://www.worldtradingdata.com/api/v1/intraday?symbol={0}&range=7&sort=asc&interval=1&output=csv&api_token=' + os.getenv('API_TOKEN')
for sym in stocklist:
    url = urlt.format(sym)
    df = pd.read_csv(url,skiprows=[0])
    df.drop(['Open','High','Low'], axis=1, inplace=True)
    df.rename(columns={'Close': 'price', 'Volume': 'volume'}, inplace=True)
    df.index = pd.to_datetime(df.index)
    df.index.name='time'
    df['exchange']=sdf.loc[sym].exchange
    df['symbol']=sym
    df.reset_index(level=0, inplace=True)
    newdf =df.set_index(['time','symbol','exchange'])
    print(newdf.head())
    client.write(backend='tsdb', table='stock_metrics',dfs=newdf)

# generate random sentiment series per stock
end = datetime.now().replace(minute=0, second=0, microsecond=0)
rng = pd.date_range(end=end, periods=7200, freq='60s', tz='EST')

for sym in sdf.index.tolist():
    sen = np.random.uniform(low=0.0, high=2, size=(len(rng),))-1
    sndf = pd.DataFrame(index=[rng,[sym]*len(rng)], columns=['sentiment'])
    sndf['sentiment'] = sen
    sndf.index.names=['time','symbol']
    print(sndf.head())
    client.write(backend='tsdb', table='stock_metrics',dfs=sndf)

## Delete KV, TSDB, and Stream tables
Use these cells to start from scratch. They are commented out so that running the entire notebook does not delete the tables.

In [None]:
#client.delete('kv','stocks')

In [None]:
#client.delete('tsdb','stock_metrics')

In [None]:
#client.delete('stream','stock_stream')