# Time Series Prediction with BQML and AutoML

## Set up environment variables and load necessary libraries

In [1]:
# Notebook-wide GCP settings; edit all three values before running the cells below.
PROJECT = 'qwiklabs-gcp-ml-49b827b781ab'  # REPLACE WITH YOUR PROJECT NAME
BUCKET = 'qwiklabs-gcp-ml-49b827b781ab'  # REPLACE WITH YOUR BUCKET
REGION = 'us-central1'  # REPLACE WITH YOUR BUCKET REGION e.g. us-central1

In [2]:
# Export the notebook settings as environment variables so that shell cells
# can read them. A bare `%env` only lists the current environment, and
# `PROJECT = PROJECT` is a Python self-assignment, so the previous form never
# exported anything.
import os

os.environ["PROJECT"] = PROJECT
os.environ["REGION"] = REGION

In [None]:
# Install google-cloud-bigquery 1.6.1 unless `pip freeze` already lists that exact version.
!sudo pip freeze | grep google-cloud-bigquery==1.6.1 || sudo pip install google-cloud-bigquery==1.6.1

## Review the dataset

In [7]:
%%bigquery --project $PROJECT
#standardSQL
-- Peek at ten arbitrary rows (no ORDER BY) to see which columns the table offers.
SELECT *
FROM `stock_market.eps_percent_change_sp500`
LIMIT 10

Unnamed: 0,symbol,Date,Open,Close,tomorrow_close,tomo_close_m_close,close_MIN_prior_5_days,close_MIN_prior_20_days,close_MIN_prior_260_days,close_MAX_prior_5_days,...,days_on_market,scaled_change,s_p_scaled_change,normalized_change,company,industry,direction,consensus_EPS,reported_EPS,surprise
0,A,2005-08-15,29.7,30.33,30.48,0.15,0.834158,0.834158,0.648863,0.870755,...,1442,0.004946,-0.011776,0.016722,Agilent Technologies Inc,Health Care,UP,0.26,0.28,7.69
1,A,2005-11-14,32.8,32.9,34.5,1.6,1.0,0.929179,0.620061,1.004255,...,1506,0.048632,-0.00385,0.052482,Agilent Technologies Inc,Health Care,UP,0.37,0.38,2.7
2,A,2008-07-01,35.04,35.57,35.36,-0.21,0.999157,0.999157,0.819511,1.038516,...,2166,-0.005904,-0.018204,0.0123,Agilent Technologies Inc,Health Care,UP,0.48,0.46,-4.17
3,A,2007-05-14,36.46,35.91,37.78,1.87,1.002785,0.957115,0.754107,1.042607,...,1880,0.052075,-0.001304,0.053379,Agilent Technologies Inc,Health Care,UP,0.44,0.43,-2.27
4,A,2008-07-01,35.04,35.57,35.36,-0.21,0.999157,0.999157,0.819511,1.038516,...,2166,-0.005904,-0.018204,0.0123,Agilent Technologies Inc,Health Care,UP,0.39,0.32,-17.95
5,A,2003-08-18,21.78,22.46,24.29,1.83,0.935886,0.913179,0.483081,0.969724,...,940,0.081478,0.002611,0.078868,Agilent Technologies Inc,Health Care,UP,-0.07,-0.02,71.43
6,A,2011-11-15,37.96,38.25,38.58,0.33,0.943007,0.862484,0.768627,1.013072,...,3018,0.008627,-0.016616,0.025244,Agilent Technologies Inc,Health Care,UP,0.81,0.84,3.7
7,A,2004-08-12,21.78,19.68,20.52,0.84,1.106199,1.106199,1.045732,1.161585,...,1188,0.042683,0.001477,0.041206,Agilent Technologies Inc,Health Care,UP,0.28,0.3,7.14
8,A,2005-02-14,23.54,24.02,24.39,0.37,0.959201,0.89259,0.819317,0.988759,...,1316,0.015404,0.0033,0.012104,Agilent Technologies Inc,Health Care,UP,0.19,0.2,5.26
9,A,2010-08-16,27.11,27.16,29.28,2.12,1.006996,1.006996,0.856775,1.097938,...,2701,0.078056,0.012192,0.065864,Agilent Technologies Inc,Health Care,UP,0.48,0.54,12.5


## Using BQML

### Create classification model for direction

In [9]:
%%bigquery --project $PROJECT
#standardSQL
-- Train (or retrain) a logistic-regression classifier whose label is `direction`.
-- Every other selected column is passed to the model as an input feature.
CREATE OR REPLACE MODEL
  stock_market.direction_model OPTIONS(model_type = "logistic_reg",
    input_label_cols = ["direction"]) AS
  -- query to fetch training data
SELECT
    symbol,
    Date,
    Open,
    Close,
    close_MIN_prior_5_days,
    close_MIN_prior_20_days,
    close_MIN_prior_260_days,
    close_MAX_prior_5_days,
    close_MAX_prior_20_days,
    close_MAX_prior_260_days,
    close_AVG_prior_5_days,
    close_AVG_prior_20_days,
    close_AVG_prior_260_days,
    close_STDDEV_prior_5_days,
    close_STDDEV_prior_20_days,
    close_STDDEV_prior_260_days,
    direction
FROM
  `stock_market.eps_percent_change_sp500`
WHERE
    tomorrow_close IS NOT NULL
    -- Repeatable sample/split keyed on the hash of `symbol`, so all rows of a
    -- symbol land in the same split:
    --   * first predicate keeps 1 of every 15 hash buckets (down-sampling);
    --   * second predicate keeps the buckets at or below 15*70 out of 15*100,
    --     i.e. about 70% of the sampled buckets, as the training split.
    -- ABS wraps MOD (not the other way round) because ABS raises an overflow
    -- error on the INT64 minimum; MOD keeps the sign of its first argument, so
    -- the selected rows are otherwise identical.
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15)) = 1
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) <= 15*70

### Evaluate the model on the held-out split

In [10]:
%%bigquery --project $PROJECT
#standardSQL
-- Score `stock_market.direction_model` on rows that the training query
-- (hash bucket <= 15*70) did not use.
SELECT
  *
FROM
  ML.EVALUATE(MODEL `stock_market.direction_model`,
    (
    SELECT
      symbol,
      Date,
      Open,
      Close,
      close_MIN_prior_5_days,
      close_MIN_prior_20_days,
      close_MIN_prior_260_days,
      close_MAX_prior_5_days,
      close_MAX_prior_20_days,
      close_MAX_prior_260_days,
      close_AVG_prior_5_days,
      close_AVG_prior_20_days,
      close_AVG_prior_260_days,
      close_STDDEV_prior_5_days,
      close_STDDEV_prior_20_days,
      close_STDDEV_prior_260_days,
      direction
    FROM
      `stock_market.eps_percent_change_sp500`
    WHERE
      tomorrow_close IS NOT NULL
      -- Same 1-in-15 symbol-hash sample as training; the evaluation split is
      -- the buckets in (15*70, 15*85], i.e. about 15% of the sampled buckets.
      -- ABS wraps MOD because ABS raises an overflow error on the INT64
      -- minimum; MOD keeps the sign of its first argument, so the selected
      -- rows are otherwise identical.
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15)) = 1
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) > 15*70
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) <= 15*85))

Unnamed: 0,precision,recall,accuracy,f1_score,log_loss,roc_auc
0,0.381882,0.380783,0.423313,0.371426,1.097675,0.527729


### ML.TRAINING_INFO

In [12]:
%%bigquery --project $PROJECT
#standardSQL
-- Show the training log recorded for the direction classifier, earliest iteration first.
SELECT *
FROM ML.TRAINING_INFO(MODEL `stock_market.direction_model`)
ORDER BY iteration ASC

Unnamed: 0,training_run,iteration,loss,eval_loss,learning_rate,duration_ms
0,0,0,0.323757,0.358371,0.2,3119
1,0,1,0.25992,0.352092,0.4,4563


### Compare to simple benchmark

In [13]:
%%bigquery --project $PROJECT
#standardSQL
-- Baseline for the classifier: the percentage of evaluation-split rows in each
-- `direction` class. Always predicting the largest class would reach that
-- class's percentage as accuracy.
WITH
  -- Evaluation split only; `direction` is the only column the outer query
  -- reads, so no other column is selected.
  eval_data AS (
  SELECT
    direction
  FROM
    `stock_market.eps_percent_change_sp500`
  WHERE
    tomorrow_close IS NOT NULL
    -- 1-in-15 symbol-hash sample, then buckets in (15*70, 15*85]
    -- (about 15% of the sampled buckets). ABS wraps MOD because ABS raises an
    -- overflow error on the INT64 minimum; MOD keeps the sign of its first
    -- argument, so the selected rows are otherwise identical.
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15)) = 1
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) > 15*70
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) <= 15*85)
SELECT
  direction,
  -- INT64 / INT64 yields FLOAT64 in standard SQL, so no cast is needed.
  (COUNT(direction)* 100 / (
    SELECT
      COUNT(*)
    FROM
      eval_data)) AS percentage
FROM
  eval_data
GROUP BY
  direction

Unnamed: 0,direction,percentage
0,UP,28.834356
1,DOWN,27.607362
2,STAY,43.558282


### Create regression model for normalized change

In [14]:
%%bigquery --project $PROJECT
#standardSQL
-- Train (or retrain) a linear-regression model whose label is `normalized_change`.
-- Every other selected column is passed to the model as an input feature.
CREATE OR REPLACE MODEL
  stock_market.price_model OPTIONS(model_type = "linear_reg",
    input_label_cols = ["normalized_change"]) AS
  -- query to fetch training data
SELECT
    symbol,
    Date,
    Open,
    Close,
    close_MIN_prior_5_days,
    close_MIN_prior_20_days,
    close_MIN_prior_260_days,
    close_MAX_prior_5_days,
    close_MAX_prior_20_days,
    close_MAX_prior_260_days,
    close_AVG_prior_5_days,
    close_AVG_prior_20_days,
    close_AVG_prior_260_days,
    close_STDDEV_prior_5_days,
    close_STDDEV_prior_20_days,
    close_STDDEV_prior_260_days,
    normalized_change
FROM
  `stock_market.eps_percent_change_sp500`
WHERE
    normalized_change IS NOT NULL
    -- Repeatable sample/split keyed on the hash of `symbol`, so all rows of a
    -- symbol land in the same split:
    --   * first predicate keeps 1 of every 15 hash buckets (down-sampling);
    --   * second predicate keeps the buckets at or below 15*70 out of 15*100,
    --     i.e. about 70% of the sampled buckets, as the training split.
    -- ABS wraps MOD (not the other way round) because ABS raises an overflow
    -- error on the INT64 minimum; MOD keeps the sign of its first argument, so
    -- the selected rows are otherwise identical.
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15)) = 1
    AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) <= 15*70

### Evaluate the model on the held-out split

In [15]:
%%bigquery --project $PROJECT
#standardSQL
-- Score `stock_market.price_model` on rows that the training query
-- (hash bucket <= 15*70) did not use.
SELECT
  *
FROM
  ML.EVALUATE(MODEL `stock_market.price_model`,
    (
    SELECT
      symbol,
      Date,
      Open,
      Close,
      close_MIN_prior_5_days,
      close_MIN_prior_20_days,
      close_MIN_prior_260_days,
      close_MAX_prior_5_days,
      close_MAX_prior_20_days,
      close_MAX_prior_260_days,
      close_AVG_prior_5_days,
      close_AVG_prior_20_days,
      close_AVG_prior_260_days,
      close_STDDEV_prior_5_days,
      close_STDDEV_prior_20_days,
      close_STDDEV_prior_260_days,
      normalized_change
    FROM
      `stock_market.eps_percent_change_sp500`
    WHERE
      normalized_change IS NOT NULL
      -- Same 1-in-15 symbol-hash sample as training; the evaluation split is
      -- the buckets in (15*70, 15*85], i.e. about 15% of the sampled buckets.
      -- ABS wraps MOD because ABS raises an overflow error on the INT64
      -- minimum; MOD keeps the sign of its first argument, so the selected
      -- rows are otherwise identical.
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15)) = 1
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) > 15*70
      AND ABS(MOD(FARM_FINGERPRINT(symbol), 15*100)) <= 15*85))

Unnamed: 0,mean_absolute_error,mean_squared_error,mean_squared_log_error,median_absolute_error,r2_score,explained_variance
0,0.017551,0.000846,0.000917,0.011669,-0.064331,-0.049406


### ML.TRAINING_INFO

In [16]:
%%bigquery --project $PROJECT
#standardSQL
-- Show the training log recorded for the price regression model, earliest iteration first.
SELECT *
FROM ML.TRAINING_INFO(MODEL `stock_market.price_model`)
ORDER BY iteration ASC

Unnamed: 0,training_run,iteration,loss,eval_loss,learning_rate,duration_ms
0,0,0,0.002882,0.003546,0.2,3180
