<a href="https://colab.research.google.com/github/voidcentury/aided_reading/blob/master/time_series_cv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
""" Colab/Drive setup """

from google.colab import drive
drive.mount('/content/drive')

HOME = '/content/drive/My\ Drive/Colab\ Notebooks/numerai'
DATA_PATH = HOME + '/data/numerai_train_vals.csv'
MODELS_DIR = HOME + '/models'
WANDB_AUTH_FILE = HOME + '/wandb-login'

In [3]:
""" GitHub setup """

import os
from getpass import getpass
import urllib

user = 'voidcentury'
password = getpass('Password: ')
password = urllib.parse.quote(password)       # your password is converted into url format

cmd_string = 'git clone https://{0}:{1}@github.com/voidcentury/beat_numerai.git'.format(user, password)

os.system(cmd_string)
%cd beat_numerai

Password: ··········
/content/beat_numerai


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor

from dataset import DataSet
from preprocess import preprocess
from cross_validate import ForwardLag
from metrics import *
from tqdm import tqdm_notebook as tqdm

In [10]:
data = DataSet(include_val1=True, url=DATA_PATH)

params = {
    'max_depth': 4,
    'learning_rate': 0.1,
    'n_estimators': 200,
    'colsample_bytree': 0.1
}
model = XGBRegressor(**params)

In [28]:
""" wandb setup """

!pip install wandb
! chmod u+x $WANDB_AUTH_FILE
!$WANDB_AUTH_FILE

PROJECT_NAME = "test"
import wandb

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[32mSuccessfully logged in to Weights & Biases![0m


W&B Run: https://app.wandb.ai/voidcentury/test/runs/2c10f8ns

In [26]:
def model_eval(model, X_val, y_val, val_eras):
    # spearman by era
    scores = []
    for erano, era_series in val_eras.groupby(val_eras):
        inds = era_series.index
        preds = model.predict(X_val.loc[inds])
        score = spearman(y_val.loc[inds], preds)
        scores.append(score)
    return np.mean(scores)

In [29]:
wandb.init(entity='voidcentury', project=PROJECT_NAME, config=params)
forward_cv = ForwardLag(data.X_train, data.y_train, data.eras_train, 
                        n_splits=3, n_train_eras=50, n_val_eras=15, n_lag_eras=8)

cv_scores = []
for (X_train, y_train, X_val, y_val, val_eras) in forward_cv.get_splits():
    model.fit(X_train, y_train)
    score = model_eval(model, X_val, y_val, val_eras)
    cv_scores.append(score)
    wandb.log({"val-fold score": score})

avg_cv_score = np.mean(cv_scores)
print("Model val score:",)
wandb.run.summary["val_score"] = avg_cv_score

Number of shifting eras:  29
Train eras ranges:
 [[  1  50]
 [ 30  79]
 [ 60 109]] 

Val eras ranges:
 [[ 59  73]
 [ 88 102]
 [118 132]]
Model val score:


In [9]:
1

1