# AO Sydney fastai sweep

### Notebook setup

Import pre-installed packages 

In [None]:
import pandas as pd
from pathlib import Path
import fastai
from fastai.tabular.all import *
from fastai.tabular.core import Normalize, TabularPandas, TabDataLoader
from fastai.callback.wandb import *
fastai.__version__

In [None]:
from fastcore.basics import range_of, ifnone

from fastai.callback.progress import ProgressCallback
from fastai.callback.schedule import lr_find

from fastai.data.block import CategoryBlock
from fastai.data.core import DataLoaders
from fastai.data.external import untar_data, URLs
from fastai.data.transforms import RandomSplitter

from fastai.learner import load_learner, Learner
from fastai.metrics import accuracy

from fastai.tabular.core import Categorify, FillMissing, FillStrategy, Normalize, TabularPandas, TabDataLoader
from fastai.tabular.model import TabularModel
from fastai.tabular.learner import tabular_learner

Package versions: 
* *Python v. 3.7.7*<br>
* *Pandas v. 1.1.4*<br>
* *PyTorch v. 1.6.0*<br>
* *Fastai v. 2.0.11*

Set the seed for reproducible results

In [None]:
import random

import numpy as np
import torch

# One seed shared by every random number generator this notebook relies on.
seed = 42

# Seed the Python, NumPy and PyTorch (CPU) generators.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Seed every CUDA device as well when a GPU is available.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

Load the flux and Zernike data from the `data` folder and combine them column-wise into a single DataFrame

In [None]:
# Read the two feather files from the local `data` directory.
zernikeData_df = pd.read_feather('data/zernikeData.feather')
fluxData_df = pd.read_feather('data/fluxData.feather')

# Put the flux columns and the Zernike columns side by side in one frame
# (axis=1 concatenates column-wise; flux columns come first).
dataFrame = pd.concat(
    [fluxData_df, zernikeData_df],
    axis=1,
)


In [None]:
# Display pandas' summary statistics for the Zernike DataFrame.
zernikeData_df.describe()

In [None]:
# Randomly split the row indices into training and validation index lists.
# Passing the notebook-wide `seed` makes the split identical every time this
# cell is (re-)run, instead of depending on whatever state the global torch
# RNG happens to be in when the cell executes.
splits = RandomSplitter(seed=seed)(range_of(fluxData_df))

In [None]:
# Model inputs are the flux columns; prediction targets are the Zernike columns.
x_names = list(fluxData_df.columns.values)
y_names = list(zernikeData_df.columns.values)

# Wrap the combined frame in a fastai TabularPandas, with Normalize as the only
# preprocessing step and the train/validation split computed earlier.
norm = Normalize()
to = TabularPandas(
    dataFrame,
    procs=norm,
    cont_names=x_names,
    y_names=y_names,
    splits=splits,
)

# Keep a handle on the Normalize proc held by `to`.
norms = to.procs.normalize

In [None]:
# Peek at the first rows of the continuous (input) columns held by `to`.
to.conts.head()

## The dataloader

In [None]:
# Build the train/validation DataLoaders from the TabularPandas object using
# fastai's default settings; `dls` is what the learner is created from.
dls = to.dataloaders()
# Display a sample batch as a quick sanity check.
dls.show_batch()

In [None]:
#dls = TabularDataLoaders.from_df(dataFrame, y_names = y_names)

In [None]:
# Manually constructed loaders: a shuffled training loader (batch size 64,
# dropping the last incomplete batch) and a validation loader (batch size 128).
# NOTE(review): neither `trn_dl` nor `val_dl` is referenced by the later cells
# visible in this notebook -- the learner is built from `dls`. Confirm whether
# they were meant to be combined via `DataLoaders(trn_dl, val_dl)`.
trn_dl = TabDataLoader(to.train, bs=64, shuffle=True, drop_last=True)
val_dl = TabDataLoader(to.valid, bs=128)

In [None]:
# Display a sample batch from `dls`.
dls.show_batch()

## Define learner

In [None]:
import wandb
# Start a Weights & Biases run in the "FastAiSweepLoop" project. The trailing
# semicolon keeps the notebook from echoing the call's return value.
wandb.init(project= "FastAiSweepLoop");

In [None]:
# Tabular learner on `dls`: hidden layers of 2000, 1050 and 100 units,
# outputs constrained to the range [-0.5, 0.5] via `y_range`, weight decay
# of 0.1, and RMSE reported as the metric.
learn = tabular_learner(
    dls,
    layers=[2000, 1050, 100],
    y_range=[-.5, .5],
    wd=0.1,
    metrics=rmse,
)
#learn.summary()

In [None]:
# import wandb
# #!wandb login
# wandb.init(project= "FastAiSweep")


In [None]:
#lr_min, lr_steep = learn.lr_find()
#print(f"minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
from fastai.callback.tracker import SaveModelCallback
from fastai.callback.wandb import *

# Train for 40 epochs at a learning rate of 1e-3, logging the run to
# Weights & Biases.
#
# WandbCallback(log_model=True) uploads the checkpoint written by a
# SaveModelCallback; without one on the learner the model upload is skipped,
# so SaveModelCallback is added explicitly. With its defaults it tracks the
# validation loss, saves the best epoch's weights, and reloads them into
# `learn` when training finishes.
#
# NOTE(review): log_dataset=True relies on fastai locating the dataset
# directory from the DataLoaders; if the dataset does not show up in the run,
# pass the directory explicitly (e.g. log_dataset='data') -- confirm.
learn.fit(
    40,
    1e-3,
    cbs=[
        SaveModelCallback(),
        WandbCallback(log_dataset=True, log_model=True),
    ],
)