# AO Sydney fastai sweep

### Notebook setup

In [2]:
!pip install keras-one-cycle-lr

Collecting keras-one-cycle-lr
  Using cached keras_one_cycle_lr-0.0.1-py3-none-any.whl (5.7 kB)
Installing collected packages: keras-one-cycle-lr
Successfully installed keras-one-cycle-lr-0.0.1


Import pre-installed packages 

In [4]:
import pandas as pd
from clr import LRFinder
from pathlib import Path
import fastai
from fastai.tabular.all import *
from fastai.tabular.core import Normalize, TabularPandas, TabDataLoader
from fastai.callback.wandb import *
fastai.__version__

ModuleNotFoundError: No module named 'clr'

In [None]:
from fastcore.basics import range_of, ifnone

from fastai.callback.progress import ProgressCallback
from fastai.callback.schedule import lr_find

from fastai.data.block import CategoryBlock
from fastai.data.core import DataLoaders
from fastai.data.external import untar_data, URLs
from fastai.data.transforms import RandomSplitter

from fastai.learner import load_learner, Learner
from fastai.metrics import accuracy

from fastai.tabular.core import Categorify, FillMissing, FillStrategy, Normalize, TabularPandas, TabDataLoader
from fastai.tabular.model import TabularModel
from fastai.tabular.learner import tabular_learner

Package versions: 
* *Python v. 3.7.7*<br>
* *Pandas v. 1.1.4*<br>
* *PyTorch v. 1.6.0*<br>
* *Fastai v. 2.0.11*

Set seed for reproducable results

In [None]:
seed = 42

# python RNG
import random
random.seed(seed)

# pytorch RNGs
import torch
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

# numpy RNG
import numpy as np
np.random.seed(seed)

Create paths to NSQIP data folder

In [None]:
fluxData_df = pd.read_feather('data/fluxData.feather')
zernikeData_df = pd.read_feather('data/zernikeData.feather')
dataFrame = pd.concat([fluxData_df, zernikeData_df],axis = 1)


In [None]:
zernikeData_df.describe()

In [None]:
splits = RandomSplitter()(range_of(fluxData_df))

In [None]:
y_names =list(zernikeData_df.columns.values)
x_names = list(fluxData_df.columns.values)
norm = Normalize()
to = TabularPandas(dataFrame,norm,  cont_names= x_names, y_names = y_names, splits= splits)
norms = to.procs.normalize

In [27]:
to.y_names

(#9) ['zernikeCoeef1','zernikeCoeef2','zernikeCoeef3','zernikeCoeef4','zernikeCoeef5','zernikeCoeef6','zernikeCoeef7','zernikeCoeef8','zernikeCoeef9']

In [22]:
to.conts.describe()

Unnamed: 0,FluxCoeef1,FluxCoeef2,FluxCoeef3,FluxCoeef4,FluxCoeef5,FluxCoeef6,FluxCoeef7,FluxCoeef8,FluxCoeef9,FluxCoeef10,FluxCoeef11,FluxCoeef12,FluxCoeef13,FluxCoeef14,FluxCoeef15,FluxCoeef16,FluxCoeef17,FluxCoeef18,FluxCoeef19
count,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0,58740.0
mean,-0.000646,-0.001099,-0.000491,0.000792,0.000554,0.001122,0.00191,0.001645,-0.000436,0.003208,-0.003271,-0.001035,-0.001496,0.000856,-0.000353,0.002753,0.002,-0.002226,-0.001174
std,1.00074,1.000308,1.000037,1.000388,1.000174,0.999119,1.000171,1.000778,0.999263,1.001906,0.998667,1.000121,1.000983,1.001988,0.997984,1.001484,1.000311,1.00166,0.997171
min,-3.170261,-2.039658,-1.510622,-2.118599,-2.948699,-2.490017,-1.789847,-2.587601,-1.376331,-1.224014,-2.118872,-2.845668,-1.961091,-1.428087,-1.2408,-1.45421,-1.370395,-2.037752,-1.401945
25%,-0.735536,-0.764998,-0.816176,-0.753933,-0.711789,-0.767949,-0.776244,-0.746119,-0.779701,-0.669968,-0.764225,-0.709663,-0.787686,-0.724918,-0.700702,-0.804573,-0.774073,-0.757406,-0.744749
50%,0.027266,-0.116311,-0.169947,-0.075162,-0.041012,-0.038722,-0.144902,-0.085099,-0.226624,-0.31166,-0.085128,-0.093792,-0.119502,-0.243746,-0.313004,-0.199387,-0.247279,-0.12625,-0.245513
75%,0.779837,0.660905,0.629065,0.662446,0.673908,0.71255,0.644501,0.669253,0.558697,0.332114,0.667285,0.624278,0.682407,0.463252,0.395634,0.607782,0.522053,0.631649,0.48944
max,2.446889,4.019042,4.890652,3.832741,5.047637,3.429444,4.136732,3.896131,5.736265,7.782055,6.025187,5.223766,3.619025,6.431697,6.375229,4.7291,5.257224,5.134041,6.683915


## The dataloader

In [None]:
dls = to.dataloaders()
dls.show_batch()

In [None]:
#dls = TabularDataLoaders.from_df(dataFrame, y_names = y_names)

In [None]:
trn_dl = TabDataLoader(to.train, bs=64, shuffle=True, drop_last=True)
val_dl = TabDataLoader(to.valid, bs=128)

In [None]:
dls.show_batch()

## Define learner

In [None]:
import wandb
wandb.init(project= "FastAiSweepLoop");

In [28]:
learn = tabular_learner(dls ,metrics = rmse, layers = [2000, 1050, 100], y_range= [-.5,.5], wd= 0.1)
learn.summary()

TabularModel (Input shape: 64 x torch.Size([64, 19]))
Layer (type)         Output Shape         Param #    Trainable 
                     64 x 19             
BatchNorm1d                               38         True      
____________________________________________________________________________
                     64 x 2000           
Linear                                    38000      True      
ReLU                                                           
BatchNorm1d                               4000       True      
____________________________________________________________________________
                     64 x 1050           
Linear                                    2100000    True      
ReLU                                                           
BatchNorm1d                               2100       True      
____________________________________________________________________________
                     64 x 100            
Linear                             

In [None]:
# import wandb
# #!wandb login
# wandb.init(project= "FastAiSweep")


In [None]:
 #lr_steep = learn.lr_find()
#printf(F"minimimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [31]:
learn.fit_one_clyle(100)

ModuleAttributeError: 'TabularModel' object has no attribute 'fit_one_clyle'

In [None]:
learn.fit(100)

In [None]:
from fastai.callback.wandb import *
learn.fit( 40, 1e-3, cbs = [WandbCallback(log_dataset = True, log_model=True)])