# Baseline solution

In [None]:
import os
import pandas as pd

import wandb
from fastai.vision.all import *

# Where this run is recorded on Weights & Biases (project + entity), and the
# name of the processed-data artifact that the notebook pulls in below.
PROJECT_NAME = "lemon-test1"
ENTITY = "wandb_course"
PROCESSED_DATA_AT = "lemon_dataset_split_data"

In [None]:
# Start a W&B run labelled as a "training" job. The returned `run` handle is
# what the rest of the notebook uses to fetch the dataset artifact, log the
# predictions table and finally close the run.
run = wandb.init(project=PROJECT_NAME, entity=ENTITY, job_type="training")

In [None]:
# Ask for the ":latest" alias, i.e. the most recent version of the processed
# data artifact, rather than pinning a specific version.
latest_processed_data = f'{PROCESSED_DATA_AT}:latest'
processed_data_at = run.use_artifact(latest_processed_data)

# Pull the artifact's files locally; the returned location is treated as the
# dataset directory by the cells that follow.
processed_dataset_dir = processed_data_at.download()

In [None]:
# The split assignments live in a CSV shipped inside the downloaded artifact.
split_csv_path = os.path.join(processed_dataset_dir, 'data_split.csv')
df = pd.read_csv(split_csv_path)

In [None]:
# The hold-out ("test") stage is not used yet, so drop those rows and
# renumber the index from zero.
not_holdout = df.stage.ne('test')
df = df[not_holdout].reset_index(drop=True)

# Boolean column marking validation rows; the data loaders read it to
# separate training rows from validation rows.
df['valid'] = df.stage.eq('valid')

# Eyeball the first few rows as a sanity check.
df.head()

In [None]:
# Wrap the downloaded dataset directory in a Path; it is passed as `path=`
# when the image data loaders are built.
path = Path(processed_dataset_dir)

In [None]:
from ml_collections import config_dict

# Hyperparameters of the baseline, gathered in a single config object:
#   img_size      -> size handed to Resize() for every image
#   target_column -> dataframe column used as the label
#   bs            -> batch size of the data loaders
#   seed          -> seed value passed to the data loaders
#   arch          -> architecture name handed to vision_learner
cfg = config_dict.ConfigDict(
    {
        'img_size': 256,
        'target_column': 'mold',
        'bs': 32,
        'seed': 42,
        'arch': 'resnet18',
    }
)

In [None]:
# Record the hyperparameters on the active W&B run.
# `cfg` is an ml_collections ConfigDict, not a plain dict, so convert it
# explicitly: this way exactly the fields declared on `cfg` (img_size,
# target_column, bs, seed, arch) are logged as top-level config keys, instead
# of relying on how wandb coerces an arbitrary object into a dict.
wandb.config.update(cfg.to_dict())

In [None]:
# Seed the global RNGs before any data loading or training happens.
# The `seed=` argument of `from_df` below only drives a *random* train/valid
# split; because `valid_col` is supplied the split comes from the dataframe,
# so without this call `cfg.seed` (which is logged to W&B) would not actually
# make the run repeatable. `reproducible=True` additionally asks fastai to
# switch the backend to deterministic behaviour.
set_seed(cfg.seed, reproducible=True)

# Build the train/valid image loaders from the dataframe:
#   - file names come from the 'file_name' column, resolved relative to `path`
#   - labels come from the configured target column
#   - rows flagged in the boolean 'valid' column form the validation set
#   - every image is resized to cfg.img_size and batched cfg.bs at a time
dls = ImageDataLoaders.from_df(df, path=path, seed=cfg.seed, fn_col='file_name',
                               label_col=cfg.target_column, valid_col='valid',
                               item_tfms=Resize(cfg.img_size), bs=cfg.bs)

In [None]:
# Display a sample batch from the loaders as a visual check of images/labels.
dls.show_batch()

In [None]:
# How many validation rows fall into each 'mold' class.
df.loc[df.valid, 'mold'].value_counts()

In [None]:
# Baseline accuracy: the fraction of validation rows that belong to the most
# frequent class, i.e. the accuracy of always predicting that class.
#
# `value_counts` orders classes by descending frequency, so the majority class
# is the first entry *by position*. A bare `[0]` is a label lookup when the
# classes are integers (it would report class 0 even if class 1 were the
# majority) and a deprecated positional fallback otherwise, so use `.iloc[0]`.
# The label column is taken from `cfg` to stay in sync with the data loaders.
valid_targets = df.loc[df.valid, cfg.target_column]
valid_targets.value_counts(normalize=True).iloc[0]

In [None]:
from fastai.callback.wandb import *

In [None]:
# Metrics tracked during training.
baseline_metrics = [accuracy, Precision(), Recall(), F1Score()]

# Callbacks:
#   - WandbCallback streams training to the active W&B run; its own prediction
#     logging is switched off (log_preds=False) and model logging is on.
#   - SaveModelCallback watches the 'f1_score' metric to decide which weights
#     to keep.
training_cbs = [
    WandbCallback(log_preds=False, log_model=True),
    SaveModelCallback(monitor='f1_score'),
]

# Vision learner on top of the architecture named in the config.
learn = vision_learner(dls, cfg.arch, metrics=baseline_metrics, cbs=training_cbs)

# Train with fastai's fine-tuning schedule, passing 2 as the epoch count.
learn.fine_tune(2)

In [None]:
# Run inference with the trained learner and keep four outputs, in order:
# the inputs (with_input=True), the raw predictions, the targets, and the
# decoded predictions (with_decoded=True).
# NOTE(review): no dataset is passed, so this presumably uses the validation
# set (fastai's default for get_preds) — confirm.
inp,preds,targs,out = learn.get_preds(with_input=True, with_decoded=True)

In [None]:
# Inspect the shapes of the four tensors returned by get_preds to confirm
# they line up before they are zipped into a table.
inp.shape, preds.shape, targs.shape, out.shape

In [None]:
# Turn the inference outputs into plain Python lists, one entry per sample,
# ready to become the columns of a predictions table.

# One wandb.Image per input tensor.
# NOTE(review): permute(1,2,0) assumes each input is channel-first (C, H, W)
# and moves the channels last for wandb.Image — confirm.
imgs = [wandb.Image(t.permute(1,2,0)) for t in inp]
# Column 1 of `preds` for every sample; presumably the predicted probability
# of the second class in the loaders' vocab — confirm against dls.vocab.
pred_proba = preds[:,1].numpy().tolist()
# Ground-truth targets and decoded predictions as Python lists.
targets = targs.numpy().tolist()
predictions = out.numpy().tolist()

In [None]:
# Assemble the per-sample results column by column: the image, the predicted
# probability, the decoded prediction and the true target.
# NOTE: this rebinds `df`, replacing the dataset dataframe loaded earlier in
# the notebook with the predictions dataframe.
df = pd.DataFrame(
    {
        'image': imgs,
        'probability': pred_proba,
        'prediction': predictions,
        'target': targets,
    }
)

In [None]:
# Log the dataframe held in `df` to the run as a W&B Table under the key
# 'predictions_table'.
run.log({'predictions_table': wandb.Table(dataframe=df)})

In [None]:
# Close the W&B run now that training and logging are done.
run.finish()