In [23]:
%env WANDB_SILENT=True

env: WANDB_SILENT=True


In [24]:
import wandb
from fastai.callback.wandb import *
from fastai.tabular.all import *
from munch import Munch

In [25]:
config = Munch({
    "test_size": 0.25,
    "batch_size": 2056,
    "layers": [256, 128, 64],
    "loss_func": "cross_entropy",
    "epoch": 64,
})

In [26]:
# start logging a wandb run
wandb.init(project='DrivenData__Richtor_predictor', name="weighted_cross_entropy__3_layers__without_cyclic_rate",config=config, save_code=True, reinit=True)

In [27]:
root_path = Path("./data")

In [28]:
data_df = pd.read_csv(root_path / "train_values.csv", index_col=0)
label_df = pd.read_csv(root_path / "train_labels.csv", index_col=0)

df = pd.merge(data_df, label_df, left_index=True, right_index=True)

In [29]:
# TODO: df.corr()

In [30]:
cont_names = """geo_level_1_id
geo_level_2_id
geo_level_3_id
count_floors_pre_eq
age
area_percentage
height_percentage
count_families""".split("\n")

cat_names = list(filter(lambda x: x not in [*cont_names, "damage_grade"], df.columns))

procs = [Categorify, FillMissing, Normalize]

y_names = "damage_grade"
y_block = CategoryBlock

splits = TrainTestSplitter(test_size=config.test_size, random_state=42, stratify=df.damage_grade)(range_of(df))

In [31]:
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names, y_names=y_names, splits=splits, y_block=y_block)

In [32]:
dls = to.dataloaders(bs=config.batch_size)

In [33]:
metrics = F1Score(average="micro")

weight = torch.from_numpy(df.damage_grade.value_counts(normalize=True).values).float()

loss_func = FocalLossFlat(weight=weight)

if config.loss_func == "cross_entropy":
    loss_func = CrossEntropyLossFlat(weight=weight)

cbs=[EarlyStoppingCallback(monitor='valid_loss', min_delta=1e-4, patience=2), SaveModelCallback(), WandbCallback()]

learn = tabular_learner(dls, layers=config.layers, opt_func=Adam, loss_func=loss_func, metrics=metrics, cbs=cbs)

In [35]:
suggest = learn.lr_find(show_plot=False);
lr = suggest.valley
wandb.config.lr = lr

In [36]:
learn.fit(config.epoch, lr=lr)

epoch,train_loss,valid_loss,f1_score,time
0,0.802982,0.683452,0.597827,00:02
1,0.667435,0.639721,0.599101,00:02
2,0.634261,0.620524,0.627496,00:02
3,0.618504,0.615964,0.629614,00:02
4,0.607332,0.606347,0.638455,00:02
5,0.598518,0.602571,0.6292,00:02
6,0.592484,0.601743,0.636245,00:02
7,0.586784,0.594679,0.636675,00:02
8,0.579432,0.594925,0.641402,00:02
9,0.572124,0.58753,0.641387,00:02


Better model found at epoch 0 with valid_loss value: 0.6834521889686584.
Better model found at epoch 1 with valid_loss value: 0.6397207379341125.
Better model found at epoch 2 with valid_loss value: 0.6205243468284607.
Better model found at epoch 3 with valid_loss value: 0.6159644722938538.
Better model found at epoch 4 with valid_loss value: 0.6063470840454102.
Better model found at epoch 5 with valid_loss value: 0.6025705933570862.
Better model found at epoch 6 with valid_loss value: 0.6017429828643799.
Better model found at epoch 7 with valid_loss value: 0.5946789979934692.
Better model found at epoch 9 with valid_loss value: 0.5875301957130432.
Better model found at epoch 10 with valid_loss value: 0.5852291584014893.
No improvement since epoch 10: early stopping
