In [1]:
import os, pathlib, shutil
from collections import Counter
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import v2
import torch.nn as nn
import numpy as np
import torch
from torch.profiler import profile, record_function, ProfilerActivity
import train
from torchvision.transforms import InterpolationMode

# Dataset root. Set the WILDFIRE_DATASET_DIR environment variable to point the
# notebook at another checkout without editing this cell; otherwise fall back
# to the per-OS defaults below.
src = os.environ.get("WILDFIRE_DATASET_DIR", "")
if not src:
    if os.name == "nt":
        src = "D:/599DL4VProject/the_wildfire_dataset"
    elif os.name == "posix":
        src = "/home/asromelo/Desktop/Projects/599_proj/the_wildfire_dataset/"
if not src:
    # An empty root would silently resolve the splits to "/train", "/valid"
    # and "/test", so fail loudly instead.
    raise RuntimeError(
        f"No dataset location known for os.name={os.name!r}; set WILDFIRE_DATASET_DIR."
    )

# Absolute paths of the three dataset splits under the root.
dataset_root = pathlib.Path(src)
wf1TrainPath = (dataset_root / 'train').resolve()
wf1ValidPath = (dataset_root / 'valid').resolve()
wf1TestPath = (dataset_root / 'test').resolve()

# Use the GPU when PyTorch can see one, otherwise run on the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")



Random Idea: What if we train and evaluate using a batch of very small random crops (64x64) from each image, then adjust the loss via a Bayesian or expected-value estimate? The actual set of features needed to classify a sample can be surprisingly small.

In [13]:
# Single knob reused as the argument of every random augmentation below.
p = 0.3
# Channel statistics for the Normalize step, which is currently disabled
# (presumably the usual ImageNet values -- TODO confirm).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


def _decode_and_upscale():
    """Return fresh transforms: to image tensor, to uint8, Resize(1080, max_size=2160)."""
    return [
        v2.ToImageTensor(),
        v2.ConvertImageDtype(torch.uint8),
        v2.Resize(size=1080, max_size=2160, interpolation=InterpolationMode.BILINEAR, antialias=True),
    ]


def _downscale_to_float():
    """Return fresh transforms: Resize(128) without antialiasing, then convert to float."""
    return [
        v2.Resize(size=128, antialias=False),
        v2.ConvertImageDtype(dtype=torch.float),
    ]


def _indices_with_label(targets, wanted):
    """Return the positions in `targets` whose label equals `wanted`."""
    return [idx for idx, label in enumerate(targets) if label == wanted]


# Random augmentation stage of the training pipeline; the four candidates
# carry equal weights.
augmentations = v2.RandomChoice(
    transforms=[
        v2.RandomInvert(p),
        v2.ColorJitter(p, p, p, p),
        v2.RandomEqualize(p),
        v2.RandomHorizontalFlip(p),
    ],
    p=[0.2, 0.2, 0.2, 0.2],
)

# Training pipeline: upscale, random 512 crop, one augmentation, downscale.
tfset = v2.Compose([
    *_decode_and_upscale(),
    v2.RandomCrop(size=512, pad_if_needed=True),
    augmentations,
    *_downscale_to_float(),
    #v2.Normalize(mean, std)
])
tfset.to(device)

# Evaluation pipeline: same resizing, but a deterministic centre crop and no
# augmentation.
testset = v2.Compose([
    *_decode_and_upscale(),
    v2.CenterCrop(size=512),
    *_downscale_to_float(),
])
testset.to(device)

# One dataset per split; only the training split is augmented.
wf1Train = train.ForestFireDataset(root=str(wf1TrainPath), transform=tfset)
wf1Valid = train.ForestFireDataset(root=str(wf1ValidPath), transform=testset)
wf1Test = train.ForestFireDataset(root=str(wf1TestPath), transform=testset)

# Class-specific views: label 1 from the test split, label 0 from the training split.
# NOTE(review): the names assume ForestFireDataset encodes fire as 1 and
# no-fire as 0 -- confirm against the dataset's class-to-index mapping.
wf1fire = Subset(wf1Test, _indices_with_label(wf1Test.targets, 1))
wf1nofire = Subset(wf1Train, _indices_with_label(wf1Train.targets, 0))

batch_size = 32
loader_opts = dict(batch_size=batch_size, num_workers=2)
# Training iterates (shuffled) over the label-0 training subset only.
wf1TrLoader = DataLoader(dataset=wf1nofire, shuffle=True, **loader_opts)
wf1VaLoader = DataLoader(dataset=wf1Valid, **loader_opts)
wf1TsLoader = DataLoader(dataset=wf1Test, **loader_opts)

In [None]:
# Epoch count handed to `trainer.train_model` by the training cells below.
epochs = 10

# Model wrapper under training; img_size matches the 128-pixel Resize at the
# end of the transform pipelines.
trainer = train.GANProject(
    img_size=128,
    debug=False,
    small=True,
    activation=nn.LeakyReLU(negative_slope=0.2),
)

In [3]:
# Analysis: class balance per split, then a rough element-count estimate of
# the memory needed for one training step.
print("Training Set: ", Counter(wf1Train.targets))
print("Valid Set: ", Counter(wf1Valid.targets))
print("Test Set: ", Counter(wf1Test.targets))

# Record the first element of every hooked module's output during one forward
# pass. The modules named 'classifier' and 'features' are skipped.
acts = []
hook_handles = []
for name, module in trainer.named_modules():
    if name in ('classifier', 'features'):
        continue
    hook_handles.append(
        module.register_forward_hook(lambda m, inputs, output: acts.append(output[0].detach()))
    )

try:
    X, y_true = next(iter(wf1TrLoader))
    a, b, c = trainer.singleton(X)
finally:
    # Detach the hooks again: left registered they would keep appending to
    # `acts` on every forward pass of the later training cells.
    for handle in hook_handles:
        handle.remove()

model_param_size = sum(param.nelement() for param in trainer.parameters())
# One gradient element per parameter element.
grad_size = model_param_size
print(model_param_size)
# Elements in one input batch, assuming 3x128x128 images.
batch_mem = batch_size * 3 * 128 * 128
# Parameter elements held in the first param group of each of the three optimizers.
opt_size = sum(
    param.nelement()
    for optimizer in (trainer.opt_Gen, trainer.opt_Dsc, trainer.opt_Enc)
    for param in optimizer.param_groups[0]['params']
)
act_size = sum(act.nelement() for act in acts)

total_elements = model_param_size + grad_size + batch_mem + opt_size + act_size
# 4 bytes per element (assumes float32), expressed in MiB.
conv_mb = total_elements * 4 / 1024**2
print(conv_mb)

Training Set:  Counter({1: 1157, 0: 730})
Valid Set:  Counter({1: 246, 0: 156})
Test Set:  Counter({1: 251, 0: 159})
12992900
451.15754318237305


In [3]:
# Profile a complete training run (CPU and CUDA activity, with memory
# tracking) under a single "model_training" region.
profiled_activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
with profile(activities=profiled_activities, profile_memory=True) as prof, \
        record_function("model_training"):
    trainer.train_model(trainLoader=wf1TrLoader, validLoader=wf1VaLoader, metric='loss', epochs=epochs)

# Show the first 30 rows of the aggregated per-operator statistics.
profile_table = prof.key_averages().table(row_limit=30)
print(profile_table)

Epoch 0 Time: 399.27s
Train:  {'rec_loss': 0.08473903185700717, 'd_loss': -0.552120010166952, 'ec_loss': 0, 'feature_loss': 1324.9366438356165}
Validation:  {'rec_loss': 0.05728999655045087, 'd_loss': -0.05836157063346597, 'ec_loss': 21873.116915422885, 'accuracy': 0.0009653226504871502}
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ----

In [3]:
# Backend tuning before training. `device` is a torch.device, which does not
# compare equal to the string 'cuda', so branch on its `.type` instead.
if device.type == 'cuda':
    # Let cuDNN benchmark and pick convolution algorithms for the fixed input size.
    torch.backends.cudnn.benchmark = True
else:
    # `torch.jit.onednn_fusion_enabled` is the query function; assigning to it
    # only rebinds the name. The setter is `enable_onednn_fusion`.
    torch.jit.enable_onednn_fusion(True)
trainer.train_model(trainLoader=wf1TrLoader, validLoader=wf1VaLoader, metric='loss', epochs=epochs)

Epoch 0 Time: 410.34s
Train:  {'rec_loss': 0.08689464412323417, 'd_loss': -0.0663987930506876, 'ec_loss': 0, 'feature_loss': 1008.8556506849316}
Validation:  {'rec_loss': 0.053801422688498426, 'd_loss': 0.045036799872099464, 'ec_loss': 8979.912935323384, 'accuracy': 0.0009653226504871502}
Epoch 1 Time: 413.67s
Train:  {'rec_loss': 0.024357510919440283, 'd_loss': -0.15707351475545805, 'ec_loss': 0, 'feature_loss': 13502.598630136987}
Validation:  {'rec_loss': 0.015552279961049853, 'd_loss': -0.012041884275218148, 'ec_loss': 10416.596393034826, 'accuracy': 0.0009653226504871502}
Epoch 2 Time: 396.51s
Train:  {'rec_loss': 0.009450414082775378, 'd_loss': -0.013519710384003104, 'ec_loss': 0, 'feature_loss': 20635.57397260274}
Validation:  {'rec_loss': 0.010691284540280774, 'd_loss': 1.02629709006542e-05, 'ec_loss': 17508.57338308458, 'accuracy': 0.0009653226504871502}
Epoch 3 Time: 396.28s
Train:  {'rec_loss': 0.006036439007275725, 'd_loss': -5.12942844006705e-06, 'ec_loss': 0, 'feature_los

In [5]:
# Evaluate the trained `trainer` on the test-split loader and print the
# metrics it returns.
test_result = trainer.evaluate(wf1TsLoader)
print(test_result)



{'rec_loss': 0.0023254228801262086, 'd_loss': 3.619270367979458e-07, 'ec_loss': 23309.519512195122, 'accuracy': 0.0009458655264319443}


# For usage with pre-trained models

In [3]:
# Epoch whose saved checkpoints are loaded.
t_epoch = 9

# Resolve the three checkpoint files relative to the current working directory.
gen_path, dsc_path, brc_path = (
    pathlib.Path(filename).resolve()
    for filename in (
        f'genproject_e{t_epoch}.pt',
        f'dscproject_e{t_epoch}.pt',
        f'brcproject_e{t_epoch}.pt',
    )
)

# Load each checkpoint onto the active device, in generator / discriminator /
# branch order (naming presumed from the load_unet / load_cnn / load_branch
# keywords -- TODO confirm).
gen, dsc, brc = (
    torch.load(checkpoint, map_location=device)
    for checkpoint in (gen_path, dsc_path, brc_path)
)

# Rebuild the model wrapper from the loaded components.
pretrain = train.GANProject(
    load_unet=gen,
    load_cnn=dsc,
    load_branch=brc,
    img_size=128,
    debug=False,
)

In [None]:
# Loader over `wf1fire`, the label == 1 subset of the test split.
# NOTE(review): wf1FireLoader is never used in this cell -- the evaluation
# below still runs on the full test loader. Confirm whether
# `pretrain.evaluate(wf1FireLoader)` was intended.
wf1FireLoader = DataLoader(dataset=wf1fire, batch_size=batch_size)
# Evaluate the pre-trained model on the test split and print its metrics.
test_result = pretrain.evaluate(wf1TsLoader)
print(test_result)