In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from tqdm import tqdm_notebook as progress_bar

# Nicer plotting: bold text everywhere and a larger default font size
plt.rcParams.update({
    "font.weight": "bold",
    "font.size": "18",
    "axes.labelweight": "bold",
})

# Force only P100 GPU: expose just CUDA device index 0 to this process.
# The variable is set here, before torch is imported below.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

# Device configuration: use the (single visible) GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Set up local parameters

In [2]:
# --- Run identification -------------------------------------------------
# `name` is the prefix of every saved model file and of the loss-plot image.
name = 'Aug_01_75000_2layer'
# Saved model files go into a directory named after the run.
output = Path(name)

# --- Input ----------------------------------------------------------------
# .npz file handed to collect_data
data = '/data/schreihf/PvFinder/July_31_75000.npz'

# --- Training hyper-parameters --------------------------------------------
n_epochs = 200          # length of the range wrapped by the progress bar
batch = 32              # passed positionally to trainNet
learning_rate = 1e-3    # passed to trainNet as learning_rate=

In [3]:
# Make the output directory if it does not exist.
# parents=True also creates any missing intermediate directories, so a nested
# output path works; exist_ok=True lets this cell be re-run without raising
# when the directory is already there.
output.mkdir(parents=True, exist_ok=True)

In [4]:
# When you write `import X`, Python searches
# the directories listed in sys.path for a
# module named X (for example a file X.py).
#
# Add the directory with the model
# definitions to the path so we can import it
import sys
sys.path.append('../model')

from collectdata import collect_data
from loss import Loss
from training import trainNet
from models import SimpleCNN2Layer as Model

In [5]:
# Build the training and validation datasets from the .npz file at `data`.
# The two integers are passed positionally; the recorded output of this cell
# reports 55000 training and 10000 validation samples.
# The third returned value is not needed in this notebook and is discarded.
dataset_train, dataset_val, _ = collect_data(
    data,
    55_000,
    10_000,
    verbose=True,
    device=device,
)

Loading /data/schreihf/PvFinder/July_31_75000.npz
Loaded files in 14.93 s
Samples in Training: 55000 Validation: 10000 Test: 10000
Constructed datasets on device in 4.617 s


In [6]:
# Instantiate the network (SimpleCNN2Layer, imported as Model) and the loss,
# both with their default constructor arguments.
model, loss_fn = Model(), Loss()

In [None]:
# Report how many CUDA devices torch can see
print("Let's use", torch.cuda.device_count(), "GPUs!")

# Move the model and the loss onto the selected device
model, loss_fn = model.to(device), loss_fn.to(device)

Let's use 1 GPUs!


In [None]:
# Make a progress bar
progress = progress_bar(range(n_epochs), dynamic_ncols=True)

# Run the epochs. trainNet is consumed as an iterator; each item it yields is
# bound to `results`, which later cells read after the loop has finished
# (presumably one item per epoch -- confirm against training.trainNet).
for results in trainNet(
    model, dataset_train, dataset_val,
    loss_fn, batch, progress,
    learning_rate=learning_rate, verbose=False,
):
    # Pretty print a description: latest training and validation cost
    progress.set_postfix(train=results.cost[-1], val=results.val[-1])

    # Save each model state dictionary, tagged with results.epoch.
    # `output` is a pathlib.Path (always truthy), so the save is unconditional.
    torch.save(model.state_dict(), output / f'{name}_{results.epoch}.pyt')

In [None]:
# Total training time: sum of the per-epoch times recorded in results.time_epoch
print("Training finished, took {:.2f}s".format(sum(results.time_epoch)))

In [None]:
# Write the final model weights (state dict only) into the output directory
torch.save(
    model.state_dict(),
    output.joinpath(f'{name}_final.pyt'),
)

In [None]:
# Training and validation cost against epoch number, on a log-scaled y axis
fig, ax = plt.subplots(figsize=(15, 10))  # width 15, height 10

# The x axis counts entries starting from 1
ax.plot(np.arange(1, len(results.cost) + 1), results.cost,
        'o-', color='r', label='Train')
ax.plot(np.arange(1, len(results.val) + 1), results.val,
        'o-', color='b', label='Validation')

ax.set_xlabel('Number of epoch', weight='bold', size=20)
ax.set_ylabel('Average cost per bin of a batch', weight='bold', size=20)
ax.set_yscale('log')

# Same tick styling on both axes
ax.tick_params(axis='both', colors='k', labelsize=16)
ax.legend()

# The image is written to the current working directory as <name>.png
fig.savefig(name + '.png')