In [1]:
# Dataset names handed to data.getSingleProjectData, one entry per line.
# NOTE(review): 'total' carries no project prefix, unlike every other
# entry -- confirm it is intentional.
proj_list = [
    'boringssl_total',
    'c-ares_total',
    'freetype2_total',
    'guetzli_total',
    'harfbuzz_total',
    'libpng_total',
    'libssh_total',
    'libxml2_total',
    'pcre_total',
    'proj4_total',
    're2_total',
    'sqlite3_total',
    'total',
    'vorbis_total',
    'woff2_total',
    'wpantund_total',
]

In [2]:
from sklearn.model_selection import train_test_split
import torch

from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import data
import data_loader as dl
import initializer as init
import trainer
import tester
import model_util as mu



In [3]:
# Fetch dataset-wide metadata from the project-local `data` module.
# max_len and token_choices feed the hyperparameter cell further down;
# source_code_tokens is not referenced by any other visible cell.
max_len, source_code_tokens, token_choices = data.getInfo()

In [4]:
# Load the prefix / postfix / label data (presumably NumPy arrays, per the
# _np suffix) with proj_list[0], i.e. 'boringssl_total', as the selected project.
# NOTE(review): how the selected project relates to the other entries of
# proj_list is decided inside data.getSingleProjectData -- confirm there.
prefix_np, postfix_np, label_np = data.getSingleProjectData(proj_list, proj_list[0])

Getting data for "boringssl_total" from "c-ares_total"
Getting data for "boringssl_total" from "freetype2_total"
Getting data for "boringssl_total" from "guetzli_total"
Getting data for "boringssl_total" from "harfbuzz_total"
Getting data for "boringssl_total" from "libpng_total"
Getting data for "boringssl_total" from "libssh_total"
Getting data for "boringssl_total" from "libxml2_total"
Getting data for "boringssl_total" from "pcre_total"
Getting data for "boringssl_total" from "proj4_total"
Getting data for "boringssl_total" from "re2_total"
Getting data for "boringssl_total" from "sqlite3_total"
Getting data for "boringssl_total" from "vorbis_total"
Getting data for "boringssl_total" from "woff2_total"
Getting data for "boringssl_total" from "wpantund_total"


In [5]:
# Two-stage split: hold out 10% of the data as the test set, then hold out
# 10% of what remains as the validation set (roughly 81% / 9% / 10% overall).
# Both stages share the same options, so they are spelled out once.
split_options = dict(test_size=0.1, random_state=43)

# Stage 1: full data -> (train + val) / test
train_prefix, test_prefix, train_postfix, test_postfix, train_label, test_label = \
    train_test_split(prefix_np, postfix_np, label_np, **split_options)

# Stage 2: (train + val) -> train / val; the train_* names are rebound here.
train_prefix, val_prefix, train_postfix, val_postfix, train_label, val_label = \
    train_test_split(train_prefix, train_postfix, train_label, **split_options)

In [6]:
# Build the three loaders with the project-local data_loader helper.
# Arguments are positional: the (prefix, postfix) pair for train, val and
# test in that order, followed by the three label sets in the same order.
train_dataloader, val_dataloader, test_dataloader = dl.data_loader(
    train_prefix, train_postfix,
    val_prefix, val_postfix,
    test_prefix, test_postfix,
    train_label, val_label, test_label,
)

In [7]:
# PyTorch TensorBoard support.
# NOTE(review): `timestamp` is not used by any visible cell, so every run
# logs into the same fixed directory -- confirm whether the log directory
# was meant to include it.
timestamp = format(datetime.now(), '%Y%m%d_%H%M%S')
writer = SummaryWriter(log_dir='../tensorboard/dev/tests')

In [8]:
# Select the compute device: CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was picked.
if device.type == "cuda":
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 3070


In [9]:
# ======================================================
# Hyperparameters -- every tunable value lives in this cell
# ======================================================

# Run identity and schedule
title = 'dev-test1'
epochs = 20

# Component selectors, passed to init.initialize_model as strings
model_name = "RNN"
optim_name = "Adam"
loss_fn_name = "CEL"

# Model dimensions
# NOTE(review): input_size is later passed as vocab_size, yet it is set to
# max_len -- confirm that this is the intended quantity.
input_size = max_len
num_classes = len(token_choices)
embed_dim = 200
hidden_size = 100
rnn_layers = 1

# Convolution settings (presumably only relevant to a CNN model -- confirm)
num_filters = [100, 200, 100]
kernel_sizes = [15, 21, 114]

# Regularisation and optimiser settings
dropout = 0.0
learning_rate = 0.001
weight_decay = 1e-4

# Forwarded to init.initialize_model as pretrained_model; None means no
# pretrained object is supplied.
pretrained_model = None
# Must be a plain bool. A trailing comma after the value would bind the
# one-element tuple (False,), which is truthy and would therefore read as
# "freeze" in any truthiness check downstream.
freeze_embedding = False

In [None]:
# Fix the seed (via the project-local trainer helper) before the model is built.
trainer.set_seed(42)

# Collect every setting in one mapping so the exact configuration that was
# used can be inspected from a later cell.
# NOTE(review): vocab_size receives input_size, which the hyperparameter
# cell sets to max_len -- confirm that this is the intended quantity.
model_config = dict(
    vocab_size=input_size,
    embed_dim=embed_dim,
    hidden_size=hidden_size,
    num_classes=num_classes,
    rnn_layers=rnn_layers,
    num_filters=num_filters,
    kernel_sizes=kernel_sizes,
    dropout=dropout,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    model_name=model_name,
    optim_name=optim_name,
    loss_fn_name=loss_fn_name,
    pretrained_model=pretrained_model,
    freeze_embedding=freeze_embedding,
    device=device,
)
model, optimizer, loss_fn = init.initialize_model(**model_config)

# Show the resulting architecture.
print(model)

In [None]:
# Run the training loop of the project-local trainer module.
trainer.train(
    # what is being optimised
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    # data
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    # schedule and placement
    epochs=epochs,
    device=device,
    # run label and TensorBoard writer
    title=title,
    writer=writer,
)

In [None]:
# Persist the model under the run title via the project-local model_util helper.
# NOTE(review): storage location and format are decided inside
# mu.saveModel -- confirm there before relying on them.
mu.saveModel(title, model)

In [None]:
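# Optional: uncomment to evaluate a previously saved model instead of the
# one currently bound to `model`.
# model = mu.getModel('cnn-test10')
# print(model)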

In [None]:
# Evaluate the model on the held-out test split with the project-local tester.
tester.test(
    model=model,
    test_dataloader=test_dataloader,
    device=device,
    title=title,
)

In [None]:
# Hand the training loader, the model and the TensorBoard writer to the
# project-local graphModel helper.
# NOTE(review): presumably this records the model graph through `writer` --
# confirm in model_util.
mu.graphModel(train_dataloader, model, writer)