In [1]:
proj_list = [
    'boringssl_total', 'c-ares_total',
    'freetype2_total', 'guetzli_total',
    'harfbuzz_total', 'libpng_total',
    'libssh_total', 'libxml2_total',
    'pcre_total', 'proj4_total',
    're2_total', 'sqlite3_total',
    'total', 'vorbis_total',
    'woff2_total', 'wpantund_total'
]

In [2]:
target_project = 1

In [3]:
from sklearn.model_selection import train_test_split
import torch

from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

import data
import data_loader as dl
import initializer as init
import trainer
import tester
import predictor
import model_util as mu
import pretrained_model as pm

import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
prefix_np, postfix_np, label_np = data.getSingleProjectData(proj_list, proj_list[target_project])

Getting data for "c-ares_total" from "boringssl_total"
Getting data for "c-ares_total" from "freetype2_total"
Getting data for "c-ares_total" from "guetzli_total"
Getting data for "c-ares_total" from "harfbuzz_total"
Getting data for "c-ares_total" from "libpng_total"
Getting data for "c-ares_total" from "libssh_total"
Getting data for "c-ares_total" from "libxml2_total"
Getting data for "c-ares_total" from "pcre_total"
Getting data for "c-ares_total" from "proj4_total"
Getting data for "c-ares_total" from "re2_total"
Getting data for "c-ares_total" from "sqlite3_total"
Getting data for "c-ares_total" from "vorbis_total"
Getting data for "c-ares_total" from "woff2_total"
Getting data for "c-ares_total" from "wpantund_total"


In [5]:
test_prefix, test_postfix, test_label = data.getTestData(proj_list[target_project])

In [6]:
train_prefix, val_prefix, train_postfix, val_postfix, train_label, val_label = train_test_split(
    prefix_np, postfix_np, label_np, test_size = 0.2, random_state = 43
)

# train_prefix, val_prefix, train_postfix, val_postfix, train_label, val_label = train_test_split(
#     train_prefix, train_postfix, train_label, test_size = 0.1, random_state = 43
# )

In [7]:
train_dataloader, val_dataloader, test_dataloader =\
    dl.data_loader(
        train_prefix, train_postfix,
        val_prefix, val_postfix,
        test_prefix, test_postfix,
        train_label, val_label, test_label,
        batch_size=1000
    )

In [8]:
# PyTorch TensorBoard support
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('../tensorboard/ctp/tests')

In [9]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
Device name: NVIDIA GeForce RTX 3070


In [10]:
# ====================
# set parameters here
# ====================

title = proj_list[target_project]
epochs = 20

embed_dim = 50
max_len, source_code_tokens, token_choices = data.getInfo()
pretrained_token2vec = pm.load_pretrained_model(source_code_tokens, embed_dim)
pretrained_token2vec = torch.tensor(pretrained_token2vec)


input_size = max_len
hidden_size = 50
num_classes = max(token_choices) + 1
rnn_layers = 1

num_filters = [100, 200, 100]
kernel_sizes = [15, 21, 114]

dropout = 0.0

learning_rate = 0.001
weight_decay = 1e-4

model_name = "RNN"
optim_name = "Adam"
loss_fn_name = "CEL"

pretrained_model = pretrained_token2vec
freeze_embedding = False,

In [11]:
trainer.set_seed(42)

model, optimizer, loss_fn = init.initialize_model(
    vocab_size=input_size,
    embed_dim=embed_dim,
    hidden_size=hidden_size,
    num_classes=num_classes,
    rnn_layers=rnn_layers,
    num_filters=num_filters,
    kernel_sizes=kernel_sizes,
    dropout=dropout,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    model_name=model_name,
    optim_name=optim_name,
    loss_fn_name=loss_fn_name,
    pretrained_model=pretrained_model,
    freeze_embedding=freeze_embedding,
    device=device,
)

print(model)

doing with pretrained model!!!
C_rnn(
  (emb): Embedding(213, 50)
  (lstm1): RNN(
    (rnn): LSTM(50, 50, batch_first=True, bidirectional=True)
  )
  (lstm2): RNN(
    (rnn): LSTM(50, 50, batch_first=True, bidirectional=True)
  )
  (fc1): Linear(in_features=200, out_features=300, bias=True)
  (fc2): Linear(in_features=300, out_features=213, bias=True)
  (dp1): Dropout(p=0.0, inplace=False)
  (dp2): Dropout(p=0.0, inplace=False)
)


In [12]:
trainer.train(
    epochs=epochs,
    title=title,
    writer=writer,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    device=device,
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn
)

Start training...

 Epoch  |  Train Loss  | Train Acc  | Val Loss | Val Acc | Elapsed
--------------------------------------------------------------------------------
   1    |   0.827845   | 76.748887  | 0.472256 | 85.88  | 138.99
   2    |   0.417609   | 87.265795  | 0.380978 | 88.25  | 139.72
   3    |   0.353278   | 88.961929  | 0.333733 | 89.49  | 139.04
   4    |   0.320987   | 89.850449  | 0.316475 | 90.05  | 137.74
   5    |   0.301160   | 90.376064  | 0.295883 | 90.55  | 138.30
   6    |   0.287661   | 90.774658  | 0.281096 | 90.98  | 138.70
   7    |   0.277546   | 91.063335  | 0.271534 | 91.26  | 137.75
   8    |   0.269649   | 91.289692  | 0.275595 | 91.13  | 138.98
   9    |   0.263473   | 91.468333  | 0.264117 | 91.42  | 137.07
  10    |   0.258648   | 91.600195  | 0.266007 | 91.45  | 138.87
  11    |   0.254092   | 91.730808  | 0.256908 | 91.66  | 139.72
  12    |   0.250631   | 91.860445  | 0.250787 | 91.86  | 138.90
  13    |   0.247658   | 91.929285  | 0.247601 | 91.8

In [13]:
mu.saveModel(title, model)

In [14]:
model = mu.getModel(title)
print(model)

C_rnn(
  (emb): Embedding(213, 50)
  (lstm1): RNN(
    (rnn): LSTM(50, 50, batch_first=True, bidirectional=True)
  )
  (lstm2): RNN(
    (rnn): LSTM(50, 50, batch_first=True, bidirectional=True)
  )
  (fc1): Linear(in_features=200, out_features=300, bias=True)
  (fc2): Linear(in_features=300, out_features=213, bias=True)
  (dp1): Dropout(p=0.0, inplace=False)
  (dp2): Dropout(p=0.0, inplace=False)
)


In [15]:
loss, acc = tester.test(test_dataloader=test_dataloader,
                        device=device,
                        model=model,
                        title=title)

test loss:  0.3477874783606365
test acc:  88.55689655172414


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


saved precision and recall results to file!


In [16]:
with open('../result/final', 'a') as f:
    text = title + '\t |\tloss: ' + str(loss) + '\t |\tacc: ' + str(acc) + '\n'
    f.write(text)

In [17]:
mu.graphModel(train_dataloader, model, writer)

uploaded model graph to tensorboard!


In [18]:
# prefix =[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# postfix = [2, 91, 2, 56, 2, 106, 47, 2, 134, 128, 50, 88, 48, 47, 2, 49, 48, 2, 56, 2, 91, 2, 119, 91, 2, 56, 2, 106, 47, 2, 119, 128, 50, 88, 48, 47, 2, 49, 48, 2, 56, 2, 91, 2, 119, 91, 2, 56, 2, 106, 47, 2, 119, 128, 50, 88, 33, 124, 49, 48, 134, 47, 2, 119]
# label_type = 128

In [19]:
# prefix =[0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 119, 2, 47, 134, 48, 49, 124, 33, 88, 50, 128, 119, 2, 47, 106, 2, 56, 2, 91, 119, 2, 91, 2, 56, 2, 48, 49, 2, 47, 48, 88, 50, 128, 119, 2, 47, 106, 2, 56, 2, 91, 119, 2, 91, 2, 56, 2, 48, 49, 2, 47, 48, 88, 50]
# postfix = [48, 2, 56, 2, 106, 91, 2, 56, 2, 91, 2, 56, 2, 106, 47, 2, 134, 50, 88, 48, 47, 2, 49, 48, 2, 56, 2, 106, 91, 2, 56, 2, 91, 2, 56, 2, 106, 47, 2, 134, 128, 50, 88, 48, 47, 2, 49, 48, 2, 56, 2, 106, 91, 2, 56, 2, 91, 2, 56, 2, 106, 47, 2, 134]
# label_type = 128

In [20]:
# predictor.predict(prefix, postfix, model)