**Title**: CVSS prediction\
**Description**: Load all pre-trained models to predict CVSS score\
**Developer**: Teck Lim\
**Create date**: 04/06/2021

# Import packages

In [16]:
import os
import pandas as pd
import json
import matplotlib.pyplot as plt
import numpy as np
import math
from google.colab import drive

!pip install transformers



In [2]:
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# CVSS Calculator

In [3]:
def round_up(input):
    int_input = round(input * 100000)
    if int_input % 10000 == 0:
        return int_input / 100000.0
    else:
        return (math.floor(int_input / 10000) + 1) / 10.0

def get_av_score(metric):
    if metric == 'network':
        return 0.85
    elif metric == 'adjacent_network':
        return 0.62
    elif metric == 'local':
        return 0.55
    elif metric == 'physical':
        return 0.20
    else:
        raise ValueError('Invalid metric value')

def get_ac_score(metric):
    if metric == 'low':
        return 0.77
    elif metric == 'high':
        return 0.44
    else:
        raise ValueError('Invalid metric value')

def get_pr_score(metric, s):
    if metric == 'none':
        return 0.85
    elif metric == 'low':
        return 0.68 if s == 'changed' else 0.62
    elif metric == 'high':
        return 0.50 if s == 'changed' else 0.27
    else:
        raise ValueError('Invalid metric value')

def get_ui_score(metric):
    if metric == 'none':
        return 0.85
    elif metric == 'required':
        return 0.62
    else:
        raise ValueError('Invalid metric value')

def get_c_score(metric):
    if metric == 'high':
        return 0.56
    elif metric == 'low':
        return 0.22
    elif metric == 'none':
        return 0
    else:
        raise ValueError('Invalid metric value')

def get_i_score(metric):
    if metric == 'high':
        return 0.56
    elif metric == 'low':
        return 0.22
    elif metric == 'none':
        return 0
    else:
        raise ValueError('Invalid metric value')

def get_a_score(metric):
    if metric == 'high':
        return 0.56
    elif metric == 'low':
        return 0.22
    elif metric == 'none':
        return 0
    else:
        raise ValueError('Invalid metric value')

def calculcate_iss(c, i, a):
    return 1 - (1-get_c_score(c)) * (1-get_i_score(i)) * (1-get_a_score(a))

def calculate_impact(s, c, i, a):
    iss = calculcate_iss(c, i, a)
    if s == 'unchanged':
        return 6.42 * iss
    elif s == 'changed':
        return (7.52 * (iss - 0.029)) - (3.25 * (iss - 0.02)**15)
    else:
        raise ValueError('Invalid metric value')

def calculate_exploitability(av, ac, pr, ui, s):
    return 8.22 * get_av_score(av) * get_ac_score(ac) * get_pr_score(pr, s) * get_ui_score(ui)

def calculate_scores(av, ac, pr, ui, s, c, i, a):
    av = av.lower()
    ac = ac.lower()
    pr = pr.lower()
    ui = ui.lower()
    s = s.lower()
    c = c.lower()
    i = i.lower()
    a = a.lower()

    impact = calculate_impact(s, c, i, a)
    exploitability = calculate_exploitability(av, ac, pr, ui, s)
    if impact <= 0:
        base = 0
    if s == 'unchanged':
        base = min((impact + exploitability), 10)
    elif s == 'changed':
        base = min(1.08 * (impact + exploitability), 10)
    return round_up(base), round(impact, 1), round(exploitability, 1)

In [4]:
calculate_scores('Network', 'High', 'Low', 'Required', 'Unchanged', 'Low', 'Low', 'Low')

(4.6, 3.4, 1.2)

## Validation

In [5]:
file_path = './gdrive/Shareddrives/twlim_ucsd_drive/Data/cve_train.csv'
df_train = pd.read_csv(file_path)

In [6]:
for idx, row in df_train.iterrows():
    av = row['attack_vector'].lower()
    ac = row['attack_complexity'].lower()
    pr = row['privileges_required'].lower()
    ui = row['user_interaction'].lower()
    s = row['scope'].lower()
    c = row['confidentiality'].lower()
    i = row['integrity'].lower()
    a = row['availability'].lower()
    base_score = row['base_score']
    exploitability_score = row['exploitability_score']
    impact_score = row['impact_score']

    try:
        cal_base_score, cal_impact_score, cal_exploitability_score = calculate_scores(av, ac, pr, ui, s, c, i, a)
    except Exception as e:
        print('Index: {}, {}'.format(idx, row['cve_id']))
        continue

    if base_score != cal_base_score or exploitability_score != cal_exploitability_score or impact_score != cal_impact_score:
        print('Index: {}, {}'.format(idx, row['cve_id']))
        print('Base score: {}, {}'.format(base_score, cal_base_score))
        print('Exploitability score: {}, {}'.format(exploitability_score, cal_exploitability_score))
        print('Impact score: {}, {}'.format(impact_score, cal_impact_score))
        continue

# Load the pre-trained models

In [7]:
# TODO: Need to load the correct models
av_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/AV'
ac_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/AC'
ui_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/UI'
pr_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/PR'
s_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/SC'
c_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/CI'
i_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/I'
a_output_dir = './gdrive/Shareddrives/twlim_ucsd_drive/Model/A'

In [8]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    
    # Tell PyTorch to use the GPU.    
    device = torch.device('cuda')
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device('cpu')

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [9]:
from transformers import BertForSequenceClassification, BertTokenizer

av_model = BertForSequenceClassification.from_pretrained(av_output_dir, output_hidden_states=True)
av_tokenizer = BertTokenizer.from_pretrained(av_output_dir)
av_model.to(device)

ac_model = BertForSequenceClassification.from_pretrained(ac_output_dir, output_hidden_states=True)
ac_tokenizer = BertTokenizer.from_pretrained(ac_output_dir)
ac_model.to(device)

pr_model = BertForSequenceClassification.from_pretrained(pr_output_dir, output_hidden_states=True)
pr_tokenizer = BertTokenizer.from_pretrained(pr_output_dir)
pr_model.to(device)

ui_model = BertForSequenceClassification.from_pretrained(ui_output_dir, output_hidden_states=True)
ui_tokenizer = BertTokenizer.from_pretrained(ui_output_dir)
ui_model.to(device)

s_model = BertForSequenceClassification.from_pretrained(s_output_dir, output_hidden_states=True)
s_tokenizer = BertTokenizer.from_pretrained(s_output_dir)
s_model.to(device)

c_model = BertForSequenceClassification.from_pretrained(c_output_dir, output_hidden_states=True)
c_tokenizer = BertTokenizer.from_pretrained(c_output_dir)
c_model.to(device)

i_model = BertForSequenceClassification.from_pretrained(i_output_dir, output_hidden_states=True)
i_tokenizer = BertTokenizer.from_pretrained(i_output_dir)
i_model.to(device)

a_model = BertForSequenceClassification.from_pretrained(a_output_dir, output_hidden_states=True)
a_tokenizer = BertTokenizer.from_pretrained(a_output_dir)
a_model.to(device)

print('All models loaded')

All models loaded


In [10]:
import torch
def text_to_embedding(tokenizer, model, max_len, in_text):
    encoded_dict = tokenizer.encode_plus(
                        in_text,                      # Sentence to encode.
                        add_special_tokens = True,    # Add '[CLS]' and '[SEP]'
                        max_length = max_len,         # Pad & truncate all sentences.
                        padding='max_length',
                        # pad_to_max_length = True,
                        truncation=True,
                        return_attention_mask = True, # Construct attn. masks.
                        return_tensors = 'pt',        # Return pytorch tensors.
                    )
    input_ids = encoded_dict['input_ids']
    attn_mask = encoded_dict['attention_mask']

    model.eval()

    input_ids = input_ids.to(device)
    attn_mask = attn_mask.to(device)

    with torch.no_grad():
        result = model(input_ids=input_ids,
                    token_type_ids=None,
                    attention_mask=attn_mask)

    # print(result.hidden_states[12][0][0])
    layer_i = 12
    batch_i = 0
    token_i = 0

    logits = result.logits
    logits = logits.detach().cpu().numpy()

    vec = result.hidden_states[layer_i][batch_i][token_i]
    vec = vec.detach().cpu().numpy()

    return logits, vec

# Predictions

## Single input

In [11]:
import textwrap

def print_custom(text, enabled=True):
    if enabled:
        print(text)

def predict(input_text, enabled=True):
    wrapper = textwrap.TextWrapper(initial_indent='  ', subsequent_indent='  ', width=120)
    print_custom('Embedding: \n\n{}'.format(wrapper.fill(input_text)), enabled)

    print_custom('\nPredictions:\n', enabled)
    logits, vec = text_to_embedding(av_tokenizer, av_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_av = 'Network'
    elif np.argmax(logits, axis=1) == 1:
        predicted_av = 'Adjacent_Network'
    elif np.argmax(logits, axis=1) == 2:
        predicted_av = 'Local'
    else:
        predicted_av = 'Physical'
    print_custom('  AV: {}'.format(predicted_av), enabled)
    # print('\nEmbedding shape:', str(vec.shape))

    logits, vec = text_to_embedding(ac_tokenizer, ac_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_ac = 'Low'
    else:
        predicted_ac = 'High'
    print_custom('  AC: {}'.format(predicted_ac), enabled)

    logits, vec = text_to_embedding(pr_tokenizer, pr_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_pr = 'None'
    elif np.argmax(logits, axis=1) == 1:
        predicted_pr = 'Low'
    else:
        predicted_pr = 'High'
    print_custom('  PR: {}'.format(predicted_pr), enabled)

    logits, vec = text_to_embedding(ui_tokenizer, ui_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_ui = 'None'
    else:
        predicted_ui = 'Required'
    print_custom('  UI: {}'.format(predicted_ui), enabled)

    logits, vec = text_to_embedding(s_tokenizer, s_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_s = 'Unchanged'
    else:
        predicted_s = 'Changed'
    print_custom('  S : {}'.format(predicted_s), enabled)

    logits, vec = text_to_embedding(c_tokenizer, c_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_c = 'None'
    elif np.argmax(logits, axis=1) == 1:
        predicted_c = 'Low'
    else:
        predicted_c = 'High'
    print_custom('  C : {}'.format(predicted_c), enabled)

    logits, vec = text_to_embedding(i_tokenizer, i_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_i = 'None'
    elif np.argmax(logits, axis=1) == 1:
        predicted_i = 'Low'
    else:
        predicted_i = 'High'
    print_custom('  I : {}'.format(predicted_i), enabled)

    logits, vec = text_to_embedding(a_tokenizer, a_model, 512, input_text)
    if np.argmax(logits, axis=1) == 0:
        predicted_a = 'None'
    elif np.argmax(logits, axis=1) == 1:
        predicted_a = 'Low'
    else:
        predicted_a = 'High'
    print_custom('  A : {}'.format(predicted_a), enabled)

    b, i, e = calculate_scores(predicted_av, predicted_ac, predicted_pr, predicted_ui, 
                            predicted_s, predicted_c, predicted_i, predicted_a)
    print_custom('\nCVSS scores:\n', enabled)
    print_custom('  Base score: {}'.format(b), enabled)
    print_custom('  Impact score: {}'.format(i), enabled)
    print_custom('  Exploitability score: {}'.format(e), enabled)
    return b, i, e

In [12]:
input_text_1 = 'Sudo before 1.6.6 contains an off-by-one error that can result in a heap-based buffer overflow that may allow ' \
      'local users to gain root privileges via special characters in the -p (prompt) argument, which are not properly expanded.'
input_text_2 = 'Ubiquiti Networks EdgeSwitch version 1.7.3 and prior suffer from an improperly neutralized element in an OS command ' \
      'due to lack of protection on the admin CLI, leading to code execution and privilege escalation greater than administrators themselves ' \
      'are allowed. An attacker with access to an admin account could escape the restricted CLI and execute arbitrary shell instructions.'
input_text_3 = 'A "javascript:" url loaded by a malicious page can obfuscate its location by blanking the URL displayed in the addressbar, ' \
      'allowing for an attacker to spoof an existing page without the malicious page\'s address being displayed correctly. This vulnerability affects Firefox < 52.'
input_text_4 = "stack over flow that caused by user inserting long text in chrome browser address url bar"

input_text = input_text_4
b, i, e = predict(input_text)

Embedding: 

  stack over flow that caused by user inserting long text in chrome browser address url bar

Predictions:

  AV: Network
  AC: Low
  PR: None
  UI: Required
  S : Unchanged
  C : None
  I : Low
  A : None

CVSS scores:

  Base score: 4.3
  Impact score: 1.4
  Exploitability score: 2.8


## Batch input data 

In [13]:
file_path = './gdrive/Shareddrives/twlim_ucsd_drive/Data/cve_test.csv'
df_test = pd.read_csv(file_path)
df_test = df_test.head(1000)

In [14]:
predicted_b_labels = list()
predicted_i_labels = list()
predicted_e_labels = list()
for idx, row in df_test.iterrows():
    b, i, e = predict(row['description'], False)
    predicted_b_labels.append(b)
    predicted_i_labels.append(i)
    predicted_e_labels.append(e)
    if (idx + 1) % 100 == 0:
        print('Processing index: {}'.format(idx + 1))

Processing index: 100
Processing index: 200
Processing index: 300
Processing index: 400
Processing index: 500
Processing index: 600
Processing index: 700
Processing index: 800
Processing index: 900
Processing index: 1000


In [15]:
from sklearn.metrics import r2_score, mean_squared_error
print('Base score:')
print('  MSE: {:.4f}'.format(mean_squared_error(predicted_b_labels, df_test['base_score'])))
print('  R2 : {:.4f}'.format(r2_score(predicted_b_labels, df_test['base_score'])))
print('Impact score:')
print('  MSE: {:.4f}'.format(mean_squared_error(predicted_i_labels, df_test['impact_score'])))
print('  R2 : {:.4f}'.format(r2_score(predicted_i_labels, df_test['impact_score'])))
print('Exploitability score:')
print('  MSE: {:.4f}'.format(mean_squared_error(predicted_e_labels, df_test['exploitability_score'])))
print('  R2 : {:.4f}'.format(r2_score(predicted_e_labels, df_test['exploitability_score'])))

Base score:
  MSE: 1.3615
  R2 : 0.5025
Impact score:
  MSE: 0.9196
  R2 : 0.5963
Exploitability score:
  MSE: 0.4170
  R2 : 0.4720


In [None]:
df_test['base_score_predicted'] = predicted_b_labels
df_test['exploitability_score_predicted'] = predicted_e_labels
df_test['impact_score_predicted'] = predicted_i_labels

In [None]:
test_results = './gdrive/Shareddrives/twlim_ucsd_drive/Data/cve_test_results_ivan.csv'
df_test.to_csv(test_results)