In [52]:
!pip install datasets accelerate peft bitsandbytes transformers trl



In [53]:
!pip install safetensors



In [54]:
import os
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [55]:
import pandas as pd

In [56]:
df_final = pd.read_csv('llama-2-cpacket-data.csv')

In [57]:
df_final.head()

Unnamed: 0,text
0,<s>[INST] The network flow has total header by...
1,<s>[INST] The network flow has total header by...
2,<s>[INST] The network flow has total header by...
3,<s>[INST] The network flow has total header by...
4,<s>[INST] The network flow has total header by...


In [58]:
dataset = Dataset.from_pandas(df_final)

In [59]:
dataset = dataset.shuffle(seed=42)

In [60]:
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)  # 80% train, 20% validation
train_dataset = train_test_split['train']
val_dataset = train_test_split['test']

In [61]:
# Import necessary libraries
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
from datasets import load_dataset

In [62]:


# Load your validation dataset
#val_dataset = load_dataset('your_val_dataset')  # Replace with your actual dataset

# Preprocess the val_dataset for Post-Training Evaluation

# Function to split the prompt and expected response
def preprocess_data(example):
    prompt = example['text'].split("[/INST]")[0] + "[/INST]"  # Prompt
    expected_response = example['text'].split("[/INST]")[1].strip()  # Expected response

    return {
        'prompt': prompt,
        'expected_response': expected_response
    }

In [63]:
# Apply preprocessing to your validation dataset
val_dataset = val_dataset.map(preprocess_data)

Map:   0%|          | 0/7500 [00:00<?, ? examples/s]

In [64]:
# Set compute dtype
compute_dtype = getattr(torch, "float16")

# Quantization configuration (same as during training)
from transformers import BitsAndBytesConfig
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [65]:
print(torch.cuda.is_available())  # This should return True if GPU is available
print(torch.cuda.device_count())

True
1


In [66]:
base_model = "NousResearch/Llama-2-7b-chat-hf"

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0},
    torch_dtype=compute_dtype
)

# Disable cache if necessary
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [67]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [74]:
# Load the base model with quantization


# Load the LoRA adapter weights (your fine-tuned model)
adapter_path = 'model/'  # Replace with the path to your adapter files
model = PeftModel.from_pretrained(model, adapter_path)

In [75]:
# Load the tokenizer


In [76]:
# Initialize the text generation pipeline with your fine-tuned model
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=500,
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

In [77]:
print(val_dataset.select(range(20)))

Dataset({
    features: ['text', 'prompt', 'expected_response'],
    num_rows: 20
})


In [82]:
# Generate responses for the validation set with tqdm progress bar
generated_labels = []

# tqdm wrapper for progress tracking
for example in tqdm(val_dataset.select(range(20)), desc="Generating responses"):
    prompt = example['prompt']  # Use the preprocessed prompt
    result = pipe(f"<s>{prompt}")[0]['generated_text']

    # Extract the generated response (remove the prompt part)
    generated_response = result.split("[/INST]")[1].strip()

    # Extract the label (Attack, Benign, Suspicious) from the generated response
    if "Attack" in generated_response:
        generated_labels.append("Attack")
    elif "Benign" in generated_response:
        generated_labels.append("Benign")
    elif "Suspicious" in generated_response:
        generated_labels.append("Suspicious")
    else:
        generated_labels.append("Unknown")  # Handle cases where no label is found


Generating responses: 100%|██████████| 20/20 [06:03<00:00, 18.18s/it]


In [83]:
print(generated_labels)

['Attack', 'Attack', 'Attack', 'Suspicious', 'Suspicious', 'Benign', 'Suspicious', 'Benign', 'Suspicious', 'Attack', 'Attack', 'Suspicious', 'Benign', 'Suspicious', 'Suspicious', 'Attack', 'Benign', 'Benign', 'Benign', 'Suspicious']


In [84]:
# Extract true labels with tqdm progress bar
true_labels = []

# tqdm wrapper for progress tracking
for example in tqdm(val_dataset.select(range(20)), desc="Extracting true labels"):
    expected_response = example['expected_response']  # True response

    # Extract the label (Attack, Benign, Suspicious) from the expected response
    if "Attack" in expected_response:
        true_labels.append("Attack")
    elif "Benign" in expected_response:
        true_labels.append("Benign")
    elif "Suspicious" in expected_response:
        true_labels.append("Suspicious")
    else:
        true_labels.append("Unknown")  # Handle cases where no label is found
print(true_labels)

Extracting true labels: 100%|██████████| 20/20 [00:00<00:00, 7880.33it/s]

['Suspicious', 'Attack', 'Attack', 'Attack', 'Attack', 'Benign', 'Suspicious', 'Benign', 'Attack', 'Attack', 'Attack', 'Suspicious', 'Benign', 'Suspicious', 'Benign', 'Attack', 'Suspicious', 'Benign', 'Benign', 'Suspicious']





In [85]:
# Compare the extracted labels and compute accuracy
from sklearn.metrics import accuracy_score, classification_report

# Calculate accuracy
accuracy = accuracy_score(true_labels, generated_labels)
print(f"Accuracy: {accuracy}")

# Optionally, print a more detailed classification report
print(classification_report(true_labels, generated_labels, labels=["Attack", "Benign", "Suspicious"]))

Accuracy: 0.7
              precision    recall  f1-score   support

      Attack       0.83      0.62      0.71         8
      Benign       0.83      0.83      0.83         6
  Suspicious       0.50      0.67      0.57         6

    accuracy                           0.70        20
   macro avg       0.72      0.71      0.71        20
weighted avg       0.73      0.70      0.71        20

