In [1]:
# Select the GPU *before* torch is imported. CUDA reads CUDA_VISIBLE_DEVICES
# once, when it is first initialised; assigning it after the availability
# check below would not change which devices this kernel can see.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

print("Hello")
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    print("GPU is available!")
    # Index 0 is the first device visible to this process.
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available.")

Hello
GPU is available!
GPU Name: NVIDIA GeForce RTX 4090


In [2]:
import os

# Process-wide switches, applied in one call.
# NOTE(review): CUDA reads CUDA_VISIBLE_DEVICES once, when it is first
# initialised; an assignment made after torch has already touched the GPU
# does not change the devices visible to this kernel.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "0",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

In [3]:
# Environment setup, kept commented out (run once in a fresh environment).
# NOTE(review): the groups below pin *different* versions of the same packages
# (transformers 4.39.1 vs 4.38.2, accelerate 0.28.0 vs 0.26.1, bitsandbytes
# 0.43.0 vs 0.42.0), and one group installs trl/peft from unpinned git heads.
# The recorded run reports torch 2.3.1+cu121, which matches neither the cu117
# index nor torch==2.1.2 — keep only the set that was actually used.
# If re-enabled in a notebook, prefer `%pip` over `!pip` so the packages are
# installed into the kernel's own environment.
# !pip install -r requirements.txt

# !pip install -q -U torch --index-url https://download.pytorch.org/whl/cu117
# !pip install -q -U transformers=="4.39.1"
# !pip install -q accelerate=="0.28.0"
# !pip install -q -i https://pypi.org/simple/ bitsandbytes=="0.43.0"
# !pip install -q -U datasets=="2.17.0"
# !pip install -q -U git+https://github.com/huggingface/trl
# !pip install -q -U git+https://github.com/huggingface/peft

# !pip install torch==2.1.2 tensorboard
# !pip install --upgrade transformers==4.38.2
# !pip install datasets==2.16.1
# !pip install accelerate==0.26.1
# !pip install evaluate==0.4.1
# !pip install bitsandbytes==0.42.0
# !pip install trl==0.7.11
# !pip install peft==0.8.2

In [4]:
# NOTE(review): a third transformers pin (4.40) next to the 4.39.1 / 4.38.2
# pins listed in the previous cell — record which version the results used.
# !pip install transformers==4.40


In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
import bitsandbytes as bnb

from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [7]:
# Log the installed PyTorch build so the recorded results can be tied to an environment.
print(f"pytorch version {torch.__version__}")

pytorch version 2.3.1+cu121


# 1 Load Dataset

In [8]:
filename = "./data/all-data.csv"

# Column names are supplied explicitly; with `names=` and no `header=`,
# pandas reads the first line of the file as data.
# NOTE(review): encoding_errors="replace" swaps any bytes that are not valid
# UTF-8 for U+FFFD instead of failing. If the CSV is stored in another
# encoding, non-ASCII characters are silently lost — confirm the file's
# actual encoding.
read_options = {
    "names": ["sentiment", "text"],
    "encoding": "utf-8",
    "encoding_errors": "replace",
}
df = pd.read_csv(filename, **read_options)
df.shape

(4846, 2)

In [9]:
# Class balance of the raw data: number of rows per sentiment label.
df["sentiment"].value_counts()

sentiment
neutral     2879
positive    1363
negative     604
Name: count, dtype: int64

In [10]:
# Let pandas show up to 200 characters per cell so the sentences are readable
# in the preview, then display the first five rows.
pd.options.display.max_colwidth = 200
df.head(n=5)

Unnamed: 0,sentiment,text
0,neutral,"According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing ."
1,neutral,"Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said ."
2,negative,The international electronic industry company Elcoteq has laid off tens of employees from its Tallinn facility ; contrary to earlier layoffs the company contracted the ranks of its office workers ...
3,positive,With the new production plant the company would increase its capacity to meet the expected increase in demand and would improve the use of raw materials and therefore increase the production profi...
4,positive,"According to the company 's updated strategy for the years 2009-2012 , Basware targets a long-term net sales growth in the range of 20 % -40 % with an operating profit margin of 10 % -20 % of net ..."


# 2 Split into training, test, and eval datasets

In [11]:
# Split the raw frame into train / test / eval subsets using the project's
# DataFrameSplitter helper.
from main.feature_engineer import DataFrameSplitter


# NOTE(review): the recorded output shows 300 rows per sentiment class in the
# training split, so train_size looks like a per-class count — confirm in
# DataFrameSplitter. No seed is passed here; check that split() is seeded so
# the split is reproducible across runs.
splitter = DataFrameSplitter(df, train_size=300)

# Perform the split to obtain train, test, and eval sets
X_train_df, X_test_df, X_eval_df = splitter.split()
# Persist the test split exactly as returned by split(), i.e. before any
# prompt text is generated from it in the next section.
X_test_df.to_csv("data/X_test_df.csv", index=False)


# Print shapes and target column breakup of the train data
print(f'Shape of train, test, and eval data are {X_train_df.shape}, {X_test_df.shape}, {X_eval_df.shape}')
print(f'\nBreak up by target column in train data is\n{X_train_df.sentiment.value_counts()}')
    

Shape of train, test, and eval data are (900, 2), (900, 2), (150, 2)

Break up by target column in train data is
sentiment
neutral     300
positive    300
negative    300
Name: count, dtype: int64


# 3 Prompt Engineering

In [12]:
# Turn each split into prompt DataFrames with the project's PromptGenerator.
from main.prompt_engineer import PromptGenerator

# Inputs are the X_train_df / X_eval_df / X_test_df frames produced by the split cell.
# NOTE(review): each name is reassigned to the generator's output, so this cell
# is not idempotent — re-running it feeds already-processed frames back in.
# Re-run the split cell first.
prompt_generator = PromptGenerator()

# Generate training and validation prompts
# The eval split deliberately uses the same 'train' prompt type as the training split.
# NOTE(review): presumably the 'train' prompt includes the label and the 'test'
# prompt omits it — confirm in PromptGenerator.
X_train_df = prompt_generator.generate_dataframe_prompts(X_train_df, prompt_type='train')
X_eval_df = prompt_generator.generate_dataframe_prompts(X_eval_df, prompt_type='train')




# Generate test prompts
# Capture the ground-truth labels first: X_test_df is replaced on the next line.
y_true = X_test_df.sentiment
X_test_df = prompt_generator.generate_dataframe_prompts(X_test_df, prompt_type='test')


In [13]:
# Wrap the train and eval prompt DataFrames as Hugging Face `Dataset` objects.
# NOTE(review): from_pandas is called with its defaults; check whether the
# frames' index ends up as an extra column in the resulting datasets.
train_data, eval_data = (
    Dataset.from_pandas(frame) for frame in (X_train_df, X_eval_df)
)

# 4 Load Base Model

In [14]:
from main.model_base import QuantizedBaseModelInitializer

# Candidate base checkpoints (Hugging Face Hub ids); exactly one assignment is active.
# model_name = "MaziyarPanahi/Mistral-7B-Instruct-v0.2"
# model_name = "NousResearch/Llama-2-7b-hf"
# model_name = "mhenrichsen/gemma-7b"
# model_name = "microsoft/phi-2"
# model_name = "microsoft/Phi-3-mini-4k-instruct"
model_name = "NousResearch/Meta-Llama-3-8B-Instruct"
# NOTE(review): the output saved under this cell shows files being downloaded
# from microsoft/Phi-3-mini-4k-instruct, so it was recorded with a different
# model_name than the one active here. Re-run the notebook before attributing
# the metrics below to this checkpoint.
# Create an instance of QuantizedBaseModelInitializer
initializer = QuantizedBaseModelInitializer(model_name)

# Initialize the model and tokenizer with quantization
model, tokenizer = initializer.initialize()

config.json:   0%|          | 0.00/904 [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_phi3.py:   0%|          | 0.00/73.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


model.safetensors.index.json:   0%|          | 0.00/16.3k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/568 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# 5 Evaluate Base Model Performance

In [15]:
# Baseline: score the base model on the test prompts before any fine-tuning.
from main.predict import ModelPredictor
from main.evaluation import ModelEvaluator



# Generate one prediction per row of the test prompt frame.
predictor = ModelPredictor(model, tokenizer)
y_pred = predictor.predict(X_test_df)


# Compare against y_true, which was taken from X_test_df.sentiment before the
# prompts were generated. Per the recorded output, evaluate() prints overall
# and per-label accuracy, a classification report and a confusion matrix.
# NOTE(review): the mapping of labels 0/1/2 to sentiment names is not visible
# here — confirm in ModelEvaluator.
evaluator = ModelEvaluator()
evaluator.evaluate(y_true, y_pred)

Predicting:   0%|          | 0/900 [00:00<?, ?it/s]

You are not running the flash-attention implementation, expect numerical differences.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Accuracy: 0.349
Accuracy for label 0: 0.050
Accuracy for label 1: 0.963
Accuracy for label 2: 0.033

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.05      0.09       300
           1       0.33      0.96      0.50       300
           2       0.59      0.03      0.06       300

    accuracy                           0.35       900
   macro avg       0.57      0.35      0.22       900
weighted avg       0.57      0.35      0.22       900


Confusion Matrix:
[[ 15 285   0]
 [  4 289   7]
 [  0 290  10]]


# 7 Fine-Tune Base Model on the Given Dataset

In [16]:
# Fine-tune the loaded model on the train prompts, validating on the eval prompts.
# NOTE(review): hyperparameters (LoRA config, epochs, learning rate, output
# directory) live inside PEFTModelTrainer and are not visible in this notebook.
from main.train_with_fine_tuning import PEFTModelTrainer
trainer = PEFTModelTrainer(model, tokenizer, train_data, eval_data)

# Start the training process
# The recorded run took about 8.5 minutes (see the timestamps in the output below).
trainer.train_model()

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/150 [00:00<?, ? examples/s]


Training started at 2024-06-06 10:08:55.646846-04:00


Epoch,Training Loss,Validation Loss
0,0.8891,0.856143
2,0.6313,0.801291


Training completed at 2024-06-06 10:17:28.339277-04:00
Training duration was 0:08:32.692431


# 8 Evaluate Fine-Tuned Model Performance

In [17]:

# Repeat the baseline evaluation after training, on the same test prompts and labels.
# NOTE(review): this passes the same `model` object that was handed to
# PEFTModelTrainer. It measures the fine-tuned weights only if train_model()
# updates that object in place; the accuracy jump in the recorded output
# suggests it does, but confirm in PEFTModelTrainer.
predictor = ModelPredictor(model, tokenizer)
y_pred = predictor.predict(X_test_df)


evaluator = ModelEvaluator()
evaluator.evaluate(y_true, y_pred)

Predicting:   0%|          | 0/900 [00:00<?, ?it/s]

Accuracy: 0.872
Accuracy for label 0: 0.933
Accuracy for label 1: 0.873
Accuracy for label 2: 0.810

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.93      0.95       300
           1       0.79      0.87      0.83       300
           2       0.88      0.81      0.85       300

    accuracy                           0.87       900
   macro avg       0.88      0.87      0.87       900
weighted avg       0.88      0.87      0.87       900


Confusion Matrix:
[[280  17   3]
 [  9 262  29]
 [  3  54 243]]


# 9 Predict with Fine-Tuned Model