In [None]:
!pip install -q accelerate>=0.21.0 -U
!pip install -q transformers[torch]
!pip install datasets

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[?25l     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/547.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m542.7/547.8 kB[0m [31m17.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m547.8/547.8 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m40.8/40.8 MB[0m [31m16.7 MB/s[0m eta [36m0:00:

### pre-eliminary test

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Define the labels
labels = ["Text-To-Text", "Image-To-Text", "Text-To-Audio", "Audio-To-Text", "Text-To-Image"]

# Load the model and tokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels))
model.to(device)  # Move the model to the appropriate device

# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Function to train the model
def train_model(train_texts, train_labels):
    dataset = CustomDataset(train_texts, train_labels, tokenizer)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
    )

    trainer.train()

# Training data
train_texts = [
    "Summarize this article in 100 words.",
    "Describe the main elements in this photograph.",
    "Convert this paragraph into spoken words.",
    "Transcribe the speech in this audio file.",
    "Create an image of a sunset over a mountain range."
]
train_labels = [0, 1, 2, 3, 4]  # Corresponding to the order in 'labels'

# Train the model
train_model(train_texts, train_labels)

# Function to classify prompts
def classify_prompt(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
    # Move inputs to the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits.softmax(dim=-1)
    predicted_label = labels[probs.argmax().item()]
    return predicted_label

# Example usage
prompts_to_classify = [
    "Translate this sentence to French.",
    "What objects can you see in this image?",
    "Convert this text to speech with a British accent.",
    "Transcribe the conversation in this audio file.",
    "Generate an image of a futuristic cityscape."
]

for prompt in prompts_to_classify:
    print(f"Prompt: {prompt}")
    print(f"Classification: {classify_prompt(prompt)}\n")

Using device: cuda


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


Prompt: Translate this sentence to French.
Classification: Text-To-Text

Prompt: What objects can you see in this image?
Classification: Text-To-Text

Prompt: Convert this text to speech with a British accent.
Classification: Text-To-Text

Prompt: Transcribe the conversation in this audio file.
Classification: Text-To-Text

Prompt: Generate an image of a futuristic cityscape.
Classification: Text-To-Text



In [None]:
import json
json_dataset = [
  {"prompt": "Summarize the main points of this article in 3 sentences.", "classification": "Text-To-Text"},
  {"prompt": "Translate the following paragraph from English to Spanish.", "classification": "Text-To-Text"},
  {"prompt": "Rewrite this sentence to make it more formal.", "classification": "Text-To-Text"},
  {"prompt": "Describe the key elements visible in this photograph.", "classification": "Image-To-Text"},
  {"prompt": "What emotions are conveyed by the facial expressions in this image?", "classification": "Image-To-Text"},
  {"prompt": "List the objects present in this still life painting.", "classification": "Image-To-Text"},
  {"prompt": "Convert this text into an audio file with a calm, soothing voice.", "classification": "Text-To-Audio"},
  {"prompt": "Generate speech from this written dialogue, using different voices for each character.", "classification": "Text-To-Audio"},
  {"prompt": "Create a text-to-speech version of this poem with appropriate pauses and intonation.", "classification": "Text-To-Audio"},
  {"prompt": "Transcribe the conversation in this audio recording.", "classification": "Audio-To-Text"},
  {"prompt": "Convert this podcast episode into a written transcript.", "classification": "Audio-To-Text"},
  {"prompt": "What is being said in this voice message?", "classification": "Audio-To-Text"},
  {"prompt": "Generate an image of a serene lake surrounded by pine trees at sunset.", "classification": "Text-To-Image"},
  {"prompt": "Create a digital illustration of a futuristic cityscape with flying cars.", "classification": "Text-To-Image"},
  {"prompt": "Produce a realistic image of a golden retriever playing in a park.", "classification": "Text-To-Image"},
  {"prompt": "Paraphrase this paragraph to make it easier to understand.", "classification": "Text-To-Text"},
  {"prompt": "Generate a concise summary of the key points in this research paper.", "classification": "Text-To-Text"},
  {"prompt": "Translate this poem from French to English, maintaining its rhythm if possible.", "classification": "Text-To-Text"},
  {"prompt": "Identify and list all the animals present in this wildlife photograph.", "classification": "Image-To-Text"},
  {"prompt": "Describe the architectural style of the building shown in this image.", "classification": "Image-To-Text"},
  {"prompt": "What weather conditions are depicted in this landscape picture?", "classification": "Image-To-Text"},
  {"prompt": "Transform this written story into an audiobook narration.", "classification": "Text-To-Audio"},
  {"prompt": "Create a spoken version of this news article with a professional tone.", "classification": "Text-To-Audio"},
  {"prompt": "Convert this text to speech, emphasizing key words and phrases.", "classification": "Text-To-Audio"},
  {"prompt": "Provide a written transcript of this TED talk.", "classification": "Audio-To-Text"},
  {"prompt": "Convert this recorded lecture into text format.", "classification": "Audio-To-Text"},
  {"prompt": "Transcribe the lyrics from this song recording.", "classification": "Audio-To-Text"},
  {"prompt": "Create an image of a mystical forest with glowing fireflies and a full moon.", "classification": "Text-To-Image"},
  {"prompt": "Generate a photorealistic portrait of a wise old man with a long white beard.", "classification": "Text-To-Image"},
  {"prompt": "Produce a digital painting of a bustling marketplace in a medieval town.", "classification": "Text-To-Image"},
  {"prompt": "Simplify this complex technical explanation for a general audience.", "classification": "Text-To-Text"},
  {"prompt": "Create a bullet-point list of the main arguments in this essay.", "classification": "Text-To-Text"},
  {"prompt": "Translate this legal document from English to Mandarin Chinese.", "classification": "Text-To-Text"},
  {"prompt": "What type of clothing are the people wearing in this historical photograph?", "classification": "Image-To-Text"},
  {"prompt": "Describe the composition and use of color in this abstract painting.", "classification": "Image-To-Text"},
  {"prompt": "Identify the make and model of the car shown in this image.", "classification": "Image-To-Text"},
  {"prompt": "Convert this children's story into a cheerful audio narration with sound effects.", "classification": "Text-To-Audio"},
  {"prompt": "Create a dramatic reading of this Shakespearean soliloquy.", "classification": "Text-To-Audio"},
  {"prompt": "Generate a text-to-speech version of this recipe with clear pronunciation.", "classification": "Text-To-Audio"},
  {"prompt": "Transcribe the dialogue from this movie scene.", "classification": "Audio-To-Text"},
  {"prompt": "Convert this recorded interview into a written Q&A format.", "classification": "Audio-To-Text"},
  {"prompt": "Provide a text transcription of this voice mail message.", "classification": "Audio-To-Text"},
  {"prompt": "Create an image of a steampunk-inspired flying machine.", "classification": "Text-To-Image"},
  {"prompt": "Generate a digital illustration of a peaceful Zen garden.", "classification": "Text-To-Image"},
  {"prompt": "Produce a realistic image of a cozy cabin in a snowy forest.", "classification": "Text-To-Image"},
  {"prompt": "Expand this brief outline into a full paragraph.", "classification": "Text-To-Text"},
  {"prompt": "Condense this long article into a 200-word summary.", "classification": "Text-To-Text"},
  {"prompt": "Translate this technical manual from German to English.", "classification": "Text-To-Text"},
  {"prompt": "What text can be seen on the signs in this street photograph?", "classification": "Image-To-Text"},
  {"prompt": "Describe the facial features and expression of the person in this portrait.", "classification": "Image-To-Text"},
  {"prompt": "What breed of dog is shown in this image?", "classification": "Image-To-Text"},
  {"prompt": "Convert this motivational quote into an enthusiastic audio clip.", "classification": "Text-To-Audio"},
  {"prompt": "Create a calming bedtime story narration from this text.", "classification": "Text-To-Audio"},
  {"prompt": "Generate a text-to-speech version of this medical instructions with clear enunciation.", "classification": "Text-To-Audio"},
  {"prompt": "Transcribe the lyrics and identify the instruments in this music recording.", "classification": "Audio-To-Text"},
  {"prompt": "Convert this standup comedy routine into a written script with timing notes.", "classification": "Audio-To-Text"},
  {"prompt": "Provide a text transcript of this nature documentary narration.", "classification": "Audio-To-Text"},
  {"prompt": "Create an image of an underwater scene with colorful coral and tropical fish.", "classification": "Text-To-Image"},
  {"prompt": "Generate a digital painting of a dragon perched on a mountain peak.", "classification": "Text-To-Image"},
  {"prompt": "Produce a photorealistic image of a bustling New York City street at night.", "classification": "Text-To-Image"},
  {"prompt": "Rephrase this academic text for a middle school audience.", "classification": "Text-To-Text"},
  {"prompt": "Create a rhyming version of this short story.", "classification": "Text-To-Text"},
  {"prompt": "Translate this marketing slogan from English to Japanese, preserving the tone.", "classification": "Text-To-Text"},
  {"prompt": "Describe the body language of the people in this group photo.", "classification": "Image-To-Text"},
  {"prompt": "What type of geological formations can be seen in this landscape image?", "classification": "Image-To-Text"},
  {"prompt": "Identify the species of flowers shown in this botanical photograph.", "classification": "Image-To-Text"},
  {"prompt": "Convert this written speech into an audio format with appropriate pauses and emphasis.", "classification": "Text-To-Audio"},
  {"prompt": "Create a spooky narration of this ghost story with eerie background effects.", "classification": "Text-To-Audio"},
  {"prompt": "Generate a text-to-speech version of this weather forecast with a professional tone.", "classification": "Text-To-Audio"},
  {"prompt": "Transcribe the conversation and background noises in this ambient recording.", "classification": "Audio-To-Text"},
  {"prompt": "Convert this radio advertisement into a written script format.", "classification": "Audio-To-Text"},
  {"prompt": "Provide a text transcript of this foreign language lesson, including pronunciation notes.", "classification": "Audio-To-Text"},
  {"prompt": "Create an image of a retro-futuristic space station orbiting a ringed planet.", "classification": "Text-To-Image"},
  {"prompt": "Generate a digital illustration of a whimsical treehouse in an enchanted forest.", "classification": "Text-To-Image"},
  {"prompt": "Produce a photorealistic image of a tranquil Japanese tea garden in autumn.", "classification": "Text-To-Image"}
]

# save the datast into dataset.json
with open('dataset.json', 'w') as f:
    json.dump(json_dataset, f)

In [None]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split

# Define the labels
labels = ["Text-To-Text", "Image-To-Text", "Text-To-Audio", "Audio-To-Text", "Text-To-Image"]

# Load data from JSON file
def load_data(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    texts = [item['prompt'] for item in data]
    label_indices = [labels.index(item['classification']) for item in data]  # Convert classification to numeric label
    return texts, label_indices

# Load the data
file_path = 'dataset.json'
texts, label_indices = load_data(file_path)

# Split data into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(texts, label_indices, test_size=0.2, random_state=42)

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load the model and tokenizer
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels))
model.to(device)

# Custom dataset class (unchanged)
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Function to train the model
def train_model(train_texts, train_labels, val_texts, val_labels):
    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=10,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )

    trainer.train()

# Train the model
train_model(train_texts, train_labels, val_texts, val_labels)

# Function to classify prompts
def classify_prompt(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    probs = outputs.logits.softmax(dim=-1)
    predicted_label = labels[probs.argmax().item()]
    return predicted_label

# Example usage
prompts_to_classify = [
    "Translate this sentence to French.",
    "What objects can you see in this image?",
    "Convert this text to speech with a British accent.",
    "Transcribe the conversation in this audio file.",
    "Generate an image of a futuristic cityscape."
]

for prompt in prompts_to_classify:
    print(f"Prompt: {prompt}")
    print(f"Classification: {classify_prompt(prompt)}\n")

Using device: cuda


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,No log,1.668461
2,No log,1.668148
3,No log,1.667409
4,No log,1.666166
5,No log,1.664101
6,No log,1.661463
7,No log,1.658535
8,No log,1.654888
9,No log,1.649631
10,No log,1.639919


Prompt: Translate this sentence to French.
Classification: Text-To-Text

Prompt: What objects can you see in this image?
Classification: Text-To-Image

Prompt: Convert this text to speech with a British accent.
Classification: Text-To-Text

Prompt: Transcribe the conversation in this audio file.
Classification: Text-To-Text

Prompt: Generate an image of a futuristic cityscape.
Classification: Text-To-Image



### with metrics of evaluations

In [None]:
import json
import torch
import logging
import csv
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import load_metric

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Define the labels
labels = ["Text-To-Text", "Image-To-Text", "Text-To-Audio", "Audio-To-Text", "Text-To-Image"]

# Load data from JSON file
def load_data(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    texts = [item['prompt'] for item in data]
    label_indices = [labels.index(item['classification']) for item in data]
    return texts, label_indices

# Custom dataset class
class CustomDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Function to compute metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted', zero_division=1)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Function to save metrics to CSV
def save_metrics_to_csv(metrics, filename='metrics.csv'):
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Accuracy', 'F1', 'Precision', 'Recall'])
        for metric in metrics:
            if 'eval_accuracy' in metric:
                writer.writerow([
                    metric['epoch'],
                    metric['eval_accuracy'],
                    metric['eval_f1'],
                    metric['eval_precision'],
                    metric['eval_recall']
                ])
    logger.info(f"Metrics saved to {filename}")

# Function to train the model
def train_model(train_texts, train_labels, val_texts, val_labels):
    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)


    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=best_params['num_epochs'],
        per_device_train_batch_size=best_params['batch_size'],
        learning_rate=best_params['learning_rate'],
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="no",  # Don't save every checkpoint
        load_best_model_at_end=False,  # We're handling this ourselves
        metric_for_best_model='f1',
        greater_is_better=True,
        push_to_hub=False,
    )



    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[BestModelCallback(save_threshold=0.0)]  # Set threshold to 0 to always
    )

    trainer.train()

    # Evaluate the model after training
    eval_results = trainer.evaluate()
    logger.info(f"Final evaluation results: {eval_results}")

    # Save the evaluation metrics
    save_metrics_to_csv(trainer.state.log_history)

# Main execution
if __name__ == "__main__":
    # Load the data
    file_path = 'dataset.json'
    texts, label_indices = load_data(file_path)

    # Split data into training and validation sets
    train_texts, val_texts, train_labels, val_labels = train_test_split(texts, label_indices, test_size=0.2, random_state=42)

    # Check if CUDA is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logger.info(f"Using device: {device}")

    # Load the model and tokenizer
    model_name = "distilbert-base-uncased"  # Using the base model without task-specific head
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(labels))
    model.to(device)

    # Train the model
    train_model(train_texts, train_labels, val_texts, val_labels)

    # Function to classify prompts
    def classify_prompt(prompt):
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        probs = outputs.logits.softmax(dim=-1)
        predicted_label = labels[probs.argmax().item()]
        return predicted_label

    # Example usage
    prompts_to_classify = [
        "Translate this sentence to French.",
        "What objects can you see in this image?",
        "Convert this text to speech with a British accent.",
        "Transcribe the conversation in this audio file.",
        "Generate an image of a futuristic cityscape."
    ]

    for prompt in prompts_to_classify:
        logger.info(f"Prompt: {prompt}")
        logger.info(f"Classification: {classify_prompt(prompt)}\n")

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.60894,0.2,0.085714,0.721212,0.2
2,No log,1.608889,0.2,0.085714,0.721212,0.2
3,1.602600,1.608193,0.2,0.085714,0.721212,0.2
4,1.602600,1.607185,0.2,0.085714,0.721212,0.2
5,1.598700,1.605837,0.2,0.085714,0.721212,0.2
6,1.598700,1.604504,0.266667,0.130403,0.753333,0.266667
7,1.598700,1.60366,0.333333,0.227186,0.668333,0.333333
8,1.571800,1.603388,0.466667,0.342857,0.743333,0.466667
9,1.571800,1.60231,0.4,0.297619,0.708889,0.4
10,1.537700,1.597129,0.4,0.302381,0.713333,0.4


### enhancement

In [None]:
import json
import random

prompts = [
    # Text-To-Text
    {"prompt": "Summarize the main points of this article in 3 sentences.", "classification": "Text-To-Text"},
    {"prompt": "Translate the following paragraph from English to French.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this sentence to make it more formal.", "classification": "Text-To-Text"},
    {"prompt": "Generate a short story based on these keywords: moon, astronaut, mystery.", "classification": "Text-To-Text"},
    {"prompt": "Convert this technical jargon into layman's terms.", "classification": "Text-To-Text"},
    {"prompt": "Paraphrase this quote without changing its meaning.", "classification": "Text-To-Text"},
    {"prompt": "Create a poem using the following words: sunset, ocean, whisper.", "classification": "Text-To-Text"},
    {"prompt": "Explain the concept of photosynthesis as if you're talking to a 5-year-old.", "classification": "Text-To-Text"},
    {"prompt": "Write a professional email declining a job offer.", "classification": "Text-To-Text"},
    {"prompt": "Translate this idiomatic expression into three different languages.", "classification": "Text-To-Text"},
    {"prompt": "Summarize the plot of 'Romeo and Juliet' in one paragraph.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this negative review in a more positive tone.", "classification": "Text-To-Text"},
    {"prompt": "Create a list of 10 interview questions for a software developer position.", "classification": "Text-To-Text"},
    {"prompt": "Write a step-by-step guide on how to change a car tire.", "classification": "Text-To-Text"},
    {"prompt": "Translate this legal document from legalese to plain English.", "classification": "Text-To-Text"},
    {"prompt": "Generate a catchy slogan for a new eco-friendly product.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this paragraph to improve its clarity and coherence.", "classification": "Text-To-Text"},
    {"prompt": "Create a short script for a 30-second commercial about a new smartphone.", "classification": "Text-To-Text"},
    {"prompt": "Summarize the key points of the theory of relativity in simple terms.", "classification": "Text-To-Text"},
    {"prompt": "Write a persuasive argument for implementing a four-day work week.", "classification": "Text-To-Text"},
    {"prompt": "Translate this recipe from metric to imperial measurements.", "classification": "Text-To-Text"},
    {"prompt": "Create a fictional news article about the discovery of a new planet.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this academic paper abstract for a general audience.", "classification": "Text-To-Text"},
    {"prompt": "Generate a list of potential names for a new line of organic snacks.", "classification": "Text-To-Text"},
    {"prompt": "Write a condolence letter to a friend who lost a family member.", "classification": "Text-To-Text"},
    {"prompt": "Translate this poem from English to Japanese, preserving its rhythm if possible.", "classification": "Text-To-Text"},
    {"prompt": "Create a user manual for a complex smart home system.", "classification": "Text-To-Text"},
    {"prompt": "Summarize the main arguments of this political debate in bullet points.", "classification": "Text-To-Text"},
    {"prompt": "Write a job description for a position as a data scientist.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this children's story to make it suitable for adult readers.", "classification": "Text-To-Text"},
    {"prompt": "Generate a list of FAQs for a new online banking service.", "classification": "Text-To-Text"},
    {"prompt": "Translate this business proposal from English to Mandarin Chinese.", "classification": "Text-To-Text"},
    {"prompt": "Write a brief biography of Albert Einstein focusing on his major contributions.", "classification": "Text-To-Text"},
    {"prompt": "Create a detailed outline for a research paper on climate change.", "classification": "Text-To-Text"},
    {"prompt": "Rewrite this technical manual in a more user-friendly style.", "classification": "Text-To-Text"},
    {"prompt": "Generate a series of social media posts promoting a new fitness app.", "classification": "Text-To-Text"},
    {"prompt": "Summarize the plot of 'War and Peace' in 500 words or less.", "classification": "Text-To-Text"},
    {"prompt": "Write a set of instructions for assembling a piece of furniture.", "classification": "Text-To-Text"},
    {"prompt": "Create a glossary of terms related to artificial intelligence.", "classification": "Text-To-Text"},
    {"prompt": "Translate this medical diagnosis from medical terminology to layman's terms.", "classification": "Text-To-Text"},

    # Image-To-Text
    {"prompt": "Describe the key elements visible in this photograph.", "classification": "Image-To-Text"},
    {"prompt": "What emotions are conveyed by the facial expressions in this image?", "classification": "Image-To-Text"},
    {"prompt": "List the objects present in this still life painting.", "classification": "Image-To-Text"},
    {"prompt": "Describe the composition and use of color in this abstract artwork.", "classification": "Image-To-Text"},
    {"prompt": "What is the main subject of this landscape photograph?", "classification": "Image-To-Text"},
    {"prompt": "Analyze the body language of the people in this group photo.", "classification": "Image-To-Text"},
    {"prompt": "Describe the architectural style of the building in this image.", "classification": "Image-To-Text"},
    {"prompt": "What period of history does this painting appear to depict?", "classification": "Image-To-Text"},
    {"prompt": "Identify the species of animals visible in this wildlife photograph.", "classification": "Image-To-Text"},
    {"prompt": "Describe the weather conditions apparent in this outdoor scene.", "classification": "Image-To-Text"},
    {"prompt": "What details can you observe about the clothing worn by people in this image?", "classification": "Image-To-Text"},
    {"prompt": "Describe the overall mood or atmosphere conveyed by this photograph.", "classification": "Image-To-Text"},
    {"prompt": "What can you infer about the setting of this image based on visual cues?", "classification": "Image-To-Text"},
    {"prompt": "Identify any symbols or metaphors present in this piece of art.", "classification": "Image-To-Text"},
    {"prompt": "Describe the texture and materials of the objects in this close-up photo.", "classification": "Image-To-Text"},
    {"prompt": "What action or event appears to be taking place in this image?", "classification": "Image-To-Text"},
    {"prompt": "Analyze the use of light and shadow in this black and white photograph.", "classification": "Image-To-Text"},
    {"prompt": "Describe the layout and design elements of this infographic.", "classification": "Image-To-Text"},
    {"prompt": "What can you deduce about the time period of this historical photograph?", "classification": "Image-To-Text"},
    {"prompt": "Identify the type of ecosystem shown in this nature photograph.", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual style and artistic techniques used in this illustration.", "classification": "Image-To-Text"},
    {"prompt": "What story or narrative is suggested by the elements in this image?", "classification": "Image-To-Text"},
    {"prompt": "Analyze the composition and framing of this portrait photograph.", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual characteristics of the food items in this culinary photo.", "classification": "Image-To-Text"},
    {"prompt": "What cultural elements or traditions are represented in this image?", "classification": "Image-To-Text"},
    {"prompt": "What cultural elements or traditions are represented in this image?", "classification": "Image-To-Text"},
    {"prompt": "Describe the typography and visual hierarchy in this poster design.", "classification": "Image-To-Text"},
    {"prompt": "Analyze the perspective and depth in this architectural rendering.", "classification": "Image-To-Text"},
    {"prompt": "What emotions or mood does this abstract painting evoke?", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual elements that contribute to the brand identity in this logo.", "classification": "Image-To-Text"},
    {"prompt": "What can you infer about the character from this portrait painting?", "classification": "Image-To-Text"},
    {"prompt": "Identify the key features of the landscape in this satellite image.", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual storytelling elements in this comic book panel.", "classification": "Image-To-Text"},
    {"prompt": "What technical details can you observe in this engineering diagram?", "classification": "Image-To-Text"},
    {"prompt": "Analyze the use of negative space in this minimalist design.", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual representation of data in this scientific graph.", "classification": "Image-To-Text"},
    {"prompt": "What details can you notice about the fashion and style in this vintage photograph?", "classification": "Image-To-Text"},
    {"prompt": "Identify the geometric shapes and patterns in this abstract sculpture.", "classification": "Image-To-Text"},
    {"prompt": "Describe the visual elements that create movement in this action photograph.", "classification": "Image-To-Text"},

    # Text-To-Audio
    {"prompt": "Convert this text into an audio file with a calm, soothing voice.", "classification": "Text-To-Audio"},
    {"prompt": "Generate speech from this written dialogue, using different voices for each character.", "classification": "Text-To-Audio"},
    {"prompt": "Create a text-to-speech version of this poem with appropriate pauses and intonation.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this news article into an audio format suitable for a radio broadcast.", "classification": "Text-To-Audio"},
    {"prompt": "Generate an audiobook narration for this short story excerpt.", "classification": "Text-To-Audio"},
    {"prompt": "Create a voiceover for this product description with an enthusiastic tone.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this academic lecture transcript into an audio format.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this weather forecast.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this recipe with clear, step-by-step instructions.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this motivational quote into an inspiring audio clip.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this children's bedtime story.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio narration for this museum exhibit description.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this meditation script into a calming audio guide.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this technical manual with clear pronunciation.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this travel itinerary with an excited tone.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this foreign language phrase book into audio for language learners.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this legal disclaimer at a faster pace.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio narration for this nature documentary script.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this stand-up comedy routine into an audio performance.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this emergency evacuation procedure.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this guided city tour script.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this fictional character's monologue into an emotive audio performance.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this scientific paper abstract.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio narration for this historical timeline.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this set of assembly instructions into a clear audio guide.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this company mission statement.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this workout routine with energetic narration.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this restaurant menu into an audio format for visually impaired customers.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this product user manual.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio narration for this art gallery guide.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this daily affirmation into a motivational audio clip.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this financial report summary.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this guided meditation script with a tranquil voice.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this theatre play script into an audio drama performance.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this software tutorial.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio narration for this wildlife identification guide.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this poetry collection into an expressive audio recitation.", "classification": "Text-To-Audio"},
    {"prompt": "Generate a text-to-speech version of this product recall notice.", "classification": "Text-To-Audio"},
    {"prompt": "Create an audio version of this historical speech with appropriate gravitas.", "classification": "Text-To-Audio"},
    {"prompt": "Convert this educational lesson plan into an engaging audio lecture.", "classification": "Text-To-Audio"},

    # Audio-To-Text
    {"prompt": "Transcribe the conversation in this audio recording.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this podcast episode into a written transcript.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the speech given in this audio clip.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the lyrics of this song into text format.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio book chapter into a written document.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this recorded interview.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the dialogue from this movie scene.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this voice memo into written notes.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the instructions given in this audio guide.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the questions and answers from this recorded Q&A session.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this recorded lecture into a written transcript.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the news report in this audio clip.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the testimonials given in this audio compilation.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio description of a product into written text.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this recorded business meeting.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the medical diagnosis given in this audio recording.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio tour guide narration into written text.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the recipe instructions in this cooking podcast.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the customer feedback from this recorded phone call.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this oral history interview into a written document.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this recorded court proceeding.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the meditation instructions from this guided audio session.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio description of an artwork into written text.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the weather forecast from this radio broadcast.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the safety instructions given in this airline audio announcement.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this recorded brainstorming session into written notes.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this foreign language learning audio lesson.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the financial advice given in this personal finance podcast.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio commentary of a sports event into written text.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the guided tour in this museum audio guide.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the technical instructions from this DIY video voiceover.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this recorded stand-up comedy routine into a written script.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this nature documentary narration.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the workout instructions from this fitness audio guide.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this audio description of a software interface into written documentation.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text version of the motivational speech in this audio recording.", "classification": "Audio-To-Text"},
    {"prompt": "Transcribe the legal terms and conditions from this audio disclaimer.", "classification": "Audio-To-Text"},
    {"prompt": "Convert this recorded focus group discussion into a written summary.", "classification": "Audio-To-Text"},
    {"prompt": "Provide a text transcript of this audiobook chapter.", "classification": "Audio-To-Text"},

    # Text-To-Image
    {"prompt": "Generate an image of a serene mountain landscape at sunset.", "classification": "Text-To-Image"},
    {"prompt": "Create a digital painting of a futuristic cityscape with flying cars.", "classification": "Text-To-Image"},
    {"prompt": "Produce a photorealistic image of a blooming cherry blossom tree.", "classification": "Text-To-Image"},
    {"prompt": "Generate an abstract representation of the concept of time.", "classification": "Text-To-Image"},
    {"prompt": "Create a stylized portrait of a wise old man with a long beard.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a cozy cabin in a snowy forest.", "classification": "Text-To-Image"},
    {"prompt": "Generate a surreal underwater scene with bioluminescent creatures.", "classification": "Text-To-Image"},
    {"prompt": "Create a detailed illustration of a steampunk-inspired flying machine.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a bustling medieval marketplace.", "classification": "Text-To-Image"},
    {"prompt": "Generate a minimalist logo for a tech startup named 'Quantum Leap'.", "classification": "Text-To-Image"},
    {"prompt": "Create a whimsical illustration of animals having a tea party.", "classification": "Text-To-Image"},
    {"prompt": "Produce a photorealistic image of a mouth-watering gourmet burger.", "classification": "Text-To-Image"},
    {"prompt": "Generate an image of a tranquil Japanese zen garden.", "classification": "Text-To-Image"},
    {"prompt": "Create a comic-style illustration of a superhero in action.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a vintage car on a scenic coastal road.", "classification": "Text-To-Image"},
    {"prompt": "Generate a realistic portrait of a person with heterochromia (different colored eyes).", "classification": "Text-To-Image"},
    {"prompt": "Create an image of a fantastical treehouse in a giant redwood forest.", "classification": "Text-To-Image"},
    {"prompt": "Produce a photorealistic image of a dewdrop on a leaf.", "classification": "Text-To-Image"},
    {"prompt": "Generate an impressionist-style painting of a ballet dancer in motion.", "classification": "Text-To-Image"},
    {"prompt": "Create an image of a post-apocalyptic urban landscape.", "classification": "Text-To-Image"},
    {"prompt": "Produce a detailed technical drawing of a complex machine.", "classification": "Text-To-Image"},
    {"prompt": "Generate an image of a magical library with floating books and staircases.", "classification": "Text-To-Image"},
    {"prompt": "Create a photorealistic image of a exotic tropical fruit cut in half.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a retro-futuristic space station on a distant planet.", "classification": "Text-To-Image"},
    {"prompt": "Generate a realistic image of a thunderstorm over a wheat field.", "classification": "Text-To-Image"},
    {"prompt": "Create an Art Nouveau-style poster for a fictional 1920s movie.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a intricate mandala design with vibrant colors.", "classification": "Text-To-Image"},
    {"prompt": "Generate a photorealistic image of a glass of red wine on a wooden table.", "classification": "Text-To-Image"},
    {"prompt": "Create an image of a fantastical underwater city inhabited by merpeople.", "classification": "Text-To-Image"},
    {"prompt": "Produce a detailed architectural rendering of a modern eco-friendly house.", "classification": "Text-To-Image"},
    {"prompt": "Generate an image of a majestic lion standing on a rocky outcrop.", "classification": "Text-To-Image"},
    {"prompt": "Create a pixel art representation of a classic video game character.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of a serene yoga retreat in a tropical setting.", "classification": "Text-To-Image"},
    {"prompt": "Generate a photorealistic image of a rustic farmhouse kitchen.", "classification": "Text-To-Image"},
    {"prompt": "Create an image of a futuristic sports car with gull-wing doors.", "classification": "Text-To-Image"},
    {"prompt": "Produce a detailed cross-section diagram of a beehive.", "classification": "Text-To-Image"},
    {"prompt": "Generate an image of a cozy reading nook with a view of a stormy sea.", "classification": "Text-To-Image"},
    {"prompt": "Create a photorealistic image of a person skydiving through a sunset sky.", "classification": "Text-To-Image"},
    {"prompt": "Produce an image of an ancient Egyptian temple complex at night.", "classification": "Text-To-Image"},
    {"prompt": "Generate a whimsical illustration of a child's imagination coming to life.", "classification": "Text-To-Image"}
]

# Shuffle the prompts
random.shuffle(prompts)

# Convert to JSON
json_data = json.dumps(prompts, indent=2)

# Write to a file
with open('prompt_dataset.json', 'w') as f:
    f.write(json_data)

print("Dataset created and saved as 'prompt_dataset.json'")

Dataset created and saved as 'prompt_dataset.json'


In [None]:
!pip -q install torch
!pip -q install transformers
!pip -q install scikit-learn
!pip -q install datasets
!pip -q install numpy
!pip -q install imbalanced-learn
!pip -q install nlpaug
!pip -q install optuna

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m410.5/410.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m380.1/380.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m233.4/233.4 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m78.6/78.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -q -U accelerate
!pip install -q transformers[torch]

In [None]:
import logging
import json
import torch
import optuna
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, TrainerCallback
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import load_metric
from collections import Counter
import numpy as np
from imblearn.over_sampling import SMOTE
import nlpaug.augmenter.word as naw
import shutil


# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class EarlyStoppingCallback(TrainerCallback):
    def __init__(self, early_stopping_threshold=0.9):
        self.early_stopping_threshold = early_stopping_threshold

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        if metrics.get("eval_f1", 0) > self.early_stopping_threshold:
            control.should_training_stop = True
            logger.info(f"F1 score {metrics['eval_f1']:.4f} exceeded threshold {self.early_stopping_threshold}. Stopping training.")
        return control


class BestModelCallback(TrainerCallback):
    def __init__(self, save_threshold=0.0):
        self.best_metric = 0
        self.save_threshold = save_threshold

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        metric_value = metrics.get("eval_f1", 0)
        if metric_value > self.best_metric and metric_value >= self.save_threshold:
            self.best_metric = metric_value
            # Save metrics and hyperparameters
            output_dir = f"./best_model_info"
            os.makedirs(output_dir, exist_ok=True)

            # Save metrics
            with open(f"{output_dir}/best_metrics.json", "w") as f:
                json.dump(metrics, f)

            # Save hyperparameters
            with open(f"{output_dir}/hyperparameters.json", "w") as f:
                json.dump(args.to_dict(), f)

            logger.info(f"New best model metrics saved with F1 score: {metric_value:.3f}")


# Additional helper functions
def check_disk_space(path, required_space_gb):
    total, used, free = shutil.disk_usage(path)
    free_space_gb = free // (2**30)  # Convert bytes to GB
    if free_space_gb < required_space_gb:
        raise RuntimeError(f"Not enough disk space. {free_space_gb}GB available, {required_space_gb}GB required.")


def load_data(file_path):
    # Define the complete list of expected labels
    labels = ['Text-To-Text', 'Image-To-Text', 'Text-To-Image', 'Text-To-Audio', 'Audio-To-Text']

    with open(file_path, 'r') as f:
        data = json.load(f)

    texts = []
    label_indices = []
    unknown_labels = set()

    for item in data:
        texts.append(item['prompt'])
        try:
            label_indices.append(labels.index(item['classification']))
        except ValueError:
            unknown_labels.add(item['classification'])
            # Assign a default label index (e.g., -1) for unknown labels
            label_indices.append(-1)

    if unknown_labels:
        logger.warning(f"Unknown labels found in the dataset: {unknown_labels}")
        logger.warning("These will be assigned a label index of -1. You may want to update your label list.")

    return texts, label_indices


class CustomDataset(Dataset):
    # def __init__(self, texts, labels, tokenizer, max_length=512):
    def __init__(self, texts, labels, tokenizer, max_length=128):  # Reduced from 512
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }


def check_gpu_memory():
    if torch.cuda.is_available():
        gpu_memory = torch.cuda.get_device_properties(0).total_memory
        logger.info(f"Total GPU memory: {gpu_memory / 1e9:.2f} GB")
    else:
        logger.warning("CUDA is not available. Training will be done on CPU.")


def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted', zero_division=1)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


# Define the PerformanceCallback class
class PerformanceCallback(TrainerCallback):
    def __init__(self, patience=3):
        self.best_f1 = 0
        self.stagnant_epochs = 0
        self.patience = patience

    def on_evaluate(self, args, state, control, metrics, **kwargs):
        current_f1 = metrics.get('eval_f1', 0)
        if current_f1 > self.best_f1:
            self.best_f1 = current_f1
            self.stagnant_epochs = 0
            logger.info(f"New best F1 score: {self.best_f1}")
        else:
            self.stagnant_epochs += 1
            if self.stagnant_epochs >= self.patience:
                logger.warning(f"Model performance stagnant for {self.stagnant_epochs} epochs. Consider early stopping or adjusting hyperparameters.")



# Function to save training logs
def save_training_logs(trainer, filename='training_logs.txt'):
    with open(filename, 'w') as f:
        for log in trainer.state.log_history:
            f.write(json.dumps(log) + '\n')
    logger.info(f"Training logs saved to {filename}")

# metrics function
def hyperparameter_tuning(train_texts, train_labels, val_texts, val_labels, model_name, num_trials=10, timeout=3600):
    def objective(trial):
        lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
        # batch_size = trial.suggest_categorical('batch_size', [8, 16, 32, 64])
        batch_size = trial.suggest_categorical('batch_size', [4, 8, 16])
        num_epochs = trial.suggest_int('num_epochs', 3, 20)

        # Address class imbalance
        class_counts = Counter(train_labels)
        class_weights = {cls: 1.0 / count for cls, count in class_counts.items()}
        sample_weights = [class_weights[label] for label in train_labels]

        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(set(train_labels)))
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
        val_dataset = CustomDataset(val_texts, val_labels, tokenizer)

        training_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model='f1',
            greater_is_better=True,
            push_to_hub=False,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics,
        )

        trainer.train()

        eval_results = trainer.evaluate()
        return eval_results['eval_f1']

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=num_trials, timeout=timeout)

    best_params = study.best_params
    best_f1 = study.best_value

    logger.info(f"Best hyperparameters: {best_params}")
    logger.info(f"Best F1 score: {best_f1}")

    return best_params

def assess_dataset_quality(texts, labels, min_samples_per_class=100):
    class_counts = Counter(labels)
    total_samples = len(texts)
    num_classes = len(set(labels))

    logger.info(f"Total samples: {total_samples}")
    logger.info(f"Number of classes: {num_classes}")
    logger.info(f"Class distribution: {class_counts}")

    if total_samples < num_classes * min_samples_per_class:
        logger.warning("Dataset might be too small.")
        return False

    if min(class_counts.values()) < min_samples_per_class:
        logger.warning("Some classes have too few samples.")
        return False

    if max(class_counts.values()) / min(class_counts.values()) > 10:
        logger.warning("Dataset is highly imbalanced.")
        return False

    logger.info("Dataset appears to be of good quality.")
    return True

# def train_model_with_type(train_texts, train_labels, val_texts, val_labels, model_type='distilbert'):
#     model_name_map = {
#       'bert': 'prajjwal1/bert-tiny',  # A much smaller BERT model
#       'roberta': 'roberta-base',
#       'distilbert': 'distilbert-base-uncased'
#     }

#     model_name = model_name_map.get(model_type.lower())
#     if model_name is None:
#         raise ValueError(f"Unknown model type: {model_type}")

#     best_params = hyperparameter_tuning(train_texts, train_labels, val_texts, val_labels, model_name)

#     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(set(train_labels)))
#     tokenizer = AutoTokenizer.from_pretrained(model_name)

#     train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
#     val_dataset = CustomDataset(val_texts, val_labels, tokenizer)

#     training_args = TrainingArguments(
#         output_dir='./results',
#         num_train_epochs=best_params['num_epochs'],
#         per_device_train_batch_size=best_params['batch_size'],
#         learning_rate=best_params['learning_rate'],
#         per_device_eval_batch_size=64,
#         warmup_steps=500,
#         weight_decay=0.01,
#         logging_dir='./logs',
#         logging_steps=10,
#         evaluation_strategy="epoch",
#         save_strategy="no",  # Don't save checkpoints
#         load_best_model_at_end=False,  # We're not saving checkpoints, so we can't load them
#         metric_for_best_model='f1',
#         greater_is_better=True,
#         push_to_hub=False,
#     )

#     trainer = Trainer(
#         model=model,
#         args=training_args,
#         train_dataset=train_dataset,
#         eval_dataset=val_dataset,
#         compute_metrics=compute_metrics,
#         callbacks=[BestModelCallback(save_threshold=0.0)]  # Set threshold to 0 to always save the best model
#     )

#     performance_callback = PerformanceCallback()
#     trainer.add_callback(performance_callback)
#     trainer.train()

#     eval_results = trainer.evaluate()
#     logger.info(f"Final evaluation results: {eval_results}")

#     # Save the final model
#     final_output_dir = f'./final_model_{model_type}'
#     model.save_pretrained(final_output_dir)
#     tokenizer.save_pretrained(final_output_dir)
#     logger.info(f"Final model saved to {final_output_dir}")

#     return model, tokenizer, eval_results, trainer

def train_model_with_type(train_texts, train_labels, val_texts, val_labels, model_type='distilbert'):
    model_name_map = {
      'bert': 'prajjwal1/bert-tiny',
      'roberta': 'roberta-base',
      'distilbert': 'distilbert-base-uncased'
    }

    model_name = model_name_map.get(model_type.lower())
    if model_name is None:
        raise ValueError(f"Unknown model type: {model_type}")

    best_params = hyperparameter_tuning(train_texts, train_labels, val_texts, val_labels, model_name)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(set(train_labels)))
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    train_dataset = CustomDataset(train_texts, train_labels, tokenizer)
    val_dataset = CustomDataset(val_texts, val_labels, tokenizer)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=best_params['num_epochs'],
        per_device_train_batch_size=best_params['batch_size'],
        learning_rate=best_params['learning_rate'],
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model='f1',
        greater_is_better=True,
        push_to_hub=False,
    )

    early_stopping_callback = EarlyStoppingCallback(early_stopping_threshold=0.9)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[early_stopping_callback, BestModelCallback(save_threshold=0.9)]
    )

    trainer.train()

    eval_results = trainer.evaluate()
    logger.info(f"Final evaluation results: {eval_results}")

    # Save the final model
    final_output_dir = f'./final_model_{model_type}'
    model.save_pretrained(final_output_dir)
    tokenizer.save_pretrained(final_output_dir)
    logger.info(f"Final model saved to {final_output_dir}")

    return model, tokenizer, eval_results, trainer

def train_single_model(train_texts, train_labels, val_texts, val_labels, model_type):
    logger.info(f"Training {model_type} model...")
    model, tokenizer, eval_results, trainer = train_model_with_type(train_texts, train_labels, val_texts, val_labels, model_type)
    logger.info(f"{model_type} model training completed. Evaluation results: {eval_results}")

    # Load the best model
    best_model_path = "./best_model"
    best_model = AutoModelForSequenceClassification.from_pretrained(best_model_path)

    return eval_results, best_model, tokenizer


def augment_data(texts, labels):
    augmenter = naw.SynonymAug(aug_src='wordnet')
    augmented_texts = []
    augmented_labels = []

    for text, label in zip(texts, labels):
        augmented_text = augmenter.augment(text)
        augmented_texts.append(augmented_text)
        augmented_labels.append(label)

    return texts + augmented_texts, labels + augmented_labels

def save_model(model, tokenizer, output_dir):
    try:
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        logger.info(f"Model and tokenizer saved to {output_dir}")
    except RuntimeError as e:
        logger.error(f"Failed to save model due to: {str(e)}")
        logger.info("Attempting to save model in a different format...")
        torch.save(model.state_dict(), f"{output_dir}/model_state_dict.pt")
        tokenizer.save_pretrained(output_dir)
        logger.info(f"Model state dict and tokenizer saved to {output_dir}")

def load_model(model_dir):
    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    logger.info(f"Model and tokenizer loaded from {model_dir}")
    return model, tokenizer


def classify_prompt(prompt, model, tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()

    # Map the predicted class index back to the label
    labels = ['Text-To-Text', 'Image-To-Text', 'Text-To-Image', 'Text-To-Audio', 'Audio-To-Text']
    return labels[predicted_class]

if __name__ == "__main__":
    # Load your data
    texts, labels = load_data('prompt_dataset.json')

    # Assess dataset quality
    if not assess_dataset_quality(texts, labels):
        logger.warning("Consider improving the dataset before proceeding.")

    # Augment data if needed
    texts, labels = augment_data(texts, labels)

    # Split the data
    train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

    # Check GPU memory
    check_gpu_memory()

 # Train models sequentially
    model_types = ['bert', 'roberta', 'distilbert']
    results = {}
    best_models = {}

    # for model_type in model_types:
    #     try:
    #         eval_results, best_model, tokenizer = train_single_model(train_texts, train_labels, val_texts, val_labels, model_type)
    #         results[model_type] = eval_results
    #         best_models[model_type] = (best_model, tokenizer)
    #         save_training_logs(trainer, f'training_logs_{model_type}.txt')
    #     except Exception as e:
    #         logger.error(f"Error training {model_type} model: {str(e)}")

    # # Compare results
    # for model_type, eval_results in results.items():
    #     logger.info(f"{model_type} model results: {eval_results}")

    for model_type in model_types:
        try:
            eval_results, best_model, tokenizer = train_single_model(train_texts, train_labels, val_texts, val_labels, model_type)
            results[model_type] = eval_results
            best_models[model_type] = (best_model, tokenizer)
            save_training_logs(trainer, f'training_logs_{model_type}.txt')

            # Stop if F1 score is above 90%
            if eval_results['eval_f1'] > 0.9:
                logger.info(f"F1 score {eval_results['eval_f1']:.4f} exceeded 90%. Stopping further training.")
                break
        except Exception as e:
            logger.error(f"Error training {model_type} model: {str(e)}")

    if results:
        # Find the best model
        best_model_type = max(results, key=lambda k: results[k]['eval_f1'])
        logger.info(f"Best model: {best_model_type}")

        # Use the best model for classification
        model, tokenizer = best_models[best_model_type]

        # Example usage of classification
        prompts_to_classify = [
            "Translate this sentence to French.",
            "What objects can you see in this image?",
            "Convert this text to speech with a British accent.",
            "Transcribe the conversation in this audio file.",
            "Generate an image of a futuristic cityscape."
        ]

        for prompt in prompts_to_classify:
            logger.info(f"Prompt: {prompt}")
            logger.info(f"Classification: {classify_prompt(prompt, model, tokenizer)}\n")
    else:
        logger.error("No models were successfully trained. Please check the errors and try again.")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[I 2024-06-25 19:49:40,830] A new study created in memory with name: no-name-d731f86f-c1d0-4620-872f-d7bdd7e4d35f
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6018,1.592934,0.2,0.116599,0.64,0.2
2,1.6202,1.590199,0.1875,0.109902,0.632551,0.1875
3,1.6097,1.586068,0.1875,0.113969,0.635909,0.1875
4,1.6156,1.580656,0.2,0.122426,0.64,0.2
5,1.5989,1.57324,0.2375,0.146056,0.659277,0.2375
6,1.5853,1.564565,0.2875,0.214807,0.59959,0.2875
7,1.578,1.555957,0.35,0.293419,0.660577,0.35
8,1.5581,1.541642,0.475,0.468874,0.613854,0.475
9,1.5406,1.526196,0.6375,0.646073,0.696429,0.6375
10,1.522,1.495741,0.7,0.700348,0.729251,0.7


[I 2024-06-25 19:49:57,484] Trial 0 finished with value: 0.802787947346771 and parameters: {'learning_rate': 6.7136553830889e-05, 'batch_size': 16, 'num_epochs': 13}. Best is trial 0 with value: 0.802787947346771.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6013,1.602374,0.225,0.082653,0.825625,0.225
2,1.5869,1.578058,0.275,0.16766,0.675792,0.275
3,1.541,1.522973,0.5375,0.43069,0.71369,0.5375
4,1.4986,1.443104,0.6375,0.5629,0.756818,0.6375
5,1.3836,1.294899,0.75,0.731412,0.810308,0.75
6,1.1888,1.053242,0.8625,0.857081,0.868523,0.8625
7,0.8813,0.837455,0.9125,0.910939,0.912795,0.9125
8,0.7796,0.705562,0.925,0.923724,0.925069,0.925
9,0.6454,0.607623,0.9375,0.936837,0.936959,0.9375
10,0.5941,0.561789,0.9625,0.962133,0.962423,0.9625


[I 2024-06-25 19:50:12,689] Trial 1 finished with value: 0.9621332046332046 and parameters: {'learning_rate': 0.000714273208791782, 'batch_size': 4, 'num_epochs': 11}. Best is trial 1 with value: 0.9621332046332046.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6337,1.616714,0.2,0.112373,0.465941,0.2
2,1.6291,1.61415,0.2,0.112373,0.465941,0.2
3,1.6232,1.610603,0.2125,0.121053,0.473056,0.2125
4,1.6223,1.60533,0.225,0.123994,0.474643,0.225
5,1.5954,1.598485,0.225,0.121238,0.467658,0.225
6,1.5964,1.589669,0.2625,0.162962,0.477584,0.2625
7,1.6005,1.577299,0.325,0.239173,0.424955,0.325
8,1.5845,1.560123,0.3625,0.298214,0.399738,0.3625
9,1.5506,1.543832,0.5125,0.459583,0.549409,0.5125
10,1.5261,1.521876,0.5,0.443583,0.594892,0.5


[I 2024-06-25 19:50:23,222] Trial 2 finished with value: 0.4595834943088947 and parameters: {'learning_rate': 0.0003505403521738316, 'batch_size': 16, 'num_epochs': 13}. Best is trial 1 with value: 0.9621332046332046.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6086,1.608544,0.225,0.082653,0.825625,0.225
2,1.6178,1.606016,0.225,0.082653,0.825625,0.225
3,1.5975,1.601817,0.225,0.082653,0.825625,0.225
4,1.6037,1.595652,0.225,0.082653,0.825625,0.225
5,1.5968,1.586482,0.225,0.083505,0.576266,0.225
6,1.5918,1.574999,0.2875,0.190549,0.70625,0.2875
7,1.5743,1.559733,0.4,0.298953,0.653201,0.4
8,1.561,1.540763,0.5125,0.411841,0.696905,0.5125
9,1.5442,1.51826,0.5125,0.411035,0.693281,0.5125
10,1.5241,1.49047,0.625,0.568782,0.745527,0.625


[I 2024-06-25 19:50:34,436] Trial 3 finished with value: 0.6894444444444444 and parameters: {'learning_rate': 0.0015279058490968742, 'batch_size': 16, 'num_epochs': 14}. Best is trial 1 with value: 0.9621332046332046.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.623,1.605746,0.225,0.082653,0.825625,0.225
2,1.6203,1.597782,0.225,0.084375,0.676923,0.225
3,1.5892,1.584607,0.2375,0.106658,0.335479,0.2375
4,1.5883,1.566615,0.275,0.158359,0.569518,0.275
5,1.5543,1.536365,0.3375,0.188901,0.38117,0.3375
6,1.5175,1.489582,0.4875,0.421591,0.562307,0.4875
7,1.4426,1.421089,0.6875,0.67687,0.73848,0.6875
8,1.3753,1.314743,0.75,0.747988,0.810213,0.75
9,1.2273,1.180968,0.85,0.847078,0.864022,0.85
10,1.1206,1.031243,0.9,0.898232,0.901557,0.9


[I 2024-06-25 19:50:52,687] Trial 4 finished with value: 0.9628787878787879 and parameters: {'learning_rate': 0.0002439728717360042, 'batch_size': 8, 'num_epochs': 18}. Best is trial 4 with value: 0.9628787878787879.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6329,1.624631,0.2875,0.170769,0.756132,0.2875
2,1.6308,1.60557,0.3,0.189014,0.746472,0.3
3,1.5755,1.578081,0.3125,0.198344,0.756723,0.3125
4,1.5343,1.517071,0.525,0.458166,0.718873,0.525
5,1.4545,1.361654,0.75,0.732798,0.841871,0.75


[I 2024-06-25 19:51:00,834] Trial 5 finished with value: 0.732798309178744 and parameters: {'learning_rate': 1.863160705040402e-05, 'batch_size': 4, 'num_epochs': 5}. Best is trial 4 with value: 0.9628787878787879.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6437,1.631027,0.1625,0.077902,0.304043,0.1625
2,1.5851,1.615993,0.225,0.122908,0.539973,0.225
3,1.5698,1.581749,0.3375,0.240747,0.553409,0.3375
4,1.5231,1.482139,0.525,0.476,0.603804,0.525
5,1.3377,1.293361,0.75,0.751382,0.824138,0.75


[I 2024-06-25 19:51:09,546] Trial 6 finished with value: 0.7513815346105709 and parameters: {'learning_rate': 0.00015010475862877533, 'batch_size': 4, 'num_epochs': 5}. Best is trial 4 with value: 0.9628787878787879.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6345,1.63643,0.1625,0.076662,0.303117,0.1625
2,1.6122,1.634247,0.1625,0.076662,0.303117,0.1625
3,1.6146,1.63119,0.15,0.070107,0.522905,0.15
4,1.6099,1.626837,0.2,0.129719,0.643032,0.2
5,1.6018,1.621324,0.2125,0.135584,0.535085,0.2125
6,1.6002,1.613555,0.225,0.156025,0.535769,0.225
7,1.5812,1.606111,0.2875,0.170706,0.669687,0.2875
8,1.5699,1.592027,0.3125,0.214445,0.672665,0.3125
9,1.5565,1.571509,0.3625,0.265416,0.63125,0.3625
10,1.5368,1.539315,0.4125,0.327491,0.624519,0.4125


[I 2024-06-25 19:51:16,917] Trial 7 finished with value: 0.3274906474397136 and parameters: {'learning_rate': 1.445185163318551e-05, 'batch_size': 16, 'num_epochs': 10}. Best is trial 4 with value: 0.9628787878787879.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.5884,1.59093,0.25,0.127728,0.68961,0.25
2,1.5832,1.56732,0.3625,0.257559,0.661235,0.3625
3,1.5027,1.506263,0.5875,0.527221,0.680327,0.5875
4,1.4819,1.409548,0.725,0.662139,0.779712,0.725
5,1.3444,1.243159,0.8125,0.776231,0.84692,0.8125
6,1.1501,1.040283,0.9,0.895693,0.908059,0.9
7,0.9182,0.837672,0.9125,0.91255,0.929891,0.9125
8,0.7422,0.678043,0.95,0.950086,0.955844,0.95
9,0.5756,0.56527,1.0,1.0,1.0,1.0
10,0.528,0.480559,0.9875,0.987571,0.988462,0.9875


[I 2024-06-25 19:51:37,273] Trial 8 finished with value: 1.0 and parameters: {'learning_rate': 0.00010240722332830406, 'batch_size': 4, 'num_epochs': 15}. Best is trial 8 with value: 1.0.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6009,1.622782,0.25,0.135279,0.71875,0.25
2,1.6279,1.619573,0.25,0.135279,0.71875,0.25
3,1.6183,1.614344,0.25,0.135551,0.719278,0.25
4,1.6123,1.607522,0.2625,0.143446,0.725047,0.2625
5,1.6143,1.597592,0.275,0.153571,0.734091,0.275
6,1.6074,1.585669,0.275,0.15557,0.73702,0.275
7,1.5787,1.568385,0.3,0.194427,0.529429,0.3
8,1.5509,1.546814,0.3875,0.335214,0.625962,0.3875
9,1.5364,1.527908,0.45,0.399009,0.648974,0.45
10,1.5205,1.501333,0.5875,0.557843,0.699797,0.5875


[I 2024-06-25 19:51:47,621] Trial 9 finished with value: 0.669781164522213 and parameters: {'learning_rate': 0.00034592603800285665, 'batch_size': 16, 'num_epochs': 11}. Best is trial 8 with value: 1.0.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.629,1.60224,0.2375,0.144537,0.503063,0.2375
2,1.601,1.553958,0.35,0.287071,0.440908,0.35
3,1.4676,1.426639,0.5375,0.510846,0.617512,0.5375
4,1.2472,1.188491,0.6125,0.563988,0.600922,0.6125
5,0.9623,0.893064,0.8875,0.88689,0.914145,0.8875
6,0.6533,0.617385,0.9,0.899719,0.925987,0.9


ERROR:__main__:Error training bert model: name 'os' is not defined
[I 2024-06-25 19:51:58,424] A new study created in memory with name: no-name-98d3b116-7750-4afe-80c6-7d4a61d92bfb
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6114,1.615186,0.25,0.1,0.8125,0.25
2,1.6069,1.607074,0.25,0.1,0.8125,0.25
3,1.5894,1.577491,0.4,0.313636,0.823529,0.4
4,1.4184,1.267326,0.7375,0.668985,0.869079,0.7375
5,0.9452,0.574375,1.0,1.0,1.0,1.0
6,0.3412,0.206698,0.9625,0.962879,0.97,0.9625
7,0.1026,0.101313,0.975,0.974906,0.976316,0.975
8,0.1037,0.0228,1.0,1.0,1.0,1.0
9,0.0267,0.018258,1.0,1.0,1.0,1.0
10,0.0094,0.006199,1.0,1.0,1.0,1.0


[I 2024-06-25 19:57:55,642] Trial 0 finished with value: 1.0 and parameters: {'learning_rate': 0.0027341875686389253, 'batch_size': 16, 'num_epochs': 13}. Best is trial 0 with value: 1.0.
  lr = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.6368,1.612647,0.225,0.082653,0.825625,0.225
2,1.6022,1.562257,0.375,0.207237,0.769231,0.375
3,0.8591,0.645879,0.875,0.874232,0.902311,0.875
4,0.1883,0.068302,1.0,1.0,1.0,1.0
5,0.0138,0.061536,0.9875,0.987509,0.988158,0.9875
6,0.0048,0.060103,0.9875,0.987509,0.988158,0.9875
7,0.0027,0.108739,0.975,0.975009,0.975658,0.975
8,0.0423,0.053424,0.9875,0.987509,0.988158,0.9875
9,0.0017,0.030523,0.975,0.97508,0.976619,0.975
10,0.0102,0.097372,0.975,0.974923,0.9775,0.975


In [None]:
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay          79G   79G     0 100% /
tmpfs            64M     0   64M   0% /dev
shm             5.7G  4.0K  5.7G   1% /dev/shm
/dev/root       2.0G  1.2G  820M  59% /usr/sbin/docker-init
/dev/sda1       119G  115G  4.0G  97% /opt/bin/.nvidia
tmpfs           6.4G  1.2M  6.4G   1% /var/colab
tmpfs           6.4G     0  6.4G   0% /proc/acpi
tmpfs           6.4G     0  6.4G   0% /proc/scsi
tmpfs           6.4G     0  6.4G   0% /sys/firmware
