In [None]:
! pip install transformers==4.28.0 datasets


# Image classification

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub up front: the training cell sets
# push_to_hub=True and a later cell calls trainer.push_to_hub().
notebook_login()

## Load dataset

In [None]:
from datasets import load_dataset

# NOTE(review): `food` is not used by any active code in the visible notebook;
# its only mentions are the commented-out `eval_dataset=food["test"]` lines.
# This download looks like tutorial leftover - confirm before removing.
food = load_dataset("juanfengyun/SEEDTrain", split="train[:325]")

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset-loading cells below read
# from paths under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
from datasets import load_dataset

# Load the working dataset from the mounted Drive folder. Only the "train"
# split is requested; a later cell divides it into train and test parts.
ds = load_dataset("/content/drive/MyDrive/MFCC/BB1", split="train")

Split the dataset's `train` split into a train and test set with the [train_test_split](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.train_test_split) method:

In [None]:
ds

In [None]:
ds.data

In [None]:
# Hold out 20% of the examples for evaluation. The fixed seed keeps the
# train/test membership identical across kernel restarts, so metrics from
# different runs are comparable.
ds = ds.train_test_split(test_size=0.2, seed=42)

In [None]:
ds


In [None]:
# Class names in index order, as declared by the `label` feature of the train split.
labels = ds["train"].features["label"].names

# Two-way lookup tables between class names and their stringified indices.
label2id = {name: str(idx) for idx, name in enumerate(labels)}
id2label = {str(idx): name for idx, name in enumerate(labels)}

Now you can convert the label id to a label name:

In [None]:
labels

In [None]:
id2label

## Preprocess

The next step is to load a ViT image processor to process the image into a tensor:

In [None]:
from transformers import AutoImageProcessor

# Pretrained checkpoint used both for the image processor here and for the
# model that is loaded in the training section.
checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [None]:
from torchvision.transforms import Compose, Normalize, RandomResizedCrop, ToTensor

# Normalise with the mean/std the checkpoint's image processor is configured with.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# The processor describes its target size either as a single "shortest_edge"
# value or as an explicit height/width pair; accept both layouts.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Pipeline applied to every image: random resized crop to `size`, conversion
# to a tensor, then normalisation.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

Then create a preprocessing function to apply the transforms and return the `pixel_values` - the inputs to the model - of the image:

In [None]:
def transforms(examples):
    """Attach model-ready tensors to a batch and drop the raw images.

    Every entry of ``examples["image"]`` is converted to RGB and passed
    through the module-level ``_transforms`` pipeline; the results are stored
    under ``"pixel_values"``. The ``"image"`` entry is then removed and the
    same (mutated) batch object is returned.
    """
    pixel_values = []
    for picture in examples["image"]:
        pixel_values.append(_transforms(picture.convert("RGB")))
    examples["pixel_values"] = pixel_values
    del examples["image"]
    return examples

In [None]:
ds = ds.with_transform(transforms)

In [None]:
testDs = ds["test"]

In [None]:
testDs

In [None]:
# Rebinds `testDs` (previously ds["test"]) to a separate dataset read from
# Drive. No `split=` is passed; later cells index the result with ["train"].
testDs = load_dataset("/content/drive/MyDrive/Colab Notebooks/SpeechClassification/test")

In [None]:
testDs

In [None]:
prepared_Test = testDs.with_transform(transforms)

In [None]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator()

## Evaluate

In [None]:
!pip install evaluate

In [None]:
import evaluate

# NOTE(review): `accuracy` is not referenced by any later visible cell;
# compute_metrics uses the combined `metrics` object instead.
accuracy = evaluate.load("accuracy")

In [None]:
import evaluate


In [None]:
# Bundle four classification metrics so a single compute() call reports all of them.
# NOTE(review): f1/precision/recall presumably use their default (binary)
# averaging here - confirm the dataset has exactly two labels, otherwise an
# explicit averaging mode is needed.
metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

In [None]:
import numpy as np


def compute_metrics(eval_pred):
    """Score a batch of model outputs with the combined ``metrics`` object.

    ``eval_pred`` unpacks into ``(scores, references)``. The predicted class
    for each row is the arg-max of its scores along axis 1, and the result of
    ``metrics.compute`` on those predictions and the references is returned.
    """
    scores, references = eval_pred
    return metrics.compute(
        predictions=np.argmax(scores, axis=1),
        references=references,
    )

Your `compute_metrics` function is ready to go now, and you'll return to it when you set up your training.

In [None]:
!pip install wandb
import wandb

wandb.login()

## Train

In [None]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

# Load the pretrained checkpoint with a classification head sized to this
# dataset's label set, and store the name<->id mappings in the model config.
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

In [None]:
pip
install - -upgrade
accelerate

In [None]:
pip
install
transformers == 4.28
.0

In [None]:
# Training configuration. Evaluation and checkpointing both happen once per
# epoch, and the best checkpoint by "accuracy" is reloaded when training ends.
training_args = TrainingArguments(
    output_dir="./my_MFCC_VITmodelBBMetrics",
    # Keep the raw `image` column so the on-the-fly `transforms` can read it.
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    # 16 examples per step x 4 accumulation steps = 64 examples per optimizer
    # update on each device.
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=80,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=True,
    #  report_to="wandb",
)

# Train on the "train" part of the split and evaluate on the "test" part.
# The image processor is passed as `tokenizer`.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=ds["train"],
    # eval_dataset=food["test"],
    eval_dataset=ds["test"],
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
)

# Launch fine-tuning.
trainer.train()

In [None]:
wandb.finish()

In [None]:
trainer.save_state()

In [None]:
trainer.state.log_history

In [None]:
type(trainer.state.log_history[0])

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
data = pd.DataFrame(trainer.state.log_history)
data


In [None]:
data.dropna(subset=['eval_accuracy', 'eval_loss'])

In [None]:
from matplotlib import pyplot as plt

# `_df_13` is not defined anywhere in this notebook, so the cell failed on a
# fresh kernel. Plot straight from `data`, dropping rows that have no
# eval_accuracy value so the line is continuous.
eval_accuracy = data['eval_accuracy'].dropna()
eval_accuracy.plot(kind='line', figsize=(8, 4), title='eval_accuracy')
plt.gca().spines[['top', 'right']].set_visible(False)

In [None]:
from matplotlib import pyplot as plt

# `_df_15` is not defined anywhere in this notebook, so the cell failed on a
# fresh kernel. Plot straight from `data`, dropping rows that have no
# eval_samples_per_second value so the line is continuous.
eval_throughput = data['eval_samples_per_second'].dropna()
eval_throughput.plot(kind='line', figsize=(8, 4), title='eval_samples_per_second')
plt.gca().spines[['top', 'right']].set_visible(False)
plt.tight_layout()

In [None]:
trainer.push_to_hub()

## Inference

In [None]:
# Rebuild a Trainer around the current `model` for evaluation. The arguments
# are the same as those used for the Trainer in the training section.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=ds["train"],
    # eval_dataset=food["test"],
    eval_dataset=ds["test"],
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
)


In [None]:
ds['test']

In [None]:
trainer.evaluate(ds['test'])

In [None]:
# Class names of the separately loaded test dataset, in index order.
labels1 = prepared_Test["train"].features["label"].names

# Name<->id lookup tables for that dataset, with ids stored as strings.
label2id1 = {name: str(idx) for idx, name in enumerate(labels1)}
id2label1 = {str(idx): name for idx, name in enumerate(labels1)}

In [None]:
# `ds` holds the "train"/"test" splits and has the training transform
# attached, so it has no "image" key. Take the raw images from the
# untransformed test dataset loaded from Drive, whose data sits under "train".
image = testDs["train"]["image"]
image[2]

In [None]:
type(image)

In [None]:
# A bare expression inside a loop is never rendered, so call display()
# explicitly. Slicing shows at most ten previews and cannot index past the
# end of a shorter list.
for preview in image[:10]:
    display(preview)

In [None]:
labels1[0]

In [None]:
from transformers import pipeline

# Load the fine-tuned model from the directory this notebook's Trainer writes
# to; the previous "my_MFCC_VITmodel" path is not produced by any visible cell.
# Assumes the final model has been saved there (trainer.push_to_hub() above
# is expected to do so) - confirm the directory contains the model files.
classifier = pipeline("image-classification", model=training_args.output_dir)

# Classify at most the first 110 images and keep every result. The slice
# cannot overrun a shorter list, and the builtin `list` is no longer shadowed.
predictions = [classifier(sample) for sample in image[:110]]
predictions[:5]

In [None]:
from transformers import AutoImageProcessor
import torch

# "my_awesome_food_model" is not created anywhere in this notebook. Load the
# processor saved alongside this run's model instead (assumes the Trainer has
# written its files to `training_args.output_dir`).
image_processor = AutoImageProcessor.from_pretrained(training_args.output_dir)

# `image` is passed whole, so `inputs` holds one entry per image.
inputs = image_processor(image, return_tensors="pt")

In [None]:
from transformers import AutoModelForImageClassification

# "my_awesome_food_model" is not created anywhere in this notebook. Load the
# fine-tuned weights from this run's output directory instead (assumes the
# Trainer has saved the model there).
model = AutoModelForImageClassification.from_pretrained(training_args.output_dir)

# Inference only: skip gradient tracking.
with torch.no_grad():
    logits = model(**inputs).logits

In [None]:
# `inputs` was built from the full `image` collection, so `logits` has one
# row per image and `.item()` (valid only for single-element tensors) would
# raise. Map each row's arg-max to its label name instead; this also works
# for a batch of one.
predicted_ids = logits.argmax(-1).tolist()
predicted_labels = [model.config.id2label[class_id] for class_id in predicted_ids]
predicted_labels