In [12]:
import os
import datasets
from transformers import ViTFeatureExtractor
# from tensorflow import keras
# from tensorflow.keras import layers
import numpy as np
from huggingface_hub import HfFolder
import tensorflow as tf
from transformers import DefaultDataCollator
from transformers import TFViTForImageClassification, create_optimizer
from transformers import TFViTModel
from transformers.keras_callbacks import PushToHubCallback
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer
import torch
# from tensorflow.keras.callbacks import TensorBoard as TensorboardCallback, EarlyStopping


In [2]:
# model_id = "google/vit-base-patch16-224-in21k"
model_id = "EdBianchi/vit-fire-detection"

In [3]:
def create_image_folder_dataset(root_path):
    """creates `Dataset` from image folder structure"""

    # get class names by folders names
    _CLASS_NAMES= os.listdir(root_path)
    # defines `datasets` features`
    features=datasets.Features({
                        "img": datasets.Image(),
                        "label": datasets.features.ClassLabel(names=_CLASS_NAMES),
                    })
    # temp list holding datapoints for creation
    img_data_files=[]
    label_data_files=[]
    # load images into list for creation
    for img_class in os.listdir(root_path):
        for img in os.listdir(os.path.join(root_path,img_class)):
            path_=os.path.join(root_path,img_class,img)
            img_data_files.append(path_)
            label_data_files.append(img_class)
    # create dataset
    ds = datasets.Dataset.from_dict({"img":img_data_files,"label":label_data_files},features=features)
    return ds

In [4]:
ds_train = create_image_folder_dataset("data/train")
img_class_labels = ds_train.features["label"].names


In [5]:
ds_train[0]

{'img': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1920x1080>,
 'label': 0}

In [6]:
feature_extractor = ViTFeatureExtractor.from_pretrained(model_id)
 
# learn more about data augmentation here: https://www.tensorflow.org/tutorials/images/data_augmentation
# data_augmentation = keras.Sequential(
#     [
#         layers.Resizing(feature_extractor.size, feature_extractor.size),
#         layers.Rescaling(1./255),
#         layers.RandomFlip("horizontal"),
#         layers.RandomRotation(factor=0.02),
#         layers.RandomZoom(
#             height_factor=0.2, width_factor=0.2
#         ),
#     ],
#     name="data_augmentation",
# )
data_augmentation = tf.keras.Sequential(
    [
        tf.keras.layers.Resizing(feature_extractor.size, feature_extractor.size),
        tf.keras.layers.Rescaling(1./255),
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(factor=0.02),
        tf.keras.layers.RandomZoom(
            height_factor=0.2, width_factor=0.2
        ),
    ],
    name="data_augmentation",
)

# use keras image data augementation processing
def augmentation(examples):
    # print(examples["img"])
    # examples["pixel_values"] = [data_augmentation(image) for image in examples["img"]]
    examples["pixel_values"] = [
        data_augmentation(np.array(image.convert("RGB"))) for image in examples["img"]
    ]
    return examples
 
 
# basic processing (only resizing)
def process(examples):
    # examples.update(feature_extractor(examples['img'], ))
    # Convert images to RGB if they aren't already
    rgb_images = [np.array(image.convert("RGB")) for image in examples["img"]]
    # Apply the feature extractor to the RGB images
    examples.update(feature_extractor(images=rgb_images, return_tensors="np"))
    return examples
 
# we are also renaming our label col to labels to use `.to_tf_dataset` later
ds_train = ds_train.rename_column("label", "labels")



In [7]:
ds_train_processed = ds_train.map(process, batched=True)
ds_train_processed

Map: 100%|██████████| 13947/13947 [01:55<00:00, 120.46 examples/s]


Dataset({
    features: ['img', 'labels', 'pixel_values'],
    num_rows: 13947
})

In [8]:
test_size = 0.2
ds_train_processed = ds_train_processed.shuffle().train_test_split(test_size=test_size)

In [9]:
id2label = {str(i): label for i, label in enumerate(img_class_labels)}
label2id = {v: k for k, v in id2label.items()}
 
num_train_epochs = 5
train_batch_size = 32
eval_batch_size = 32
learning_rate = 3e-5
weight_decay_rate=0.01
num_warmup_steps=0
output_dir=model_id.split("/")[1] 
hub_token = HfFolder.get_token() # or your token directly "hf_xxx"
hub_model_id = f'{model_id.split("/")[1]}-fire'
fp16=True
 
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
if fp16:
  tf.keras.mixed_precision.set_global_policy("mixed_float16")

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


In [10]:
# Data collator that will dynamically pad the inputs received, as well as the labels.
data_collator = DefaultDataCollator(return_tensors="tf")
 
# converting our train dataset to tf.data.Dataset
tf_train_dataset = ds_train_processed["train"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=train_batch_size,
   collate_fn=data_collator)
 
# converting our test dataset to tf.data.Dataset
tf_eval_dataset = ds_train_processed["test"].to_tf_dataset(
   columns=['pixel_values'],
   label_cols=["labels"],
   shuffle=True,
   batch_size=eval_batch_size,
   collate_fn=data_collator)

Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor)  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor)  
New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor})  
             : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) 


In [13]:
# create optimizer with weigh decay
num_train_steps = len(tf_train_dataset) * num_train_epochs
optimizer, lr_schedule = create_optimizer(
    init_lr=learning_rate,
    num_train_steps=num_train_steps,
    weight_decay_rate=weight_decay_rate,
    num_warmup_steps=num_warmup_steps,
)
 
# load pre-trained ViT model
# model = TFViTForImageClassification.from_pretrained(
#     model_id,
#     num_labels=len(img_class_labels),
#     id2label=id2label,
#     label2id=label2id,
# )

model_id = "EdBianchi/vit-fire-detection"
processor = AutoImageProcessor.from_pretrained(model_id)
model = AutoModelForImageClassification.from_pretrained(
    model_id,
    num_labels=3,  # Set the number of classes to 3
    id2label={0: "class_0", 1: "class_1", 2: "class_2"},  # Update this with your class names
    label2id={"class_0": 0, "class_1": 1, "class_2": 2},
    from_pt=True  # Load the model from PyTorch weights
)
 
# define loss
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
 
# define metrics
metrics=[
    tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
    tf.keras.metrics.SparseTopKCategoricalAccuracy(3, name="top-3-accuracy"),
]
 
# compile model
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics
)

TypeError: __init__() got an unexpected keyword argument 'from_pt'

In [12]:
base_model = TFViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
 
 
# inputs
pixel_values = tf.keras.layers.Input(shape=(3,224,224), name='pixel_values', dtype='float32')
 
# model layer
vit = base_model.vit(pixel_values)[0]
classifier = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(vit[:, 0, :])
 
# model
keras_model = tf.keras.Model(inputs=pixel_values, outputs=classifier)

All PyTorch model weights were used when initializing TFViTModel.

All the weights of TFViTModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTModel for predictions without further training.


In [13]:
# callbacks=[]
 
# callbacks.append(TensorboardCallback(log_dir=os.path.join(output_dir,"logs")))
# callbacks.append(EarlyStopping(monitor="val_accuracy",patience=1))
# if hub_token:
#     callbacks.append(PushToHubCallback(output_dir=output_dir,
#                                      hub_model_id=hub_model_id,
#                                      hub_token=hub_token))

NameError: name 'TensorboardCallback' is not defined

In [14]:
train_results = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    # callbacks=callbacks,
    epochs=num_train_epochs,
)

Epoch 1/5
  3/349 [..............................] - ETA: 1:01:18 - loss: 1.0579 - accuracy: 0.5625 - top-3-accuracy: 1.0000

KeyboardInterrupt: 