In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image as pil_image
import sklearn.metrics
import numpy as np
import tensorflow as tf
import tensorflow
from tensorflow import keras
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import AutoFeatureExtractor
from transformers import TFAutoModelForTokenClassification
from transformers import TFAutoModelForImageClassification
from keras.layers import Input
import imageio

In [20]:
from transformers import AutoConfig, TFAutoModelForImageClassification, AutoFeatureExtractor

# Download model and configuration from huggingface.co and cache.
config = AutoConfig.from_pretrained("google/vit-base-patch16-224-in21k")
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=5)
model = TFAutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=5)
#feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=5)

# image = imageio.imread('DGE_test\\2\Cow_11\\100_DGE.tif')
# inputs = feature_extractor(images=image, return_tensors="tf")

# def preprocessing_func():
#     return feature_extractor(images=image, return_tensors="tf")

Some layers from the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing TFViTForImageClassification: ['vit/pooler/dense/kernel:0', 'vit/pooler/dense/bias:0']
- This IS expected if you are initializing TFViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
from tensorflow.keras.utils import Sequence
import skimage.transform
import PIL

class ViTDataGen(Sequence):
    # The input to the data generator will be the dataframe and which columns to use
    def __init__(self, df, X_col, y_col,
                 directory,
                 batch_size,
                 input_size=(224, 224, 3), #changed from (3,224,224)
                 target_size=None,
                 bitdepth=None,
                 shuffle=True):
        
        self.df = df.copy() # dataframe
        self.X_col = X_col # column for X data (filename)
        self.y_col = y_col # column for y data (class label)
        self.directory = directory # base directory for data
        self.batch_size = batch_size # batch size
        self.input_size = input_size # size expected by network (224,224,3) for VGG
        self.target_size = target_size # resized image for spatial res sims
        self.bitdepth = bitdepth # quantized image for bitdepth sims
        self.shuffle = shuffle # whether to shuffle batches
        
        self.n = len(self.df) # number of data points
        self.nclasses = df[y_col].nunique() # number of classes
            
    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)
    
    def __get_input(self, path, directory, input_size, target_size, bitdepth):
        
        img = imageio.imread(directory+path)
    
        # with fits.open(directory+path) as img: # read in fits image
        #     img.verify('silentfix')
        #     img = img[0].data
            
        #img = np.expand_dims(img,axis=2) # copy single channel to three to create rgb dimensioned image
        #img = np.tile(img,(1,1,3))
        
        # scale to target_size
        if target_size is not None:
            img = skimage.transform.resize(img, (target_size[0],target_size[1]), order=1, mode='reflect',\
                                           clip=True, preserve_range=True, anti_aliasing=True)
        
        # scale to input_size (expected dimensions for input to network)
        img = skimage.transform.resize(img, (input_size[0],input_size[1]), order=1, mode='reflect',\
                                       clip=True, preserve_range=True, anti_aliasing=True)
        
        # put bitdepth stuff here eventually
        
        # scale intensities to range [0,255] as expected by VGG preprocessing function
        # can cheat a bit here and treat each channel the same since these are grayscale images
        # img = img + 5978.7 # -5978.7 is minimum of entire magnetogram dataset
        # img = img/(2*5978.7)*255 # +5978.7 is maximum of entire magnetogram dataset
        # img[img<-2550] = -2550
        # img = img + 2550 # -5978.7 is minimum of entire magnetogram dataset
        # img = img/(5100)*255 # +5978.7 is maximum of entire magnetogram dataset
        
        
        
        img = feature_extractor(img, return_tensor="tf") # preprocess according to ViT expectations

        return img
    
    
    def __get_output(self, label, num_classes):
        #print(label)
        #print(label, num_classes)
        return tensorflow.keras.utils.to_categorical(label, num_classes=num_classes)
    
    def __get_data(self, batches):
        # Generates data containing batch_size samples

        path_batch = batches[self.X_col]
        
        label_batch = batches[self.y_col]

        X_batch = np.asarray([self.__get_input(x, self.directory, self.input_size, self.target_size, self.bitdepth)\
                              for x in path_batch])

        y_batch = np.asarray([self.__get_output(y, self.nclasses) for y in label_batch])
        #print(y_batch)
        
        return X_batch, y_batch
    
    def __getitem__(self, index):
        
        batches = self.df[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self.__get_data(batches)        
        return X, y
    
    def __len__(self):
        return self.n // self.batch_size

In [22]:
model.summary()

Model: "tf_vi_t_for_image_classification_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vit (TFViTMainLayer)        multiple                  85798656  
                                                                 
 classifier (Dense)          multiple                  3845      
                                                                 
Total params: 85,802,501
Trainable params: 85,802,501
Non-trainable params: 0
_________________________________________________________________


In [23]:
# train_df = pd.read_csv('train_data.csv',dtype=str)
# val_df = pd.read_csv('val_data.csv',dtype=str)
# test_df = pd.read_csv('test_data.csv',dtype=str)

# train_df = np.expand_dims(np.asarray(train_df['class']).astype(int), axis=1)-2
# val_df = np.expand_dims(np.asarray(val_df['class']).astype(int), axis=1)-2
# test_df = np.expand_dims(np.asarray(test_df['class']).astype(int), axis=1)-2

# train_generator = ViTDataGen(train_df, X_col='filename', y_col='class', directory='',\
#                               batch_size=128, input_size=(224,224,3),\
#                            target_size=None, bitdepth=None, shuffle=True)
# val_generator = ViTDataGen(val_df, X_col='filename', y_col='class', directory='',\
#                             batch_size=128, input_size=(224,224,3),\
#                            target_size=None, bitdepth=None, shuffle=True)

In [24]:
# from keras.preprocessing.image import ImageDataGenerator

# train_df = pd.read_csv('train_data.csv',dtype=str)
# val_df = pd.read_csv('val_data.csv',dtype=str)
# test_df = pd.read_csv('test_data.csv',dtype=str)

# # train_df["class"] = train_df["class"].astype(str).astype(int)
# # val_df["class"] = val_df["class"].astype(str).astype(int)
# # test_df["class"] = test_df["class"].astype(str).astype(int)

# train_df = np.expand_dims(np.asarray(train_df['class']).astype(int), axis=1)-2
# val_df = np.expand_dims(np.asarray(val_df['class']).astype(int), axis=1)-2
# test_df = np.expand_dims(np.asarray(test_df['class']).astype(int), axis=1)-2

# train_datagen = ImageDataGenerator(fill_mode = "reflect", data_format = "channels_last")
# train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,\
#                                                     directory='', preprocessing_function=feature_extractor,\
#                                                     xcol='filename',y_col='class',\
#                                                     target_size=(224,224), color_mode='rgb',\
#                                                     batch_size=8, class_mode='raw',\
#                                                     shuffle=True)
# val_datagen = ImageDataGenerator(fill_mode = "reflect", data_format = "channels_last")
# val_generator = val_datagen.flow_from_dataframe(dataframe=val_df,\
#                                                 directory = '', preprocessing_function=feature_extractor,\
#                                                 xcol='filename',ycol='class',\
#                                                 target_size=(224,224), color_mode='rgb',\
#                                                 batch_size=8, class_mode='raw',\
#                                                 shuffle=True)
# test_datagen = ImageDataGenerator(fill_mode = "reflect", data_format = "channels_last")
# test_generator = test_datagen.flow_from_dataframe(dataframe=test_df,\
#                                                 directory = '', preprocessing_function=feature_extractor,\
#                                                 xcol='filename',ycol='class',\
#                                                 target_size=(224,224), color_mode='rgb',\
#                                                 batch_size=8, class_mode='raw',\
#                                                 shuffle=False)


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [25]:
import skimage.transform

train_df = pd.read_csv('train_data.csv',dtype=str)
val_df = pd.read_csv('val_data.csv',dtype=str)
test_df = pd.read_csv('test_data.csv',dtype=str)

def read_data(data_frame):
    num_imgs_train = len(train_df['class'])
    x_train = np.zeros((num_imgs_train, 224, 224, 3))

    img_names = list(train_df['filename'])
    y_train = np.expand_dims(np.asarray(train_df['class']).astype(int), axis=1)-2


    for i in range(0,num_imgs_train):
        img = imageio.imread(img_names[i])
        x_train[i, :, :, :] = skimage.transform.resize(img, (224, 224, 3))
    return x_train, y_train

x_train, y_train = read_data(train_df)
x_val, y_val = read_data(val_df)
x_test, y_test = read_data(test_df)

In [27]:
# from sklearn.utils import class_weight 
# class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(train_df['class']), y =train_df['class'])
# class_weights = dict(enumerate(class_weights))
# print(class_weights)

batch_size = 100

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-7),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), #had from_logits=True but im not sure it actually needs that? #was sparse
    metrics=tf.metrics.SparseCategoricalAccuracy(), #was sparse
)

history = model.fit(
        x=x_train,
        y=y_train,
        steps_per_epoch = np.ceil(len(y_train)/batch_size),
        batch_size=batch_size,
        epochs=2,
        validation_data = (x_val, y_val),
        validation_steps = np.ceil(len(y_val)/batch_size),
        validation_freq=1
    )


# step_size_train = np.ceil(train_generator.n/train_generator.batch_size)
# step_size_val = np.ceil(val_generator.n/val_generator.batch_size)


# history = model.fit(
#     train_generator, steps_per_epoch=step_size_train, epochs=2, verbose=1,\
#     validation_data=val_generator, validation_steps=step_size_val,\
#     validation_freq=1 #, class_weight=class_weights #class weights break everything and for some reaosn it thinks there are still 10 classes?
#     )
# history = model.fit(
#     train_generator, epochs=1, verbose=1,\
#     validation_data=val_generator
#     )

Epoch 1/2


ValueError: in user code:

    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\engine\training.py", line 878, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\engine\training.py", line 867, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\engine\training.py", line 860, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\modeling_tf_utils.py", line 996, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Exception encountered when calling layer "tf_vi_t_for_image_classification_3" (type TFViTForImageClassification).
    
    in user code:
    
        File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\modeling_tf_utils.py", line 759, in run_call_with_unpacked_inputs  *
            return func(self, **unpacked_inputs)
        File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\models\vit\modeling_tf_vit.py", line 789, in call  *
            outputs = self.vit(
        File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
    
        ValueError: Exception encountered when calling layer "vit" (type TFViTMainLayer).
        
        in user code:
        
            File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\modeling_tf_utils.py", line 759, in run_call_with_unpacked_inputs  *
                return func(self, **unpacked_inputs)
            File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\models\vit\modeling_tf_vit.py", line 494, in call  *
                embedding_output = self.embeddings(
            File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
        
            ValueError: Exception encountered when calling layer "embeddings" (type TFViTEmbeddings).
            
            in user code:
            
                File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\models\vit\modeling_tf_vit.py", line 123, in call  *
                    embeddings = self.patch_embeddings(
                File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
                    raise e.with_traceback(filtered_tb) from None
            
                ValueError: Exception encountered when calling layer "patch_embeddings" (type TFPatchEmbeddings).
                
                in user code:
                
                    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\transformers\models\vit\modeling_tf_vit.py", line 189, in call  *
                        projection = self.projection(pixel_values)
                    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler  **
                        raise e.with_traceback(filtered_tb) from None
                    File "c:\Users\zacha\anaconda3\envs\ML_env\lib\site-packages\keras\engine\input_spec.py", line 247, in assert_input_compatibility
                        raise ValueError(
                
                    ValueError: Input 0 of layer "projection" is incompatible with the layer: expected axis -1of input shape to have value 3, but received input with shape (None, 224, 3, 224)
                
                
                Call arguments received:
                  • pixel_values=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
                  • interpolate_pos_encoding=None
                  • training=True
            
            
            Call arguments received:
              • pixel_values=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
              • interpolate_pos_encoding=None
              • training=True
        
        
        Call arguments received:
          • self=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
          • pixel_values=None
          • head_mask=None
          • output_attentions=False
          • output_hidden_states=False
          • interpolate_pos_encoding=None
          • return_dict=True
          • training=True
    
    
    Call arguments received:
      • self=tf.Tensor(shape=(None, 224, 224, 3), dtype=float32)
      • pixel_values=None
      • head_mask=None
      • output_attentions=None
      • output_hidden_states=None
      • interpolate_pos_encoding=None
      • return_dict=None
      • labels=None
      • training=True


In [None]:
# from PIL import Image
# import requests
# from transformers import CLIPProcessor, TFCLIPVisionModel

# model = TFCLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
# processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")