<a href="https://colab.research.google.com/github/viniciusrpb/sibgrapi2015_algasegmentation_levelset/blob/master/selenastraceae_vit_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Dataset: Green Algae (Selenastraceae)
## Technique: Vision Transformer


In [None]:
# Install packages from PyPI into the notebook runtime; `-U` upgrades a copy
# that is already installed.
# NOTE(review): no versions are pinned, so a fresh run may resolve different
# releases than the ones this notebook was originally executed with.
!pip install datasets
!pip install -U torchmetrics
!pip install -U transformers[torch] accelerate
!pip install evaluate

In [2]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from the runtime's filesystem (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torchvision
from transformers import TrainingArguments, Trainer,ViTFeatureExtractor,ViTForImageClassification,DeiTForImageClassification,BeitForImageClassification,DeiTFeatureExtractor,BeitFeatureExtractor
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import numpy as np
import pandas as pd
import os
import evaluate
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, recall_score, precision_score
import seaborn as sn
import matplotlib.pyplot as plt
from PIL import Image

In [4]:
# Copy the dataset folder from the mounted Drive into the working directory
# under the name "selenastraceae".
# NOTE(review): if "selenastraceae" already exists, `cp -r` nests the copy
# inside it instead of replacing it, so re-running this cell is not idempotent.
!cp -r "/content/drive/My Drive/alga/dataset" "selenastraceae"
# Local root directory of the copied dataset.
path_data = "selenastraceae"

In [12]:
# Index the image folder (one sub-directory per class). The root comes from
# `path_data` so the dataset location is defined in a single place instead of
# being repeated as a string literal.
dataset = torchvision.datasets.ImageFolder(root=path_data)

# NOTE(review): `full_ds` is rebound to a plain ImageFolder further down before
# anything reads it, so this DataLoader is effectively unused. No transform or
# collate_fn is supplied here, so iterating it would presumably fail to batch
# the raw images — confirm before relying on it.
full_ds = DataLoader(dataset, batch_size=32, shuffle=True,num_workers=4)

In [11]:
# Load the image preprocessing configuration published with the
# 'google/vit-base-patch16-224-in21k' checkpoint.
# NOTE(review): ViTFeatureExtractor appears to be deprecated in newer
# transformers releases in favour of ViTImageProcessor — confirm against the
# installed version.
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]



In [17]:
def fn_collator(batch):
    """Turn a list of (image, label) samples into one model-ready batch.

    The images are passed together through the module-level
    `feature_extractor` (asking for PyTorch tensors), and the integer labels
    are attached to the result under the 'labels' key as a long tensor.

    Args:
        batch: sequence of samples where item 0 is the image and item 1 is
            the class index.

    Returns:
        The feature extractor's output with an added 'labels' entry.
    """
    images = []
    targets = []
    for sample in batch:
        images.append(sample[0])
        targets.append(sample[1])

    encodings = feature_extractor(images, return_tensors='pt')
    encodings['labels'] = torch.tensor(targets, dtype=torch.long)
    return encodings

def compute_metric_f1(p):
    """Compute the macro-averaged F1 score for one evaluation pass.

    Args:
        p: prediction object exposing `predictions` (per-class scores, one row
            per sample) and `label_ids` (reference class indices).

    Returns:
        Whatever the "f1" metric's `compute` returns for the arg-max
        predictions against the references with ``average="macro"``.
    """
    # This function is called on every evaluation step; load the metric once
    # and keep it on the function object instead of reloading it each time.
    metric = getattr(compute_metric_f1, "_metric", None)
    if metric is None:
        metric = evaluate.load("f1")
        compute_metric_f1._metric = metric

    # Highest-scoring class per sample.
    predicted = np.argmax(p.predictions, axis=1)
    return metric.compute(predictions=predicted, references=p.label_ids, average="macro")

In [126]:
def generate_data(deep_features, df, output_path='selenastraceae.data'):
    """Write feature vectors and their metadata to a semicolon-separated file.

    File layout (every line is terminated by a newline):
        line 1: the literal tag ``DY``
        line 2: number of samples
        line 3: number of feature columns
        line 4: attribute names, ``attrib1;attrib2;...;attribN;``
        then one line per sample: ``filename;v1;...;vN;label``

    Args:
        deep_features: sequence of feature vectors; the column count is taken
            from the first vector. May be empty.
        df: table with 'filename' and 'label' columns supporting ``.iloc``;
            row ``i`` describes ``deep_features[i]``.
        output_path: destination file; defaults to the name used originally.

    Raises:
        IndexError: if `df` has fewer rows than `deep_features`, or a vector
            is shorter than the first one.
    """
    rows = len(deep_features)
    # An empty feature list used to crash on deep_features[0]; treat it as a
    # table with zero columns instead.
    columns = len(deep_features[0]) if rows else 0

    # The attribute header keeps its trailing ';' to preserve the file format.
    header = "".join("attrib" + str(i) + ";" for i in range(1, columns + 1))
    lines = ['DY', str(rows), str(columns), header]

    for i in range(rows):
        fields = [str(df['filename'].iloc[i])]
        fields.extend(str(deep_features[i][j]) for j in range(columns))
        fields.append(str(df['label'].iloc[i]))
        lines.append(";".join(fields))

    # Build the text once (no quadratic string concatenation) and let the
    # context manager close the file even if the write fails.
    with open(output_path, 'w') as arquivo:
        arquivo.write("\n".join(lines) + "\n")

In [14]:
# Dataset that is handed to the Trainer: raw (image, class_index) samples.
full_ds = torchvision.datasets.ImageFolder(path_data)

# Build both label maps from `full_ds` itself rather than from a variable
# created in a different cell, so this cell only depends on `path_data`.
# Class ids are stored as strings; the later lookup by str(class_index)
# relies on that.
dic_label2id = {class_name: str(i) for i, class_name in enumerate(full_ds.classes)}
dic_id2label = {str(i): class_name for i, class_name in enumerate(full_ds.classes)}

num_labels = len(full_ds.classes)

In [18]:
# Start from the pretrained ViT checkpoint with a classification head sized
# for this dataset's classes.
vit_model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=num_labels,
    id2label=dic_id2label,
    label2id=dic_label2id
)

# NOTE(review): `evaluation_strategy` appears to have been renamed to
# `eval_strategy` in newer transformers releases — confirm against the
# installed version before upgrading.
training_args = TrainingArguments(
    output_dir="./vit-base-selenastraceae",
    per_device_train_batch_size=16,
    evaluation_strategy="steps",
    num_train_epochs=10,
    # Mixed precision needs a CUDA device; requesting it unconditionally makes
    # this cell fail on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    save_steps=100,
    eval_steps=100,
    logging_steps=6,
    learning_rate=3e-5,
    save_total_limit=2,
    # The collator reads raw (image, label) samples, so columns must not be pruned.
    remove_unused_columns=False,
    push_to_hub=False,
    report_to='tensorboard',
    load_best_model_at_end=True,
)

# NOTE(review): the same dataset object is used for training and evaluation,
# so the reported validation loss / F1 are measured on the training images.
trainer = Trainer(
    model=vit_model,
    args=training_args,
    data_collator=fn_collator,
    compute_metrics=compute_metric_f1,
    train_dataset = full_ds,
    eval_dataset = full_ds,
    tokenizer=feature_extractor,
)

train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

# Final evaluation pass; report and persist it the same way as the training
# metrics instead of silently discarding the result.
metrics = trainer.evaluate(full_ds)
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1
100,1.4186,1.373812,0.592233
200,0.7992,0.852445,0.759506
300,0.514,0.577736,0.854939
400,0.4114,0.408929,0.902486
500,0.3057,0.309557,0.915762
600,0.2383,0.244483,0.974996
700,0.2377,0.210812,0.996728
800,0.2075,0.192044,1.0
900,0.1842,0.183428,1.0


Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

***** train metrics *****
  epoch                    =         10.0
  total_flos               = 1091338136GF
  train_loss               =       0.5965
  train_runtime            =   0:19:11.29
  train_samples_per_second =       13.133
  train_steps_per_second   =        0.825


In [128]:
from transformers import ViTConfig
from datasets import load_dataset
from transformers import AutoImageProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocess with the configuration of the checkpoint the model was
# fine-tuned from, so inference sees the same preprocessing as training.
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# Make sure the model lives on the same device as the inputs and is in
# inference mode before extracting features.
vit_model.to(device)
vit_model.eval()

# One feature vector per image, in dataset order: the first token of the last
# hidden-state tensor for that image.
outputs = []
with torch.no_grad():  # no gradients needed; avoids building a graph per image
    for image in full_ds:
        # image is an (image, class_index) sample; only the image is encoded.
        inputs = image_processor(image[0], return_tensors="pt").to(device)
        hidden_states = vit_model(**inputs, output_hidden_states=True).hidden_states
        features = hidden_states[-1]
        outputs.append(features[0][0].cpu().numpy())

In [146]:
# Collect, for every indexed image, its file path and its class name (looked
# up through the string-keyed id -> label map).
dic = {'filename': [], 'label': []}

for image_path, class_index in dataset.imgs:
    dic['filename'].append(image_path)
    dic['label'].append(dic_id2label[str(class_index)])

In [129]:
# Export the extracted feature vectors together with their file names and labels.
generate_data(deep_features=outputs, df=pd.DataFrame(dic))

In [156]:
from sklearn.manifold import TSNE
import plotly.express as px

# The projection used to sit inside a string literal, so `X_tsne` and `px`
# only existed as leftovers of an earlier kernel session. It is computed here
# so the cell runs on a fresh kernel.
X = np.array(outputs)  # presumably one row per image — verify against `outputs`

# A fixed seed makes the embedding reproducible between runs.
tsne = TSNE(n_components=2, perplexity=5, learning_rate='auto',
            metric='euclidean', init='pca', random_state=42)
X_tsne = tsne.fit_transform(X)

all_data = {}
all_data['x'] = X_tsne[:,0]
all_data['y'] = X_tsne[:,1]
all_data['label'] = dic['label']
all_data['filename'] = dic['filename']

# Interactive scatter: colour by class, show the file name on hover.
fig = px.scatter(
    all_data,
    x='x',
    y='y',
    color='label',
    template="simple_white",
    #labels={ 'label': 'label' },
    #color_discrete_sequence= px.colors.qualitative.Plotly + px.colors.qualitative.Bold,
    hover_name='filename',
    hover_data={'x':False,
                'y':False,
                'label':True},
    width=700)

fig.update_traces(showlegend=True)

fig.update_traces(marker=dict(size=8,
                              line=dict(width=1,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))

fig.update_layout(xaxis={'visible': False},
                  yaxis={'visible': False},
                  margin=dict(l=0,r=0,b=0,t=0))

# `plt.savefig` only sees matplotlib figures; called on its own it wrote an
# empty canvas because the scatter above is a plotly figure. Draw the same
# projection with matplotlib/seaborn so the EPS file actually contains the plot.
fig_eps, ax_eps = plt.subplots()
sn.scatterplot(x=all_data['x'], y=all_data['y'], hue=all_data['label'],
               edgecolor='darkslategrey', linewidth=1, ax=ax_eps)
ax_eps.set_axis_off()
fig_eps.savefig('TSNE_ALL.eps', format='eps', bbox_inches='tight')
plt.close(fig_eps)

fig.show()

<Figure size 640x480 with 0 Axes>