In [1]:
import os
import yaml
from torch.utils.data import DataLoader
import argparse

from GeospatialFM.data import get_datasets
from GeospatialFM.models import *
# from utils import load_config
from torchgeo.samplers import RandomGeoSampler
from matplotlib import pyplot as plt

from transformers import TrainingArguments, Trainer
from transformers import AdamW, get_linear_schedule_with_warmup
from GeospatialFM.utils import setup, get_eval_fn
from GeospatialFM.data import *

from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
import numpy as np
from torch.utils.data import ConcatDataset

%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda:0')

In [5]:
args = {'exp_name': None,
        'config_file': 'GeospatialFM/configs/bigearthnet/bn_rn50_dino.yaml',
        'opts': None, 
        'save_config': False}
args = argparse.Namespace(**args)
args.debug = True
args

Namespace(exp_name=None, config_file='GeospatialFM/configs/bigearthnet/bn_rn50_dino.yaml', opts=None, save_config=False, debug=True)

In [6]:
cfg, _ = setup(args)

In [7]:
cfg['DATASET']

{'root': './data/geospatial', 'name': 'BigEarthNet', 'kwargs': {'bands': 's2', 'num_classes': 19, 'pad_s2': True}, 'train_transforms': {'crop_size': 224, 'hflip_prob': 0.5, 'normalize': False}, 'eval_transforms': {'crop_size': 224, 'resize_size': 256, 'normalize': False}, 'eval_metric': 'classification', 'use_train_transform': True, 'train_frac': 0.1, 'train_split': 'trainval'}

In [9]:
train_ds, val_ds, test_ds = get_datasets(cfg['DATASET'])
training_args = TrainingArguments(**cfg['TRAINER'])
model = construct_model(cfg['MODEL'])
model = model.to(device)
compute_metrics = get_eval_fn(cfg['DATASET'])

ResNet(
  (conv1): Conv2d(13, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    

AttributeError: 'NoneType' object has no attribute 'data'

In [34]:
train_ds[0]['image'][10]

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [36]:
train_ds[0]['label']

tensor(4)

In [14]:
ret = model(train_ds[0]['image'].unsqueeze(0).to(device))

In [17]:
train_ds = ConcatDataset([train_ds, val_ds])
train_dl = DataLoader(train_ds, batch_size=512, shuffle=True, num_workers=8)
test_dl = DataLoader(test_ds, batch_size=512, shuffle=False, num_workers=8)

In [16]:
ret.shape

torch.Size([1, 19])

In [18]:
trainer = Trainer(
    model=model,                # the instantiated 🤗 Transformers model to be trained
    args=training_args,                   # training arguments, defined above
    train_dataset=train_ds,    # training dataset
    eval_dataset=test_ds,      # evaluation dataset
    compute_metrics=compute_metrics,
)

trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mehzoahis[0m. Use [1m`wandb login --relogin`[0m to force relogin


KeyboardInterrupt: 

In [7]:
def extract_features(model, dataloader, device):
    x_all = []
    y_all = []

    for batch in tqdm(dataloader):
        images = batch["image"].to(device)
        labels = batch["label"].numpy()
        
        with torch.inference_mode():
            features = model(images).cpu().numpy()
        
        x_all.append(features)
        y_all.append(labels)

    x_all = np.concatenate(x_all, axis=0)
    y_all = np.concatenate(y_all, axis=0)

    return x_all, y_all

In [8]:
x_all, y_all = extract_features(model.base_model, train_dl, device)

100%|██████████| 43/43 [01:22<00:00,  1.92s/it]


In [9]:
x_test, y_test = extract_features(model.base_model, test_dl, device)

100%|██████████| 11/11 [00:30<00:00,  2.75s/it]


In [10]:
linear_model = LogisticRegression(C=50.0, max_iter=1000)
linear_model.fit(x_all, y_all)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [11]:
linear_model.score(x_test, y_test)

0.9377777777777778

In [4]:
model_cfg = cfg['MODEL']
model_cfg

{'architecture': 'vit_small_patch16_224', 'bands': 13, 'num_classes': 10, 'pretrained_ckpt': 'ViTSmall16_Weights.SENTINEL2_ALL_DINO', 'lp': False, 'head_extra_kwargs': {'use_bias': True}, 'load_pretrained_from': 'torchgeo'}

In [5]:
model = construct_model(model_cfg)

In [6]:
model

EncoderDecoder(
  (base_model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(13, 384, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate

In [12]:
weights = tgm.get_weight(model_cfg['pretrained_ckpt'])
encoder = tgm.get_model(model_cfg['architecture'], weights=weights)

In [14]:
encoder.head.in_features

384

In [1]:
# get the last layer of the encoder
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
timm.list_models('*dino*')

['vit_base_patch14_dinov2',
 'vit_giant_patch14_dinov2',
 'vit_large_patch14_dinov2',
 'vit_small_patch14_dinov2']

In [6]:
224/14

16.0