In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [17]:
# Define the transformer-based model architecture
class DeTR(nn.Module):
    def __init__(self):
        super(TransformerModel, self).__init__()
        # Define the transformer backbone
        self.transformer = nn.Transformer(num_encoder_layers=6, num_decoder_layers=6, 
                                          d_model=256, dim_feedforward=1024, dropout=0.1)
        # Define the detection head
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 4) # 4 outputs for bounding box regression
        self.fc4 = nn.Linear(64, num_classes) # num_classes outputs for object classification
    
    def forward(self, x):
        # Reshape the input image
        x = x.permute(0, 3, 1, 2)
        b, c, h, w = x.size()
        x = x.reshape(b, c, h*w)
        # Pass the image through the transformer backbone
        x = self.transformer(x, x)
        x = x.mean(dim=1) # take the average of the patches
        # Pass the features through the detection head
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        bbox_output = self.fc3(x)
        class_output = nn.functional.softmax(self.fc4(x), dim=1)
        return bbox_output, class_output

In [18]:
# Define the dataset
class ObjectDetectionDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels):
        self.images = images
        self.labels = labels

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image = self.images[index]
        label = self.labels[index]
        return image, label

In [19]:
# Define the loss function
def loss_fn(outputs, targets):
    bbox_outputs, class_outputs = outputs
    bbox_targets, class_targets = targets
    # Compute the smooth L1 loss for bounding box regression
    bbox_loss = nn.functional.smooth_l1_loss(bbox_outputs, bbox_targets)
    # Compute the cross-entropy loss for object classification
    class_loss = nn.functional.cross_entropy(class_outputs, class_targets)
    # Compute the overall loss
    loss = bbox_loss + class_loss
    return loss

In [12]:
# Define the training function
def train(model, optimizer, dataloader):
    model.train()
    for epoch in range(10):
        for i, (images, targets) in enumerate(dataloader):
            optimizer.zero_grad()
            bbox_targets, class_targets = targets
            bbox_outputs, class_outputs = model(images)
            loss = loss_fn((bbox_outputs, class_outputs), (bbox_targets, class_targets))
            loss.backward()
            optimizer.step()
            
            if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, 10, i+1, len(train_loader), loss.item()))

In [20]:
# Define the validation function
def validate(model, dataloader):
    model.eval()
    with torch.no_grad():
        total_bbox_loss = 0
        total_class_loss = 0
        total_samples = 0
        for images, targets in dataloader:
            bbox_targets, class_targets = targets
            bbox_outputs, class_outputs = model(images)
            bbox_loss = nn.functional.smooth_l1_loss(bbox_outputs, bbox_targets)
            class_loss = nn.functional.cross_entropy(class_outputs, class_targets)
            total_bbox_loss += bbox_loss.item() * images.size(0)
            total_class_loss += class_loss.item() * images.size(0)
            total_samples += images.size(0)
        avg_bbox_loss = total_bbox_loss / total_samples
        avg_class_loss = total_class_loss / total_samples
    return avg_bbox_loss, avg_class_loss

In [23]:
num_classes=91

In [24]:
train_dataset = CocoDetection(root='datasets/coco/train2017',
                              annFile='datasets/coco/annotations/instances_train2017.json',)
#                               transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/felipe/anaconda3/envs/graffiti/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_538340/335357733.py", line 1, in <module>
    train_dataset = CocoDetection(root='datasets/coco/train2017',
  File "/home/felipe/anaconda3/envs/graffiti/lib/python3.8/site-packages/torchvision/datasets/coco.py", line 112, in __init__
ModuleNotFoundError: No module named 'pycocotools'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/felipe/anaconda3/envs/graffiti/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2057, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
  File "/home/felipe/anaconda3/envs/graffiti/lib/python3.8/site-packages/IPython/core/ultratb.py", line 1288, in structured_traceback
    return FormattedTB.structured_traceback(
 

In [None]:
model = DeTR()
optimizer = optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train(model, optimizer, train_loader)

In [None]:
os.exit()

In [None]:
# Define the main function
def main():
    # Define the hyperparameters
    learning_rate = 0.001
    batch_size = 32

In [15]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
from torchvision.transforms import transforms
from torchvision.models import ViT

# Set up the dataset and data loader
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Set up the model
model = ViT(num_classes=91)

# Set up the optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Train the model
for epoch in range(10):
    for i, (images, targets) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, 10, i+1, len(train_loader), loss.item()))

# Save the model
torch.save(model.state_dict(), 'vit_object_detection.pth')

ImportError: cannot import name 'ViT' from 'torchvision.models' (/home/felipe/anaconda3/envs/graffiti/lib/python3.8/site-packages/torchvision/models/__init__.py)