In [1]:
import os

from google.colab import drive

# Attach Google Drive to the Colab runtime so the dataset archive can be read.
drive.mount('/content/drive')

# Show the top-level entries of "My Drive" (not shared drives) to confirm the
# mount worked.
# NOTE(review): this prints every file name in the Drive root; clear the cell
# output before sharing the notebook.
print(os.listdir('/content/drive/MyDrive'))

Mounted at /content/drive
['VID_20210528_174657_178.mp4', 'VID_20210528_174701_440.mp4', 'Infrastructure VS. AD-HOC.pdf.gdrive', 'افكار لكتابه السيره الذاتية.gdoc', 'British Council Certificate.pdf', 'IMG_20211010_131359 (5).jpg', 'IMG_20211010_131359 (4).jpg', 'IMG_20211010_131359 (3).jpg', 'IMG_20211010_131359 (2).jpg', 'IMG_20211010_131359 (1).jpg', 'IMG_20211010_131359.jpg', 'IMG_20211010_192217.jpg', 'Screenshot_2021-10-21-11-18-21-80_40deb401b9ffe8e1df2f1cc5ba480b12.jpg', 'exp1s.pdf', 'EXP2.pdf', 'KIRSSOF.pdf', '16372573386196998407231969063766.jpg', '16372575382426078223689767951657.jpg', '555555.pdf', 'abdelrhman Zain Mohamed 2101646(2).pdf', 'EXP 5.pdf', 'EXP 6.pdf', 'Abdelrhman zain mohamed 2101646(3).pdf', 'نسخة من Sections 7 & 8.pdf', 'exp1sssss.pdf', 'abdelrhman zain 1 .pdf', 'ABDELRHman zain 2 pdf.pdf', 'ABDEL rhman zain . 3.pdf', 'exp measure 4 .pdf', 'Introduction.pdf', 'exp 6.pdf', 'exp 7.pdf', 'exp 8.pdf', 'exp9.pdf', 'exp 10  (1).pdf', 'exp 10 .pdf', 'exp 11 .pdf', '

In [None]:
import zipfile

# Location of the dataset archive on Drive and the local directory it is
# unpacked into.
zip_path = '/content/drive/MyDrive/full_data.zip'
extract_path = '/content/full_data/'

# Make sure the destination exists; re-running the cell is harmless.
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive into the destination directory.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

print("Extraction complete.")

In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset

class GOT10KDataset(Dataset):
    """Template/search image pairs read from one sub-folder per sequence.

    Every sub-directory of ``root_dir`` that contains both ``template.jpg`` and
    ``search.jpg`` becomes one sample. The classification and regression labels
    are fixed example values (the same for every sample), not annotations read
    from disk.

    Args:
        root_dir: Directory whose sub-folders each hold one image pair.
        transform: Optional callable applied to both images of a pair.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = self._load_samples()

    def _load_samples(self):
        """Index the usable sample folders under ``root_dir``.

        Returns:
            A list of ``(template_path, search_path, cls_label, reg_label)``
            tuples, ordered by folder name.
        """
        import warnings  # local import: only needed while indexing

        samples = []
        skipped = []
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample indices (and any seeded split built on them) reproducible.
        for folder in sorted(os.listdir(self.root_dir)):
            folder_path = os.path.join(self.root_dir, folder)
            if not os.path.isdir(folder_path):
                continue

            template_path = os.path.join(folder_path, 'template.jpg')
            search_path = os.path.join(folder_path, 'search.jpg')
            # Skip incomplete folders now instead of failing with
            # FileNotFoundError in the middle of an epoch.
            if not (os.path.isfile(template_path) and os.path.isfile(search_path)):
                skipped.append(folder)
                continue

            cls_label = torch.tensor([0, 1])  # Example classification label
            reg_label = torch.tensor([0.5, 0.5, 0.5, 0.5])  # Example regression label
            samples.append((template_path, search_path, cls_label, reg_label))

        if skipped:
            warnings.warn(
                f"Skipped {len(skipped)} folder(s) under {self.root_dir!r} "
                "missing template.jpg or search.jpg"
            )
        return samples

    def __len__(self):
        """Number of indexed image pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one pair and return ``(template, search, cls_label, reg_label)``."""
        template_path, search_path, cls_label, reg_label = self.samples[idx]

        # convert() returns a new image, so the file handles can be closed
        # as soon as the pixel data has been read.
        with Image.open(template_path) as template_file:
            template = template_file.convert('RGB')
        with Image.open(search_path) as search_file:
            search = search_file.convert('RGB')

        if self.transform:
            template = self.transform(template)
            search = self.transform(search)

        return template, search, cls_label, reg_label

# Build the dataset over the extracted archive.
from torchvision import transforms

# Both images of a pair are resized to 224x224 and converted to tensors.
data_transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

dataset = GOT10KDataset('/content/full_data', transform=data_transform)


In [None]:
from torch.utils.data import DataLoader, random_split

batch_size = 4

# 80/20 train/validation split; the remainder goes to validation so the two
# sizes always add up to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# A seeded generator makes the split identical after "Restart & Run All";
# without it, samples move between train and validation on every run.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Only the training loader shuffles; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Conv2d, Linear, Dropout, MultiheadAttention, GroupNorm

class FeatureExtractor(nn.Module):
    """AlexNet convolutional trunk that returns its last three layer outputs.

    The trunk is ``alexnet.features`` without its final child, loaded with
    pretrained weights through ``torch.hub``.
    """

    def __init__(self):
        super(FeatureExtractor, self).__init__()
        alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
        self.features = nn.Sequential(*list(alexnet.features.children())[:-1])

    def forward(self, x):
        """Run ``x`` through the trunk.

        Returns:
            A list holding the outputs of the last three layers (fewer if the
            trunk has fewer than three layers), in layer order.
        """
        last_index = len(self.features) - 1
        first_kept = max(last_index - 2, 0)

        outputs = []
        for index, layer in enumerate(self.features):
            x = layer(x)
            if index < first_kept:
                continue
            # In-place layers (torchvision's AlexNet uses ReLU(inplace=True))
            # overwrite the tensor they receive. Storing the tensor itself
            # would make a conv output and the following ReLU output the very
            # same object, so snapshot every kept output that still has a
            # layer after it.
            outputs.append(x if index == last_index else x.clone())
        return outputs

class FeatureEncoder(nn.Module):
    """Cross-attention block: ``x1`` queries ``x2`` after a shared 1x1 projection.

    Args:
        in_channels: Channels of both input feature maps.
        out_channels: Channels after the 1x1 projection (also the attention
            embedding size; GroupNorm uses 8 groups over it).
        num_heads: Number of attention heads.
    """

    def __init__(self, in_channels, out_channels, num_heads=8):
        super().__init__()
        self.conv = Conv2d(in_channels, out_channels, kernel_size=1)
        # One learned offset per channel, broadcast over batch and space.
        self.positional_encoding = nn.Parameter(torch.randn(1, out_channels, 1, 1))
        self.multihead_attn = MultiheadAttention(embed_dim=out_channels, num_heads=num_heads)
        self.norm = GroupNorm(8, out_channels)

    def _project(self, feature_map):
        """Apply the shared 1x1 conv and add the learned per-channel offset."""
        return self.conv(feature_map) + self.positional_encoding

    def forward(self, x1, x2):
        """Return ``norm(x1' + attn(x1', x2', x2'))`` shaped like the projected ``x1``."""
        query_map = self._project(x1)
        memory_map = self._project(x2)

        batch, channels, height, width = query_map.size()
        # (B, C, H, W) -> (H*W, B, C): the sequence-first layout the attention
        # module is fed here.
        query_seq = query_map.view(batch, channels, -1).permute(2, 0, 1)
        memory_seq = memory_map.view(batch, channels, -1).permute(2, 0, 1)

        attended_seq, _weights = self.multihead_attn(query_seq, memory_seq, memory_seq)
        attended_map = attended_seq.permute(1, 2, 0).view(batch, channels, height, width)

        # Residual connection followed by group normalisation.
        return self.norm(query_map + attended_map)

class FeatureDecoder(nn.Module):
    """Self-attention + feed-forward block over a (B, C, H, W) feature map.

    Each sub-layer is wrapped in a residual connection followed by GroupNorm
    with 8 groups.

    Args:
        in_channels: Channel count of the input (also the attention embedding
            size).
        num_heads: Number of attention heads.
    """

    def __init__(self, in_channels, num_heads=8):
        super(FeatureDecoder, self).__init__()
        self.multihead_attn = MultiheadAttention(embed_dim=in_channels, num_heads=num_heads)
        self.norm1 = GroupNorm(8, in_channels)
        self.norm2 = GroupNorm(8, in_channels)
        self.ffn = nn.Sequential(
            Linear(in_channels, in_channels * 4),
            nn.ReLU(),
            Linear(in_channels * 4, in_channels),
        )
        self.dropout = Dropout(0.1)

    def forward(self, x):
        """Return the refined feature map, same shape as ``x``."""
        B, C, H, W = x.size()

        # flatten()/reshape() also accept non-contiguous inputs, where
        # view() would raise.
        tokens = x.flatten(2).permute(2, 0, 1)  # (H*W, B, C)
        attn_output, _ = self.multihead_attn(tokens, tokens, tokens)
        attn_output = attn_output.permute(1, 2, 0).reshape(B, C, H, W)

        # The dropout layer was constructed but never applied; use it on each
        # sub-layer output before the residual add. It is a no-op in eval mode.
        x = self.norm1(x + self.dropout(attn_output))

        ffn_output = self.ffn(x.flatten(2).permute(2, 0, 1))
        ffn_output = ffn_output.permute(1, 2, 0).reshape(B, C, H, W)
        x = self.norm2(x + self.dropout(ffn_output))

        return x

class ClassificationAndRegression(nn.Module):
    """Two parallel 1x1 convolution heads over a shared feature map.

    Args:
        in_channels: Channel count of the incoming feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        # 2 output channels for classification, 4 for regression.
        self.cls_conv = nn.Conv2d(in_channels, 2, kernel_size=1)
        self.reg_conv = nn.Conv2d(in_channels, 4, kernel_size=1)

    def forward(self, x):
        """Return ``(cls_map, reg_map)`` with 2 and 4 channels respectively."""
        return self.cls_conv(x), self.reg_conv(x)

class ModulationLayer(nn.Module):
    """Channel gating: global-average-pool, two bias-free linears, sigmoid, rescale.

    Args:
        in_channels: Channel count of the input feature map.
        reduction_ratio: Divisor for the hidden width of the gating MLP
            (``in_channels // reduction_ratio``).
    """

    def __init__(self, in_channels, reduction_ratio=16):
        super().__init__()
        hidden_channels = in_channels // reduction_ratio
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Linear(in_channels, hidden_channels, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(hidden_channels, in_channels, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Scale every channel of ``x`` by a gate in (0, 1) computed from ``x``."""
        batch, channels, _height, _width = x.size()

        # Squeeze: one scalar per (sample, channel).
        pooled = self.gap(x).view(batch, channels)

        # Excite: bottleneck MLP followed by a sigmoid gate.
        hidden = self.relu(self.fc1(pooled))
        gate = self.sigmoid(self.fc2(hidden))

        gate = gate.view(batch, channels, 1, 1)
        return x * gate.expand_as(x)

class HiFT(nn.Module):
    """Tracker model: shared backbone, per-level fusion/decoding, dense heads.

    ``forward(z, x)`` takes a template batch ``z`` and a search batch ``x`` and
    returns ``(cls_output, reg_output)`` from the classification/regression head.
    """

    def __init__(self):
        super().__init__()
        self.feature_extractor = FeatureExtractor()
        self.feature_encoder = FeatureEncoder(in_channels=256, out_channels=256)
        self.modulation_layer = ModulationLayer(in_channels=256)
        self.feature_decoder = FeatureDecoder(in_channels=256)
        self.classification_and_regression = ClassificationAndRegression(in_channels=256)
        # 1x1 conv that halves the channels of the concatenated template/search maps.
        self.concat_conv = nn.Conv2d(256 * 2, 256, kernel_size=1)

    def concatenate_and_conv(self, z, x):
        """Concatenate ``z`` and ``x`` along channels and project back to 256."""
        return self.concat_conv(torch.cat((z, x), dim=1))

    def forward(self, z, x):
        """Compute the classification and regression maps for a template/search pair."""
        template_levels = self.feature_extractor(z)
        search_levels = self.feature_extractor(x)

        decoded_levels = []
        for level in range(3):
            template_feat = template_levels[level]
            search_feat = search_levels[level]

            # NOTE(review): the encoder and modulation outputs are computed but
            # never consumed below; the decoder only sees the fused raw
            # features. Confirm the intended data flow.
            encoded = self.feature_encoder(template_feat, search_feat)
            _modulated = self.modulation_layer(encoded)

            fused = self.concatenate_and_conv(template_feat, search_feat)
            decoded_levels.append(self.feature_decoder(fused))

        # Element-wise sum of the three decoded levels.
        final_features = sum(decoded_levels)
        return self.classification_and_regression(final_features)


In [None]:
import torch.optim as optim

def train(model, dataloader, epochs, device):
    """Train ``model`` with Adam on a summed classification + regression loss.

    The model may return dense ``(B, C, H, W)`` maps while the labels are
    per-sample. In that case the maps are averaged over space, and an integer
    one-hot class label is turned into a class index, so that
    ``CrossEntropyLoss`` and ``MSELoss`` receive matching shapes.

    Args:
        model: Module called as ``model(template, search)`` that returns
            ``(cls_output, reg_output)``.
        dataloader: Iterable of ``(template, search, cls_label, reg_label)``
            tensor batches.
        epochs: Number of passes over ``dataloader``.
        device: Device the batches are moved to before the forward pass.
    """
    model.train()
    criterion_cls = nn.CrossEntropyLoss()
    criterion_reg = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (template, search, cls_label, reg_label) in enumerate(dataloader):
            template, search, cls_label, reg_label = template.to(device), search.to(device), cls_label.to(device), reg_label.to(device)

            optimizer.zero_grad()

            cls_output, reg_output = model(template, search)

            # Dense map vs. per-sample label: pool the map to one prediction
            # per sample. Only shape combinations the criteria would reject
            # (or silently broadcast) are touched.
            if cls_output.dim() == 4 and cls_label.dim() <= 2:
                cls_output = cls_output.flatten(2).mean(dim=2)
            if reg_output.dim() == 4 and reg_label.dim() == 2:
                reg_output = reg_output.flatten(2).mean(dim=2)
            # An integer (B, C) label is a one-hot encoding; CrossEntropyLoss
            # wants class indices for integer targets.
            if (
                cls_label.dim() == 2
                and cls_output.dim() == 2
                and cls_label.shape[1] == cls_output.shape[1]
                and not cls_label.is_floating_point()
            ):
                cls_label = cls_label.argmax(dim=1)

            cls_loss = criterion_cls(cls_output, cls_label)
            reg_loss = criterion_reg(reg_output, reg_label)
            loss = cls_loss + reg_loss

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Report the mean loss of every block of 10 batches.
            if i % 10 == 9:
                print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}")
                running_loss = 0.0

    print('Finished Training')

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the tracker and place its parameters on the chosen device.
model = HiFT()
model.to(device)

# Run 10 training epochs over the training split.
train(model, train_dataloader, 10, device)


In [None]:
def validate(model, dataloader, device):
    """Print the mean classification + regression loss over ``dataloader``.

    The model is put in eval mode and run without gradient tracking. Dense
    ``(B, C, H, W)`` outputs are averaged over space when the labels are
    per-sample, and an integer one-hot class label is converted to a class
    index, so both criteria receive matching shapes.

    Args:
        model: Module called as ``model(template, search)`` that returns
            ``(cls_output, reg_output)``.
        dataloader: Iterable of ``(template, search, cls_label, reg_label)``
            tensor batches.
        device: Device the batches are moved to before the forward pass.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    criterion_cls = nn.CrossEntropyLoss()
    criterion_reg = nn.MSELoss()

    with torch.no_grad():
        for template, search, cls_label, reg_label in dataloader:
            template, search, cls_label, reg_label = template.to(device), search.to(device), cls_label.to(device), reg_label.to(device)

            cls_output, reg_output = model(template, search)

            # Dense map vs. per-sample label: pool the map to one prediction
            # per sample. Only shape combinations the criteria would reject
            # (or silently broadcast) are touched.
            if cls_output.dim() == 4 and cls_label.dim() <= 2:
                cls_output = cls_output.flatten(2).mean(dim=2)
            if reg_output.dim() == 4 and reg_label.dim() == 2:
                reg_output = reg_output.flatten(2).mean(dim=2)
            # An integer (B, C) label is a one-hot encoding; CrossEntropyLoss
            # wants class indices for integer targets.
            if (
                cls_label.dim() == 2
                and cls_output.dim() == 2
                and cls_label.shape[1] == cls_output.shape[1]
                and not cls_label.is_floating_point()
            ):
                cls_label = cls_label.argmax(dim=1)

            cls_loss = criterion_cls(cls_output, cls_label)
            reg_loss = criterion_reg(reg_output, reg_label)
            loss = cls_loss + reg_loss

            total_loss += loss.item()
            num_batches += 1

    # An empty loader used to end in ZeroDivisionError; report it instead.
    if num_batches == 0:
        print("Validation Loss: n/a (dataloader produced no batches)")
        return

    avg_loss = total_loss / num_batches
    print(f"Validation Loss: {avg_loss:.3f}")

# Report the mean loss of the trained model on the held-out validation split.
validate(model=model, dataloader=val_dataloader, device=device)
