In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50
import matplotlib.pyplot as plt
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

print(torch.__version__)
print(torchvision.__version__)

# 1. Data Loading and Preprocessing
def load_data(data_dir):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    dataset = ImageFolder(root=data_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)
    
    return dataloader

# 2. Multi-scale Feature Extraction
class MultiScaleFeatureExtractor(torch.nn.Module):
    def __init__(self):
        super(MultiScaleFeatureExtractor, self).__init__()
        self.resnet = resnet50(pretrained=True)
        
    def forward(self, x):
        features = []
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)
        
        x = self.resnet.layer1(x)
        features.append(x)  # Fine scale
        
        x = self.resnet.layer2(x)
        features.append(x)  # Medium scale
        
        x = self.resnet.layer3(x)
        features.append(x)  # Coarse scale
        
        return features

# 3. Position Encoding
def generate_position_encoding(feature_map):
    _, _, h, w = feature_map.shape
    position_encoding = torch.zeros(h, w, 2)
    for i in range(h):
        for j in range(w):
            position_encoding[i, j, 0] = i / h
            position_encoding[i, j, 1] = j / w
    return position_encoding

# Main execution
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load data
    # Note: You need to replace 'path/to/imagenet' with the actual path to your ImageNet dataset
    dataloader = load_data('../test_images')
    
    # Initialize feature extractor
    feature_extractor = MultiScaleFeatureExtractor().to(device)
    
    # Extract features for a batch
    for images, _ in dataloader:
        images = images.to(device)
        multi_scale_features = feature_extractor(images)
        break  # Just process one batch for demonstration
    
    # Generate position encodings
    position_encodings = [generate_position_encoding(features) for features in multi_scale_features]
    
    # Visualize a sample image and its feature maps
    plt.figure(figsize=(20, 5))
    plt.subplot(1, 4, 1)
    plt.imshow(images[0].cpu().permute(1, 2, 0))
    plt.title("Original Image")
    
    for i, features in enumerate(multi_scale_features):
        plt.subplot(1, 4, i+2)
        plt.imshow(features[0].sum(dim=0).cpu().detach().numpy())
        plt.title(f"Scale {i+1} Features")
    
    plt.tight_layout()
    plt.show()

print("Data preparation and feature extraction complete.")

2.4.1
0.19.1


FileNotFoundError: [Errno 2] No such file or directory: 'test_images'

In [None]:
# Main execution
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load data
    trainloader = load_data()
    
    # Initialize feature extractor
    feature_extractor = MultiScaleFeatureExtractor().to(device)
    
    # Extract features for a batch
    for images, _ in trainloader:
        images = images.to(device)
        multi_scale_features = feature_extractor(images)
        break  # Just process one batch for demonstration
    
    # Generate position encodings
    position_encodings = [generate_position_encoding(features) for features in multi_scale_features]
    
    # Visualize a sample image and its feature maps
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 4, 1)
    plt.imshow(images[0].cpu().permute(1, 2, 0))
    plt.title("Original Image")
    
    for i, features in enumerate(multi_scale_features):
        plt.subplot(1, 4, i+2)
        plt.imshow(features[0].sum(dim=0).cpu().detach().numpy())
        plt.title(f"Scale {i+1} Features")
    
    plt.tight_layout()
    plt.show()

print("Data preparation and feature extraction complete.")

TypeError: load_data() missing 1 required positional argument: 'data_dir'