In [80]:
import sys
sys.path.append('../..')

In [81]:
import logging
import numpy as np

import torch
from emgrep.datasets.EMGRepDataloader import EMGRepDataloader

In [82]:
# Show INFO-level records of the root logger in the notebook output.
logging.basicConfig(level=logging.INFO)

In [83]:
# Every (subject, day, time) combination for subjects 1-2, days 1-2 and times 1-2.
data_selection = [
    (subject, day, time)
    for subject in (1, 2)
    for day in (1, 2)
    for time in (1, 2)
]

# Load the selected recordings from the raw data folder with positive_mode 'subject'.
emgrep_ds = EMGRepDataloader(
    data_selection=data_selection,
    data_path='../../data/01_raw/',
    positive_mode='subject',
)

INFO:root:Loading data...
100%|██████████| 8/8 [00:09<00:00,  1.24s/it]


In [84]:
# Obtain the dataloader from the dataset wrapper
# (presumably a torch DataLoader; confirm in EMGRepDataloader).
emgrep_dl = emgrep_ds.get_dataloader()

In [85]:
# Length of the dataloader (the number of batches if it is a torch DataLoader).
len(emgrep_dl)

3400

In [86]:
# Fetch a single batch to inspect its structure.
batch = next(iter(emgrep_dl))

In [87]:
# Unpack the first two entries of the batch
# (presumably signal and label tensors; confirm against EMGRepDataloader).
x, y = batch[0], batch[1]

In [88]:
# Display the shapes of both tensors.
x.shape, y.shape

(torch.Size([1, 2, 10, 16, 300]), torch.Size([1, 2, 10, 300, 1]))

In [89]:
# Simple CNN encoder network which takes in a sequence of blocks and outputs one vector per
# block containing the encoded representation of that block.

# All blocks of size N x F x 512 or shorter (channels first, as expected by Conv1d) should be
# mapped to vectors of size N x H, where H is the hidden size of the encoder network.

from torch import nn

class EncoderNetwork(nn.Module):
    """CNN encoder that turns every block of a sequence into one feature vector.

    The network is a stack of Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d blocks whose
    channel width doubles from 32 up to the largest power of 2 that does not exceed
    ``out_channels``, followed by an average pooling over the remaining time steps.
    """

    def __init__(self, in_channels, out_channels):
        """Build the convolutional stack.

        Args:
            in_channels (int): Number of channels of every input block.
            out_channels (int): Requested feature dimension per block. It is rounded
                down to a power of 2 (e.g. 258 -> 256). For values below 32 no
                convolution is created and the features keep ``in_channels`` entries.

        Raises:
            ValueError: If ``out_channels`` is smaller than 1.
        """
        super().__init__()

        if out_channels < 1:
            raise ValueError(f"out_channels must be >= 1, got {out_channels}")

        # Exponent of the highest power of 2 less than or equal to out_channels.
        # bit_length() is exact integer arithmetic, whereas a ratio of floating point
        # logarithms can be off by one ulp and truncate to the wrong exponent.
        max_power = int(out_channels).bit_length() - 1
        widths = [in_channels] + [2**f for f in range(5, max_power + 1)]

        # Two blocks per stage: one keeps the width, the next one widens it.
        # Distinct loop names avoid shadowing the constructor arguments.
        self.convs = nn.ModuleList([
            block
            for c_in, c_out in zip(widths[:-1], widths[1:])
            for block in [self.block(c_in, c_in), self.block(c_in, c_out)]
        ])

        # Collapses the remaining time axis to a single value per channel.
        self.output_conv = nn.AdaptiveAvgPool1d(1)

    def block(self, in_channels, out_channels):
        """Return one Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d block.

        The convolution keeps the sequence length (kernel 3, padding 1); the padded max
        pooling maps a length ``L`` to ``L // 2 + 1``.
        """
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(out_channels),
            nn.MaxPool1d(kernel_size=2, padding=1),
        )

    def forward(self, x):
        """Encode every block of ``x``.

        Args:
            x (torch.Tensor): Tensor of shape N x K x num_blocks x C x block_len.

        Returns:
            torch.Tensor: Tensor of shape N x K x num_blocks x H.
        """
        BS, K, NB, C, L = x.shape

        # Lazy %s formatting: the message is only rendered when INFO logging is enabled.
        logging.info("Input:   %s", x.shape)
        # Fold all leading dimensions into the batch axis expected by Conv1d.
        x = x.reshape(-1, C, L)
        logging.info("Reshape: %s", x.shape)

        for i, conv in enumerate(self.convs):
            x = conv(x)
            logging.info("Conv %d:  %s", i, x.shape)

        x = self.output_conv(x)
        logging.info("Out:     %s", x.shape)

        # Restore the leading dimensions: N x K x num_blocks x H.
        x = x.reshape(BS, K, NB, -1)
        logging.info("Reshape: %s", x.shape)

        return x

In [90]:
# Encoder configuration; the encoder rounds hidden_dim down to a power of 2.
in_channels = 16
hidden_dim = 258

encoder = EncoderNetwork(in_channels, hidden_dim)

# Count only the parameters that receive gradients.
n_params = sum(
    param.numel()
    for param in encoder.parameters()
    if param.requires_grad
)
logging.info(f"Number of trainable parameters: {n_params / 1e6} M")

INFO:root:Number of trainable parameters: 0.198 M


In [91]:
# Smoke test with blocks of 512 time steps; the encoder logs every intermediate shape.
random_input = torch.randn(3, 2, 10, in_channels, 512)
features = encoder(random_input)

INFO:root:Input:   torch.Size([3, 2, 10, 16, 512])
INFO:root:Reshape: torch.Size([60, 16, 512])
INFO:root:Conv 0:  torch.Size([60, 16, 257])
INFO:root:Conv 1:  torch.Size([60, 32, 129])
INFO:root:Conv 2:  torch.Size([60, 32, 65])
INFO:root:Conv 3:  torch.Size([60, 64, 33])
INFO:root:Conv 4:  torch.Size([60, 64, 17])
INFO:root:Conv 5:  torch.Size([60, 128, 9])
INFO:root:Conv 6:  torch.Size([60, 128, 5])
INFO:root:Conv 7:  torch.Size([60, 256, 3])
INFO:root:Out:     torch.Size([60, 256, 1])
INFO:root:Reshape: torch.Size([3, 2, 10, 256])


In [92]:
# Same smoke test with shorter blocks of 300 time steps.
random_input = torch.randn(3, 2, 10, in_channels, 300)
features = encoder(random_input)

INFO:root:Input:   torch.Size([3, 2, 10, 16, 300])
INFO:root:Reshape: torch.Size([60, 16, 300])
INFO:root:Conv 0:  torch.Size([60, 16, 151])
INFO:root:Conv 1:  torch.Size([60, 32, 76])
INFO:root:Conv 2:  torch.Size([60, 32, 39])
INFO:root:Conv 3:  torch.Size([60, 64, 20])
INFO:root:Conv 4:  torch.Size([60, 64, 11])
INFO:root:Conv 5:  torch.Size([60, 128, 6])
INFO:root:Conv 6:  torch.Size([60, 128, 4])
INFO:root:Conv 7:  torch.Size([60, 256, 3])
INFO:root:Out:     torch.Size([60, 256, 1])
INFO:root:Reshape: torch.Size([3, 2, 10, 256])


In [93]:
# Refactored version of the encoder
import math

import torch.nn as nn

class EncoderNetwork(nn.Module):
    def __init__(self, in_channels: int, hidden_dim: int):
        """Encoder network for encoding each block of a sequence into a single vector.

        Args:
            in_channels (int): Number of input channels.
            hidden_dim (int): Requested feature dimension of the output vector for each
                block. It is rounded down to a power of 2 (e.g. 258 -> 256). For values
                below 32 no convolution is created and the output keeps ``in_channels``
                features.

        Raises:
            ValueError: If ``hidden_dim`` is smaller than 1.
        """
        super().__init__()

        if hidden_dim < 1:
            raise ValueError(f"hidden_dim must be >= 1, got {hidden_dim}")

        # Exponent of the largest power of 2 <= hidden_dim, computed with exact integer
        # arithmetic instead of a floating point logarithm.
        max_power = int(hidden_dim).bit_length() - 1
        widths = [in_channels] + [2**f for f in range(5, max_power + 1)]

        # One stage per width step: a width-preserving block followed by a widening block.
        # Every MaxPool1d maps a length L to L // 2 + 1.
        self.convs = nn.Sequential(*[
            nn.Sequential(
                nn.Conv1d(c_in, c_in, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.BatchNorm1d(c_in),
                nn.MaxPool1d(kernel_size=2, padding=1),
                nn.Conv1d(c_in, c_out, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.BatchNorm1d(c_out),
                nn.MaxPool1d(kernel_size=2, padding=1)
            )
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ])

        # output_conv is used to map the time dimension to a single value
        # -> each block will be mapped to a feature with dimension hidden_dim
        self.output_conv = nn.AdaptiveAvgPool1d(1)

    def forward(self, x):
        """Encode every block of ``x``.

        Args:
            x (torch.Tensor): Tensor of shape N x K x num_blocks x F x block_len.

        Returns:
            torch.Tensor: Tensor of shape N x K x num_blocks x H.
        """
        N, K, num_blocks, F, block_len = x.shape
        # reshape (not view) so that non-contiguous inputs, e.g. sliced or permuted
        # batches, are accepted as well; view raises a RuntimeError for those.
        x = x.reshape(N * K * num_blocks, F, block_len)

        x = self.convs(x)
        x = self.output_conv(x)

        x = x.reshape(N, K, num_blocks, -1)

        return x


In [97]:
# Instantiate the refactored encoder with the same configuration as before.
encoder = EncoderNetwork(in_channels, hidden_dim)

# Count only the parameters that receive gradients.
n_params = sum(
    param.numel()
    for param in encoder.parameters()
    if param.requires_grad
)

print(f"Number of trainable parameters: {n_params / 1e6} M")

Number of trainable parameters: 0.198 M


In [98]:
# Run the encoder on the most recent random input and display the output shape.
out = encoder(random_input)

out.shape

torch.Size([3, 2, 10, 256])

In [99]:
# print model summary
from torchsummary import summary

# input_size has four entries while forward() unpacks five dimensions, so the batch
# dimension is presumably added by torchsummary; confirm against its documentation.
summary(encoder, input_size=(2, 10, in_channels, 512), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv1d-1              [-1, 16, 512]             784
              ReLU-2              [-1, 16, 512]               0
       BatchNorm1d-3              [-1, 16, 512]              32
         MaxPool1d-4              [-1, 16, 257]               0
            Conv1d-5              [-1, 32, 257]           1,568
              ReLU-6              [-1, 32, 257]               0
       BatchNorm1d-7              [-1, 32, 257]              64
         MaxPool1d-8              [-1, 32, 129]               0
            Conv1d-9              [-1, 32, 129]           3,104
             ReLU-10              [-1, 32, 129]               0
      BatchNorm1d-11              [-1, 32, 129]              64
        MaxPool1d-12               [-1, 32, 65]               0
           Conv1d-13               [-1, 64, 65]           6,208
             ReLU-14               [-1,

In [104]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from PIL import Image

# Root directory that is scanned for scene folders, and the folder the overview plots go to.
IN = "/Volumes/Extreme_SSD/MegaDepth/scenes/"
OUT = "/Volumes/Extreme_SSD/MegaDepth/overview/"

# exist_ok=True avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(OUT, exist_ok=True)

def plot_random_images(root_dir, output_dir, n_images=25):
    """Plot random images of every scene in root_dir and save one overview per scene.

    For each sub-directory ``<scene>`` of ``root_dir``, up to ``n_images`` files are
    sampled at random from ``<scene>/images``, drawn on a near-square grid and saved
    as ``<output_dir>/<scene>_images.png``. Scenes without an ``images`` directory or
    without any file in it are skipped with a message.

    Args:
        root_dir (str): Path to the root directory containing the scene folders.
        output_dir (str): Path to the output directory where the plots should be saved.
            It is created if it does not exist.
        n_images (int, optional): Maximum number of images to plot per scene.
            Defaults to 25.

    Raises:
        ValueError: If ``n_images`` is smaller than 1.
    """
    if n_images < 1:
        raise ValueError(f"n_images must be >= 1, got {n_images}")

    os.makedirs(output_dir, exist_ok=True)

    # get all folders in root_dir
    scenes = [f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f))]

    for scene in tqdm(scenes):
        scene_dir = os.path.join(root_dir, scene, "images")
        if not os.path.isdir(scene_dir):
            tqdm.write(f"Skipping {scene}: no 'images' directory")
            continue

        image_files = [
            f for f in os.listdir(scene_dir) if os.path.isfile(os.path.join(scene_dir, f))
        ]
        if not image_files:
            tqdm.write(f"Skipping {scene}: no images found")
            continue

        # random.sample raises ValueError when asked for more items than are available,
        # so cap the sample size at the number of files in the scene.
        selected = random.sample(image_files, min(n_images, len(image_files)))

        images = []
        for image_file in selected:
            # The context manager closes the file handle once the pixels are copied.
            with Image.open(os.path.join(scene_dir, image_file)) as image:
                images.append(np.array(image))

        # Near-square grid derived from the number of images (5 x 5 for 25 images),
        # with 3 inches per cell (15 x 15 inches for 25 images).
        n_cols = int(np.ceil(np.sqrt(len(images))))
        n_rows = int(np.ceil(len(images) / n_cols))
        fig, axes = plt.subplots(
            n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False
        )
        for i, ax in enumerate(axes.flatten()):
            if i < len(images):
                ax.imshow(images[i])
                ax.set_title(f"Image {i}")
            # Cells beyond the last image stay empty but are hidden as well.
            ax.axis("off")
        fig.tight_layout()

        # save the plot and release the figure to keep memory bounded across scenes
        fig.savefig(os.path.join(output_dir, f"{scene}_images.png"))
        plt.close(fig)

# Create one overview figure per scene found under IN and store it in OUT.
plot_random_images(IN, OUT)

100%|██████████| 38/38 [02:01<00:00,  3.19s/it]
