In [1]:
import argparse
import glob
import os
from copy import copy
from pprint import pprint

import cv2
import numpy as np
import torch
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils import imwrite
from gfpgan import GFPGANer
from gfpgan.archs.stylegan2_clean_arch import ModulatedConv2d
from realesrgan import RealESRGANer
from torchsummary import summary
from tqdm import tqdm
from utils import *

from concrete.ml.torch.hybrid_model import HybridFHEModel

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


Overview of the GFP-GAN Architecture

The GFP-GAN pipeline is divided into 3 main components:

1. Face Cropping (restorer.face_helper): To detect and crop faces from input images.

    Composition:
    - 82 Standard Convolutional Layers with:
        + Kernel Sizes: (7, 7) or (3, 3) or (1, 1)
        + Strides: (2, 2) or (1, 1)
        + Padding: (3, 3) or (1, 1)


    - **No Grouped, Dilated, or Depthwise Convolutions**

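        + Grouped Convolutions: Convolutions where the input channels are split into groups, each group is convolved with its own set of filters, and the per-group results are concatenated.
        (groups=1 by default; a depthwise convolution is the special case where groups equals the number of input channels).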

        + Dilated Convolutions: Convolutions where the kernel is expanded by inserting zeros between its elements, increasing the receptive field without increasing the number of parameters.
        (dilation=1 by default).

    - **Modulated Convolutions** (`ModulatedConv2d`, found in the face restoration model described in item 2):

        + The convolutional weights are dynamically adjusted (modulated) for each input sample based on a style vector.
        + This modulation allows the network to adapt its convolutional filters per sample, enabling more control over generated features.
        + Modulate Weights Process: For each ModulatedConv2d layer:
            - The style vector is transformed (usually via another linear layer) to obtain modulation weights.
            - These weights modulate the convolutional filters.
            - Demodulation: After modulation, weights can vary in magnitude, leading to instability during training. Demodulation normalizes the weights to maintain a consistent signal magnitude across the layers.


        ```text
        Style Vector (w)
                |
        Modulation Weights (s)
                |
        Modulated Weights (s * k)
                |
        (Optional) Demodulation
                |
        Convolution Operation
                |
        Output Feature Maps
        ```

2. Face Restoration (restorer.gfpgan): To restore and enhance the quality of cropped facial images.

    Composition:
    - 32 Linear Layers
    - 79 Standard Convolutional Layers with:
        + Kernel Sizes: (3, 3) or (1, 1)
        + Strides: (1, 1)
        + Padding: (1, 1)
    - 23 Modulated Convolutional Layers (ModulatedConv2d), with: Kernel Sizes: 3 or 1

3. Background Enhancement (restorer.upsampler): To enhance the background details of the images after face restoration.

    Composition:
    - 351 Standard Convolutional Layers with fixed configurations:
        + Kernel Size: (3, 3)
        + Stride: (1, 1)
        + Padding: (1, 1)


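Output height of a convolution (the width follows the same formula with the $w$ subscripts), where $P$ is the padding, $D$ the dilation, $K$ the kernel size and $S$ the stride:

$$H_{out} = \left\lfloor \frac{H_{in} + 2 P_h - D_h (K_h - 1) - 1}{S_h} \right\rfloor + 1$$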


In [2]:
class Args:
    """Bundle of run settings exposed as plain instance attributes.

    The cells below read `bg_upsampler`, `bg_tile`, `version` and `upscale`
    from the module-level `args` instance created right after the class.
    """

    def __init__(self):
        # Every keyword becomes an instance attribute of the same name.
        self.__dict__.update(
            input="GFPGAN/inputs/whole_imgs",
            output="results",
            version="1.4",
            upscale=5,
            bg_upsampler="realesrgan",
            bg_tile=400,
            suffix=None,
            only_center_face=False,
            aligned=False,
            ext="auto",
            weight=0.5,
        )


args = Args()

In [3]:
use_background_improvement = True

# Always bind `bg_upsampler`: the GFPGANer cell passes it unconditionally, so leaving it
# undefined when background enhancement is off (or another upsampler name is configured)
# would end in a NameError there. None means "no background upsampler".
bg_upsampler = None

if args.bg_upsampler == "realesrgan" and use_background_improvement:
    # Half precision is enabled only when CUDA is available; it must be False in CPU mode.
    half = torch.cuda.is_available()

    # No linear modules in this model
    model = RRDBNet(
        num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2
    )
    bg_upsampler = RealESRGANer(
        scale=2,  # Do not change this value
        model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
        model=model,
        tile=args.bg_tile,
        tile_pad=10,
        pre_pad=0,
        half=half,
    )

In [4]:
# Pick the architecture settings and the download URL for the requested GFPGAN version.
if args.version == "1.3":
    arch = "clean"
    channel_multiplier = 2
    model_name = "GFPGANv1.3"
    url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth"
    local_model_path = "GFPGANv1.3.pth"
elif args.version == "1.4":
    arch = "clean"
    channel_multiplier = 2
    model_name = "GFPGANv1.4"
    url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"
    local_model_path = "GFPGANv1.4.pth"
else:
    # Fail here with a clear message rather than with a NameError on `model_name` below.
    raise ValueError(f"Unsupported GFPGAN version {args.version!r}; expected '1.3' or '1.4'.")

# determine model paths: try the two local checkpoint folders first, in this order
model_path = os.path.join("experiments/pretrained_models", model_name + ".pth")
if not os.path.isfile(model_path):
    model_path = os.path.join("gfpgan/weights", model_name + ".pth")
if not os.path.isfile(model_path):
    # download pre-trained models from url
    model_path = url

# Build the restorer; `bg_upsampler` comes from the background-upsampler cell above.
restorer = GFPGANer(
    model_path=model_path,
    upscale=args.upscale,
    arch=arch,
    channel_multiplier=channel_multiplier,
    bg_upsampler=bg_upsampler,
)

In [5]:
def compute_total_data(layer_shapes, selected_class_name):
    """Sum the input and output element counts of every layer of one class.

    Args:
        layer_shapes: Mapping of layer name to an info dict holding the keys
            "class_name", "input_shapes" and "output_shapes".
        selected_class_name: Only layers whose "class_name" equals this value
            contribute to the total.

    Returns:
        The accumulated C * H * W of inputs plus outputs over the selected
        layers; 0 when no layer matches.

    A layer whose dimensions raise ValueError during extraction is reported
    with print() and left out of the total.
    """
    accumulated = 0

    for name, details in layer_shapes.items():
        # Guard clause: ignore every layer of a different class.
        if details["class_name"] != selected_class_name:
            continue

        shapes_in = details["input_shapes"]
        shapes_out = details["output_shapes"]

        try:
            # extract_dimensions is not defined in this notebook; it is unpacked
            # here as a (channels, height, width) triple.
            in_c, in_h, in_w = extract_dimensions(shapes_in, name)
            out_c, out_h, out_w = extract_dimensions(shapes_out, name)
            accumulated += in_c * in_h * in_w + out_c * out_h * out_w
        except ValueError as err:
            print(err)

    return accumulated

In [6]:
# Random sample input handed to the summary helpers below:
# batch of 1, 3 channels, 256 x 256 pixels.
input_tensor = torch.randn((1, 3, 256, 256))

### Model 1 - Face cropping and extraction

In [7]:
# Model 1: the `face_det` network held by the restorer's face helper
face_helper_model = restorer.face_helper.face_det

# Collect its Linear and Conv2d sub-modules, in the same order as the target names.
# NOTE(review): extract_specific_module is not defined in this notebook; presumably it
# comes from `from utils import *` — confirm.
restorer_face_helper_linear, restorer_face_helper_conv2d = (
    extract_specific_module(face_helper_model, dtype_layer=layer_cls, verbose=False)
    for layer_cls in (torch.nn.Linear, torch.nn.Conv2d)
)

# Report how many layers of each kind were found
print(
    f"{len(restorer_face_helper_linear)}-Linear Layers, {len(restorer_face_helper_conv2d)}-Conv Layers"
)

0-Linear Layers, 82-Conv Layers


In [8]:
# Get the input and output shapes for each layer.
# The result is consumed by compute_total_data, which reads it as a mapping
# layer name -> {"class_name", "input_shapes", "output_shapes"}.
# NOTE(review): custom_torch_summary is not defined in this notebook; presumably it
# comes from `from utils import *` — confirm.
layer_shapes = custom_torch_summary(face_helper_model, input_tensor, verbose=5)

Layer: body.conv1 - Conv2d
  Input shapes: [torch.Size([1, 3, 256, 256])]
  Output shapes: torch.Size([1, 64, 128, 128])

Layer: body.bn1 - BatchNorm2d
  Input shapes: [torch.Size([1, 64, 128, 128])]
  Output shapes: torch.Size([1, 64, 128, 128])

Layer: body.relu - ReLU
  Input shapes: [torch.Size([1, 64, 128, 128])]
  Output shapes: torch.Size([1, 64, 128, 128])

Layer: body.maxpool - MaxPool2d
  Input shapes: [torch.Size([1, 64, 128, 128])]
  Output shapes: torch.Size([1, 64, 64, 64])

Layer: body.layer1.0.conv1 - Conv2d
  Input shapes: [torch.Size([1, 64, 64, 64])]
  Output shapes: torch.Size([1, 64, 64, 64])

Layer: body.layer1.0.bn1 - BatchNorm2d
  Input shapes: [torch.Size([1, 64, 64, 64])]
  Output shapes: torch.Size([1, 64, 64, 64])



In [9]:
# Sum the input + output element counts (C * H * W) over every layer recorded
# with class name "Conv2d" in layer_shapes.
total_data = compute_total_data(layer_shapes, selected_class_name="Conv2d")

In [10]:
# Data transmission: double the element count.
# NOTE(review): the factor 2 presumably accounts for both transfer directions — confirm.
# Beware: this rebinds `total_data`, so running the cell a second time doubles it again.
total_data = 2 * total_data
total_data

65794048

In [11]:
# Scale the element count by a fixed expansion factor.
# NOTE(review): the origin of the value 5 is not shown in this notebook — confirm.
# Beware: this rebinds `total_data`, so the cell is not idempotent.
expansion_factor = 5
total_data = total_data * expansion_factor
print(total_data)

# For 16-bit precision
bytes_per_value = 2
total_data_bytes = bytes_per_value * total_data

# Convert to MB and GB (binary units: 2**20 and 2**30 bytes)
total_data_mb = total_data_bytes / 2**20
total_data_gb = total_data_bytes / 2**30

print(
    f"{total_data_bytes} bytes",
    f"{total_data_mb:.2f} MB",
    f"{total_data_gb:.2f} GB",
    sep="\n",
)

328970240
657940480 bytes
627.46 MB
0.61 GB


### Model 2 - Face restoration (restorer.gfpgan)

In [12]:
# Model 2: the `gfpgan` network held by the restorer
restorer_gfpgan = restorer.gfpgan

# Collect its Linear, Conv2d and ModulatedConv2d sub-modules, in the same order as
# the target names.
# NOTE(review): extract_specific_module is not defined in this notebook; presumably it
# comes from `from utils import *` — confirm.
(
    restorer_gfpgan_linear,
    restorer_gfpgan_conv2d,
    restorer_gfpgan_modulated_conv2d,
) = (
    extract_specific_module(restorer_gfpgan, dtype_layer=layer_cls, verbose=False)
    for layer_cls in (torch.nn.Linear, torch.nn.Conv2d, ModulatedConv2d)
)

# Report how many layers of each kind were found
print(
    f"{len(restorer_gfpgan_linear)}-Linear Layers, {len(restorer_gfpgan_conv2d)}-Conv Layers - {len(restorer_gfpgan_modulated_conv2d)}-Conv Modulated Layers"
)

32-Linear Layers, 79-Conv Layers - 23-Conv Modulated Layers


In [13]:
# # Get the input and output shapes for each layer
# layer_shapes = custom_torch_summary(restorer_gfpgan, input_tensor, verbose=5)

### Model 3 - Background enhancement (restorer.upsampler)

In [14]:
# Model 3: the network wrapped by the restorer's background upsampler
upsampler_model = restorer.bg_upsampler.model

# Collect its Linear and Conv2d sub-modules, in the same order as the target names.
# NOTE(review): extract_specific_module is not defined in this notebook; presumably it
# comes from `from utils import *` — confirm.
restorer_upsampler_linear, restorer_upsampler_conv2d = (
    extract_specific_module(upsampler_model, dtype_layer=layer_cls, verbose=False)
    for layer_cls in (torch.nn.Linear, torch.nn.Conv2d)
)

# Report how many layers of each kind were found
print(
    f"{len(restorer_upsampler_linear)}-Linear Layers, {len(restorer_upsampler_conv2d)}-Conv Layers"
)

0-Linear Layers, 351-Conv Layers


In [15]:
# Print a per-layer summary of the background upsampler (`summary` is imported from
# torchsummary in the import cell at the top of the notebook).
# The size passed here is the per-sample (C, H, W) size, so only the leading batch axis
# is dropped; a bare .squeeze() would also remove any other axis of size 1
# (for example a single-channel input).
summary(upsampler_model, tuple(input_tensor.shape[1:]))

# NOTE: the printed table was saved in upsampler_sizes_v2.txt and upsampler_sizes.txt;
# the next cell reads upsampler_sizes_v2.txt back in.

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 128, 128]           6,976
            Conv2d-2         [-1, 32, 128, 128]          18,464
         LeakyReLU-3         [-1, 32, 128, 128]               0
            Conv2d-4         [-1, 32, 128, 128]          27,680
         LeakyReLU-5         [-1, 32, 128, 128]               0
            Conv2d-6         [-1, 32, 128, 128]          36,896
         LeakyReLU-7         [-1, 32, 128, 128]               0
            Conv2d-8         [-1, 32, 128, 128]          46,112
         LeakyReLU-9         [-1, 32, 128, 128]               0
           Conv2d-10         [-1, 64, 128, 128]         110,656
ResidualDenseBlock-11         [-1, 64, 128, 128]               0
           Conv2d-12         [-1, 32, 128, 128]          18,464
        LeakyReLU-13         [-1, 32, 128, 128]               0
           Conv2d-14         [-1, 32, 

In [16]:
# Read the saved summary table back in, one list entry per text line.
with open("upsampler_sizes_v2.txt", "r") as f:
    data = f.readlines()

# NOTE(review): parsed_data is not used by any cell visible here; filter_conv_layers
# below receives the raw lines in `data` instead — confirm this is intended.
parsed_data = [parse_line(line) for line in data][1:]

# Initial input shape: the per-sample (C, H, W) size. Only the leading batch axis is
# dropped; a bare .squeeze() would also remove any other axis of size 1.
conv_layers, total_data = filter_conv_layers(data, input_tensor.shape[1:])

# Show the first five entries together with the accumulated total
conv_layers[0:5], total_data

([{'class_name': 'Conv2d-1',
   'input_shape': torch.Size([3, 256, 256]),
   'output_shape': (64, 128, 128),
   'total_size': 1245184},
  {'class_name': 'Conv2d-2',
   'input_shape': (64, 128, 128),
   'output_shape': (32, 128, 128),
   'total_size': 1572864},
  {'class_name': 'Conv2d-4',
   'input_shape': (32, 128, 128),
   'output_shape': (32, 128, 128),
   'total_size': 1048576},
  {'class_name': 'Conv2d-6',
   'input_shape': (32, 128, 128),
   'output_shape': (32, 128, 128),
   'total_size': 1048576},
  {'class_name': 'Conv2d-8',
   'input_shape': (32, 128, 128),
   'output_shape': (32, 128, 128),
   'total_size': 1048576}],
 514785280)

In [17]:
# Data transmission: double the element count.
# NOTE(review): the factor 2 presumably accounts for both transfer directions — confirm.
# Beware: this rebinds `total_data`, so running the cell a second time doubles it again.
total_data = 2 * total_data
total_data

1029570560

In [18]:
# Scale the element count by a fixed expansion factor.
# NOTE(review): the origin of the value 5 is not shown in this notebook — confirm.
# Beware: this rebinds `total_data`, so the cell is not idempotent.
expansion_factor = 5
total_data = total_data * expansion_factor
print(total_data)

# For 16-bit precision
bytes_per_value = 2
total_data_bytes = bytes_per_value * total_data

# Convert to MB and GB (binary units: 2**20 and 2**30 bytes)
total_data_mb = total_data_bytes / 2**20
total_data_gb = total_data_bytes / 2**30

print(
    f"{total_data_bytes} bytes",
    f"{total_data_mb:.2f} MB",
    f"{total_data_gb:.2f} GB",
    sep="\n",
)

5147852800
10295705600 bytes
9818.75 MB
9.59 GB
