# Model Prepare

## Create something

In [1]:
%cd /content
!mkdir human_input
!mkdir shirt_input
!mkdir our_data_folder
%cd our_data_folder
!mkdir test
%cd test
!mkdir cloth
!mkdir cloth-mask
!mkdir image
!mkdir image-densepose
!mkdir image-parse-agnostic-v3.2
!mkdir image-parse-v3
!mkdir openpose_img
!mkdir openpose_json
%cd /content

/content
/content/our_data_folder
/content/our_data_folder/test
/content


## Openpose

In [2]:
%cd /content
import os
from os.path import exists, join, basename, splitext

git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949
  # install new CMake becaue of CUDA10
  !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz
  !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local
  # clone openpose
  !git clone -q --depth 1 $git_repo_url
  !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/g' openpose/CMakeLists.txt
  # install system dependencies
  !apt-get -qq install -y libatlas-base-dev libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev
  # install python dependencies
  !pip install -q youtube-dl
  # build openpose
  !cd openpose && rm -rf build || true && mkdir build && cd build && cmake .. -DUSE_CUDNN=OFF && make -j`nproc`

/content
Selecting previously unselected package libgflags2.2.
(Reading database ... 122349 files and directories currently installed.)
Preparing to unpack .../00-libgflags2.2_2.2.2-1build1_amd64.deb ...
Unpacking libgflags2.2 (2.2.2-1build1) ...
Selecting previously unselected package libgflags-dev.
Preparing to unpack .../01-libgflags-dev_2.2.2-1build1_amd64.deb ...
Unpacking libgflags-dev (2.2.2-1build1) ...
Selecting previously unselected package libgoogle-glog0v5.
Preparing to unpack .../02-libgoogle-glog0v5_0.4.0-1build1_amd64.deb ...
Unpacking libgoogle-glog0v5 (0.4.0-1build1) ...
Selecting previously unselected package libgoogle-glog-dev.
Preparing to unpack .../03-libgoogle-glog-dev_0.4.0-1build1_amd64.deb ...
Unpacking libgoogle-glog-dev (0.4.0-1build1) ...
Selecting previously unselected package libleveldb1d:amd64.
Preparing to unpack .../04-libleveldb1d_1.22-3ubuntu2_amd64.deb ...
Unpacking libleveldb1d:amd64 (1.22-3ubuntu2) ...
Selecting previously unselected package lible

## Human parse

In [3]:
%cd /content
!git clone https://github.com/Engineering-Course/CIHP_PGN.git
%cd /content/CIHP_PGN
!gdown --id 1Mqpse5Gen4V4403wFEpv3w3JAsWw2uhk
!unzip CIHP_pgn.zip
!pip install --upgrade tf_slim

/content
Cloning into 'CIHP_PGN'...
remote: Enumerating objects: 4402, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 4402 (delta 14), reused 21 (delta 6), pack-reused 4364[K
Receiving objects: 100% (4402/4402), 852.01 KiB | 12.72 MiB/s, done.
Resolving deltas: 100% (720/720), done.
/content/CIHP_PGN
Downloading...
From: https://drive.google.com/uc?id=1Mqpse5Gen4V4403wFEpv3w3JAsWw2uhk
To: /content/CIHP_PGN/CIHP_pgn.zip
100% 1.23G/1.23G [00:25<00:00, 48.5MB/s]
Archive:  CIHP_pgn.zip
   creating: CIHP_pgn/
  inflating: CIHP_pgn/checkpoint     
  inflating: CIHP_pgn/model.ckpt-593292.data-00000-of-00001  
  inflating: CIHP_pgn/model.ckpt-593292.index  
  inflating: CIHP_pgn/model.ckpt-593292.meta  
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Overwriting network.py

In [4]:
%%writefile /content/CIHP_PGN/kaffe/tensorflow/network.py
import numpy as np
import tensorflow.compat.v1 as tf
import tf_slim as slim
slim = slim

DEFAULT_PADDING = 'SAME'


def layer(op):
    '''Decorator for composable network layers.'''

    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        if len(self.terminals) == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        elif len(self.terminals) == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated


class Network(object):

    def __init__(self, inputs, trainable=True, is_training=False, n_classes=20, keep_prob=1):
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable
        # Switch variable for dropout
        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
                                                       shape=[],
                                                       name='use_dropout')
        self.setup(is_training, n_classes, keep_prob)

    def setup(self, is_training, n_classes, keep_prob):
        '''Construct the network. '''
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        '''Load network weights.
        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        '''
        data_dict = np.load(data_path).item()
        for op_name in data_dict:
            with tf.variable_scope(op_name, reuse=True):
                for param_name, data in data_dict[op_name].iteritems():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise

    def feed(self, *args):
        '''Set the input(s) for the next operation by replacing the terminal nodes.
        The arguments can be either layer names or the actual layers.
        '''
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            if isinstance(fed_layer, str):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        '''Returns the current network output.'''
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        '''Returns an index-suffixed unique name for the given prefix.
        This is used for auto-generating layer names based on the type-prefix.
        '''
        ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
        return '%s_%d' % (prefix, ident)

    def make_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        return tf.get_variable(name, shape, trainable=self.trainable)

    def make_w_var(self, name, shape):
        '''Creates a new TensorFlow variable.'''
        stddev=0.01
        return tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(stddev=stddev), trainable=self.trainable)

    def make_b_var(self, name, shape):
        return tf.get_variable(name, shape, initializer=tf.constant_initializer(0.0), trainable=self.trainable)

    def validate_padding(self, padding):
        '''Verifies that the padding is one of the supported ones.'''
        assert padding in ('SAME', 'VALID')

    @layer
    def conv(self,
             input,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding=DEFAULT_PADDING,
             group=1,
             biased=True):
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with tf.variable_scope(name) as scope:
            kernel = self.make_w_var('weights', shape=[k_h, k_w, c_i // group, c_o])
            if group == 1:
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:
                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(3, group, input)
                kernel_groups = tf.split(3, group, kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # Concatenate the groups
                output = tf.concat(3, output_groups)
            # Add the biases
            if biased:
                biases = self.make_b_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output

    @layer
    def atrous_conv(self,
                    input,
                    k_h,
                    k_w,
                    c_o,
                    dilation,
                    name,
                    relu=True,
                    padding=DEFAULT_PADDING,
                    group=1,
                    biased=True):
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = input.get_shape()[-1]
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding)
        with tf.variable_scope(name) as scope:
            kernel = self.make_w_var('weights', shape=[k_h, k_w, c_i // group, c_o])
            if group == 1:
                # This is the common-case. Convolve the input without any further complications.
                output = convolve(input, kernel)
            else:
                # Split the input into groups and then convolve each of them independently
                input_groups = tf.split(3, group, input)
                kernel_groups = tf.split(3, group, kernel)
                output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                # Concatenate the groups
                output = tf.concat(3, output_groups)
            # Add the biases
            if biased:
                biases = self.make_b_var('biases', [c_o])
                output = tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = tf.nn.relu(output, name=scope.name)
            return output
        
    @layer
    def relu(self, input, name):
        return tf.nn.relu(input, name=name)

    @layer
    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.max_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
        self.validate_padding(padding)
        return tf.nn.avg_pool(input,
                              ksize=[1, k_h, k_w, 1],
                              strides=[1, s_h, s_w, 1],
                              padding=padding,
                              name=name)

    @layer
    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
        return tf.nn.local_response_normalization(input,
                                                  depth_radius=radius,
                                                  alpha=alpha,
                                                  beta=beta,
                                                  bias=bias,
                                                  name=name)

    @layer
    def concat(self, inputs, axis, name):
        return tf.concat(values=inputs, axis=axis, name=name)

    @layer
    def add(self, inputs, name):
        return tf.add_n(inputs, name=name)

    @layer
    def fc(self, input, num_out, name, relu=True):
        with tf.variable_scope(name) as scope:
            input_shape = input.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= d
                feed_in = tf.reshape(input, [-1, dim])
            else:
                feed_in, dim = (input, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=scope.name)
            return fc

    @layer
    def softmax(self, input, name):
        input_shape = map(lambda v: v.value, input.get_shape())
        if len(input_shape) > 2:
            # For certain models (like NiN), the singleton spatial dimensions
            # need to be explicitly squeezed, since they're not broadcast-able
            # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
            if input_shape[1] == 1 and input_shape[2] == 1:
                input = tf.squeeze(input, squeeze_dims=[1, 2])
            else:
                raise ValueError('Rank 2 tensor input expected for softmax!')
        return tf.nn.softmax(input, name)
        
    @layer
    def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True):
        with tf.variable_scope(name) as scope:
            output = slim.batch_norm(
                input,
                activation_fn=activation_fn,
                is_training=is_training,
                updates_collections=None,
                scale=scale,
                scope=scope)
            return output

    @layer
    def dropout(self, input, keep_prob, name):
        keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
        return tf.nn.dropout(input, keep, name=name)

    @layer
    def upsample(self, input, size_h, size_w, name):
        with tf.variable_scope(name) as scope:
            return tf.image.resize_images(input, size=[size_h, size_w])

    @layer
    def pyramid_pooling(self, input, o_c, pool_size, name):
        with tf.variable_scope(name) as scope:
            dims = tf.shape(input)
            out_height, out_width = dims[1], dims[2]
            pool_ly = tf.nn.avg_pool(input, ksize=[1, pool_size, pool_size, 1], strides=[1, pool_size, pool_size, 1],
                                     padding=DEFAULT_PADDING, name='pool_ly')
            weight = self.make_w_var('weights', shape=[3, 3, pool_ly.get_shape()[-1], o_c])
            biases = self.make_var('biases', o_c)
            conv_ly = tf.nn.conv2d(pool_ly, weight, strides=[1, 1, 1, 1], padding='SAME', name='conv_ly')
            conv_ly = tf.nn.bias_add(conv_ly, biases)
            conv_ly = tf.nn.relu(conv_ly, name='relu_ly')
            output = tf.image.resize_bilinear(conv_ly, [out_height, out_width])
            return output


Overwriting /content/CIHP_PGN/kaffe/tensorflow/network.py


### Overwriting image_reade_inf.py

In [5]:
%%writefile /content/CIHP_PGN/utils/image_reade_inf.py
import os

import numpy as np
import tensorflow as tf
import random

IGNORE_LABEL = 255
IMG_MEAN = np.array((125.0, 114.4, 107.9), dtype=np.float32)


def image_scaling(img):
    """
    Randomly scales the images between 0.5 to 1.5 times the original size.
    Args:
      img: Training image to scale.
      label: Segmentation mask to scale.
    """

    scale = tf.random_uniform([1], minval=0.5, maxval=2.0, dtype=tf.float32,
                              seed=None)
    h_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[0]), scale))
    w_new = tf.to_int32(tf.multiply(tf.to_float(tf.shape(img)[1]), scale))
    new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])
    img = tf.image.resize_images(img, new_shape)
    # label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0),
    #                                          new_shape)
    # label = tf.squeeze(label, squeeze_dims=[0])
    # edge = tf.image.resize_nearest_neighbor(tf.expand_dims(edge, 0), new_shape)
    # edge = tf.squeeze(edge, squeeze_dims=[0])

    return img


def image_mirroring(img):
    """
    Randomly mirrors the images.
    Args:
      img: Training image to mirror.
      label: Segmentation mask to mirror.
    """

    distort_left_right_random = \
    tf.random_uniform([1], 0, 1.0, dtype=tf.float32)[0]
    mirror = tf.less(tf.stack([1.0, distort_left_right_random, 1.0]), 0.5)
    mirror = tf.boolean_mask([0, 1, 2], mirror)
    img = tf.reverse(img, mirror)
    # label = tf.reverse(label, mirror)
    # edge = tf.reverse(edge, mirror)
    return img


def random_resize_img_labels(image, label, resized_h, resized_w):
    scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32,
                              seed=None)
    h_new = tf.to_int32(tf.multiply(tf.to_float(resized_h), scale))
    w_new = tf.to_int32(tf.multiply(tf.to_float(resized_w), scale))

    new_shape = tf.squeeze(tf.stack([h_new, w_new]), squeeze_dims=[1])
    img = tf.image.resize_images(image, new_shape)
    label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0),
                                             new_shape)
    label = tf.squeeze(label, squeeze_dims=[0])
    return img, label


def resize_img_labels(image, label, resized_h, resized_w):
    new_shape = tf.stack([tf.to_int32(resized_h), tf.to_int32(resized_w)])
    img = tf.image.resize_images(image, new_shape)
    label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0),
                                             new_shape)
    label = tf.squeeze(label, squeeze_dims=[0])
    return img, label


def random_crop_and_pad_image_and_labels(image, crop_h, crop_w,
                                         ignore_label=255):
    """
    Randomly crop and pads the input images.
    Args:
      image: Training image to crop/ pad.
      label: Segmentation mask to crop/ pad.
      crop_h: Height of cropped segment.
      crop_w: Width of cropped segment.
      ignore_label: Label to ignore during the training.
    """

    # label = tf.cast(label, dtype=tf.float32)
    # label = label - ignore_label  # Needs to be subtracted and later added due to 0 padding.
    # edge = tf.cast(edge, dtype=tf.float32)
    # edge = edge - 0

    combined = tf.concat([image, label, edge], 2)
    image_shape = tf.shape(image)
    combined_pad = tf.image.pad_to_bounding_box(combined, 0, 0,
                                                tf.maximum(crop_h,
                                                           image_shape[0]),
                                                tf.maximum(crop_w,
                                                           image_shape[1]))

    last_image_dim = tf.shape(image)[-1]
    last_label_dim = tf.shape(label)[-1]
    combined_crop = tf.random_crop(combined_pad, [crop_h, crop_w, 4 + 1])
    img_crop = combined_crop[:, :, :last_image_dim]
    label_crop = combined_crop[:, :,
                 last_image_dim:last_image_dim + last_label_dim]
    edge_crop = combined_crop[:, :, last_image_dim + last_label_dim:]
    label_crop = label_crop + ignore_label
    label_crop = tf.cast(label_crop, dtype=tf.uint8)
    edge_crop = edge_crop + 0
    edge_crop = tf.cast(edge_crop, dtype=tf.uint8)

    # Set static shape so that tensorflow knows shape at compile time. 
    img_crop.set_shape((crop_h, crop_w, 3))
    label_crop.set_shape((crop_h, crop_w, 1))
    edge_crop.set_shape((crop_h, crop_w, 1))
    return img_crop, label_crop, edge_crop


def read_labeled_image_reverse_list(data_dir, data_list):
    """Reads txt file containing paths to images and ground truth masks.
    
    Args:
      data_dir: path to the directory with images and masks.
      data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
       
    Returns:
      Two lists with all file names for images and masks, respectively.
    """
    f = open(data_list, 'r')
    images = []
    masks = []
    masks_rev = []
    for line in f:
        try:
            image, mask, mask_rev = line.strip("\n").split(' ')
        except ValueError:  # Adhoc for test.
            image = mask = mask_rev = line.strip("\n")
        images.append(data_dir + image)
        masks.append(data_dir + mask)
        masks_rev.append(data_dir + mask_rev)
    return images, masks, masks_rev


def read_labeled_image_list(data_dir, data_list):
    """Reads txt file containing paths to images and ground truth masks.
    
    Args:
      data_dir: path to the directory with images and masks.
      data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
       
    Returns:
      Two lists with all file names for images and masks, respectively.
    """
    f = open(data_list, 'r')
    images = []
    masks = []
    for line in f:
        try:
            image, mask = line.strip("\n").split(' ')
        except ValueError:  # Adhoc for test.
            image = mask = line.strip("\n")
        images.append(data_dir + image)
        masks.append(data_dir + mask)
    return images, masks


def read_edge_list(data_dir, data_id_list):
    f = open(data_id_list, 'r')
    edges = []
    for line in f:
        edge = line.strip("\n")
        edges.append(data_dir + '/edges/' + edge + '.png')
    return edges


def read_images_from_disk(input_queue, input_size, random_scale,
                          random_mirror=False):  # optional pre-processing arguments
    """Read one image and its corresponding mask with optional pre-processing.
    
    Args:
      input_queue: tf queue with paths to the image and its mask.
      input_size: a tuple with (height, width) values.
                  If not given, return images of original size.
      random_scale: whether to randomly scale the images prior
                    to random crop.
      random_mirror: whether to randomly mirror the images prior
                    to random crop.
      
    Returns:
      Two tensors: the decoded image and its mask.
    """
    img_contents = tf.io.read_file(input_queue[0])
    # label_contents = tf.read_file(input_queue[1])
    # edge_contents = tf.read_file(input_queue[2])

    img = tf.image.decode_jpeg(img_contents, channels=3)
    img_r, img_g, img_b = tf.split(value=img, num_or_size_splits=3, axis=2)
    img = tf.cast(tf.concat([img_b, img_g, img_r], 2), dtype=tf.float32)
    # Extract mean.
    img -= IMG_MEAN
    # label = tf.image.decode_png(label_contents, channels=1)
    # edge = tf.image.decode_png(edge_contents, channels=1)

    # if input_size is not None:
    #     h, w = input_size
    #
    #     # Randomly scale the images and labels.
    #     if random_scale:
    #         img, label, edge = image_scaling(img)
    #
    #     # Randomly mirror the images and labels.
    #     if random_mirror:
    #         img, label, edge = image_mirroring(img)
    #
    #     # Randomly crops the images and labels.
    #     img, label, edge = random_crop_and_pad_image_and_labels(img, h, w,
    #                                                             IGNORE_LABEL)

    return img


class ImageReader(object):
    '''Generic ImageReader which reads images and corresponding segmentation
       masks from the disk, and enqueues them into a TensorFlow queue.
    '''

    def __init__(self, data_dir, input_size,
                 random_scale,
                 random_mirror, shuffle, coord):
        '''Initialise an ImageReader.
        
        Args:
          data_dir: path to the directory with images and masks.
          data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
          data_id_list: path to the file of image id.
          input_size: a tuple with (height, width) values, to which all the images will be resized.
          random_scale: whether to randomly scale the images prior to random crop.
          random_mirror: whether to randomly mirror the images prior to random crop.
          coord: TensorFlow queue coordinator.
        '''
        self.image_list = data_dir

        self.input_size = input_size
        self.coord = coord

        # self.image_list, self.label_list = read_labeled_image_list(
        #     self.data_dir, self.data_list)
        # self.edge_list = read_edge_list(self.data_dir, self.data_id_list)
        self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
        # self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
        # self.edges = tf.convert_to_tensor(self.edge_list, dtype=tf.string)
        self.queue = data_dir
        # self.queue = tf.data.Dataset.from_tensor_slices([self.images])
        print(self.queue)
        self.image = read_images_from_disk(self.queue, self.input_size,
                                                                  random_scale,
                                                                  random_mirror)

    def dequeue(self, num_elements):
        '''Pack images and labels into a batch.
        
        Args:
          num_elements: the batch size.
          
        Returns:
          Two tensors of size (batch_size, h, w, {3, 1}) for images and masks.'''
        batch_list = [self.image, self.label, self.edge]
        image_batch, label_batch, edge_batch = tf.train.batch(
            [self.image, self.label, self.edge], num_elements)
        return image_batch, label_batch, edge_batch


Overwriting /content/CIHP_PGN/utils/image_reade_inf.py


### Overwriting inf_pgn.py

In [6]:
%%writefile /content/CIHP_PGN/inf_pgn.py
from __future__ import print_function
import argparse
from datetime import datetime
import os
import sys
import time
import scipy.misc
import scipy.io as sio
import cv2
import argparse
from glob import glob
# os.environ["CUDA_VISIBLE_DEVICES"]="0"

import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
from PIL import Image
from utils.image_reade_inf import *
from utils.ops import  *
from utils.utils import *
from utils.model_pgn import *

argp = argparse.ArgumentParser(description="Inference pipeline")
argp.add_argument('-i',
                  '--directory',
                  type=str, help='Path of the input dir',
                  default='./datasets/images')
argp.add_argument('-o',
                  '--output',
                  type=str, help='Path of the input dir',
                  default='./datasets/output')

args = argp.parse_args()

image_list_inp = []
for i in glob(os.path.join(args.directory, '**'), recursive=True):
    if os.path.isfile(i):
        image_list_inp.append(i)
# print(image_list)
image_list_inp = image_list_inp[:5]
# sys.exit(2)
N_CLASSES = 20
NUM_STEPS = len(image_list_inp)
RESTORE_FROM = '/content/CIHP_PGN/CIHP_pgn/'


def main():
    """Create the model and start the evaluation process."""
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(image_list_inp, None, False,
                             False, False, coord)
        image = reader.image
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    image_batch = tf.stack([image, image_rev])
    h_orig, w_orig = tf.cast(tf.shape(image_batch)[1], tf.float32), tf.cast(tf.shape(image_batch)[2], tf.float32)
    image_batch050 = tf.image.resize(image_batch, tf.stack([tf.cast(tf.multiply(h_orig, 0.50), tf.int32), tf.cast(tf.multiply(w_orig, 0.50), tf.int32)]))
    image_batch075 = tf.image.resize(image_batch, tf.stack([tf.cast(tf.multiply(h_orig, 0.75), tf.int32), tf.cast(tf.multiply(w_orig, 0.75), tf.int32)]))
    image_batch125 = tf.image.resize(image_batch, tf.stack([tf.cast(tf.multiply(h_orig, 1.25), tf.int32), tf.cast(tf.multiply(w_orig, 1.25), tf.int32)]))
    image_batch150 = tf.image.resize(image_batch, tf.stack([tf.cast(tf.multiply(h_orig, 1.50), tf.int32), tf.cast(tf.multiply(w_orig, 1.50), tf.int32)]))
    image_batch175 = tf.image.resize(image_batch, tf.stack([tf.cast(tf.multiply(h_orig, 1.75), tf.int32), tf.cast(tf.multiply(w_orig, 1.75), tf.int32)]))
         
    # Create network.
    with tf.compat.v1.variable_scope('', reuse=False):
        net_100 = PGNModel({'data': image_batch}, is_training=False, n_classes=N_CLASSES)
    with tf.compat.v1.variable_scope('', reuse=True):
        net_050 = PGNModel({'data': image_batch050}, is_training=False, n_classes=N_CLASSES)
    with tf.compat.v1.variable_scope('', reuse=True):
        net_075 = PGNModel({'data': image_batch075}, is_training=False, n_classes=N_CLASSES)
    with tf.compat.v1.variable_scope('', reuse=True):
        net_125 = PGNModel({'data': image_batch125}, is_training=False, n_classes=N_CLASSES)
    with tf.compat.v1.variable_scope('', reuse=True):
        net_150 = PGNModel({'data': image_batch150}, is_training=False, n_classes=N_CLASSES)
    with tf.compat.v1.variable_scope('', reuse=True):
        net_175 = PGNModel({'data': image_batch175}, is_training=False, n_classes=N_CLASSES)
    # parsing net

    parsing_out1_050 = net_050.layers['parsing_fc']
    parsing_out1_075 = net_075.layers['parsing_fc']
    parsing_out1_100 = net_100.layers['parsing_fc']
    parsing_out1_125 = net_125.layers['parsing_fc']
    parsing_out1_150 = net_150.layers['parsing_fc']
    parsing_out1_175 = net_175.layers['parsing_fc']

    parsing_out2_050 = net_050.layers['parsing_rf_fc']
    parsing_out2_075 = net_075.layers['parsing_rf_fc']
    parsing_out2_100 = net_100.layers['parsing_rf_fc']
    parsing_out2_125 = net_125.layers['parsing_rf_fc']
    parsing_out2_150 = net_150.layers['parsing_rf_fc']
    parsing_out2_175 = net_175.layers['parsing_rf_fc']

    # edge net
    edge_out2_100 = net_100.layers['edge_rf_fc']
    edge_out2_125 = net_125.layers['edge_rf_fc']
    edge_out2_150 = net_150.layers['edge_rf_fc']
    edge_out2_175 = net_175.layers['edge_rf_fc']


    # combine resize
    parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize(parsing_out1_050, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out1_075, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out1_100, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out1_125, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out1_150, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out1_175, tf.shape(image_batch)[1:3,])]), axis=0)

    parsing_out2 = tf.reduce_mean(tf.stack([tf.image.resize(parsing_out2_050, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out2_075, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out2_100, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out2_125, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out2_150, tf.shape(image_batch)[1:3,]),
                                            tf.image.resize(parsing_out2_175, tf.shape(image_batch)[1:3,])]), axis=0)


    edge_out2_100 = tf.image.resize(edge_out2_100, tf.shape(image_batch)[1:3,])
    edge_out2_125 = tf.image.resize(edge_out2_125, tf.shape(image_batch)[1:3,])
    edge_out2_150 = tf.image.resize(edge_out2_150, tf.shape(image_batch)[1:3,])
    edge_out2_175 = tf.image.resize(edge_out2_175, tf.shape(image_batch)[1:3,])
    edge_out2 = tf.reduce_mean(tf.stack([edge_out2_100, edge_out2_125, edge_out2_150, edge_out2_175]), axis=0)
                                           
    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2]), axis=0)
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    tail_list_rev = [None] * 20
    for xx in range(14):
        tail_list_rev[xx] = tail_list[xx]
    tail_list_rev[14] = tail_list[15]
    tail_list_rev[15] = tail_list[14]
    tail_list_rev[16] = tail_list[17]
    tail_list_rev[17] = tail_list[16]
    tail_list_rev[18] = tail_list[19]
    tail_list_rev[19] = tail_list[18]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))
    
    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, axis=0)
    pred_scores = tf.reduce_max(raw_output_all, axis=3)
    raw_output_all = tf.argmax(raw_output_all, axis=3)
    pred_all = tf.expand_dims(raw_output_all, axis=3) # Create 4-d tensor.


    raw_edge = tf.reduce_mean(tf.stack([edge_out2]), axis=0)
    head_output, tail_output = tf.unstack(raw_edge, num=2, axis=0)
    tail_output_rev = tf.reverse(tail_output, tf.stack([1]))
    raw_edge_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0)
    raw_edge_all = tf.expand_dims(raw_edge_all, axis=0)
    pred_edge = tf.sigmoid(raw_edge_all)
    res_edge = tf.cast(tf.greater(pred_edge, 0.5), tf.int32)

    # prepare ground truth 
    # preds = tf.reshape(pred_all, [-1,])
    # weights = tf.cast(tf.less_equal(gt, N_CLASSES - 1), tf.int32) # Ignoring all labels greater than or equal to n_classes.
    # mIoU, update_op_iou = tf.contrib.metrics.streaming_mean_iou(preds, gt, num_classes=N_CLASSES, weights=weights)
    # macc, update_op_acc = tf.contrib.metrics.streaming_accuracy(preds, gt, weights=weights)
    #
    # # precision and recall
    # recall, update_op_recall = tf.contrib.metrics.streaming_recall(res_edge, edge_gt_batch)
    # precision, update_op_precision = tf.contrib.metrics.streaming_precision(res_edge, edge_gt_batch)

    # update_op = tf.group(update_op_iou, update_op_acc, update_op_recall, update_op_precision)

    # Which variables to load.
    restore_var = tf.compat.v1.global_variables()
    # Set up tf session and initialize variables. 
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.compat.v1.disable_eager_execution()
    sess = tf.compat.v1.Session(config=config)
    init = tf.compat.v1.global_variables_initializer()

    sess.run(init)
    sess.run(tf.compat.v1.local_variables_initializer())
    
    # Load weights.

    # loader = tf.compat.v1.train.Saver(var_list=restore_var)
    # if RESTORE_FROM is not None:
    #     if load(loader, sess, RESTORE_FROM):
    #         print(" [*] Load SUCCESS")
    #     else:
    #         print(" [!] Load failed...")
    
    loader = tf.compat.v1.train.Saver(var_list=restore_var)
    if RESTORE_FROM is not None:
        if load(loader, sess, RESTORE_FROM):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

    # Start queue threads.
    threads = tf.compat.v1.train.start_queue_runners(coord=coord, sess=sess)

    # evaluate prosessing
    parsing_dir = os.path.join(args.output, 'cihp_parsing_maps')
    if not os.path.exists(parsing_dir):
        os.makedirs(parsing_dir)
    edge_dir = os.path.join(args.output, 'cihp_edge_maps')
    if not os.path.exists(edge_dir):
        os.makedirs(edge_dir)
    # Iterate over training steps.
    for step in range(NUM_STEPS):
        # if step > 100:
        #     break
        print(step)
        tf.compat.v1.disable_eager_execution()
        parsing_, scores, edge_ = sess.run([pred_all, pred_scores, pred_edge])
        if step % 1 == 0:
            print('step {:d}'.format(step))
            print (image_list[step])
        img_split = image_list[step].split('/')
        img_id = img_split[-1][:-4]
        
        msk = decode_labels(parsing_, num_classes=N_CLASSES)

        parsing_im = Image.fromarray(msk[0])
        # print("here")
        parsing_im.save('{}/{}_vis.png'.format(parsing_dir, img_id))
        cv2.imwrite('{}/{}.png'.format(parsing_dir, img_id), parsing_[0,:,:,0])
        # sio.savemat('{}/{}.mat'.format(parsing_dir, img_id), {'data': scores[0,:,:]})
        
        # cv2.imwrite('{}/{}.png'.format(edge_dir, img_id), edge_[0,:,:,0] * 255)

        # Agnostic
        msk = decode_labels(parsing_, num_classes=N_CLASSES, get_agn=True)

        parsing_im = Image.fromarray(msk[0])
        # print("here")
        parsing_im.save('{}/{}_agn_vis.png'.format(parsing_dir, img_id))
        cv2.imwrite('{}/{}_agn.png'.format(parsing_dir, img_id), parsing_[0,:,:,0])
        # sio.savemat('{}/{}.mat'.format(parsing_dir, img_id), {'data': scores[0,:,:]})
        
        # cv2.imwrite('{}/{}.png'.format(edge_dir, img_id), edge_[0,:,:,0] * 255)
        print("here")



    # res_mIou = mIoU.eval(session=sess)
    # res_macc = macc.eval(session=sess)
    # res_recall = recall.eval(session=sess)
    # res_precision = precision.eval(session=sess)
    # f1 = 2 * res_precision * res_recall / (res_precision + res_recall)
    # print('Mean IoU: {:.4f}, Mean Acc: {:.4f}'.format(res_mIou, res_macc))
    # print('Recall: {:.4f}, Precision: {:.4f}, F1 score: {:.4f}'.format(res_recall, res_precision, f1))

    coord.request_stop()
    coord.join(threads)
    


if __name__ == '__main__':
    main()


##############################################################333

Overwriting /content/CIHP_PGN/inf_pgn.py


### Overwriting Utils

In [7]:
%%writefile /content/CIHP_PGN/utils/utils.py
from PIL import Image
import numpy as np
import tensorflow as tf
import os
import scipy.misc
from scipy.stats import multivariate_normal
import matplotlib.pyplot as plt

n_classes = 20
# colour map
label_colours = [(0,0,0)
                , (128,0,0), (255,0,0), (0,85,0), (170,0,51), (255,85,0), (0,0,85), (0,119,221), (85,85,0), (0,85,85), (85,51,0), (52,86,128), (0,128,0)
                , (0,0,255), (51,170,221), (0,255,255), (85,255,170), (170,255,85), (255,255,0), (255,170,0)]
agnostics = [5, 6, 7, 10]
# label_colours = [(0,0,0)
#                 # 0=background
#                 ,(128,0,0), (0,128,0), (128,128,0), (0,0,128), (128,0,128), (0,128,128)]
#                 # 1=head, 2=torso, 3=upper arm, 4=lower arm, 5=upper leg, # 6=lower leg
# image mean
IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
    
def decode_labels(mask, num_images=1, num_classes=21, get_agn=False):
    """Decode batch of segmentation masks.
    
    Args:
      mask: result of inference after taking argmax.
      num_images: number of images to decode from the batch.
      num_classes: number of classes to predict (including background).
    
    Returns:
      A batch with num_images RGB images of the same size as the input. 
    """
    n, h, w, c = mask.shape
    assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images)
    outputs = np.zeros((num_images, h, w, 3), dtype=np.uint8)
    for i in range(num_images):
      img = Image.new('RGB', (len(mask[i, 0]), len(mask[i])))
      pixels = img.load()
      for j_, j in enumerate(mask[i, :, :, 0]):
          for k_, k in enumerate(j):
              if get_agn and k in agnostics:
                  pixels[k_,j_] = label_colours[0]
                  continue
              if k < num_classes:
                  pixels[k_,j_] = label_colours[k]
      outputs[i] = np.array(img)
    return outputs

def prepare_label(input_batch, new_size, one_hot=True):
    """Resize masks and perform one-hot encoding.

    Args:
      input_batch: input tensor of shape [batch_size H W 1].
      new_size: a tensor with new height and width.

    Returns:
      Outputs a tensor of shape [batch_size h w 21]
      with last dimension comprised of 0's and 1's only.
    """
    with tf.name_scope('label_encode'):
        input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size) # as labels are integer numbers, need to use NN interp.
        input_batch = tf.squeeze(input_batch, squeeze_dims=[3]) # reducing the channel dimension.
        if one_hot:
          input_batch = tf.one_hot(input_batch, depth=n_classes)
    return input_batch

def inv_preprocess(imgs, num_images):
  """Inverse preprocessing of the batch of images.
     Add the mean vector and convert from BGR to RGB.
   
  Args:
    imgs: batch of input images.
    num_images: number of images to apply the inverse transformations on.
  
  Returns:
    The batch of the size num_images with the same spatial dimensions as the input.
  """
  n, h, w, c = imgs.shape
  assert(n >= num_images), 'Batch size %d should be greater or equal than number of images to save %d.' % (n, num_images)
  outputs = np.zeros((num_images, h, w, c), dtype=np.uint8)
  for i in range(num_images):
    outputs[i] = (imgs[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8)
  return outputs


def save(saver, sess, logdir, step):
    '''Save weights.   
    Args:
     saver: TensorFlow Saver object.
     sess: TensorFlow session.
     logdir: path to the snapshots directory.
     step: current training step.
    '''
    if not os.path.exists(logdir):
        os.makedirs(logdir)   
    model_name = 'model.ckpt'
    checkpoint_path = os.path.join(logdir, model_name)
      
    if not os.path.exists(logdir):
      os.makedirs(logdir)
    saver.save(sess, checkpoint_path, global_step=step)
    print('The checkpoint has been created.')

def load(saver, sess, ckpt_path):
    '''Load trained weights.
    
    Args:
      saver: TensorFlow saver object.
      sess: TensorFlow session.
      ckpt_path: path to checkpoint file with parameters.
    ''' 
    ckpt = tf.train.get_checkpoint_state(ckpt_path)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(sess, os.path.join(ckpt_path, ckpt_name))
        print("Restored model parameters from {}".format(ckpt_name))
        return True
    else:
        return False  


Overwriting /content/CIHP_PGN/utils/utils.py


## Densepose

In [8]:
%cd /content
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
!pip install "git+https://github.com/facebookresearch/detectron2@main#subdirectory=projects/DensePose"

/content
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 KB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py) ... [?25l[?25hdone
  Created wheel for pyyaml: filename=PyYAML-5.1-cp39-cp39-linux_x86_64.whl size=44089 sha256=a11ca6b06a599c96e42e6f0a974e1994f96d039bf5baa48500e837d8dc24b45a
  Stored in directory: /root/.cache/pip/wheels/68/be/8f/b6c454cd264e0b349b47f8ee00755511f277618af9e5dae20d
Successfully built pyyaml
Installing collected packages: pyyaml
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 6.0
    Uninstalling PyYAML-6.0:
      Successfully uninstalled PyYAML-6.0
[31mERROR: pip's dependency resolver does not currently take into

In [9]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.0 ; cuda:  cu118
detectron2: 0.6


### Overwriting base.py

In [10]:
%%writefile /content/detectron2/projects/DensePose/densepose/vis/base.py
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import numpy as np
import cv2
import torch

Image = np.ndarray
Boxes = torch.Tensor


class MatrixVisualizer(object):
    """
    Base visualizer for matrix data
    """

    def __init__(
        self,
        inplace=True,
        cmap=cv2.COLORMAP_PARULA,
        val_scale=1.0,
        alpha=0.7,
        interp_method_matrix=cv2.INTER_LINEAR,
        interp_method_mask=cv2.INTER_NEAREST,
    ):
        self.inplace = inplace
        self.cmap = cmap
        self.val_scale = val_scale
        self.alpha = alpha
        self.interp_method_matrix = interp_method_matrix
        self.interp_method_mask = interp_method_mask

    def visualize(self, image_bgr, mask, matrix, bbox_xywh):
        self._check_image(image_bgr)
        self._check_mask_matrix(mask, matrix)
        if self.inplace:
            image_target_bgr = image_bgr
        else:
            image_target_bgr = image_bgr * 0
        x, y, w, h = [int(v) for v in bbox_xywh]
        if w <= 0 or h <= 0:
            return image_bgr
        mask, matrix = self._resize(mask, matrix, w, h)
        mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
        matrix_scaled = matrix.astype(np.float32) * self.val_scale
        _EPSILON = 1e-6
        if np.any(matrix_scaled > 255 + _EPSILON):
            logger = logging.getLogger(__name__)
            logger.warning(
                f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
            )
        matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
        matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
        black = np.zeros(matrix_vis.shape)
        black_bgr = np.zeros(image_target_bgr.shape)
        matrix_vis[mask_bg] = black[mask_bg]#image_target_bgr[y : y + h, x : x + w, :][mask_bg]
        cv2.imwrite("densepose_result.jpg",matrix_vis)
        image_target_bgr[y : y + h, x : x + w, :] = (
            matrix_vis
        )
        image_target_bgr[:y, :, :] = black_bgr[:y, : , :]
        image_target_bgr[y+h:, :, :] = black_bgr[y+h, : , :]
        image_target_bgr[:, :x, :] = black_bgr[:, :x , :]
        image_target_bgr[:, x+w:, :] = black_bgr[:, x+w:, :]
        cv2.imwrite("densepose_result.jpg",image_target_bgr)
        return image_target_bgr.astype(np.uint8)

    def _resize(self, mask, matrix, w, h):
        if (w != mask.shape[1]) or (h != mask.shape[0]):
            mask = cv2.resize(mask, (w, h), self.interp_method_mask)
        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
            matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix)
        return mask, matrix

    def _check_image(self, image_rgb):
        assert len(image_rgb.shape) == 3
        assert image_rgb.shape[2] == 3
        assert image_rgb.dtype == np.uint8

    def _check_mask_matrix(self, mask, matrix):
        assert len(matrix.shape) == 2
        assert len(mask.shape) == 2
        assert mask.dtype == np.uint8


class RectangleVisualizer(object):

    _COLOR_GREEN = (18, 127, 15)

    def __init__(self, color=_COLOR_GREEN, thickness=1):
        self.color = color
        self.thickness = thickness

    def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
        x, y, w, h = bbox_xywh
        color = color or self.color
        thickness = thickness or self.thickness
        cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
        return image_bgr


class PointsVisualizer(object):

    _COLOR_GREEN = (18, 127, 15)

    def __init__(self, color_bgr=_COLOR_GREEN, r=5):
        self.color_bgr = color_bgr
        self.r = r

    def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
        for j, pt_xy in enumerate(pts_xy):
            x, y = pt_xy
            color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
            r = rs[j] if rs is not None else self.r
            cv2.circle(image_bgr, (x, y), r, color_bgr, -1)
        return image_bgr


class TextVisualizer(object):

    _COLOR_GRAY = (218, 227, 218)
    _COLOR_WHITE = (255, 255, 255)

    def __init__(
        self,
        font_face=cv2.FONT_HERSHEY_SIMPLEX,
        font_color_bgr=_COLOR_GRAY,
        font_scale=0.35,
        font_line_type=cv2.LINE_AA,
        font_line_thickness=1,
        fill_color_bgr=_COLOR_WHITE,
        fill_color_transparency=1.0,
        frame_color_bgr=_COLOR_WHITE,
        frame_color_transparency=1.0,
        frame_thickness=1,
    ):
        self.font_face = font_face
        self.font_color_bgr = font_color_bgr
        self.font_scale = font_scale
        self.font_line_type = font_line_type
        self.font_line_thickness = font_line_thickness
        self.fill_color_bgr = fill_color_bgr
        self.fill_color_transparency = fill_color_transparency
        self.frame_color_bgr = frame_color_bgr
        self.frame_color_transparency = frame_color_transparency
        self.frame_thickness = frame_thickness

    def visualize(self, image_bgr, txt, topleft_xy):
        txt_w, txt_h = self.get_text_size_wh(txt)
        topleft_xy = tuple(map(int, topleft_xy))
        x, y = topleft_xy
        if self.frame_color_transparency < 1.0:
            t = self.frame_thickness
            image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
                image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
                * self.frame_color_transparency
                + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
            ).astype(np.float)
        if self.fill_color_transparency < 1.0:
            image_bgr[y : y + txt_h, x : x + txt_w, :] = (
                image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
                + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
            ).astype(np.float)
        cv2.putText(
            image_bgr,
            txt,
            topleft_xy,
            self.font_face,
            self.font_scale,
            self.font_color_bgr,
            self.font_line_thickness,
            self.font_line_type,
        )
        return image_bgr

    def get_text_size_wh(self, txt):
        ((txt_w, txt_h), _) = cv2.getTextSize(
            txt, self.font_face, self.font_scale, self.font_line_thickness
        )
        return txt_w, txt_h


class CompoundVisualizer(object):
    def __init__(self, visualizers):
        self.visualizers = visualizers

    def visualize(self, image_bgr, data):
        assert len(data) == len(
            self.visualizers
        ), "The number of datas {} should match the number of visualizers" " {}".format(
            len(data), len(self.visualizers)
        )
        image = image_bgr
        for i, visualizer in enumerate(self.visualizers):
            image = visualizer.visualize(image, data[i])
        return image

    def __str__(self):
        visualizer_str = ", ".join([str(v) for v in self.visualizers])
        return "Compound Visualizer [{}]".format(visualizer_str)


Overwriting /content/detectron2/projects/DensePose/densepose/vis/base.py


### Overwriting densepose_result.py

In [11]:
%%writefile /content/detectron2/projects/DensePose/densepose/vis/densepose_results.py
# Copyright (c) Facebook, Inc. and its affiliates.
import logging
import numpy as np
from typing import List, Optional, Tuple
import cv2
import torch

from densepose.structures import DensePoseDataRelative

from ..structures import DensePoseChartResult
from .base import Boxes, Image, MatrixVisualizer


class DensePoseResultsVisualizer(object):
    def visualize(
        self,
        image_bgr: Image,
        results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]],
    ) -> Image:
        densepose_result, boxes_xywh = results_and_boxes_xywh
        #print(densepose_result)
        #cv2.imwrite("result.jpg", densepose_result)
        if densepose_result is None or boxes_xywh is None:
            return image_bgr
        boxes_xywh = boxes_xywh.cpu().numpy()
        context = self.create_visualization_context(image_bgr)
        for i, result in enumerate(densepose_result):
            iuv_array = torch.cat(
                (result.labels[None].type(torch.float32), result.uv * 255.0)
            ).type(torch.uint8)
            self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh[i])
        image_bgr = self.context_to_image_bgr(context)
        return image_bgr

    def create_visualization_context(self, image_bgr: Image):
        return image_bgr

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
        pass

    def context_to_image_bgr(self, context):
        return context

    def get_image_bgr_from_context(self, context):
        return context


class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
    def __init__(
        self,
        data_extractor,
        segm_extractor,
        inplace=False,
        cmap=cv2.COLORMAP_PARULA,
        alpha=0.7,
        val_scale=1.0,
        **kwargs,
    ):
        self.mask_visualizer = MatrixVisualizer(
            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
        )
        self.data_extractor = data_extractor
        self.segm_extractor = segm_extractor

    def context_to_image_bgr(self, context):
        return context

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
        image_bgr = self.get_image_bgr_from_context(context)
        matrix = self.data_extractor(iuv_arr)
        segm = self.segm_extractor(iuv_arr)
        mask = np.zeros(matrix.shape, dtype=np.uint8)
        mask[segm > 0] = 1
        image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)


def _extract_i_from_iuvarr(iuv_arr):
    return iuv_arr[0, :, :]


def _extract_u_from_iuvarr(iuv_arr):
    return iuv_arr[1, :, :]


def _extract_v_from_iuvarr(iuv_arr):
    return iuv_arr[2, :, :]


class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
    def __init__(self, levels=10, **kwargs):
        self.levels = levels
        self.plot_args = kwargs

    def create_visualization_context(self, image_bgr: Image):
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

        context = {}
        context["image_bgr"] = image_bgr
        dpi = 100
        height_inches = float(image_bgr.shape[0]) / dpi
        width_inches = float(image_bgr.shape[1]) / dpi
        fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
        plt.axes([0, 0, 1, 1])
        plt.axis("off")
        context["fig"] = fig
        canvas = FigureCanvas(fig)
        context["canvas"] = canvas
        extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
        plt.imshow(image_bgr[:, :, ::-1], extent=extent)
        return context

    def context_to_image_bgr(self, context):
        fig = context["fig"]
        w, h = map(int, fig.get_size_inches() * fig.get_dpi())
        canvas = context["canvas"]
        canvas.draw()
        image_1d = np.fromstring(canvas.tostring_rgb(), dtype="uint8")
        image_rgb = image_1d.reshape(h, w, 3)
        image_bgr = image_rgb[:, :, ::-1].copy()
        return image_bgr

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
        import matplotlib.pyplot as plt

        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
        extent = (
            bbox_xywh[0],
            bbox_xywh[0] + bbox_xywh[2],
            bbox_xywh[1],
            bbox_xywh[1] + bbox_xywh[3],
        )
        plt.contour(u, self.levels, extent=extent, **self.plot_args)
        plt.contour(v, self.levels, extent=extent, **self.plot_args)


class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
    """
    Contour visualization using marching squares
    """

    def __init__(self, levels=10, **kwargs):
        # TODO: colormap is hardcoded
        cmap = cv2.COLORMAP_PARULA
        if isinstance(levels, int):
            self.levels = np.linspace(0, 1, levels)
        else:
            self.levels = levels
        if "linewidths" in kwargs:
            self.linewidths = kwargs["linewidths"]
        else:
            self.linewidths = [1] * len(self.levels)
        self.plot_args = kwargs
        img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
        self.level_colors_bgr = [
            [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
        ]

    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
        image_bgr = self.get_image_bgr_from_context(context)
        segm = _extract_i_from_iuvarr(iuv_arr)
        u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
        v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
        self._contours(image_bgr, u, segm, bbox_xywh)
        self._contours(image_bgr, v, segm, bbox_xywh)

    def _contours(self, image_bgr, arr, segm, bbox_xywh):
        for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
            mask = segm == part_idx
            if not np.any(mask):
                continue
            arr_min = np.amin(arr[mask])
            arr_max = np.amax(arr[mask])
            I, J = np.nonzero(mask)
            i0 = np.amin(I)
            i1 = np.amax(I) + 1
            j0 = np.amin(J)
            j1 = np.amax(J) + 1
            if (j1 == j0 + 1) or (i1 == i0 + 1):
                continue
            Nw = arr.shape[1] - 1
            Nh = arr.shape[0] - 1
            for level_idx, level in enumerate(self.levels):
                if (level < arr_min) or (level > arr_max):
                    continue
                vp = arr[i0:i1, j0:j1] >= level
                bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
                mp = mask[i0:i1, j0:j1]
                bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
                it = np.nditer(bin_codes, flags=["multi_index"])
                color_bgr = self.level_colors_bgr[level_idx]
                linewidth = self.linewidths[level_idx]
                while not it.finished:
                    if (it[0] != 0) and (it[0] != 15):
                        i, j = it.multi_index
                        if bin_mask_codes[i, j] != 0:
                            self._draw_line(
                                image_bgr,
                                arr,
                                mask,
                                level,
                                color_bgr,
                                linewidth,
                                it[0],
                                it.multi_index,
                                bbox_xywh,
                                Nw,
                                Nh,
                                (i0, j0),
                            )
                    it.iternext()

    def _draw_line(
        self,
        image_bgr,
        arr,
        mask,
        v,
        color_bgr,
        linewidth,
        bin_code,
        multi_idx,
        bbox_xywh,
        Nw,
        Nh,
        offset,
    ):
        lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
        x0, y0, w, h = bbox_xywh
        x1 = x0 + w
        y1 = y0 + h
        for line in lines:
            x0r, y0r = line[0]
            x1r, y1r = line[1]
            pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
            pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
            cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)

    def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
        i0, j0 = offset
        i, j = multi_idx
        i += i0
        j += j0
        v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
        x0i = float(j) / Nw
        y0j = float(i) / Nh
        He = 1.0 / Nh
        We = 1.0 / Nw
        if (bin_code == 1) or (bin_code == 14):
            a = (v - v0) / (v1 - v0)
            b = (v - v0) / (v3 - v0)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + b * We, y0j)
            return [(pt1, pt2)]
        elif (bin_code == 2) or (bin_code == 13):
            a = (v - v0) / (v1 - v0)
            b = (v - v1) / (v2 - v1)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + b * We, y0j + He)
            return [(pt1, pt2)]
        elif (bin_code == 3) or (bin_code == 12):
            a = (v - v0) / (v3 - v0)
            b = (v - v1) / (v2 - v1)
            pt1 = (x0i + a * We, y0j)
            pt2 = (x0i + b * We, y0j + He)
            return [(pt1, pt2)]
        elif (bin_code == 4) or (bin_code == 11):
            a = (v - v1) / (v2 - v1)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i + a * We, y0j + He)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif (bin_code == 6) or (bin_code == 9):
            a = (v - v0) / (v1 - v0)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i, y0j + a * He)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif (bin_code == 7) or (bin_code == 8):
            a = (v - v0) / (v3 - v0)
            b = (v - v3) / (v2 - v3)
            pt1 = (x0i + a * We, y0j)
            pt2 = (x0i + We, y0j + b * He)
            return [(pt1, pt2)]
        elif bin_code == 5:
            a1 = (v - v0) / (v1 - v0)
            b1 = (v - v1) / (v2 - v1)
            pt11 = (x0i, y0j + a1 * He)
            pt12 = (x0i + b1 * We, y0j + He)
            a2 = (v - v0) / (v3 - v0)
            b2 = (v - v3) / (v2 - v3)
            pt21 = (x0i + a2 * We, y0j)
            pt22 = (x0i + We, y0j + b2 * He)
            return [(pt11, pt12), (pt21, pt22)]
        elif bin_code == 10:
            a1 = (v - v0) / (v3 - v0)
            b1 = (v - v0) / (v1 - v0)
            pt11 = (x0i + a1 * We, y0j)
            pt12 = (x0i, y0j + b1 * He)
            a2 = (v - v1) / (v2 - v1)
            b2 = (v - v3) / (v2 - v3)
            pt21 = (x0i + a2 * We, y0j + He)
            pt22 = (x0i + We, y0j + b2 * He)
            return [(pt11, pt12), (pt21, pt22)]
        return []


try:
    import matplotlib

    matplotlib.use("Agg")
    DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
except ModuleNotFoundError:
    logger = logging.getLogger(__name__)
    logger.warning("Could not import matplotlib, using custom contour visualizer")
    DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer


class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
        super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
            _extract_i_from_iuvarr,
            _extract_i_from_iuvarr,
            inplace,
            cmap,
            alpha,
            val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
            **kwargs,
        )


class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
        super(DensePoseResultsUVisualizer, self).__init__(
            _extract_u_from_iuvarr,
            _extract_i_from_iuvarr,
            inplace,
            cmap,
            alpha,
            val_scale=1.0,
            **kwargs,
        )


class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
    def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
        super(DensePoseResultsVVisualizer, self).__init__(
            _extract_v_from_iuvarr,
            _extract_i_from_iuvarr,
            inplace,
            cmap,
            alpha,
            val_scale=1.0,
            **kwargs,
        )


Overwriting /content/detectron2/projects/DensePose/densepose/vis/densepose_results.py


## Cloth mask

In [12]:
%cd /content
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
!pip install rembg

/content
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rembg
  Downloading rembg-2.0.32-py3-none-any.whl (14 kB)
Collecting filetype>=1.2.0
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Collecting uvicorn>=0.20.0
  Downloading uvicorn-0.21.1-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.8/57.8 KB[0m [31m200.6 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting asyncer>=0.0.2
  Downloading asyncer-0.0.2-py3-none-any.whl (8.3 kB)
Collecting pillow>=9.3.0
  Downloading Pillow-9.5.0-cp39-cp39-manylinux_2_28_x86_64.whl (3.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.4/3.4 MB[0m [31m66.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pymatting>=1.1.8
  Downloading PyMatting-1.1.8-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 KB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting onnxruntime>=1

# Run model

In [13]:
%cd /content
!git clone https://github.com/sangyun884/HR-VITON
!pip install opencv-python torchgeometry Pillow tqdm tensorboardX scikit-image scipy
!gdown --id 1XJTCdRBOPVgVTmqzhVGFAgMm2NLkw5uQ
!gdown --id 1BkSA8UJo-6eOkKcXTFOHK80Esc4vBmVC

/content
Cloning into 'HR-VITON'...
remote: Enumerating objects: 129, done.[K
remote: Counting objects: 100% (55/55), done.[K
remote: Compressing objects: 100% (43/43), done.[K
remote: Total 129 (delta 31), reused 18 (delta 12), pack-reused 74[K
Receiving objects: 100% (129/129), 16.06 MiB | 18.60 MiB/s, done.
Resolving deltas: 100% (65/65), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchgeometry
  Downloading torchgeometry-0.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.7/42.7 KB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboardX
  Downloading tensorboardX-2.6-py2.py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.5/114.5 KB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorboardX, torchgeometry
Successfully installed tensorboardX-2.6 torchgeometry-0.1.2
Downloading...

In [14]:
%%writefile /content/HR-VITON/get_parse_agnostic.py
import json
from os import path as osp
import os

import numpy as np
from PIL import Image, ImageDraw

import argparse

from tqdm import tqdm


def get_im_parse_agnostic(im_parse, pose_data, w=768, h=1024):
    parse_array = np.array(im_parse)
    parse_upper = ((parse_array == 5).astype(np.float32) +
                    (parse_array == 6).astype(np.float32) +
                    (parse_array == 7).astype(np.float32))
    parse_neck = (parse_array == 10).astype(np.float32)

    r = 10
    agnostic = im_parse.copy()

    # mask arms
    for parse_id, pose_ids in [(14, [2, 5, 6, 7]), (15, [5, 2, 3, 4])]:
        mask_arm = Image.new('L', (w, h), 'black')
        mask_arm_draw = ImageDraw.Draw(mask_arm)
        i_prev = pose_ids[0]
        for i in pose_ids[1:]:
            if (pose_data[i_prev, 0] == 0.0 and pose_data[i_prev, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                continue
            mask_arm_draw.line([tuple(pose_data[j]) for j in [i_prev, i]], 'white', width=r*10)
            pointx, pointy = pose_data[i]
            radius = r*4 if i == pose_ids[-1] else r*15
            mask_arm_draw.ellipse((pointx-radius, pointy-radius, pointx+radius, pointy+radius), 'white', 'white')
            i_prev = i
        parse_arm = (np.array(mask_arm) / 255) * (parse_array == parse_id).astype(np.float32)
        agnostic.paste(0, None, Image.fromarray(np.uint8(parse_arm * 255), 'L'))

    # mask torso & neck
    agnostic.paste(0, None, Image.fromarray(np.uint8(parse_upper * 255), 'L'))
    agnostic.paste(0, None, Image.fromarray(np.uint8(parse_neck * 255), 'L'))

    return agnostic


if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, help="dataset dir")
    parser.add_argument('--output_path', type=str, help="output dir")

    args = parser.parse_args()
    data_path = args.data_path
    output_path = args.output_path
    
    os.makedirs(output_path, exist_ok=True)
    
    for im_name in tqdm(os.listdir(osp.join(data_path, 'image-parse-v3'))):
        # load pose image
        pose_name = im_name.replace('.png', '_keypoints.json')
        if pose_name in '.ipynb_checkpoints':
          continue
        try:
            with open(osp.join(data_path, 'openpose_json', pose_name), 'r') as f:
                pose_label = json.load(f)
                pose_data = pose_label['people'][0]['pose_keypoints_2d']
                pose_data = np.array(pose_data)
                pose_data = pose_data.reshape((-1, 3))[:, :2]
        except IndexError:
            print(pose_name)
            continue

        # load parsing image
        parse_name = im_name.replace('.jpg', '.png')
        im_parse = Image.open(osp.join(data_path, 'image-parse-v3', parse_name))

        agnostic = get_im_parse_agnostic(im_parse, pose_data)
        
        agnostic.save(osp.join(output_path, parse_name))

Overwriting /content/HR-VITON/get_parse_agnostic.py


## Overwriting file test_generator.py

In [15]:
%%writefile /content/HR-VITON/test_generator.py
import torch
import torch.nn as nn

from torchvision.utils import make_grid as make_image_grid
from torchvision.utils import save_image
import argparse
import os
import time
from cp_dataset_test import CPDatasetTest, CPDataLoader

from networks import ConditionGenerator, load_checkpoint, make_grid
from network_generator import SPADEGenerator
from tensorboardX import SummaryWriter
from utils import *

import torchgeometry as tgm
from collections import OrderedDict

def remove_overlap(seg_out, warped_cm):
    
    assert len(warped_cm.shape) == 4
    
    warped_cm = warped_cm - (torch.cat([seg_out[:, 1:3, :, :], seg_out[:, 5:, :, :]], dim=1)).sum(dim=1, keepdim=True) * warped_cm
    return warped_cm
def get_opt():
    parser = argparse.ArgumentParser()

    parser.add_argument("--gpu_ids", default="")
    parser.add_argument('-j', '--workers', type=int, default=4)
    parser.add_argument('-b', '--batch-size', type=int, default=1)
    parser.add_argument('--fp16', action='store_true', help='use amp')
    # Cuda availability
    parser.add_argument('--cuda',default=False, help='cuda or cpu')

    parser.add_argument('--test_name', type=str, default='test', help='test name')
    parser.add_argument("--dataroot", default="/content/our_data_folder")
    parser.add_argument("--datamode", default="test")
    parser.add_argument("--data_list", default="/content/our_data_folder/test_pairs.txt")
    parser.add_argument("--output_dir", type=str, default="./Output")
    parser.add_argument("--datasetting", default="unpaired")
    parser.add_argument("--fine_width", type=int, default=768)
    parser.add_argument("--fine_height", type=int, default=1024)

    parser.add_argument('--tensorboard_dir', type=str, default='./data/zalando-hd-resize/tensorboard', help='save tensorboard infos')
    parser.add_argument('--checkpoint_dir', type=str, default='checkpoints', help='save checkpoint infos')
    parser.add_argument('--tocg_checkpoint', type=str, default='/content/mtviton.pth', help='tocg checkpoint')
    parser.add_argument('--gen_checkpoint', type=str, default='/content/gen.pth', help='G checkpoint')

    parser.add_argument("--tensorboard_count", type=int, default=100)
    parser.add_argument("--shuffle", action='store_true', help='shuffle input data')
    parser.add_argument("--semantic_nc", type=int, default=13)
    parser.add_argument("--output_nc", type=int, default=13)
    parser.add_argument('--gen_semantic_nc', type=int, default=7, help='# of input label classes without unknown class')
    
    # network
    parser.add_argument("--warp_feature", choices=['encoder', 'T1'], default="T1")
    parser.add_argument("--out_layer", choices=['relu', 'conv'], default="relu")
    
    # training
    parser.add_argument("--clothmask_composition", type=str, choices=['no_composition', 'detach', 'warp_grad'], default='warp_grad')
        
    # Hyper-parameters
    parser.add_argument('--upsample', type=str, default='bilinear', choices=['nearest', 'bilinear'])
    parser.add_argument('--occlusion', action='store_true', help="Occlusion handling")

    # generator
    parser.add_argument('--norm_G', type=str, default='spectralaliasinstance', help='instance normalization or batch normalization')
    parser.add_argument('--ngf', type=int, default=64, help='# of gen filters in first conv layer')
    parser.add_argument('--init_type', type=str, default='xavier', help='network initialization [normal|xavier|kaiming|orthogonal]')
    parser.add_argument('--init_variance', type=float, default=0.02, help='variance of the initialization distribution')
    parser.add_argument('--num_upsampling_layers', choices=('normal', 'more', 'most'), default='most', # normal: 256, more: 512
                        help="If 'more', adds upsampling layer between the two middle resnet blocks. If 'most', also add one more upsampling + resnet layer at the end of the generator")

    opt = parser.parse_args()
    return opt

def load_checkpoint_G(model, checkpoint_path,opt):
    if not os.path.exists(checkpoint_path):
        print("Invalid path!")
        return
    state_dict = torch.load(checkpoint_path)
    new_state_dict = OrderedDict([(k.replace('ace', 'alias').replace('.Spade', ''), v) for (k, v) in state_dict.items()])
    new_state_dict._metadata = OrderedDict([(k.replace('ace', 'alias').replace('.Spade', ''), v) for (k, v) in state_dict._metadata.items()])
    model.load_state_dict(new_state_dict, strict=True)
    if opt.cuda :
        model.cuda()



def test(opt, test_loader, tocg, generator):
    gauss = tgm.image.GaussianBlur((15, 15), (3, 3))
    if opt.cuda:
        gauss = gauss.cuda()
    
    # Model
    if opt.cuda :
        tocg.cuda()
    tocg.eval()
    generator.eval()
    
    if opt.output_dir is not None:
        output_dir = opt.output_dir
    else:
        output_dir = os.path.join('./output', opt.test_name,
                            opt.datamode, opt.datasetting, 'generator', 'output')
    grid_dir = os.path.join('./output', opt.test_name,
                             opt.datamode, opt.datasetting, 'generator', 'grid')
    
    os.makedirs(grid_dir, exist_ok=True)
    
    os.makedirs(output_dir, exist_ok=True)
    
    num = 0
    iter_start_time = time.time()
    with torch.no_grad():
        for inputs in test_loader.data_loader:

            if opt.cuda :
                pose_map = inputs['pose'].cuda()
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting].cuda()
                label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic'].cuda()
                clothes = inputs['cloth'][opt.datasetting].cuda() # target cloth
                densepose = inputs['densepose'].cuda()
                im = inputs['image']
                input_label, input_parse_agnostic = label.cuda(), parse_agnostic.cuda()
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float)).cuda()
            else :
                pose_map = inputs['pose']
                pre_clothes_mask = inputs['cloth_mask'][opt.datasetting]
                label = inputs['parse']
                parse_agnostic = inputs['parse_agnostic']
                agnostic = inputs['agnostic']
                clothes = inputs['cloth'][opt.datasetting] # target cloth
                densepose = inputs['densepose']
                im = inputs['image']
                input_label, input_parse_agnostic = label, parse_agnostic
                pre_clothes_mask = torch.FloatTensor((pre_clothes_mask.detach().cpu().numpy() > 0.5).astype(np.float))



            # down
            pose_map_down = F.interpolate(pose_map, size=(256, 192), mode='bilinear')
            pre_clothes_mask_down = F.interpolate(pre_clothes_mask, size=(256, 192), mode='nearest')
            input_label_down = F.interpolate(input_label, size=(256, 192), mode='bilinear')
            input_parse_agnostic_down = F.interpolate(input_parse_agnostic, size=(256, 192), mode='nearest')
            agnostic_down = F.interpolate(agnostic, size=(256, 192), mode='nearest')
            clothes_down = F.interpolate(clothes, size=(256, 192), mode='bilinear')
            densepose_down = F.interpolate(densepose, size=(256, 192), mode='bilinear')

            shape = pre_clothes_mask.shape
            
            # multi-task inputs
            input1 = torch.cat([clothes_down, pre_clothes_mask_down], 1)
            input2 = torch.cat([input_parse_agnostic_down, densepose_down], 1)

            # forward
            flow_list, fake_segmap, warped_cloth_paired, warped_clothmask_paired = tocg(opt,input1, input2)
            
            # warped cloth mask one hot
            if opt.cuda :
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float)).cuda()
            else :
                warped_cm_onehot = torch.FloatTensor((warped_clothmask_paired.detach().cpu().numpy() > 0.5).astype(np.float))

            if opt.clothmask_composition != 'no_composition':
                if opt.clothmask_composition == 'detach':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:,3:4, :, :] = warped_cm_onehot
                    fake_segmap = fake_segmap * cloth_mask
                    
                if opt.clothmask_composition == 'warp_grad':
                    cloth_mask = torch.ones_like(fake_segmap)
                    cloth_mask[:,3:4, :, :] = warped_clothmask_paired
                    fake_segmap = fake_segmap * cloth_mask
                    
            # make generator input parse map
            fake_parse_gauss = gauss(F.interpolate(fake_segmap, size=(opt.fine_height, opt.fine_width), mode='bilinear'))
            fake_parse = fake_parse_gauss.argmax(dim=1)[:, None]

            if opt.cuda :
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                old_parse = torch.FloatTensor(fake_parse.size(0), 13, opt.fine_height, opt.fine_width).zero_()
            old_parse.scatter_(1, fake_parse, 1.0)

            labels = {
                0:  ['background',  [0]],
                1:  ['paste',       [2, 4, 7, 8, 9, 10, 11]],
                2:  ['upper',       [3]],
                3:  ['hair',        [1]],
                4:  ['left_arm',    [5]],
                5:  ['right_arm',   [6]],
                6:  ['noise',       [12]]
            }
            if opt.cuda :
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_().cuda()
            else:
                parse = torch.FloatTensor(fake_parse.size(0), 7, opt.fine_height, opt.fine_width).zero_()
            for i in range(len(labels)):
                for label in labels[i][1]:
                    parse[:, i] += old_parse[:, label]
                    
            # warped cloth
            N, _, iH, iW = clothes.shape
            flow = F.interpolate(flow_list[-1].permute(0, 3, 1, 2), size=(iH, iW), mode='bilinear').permute(0, 2, 3, 1)
            flow_norm = torch.cat([flow[:, :, :, 0:1] / ((96 - 1.0) / 2.0), flow[:, :, :, 1:2] / ((128 - 1.0) / 2.0)], 3)
            
            grid = make_grid(N, iH, iW,opt)
            warped_grid = grid + flow_norm
            warped_cloth = F.grid_sample(clothes, warped_grid, padding_mode='border')
            warped_clothmask = F.grid_sample(pre_clothes_mask, warped_grid, padding_mode='border')
            if opt.occlusion:
                warped_clothmask = remove_overlap(F.softmax(fake_parse_gauss, dim=1), warped_clothmask)
                warped_cloth = warped_cloth * warped_clothmask + torch.ones_like(warped_cloth) * (1-warped_clothmask)
            

            output = generator(torch.cat((agnostic, densepose, warped_cloth), dim=1), parse)
            # visualize
            unpaired_names = []
            for i in range(shape[0]):
                grid = make_image_grid([(clothes[i].cpu() / 2 + 0.5), (pre_clothes_mask[i].cpu()).expand(3, -1, -1), visualize_segmap(parse_agnostic.cpu(), batch=i), ((densepose.cpu()[i]+1)/2),
                                        (warped_cloth[i].cpu().detach() / 2 + 0.5), (warped_clothmask[i].cpu().detach()).expand(3, -1, -1), visualize_segmap(fake_parse_gauss.cpu(), batch=i),
                                        (pose_map[i].cpu()/2 +0.5), (warped_cloth[i].cpu()/2 + 0.5), (agnostic[i].cpu()/2 + 0.5),
                                        (im[i]/2 +0.5), (output[i].cpu()/2 +0.5)],
                                        nrow=4)
                unpaired_name = (inputs['c_name']['paired'][i].split('.')[0] + '_' + inputs['c_name'][opt.datasetting][i].split('.')[0] + '.png')
                save_image(grid, os.path.join(grid_dir, unpaired_name))
                unpaired_names.append(unpaired_name)
                
            # save output
            save_images(output, unpaired_names, output_dir)
                
            num += shape[0]
            print(num)

    print(f"Test time {time.time() - iter_start_time}")


def main():
    opt = get_opt()
    print(opt)
    print("Start to test %s!")
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu_ids
    
    # create test dataset & loader
    test_dataset = CPDatasetTest(opt)
    test_loader = CPDataLoader(opt, test_dataset)
    
    # visualization
    # if not os.path.exists(opt.tensorboard_dir):
    #     os.makedirs(opt.tensorboard_dir)
    # board = SummaryWriter(log_dir=os.path.join(opt.tensorboard_dir, opt.test_name, opt.datamode, opt.datasetting))

    ## Model
    # tocg
    input1_nc = 4  # cloth + cloth-mask
    input2_nc = opt.semantic_nc + 3  # parse_agnostic + densepose
    tocg = ConditionGenerator(opt, input1_nc=input1_nc, input2_nc=input2_nc, output_nc=opt.output_nc, ngf=96, norm_layer=nn.BatchNorm2d)
       
    # generator
    opt.semantic_nc = 7
    generator = SPADEGenerator(opt, 3+3+3)
    generator.print_network()
       
    # Load Checkpoint
    load_checkpoint(tocg, opt.tocg_checkpoint,opt)
    load_checkpoint_G(generator, opt.gen_checkpoint,opt)

    # Train
    test(opt, test_loader, tocg, generator)

    print("Finished testing!")


if __name__ == "__main__":
    main()

Overwriting /content/HR-VITON/test_generator.py


In [16]:
%%writefile /content/HR-VITON/get_parse_agnostic.py
import json
from os import path as osp
import os

import numpy as np
from PIL import Image, ImageDraw

import argparse

from tqdm import tqdm


def get_im_parse_agnostic(im_parse, pose_data, w=768, h=1024):
    parse_array = np.array(im_parse)
    parse_upper = ((parse_array == 5).astype(np.float32) +
                    (parse_array == 6).astype(np.float32) +
                    (parse_array == 7).astype(np.float32))
    parse_neck = (parse_array == 10).astype(np.float32)

    r = 10
    agnostic = im_parse.copy()

    # mask arms
    for parse_id, pose_ids in [(14, [2, 5, 6, 7]), (15, [5, 2, 3, 4])]:
        mask_arm = Image.new('L', (w, h), 'black')
        mask_arm_draw = ImageDraw.Draw(mask_arm)
        i_prev = pose_ids[0]
        for i in pose_ids[1:]:
            if (pose_data[i_prev, 0] == 0.0 and pose_data[i_prev, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                continue
            mask_arm_draw.line([tuple(pose_data[j]) for j in [i_prev, i]], 'white', width=r*10)
            pointx, pointy = pose_data[i]
            radius = r*4 if i == pose_ids[-1] else r*15
            mask_arm_draw.ellipse((pointx-radius, pointy-radius, pointx+radius, pointy+radius), 'white', 'white')
            i_prev = i
        parse_arm = (np.array(mask_arm) / 255) * (parse_array == parse_id).astype(np.float32)
        agnostic.paste(0, None, Image.fromarray(np.uint8(parse_arm * 255), 'L'))

    # mask torso & neck
    agnostic.paste(0, None, Image.fromarray(np.uint8(parse_upper * 255), 'L'))
    agnostic.paste(0, None, Image.fromarray(np.uint8(parse_neck * 255), 'L'))

    return agnostic


if __name__=="__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, help="dataset dir")
    parser.add_argument('--output_path', type=str, help="output dir")

    args = parser.parse_args()
    data_path = args.data_path
    output_path = args.output_path
    
    os.makedirs(output_path, exist_ok=True)
    
    for im_name in tqdm(os.listdir(osp.join(data_path, 'image-parse-v3'))):
        # load pose image
        pose_name = im_name.replace('.png', '_keypoints.json')
        if pose_name in '.ipynb_checkpoints':
          continue
        try:
            with open(osp.join(data_path, 'openpose_json', pose_name), 'r') as f:
                pose_label = json.load(f)
                pose_data = pose_label['people'][0]['pose_keypoints_2d']
                pose_data = np.array(pose_data)
                pose_data = pose_data.reshape((-1, 3))[:, :2]
        except IndexError:
            print(pose_name)
            continue

        # load parsing image
        parse_name = im_name.replace('.jpg', '.png')
        im_parse = Image.open(osp.join(data_path, 'image-parse-v3', parse_name))

        agnostic = get_im_parse_agnostic(im_parse, pose_data)
        
        agnostic.save(osp.join(output_path, parse_name))

Overwriting /content/HR-VITON/get_parse_agnostic.py


#Streamlit

In [17]:
!pip install -q streamlit

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/9.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/9.7 MB[0m [31m85.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m8.8/9.7 MB[0m [31m128.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m9.7/9.7 MB[0m [31m130.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m72.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164

In [18]:
%cd /content

/content


## Openpose

In [19]:
%%writefile /content/openpose.py
import cv2
import os
import shutil
os.chdir('/content/')
path_folder_image = '/content/our_data_folder/test/image/'
path_human_input = '/content/human_input/'
path_shirt_input = '/content/shirt_input/'
img = cv2.imread(os.path.join(path_human_input, os.listdir(path_human_input)[-1]))
print(os.path.join(path_human_input, os.listdir(path_human_input)[-1]))
img = cv2.resize(img, (768, 1024))
tmp_path_image = os.path.join(path_folder_image, os.listdir(path_human_input)[-1])
cv2.imwrite(tmp_path_image, img)

print('Hihi')

path_folder_cloth = '/content/our_data_folder/test/cloth/'
img = cv2.imread(os.path.join(path_shirt_input, os.listdir(path_shirt_input)[-1]))
img = cv2.resize(img, (768, 1024))
tmp_path_cloth = os.path.join(path_folder_cloth, os.listdir(path_shirt_input)[-1])
cv2.imwrite(tmp_path_cloth, img)

os.chdir('/content/')
shutil.copy(tmp_path_image, '/content/our_data_folder/test/cloth')
shutil.copy(tmp_path_image, '/content/our_data_folder/test/cloth-mask')
shutil.copy(tmp_path_cloth, path_folder_image)

Writing /content/openpose.py


In [20]:
%%writefile /content/openpose.sh
cd openpose && ./build/examples/openpose/openpose.bin --image_dir /content/our_data_folder/test/image --hand --disable_blending --display 0 --write_json /content/our_data_folder/test/openpose_json --write_images /content/our_data_folder/test/openpose_img --num_gpu 1 --num_gpu_start 0

Writing /content/openpose.sh


## Human parse

In [21]:
%%writefile /content/humanparse1.py
from PIL import Image
import cv2
import os
os.chdir('/content/CIHP_PGN')
if not os.path.exists('/content/CIHP_PGN/image_resize'):
  os.mkdir('/content/CIHP_PGN/image_resize')
file_name = os.listdir('/content/human_input/')[-1]
img = cv2.imread("/content/our_data_folder/test/image/" + file_name)
img_r = cv2.resize(img, (192, 256))
cv2.imwrite("/content/CIHP_PGN/image_resize/" + file_name, img_r)

if not os.path.exists('output'):
  os.mkdir('output')


Writing /content/humanparse1.py


In [22]:
%%writefile /content/humanparse1.sh
python /content/CIHP_PGN/inf_pgn.py --directory /content/CIHP_PGN/image_resize --output /content/CIHP_PGN/output

Writing /content/humanparse1.sh


In [23]:
%%writefile /content/humanparse2.py
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageFilter
import os
os.chdir('/content/CIHP_PGN')
file_name = os.listdir('/content/human_input/')[-1]

img = Image.open("/content/CIHP_PGN/output/cihp_parsing_maps/" + file_name[:-4] + ".png")
img_r = transforms.Resize(768, interpolation=0)(img)
img_s = img_r.filter(ImageFilter.ModeFilter(size=7))
img_s.save("/content/our_data_folder/test/image-parse-v3/" + file_name[:-3] + 'png')

img = Image.open("/content/CIHP_PGN/output/cihp_parsing_maps/" + file_name[:-4] + "_agn_vis.png")
img_r = transforms.Resize(768, interpolation=0)(img)
img_s = img_r.filter(ImageFilter.ModeFilter(size=7))
img_s.save("/content/our_data_folder/test/image-parse-agnostic-v3.2/" + file_name[:-3] + 'png')

# img = cv2.imread("/content/CIHP_PGN/output/cihp_parsing_maps/" + file_name[:-4] + "_vis.png")
# img_r = cv2.resize(img, (768, 1024))
# cv2.imwrite(("/content/our_data_folder/test/image-parse-v3/" + file_name[:-3] + 'png'), img_r)

# img = cv2.imread("/content/CIHP_PGN/output/cihp_parsing_maps/" + file_name[:-4] + "_agn.png")
# img_r = cv2.resize(img, (768, 1024))
# cv2.imwrite(("/content/our_data_folder/test/image-parse-agnostic-v3.2/" + file_name[:-3] + 'png'), img_r)


Writing /content/humanparse2.py


In [24]:
%%writefile /content/humanparse2.sh
python3 /content/HR-VITON/get_parse_agnostic.py --data_path /content/our_data_folder/test/ --output_path /content/our_data_folder/test/image-parse-agnostic-v3.2

Writing /content/humanparse2.sh


## Densepose

In [40]:
%%writefile /content/densepose1.py
import os
import shutil
import subprocess
os.chdir('/content/detectron2')
image_path = "/content/our_data_folder/test/image/"
file_name = os.listdir(image_path)[-1]
image_path = os.path.join(image_path, file_name)
cmd_str = f'python /content/detectron2/projects/DensePose/apply_net.py show /content/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl {image_path} dp_segm -v'
subprocess.run(cmd_str, shell=True)

Overwriting /content/densepose1.py


In [26]:
%%writefile /content/densepose.sh
#!/bin/bash

# Run Python script
python /content/detectron2/projects/DensePose/apply_net.py show /content/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl image_path dp_segm -v


Writing /content/densepose.sh


In [27]:
%%writefile /content/densepose2.py
import shutil
import os
image_path = "/content/our_data_folder/test/image/"
file_name = os.listdir("/content/human_input")[-1]
image_path = os.path.join(image_path, file_name)
shutil.copy('/content/detectron2/densepose_result.jpg', '/content/our_data_folder/test/image-densepose')

os.rename('/content/our_data_folder/test/image-densepose/densepose_result.jpg', '/content/our_data_folder/test/image-densepose/'+file_name)

Writing /content/densepose2.py


## Cloth mask

In [28]:
%%writefile /content/clothmask.py
from rembg import remove
import os
import cv2
from google.colab.patches import cv2_imshow
import numpy as np

file_name = os.listdir('/content/shirt_input')[-1]
image_path = os.path.join('/content/our_data_folder/test/cloth/', file_name)

img = cv2.imread(image_path)
output = remove(img)
img = output[:, :, :3]
img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
masked = np.where(img == 0, 0, 255)
cv2_imshow(masked)

cv2.imwrite(os.path.join('/content/our_data_folder/test/cloth-mask/', file_name), masked)


Writing /content/clothmask.py


## VITON

In [73]:
%%writefile /content/viton.py
import os
import cv2
cloth_path = os.listdir('/content/our_data_folder/test/cloth')
image_path = os.listdir('/content/our_data_folder/test/image')
image_people = os.listdir('/content/human_input/')[-1]
image_cloth = os.listdir('/content/shirt_input/')[-1]
with open('/content/our_data_folder/test_pairs.txt', 'w+') as f:
  f.write(image_people + ' ' + image_cloth)

img_original = cv2.imread(os.path.join('/content/human_input/', image_people))
shape_original = img_original.shape
img_res = cv2.imread("/content/Output/aperson_ashirt.png")
img_r = cv2.resize(img_res, shape_original[:2][::-1])
cv2.imwrite('/content/res.png', img_r)

Overwriting /content/viton.py


In [30]:
%%writefile /content/viton.sh
python /content/HR-VITON/test_generator.py

Writing /content/viton.sh


In [31]:
# !rm -rf /content/our_data_folder/test/cloth
# !mkdir /content/our_data_folder/test/cloth
# !rm -rf /content/our_data_folder/test/cloth-mask
# !mkdir /content/our_data_folder/test/cloth-mask
# !rm -rf /content/our_data_folder/test/image
# !mkdir /content/our_data_folder/test/image

In [64]:
%%writefile /content/HR-VITON/cp_dataset_test.py
import torch
import torch.utils.data as data
import torchvision.transforms as transforms

from PIL import Image, ImageDraw

import os.path as osp
import numpy as np
import json


class CPDatasetTest(data.Dataset):
    """
        Test Dataset for CP-VTON.
    """
    def __init__(self, opt):
        super(CPDatasetTest, self).__init__()
        # base setting
        self.opt = opt
        self.root = opt.dataroot
        self.datamode = opt.datamode # train or test or self-defined
        self.data_list = opt.data_list
        self.fine_height = opt.fine_height
        self.fine_width = opt.fine_width
        self.semantic_nc = opt.semantic_nc
        self.data_path = osp.join(opt.dataroot, opt.datamode)
        self.transform = transforms.Compose([  \
                transforms.ToTensor(),   \
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        # load data list
        im_names = []
        c_names = []
        with open(osp.join(opt.dataroot, opt.data_list), 'r') as f:
            for line in f.readlines():
                im_name, c_name = line.strip().split()
                im_names.append(im_name)
                c_names.append(c_name)

        self.im_names = im_names
        self.c_names = dict()
        self.c_names['paired'] = im_names
        self.c_names['unpaired'] = c_names

    def name(self):
        return "CPDataset"
    def get_agnostic(self, im, im_parse, pose_data):
        parse_array = np.array(im_parse)
        parse_head = ((parse_array == 4).astype(np.float32) +
                      (parse_array == 13).astype(np.float32))
        parse_lower = ((parse_array == 9).astype(np.float32) +
                       (parse_array == 12).astype(np.float32) +
                       (parse_array == 16).astype(np.float32) +
                       (parse_array == 17).astype(np.float32) +
                       (parse_array == 18).astype(np.float32) +
                       (parse_array == 19).astype(np.float32))

        agnostic = im.copy()
        agnostic_draw = ImageDraw.Draw(agnostic)

        length_a = np.linalg.norm(pose_data[5] - pose_data[2])
        length_b = np.linalg.norm(pose_data[12] - pose_data[9])
        point = (pose_data[9] + pose_data[12]) / 2
        pose_data[9] = point + (pose_data[9] - point) / length_b * length_a
        pose_data[12] = point + (pose_data[12] - point) / length_b * length_a

        r = int(length_a / 16) + 1

        # mask torso
        for i in [9, 12]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*2, pointy-r*5, pointx+r*2, pointy+r*5), 'gray', 'gray')
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 9]], 'gray', width=r*5)
        agnostic_draw.line([tuple(pose_data[i]) for i in [5, 12]], 'gray', width=r*5)
        agnostic_draw.line([tuple(pose_data[i]) for i in [9, 12]], 'gray', width=r*11)
        agnostic_draw.polygon([tuple(pose_data[i]) for i in [2, 5, 12, 9]], 'gray', 'gray')

        # mask neck
        pointx, pointy = pose_data[1]
        agnostic_draw.rectangle((pointx-r*2, pointy-r*4, pointx+r*2, pointy), 'gray', 'gray')

        # mask arms
        agnostic_draw.line([tuple(pose_data[i]) for i in [2, 5]], 'gray', width=r*11)
        for i in [2, 5]:
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*4, pointy-r*5, pointx+r*4, pointy+r*5), 'gray', 'gray')
        for i in [3, 4, 6, 7]:
            if (pose_data[i-1, 0] == 0.0 and pose_data[i-1, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                continue
            agnostic_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'gray', width=r*10)
            pointx, pointy = pose_data[i]
            agnostic_draw.ellipse((pointx-r*4, pointy-r*4, pointx+r*4, pointy+r*4), 'gray', 'gray')

        for parse_id, pose_ids in [(14, [5, 6, 7]), (15, [2, 3, 4])]:
            mask_arm = Image.new('L', (768, 1024), 'white')
            mask_arm_draw = ImageDraw.Draw(mask_arm)
            pointx, pointy = pose_data[pose_ids[0]]
            mask_arm_draw.ellipse((pointx-r*4, pointy-r*5, pointx+r*4, pointy+r*5), 'black', 'black')
            for i in pose_ids[1:]:
                if (pose_data[i-1, 0] == 0.0 and pose_data[i-1, 1] == 0.0) or (pose_data[i, 0] == 0.0 and pose_data[i, 1] == 0.0):
                    continue
                mask_arm_draw.line([tuple(pose_data[j]) for j in [i - 1, i]], 'black', width=r*10)
                pointx, pointy = pose_data[i]
                if i != pose_ids[-1]:
                    mask_arm_draw.ellipse((pointx-r*4, pointy-r*4, pointx+r*4, pointy+r*4), 'black', 'black')
            mask_arm_draw.ellipse((pointx-r*3, pointy-r*3, pointx+r*3, pointy+r*3), 'black', 'black')

            parse_arm = (np.array(mask_arm) / 255) * (parse_array == parse_id).astype(np.float32)
            agnostic.paste(im, None, Image.fromarray(np.uint8(parse_arm * 255), 'L'))

        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_head * 255), 'L'))
        agnostic.paste(im, None, Image.fromarray(np.uint8(parse_lower * 255), 'L'))
        return agnostic
    def __getitem__(self, index):
        im_name = self.im_names[index]
        c_name = {}
        c = {}
        cm = {}
        for key in self.c_names:
            c_name[key] = self.c_names[key][index]
            c[key] = Image.open(osp.join(self.data_path, 'cloth', c_name[key])).convert('RGB')
            c[key] = transforms.Resize(self.fine_width, interpolation=2)(c[key])
            cm[key] = Image.open(osp.join(self.data_path, 'cloth-mask', c_name[key]))
            cm[key] = transforms.Resize(self.fine_width, interpolation=0)(cm[key])

            c[key] = self.transform(c[key])  # [-1,1]
            cm_array = np.array(cm[key])
            cm_array = (cm_array >= 128).astype(np.float32)
            cm[key] = torch.from_numpy(cm_array)  # [0,1]
            cm[key].unsqueeze_(0)

        # person image
        im_pil_big = Image.open(osp.join(self.data_path, 'image', im_name))
        im_pil = transforms.Resize(self.fine_width, interpolation=2)(im_pil_big)
        
        im = self.transform(im_pil)

        # load parsing image
        parse_name = im_name.replace('.jpg', '.png')
        im_parse_pil_big = Image.open(osp.join(self.data_path, 'image-parse-v3', parse_name))
        im_parse_pil = transforms.Resize(self.fine_width, interpolation=0)(im_parse_pil_big)
        parse = torch.from_numpy(np.array(im_parse_pil)[None]).long()
        im_parse = self.transform(im_parse_pil.convert('RGB'))
        
        labels = {
            0:  ['background',  [0, 10]],
            1:  ['hair',        [1, 2]],
            2:  ['face',        [4, 13]],
            3:  ['upper',       [5, 6, 7]],
            4:  ['bottom',      [9, 12]],
            5:  ['left_arm',    [14]],
            6:  ['right_arm',   [15]],
            7:  ['left_leg',    [16]],
            8:  ['right_leg',   [17]],
            9:  ['left_shoe',   [18]],
            10: ['right_shoe',  [19]],
            11: ['socks',       [8]],
            12: ['noise',       [3, 11]]
        }

        parse_map = torch.FloatTensor(20, self.fine_height, self.fine_width).zero_()
        parse_map = parse_map.scatter_(0, parse, 1.0)
        new_parse_map = torch.FloatTensor(self.semantic_nc, self.fine_height, self.fine_width).zero_()
        
        for i in range(len(labels)):
            for label in labels[i][1]:
                new_parse_map[i] += parse_map[label]
        
        parse_onehot = torch.FloatTensor(1, self.fine_height, self.fine_width).zero_()
        for i in range(len(labels)):
            for label in labels[i][1]:
                parse_onehot[0] += parse_map[label] * i

        # load image-parse-agnostic
        image_parse_agnostic = Image.open(osp.join(self.data_path, 'image-parse-agnostic-v3.2', parse_name))
        image_parse_agnostic = transforms.Resize(self.fine_width, interpolation=0)(image_parse_agnostic)
        parse_agnostic = torch.from_numpy(np.array(image_parse_agnostic)[None]).long()
        image_parse_agnostic = self.transform(image_parse_agnostic.convert('RGB'))

        parse_agnostic_map = torch.FloatTensor(20, self.fine_height, self.fine_width).zero_()
        parse_agnostic_map = parse_agnostic_map.scatter_(0, parse_agnostic, 1.0)
        new_parse_agnostic_map = torch.FloatTensor(self.semantic_nc, self.fine_height, self.fine_width).zero_()
        for i in range(len(labels)):
            for label in labels[i][1]:
                new_parse_agnostic_map[i] += parse_agnostic_map[label]
                

        # parse cloth & parse cloth mask
        pcm = new_parse_map[3:4]
        im_c = im * pcm + (1 - pcm)
        
        # load pose points
        if 'jpg' in im_name:
            pose_name = im_name.replace('.jpg', '_rendered.png')
        else:
            pose_name = im_name.replace('.png', '_rendered.png')
        pose_map = Image.open(osp.join(self.data_path, 'openpose_img', pose_name))
        pose_map = transforms.Resize(self.fine_width, interpolation=2)(pose_map)
        pose_map = self.transform(pose_map)  # [-1,1]

        if 'jpg' in im_name:
            pose_name = im_name.replace('.jpg', '_keypoints.json')
        else:
            pose_name = im_name.replace('.png', '_keypoints.json')
        with open(osp.join(self.data_path, 'openpose_json', pose_name), 'r') as f:
            pose_label = json.load(f)
            pose_data = pose_label['people'][0]['pose_keypoints_2d']
            pose_data = np.array(pose_data)
            pose_data = pose_data.reshape((-1, 3))[:, :2]

        
        # load densepose
        densepose_name = im_name.replace('image', 'image-densepose')
        densepose_map = Image.open(osp.join(self.data_path, 'image-densepose', densepose_name))
        densepose_map = transforms.Resize(self.fine_width, interpolation=2)(densepose_map)
        densepose_map = self.transform(densepose_map)  # [-1,1]
        agnostic = self.get_agnostic(im_pil_big, im_parse_pil_big, pose_data)
        agnostic = transforms.Resize(self.fine_width, interpolation=2)(agnostic)
        agnostic = self.transform(agnostic)
        


        result = {
            'c_name':   c_name,     # for visualization
            'im_name':  im_name,    # for visualization or ground truth
            # intput 1 (clothfloww)
            'cloth':    c,          # for input
            'cloth_mask':     cm,   # for input
            # intput 2 (segnet)
            'parse_agnostic': new_parse_agnostic_map,
            'densepose': densepose_map,
            'pose': pose_map,       # for conditioning
            # GT
            'parse_onehot' : parse_onehot,  # Cross Entropy
            'parse': new_parse_map, # GAN Loss real
            'pcm': pcm,             # L1 Loss & vis
            'parse_cloth': im_c,    # VGG Loss & vis
            # visualization
            'image':    im,         # for visualization
            'agnostic' : agnostic
            }
        
        return result

    def __len__(self):
        return len(self.im_names)
    

class CPDataLoader(object):
    def __init__(self, opt, dataset):
        super(CPDataLoader, self).__init__()
        if opt.shuffle :
            train_sampler = torch.utils.data.sampler.RandomSampler(dataset)
        else:
            train_sampler = None

        self.data_loader = torch.utils.data.DataLoader(
                dataset, batch_size=opt.batch_size, shuffle=(train_sampler is None),
                num_workers=opt.workers, pin_memory=True, drop_last=True, sampler=train_sampler)
        self.dataset = dataset
        self.data_iter = self.data_loader.__iter__()

    def next_batch(self):
        try:
            batch = self.data_iter.__next__()
        except StopIteration:
            self.data_iter = self.data_loader.__iter__()
            batch = self.data_iter.__next__()

        return batch

Overwriting /content/HR-VITON/cp_dataset_test.py


# Run app

In [71]:
%%writefile app.py
import streamlit as st
import pickle
from PIL import Image
import subprocess
import sys
import os
import cv2
from google.colab.patches import cv2_imshow

def load_image():
    col1, col2 = st.columns(2)
    
    image_human_name = ''
    image_shirt_name = ''
    with col1:
        st.header("Your image")
        st.text('You need to upload your photo to try on the clothes')
        path = '/content/human_input/'
        option = st.selectbox(
        "How do you want to import photos?",
        ("Upload from your device", "Link human image"),
        )
        if option == 'Upload from your device':
            image_human = st.file_uploader(label='Pick an image of the human')
            if image_human is not None:
                image_human_name = 'aperson.jpg'
                with open(os.path.join(path,image_human_name),"wb+") as f:
                    f.write(image_human.getbuffer())
                image_human = Image.open(image_human)
                image_human = image_human.resize((200, 300))
                st.image(image_human)
        else:
            link_image = st.text_input(
                "Enter link of your image 👇",
            )
            if link_image:
                try:
                    response = requests.get(link_image)
                    if response.status_code == 200:
                        image_human = Image.open(BytesIO(response.content))
                        image_human = image_human.resize((200, 300))
                        st.image(link_image, caption='Valid image URL')
                        image_human_name = "a"
                        with open(os.path.join(path,image_human_name),"wb+") as f:
                            f.write(image_human.getbuffer())
                    else:
                        st.write('Invalid image URL')
                except:
                    st.write('Invalid image URL') 



    with col2:
        st.header("Your shirt")
        st.text('You need to upload a photo of the shirt you want to try on')
        path = '/content/shirt_input/'
        option = st.selectbox(
        "How do you want to import photos?",
        ("Upload from your device", "Link shirt image"),
        )
        if option == 'Upload from your device':
            image_shirt = st.file_uploader(label='Pick an image of the shirt')
            if image_shirt is not None:
                image_shirt_name = 'ashirt.png'
                print(image_shirt_name)
                with open(os.path.join(path, image_shirt_name),"wb+") as f:
                    f.write(image_shirt.getbuffer())
                image_shirt = Image.open(image_shirt)
                image_shirt = image_shirt.resize((200, 300))
                st.image(image_shirt)
        else:
            link_image = st.text_input(
                "Enter link of your shirt image 👇",
            )
            if link_image:
                try:
                    response = requests.get(link_image)
                    if response.status_code == 200:
                        image_shirt = Image.open(BytesIO(response.content))
                        image_shirt = image_shirt.resize((200, 300))
                        st.image(link_image, caption='Valid image URL')
                        image_shirt_name = "a"
                        with open(os.path.join(path, 'ashirt.png'),"wb+") as f:
                            f.write(image_shirt.getbuffer())
                    else:
                        st.write('Invalid image URL')
                except:
                    st.write('Invalid image URL')  
    return image_human_name, image_shirt_name

def load_result():
    st.header("Your result")
    st.image("https://static.streamlit.io/examples/dog.jpg")


def main():
    human, shirt = "", ""
    human, shirt = load_image()
    print(human, shirt)
    if st.button("Run application"):
        if human =="" or shirt =="":
            st.error('You must have human image and shirt image', icon="🚨")
        else:
            subprocess.run(["python", "openpose.py"])
            print("Hihi")
            subprocess.run(["bash", "openpose.sh"])
            subprocess.run(["python", "humanparse1.py"])
            subprocess.run(["bash", "humanparse1.sh"])
            subprocess.run(["python", "humanparse2.py"])
            subprocess.run(["bash", "humanparse2.sh"])
            subprocess.run(["bash", "humanparse3.sh"])
            subprocess.run(["python", "densepose1.py"])
            # subprocess.run(["bash", "densepose.sh"])
            subprocess.run(["python", "densepose2.py"])
            subprocess.run(["python", "clothmask.py"])
            subprocess.run(["python", "viton.py"])
            subprocess.run(["bash", "viton.sh"])
            st.header("Your result")
            if 'res.png' in os.listdir('/content'):
                img = Image.open('/content/res.png')
                st.image(img)
                os.remove('/content/shirt_input/'+shirt)
                os.remove('/content/human_input/'+human)

st.title("Minathon 2023")
st.header("Team name: mInAThoN 2o2E")
st.header("My project: Virtual Try-On")

if __name__=='__main__':
    main()

Overwriting app.py


In [72]:
!rm -rf /content/Output
!mkdir /content/Output

In [33]:
!pip install pyngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyngrok
  Downloading pyngrok-5.2.1.tar.gz (761 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m761.3/761.3 KB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-5.2.1-py3-none-any.whl size=19790 sha256=0022468e595a3944eecd66b4eb3210e71947264b62184ed6a7d2612d8188580c
  Stored in directory: /root/.cache/pip/wheels/f6/89/59/49d4249e00957e94813ac136a335d10ed2e09a856c5096f95c
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-5.2.1


In [34]:
from pyngrok import ngrok
ngrok.set_auth_token("2OAUGl1HDHvJlopyYozcvQj03mi_2z1yRNEHPY2QiTWtrCc7c") #ngrok.com



In [49]:
!nohup streamlit run app.py --server.port 80 &
url = ngrok.connect(port = '80')
print(url)

nohup: appending output to 'nohup.out'
NgrokTunnel: "http://27d5-34-142-179-200.ngrok-free.app" -> "http://localhost:80"


In [47]:
import subprocess
subprocess.run(["python", "openpose.py"])
print("Hihi")
subprocess.run(["bash", "openpose.sh"])
subprocess.run(["python", "humanparse1.py"])
subprocess.run(["bash", "humanparse1.sh"])
subprocess.run(["python", "humanparse2.py"])
subprocess.run(["bash", "humanparse2.sh"])
# subprocess.run(["bash", "humanparse3.sh"])
subprocess.run(["python", "densepose1.py"])
# subprocess.run(["bash", "densepose.sh"])
subprocess.run(["python", "densepose2.py"])
subprocess.run(["python", "clothmask.py"])
subprocess.run(["python", "viton.py"])
subprocess.run(["bash", "viton.sh"])

Hihi


CompletedProcess(args=['bash', 'viton.sh'], returncode=1)