## Preparations

In [None]:
# Skip the clone when the checkout already exists so this cell can be re-run safely.
![ -d Audio-driven-TalkingFace-HeadPose ] || git clone https://github.com/yiranran/Audio-driven-TalkingFace-HeadPose.git

In [None]:
# Explicit %cd magic with the absolute path (the same /content location that the
# patched rasterize_triangles.py hard-codes), so re-running this cell from inside
# the repository does not fail.
%cd /content/Audio-driven-TalkingFace-HeadPose/

### Install python packages

In [None]:
# Use the explicit %pip magic instead of relying on automagic for a bare `pip`.
%pip install -r requirements_colab.txt

### Use octave instead of matlab

In [None]:
# Refresh the apt package index before installing Octave in the next cell.
!apt-get update

In [None]:
# -y answers the confirmation prompt so the cell works under "Run all";
# apt-get is the stable command-line interface intended for scripted use.
!apt-get install -y octave liboctave-dev

In [None]:
# -nc (no-clobber) skips the download when the tarball is already present, so a
# re-run does not leave image-2.12.0.tar.gz.1 duplicates behind.
!wget -nc https://nchc.dl.sourceforge.net/project/octave/Octave%20Forge%20Packages/Individual%20Package%20Releases/image-2.12.0.tar.gz

In [None]:
# Install the downloaded Octave Forge image package; the scripts patched later in
# this notebook run `pkg load image` before calling the alpha-blend functions.
!octave --eval "pkg install image-2.12.0.tar.gz"

### Mount google drive

In [None]:
# Mount Google Drive at /content/gdrive; the next cell reads Models.zip from it.
from google.colab import drive
drive.mount('/content/gdrive')

### Copy the pre-trained models here and unzip them

In [None]:
# -o overwrites existing files without prompting (a re-run would otherwise stop at
# unzip's "replace?" question); -q keeps the per-file listing out of the output.
!unzip -o -q /content/gdrive/My\ Drive/TalkingFace/Models.zip
# Copy the unpacked model folders into the repository root.
!cp -r Models/Audio .
!cp -r Models/Deep3DFaceReconstruction .
!cp -r Models/render-to-video .

## Fine-tuning on a person

### Extract frames

This walkthrough uses the 25 fps example video `31.mp4`.
To use your own video, first convert it to 25 fps with ffmpeg and rename it to `[number].mp4`.

In [None]:
# Run from Data/ because the script is given the bare file name 31.mp4.
# NOTE(review): the frames are presumably written to Data/31, which the
# reconstruction step later receives as ../Data/31 - confirm in extract_frame1.py.
!cd Data/; python extract_frame1.py 31.mp4

### TensorFlow-GPU does not work as installed, so uninstall and reinstall it

In [None]:
# Show which TensorFlow packages (and versions) are currently installed.
!pip list | grep tensorflow

In [None]:
# -y skips the "Proceed (y/n)?" confirmation so the cell does not block on input.
!pip uninstall -y tensorflow tensorflow-gpu

In [None]:
# Pin TensorFlow 1.14.0: the build cell below copies libtensorflow_framework.so.1
# from this installation and compiles the rasterizer kernel against its headers.
!pip install tensorflow-gpu==1.14.0

### Build tf_mesh_renderer

In [None]:
# Expose TensorFlow's framework library in /usr/lib under the unversioned name
# that `-ltensorflow_framework` resolves to. `ln -sf` replaces an existing link,
# so re-running this cell no longer fails with "File exists".
!cp /usr/local/lib/python3.6/dist-packages/tensorflow/libtensorflow_framework.so.1 /usr/lib/
!cd /usr/lib/ && ln -sf libtensorflow_framework.so.1 libtensorflow_framework.so
# Compile the rasterizer sources into rasterize_triangles_kernel.so, the op library
# that rasterize_triangles.py loads with tf.load_op_library.
!cd Deep3DFaceReconstruction/tf_mesh_renderer/mesh_renderer/kernels/;\
  g++ -std=c++11 -shared rasterize_triangles_grad.cc rasterize_triangles_op.cc rasterize_triangles_impl.cc rasterize_triangles_impl.h -o rasterize_triangles_kernel.so -fPIC -D_GLIBCXX_USE_CXX11_ABI=0 -I /usr/local/lib/python3.6/dist-packages/tensorflow/include -I /usr/local/lib/python3.6/dist-packages/tensorflow/include/external/nsync/public -L /usr/local/lib/python3.6/dist-packages/tensorflow -ltensorflow_framework -O2

### Edit rasterize_triangles.py using pycat and %%writefile

Line 28: change the first argument to `os.path.join('/content/Audio-driven-TalkingFace-HeadPose/Deep3DFaceReconstruction',`

In [None]:
# Display the current file so it can be compared with the patched version written below.
pycat Deep3DFaceReconstruction/tf_mesh_renderer/mesh_renderer/rasterize_triangles.py

In [None]:
%%writefile Deep3DFaceReconstruction/tf_mesh_renderer/mesh_renderer/rasterize_triangles.py
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Differentiable triangle rasterizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf

from . import camera_utils

# Load the compiled rasterizer op library at import time. The path is absolute:
# it assumes the repository lives at /content/Audio-driven-TalkingFace-HeadPose
# and that rasterize_triangles_kernel.so has already been built in kernels/.
# The commented-out line is the original TEST_SRCDIR-based lookup it replaces.
rasterize_triangles_module = tf.load_op_library(
    #os.path.join(os.environ['TEST_SRCDIR'],
    os.path.join('/content/Audio-driven-TalkingFace-HeadPose/Deep3DFaceReconstruction',
    'tf_mesh_renderer/mesh_renderer/kernels/rasterize_triangles_kernel.so'))


def rasterize(world_space_vertices, attributes, triangles, camera_matrices,
              image_width, image_height, background_value):
  """Projects a world-space mesh and rasterizes its vertex attributes.

  The vertices are first transformed with the given camera matrices via
  camera_utils.transform_homogeneous(); the result is handed, together with
  all remaining arguments, to rasterize_clip_space().

  Args:
    world_space_vertices: 3-D float32 tensor of xyz positions, shape
      [batch_size, vertex_count, 3].
    attributes: 3-D float32 tensor, shape [batch_size, vertex_count,
      attribute_count]. Per-vertex values that are interpolated across each
      triangle with barycentric weights.
    triangles: 2-D int32 tensor, shape [triangle_count, 3]. Every row holds
      the vertex indices of one triangle; a clockwise winding of the forward
      order means the triangle's normal faces the viewer. No gradients are
      available with respect to this tensor.
    camera_matrices: 3-D float tensor, shape [batch_size, 4, 4], holding
      model-view-perspective projection matrices.
    image_width: int, output image width in pixels.
    image_height: int, output image height in pixels.
    background_value: 1-D float32 tensor, shape [attribute_count]. Value
      assigned to pixels not covered by any triangle.

  Returns:
    A 4-D float32 tensor of shape [batch_size, image_height, image_width,
    attribute_count] with the interpolated attributes for every pixel.

  Raises:
    ValueError: An invalid argument to the method is detected.
  """
  projected_vertices = camera_utils.transform_homogeneous(
      camera_matrices, world_space_vertices)
  return rasterize_clip_space(
      projected_vertices, attributes, triangles, image_width, image_height,
      background_value)


def rasterize_clip_space(clip_space_vertices, attributes, triangles,
                         image_width, image_height, background_value):
  """Rasterizes the input mesh expressed in clip-space (xyzw) coordinates.

  Interpolates vertex attributes using perspective-correct interpolation and
  clips triangles that lie outside the viewing frustum.

  Each batch element is rasterized separately inside a tf.while_loop by the
  custom rasterize_triangles op; the per-pixel barycentric weights and vertex
  ids are then used to blend the vertex attributes, and the result is
  composited over background_value.

  Args:
    clip_space_vertices: 3-D float32 tensor of homogenous vertices (xyzw) with
      shape [batch_size, vertex_count, 4].
    attributes: 3-D float32 tensor with shape [batch_size, vertex_count,
      attribute_count]. Each vertex attribute is interpolated across the
      triangle using barycentric interpolation.
    triangles: 2-D int32 tensor with shape [triangle_count, 3]. Each triplet
      should contain vertex indices describing a triangle such that the
      triangle's normal points toward the viewer if the forward order of the
      triplet defines a clockwise winding of the vertices. Gradients with
      respect to this tensor are not available.
    image_width: int specifying desired output image width in pixels.
    image_height: int specifying desired output image height in pixels.
    background_value: a 1-D float32 tensor with shape [attribute_count]. Pixels
      that lie outside all triangles take this value.

  Returns:
    A 4-D float32 tensor with shape [batch_size, image_height, image_width,
    attribute_count], containing the interpolated vertex attributes at
    each pixel.

  Raises:
    ValueError: If image_width or image_height is not greater than zero, or if
      clip_space_vertices does not have exactly three dimensions.
  """
  if not image_width > 0:
    raise ValueError('Image width must be > 0.')
  if not image_height > 0:
    raise ValueError('Image height must be > 0.')
  if len(clip_space_vertices.shape) != 3:
    raise ValueError('The vertex buffer must be 3D.')

  # The vertex count is read from the static shape, whereas the batch size is
  # taken from the dynamic shape (tf.shape) and is therefore a tensor.
  vertex_count = clip_space_vertices.shape[1].value

  batch_size = tf.shape(clip_space_vertices)[0]
  
  # One TensorArray slot per batch element; both arrays are filled by the
  # while loop below.
  per_image_barycentric_coordinates = tf.TensorArray(dtype=tf.float32,
    size=batch_size)
  per_image_vertex_ids = tf.TensorArray(dtype=tf.int32, size=batch_size)

  def batch_loop_condition(b, *args):
    # Continue until every batch element has been processed.
    return b < batch_size

  def batch_loop_iteration(b, per_image_barycentric_coordinates,
    per_image_vertex_ids):
    # Rasterize batch element b. The op returns three outputs; the third is
    # not used here.
    barycentric_coords, triangle_ids, _ = (
        rasterize_triangles_module.rasterize_triangles(
            clip_space_vertices[b, :, :], triangles, image_width,
            image_height))
    per_image_barycentric_coordinates = \
      per_image_barycentric_coordinates.write(
        b, tf.reshape(barycentric_coords, [-1, 3]))

    # Look up the three vertex indices of each pixel's triangle, then offset
    # them by b * vertex_count so they index into the attribute tensor once it
    # has been flattened to [batch_size * vertex_count, ...] further down.
    vertex_ids = tf.gather(triangles, tf.reshape(triangle_ids, [-1]))
    reindexed_ids = tf.add(vertex_ids, b * clip_space_vertices.shape[1].value)
    per_image_vertex_ids = per_image_vertex_ids.write(b, reindexed_ids)

    return b+1, per_image_barycentric_coordinates, per_image_vertex_ids

  b = tf.constant(0)
  _, per_image_barycentric_coordinates, per_image_vertex_ids = tf.while_loop(
    batch_loop_condition, batch_loop_iteration,
    [b, per_image_barycentric_coordinates, per_image_vertex_ids])

  # Stack the per-image results and flatten them to one row of three values
  # per pixel across the whole batch.
  barycentric_coordinates = tf.reshape(
    per_image_barycentric_coordinates.stack(), [-1, 3])
  vertex_ids = tf.reshape(per_image_vertex_ids.stack(), [-1, 3])

  # Indexes with each pixel's clip-space triangle's extrema (the pixel's
  # 'corner points') ids to get the relevant properties for deferred shading.
  flattened_vertex_attributes = tf.reshape(attributes,
                                           [batch_size * vertex_count, -1])
  corner_attributes = tf.gather(flattened_vertex_attributes, vertex_ids)

  # Computes the pixel attributes by interpolating the known attributes at the
  # corner points of the triangle interpolated with the barycentric coordinates.
  weighted_vertex_attributes = tf.multiply(
      corner_attributes, tf.expand_dims(barycentric_coordinates, axis=2))
  summed_attributes = tf.reduce_sum(weighted_vertex_attributes, axis=1)
  attribute_images = tf.reshape(summed_attributes,
                                [batch_size, image_height, image_width, -1])

  # Barycentric coordinates should approximately sum to one where there is
  # rendered geometry, but be exactly zero where there is not.
  # The sum is doubled before being clipped to [0, 1], so any pixel whose
  # barycentric sum is at least 0.5 ends up with alpha exactly 1.
  alphas = tf.clip_by_value(
      tf.reduce_sum(2.0 * barycentric_coordinates, axis=1), 0.0, 1.0)
  alphas = tf.reshape(alphas, [batch_size, image_height, image_width, 1])

  # Alpha-composite the interpolated attributes over the background value.
  attributes_with_background = (
      alphas * attribute_images + (1.0 - alphas) * background_value)

  return attributes_with_background


@tf.RegisterGradient('RasterizeTriangles')
def _rasterize_triangles_grad(op, df_dbarys, df_dids, df_dz):
  """Gradient function registered for the 'RasterizeTriangles' op.

  Only the incoming gradient for the barycentric-coordinate output is used.
  Gradients for the z-buffer are not implemented; if gradients w.r.t. z are
  needed, pass z as a vertex attribute when calling rasterize_triangles.

  Returns:
    A pair: the result of rasterize_triangles_grad for the op's first input,
    and None for its second input.
  """
  del df_dids, df_dz
  first_input, second_input = op.inputs[0], op.inputs[1]
  first_output, second_output = op.outputs[0], op.outputs[1]
  first_input_grad = rasterize_triangles_module.rasterize_triangles_grad(
      first_input, second_input, first_output, second_output, df_dbarys,
      op.get_attr('image_width'), op.get_attr('image_height'))
  return first_input_grad, None

In [None]:
# Audio/code/mesh_renderer needs exactly the same patched rasterize_triangles.py
# that the previous cell wrote for Deep3DFaceReconstruction (including the same
# absolute path to the compiled kernel). Copy that file instead of keeping a
# second inline copy that could drift out of sync. Run the previous cell first.
!cp Deep3DFaceReconstruction/tf_mesh_renderer/mesh_renderer/rasterize_triangles.py Audio/code/mesh_renderer/rasterize_triangles.py

### Run 3D face reconstruction

In [None]:
# CUDA_VISIBLE_DEVICES=0 restricts the run to GPU 0.
# NOTE(review): ../Data/31 is presumably the frame folder produced by the
# "Extract frames" step - confirm in demo_19news.py.
!cd Deep3DFaceReconstruction/; CUDA_VISIBLE_DEVICES=0 python demo_19news.py ../Data/31

### Finetune audio net

In [None]:
# NOTE(review): the arguments are presumably <person id> <gpu id>, matching the
# render-to-video script of the same name - confirm in Audio/code/train_19news_1.py.
!cd Audio/code/; python train_19news_1.py 31 0

### Edit render-to-video/train_19news_1.py using pycat and %%writefile

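Change "matlab -nojvm -nosplash -nodesktop -nodisplay -r" to "octave --eval", and prepend `pkg load image;` to the evaluated expression so the image package is available to the alpha-blend functions.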

In [None]:
# Display the current script so it can be compared with the patched version written below.
pycat render-to-video/train_19news_1.py

In [None]:
%%writefile render-to-video/train_19news_1.py
import os, sys, glob

def _write_frame_lists(split, name, first, last, srcdir, srcdir2):
        """Write the A/B image lists of one dataset split for frames [first, last).

        datasets/list/<split>A/<name>.txt receives the blended-render paths
        (under srcdir2) and datasets/list/<split>B/<name>.txt the matching
        frame paths (under srcdir), one path per line.
        """
        render_suffix = 'render_bm' if 'bmold' not in name else 'renderold_bm'
        for side in ('A', 'B'):
                os.makedirs('datasets/list/%s%s' % (split, side), exist_ok=True)
        # Context managers close both lists even if writing fails part-way.
        with open('datasets/list/%sA/%s.txt' % (split, name), 'w') as f1, \
                        open('datasets/list/%sB/%s.txt' % (split, name), 'w') as f2:
                for i in range(first, last):
                        print(os.path.join(srcdir2, 'frame%d_%s.png' % (i, render_suffix)), file=f1)
                        print(os.path.join(srcdir, 'frame%d.png' % i), file=f2)

def get_news(n):
        """Blend the rendered frames of person ``n`` and write the train/test lists.

        Runs the Octave alpha-blend function over the first 400 frames
        (300 training + 100 test) of video ``19_news/<n>``, then writes
        datasets/list/{trainA,trainB,testA,testB}/<n>_bmold_win3.txt relative
        to the current working directory.

        Args:
                n: person / video id; only its ``str()`` form is used.
        """
        import shlex  # local import: only needed to quote the working directory

        trainN = 300
        testN = 100
        video = '19_news/' + str(n)
        name = str(n) + '_bmold_win3'
        start = 0
        print(video, name)

        rootdir = os.path.join(os.getcwd(), '../Deep3DFaceReconstruction/output/render/')
        srcdir = os.path.join(rootdir, video)
        # Append the sub-folder explicitly; a string replace on the whole path
        # would also rewrite any earlier component that happens to contain `video`.
        srcdir2 = os.path.join(srcdir, 'bm')

        blend_fn = 'alpha_blend_news' if 'bmold' not in name else 'alpha_blend_newsold'
        # Quote the directory so a working directory containing spaces
        # (e.g. under "My Drive") does not break the shell command.
        cmd = "cd " + shlex.quote(rootdir + "/..") + "; octave --eval \"pkg load image; " + blend_fn + "('" + video + "'," + str(start) + "," + str(trainN+testN) + "); quit;\""
        status = os.system(cmd)
        if status != 0:
                # Keep going as before, but make the failure visible: the lists
                # written below would otherwise reference frames that were never blended.
                print('warning: octave alpha blending exited with status %s' % status, file=sys.stderr)

        # 'win3' lists begin two frames later.
        # NOTE(review): presumably because a 3-frame window is used - confirm.
        start1 = start + 2 if 'win3' in name else start
        _write_frame_lists('train', name, start1, start + trainN, srcdir, srcdir2)
        _write_frame_lists('test', name, start + trainN, start + trainN + testN, srcdir, srcdir2)

def save_each_60(folder, keep_epoch=60):
        """Delete every ``*.pth`` file in ``folder`` except those of one epoch.

        A file is kept when the part of its base name before the first ``_``
        equals ``str(keep_epoch)``; every other ``*.pth`` file, including ones
        with a non-numeric prefix, is removed. Other files are left alone.

        Args:
                folder: directory holding the ``.pth`` files.
                keep_epoch: epoch prefix to keep. Defaults to 60, matching the
                        function's original hard-coded behaviour.
        """
        keep = str(keep_epoch)
        for pth in sorted(glob.glob(folder+'/*.pth')):
                if os.path.basename(pth).split('_')[0] != keep:
                        os.remove(pth)

# Command line: python train_19news_1.py <person id> <gpu id>
n = int(sys.argv[1])
gpu_id = int(sys.argv[2])

# prepare training data, and write two txt as training list
get_news(n)

# prepare arcface feature
# test_batch.py is run once for the training list and once for the test list.
# The return values of os.system are not checked, so a failing step does not
# stop the script.
cmd = 'cd arcface/; python test_batch.py --imglist trainB/%d_bmold_win3.txt --gpu %d' % (n,gpu_id)
os.system(cmd)
cmd = 'cd arcface/; python test_batch.py --imglist testB/%d_bmold_win3.txt --gpu %d' % (n,gpu_id)
os.system(cmd)


# fine tune the mapping
# n is turned into a string here because the commands below format it with %s.
n = str(n)
cmd = 'python train.py --dataroot %s_bmold_win3 --name memory_seq_p2p/%s --model memory_seq --continue_train --epoch 0 --epoch_count 1 --lambda_mask 2 --lr 0.0001 --display_env memory_seq_%s --gpu_ids %d --niter 60 --niter_decay 0' % (n,n,n,gpu_id)
os.system(cmd)
# Remove every .pth in the checkpoint folder except the epoch-60 ones.
save_each_60('checkpoints/memory_seq_p2p/%s'%n)

# Run test.py with the epoch-60 checkpoint; the image folder is named after the epoch.
epoch = 60
cmd = 'python test.py --dataroot %s_bmold_win3 --name memory_seq_p2p/%s --model memory_seq --num_test 200 --epoch %d --gpu_ids %d --imagefolder images%d' % (n,n,epoch,gpu_id,epoch)
os.system(cmd)

### Finetune gan

Finetune on the target person

In [None]:
# Arguments: <person id> <gpu id> (the script reads them from sys.argv[1] and sys.argv[2]).
!cd render-to-video/; python train_19news_1.py 31 0

## Test

### Change matlab in test_personalized.py & test_personalized2.py to octave

In [None]:
# Display the current script so it can be compared with the patched version written below.
pycat Audio/code/test_personalized.py

In [None]:
%%writefile Audio/code/test_personalized.py
#encoding:utf-8
#test different audio
import os
from choose_bg_gexinghua2_reassign import choose_bg_gexinghua2_reassign2
from trans_with_bigbg import merge_with_bigbg
import glob
import pdb
from PIL import Image
import numpy as np
import sys

def getsingle(srcdir,name,varybg=0,multi=0):
        """Write the sorted list of blended frames in ``srcdir`` to a list file.

        The list goes to
        ../../render-to-video/datasets/list/testSingle/<name>.txt (relative to
        the current working directory), one absolute image path per line.

        Args:
                srcdir: frame directory, resolved against the current working directory.
                name: base name of the list file.
                varybg: when truthy, list ``*_blend2.png`` files; otherwise ``*_blend.png``.
                multi: when truthy, leave out the first two images of the sorted list.
        """
        suffix = '*_blend2.png' if varybg else '*_blend.png'
        pattern = os.path.join(os.getcwd(), srcdir, suffix)
        print('srcdir', pattern)
        imgs = sorted(glob.glob(pattern))
        if multi:
                imgs = imgs[2:]
        listdir = '../../render-to-video/datasets/list/testSingle'
        # exist_ok avoids the check-then-create race; `with` closes the list
        # file even if writing fails part-way.
        os.makedirs(listdir, exist_ok=True)
        with open('%s/%s.txt' % (listdir, name), 'w') as f1:
                for im in imgs:
                        print(im, file=f1)

# Command line: python test_personalized.py <audio base name> <person id> [gpu id]
# These values are read at module level, outside the __main__ guard, so the
# arguments are needed even when the file is only imported.
gpu_id = 0 if len(sys.argv) < 4 else int(sys.argv[3])
# start: first frame index; ganepoch / audioepoch: checkpoint epochs used in
# the GAN test command and in the audio model path below.
start=0;ganepoch=60;audioepoch=99


audiobasen=sys.argv[1]
n = int(sys.argv[2])#person id

if __name__ == "__main__":
        person = str(n)
        # Look for the input audio as ../audio/<name>.wav first, then <name>.mp3.
        if os.path.exists(os.path.join('../audio/',audiobasen+'.wav')):
                in_file = os.path.join('../audio/',audiobasen+'.wav')
        elif os.path.exists(os.path.join('../audio/',audiobasen+'.mp3')):
                in_file = os.path.join('../audio/',audiobasen+'.mp3')
        else:
                print('audio file not exists, please put in %s'%os.path.join(os.getcwd(),'../audio'))
                exit(-1)

        audio_exp_name = 'atcnet_pose0_con3/'+person
        audiomodel=os.path.join(audio_exp_name,audiobasen+'_%d'%audioepoch)
        sample_dir = os.path.join('../results/',audiomodel)
        ganmodel='memory_seq_p2p/%s'%person;post='_full9'
        # pingyi is passed on as `tran=` to the background matcher and as an
        # argument to render_for_view2.py.
        pingyi = 1;
        seq='rseq_'+person+'_'+audiobasen+post
        if audioepoch == 49:
                seq='rseq_'+person+'_'+audiobasen+'_%d%s'%(audioepoch,post)


        ## 1.audio to 3dmm
        # Skipped when the first output file (00000.npy) already exists.
        if not os.path.exists(sample_dir+'/00000.npy'):
                add = '--model_name ../model/%s/atcnet_lstm_%d.pth --pose 1 --relativeframe 0' % (audio_exp_name,audioepoch)
                print('python atcnet_test1.py --device_ids %d %s --sample_dir %s --in_file %s' % (gpu_id,add,sample_dir,in_file))
                os.system('python atcnet_test1.py --device_ids %d %s --sample_dir %s --in_file %s' % (gpu_id,add,sample_dir,in_file))

        ## 2.background matching
        speed=1
        num = 300
        bgdir = choose_bg_gexinghua2_reassign2('19_news/'+person, audiobasen, start, audiomodel, num=num, tran=pingyi, speed=speed)


        ## 3.render to save_dir
        coeff_dir = os.path.join(sample_dir,'reassign')
        rootdir = '../../Deep3DFaceReconstruction/output/coeff/'
        tex2_path = ''
        coef_path1 = rootdir+'19_news/'+person+'/frame%d.mat'%start
        save_dir = os.path.join(sample_dir,'R_%s_reassign2'%person)
        relativeframe = 2
        os.system('CUDA_VISIBLE_DEVICES=%d python render_for_view2.py %s %s %s %d %d %s'%(gpu_id,coeff_dir,coef_path1,save_dir, relativeframe,pingyi,tex2_path))


        ## 4.blend rendered with background
        # Runs the Octave function alpha_blend_vbg from ../results with the image package loaded.
        srcdir = save_dir
        #if not os.path.exists(save_dir+'/00000_blend2.png'):
        cmd = "cd ../results; octave --eval \"pkg load image; alpha_blend_vbg('" + bgdir + "','" + srcdir + "'); quit;\""
        os.system(cmd)

        ## 5.gan
        sample_dir2 = '../../render-to-video/results/%s/test_%d/images%s/'%(ganmodel,ganepoch,seq)
        #if not os.path.exists(sample_dir2):
        # varybg=1 lists the *_blend2.png frames; multi=1 drops the first two of them.
        getsingle(save_dir,seq,1,1)
        os.system('cd ../../render-to-video; python test_memory.py --dataroot %s --name %s --netG unetac_adain_256 --model test --Nw 3 --norm batch --dataset_mode single_multi --use_memory 1 --attention 1 --num_test 10000 --epoch %d --gpu_ids %d --imagefolder images%s'%(seq,ganmodel,ganepoch,gpu_id,seq))


        # Fill frames 00000 and 00001 with copies of frame 00002.
        # NOTE(review): presumably needed because the first two inputs were left
        # out of the list above, so no output exists for them - confirm.
        os.system('cp '+sample_dir2+'/R_'+person+'_reassign2-00002_blend2_fake.png '+sample_dir2+'/R_'+person+'_reassign2-00000_blend2_fake.png')
        os.system('cp '+sample_dir2+'/R_'+person+'_reassign2-00002_blend2_fake.png '+sample_dir2+'/R_'+person+'_reassign2-00001_blend2_fake.png')
        
        # Encode the generated frames at 25 fps, mux in the input audio as a .mov,
        # then delete the intermediate silent .mp4.
        video_name = os.path.join(sample_dir,'%s_%swav_results%s.mp4'%(person,audiobasen,post))
        command = 'ffmpeg -loglevel panic -framerate 25  -i ' + sample_dir2 +  '/R_' + person + '_reassign2-%05d_blend2_fake.png -c:v libx264 -y -vf format=yuv420p ' + video_name
        os.system(command)
        command = 'ffmpeg -loglevel panic -i ' + video_name + ' -i ' + in_file + ' -vcodec copy  -acodec copy -y  ' + video_name.replace('.mp4','.mov')
        os.system(command)
        os.remove(video_name)
        print('saved to',video_name.replace('.mp4','.mov'))

        merge_with_bigbg(audiobasen,n)

In [None]:
# Display the current script so it can be compared with the patched version written below.
pycat Audio/code/test_personalized2.py

In [None]:
%%writefile Audio/code/test_personalized2.py
#encoding:utf-8
#pose from short video
import os
from trans_with_bigbg import merge_with_bigbg
import glob
import pdb
from PIL import Image
import numpy as np
import sys
from scipy.io import loadmat,savemat
import math
import shutil

def getsingle(srcdir,name,varybg=0,multi=0):
        """Write the sorted list of blended frames in ``srcdir`` to a list file.

        The list goes to
        ../../render-to-video/datasets/list/testSingle/<name>.txt (relative to
        the current working directory), one absolute image path per line.

        Args:
                srcdir: frame directory, resolved against the current working directory.
                name: base name of the list file.
                varybg: when truthy, list ``*_blend2.png`` files; otherwise ``*_blend.png``.
                multi: when truthy, leave out the first two images of the sorted list.
        """
        suffix = '*_blend2.png' if varybg else '*_blend.png'
        pattern = os.path.join(os.getcwd(), srcdir, suffix)
        print('srcdir', pattern)
        imgs = sorted(glob.glob(pattern))
        if multi:
                imgs = imgs[2:]
        listdir = '../../render-to-video/datasets/list/testSingle'
        # exist_ok avoids the check-then-create race; `with` closes the list
        # file even if writing fails part-way.
        os.makedirs(listdir, exist_ok=True)
        with open('%s/%s.txt' % (listdir, name), 'w') as f1:
                for im in imgs:
                        print(im, file=f1)

def dreassign2(video, audio, start, audiomodel='', num=300, debug=0, tran=0):
        """Pair every predicted audio frame with a background frame of ``video``.

        Loads ``num`` consecutive coefficient files ``frame<start>.mat`` ...
        ``frame<start+num-1>.mat`` of the background video and walks through them
        back and forth (0..num-1, num-1..0, ...) for as many frames as there are
        ``*.npy`` predictions in ``../results/<audiomodel>``. For each prediction it

        * saves ``../results/<audiomodel>/reassign/%05d.npy`` in which entries
          ``[64:70]`` are the chosen background frame's ``coeff[:, 224:227]`` and
          ``coeff[:, 254:257]`` (presumably head rotation and translation --
          confirm against the Deep3DFaceReconstruction coefficient layout);
        * copies the matching background image to ``<returned dir>/%05d.png``.

        All paths are relative to the current working directory.

        Args:
                video: sub-path of the background video below
                        ``Deep3DFaceReconstruction/output/{coeff,render}``.
                audio: audio base name, only used to name the output directory.
                start: index of the first background frame to use.
                audiomodel: sub-folder of ``../results`` holding the predictions.
                num: number of background frames to cycle through.
                debug: if truthy, also encode the copied images to
                        ``<returned dir>.mp4`` with ffmpeg.
                tran: 0 keeps the 70-value vectors and copies ``frame%d.png``;
                        any other value saves 73-value vectors that additionally
                        carry the mean of ``lm_5p`` and the background frame
                        index, copying ``frame%d_input2.png`` for 1,
                        ``frame%d.png`` for 2 and no image otherwise.

        Returns:
                The directory that receives the copied background images.
        """
        print(video,audio,start,audiomodel)
        rootdir = '../..//Deep3DFaceReconstruction/'
        matdir = os.path.join(rootdir,'output/coeff',video)
        pngdir = os.path.join(rootdir,'output/render',video)
        L = 64
        folder_to_process = '../results/' + audiomodel
        files = sorted(glob.glob(os.path.join(folder_to_process,'*.npy')))
        tardir = os.path.join('../results/chosenbg','%s_%s'%(audio,video))
        if audiomodel != '':
                tardir = os.path.join('../results/chosenbg','%s_%s_%s'%(audio,video,audiomodel.replace('/','_')))
        tardir2 = os.path.join(tardir, 'reassign')
        print(tardir2)
        os.makedirs(tardir2, exist_ok=True)

        # Per background frame: 6 values copied from the coefficient file and,
        # when requested, the mean of its lm_5p entries.
        sucai = np.zeros((num,6))
        lm_5p = np.zeros((num,2))
        for i in range(start,start+num):
                # Format only the file name so a '%' in matdir cannot break the path.
                coeff = loadmat(os.path.join(matdir,'frame%d.mat'%i))
                sucai[i-start,:3] = coeff['coeff'][:,224:227]
                sucai[i-start,3:] = coeff['coeff'][:,254:257]
                if tran:
                        lm_5p[i-start,:] = np.mean(coeff['lm_5p'],axis=0)

        N = len(files)
        datasall = np.zeros((N,70))
        for i in range(N):
                datasall[i] = np.load(files[i])

        # reassign: run forwards through the background frames on even passes and
        # backwards on odd passes, so the background never jumps at a wrap-around.
        assigns = [0] * N
        for i in range(N):
                offset = i % num
                assigns[i] = offset if (i // num) % 2 == 0 else num - 1 - offset
        print(assigns)

        reassign_dir = folder_to_process+'/reassign'
        if not os.path.exists(reassign_dir):
                os.mkdir(reassign_dir)
        for i in range(N):
                bg = assigns[i]
                if tran == 0:
                        data = datasall[i]
                        data[L:L+6] = sucai[bg]
                else:
                        data = np.zeros((L+9))
                        data[:L] = datasall[i,:L]
                        data[L:L+6] = sucai[bg]
                        data[L+6:L+8] = lm_5p[bg]
                        data[L+8] = bg+start
                savename = os.path.join(folder_to_process,'reassign','%05d.npy'%i)
                np.save(savename, data)
                if tran == 0 or tran == 2:
                        shutil.copy(os.path.join(pngdir,'frame%d.png'%(bg+start)),
                                os.path.join(tardir2,'%05d.png'%i))
                elif tran == 1:
                        shutil.copy(os.path.join(pngdir,'frame%d_input2.png'%(bg+start)),
                                os.path.join(tardir2,'%05d.png'%i))

        if debug:
                os.system('ffmpeg -loglevel panic -framerate 25 -i ' + tardir2 + '/%05d.png -c:v libx264 -y -vf format=yuv420p ' + tardir2 + '.mp4')

        return tardir2

# Usage: python test_personalized2.py <audio base name> <person id> [gpu id]
gpu_id = 0 if len(sys.argv) < 4 else int(sys.argv[3])
# start: index of the first background frame (passed to dreassign2 and used for
#        the reference coefficient file given to the renderer).
# ganepoch / audioepoch: epoch numbers that select the GAN and audio-model
#        checkpoints used below.
start = 0
ganepoch = 60
audioepoch = 99


audiobasen = sys.argv[1]  # audio file name without extension, looked up in ../audio/
n = int(sys.argv[2])  # person id

if __name__ == "__main__":
        person = str(n)
        # Accept either <name>.wav or <name>.mp3, preferring the .wav.
        for ext in ('.wav', '.mp3'):
                candidate = os.path.join('../audio/',audiobasen+ext)
                if os.path.exists(candidate):
                        in_file = candidate
                        break
        else:
                print('audio file not exists, please put in %s'%os.path.join(os.getcwd(),'../audio'))
                # sys.exit rather than the interactive-only exit() helper from `site`.
                sys.exit(-1)

        audio_exp_name = 'atcnet_pose0_con3/'+person
        audiomodel = os.path.join(audio_exp_name,audiobasen+'_%d'%audioepoch)
        sample_dir = os.path.join('../results/',audiomodel)
        ganmodel = 'memory_seq_p2p/%s'%person
        post = '_full9'
        # Forwarded as `tran` to dreassign2 and as an argument to render_for_view2.py.
        pingyi = 1
        seq = 'rseq_'+person+'_'+audiobasen+post
        if audioepoch == 49:
                seq = 'rseq_'+person+'_'+audiobasen+'_%d%s'%(audioepoch,post)


        ## 1.audio to 3dmm (skipped when predictions from an earlier run exist)
        if not os.path.exists(sample_dir+'/00000.npy'):
                add = '--model_name ../model/%s/atcnet_lstm_%d.pth --pose 1 --relativeframe 0' % (audio_exp_name,audioepoch)
                cmd = 'python atcnet_test1.py --device_ids %d %s --sample_dir %s --in_file %s' % (gpu_id,add,sample_dir,in_file)
                print(cmd)
                os.system(cmd)

        ## 2.background matching
        num = 300
        bgdir = dreassign2('19_news/'+person, audiobasen, start, audiomodel, num=num, tran=pingyi)


        ## 3.render to save_dir
        coeff_dir = os.path.join(sample_dir,'reassign')
        rootdir = '../../Deep3DFaceReconstruction/output/coeff/'
        tex2_path = ''
        coef_path1 = rootdir+'19_news/'+person+'/frame%d.mat'%start
        save_dir = os.path.join(sample_dir,'R_%s_reassign2'%person)
        relativeframe = 2
        os.system('CUDA_VISIBLE_DEVICES=%d python render_for_view2.py %s %s %s %d %d %s'%(gpu_id,coeff_dir,coef_path1,save_dir, relativeframe,pingyi,tex2_path))


        ## 4.blend rendered with background
        srcdir = save_dir
        #if not os.path.exists(save_dir+'/00000_blend2.png'):
        cmd = "cd ../results; octave --eval \"pkg load image; alpha_blend_vbg('" + bgdir + "','" + srcdir + "'); quit;\""
        os.system(cmd)

        ## 5.gan
        sample_dir2 = '../../render-to-video/results/%s/test_%d/images%s/'%(ganmodel,ganepoch,seq)
        #if not os.path.exists(sample_dir2):
        getsingle(save_dir,seq,1,1)
        os.system('cd ../../render-to-video; python test_memory.py --dataroot %s --name %s --netG unetac_adain_256 --model test --Nw 3 --norm batch --dataset_mode single_multi --use_memory 1 --attention 1 --num_test 10000 --epoch %d --gpu_ids %d --imagefolder images%s'%(seq,ganmodel,ganepoch,gpu_id,seq))


        # Frame 00002 is duplicated as frames 00000 and 00001 so that the %05d
        # image sequence given to ffmpeg starts at 0 (presumably the GAN output
        # starts at 00002 because getsingle(..., multi=1) drops the first two
        # frames from its input list). Best effort, like the original `cp`.
        fake_prefix = sample_dir2+'/R_'+person+'_reassign2-'
        first_fake = fake_prefix+'00002_blend2_fake.png'
        if os.path.exists(first_fake):
                for missing in ('00000', '00001'):
                        shutil.copy(first_fake, fake_prefix+missing+'_blend2_fake.png')
        else:
                print('warning: %s not found, the GAN step probably failed'%first_fake)

        video_name = os.path.join(sample_dir,'%s_%swav_results%s.mp4'%(person,audiobasen,post))
        mov_name = video_name.replace('.mp4','.mov')
        command = 'ffmpeg -loglevel panic -framerate 25  -i ' + fake_prefix + '%05d_blend2_fake.png -c:v libx264 -y -vf format=yuv420p ' + video_name
        os.system(command)
        if not os.path.exists(video_name):
                # Without this check the os.remove() below fails with a bare FileNotFoundError.
                sys.exit('ffmpeg did not produce %s; check the GAN output in %s'%(video_name,sample_dir2))
        command = 'ffmpeg -loglevel panic -i ' + video_name + ' -i ' + in_file + ' -vcodec copy  -acodec copy -y  ' + mov_name
        if os.system(command) == 0:
                # The silent intermediate video is only dropped once the muxed file was written.
                os.remove(video_name)
                print('saved to',mov_name)
        else:
                print('ffmpeg could not add the audio track; silent video kept at',video_name)

        merge_with_bigbg(audiobasen,n)

### Test on person 31 with audio 03Fsi1831.wav

In [None]:
!cd Audio/code/; python test_personalized2.py 03Fsi1831 31 0

Results are saved (paths relative to `Audio/code/`) to `../results/atcnet_pose0_con3/31/03Fsi1831_99/31_03Fsi1831wav_results_full9.mov`
and `../results/atcnet_pose0_con3/31/03Fsi1831_99/31_03Fsi1831wav_results_transbigbg.mov`.

### Show the result video

In [None]:
from IPython.display import HTML
from base64 import b64encode

video_path = 'Audio/results/atcnet_pose0_con3/31/03Fsi1831_99/31_03Fsi1831wav_results_transbigbg.mov'

# Read the clip through a context manager so the file handle is closed right away.
with open(video_path,'rb') as video_file:
    mp4 = video_file.read()
# Embed the raw bytes as a base64 data URL so the player needs no separate file to serve.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

In [None]:
# Render an inline HTML5 video player whose source is the base64 data URL in `data_url`;
# as the last expression of the cell, the HTML object is displayed as the cell output.
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)