# **Dr Watson**
1. Ensure you're on a GPU runtime
  * Runtime -> Change Runtime Type -> Hardware accelerator -> GPU
2. The data files are not bundled with this notebook and will not load automatically; please download them from my repo and update the file paths

## General Dependencies

In [0]:
%tensorflow_version 1.x
!pip install -q gpt-2-simple
import gpt_2_simple as gpt2

from google.colab import files

import numpy as np
import imageio
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
import warnings
warnings.filterwarnings("ignore")

## Text Generation



1. Download GPT model
2. Download fine-tuning text
3. Fine-tune GPT-2 for style, printing the training progress

In [0]:
# Fetch the pretrained 124M GPT-2 checkpoint that fine-tuning starts from.
gpt2.download_gpt2(model_name='124M')

# files.upload() returns a dict {filename: file bytes} and also writes each
# uploaded file into the working directory. finetune() needs the path of the
# text file, not the dict, so keep only the uploaded filename.
uploaded_tuning_files = files.upload()  # drWatson/text_gen/sherlock.txt
tuning_file = next(iter(uploaded_tuning_files))

new_session = gpt2.start_tf_sess()

# Fine-tune from the fresh pretrained weights under the run name 'run0';
# the later generation cell refers to the same run name.
gpt2.finetune(sess=new_session,
              dataset=tuning_file,
              model_name='124M',
              steps=1000,
              restore_from='fresh',
              run_name='run0',
              print_every=10,
              sample_every=200,
              save_every=500)

4. Set your destination filepath
5. Generate 20 samples, 500 tokens in length

In [0]:
# Destination for the generated samples -- YOUR FILEPATH HERE
gen_file = 'gen_sherlock.txt'

# 20 samples of 500 tokens each, generated 4 at a time from the 'run0' model.
gpt2.generate_to_file(
    sess=new_session,
    run_name='run0',
    destination_path=gen_file,
    nsamples=20,
    batch_size=4,
    length=500,
    temperature=1.0,
)

## Audio Generation

1. Install/Import specific dependencies

In [0]:
# Set up the Real-Time-Voice-Cloning project: clone it, install what it needs,
# download its pretrained weights, then import its inference modules.
import os
from os.path import exists, join, basename, splitext
voice_repo_url = 'https://github.com/CorentinJ/Real-Time-Voice-Cloning.git'
# Directory name git will clone into: the URL's last component without ".git".
project_name = splitext(basename(voice_repo_url))[0]
# Skip the whole clone/install/download step when the checkout already exists,
# so re-running this cell does not repeat it.
if not exists(project_name):
  # clone and install
  !git clone -q --recursive {voice_repo_url}
  # install dependencies
  # (the `cd` only applies to this one shell line; the kernel's cwd is unchanged)
  !cd {project_name} && pip install -q -r requirements.txt
  !pip install -q gdown
  !apt-get install -qq libportaudio2
  !pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

  # download pretrained model
  # (fetched with gdown from Google Drive and unzipped inside the checkout)
  !cd {project_name} && gdown https://drive.google.com/uc?id=1n1sPXvT34yXFLT47QZA6FIRGrwMeSsZc && unzip pretrained.zip

# Make the cloned repo importable. The path is relative, so the imports below
# must run while the working directory still contains the checkout.
import sys
sys.path.append(project_name)

from IPython.display import display, Audio, clear_output, HTML
from IPython.utils import io
import ipywidgets as widgets
from dl_colab_notebooks.audio import upload_audio

# These three packages are resolved through the sys.path entry added above.
from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder import inference as vocoder
from pathlib import Path

2. Load encoder, synthesizer, and vocoder from R-TVC
3. Prepare text synthesis (takes embedding tensor, text string)

In [0]:
# Load the three pretrained stages (speaker encoder, spectrogram synthesizer,
# vocoder); every checkpoint path is resolved inside the cloned repository.
encoder.load_model(Path(project_name, "encoder/saved_models/pretrained.pt"))
synthesizer = Synthesizer(
    Path(project_name, "synthesizer/saved_models/logs-pretrained/taco_pretrained")
)
vocoder.load_model(Path(project_name, "vocoder/saved_models/pretrained/pretrained.pt"))

def synthesize(embed, text):
  """Synthesize `text` in the voice described by `embed` and play it inline.

  Args:
    embed: speaker embedding passed to the synthesizer alongside the text.
    text: the string to speak.

  Returns:
    None. The audio is shown as an autoplaying IPython Audio widget.
  """
  # Bind IPython's display locally. The Motion Transfer section of this
  # notebook defines its own global `display(source, driving, generated)`,
  # which would otherwise shadow the IPython function and break this call
  # whenever synthesize() runs after that cell.
  from IPython.display import display as ipython_display

  print("Synthesizing new audio...")
  # One text and one embedding go in, so only the first spectrogram is used.
  specs = synthesizer.synthesize_spectrograms([text], [embed])
  generated_wav = vocoder.infer_waveform(specs[0])
  # Append `sample_rate` zero samples, i.e. one second of trailing silence.
  generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant")
  # Drop the progress output before showing the player.
  clear_output()
  ipython_display(Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True))

4. Load saved embedding
5. Synthesize audio from text sample

In [0]:
# files.upload() returns a dict {filename: file bytes} and also writes each
# uploaded file into the working directory. np.load() and open() need a path,
# so take the uploaded filename out of the dict first.
embedding_path = next(iter(files.upload()))  # drWatson/voice/embedding_8.npy
embedding = np.load(embedding_path)

sample_path = next(iter(files.upload()))  # drWatson/samples/text_samples/sample0.txt
with open(sample_path, mode='r') as text_file:
  # Only the first 150 characters of the sample are spoken.
  text = text_file.read()[:150]
synthesize(embedding, text)

In [0]:
# files.upload() returns a dict {filename: file bytes} and also writes the file
# into the working directory; open() needs the filename, not the dict.
sample_path = next(iter(files.upload()))  # drWatson/samples/text_samples/sample1.txt
with open(sample_path, mode='r') as text_file:
  # Speak characters 36..149 only (the first 36 characters are skipped).
  text = text_file.read()[36:150]
synthesize(embedding, text)

In [0]:
# files.upload() returns a dict {filename: file bytes} and also writes the file
# into the working directory; open() needs the filename, not the dict.
sample_path = next(iter(files.upload()))  # drWatson/samples/text_samples/sample2.txt
with open(sample_path, mode='r') as text_file:
  # Only the first 150 characters of the sample are spoken.
  text = text_file.read()[:150]
synthesize(embedding, text)

## Motion Transfer

1. Import specific dependencies
2. Mount drive (must have Google account)

In [0]:
!git clone https://github.com/AliaksandrSiarohin/first-order-model
!cd first-order-model

from google.colab import drive
drive.mount('/content/gdrive')

from demo import load_checkpoints
from demo import make_animation
from skimage import img_as_ubyte

Cloning into 'first-order-model'...
remote: Enumerating objects: 4, done.[K
remote: Counting objects: 100% (4/4), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 219 (delta 0), reused 1 (delta 0), pack-reused 215[K
Receiving objects: 100% (219/219), 72.34 MiB | 30.37 MiB/s, done.
Resolving deltas: 100% (106/106), done.
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


**Add the folder https://drive.google.com/drive/folders/1kZ1gCnpfU0BnpdU47pLM_TQ6RypDDqgw?usp=sharing to your Google Drive.**

3. Prepare side-by-side-by-side video display
4. Load trained checkpoints from first-order-motion-model

In [0]:
def display(source, driving, generated=None):
    """Build a side-by-side animation of source, driving and generated frames.

    Every animation frame is the still `source` image, the i-th `driving`
    frame and, when `generated` is given, the i-th generated frame, joined
    horizontally. Returns a matplotlib ArtistAnimation; the figure is closed
    so it does not also render as a static plot.
    """
    has_generated = generated is not None
    # Widen the canvas by 4 inches when there is a third panel to show.
    fig = plt.figure(figsize=(8 + 4 * has_generated, 6))

    frames = []
    for idx in range(len(driving)):
        panels = [source, driving[idx]]
        if has_generated:
            panels.append(generated[idx])
        strip = np.concatenate(panels, axis=1)
        artist = plt.imshow(strip, animated=True)
        plt.axis('off')
        frames.append([artist])

    movie = animation.ArtistAnimation(fig, frames, interval=50, repeat_delay=1000)
    plt.close()
    return movie

# Build the generator and keypoint detector from the vox-256 config (relative
# to the first-order-model checkout) and the checkpoint on the mounted Drive.
generator, kp_detector = load_checkpoints(
    config_path='config/vox-256.yaml',
    checkpoint_path='/content/gdrive/My Drive/first-order-motion-model/vox-cpk.pth.tar',
)

5. Load reference image, driving video
6. Resize inputs
7. Generate predicted video (no sound)

In [0]:
#https://github.com/imageio/imageio/issues/168
def extract_frames(videofilename):
  """Read every frame of a video into a list.

  Iterating the reader may raise RuntimeError part-way through (see the
  imageio issue linked above). That error is treated as best-effort: it is
  reported and the frames collected so far are returned.

  Args:
    videofilename: path of the video file to read.

  Returns:
    list of frames, in order; empty if nothing could be read.
  """
  frames = []
  try:
    reader = imageio.get_reader(videofilename)
    try:
      for frame in reader:
        frames.append(frame)
    finally:
      # Always release the reader, even when decoding stops early.
      reader.close()
  except RuntimeError:
    print('runtime error')
  return frames

# files.upload() returns a dict {filename: file bytes} and also writes each
# uploaded file into the working directory. imageio needs a path, so take the
# uploaded filename out of the dict first.
source_path = next(iter(files.upload()))  # drWatson/watson_graphic.png
driving_path = next(iter(files.upload()))  # drWatson/samples/video_samples/training_samples/sample2_train.mp4

source_image = imageio.imread(source_path)
driving_video = extract_frames(driving_path)

# Resize everything to 256x256 and keep only the first three channels
# (drops an alpha channel if the input has one).
source_image = resize(source_image, (256, 256))[..., :3]
driving_video = [resize(frame, (256, 256))[..., :3] for frame in driving_video]

predictions = make_animation(source_image, driving_video, generator, kp_detector, relative=True, adapt_movement_scale=False)
imageio.mimsave('gen_sample2.mp4', [img_as_ubyte(frame) for frame in predictions])# Stored at drWatson/samples/video_samples/tuned_samples/sample2.mp4

# Show source / driving / generated side by side as an inline HTML5 video.
HTML(display(source_image, driving_video, predictions).to_html5_video())

100%|██████████| 174/174 [00:06<00:00, 25.88it/s]
