In [None]:
# ------------------------------------------------------------------------------
#     Copyright 2022 Google LLC. All Rights Reserved.
#
#     Licensed under the Apache License, Version 2.0 (the "License");
#     you may not use this file except in compliance with the License.
#     You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
#     Unless required by applicable law or agreed to in writing, software
#     distributed under the License is distributed on an "AS IS" BASIS,
#     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#     See the License for the specific language governing permissions and
#     limitations under the License.
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
# User Interface
# ------------------------------------------------------------------------------

#@markdown #  Train your own DDSP-VST Model
#@markdown 🎻🎺🎸🎵 [g.co/magenta/train-ddsp-vst](https://g.co/magenta/train-ddsp-vst)

#@markdown <br/>

#@markdown ## Instructions

#@markdown * Create a folder in Google Drive with your training audio (`.wav` or `.mp3`)

Name = 'My Instrument' #@param {type:"string"}
# Swap spaces for underscores: `Name` is the default `model_name` of
# `export_and_download`, where it is used in paths and shell commands.
Name = Name.replace(' ', '_')


#@markdown * Press the ▶️ button in the upper left!

#@markdown * Login to your Google account when asked

#@markdown *  Select your folder with the file chooser below when asked

#@markdown *  Wait (with this window open) for training to finish and download the model

#@markdown *  If something breaks, resume training by refreshing this page, pressing ▶️, and choosing the same folder



#@markdown <br/>

#@markdown <br/>

#@markdown ## Data
#@markdown Custom models can train on as little as 10 minutes of audio (`.wav` or `.mp3`). You can get the best results from "monophonic" (only one note at a time) audio from a single recording session (same mic, same reverb). All of your data is private, used locally, and erased as soon as your colab session ends.

#@markdown We recommend using Google Drive to load data faster and save your model during training. Just create a folder on your drive with your audio files in it, and select the folder. If you don't use drive, you can still upload audio through the browser (slower) and download the final trained model.


#@markdown ## Training
#@markdown Training typically takes ~2-3 hours with free Colab, and less than an hour with ColabPro+. Free Colab can sometimes disconnect before models finish training, but there are some unofficial [ways around this](https://stackoverflow.com/questions/57113226/how-to-prevent-google-colab-from-disconnecting). If you do get disconnected, don't worry, just press play again and choose the same folder. The training will resume where it left off.




#@markdown ## Export

#@markdown After training, the colab should automatically export, zip, and download your model folder. To use it, just unzip and drop the full folder in the plugin's model folder (Mac: `~/Documents/Magenta/DDSP/Models`, which you can also find from inside the plugin).

#@markdown If it doesn't automatically download, you can also find it in your training folder (`ddsp-training-{date-time}/{Name}`). Also, you'll likely see a bunch of warnings like `Value in checkpoint could not be found in the restored object`. Don't worry, that's normal :).


#@markdown <br/> <br/>
#@markdown ## Advanced Options

##@markdown <a href="https://colab.research.google.com/github/magenta/ddsp/blob/main/ddsp/colab/demos/Train_VST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#@markdown <br/>
#@markdown  Training usually produces good results after 30k-50k steps, but your results may vary depending on your audio files/instrument. Too few steps will often make the model sound bland/generic; too many steps can often lead to more "sputtering" and big volume fluctuations.

# Passed to `train()` as `steps` from `run_training`.
Training_Steps = 30000 #@param {type:"integer"}

#@markdown <br/>
#@markdown Ignore previous checkpoints in the folder and start a fresh run from step 0

# Read by `get_model_dir`: when True, a new timestamped training folder is
# created even if earlier `ddsp-training-*` folders exist.
Ignore_Previous = False #@param {type:"boolean"}


#@markdown <br/>
#@markdown Use Google Drive for training? Otherwise, loads audio from browser, which is much slower
# Passed to `run()` at the bottom of this cell.
Google_Drive = True #@param {type:"boolean"}



# Sample_Rate = '16kHz'  #@param ['16kHz', '32kHz', '48kHz']
# Sample_Rate = {'16kHz': 16000, '32kHz': 32000, '48kHz': 48000}[Sample_Rate]
# Model_Gin_File = 'models/vst/vst.gin'


#@markdown ---
#@markdown <sub> This notebook sends anonymous usage data (i.e. training time) to Google Analytics to help improve/debug training. All audio and model information is private, and not sent or stored. For more information, see [Google's privacy policy](https://policies.google.com/privacy). </sub>




# ------------------------------------------------------------------------------
# Imports (not DDSP Dependent)
# ------------------------------------------------------------------------------
# Suppress warnings that obscure output.
import warnings
warnings.filterwarnings("ignore")

import datetime
import glob
import os
import shutil
import time
import IPython
import json
import subprocess

from google.colab import drive
import tensorflow as tf

!pip install ipyfilechooser==0.6.0 &> /dev/null
from ipyfilechooser import FileChooser

# ------------------------------------------------------------------------------
# Logging
# ------------------------------------------------------------------------------
# The below functions (load_gtag and log_event) handle Google Analytics event
# logging. The logging is anonymous and stores only very basic statistics of the
# training (i.e. whether it completed, how long it took, etc.) to help debug
# and improve the training experience.
# No data or audio is stored or transferred. Everything happens locally to this
# colab instance (and your google drive), and is deleted once the browser
# window is closed.


def load_gtag():
  """Displays the Google Analytics gtag.js snippet in the cell output."""
  # gtag.js does NOT persist across cell executions, so it has to be displayed
  # in the same cell execution as the `log_event` calls that depend on it.
  gtag_snippet = '''
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-ZKDC5WXJQN"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());
  gtag('config', 'G-ZKDC5WXJQN',
       {'referrer': document.referrer.split('?')[0],
        'anonymize_ip': true,
        'page_title': '',
        'page_referrer': '',
        'cookie_prefix': 'magenta',
        'cookie_domain': 'auto',
        'cookie_expires': 0,
        'cookie_flags': 'SameSite=None;Secure'});
</script>
'''
  snippet_output = IPython.display.HTML(gtag_snippet)
  IPython.display.display(snippet_output)

def log_event(event_name, event_details):
  """Emits a gtag 'event' call as Javascript in the cell output.

  Args:
    event_name: Name of the event, inserted between single quotes.
    event_details: JSON-serializable dictionary sent as the event parameters.
  """
  payload = json.dumps(event_details)
  gtag_call = f"gtag('event', '{event_name}', {payload});"
  IPython.display.display(IPython.display.Javascript(gtag_call))

# Load gtag.js right away: it has to be loaded in the same cell execution as
# the `log_event` calls made further down.
load_gtag()


# ------------------------------------------------------------------------------
# Functions
# ------------------------------------------------------------------------------
def directory_has_files(target_dir):
  """Returns True if `target_dir` has at least one entry matching `*`."""
  entries = glob.glob(os.path.join(target_dir, '*'))
  return bool(entries)


def get_audio_files(drive_dir, audio_dir):
  """Copies the training audio into `audio_dir`.

  Args:
    drive_dir: Folder to read `.mp3` / `.wav` files from. If empty, the files
      are instead requested through `colab_utils.upload()`.
    audio_dir: Existing local folder the audio files are copied into. Spaces
      in file names are replaced by underscores in the copies.

  Raises:
    FileNotFoundError: If `drive_dir` is given but contains no MP3/WAV files.
  """
  if drive_dir:
    # Match extensions case-insensitively so that e.g. `take.WAV` or `riff.Mp3`
    # are picked up too (a plain `*.wav` glob is case-sensitive on Linux).
    audio_exts = ('.mp3', '.wav')
    candidates = sorted(glob.glob(os.path.join(drive_dir, '*')))
    audio_paths = [
        path for path in candidates
        if os.path.isfile(path)
        and os.path.splitext(path)[1].lower() in audio_exts
    ]
    if not audio_paths:
      raise FileNotFoundError("Sorry, it seems that there aren't any MP3 or "
                              f"WAV files in your folder ({drive_dir}). Try "
                              "running again and choose a different folder.")
  else:
    # Only the uploaded file names are needed; the second return value is
    # discarded.
    audio_paths, _ = colab_utils.upload()

  # Copy Audio.
  print('Copying audio to colab for training...')
  for src in audio_paths:
    target = os.path.join(audio_dir,
                          os.path.basename(src).replace(' ', '_'))
    print('Copying {} to {}'.format(src, target))
    shutil.copy(src, target)


def prepare_dataset(audio_dir,
                    data_dir,
                    sample_rate=16000,
                    frame_rate=50,
                    example_secs=4.0,
                    hop_secs=1.0,
                    viterbi=True,
                    center=True):
  """Builds the training TFRecord dataset with `ddsp_prepare_tfrecord`.

  Does nothing (besides printing a message) when `data_dir` already contains
  files. Otherwise runs the `ddsp_prepare_tfrecord` command line tool over
  every file in `audio_dir` and writes `train.tfrecord` shards to `data_dir`.

  Args:
    audio_dir: Folder whose contents (`audio_dir/*`) are used as input audio.
    data_dir: Folder the `train.tfrecord` output is written to.
    sample_rate: Forwarded to `--sample_rate`.
    frame_rate: Forwarded to `--frame_rate`.
    example_secs: Forwarded to `--example_secs`.
    hop_secs: Forwarded to `--hop_secs`.
    viterbi: Forwarded to `--viterbi`.
    center: Forwarded to `--center`.
  """
  # NOTE(review): `run_training` calls `reset_state` first, which removes
  # `data_dir` when it exists, so this early return looks unreachable from
  # that code path -- confirm whether reuse of an existing dataset is intended.
  if directory_has_files(data_dir):
    print(f'Dataset already exists in `{data_dir}`')
    return
  else:
    # Otherwise prepare new dataset locally.
    print(f'Preparing new dataset from `{audio_dir}`')

    print()
    print('Creating dataset...')
    print('This usually takes around 2-3 minutes for each minute of audio')
    print('(10 minutes of training audio -> 20-30 minutes)')

    # The values are wrapped in double quotes because they are interpolated
    # into the shell command below.
    audio_filepattern = os.path.join(audio_dir, '*')
    audio_fp_str = f'"{audio_filepattern}"'
    tfrecord_path_str = f'"{data_dir}/train.tfrecord"'

    # All output of the tool (stdout and stderr) is discarded, so failures of
    # the command are not reported here.
    !ddsp_prepare_tfrecord \
    --input_audio_filepatterns=$audio_fp_str \
    --output_tfrecord_path=$tfrecord_path_str \
    --num_shards=10 \
    --sample_rate=$sample_rate \
    --frame_rate=$frame_rate \
    --example_secs=$example_secs \
    --hop_secs=$hop_secs \
    --viterbi=$viterbi \
    --center=$center &> /dev/null


def train(model_dir, data_dir, steps=30000):
  """Runs `ddsp_run --mode=train` with the VST model gin configuration.

  Args:
    model_dir: Passed as `--save_dir` to `ddsp_run`.
    data_dir: Folder containing the `train.tfrecord*` files to train on.
    steps: Value for the `train_util.train.num_steps` gin parameter.
  """
  file_pattern = os.path.join(data_dir, 'train.tfrecord*')
  # Gin binding that points the TFRecordProvider at the dataset files.
  fp_str = f"TFRecordProvider.file_pattern='{file_pattern}'"
  !ddsp_run \
  --mode=train \
  --save_dir="$model_dir" \
  --gin_file=models/vst/vst.gin \
  --gin_file=datasets/tfrecord.gin \
  --gin_param="$fp_str" \
  --gin_param="TFRecordProvider.centered=True" \
  --gin_param="TFRecordProvider.frame_rate=50" \
  --gin_param="batch_size=16" \
  --gin_param="train_util.train.num_steps=$steps" \
  --gin_param="train_util.train.steps_per_save=300" \
  --gin_param="trainers.Trainer.checkpoints_to_keep=3"

  # Disabled flags for an alternative data provider (currently not used):
  # --gin_param="train.data_provider=@ExperimentalDataProvider()" \
  # --gin_param="ExperimentalRecordProvider.data_dir='$data_dir'" \
  # --gin_param="ExperimentalRecordProvider.sample_rate=16000" \
  # --gin_param="ExperimentalRecordProvider.frame_rate=50" \


def reset_state(data_dir, audio_dir, model_dir):
  """Recreates empty scratch folders and makes sure `model_dir` exists.

  `data_dir` and `audio_dir` are each deleted (if present) and recreated
  empty. `model_dir` is only created when missing; its existing contents are
  left untouched.

  Args:
    data_dir: Local folder for the prepared dataset; emptied.
    audio_dir: Local folder for the copied training audio; emptied.
    model_dir: Folder for training output; created if needed.
  """
  # Pure-Python filesystem calls instead of `!rm -r $dir` / `!mkdir -p $dir`:
  # those interpolate the paths unquoted into a shell command, which breaks on
  # paths containing spaces. Each scratch folder is also handled on its own,
  # so stale audio is cleared even when the dataset folder does not exist.
  for scratch_dir in (data_dir, audio_dir):
    if os.path.isdir(scratch_dir):
      shutil.rmtree(scratch_dir)
    os.makedirs(scratch_dir, exist_ok=True)
  os.makedirs(model_dir, exist_ok=True)


def export_and_download(model_dir, model_name=Name):
  """Exports the trained model, zips the export folder and downloads the zip.

  Args:
    model_dir: Folder passed to `ddsp_export` as `--model_path`. The export
      folder (`model_dir/model_name`) and the zip file are created inside it.
    model_name: Name of the export folder and zip file. The default is the
      value `Name` had when this function was defined.
  """
  export_path = os.path.join(model_dir, model_name)

  # Wrapped in double quotes because they are interpolated into shell commands.
  model_dir_str=f'"{model_dir}"'
  export_path_str=f'"{export_path}"'

  !ddsp_export \
  --name=$model_name \
  --model_path=$model_dir_str \
  --save_dir=$export_path_str \
  --inference_model=vst_stateless_predict_controls \
  --tflite \
  --notfjs

  # Zip the whole directory.
  zip_fname = f'{model_name}.zip'
  zip_fp = os.path.join(model_dir, zip_fname)
  print(f'Export complete! Zipping {export_path} to {zip_fp}')
  # Zip from inside `model_dir` so the archive holds the relative path
  # `./model_name`.
  !cd $model_dir_str && zip -r $zip_fname ./$model_name

  # Download.
  print(f'Zipping Complete! Downloading... {zip_fname}')
  print(f'You can also find your model at {export_path}')
  # `colab_utils` is published as a global by `run_training`.
  colab_utils.download(zip_fp)


def get_model_dir(base_dir):
  """Returns the training folder to use inside `base_dir`.

  Reuses the most recent existing `ddsp-training-*` folder, unless there is
  none or the global `Ignore_Previous` option is set; in that case the path of
  a new folder named after the current date and time is returned (the folder
  itself is not created here).

  Args:
    base_dir: Folder that contains (or will contain) the training folders.

  Returns:
    Path of the training folder.
  """
  base_str = 'ddsp-training'
  # Folder names end in a zero-padded `%Y-%m-%d-%H%M` timestamp, so their
  # lexicographic order is chronological. Sort explicitly instead of relying
  # on the order in which the glob happens to return its matches.
  dirs = sorted(tf.io.gfile.glob(os.path.join(base_dir, f'{base_str}-*')))
  if dirs and not Ignore_Previous:
    model_dir = dirs[-1]  # Last after sorting is the most recent.
  else:
    now = datetime.datetime.now().strftime('%Y-%m-%d-%H%M')
    model_dir = os.path.join(base_dir, f'{base_str}-{now}')
  return model_dir


def get_gpu_type():
  """Returns the name of the first GPU listed by `nvidia-smi`.

  Returns:
    The GPU name, or '' (after printing a notice) when `nvidia-smi` cannot be
    run, exits with an error, or lists no GPU.
  """
  bash_command = "nvidia-smi --query-gpu=name --format=csv"
  try:
    status, output = subprocess.getstatusoutput(bash_command)
  except OSError:
    status, output = 1, ''

  # With `--format=csv` the first line is the column header and the GPU names
  # follow. A missing `nvidia-smi` does not raise: the shell reports it via a
  # non-zero exit status and a single error line, so both are checked instead
  # of blindly indexing into the output.
  rows = [row.strip() for row in output.splitlines() if row.strip()]
  if status != 0 or len(rows) < 2:
    print("GPU device is not available")
    return ''
  return rows[1]

# ------------------------------------------------------------------------------
# Run
# ------------------------------------------------------------------------------
def run(Google_Drive=True):
  """Entry point: logs the setup, then starts training.

  With `Google_Drive` set, Drive is mounted and a folder chooser is displayed;
  training starts once a folder has been selected. Otherwise training starts
  immediately with an empty `drive_dir`, so audio is uploaded manually.

  Args:
    Google_Drive: Whether to read audio from a Google Drive folder.
  """
  log_event('runStarted', {})
  gpu_type = get_gpu_type()
  print(f'Using a {gpu_type} GPU...')
  log_event('gpuType', {'event_category': gpu_type})
  log_event('trainingOnDrive' if Google_Drive else 'trainingLocally', {})

  if not Google_Drive:
    print('Skipping Drive Setup...')
    print('Upload Audio Manually...')
    run_training(drive_dir='')
    return

  print('Mounting Google Drive...')
  drive.mount('gdrive', force_remount=True, timeout_ms=10000)

  def start_training_in(chooser):
    # Called by the chooser after a folder has been picked.
    run_training(drive_dir=chooser.selected_path)

  folder_chooser = FileChooser('gdrive/MyDrive')
  folder_chooser.show_only_dirs = True
  folder_chooser.title = '<b>Pick a folder with (.mp3/.wav) files for training. (Files will not be visible here)...</b>'
  folder_chooser.register_callback(start_training_in)
  display(folder_chooser)


def run_training(drive_dir=''):
  """Runs the full pipeline: install DDSP, build dataset, train, export.

  Each stage (dataset, training, export) is timed and its duration in whole
  minutes is reported through `log_event`.

  Args:
    drive_dir: Folder holding the training audio; the training folder is also
      placed inside it (see `get_model_dir`). When empty, audio is requested
      via upload (see `get_audio_files`).
  """
  log_event('runTrainingStarted', {})
  # ------------------------------------------------------------------------------
  # Install DDSP here to allow selecting folder first
  # ------------------------------------------------------------------------------
  print('Installing DDSP...')
  print('This should take about 2 minutes...')
  # Output of both install commands is discarded.
  !sudo apt-get install libportaudio2 &> /dev/null
  # NOTE(review): installs the latest `ddsp` release (no version pin).
  !pip install -U ddsp[data_preparation] &> /dev/null

  # ------------------------------------------------------------------------------
  # Import DDSP
  # ------------------------------------------------------------------------------
  # Imported only after the install above. Published as a global because
  # `get_audio_files` and `export_and_download` refer to `colab_utils` by its
  # global name.
  from ddsp.colab import colab_utils
  globals()['colab_utils'] = colab_utils

  # ------------------------------------------------------------------------------
  # Setup
  # ------------------------------------------------------------------------------
  # Save data locally, but model on drive.
  data_dir = 'data/'
  audio_dir = 'audio/'
  model_dir = get_model_dir(drive_dir)

  reset_state(data_dir, audio_dir, model_dir)

  # ------------------------------------------------------------------------------
  # Dataset
  # ------------------------------------------------------------------------------
  tick = time.time()

  get_audio_files(drive_dir, audio_dir)
  prepare_dataset(audio_dir, data_dir)

  # Elapsed time in whole minutes.
  log_event('datasetMins', {'value': round((time.time() - tick) // 60)})


  # ------------------------------------------------------------------------------
  # Train
  # ------------------------------------------------------------------------------
  tick = time.time()

  print()
  print('Training...')
  # `Training_Steps` is the form option defined at the top of the cell.
  train(model_dir, data_dir, steps=Training_Steps)

  log_event('trainMins', {
      'event_category': str(Training_Steps),
      'value': round((time.time() - tick) // 60),
  })


  # ------------------------------------------------------------------------------
  # Export
  # ------------------------------------------------------------------------------
  tick = time.time()

  print()
  print('Exporting model...')
  export_and_download(model_dir)

  log_event('exportMins', {'value': round((time.time() - tick) // 60)})


# The single command: starts the whole pipeline using the `Google_Drive` form
# option set near the top of this cell.
run(Google_Drive)
