## Script to test Riffusion on its own seed inputs

Seeds Riffusion with the seed images from the `seed_images` directory, which are what Riffusion-v1 currently uses to seed the diffusion process.

In [1]:
# # UNCOMMENT AND RUN THIS BLOCK IF USING GOOGLE COLAB

# from google.colab import drive
# drive.mount("/content/drive/")

# ## cd into desired directory 
# %cd drive/MyDrive/<my-directory>
# %ls

# # pull git repo 
# # get training data
# !git clone https://github.com/zachary-shah/riff-cnet.git
# %cd riff-cnet/riffusion_img2img

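# # may need to install some dependencies (only run once)
# !pip install -q -r requirements.txt
# # NOTE(review): two different Pillow pins are listed; if both lines are run
# # in order, the later one (9.4.0) is the version left installed. Confirm
# # which pin is actually required.
# !pip install Pillow==9.0.0
# !pip install Pillow==9.4.0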

In [None]:
# necessary imports
import os
import sys
from pathlib import Path

import PIL
import PIL.Image  # `import PIL` alone does not load the Image submodule

# make the parent directory importable before the project imports below
sys.path.append('../')
from riffusion.spectrogram_image_converter import SpectrogramImageConverter
from riffusion.spectrogram_params import SpectrogramParams

from datatypes import Img2ImgInput
from riffusion_img2img_pipeline import RiffusionImg2ImgPipeline

In [3]:
# Load the initial image by ID
seed_id = "og_beat"
prompt = "generate a trumpet melody"
device = "cuda"

# NOTE(review): these directories are absolute paths at the filesystem root,
# while the notebook intro refers to a `seed_images` directory. Confirm whether
# they should instead be relative to the working directory.
seed_images_dir = "/seed_images"
output_image_dir = "/output_images"
output_audio_dir = "/output_audio"

# the seed spectrogram is read as a 3-channel RGB image
init_image_path = Path(seed_images_dir, seed_id + ".png")
init_image = PIL.Image.open(init_image_path).convert("RGB")

# base file name (without extension) shared by the saved image and audio
save_str = seed_id + " - " + prompt

# create spectrogram to audio converter
params = SpectrogramParams(
    min_frequency=0,
    max_frequency=10000,
)
converter = SpectrogramImageConverter(params=params, device=device)

# listen to initial seed audio
seed_segment = converter.audio_from_spectrogram_image(
    init_image,
    apply_filters=True,
)
# bare last expression so the notebook displays the result
seed_segment

In [None]:
# setup model input
inputs = Img2ImgInput(
    text_prompt=prompt,
    seed=0,
    init_spectrogram=init_image,
    mask_image=None,
    negative_prompt=None,
    denoising=0.75,
    guidance=7.0,
    ddim_steps=50,
    ddim_eta=0.0,
)

# load pipeline
pipeline = RiffusionImg2ImgPipeline.load_checkpoint(
    checkpoint="riffusion/riffusion-model-v1",
    use_traced_unet=True,
    device=device,
)

# Execute the model to get the spectrogram image
output_image = pipeline.riffuse(
    inputs,
    use_reweighting=True,
)

# save image; create the output directory first so the save does not fail
# when the directory has not been created yet
os.makedirs(output_image_dir, exist_ok=True)
output_image.save(os.path.join(output_image_dir, save_str + '.png'))

In [7]:
# Reconstruct audio from the image
segment = converter.audio_from_spectrogram_image(
    output_image,
    apply_filters=True,
)

# save audio; create the output directory first so the export does not fail
# when the directory has not been created yet
os.makedirs(output_audio_dir, exist_ok=True)
segment.export(os.path.join(output_audio_dir, save_str + '.mp4'), format="mp4")

# listen to sampled audio (bare last expression so the notebook displays it)
segment