In [3]:
# Import necessary libraries
import numpy as np
import torch
import torch.nn as nn
import torchaudio
from tqdm import tqdm

# Define a simple diffusion model
class SimpleDiffusionModel(nn.Module):
    """Small fully connected network used as the noise predictor.

    The network is a three-layer MLP (Linear -> ReLU -> Linear -> ReLU ->
    Linear) that maps vectors of size ``input_dim`` back to ``input_dim``.

    Args:
        input_dim: Size of the last dimension of the input and the output.
        hidden_dim: Width of the two hidden layers. Defaults to 128.
    """

    def __init__(self, input_dim: int, hidden_dim: int = 128) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        """Run the network on ``x`` shifted by a sinusoidal time term.

        Args:
            x: Input tensor whose last dimension is ``input_dim``.
            t: Timestep tensor. ``sin(t)`` gets a trailing axis appended and
                is then broadcast-added to ``x``.

        Returns:
            The network output, with the same last dimension as ``x``.
        """
        # Time conditioning is a plain additive offset (demonstration only):
        # every feature of a sample receives the same sin(t) value.
        time_embedding = torch.sin(t).unsqueeze(-1)
        return self.net(x + time_embedding)

# Define the forward diffusion process
def forward_diffusion(x, timesteps):
    """Blend ``x`` with Gaussian noise at the noisiest level of the schedule.

    The schedule is ``alpha = linspace(0.1, 1.0, timesteps)`` and the noisy
    sample is ``x * a + noise * (1 - a)``, where ``a`` is the smallest value
    of the schedule.

    Args:
        x: Clean input tensor.
        timesteps: Number of points in the alpha schedule (must be >= 1).

    Returns:
        A tuple ``(noisy_x, noise)``: the noised sample and the Gaussian
        noise that was mixed into it (same shape as ``x``).
    """
    noise = torch.randn_like(x)
    alpha = torch.linspace(0.1, 1.0, timesteps)
    # alpha[-1] is exactly 1.0 whenever timesteps >= 2, which gives the noise
    # a weight of 0 and returns x untouched. The schedule's noisiest level is
    # its first entry, so use that one instead.
    signal_weight = alpha[0]
    noisy_x = x * signal_weight + noise * (1 - signal_weight)
    return noisy_x, noise

# Define the reverse diffusion process
def reverse_diffusion(model, noisy_x, timesteps):
    """Iteratively denoise ``noisy_x`` by walking the schedule backwards.

    For ``t = timesteps - 1, ..., 0`` the model predicts the noise from the
    current sample and the normalized time ``t / timesteps``, and the sample
    is updated as ``x = (x - pred_noise * (1 - alpha[t])) / alpha[t]`` with
    ``alpha = linspace(0.1, 1.0, timesteps)``.

    The loop runs under ``torch.no_grad()``: this is sampling, so no autograd
    graph is recorded across the steps.

    Args:
        model: Callable taking ``(x, t_tensor)`` and returning the predicted
            noise, broadcastable against ``x``.
        noisy_x: Starting sample.
        timesteps: Number of denoising steps. With 0 steps ``noisy_x`` is
            returned unchanged.

    Returns:
        The sample after all denoising steps.
    """
    # Build the helper tensors on the sample's device so the loop also works
    # when the model and the sample do not live on the CPU.
    device = noisy_x.device
    alpha = torch.linspace(0.1, 1.0, timesteps, device=device)
    x = noisy_x
    with torch.no_grad():
        for t in reversed(range(timesteps)):
            t_tensor = torch.tensor(
                [t / timesteps], dtype=torch.float32, device=device
            )
            pred_noise = model(x, t_tensor)
            x = (x - pred_noise * (1 - alpha[t])) / alpha[t]
    return x

# Generate audio using the diffusion model
def generate_audio(model, timesteps, audio_length):
    """Sample one waveform by denoising random Gaussian noise.

    Args:
        model: Noise-prediction model handed to ``reverse_diffusion``.
        timesteps: Number of reverse-diffusion steps to run.
        audio_length: Number of samples in the generated waveform.

    Returns:
        The tensor produced by ``reverse_diffusion`` when started from
        noise of shape ``(1, audio_length)``.
    """
    # Start from pure noise and let the reverse process do the rest.
    return reverse_diffusion(model, torch.randn((1, audio_length)), timesteps)

# Main script
if __name__ == "__main__":
    import os

    # Parameters
    sample_rate = 16000
    audio_length = sample_rate  # 1 second of audio at 16 kHz
    timesteps = 100
    output_path = "../output/generated_audio_DIFFUSION.wav"

    # Initialize the model. No weights are trained or loaded in this script,
    # so the network runs with its random initialization.
    model = SimpleDiffusionModel(input_dim=audio_length)

    # Generate audio
    generated_audio = generate_audio(model, timesteps, audio_length)

    # Save the generated audio, creating the target directory first so the
    # write does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    torchaudio.save(output_path, generated_audio.detach(), sample_rate=sample_rate)

    # Report the path that was actually written.
    print(f"Audio generation complete. Saved as '{output_path}'.")

Audio generation complete. Saved as 'generated_audio.wav'.
