In [1]:
!pip install datasets rank-bm25 torch torchaudio transformers diffusers google-colab



ERROR: Could not find a version that satisfies the requirement google-colab (from versions: none)
ERROR: No matching distribution found for google-colab


In [2]:
import torch
import torchaudio
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.feature_extraction.text import TfidfVectorizer
from rank_bm25 import BM25Okapi
import numpy as np
from diffusers import AudioLDMPipeline
import warnings
warnings.filterwarnings('ignore')




In [3]:
# Ensure we are using a GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


Using device: cuda


In [4]:
def load_and_preprocess_data():
    try:
        with open("train.txt", "r", encoding='utf-8') as file:
            stories = file.read().strip().split('\n')
        tokenized_stories = [story.lower().split() for story in stories]
        return stories, tokenized_stories
    except FileNotFoundError:
        print("Error: train.txt not found. Please ensure the file exists in the current directory.")
        return [], []

# Load and preprocess data
stories, tokenized_stories = load_and_preprocess_data()
if not stories:
    raise Exception("No stories loaded. Cannot proceed.")

# BM25 Retrieval


In [17]:

from diffusers import AudioLDMPipeline

bm25 = BM25Okapi(tokenized_stories)
query = "A scared felling of getting lost in mountains"
retrieved_texts = bm25.get_top_n(query.lower().split(), stories, n=3)
print(retrieved_texts[0])
# Load T5 model with error handling
try:
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)
except Exception as e:
    print(f"Error loading T5 model: {str(e)}")
    raise



I went to visit family in the mountains last week. They have a lot of wildlife near their homes. I was a little scared to go outside while I was there. My friend assured me it was safe. I was still scared.


In [6]:
def generate_music_features(text, tokenizer, model):
    try:
        input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)
        outputs = model.generate(input_ids, max_length=50)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Error generating features for text: {str(e)}")
        return ""

# Generate embeddings with progress tracking


In [18]:
repo_id = "cvssp/audioldm-s-full-v2"
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
pipe = pipe.to(device)


print("Generating music embeddings...")
music_embeddings = []
for text in retrieved_texts:
    embedding = generate_music_features(text, tokenizer, model)
    if embedding:
        music_embeddings.append(embedding)

# Extract features using TF-IDF

if music_embeddings:
    vectorizer = TfidfVectorizer()
    music_features = vectorizer.fit_transform(music_embeddings).toarray()
    music_tensor = torch.tensor(music_features, dtype=torch.float32).to(device)

    try:
        # Load AudioLDM model
        print("Loading AudioLDM model...")
        audio_ldm = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2")
        audio_ldm.to(device)

        # Generate music
        
        print("Generating music...")
        generated_output = audio_ldm(
            prompt=retrieved_texts[0],
            num_inference_steps=50,
            audio_length_in_s=15.0
        )

        # Save the generated audio
        generated_music = generated_output.audios[0]
        
        try:
            waveform = torch.tensor(generated_music).unsqueeze(0)
            torchaudio.save("generated_music.wav", waveform, 16000)
            print("🎵 Music successfully generated and saved as 'generated_music.wav'!")
        except Exception as e:
            print(f"Error saving audio file: {str(e)}")
    
    except Exception as e:
        print(f"Error in music generation: {str(e)}")
else:
    print("No valid embeddings generated. Cannot proceed with music generation.")

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Generating music embeddings...
Loading AudioLDM model...


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Generating music...


  0%|          | 0/50 [00:00<?, ?it/s]

🎵 Music successfully generated and saved as 'generated_music.wav'!
