# Using an LSTM to Generate Songs

In this exercise, we're going to build a _small_ language model: something that learns from sequences of _characters_ and can then use what it learned to generate text.

Think of it as a fancy version of our Markov chains.


## Imports and Set Up

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive so files stored there are reachable under /content/gdrive
drive.mount('/content/gdrive')

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
```

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from google.colab import drive

# # Mount Google Drive
# drive.mount('/content/gdrive')

# Pick the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


# Load our Dataset

Pick an artist you want to train on...

```python
# Load the full dataset first so that `df` exists before we inspect it
df = pd.read_csv('/content/gdrive/MyDrive/datasets/songs.csv')

# List every distinct artist in the dataset so you can pick one to train on
print(df["Artist"].unique())
```

And then filter to that artist (I chose "Taylor Swift" as an example).

```python
# Read the songs table and keep only the rows for the chosen artist
df = pd.read_csv('/content/gdrive/MyDrive/datasets/songs.csv')
df = df.loc[df["Artist"] == "Taylor Swift"]

# Join every song's lyrics into a single newline-separated training string
lyrics = df['Lyrics'].str.cat(sep='\n')

# Preview the first 500 characters of the corpus
print(lyrics[:500])
```

In [4]:
# Read the songs table and keep only the rows for the chosen artist
df = pd.read_csv('songs.csv')
df = df.loc[df["Artist"] == "Taylor Swift"]

# Join every song's lyrics into a single newline-separated training string
lyrics = df['Lyrics'].str.cat(sep='\n')

# Preview the first 500 characters of the corpus
print(lyrics[:500])

Vintage tee, brand new phone
High heels on cobblestones
When you are young, they assume you know nothing
Sequin smile, black lipstick
Sensual politics
When you are young, they assume you know nothing

But I knew you
Dancin' in your Levi's
Drunk under a streetlight, I
I knew you
Hand under my sweatshirt
Baby, kiss it better, I

And when I felt like I was an old cardigan
Under someone's bed
You put me on and said I was your favorite

A friend to all is a friend to none
Chase two girls, lose the on


# Preprocess and Build a Dataset

This code is going to preprocess our lyrics and build the dataset.

Don't feel like you need to fully understand it, but if you read through it, you'll see that we're converting all our characters into numbers. These will be how we translate our text into numeric feature vectors.

Here's the preprocessing...

```python
# Vocabulary: every distinct character in the corpus, in sorted order,
# with lookup tables in both directions (index <-> character)
chars = sorted(set(lyrics))
idx_to_char = dict(enumerate(chars))
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

# Encode the whole corpus as a list of vocabulary indices
lyrics_idx = [char_to_idx[char] for char in lyrics]

# Window length (in characters) and the stride between consecutive windows
seq_length = 100
step = 1

# Each training example is a window of `seq_length` indices; its label is
# the index of the character that immediately follows that window.
window_starts = range(0, len(lyrics_idx) - seq_length, step)
sequences = [lyrics_idx[start:start + seq_length] for start in window_starts]
next_chars = [lyrics_idx[start + seq_length] for start in window_starts]

print(f'Number of sequences: {len(sequences)}')

# Convert to numpy arrays; the reshape keeps X two-dimensional even when
# the corpus is too short to produce any window.
X = np.array(sequences, dtype=int).reshape(-1, seq_length)
y = np.array(next_chars, dtype=int)
```

And then we build the dataset class.

```python
class LyricsDataset(Dataset):
    """Pairs of (input window, next-character label) stored as tensors on `device`.

    Args:
        X: Integer-encoded input windows, one row per example.
        y: Integer-encoded label for each row of ``X``.
    """

    def __init__(self, X, y):
        # Both tensors are created as int64 directly on the module-level
        # `device`, so batches drawn from the dataset already live there.
        self.X = torch.tensor(X, dtype=torch.long, device=device)
        self.y = torch.tensor(y, dtype=torch.long, device=device)

    def __len__(self):
        """Return the number of examples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(window, label)`` pair stored at position ``idx``."""
        window = self.X[idx]
        label = self.y[idx]
        return window, label

# Wrap the encoded windows in a Dataset and serve them in shuffled
# mini-batches of 64 examples.
dataset = LyricsDataset(X, y)
dataloader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
)
```

In [5]:
# Vocabulary: every distinct character in the corpus, in sorted order,
# with lookup tables in both directions (index <-> character)
chars = sorted(set(lyrics))
idx_to_char = dict(enumerate(chars))
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

# Encode the whole corpus as a list of vocabulary indices
lyrics_idx = [char_to_idx[char] for char in lyrics]

# Window length (in characters) and the stride between consecutive windows
seq_length = 100
step = 1

# Each training example is a window of `seq_length` indices; its label is
# the index of the character that immediately follows that window.
window_starts = range(0, len(lyrics_idx) - seq_length, step)
sequences = [lyrics_idx[start:start + seq_length] for start in window_starts]
next_chars = [lyrics_idx[start + seq_length] for start in window_starts]

print(f'Number of sequences: {len(sequences)}')

# Convert to numpy arrays; the reshape keeps X two-dimensional even when
# the corpus is too short to produce any window.
X = np.array(sequences, dtype=int).reshape(-1, seq_length)
y = np.array(next_chars, dtype=int)

class LyricsDataset(Dataset):
    """Pairs of (input window, next-character label) stored as tensors on `device`.

    Args:
        X: Integer-encoded input windows, one row per example.
        y: Integer-encoded label for each row of ``X``.
    """

    def __init__(self, X, y):
        # Both tensors are created as int64 directly on the module-level
        # `device`, so batches drawn from the dataset already live there.
        self.X = torch.tensor(X, dtype=torch.long, device=device)
        self.y = torch.tensor(y, dtype=torch.long, device=device)

    def __len__(self):
        """Return the number of examples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(window, label)`` pair stored at position ``idx``."""
        window = self.X[idx]
        label = self.y[idx]
        return window, label

# Wrap the encoded windows in a Dataset and serve them in shuffled
# mini-batches of 64 examples.
dataset = LyricsDataset(X, y)
dataloader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
)




Number of sequences: 98256


# Build the LSTM Network

```python
class LyricsLSTM(nn.Module):
    """Character-level model: embedding -> stacked LSTM -> linear layer over the vocabulary.

    Args:
        vocab_size: Number of distinct characters (embedding rows and output logits).
        embedding_dim: Size of each character embedding.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        """Return next-character logits and the updated LSTM state.

        Args:
            x: Batch of character-index sequences, batch dimension first.
            hidden: ``(h, c)`` state tuple, e.g. from ``init_hidden``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one row per sequence,
            computed from the LSTM output at the final time step only.
        """
        embedded = self.embedding(x)
        lstm_out, hidden = self.lstm(embedded, hidden)
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero ``(h, c)`` state for ``batch_size`` sequences.

        The tensors take their dtype and device from the model's parameters.
        """
        reference = next(self.parameters()).detach()
        state_shape = (self.lstm.num_layers, batch_size, self.lstm.hidden_size)
        return reference.new_zeros(state_shape), reference.new_zeros(state_shape)
```

And initialize our model (feel free to play with the hyperparameters here).

```python
# Hyperparameters
vocab_size = len(chars)  # one embedding row / output logit per distinct character
embedding_dim = 128      # size of each character embedding
hidden_dim = 256         # size of the LSTM hidden state
num_layers = 2           # number of stacked LSTM layers

model = LyricsLSTM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
```

# Initialize our Loss Function and Optimizer

```python
# Cross-entropy compares the model's logits with the index of the true next character
criterion = nn.CrossEntropyLoss()
# Adam updates every model parameter with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
```

In [6]:
class LyricsLSTM(nn.Module):
    """Character-level model: embedding -> stacked LSTM -> linear layer over the vocabulary.

    Args:
        vocab_size: Number of distinct characters (embedding rows and output logits).
        embedding_dim: Size of each character embedding.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        """Return next-character logits and the updated LSTM state.

        Args:
            x: Batch of character-index sequences, batch dimension first.
            hidden: ``(h, c)`` state tuple, e.g. from ``init_hidden``.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one row per sequence,
            computed from the LSTM output at the final time step only.
        """
        embedded = self.embedding(x)
        lstm_out, hidden = self.lstm(embedded, hidden)
        last_step = lstm_out[:, -1, :]
        return self.fc(last_step), hidden

    def init_hidden(self, batch_size):
        """Return an all-zero ``(h, c)`` state for ``batch_size`` sequences.

        The tensors take their dtype and device from the model's parameters.
        """
        reference = next(self.parameters()).detach()
        state_shape = (self.lstm.num_layers, batch_size, self.lstm.hidden_size)
        return reference.new_zeros(state_shape), reference.new_zeros(state_shape)

# Hyperparameters
vocab_size = len(chars)  # one embedding row / output logit per distinct character
embedding_dim = 128      # size of each character embedding
hidden_dim = 256         # size of the LSTM hidden state
num_layers = 2           # number of stacked LSTM layers

model = LyricsLSTM(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)

# Cross-entropy compares the model's logits with the index of the true next character
criterion = nn.CrossEntropyLoss()
# Adam updates every model parameter with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)
print(model)


LyricsLSTM(
  (embedding): Embedding(82, 128)
  (lstm): LSTM(128, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=256, out_features=82, bias=True)
)


# Train our LSTM

Start with `num_epochs = 1` and increase it if you'd like more training.

```python
# Number of full passes over the dataset
num_epochs = 1

model.to(device)
model.train()
for epoch in range(num_epochs):
    for i, (X_batch, y_batch) in enumerate(dataloader):
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Every example is an independent window and the batches are shuffled,
        # so each batch starts from a fresh zero state. Sizing that state from
        # the batch itself lets the final, smaller batch train too, instead of
        # ending the epoch early.
        hidden = model.init_hidden(X_batch.size(0))

        optimizer.zero_grad()
        output, _ = model(X_batch, hidden)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        # Progress report every 100 batches
        if i % 100 == 0:
            print(f'Epoch {epoch}/{num_epochs}, Batch {i}/{len(dataloader)}, Loss: {loss.item()}')
    # Loss of the last batch processed in this epoch
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')
```

In [7]:
# Number of full passes over the dataset
num_epochs = 10

model.to(device)
model.train()
for epoch in range(num_epochs):
    for i, (X_batch, y_batch) in enumerate(dataloader):
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        # Every example is an independent window and the batches are shuffled,
        # so each batch starts from a fresh zero state. Sizing that state from
        # the batch itself lets the final, smaller batch train too, instead of
        # ending the epoch early.
        hidden = model.init_hidden(X_batch.size(0))

        optimizer.zero_grad()
        output, _ = model(X_batch, hidden)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

        # Progress report every 100 batches
        if i % 100 == 0:
            print(f'Epoch {epoch}/{num_epochs}, Batch {i}/{len(dataloader)}, Loss: {loss.item()}')
    # Loss of the last batch processed in this epoch
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')


Epoch 0/10, Batch 0/1536, Loss: 4.416355609893799
Epoch 0/10, Batch 100/1536, Loss: 2.4865458011627197
Epoch 0/10, Batch 200/1536, Loss: 2.670980930328369
Epoch 0/10, Batch 300/1536, Loss: 2.2667417526245117
Epoch 0/10, Batch 400/1536, Loss: 2.381537437438965
Epoch 0/10, Batch 500/1536, Loss: 1.6872880458831787
Epoch 0/10, Batch 600/1536, Loss: 2.084592819213867
Epoch 0/10, Batch 700/1536, Loss: 1.7127478122711182
Epoch 0/10, Batch 800/1536, Loss: 1.8348757028579712
Epoch 0/10, Batch 900/1536, Loss: 2.165177345275879
Epoch 0/10, Batch 1000/1536, Loss: 1.9124559164047241
Epoch 0/10, Batch 1100/1536, Loss: 1.9726910591125488
Epoch 0/10, Batch 1200/1536, Loss: 1.4817055463790894
Epoch 0/10, Batch 1300/1536, Loss: 1.5168049335479736
Epoch 0/10, Batch 1400/1536, Loss: 1.9429641962051392
Epoch 0/10, Batch 1500/1536, Loss: 1.8944038152694702
Epoch 1/10, Loss: 1.5725018978118896
Epoch 1/10, Batch 0/1536, Loss: 1.5998836755752563
Epoch 1/10, Batch 100/1536, Loss: 1.6420053243637085
Epoch 1/10, 

# Choose Your Starting Text and Generate Lyrics

```python
model.eval()

# Prompt that seeds the model, and how many characters to generate after it.
# The prompt must be non-empty and use only characters present in `char_to_idx`.
start_text = "Once upon a time"
length = 100

# Collect the result under its own name so the vocabulary list `chars`
# (which `vocab_size` is derived from) is not overwritten.
generated = list(start_text)

# Inference only: no_grad keeps autograd from recording a graph through the
# recurrent state at every step.
with torch.no_grad():
    hidden = model.init_hidden(1)

    # Prime the LSTM state by feeding the prompt one character at a time.
    # After the loop, `output` holds the logits for the character that
    # follows the prompt.
    for char in start_text:
        char_idx = torch.tensor([[char_to_idx[char]]], dtype=torch.long).to(device)
        output, hidden = model(char_idx, hidden)

    for _ in range(length):
        # Sample the next character from the predicted distribution...
        prob = torch.softmax(output, dim=1)
        char_idx = torch.multinomial(prob, 1)
        generated.append(idx_to_char[char_idx.item()])
        # ...then feed it back in to get the logits for the one after it.
        output, hidden = model(char_idx, hidden)

generated_text = ''.join(generated)

print(generated_text)
```

In [10]:
import time
model.eval()

# Prompt that seeds the model, and how many characters to generate after it.
# The prompt must be non-empty and use only characters present in `char_to_idx`.
start_text = "We're learning about LLMs and generation "
length = 1000

# Collect the result under its own name so the vocabulary list `chars`
# (which `vocab_size` is derived from) is not overwritten.
generated = list(start_text)

# Inference only: no_grad keeps autograd from recording a graph through the
# recurrent state at every step.
with torch.no_grad():
    hidden = model.init_hidden(1)

    # Prime the LSTM state by feeding the prompt one character at a time.
    # After the loop, `output` holds the logits for the character that
    # follows the prompt.
    for char in start_text:
        char_idx = torch.tensor([[char_to_idx[char]]], dtype=torch.long).to(device)
        output, hidden = model(char_idx, hidden)

    for _ in range(length):
        # Sample the next character from the predicted distribution...
        prob = torch.softmax(output, dim=1)
        char_idx = torch.multinomial(prob, 1)
        next_char = idx_to_char[char_idx.item()]
        generated.append(next_char)
        # Stream each character as it is produced, with a short pause between them
        print(next_char, end="")
        time.sleep(0.1)
        # ...then feed it back in to get the logits for the one after it.
        output, hidden = model(char_idx, hidden)

# Prompt plus generated characters, kept for later use (already streamed above)
generated_text = ''.join(generated)







lose, at to be lome
And how this time you other
Like I was a "mar)
With your heart room, baby
We could ever leave me, Mr. "Perfect before you win still gots
With you I remember are train
I guess it couldn't be shit
Do the dark again of your body
It's for the man (Deain't down, did
I can get for me for body worked
Dawn red losin', precedes around you"

But I don't like a gate
I wanna be your A-Epeated 'tore red look at you around (You're being too my life, oh-oh
Baby, I I forget me to be smokens
At herp's crize, I do tell the only jeans, I
Can head now, go bry
And that's my man (Far), the goodbye it
But I enjoy would've been the purol, lover
I'm a vendending and crime, I was there
Are routes times, change in the kestic for mine
I'm still on that red, undidies is my baby
I'll be usin' for the better in the clandelf
Dow you like a game

I can feel the other blaoks in a naster, I wole are back every time
'Cause we never go out of so grace

And I've got a hunting red, hund, ah
Awer the were