In [1]:
import os
import json
import glob
import torch
import matplotlib.pyplot as plt

from midi import generate_word_files
from model_without_genre import (
    LSTMGenerator,
    build_vocab,
    load_sequences,
    create_loaders,
    train_model,
)

DATA_PATH = r"C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately"
seq_length=64

In [11]:
for txt in glob.glob(f"{DATA_PATH}/**/*.txt", recursive=True):
    os.remove(txt)

In [12]:
generate_word_files(DATA_PATH, seq_length)

Error processing C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\train\Classical\Classical\Claude Debussy\Clair De Lune.mid: badly formatted midi bytes, got: b''
Error processing C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\train\Rock\Progressive Rock\Dream Theater\The Big Medley - In The Flesh  Carry On Wayward Son  Bohemian Rhapsody  Lovin_ Touchin_ Squeezin  Cruise Control  Turn It On AgainLive - Uncovered Version 1995.mid: [Errno 2] No such file or directory: 'C:\\projects\\studia\\POLSLrepo_sem7\\music_generator\\data\\all_separately\\train\\Rock\\Progressive Rock\\Dream Theater\\The Big Medley - In The Flesh  Carry On Wayward Son  Bohemian Rhapsody  Lovin_ Touchin_ Squeezin  Cruise Control  Turn It On AgainLive - Uncovered Version 1995.mid'


In [13]:
asd = dict()

for root, dirs, files in os.walk(r"C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately"):
   for name in files:
       ext = os.path.splitext(name)[1]
       if ext in asd:
           asd[ext]+=1
       else:
           asd[ext] = 1
asd

{'.mid': 8799, '.txt': 15942}

In [None]:
word_to_idx, idx_to_word = build_vocab(DATA_PATH)

with open(os.path.join(DATA_PATH, "word_to_idx.json"), "w") as f:
    json.dump(word_to_idx, f)
with open(os.path.join(DATA_PATH, "idx_to_word.json"), "w") as f:
    json.dump(idx_to_word, f)

In [2]:
with open(os.path.join(DATA_PATH, "word_to_idx.json"), 'r') as f:
    word_to_idx = json.load(f)
    print(word_to_idx)
with open(os.path.join(DATA_PATH, "idx_to_word.json"), 'r') as f:
    idx_to_word = json.load(f)
    print(idx_to_word)
vocab_size = len(word_to_idx)

{'PAD': 0, 'PAUSE_16th': 1, 'PAUSE_eighth': 2, 'D4_eighth_zero': 3, 'PAUSE_quarter': 4, 'E4_eighth_zero': 5, 'D4_16th_zero': 6, 'C4_eighth_zero': 7, 'E4_16th_zero': 8, 'G4_eighth_zero': 9, 'G4_16th_zero': 10, 'C4_16th_zero': 11, 'F4_eighth_zero': 12, 'D4_quarter_zero': 13, 'A3_eighth_zero': 14, 'A4_eighth_zero': 15, 'E4_quarter_zero': 16, 'B3_eighth_zero': 17, 'A4_16th_zero': 18, 'F4_16th_zero': 19, 'A3_16th_zero': 20, 'C4_quarter_zero': 21, 'F#4_eighth_zero': 22, 'B3_16th_zero': 23, 'G4_quarter_zero': 24, 'F#4_16th_zero': 25, 'C#4_eighth_zero': 26, 'E-4_eighth_zero': 27, 'G3_eighth_zero': 28, 'G3_16th_zero': 29, 'C5_16th_zero': 30, 'E-4_16th_zero': 31, 'C#4_16th_zero': 32, 'B-3_eighth_zero': 33, 'A3_quarter_zero': 34, 'D5_16th_zero': 35, 'F4_quarter_zero': 36, 'B4_eighth_zero': 37, 'C5_eighth_zero': 38, 'B3_quarter_zero': 39, 'A4_quarter_zero': 40, 'B-3_16th_zero': 41, 'B4_16th_zero': 42, 'PAUSE_half': 43, 'D5_eighth_zero': 44, 'D4_half_zero': 45, 'A3_eighth_eighth': 46, 'E5_16th_zero

In [3]:
for genre in os.listdir(r"C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\train"):
    TEST_PATH = os.path.join(DATA_PATH, "test", genre)
    TRAIN_PATH = os.path.join(DATA_PATH, "train", genre)
    MODELS_PATH = os.path.join(DATA_PATH, "models", genre)
    print(TEST_PATH)
    os.makedirs(MODELS_PATH, exist_ok=True)
    print(genre)
    
    batch_size=64
    embed_size=128
    hidden_size=256
    num_layers=2
    num_epochs=30
    lr=0.001
    log_interval=1000
    
    # Filter for piano-like tracks
    test_files = glob.glob(f"{TEST_PATH}/**/*piano*.txt", recursive=True)
    print(f"Test dataset for {genre} gathered, len = {len(test_files)}")
    train_files = glob.glob(f"{TRAIN_PATH}/**/*piano*.txt", recursive=True)
    print(f"Train dataset for {genre} gathered, len = {len(train_files)}")

    # Load sequences
    train_inputs, train_outputs = load_sequences(train_files, word_to_idx, seq_length)
    print("Train sequences loaded")

    test_inputs, test_outputs = load_sequences(test_files, word_to_idx, seq_length)
    print("Test sequences loaded")

    # Create data loaders
    train_loader = create_loaders(train_inputs, train_outputs, batch_size)
    test_loader = create_loaders(test_inputs, test_outputs, batch_size)
    print("Loaders created")

    # Initialize and train model
    model = LSTMGenerator(vocab_size, embed_size, hidden_size, num_layers)
    print("Model created")
    
    print(f"Training on piano-like tracks for {genre}...")
    train_losses, test_losses = train_model(
        model,
        train_loader,
        test_loader,
        num_epochs,
        MODELS_PATH,
        lr,
        log_interval,
    )

    print("Training completed and model saved!")


    # Plot training and validation losses
    plt.figure()  # Create a new figure
    plt.plot(train_losses, label="Training loss")
    plt.plot(test_losses, label="Validation loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.title(f"Losses for {genre}")
    plt.legend(frameon=False)

    # Save the plot
    loss_plot_path = os.path.join(MODELS_PATH, "losses.png")
    plt.savefig(loss_plot_path)
    print(f"Loss plot saved at {loss_plot_path}")

    # Clear the plot for the next genre
    plt.clf()


C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\test\Ambient
Ambient
Test dataset for Ambient gathered, len = 20
Train dataset for Ambient gathered, len = 271
Train sequences loaded
Test sequences loaded
Loaders created
Model created
Training on piano-like tracks for Ambient...
Resuming from epoch 31...
Loaded model from C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\models\Ambient\last_model.pt.
Training completed and model saved!
Loss plot saved at C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\models\Ambient\losses.png
C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately\test\Blues
Blues
Test dataset for Blues gathered, len = 21
Train dataset for Blues gathered, len = 119
Train sequences loaded
Test sequences loaded
Loaders created
Model created
Training on piano-like tracks for Blues...
Resuming from epoch 31...
Loaded model from C:\projects\studia\POLSLrepo_sem7\music_generator\data\all_separately

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>