In [None]:
import torch
import os
import librosa
import wave
import numpy as np
import torchaudio
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

In [None]:
# Dataset root and its two sub-folders: the wav clips read by extract_wav and
# (presumably, going by the name) the matching png images.
DATA_PATH = './training_data/'
WAV_PATH, PNG_PATH = (
    os.path.join(DATA_PATH, folder)
    for folder in ('genres_original', 'images_original')
)

In [None]:
### Arguments
### path: path of either wav or png directory
### Returns
### dictionary of 'genre' -> number
def extract_classes(path=WAV_PATH):
    """Map every genre sub-directory of ``path`` to an integer class id.

    Args:
        path: directory whose immediate sub-directories are named after genres.

    Returns:
        dict mapping genre name -> id, with ids 0..n-1 assigned in sorted
        (alphabetical) order of the genre names.
    """
    # os.listdir returns entries in arbitrary order, so sort to keep the
    # genre -> id mapping identical across runs and machines. Plain files
    # (e.g. .DS_Store) are not genres and are skipped.
    genres = sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )
    return {genre: index for index, genre in enumerate(genres)}
CLASSES = extract_classes()

In [None]:
# Assumed length of one wav clip; extract_wav divides it by the per-example
# duration to get the number of examples cut from each file.
WAV_DURATION = 30 #seconds

In [None]:
def waveFileTransform(file_path):
    """Read all audio frames of a WAV file and return them as raw bytes.

    The frames are returned instead of printed: dumping the full byte string
    of every clip floods the notebook output and makes the result unusable
    by callers.

    Args:
        file_path: path of a WAV file readable by the stdlib ``wave`` module.

    Returns:
        bytes: the raw frame data exactly as stored in the file.

    Raises:
        wave.Error: if the file is not a WAV format that ``wave`` supports.
        OSError: if the file cannot be opened.
    """
    with wave.open(file_path, 'rb') as wav_file:
        return wav_file.readframes(wav_file.getnframes())

In [None]:


### duration: duration of each training example
### sr: sample rate of the wav file
### path: path of the wav files
def extract_wav(path=WAV_PATH, duration=2, sr=1000):
    """Cut every wav file under ``path`` into fixed-length training examples.

    Each file is resampled to ``sr`` Hz by librosa, zero-padded if it is
    shorter than the required length, truncated to
    ``WAV_DURATION // duration`` windows of ``sr * duration`` samples, and
    labelled with the class id of its genre folder (looked up in ``CLASSES``).

    Args:
        path: directory containing one sub-directory per genre.
        duration: length of one training example, in seconds.
        sr: sample rate the audio is resampled to when loading.

    Returns:
        (X, Y): X is a float64 array of shape (n_examples, sr * duration),
        Y is a float64 array of shape (n_examples,) holding class ids.
        Both are empty (zero rows) if nothing could be loaded.
    """
    dim = sr * duration
    samples_per_wav = WAV_DURATION // duration
    needed = samples_per_wav * dim
    features = []
    labels = []
    # Sorted listings keep the row order reproducible; os.listdir alone is arbitrary.
    for genre in sorted(os.listdir(path)):
        genre_path = os.path.join(path, genre)
        if not os.path.isdir(genre_path):
            # Stray files next to the genre folders are not genres.
            continue
        if genre not in CLASSES:
            print("no class id for genre " + genre + ", skipping")
            continue
        for piece in sorted(os.listdir(genre_path)):
            file_path = os.path.join(genre_path, piece)
            try:
                x, _ = librosa.load(file_path, sr=sr)
            except Exception as err:
                # Best effort: report the unreadable file and carry on.
                # KeyboardInterrupt is deliberately not caught.
                print("librosa load failed on " + file_path + ": " + repr(err))
                continue
            if len(x) < needed:
                # Pad short clips up to the exact length the reshape requires.
                x = np.pad(x, (0, needed - len(x)), mode='constant', constant_values=0)
            features.append(x[:needed].reshape(samples_per_wav, dim))
            labels.append(np.full(samples_per_wav, CLASSES[genre], dtype=np.float64))
    if not features:
        return np.zeros((0, dim)), np.zeros((0,))
    # Concatenate once at the end instead of re-copying the whole array per file.
    return np.concatenate(features).astype(np.float64), np.concatenate(labels)
X, Y = extract_wav()

In [None]:
def PCA(X, out_dim=256, full_matrices=False):
    """Project the rows of ``X`` onto their leading principal components.

    The data is centered per feature (column), scaled by its overall standard
    deviation, and decomposed with an SVD; the returned scores are
    ``U[:, :k] * S[:k]``.

    Args:
        X: array-like of shape (n_samples, n_features).
        out_dim: maximum number of components to keep.
        full_matrices: forwarded to ``np.linalg.svd``. The default ``False``
            computes the reduced SVD, avoiding an n_samples x n_samples ``U``.

    Returns:
        ndarray of shape (n_samples, k) with
        k = min(out_dim, n_samples, n_features).
    """
    X = np.asarray(X)
    # PCA requires each feature to be centered on its own mean.
    X = X - X.mean(axis=0)
    scale = X.std()
    if scale > 0:
        # Skip the division for constant data to avoid NaNs.
        X = X / scale
    U, S, _ = np.linalg.svd(X, full_matrices=full_matrices)
    # Never ask for more components than there are singular values.
    k = min(out_dim, S.shape[0])
    return U[:, :k] * S[:k]
# Reduce the windowed waveforms in X with PCA, using its default out_dim.
X_PCA = PCA(X)

In [None]:
# Sanity check: shapes of the raw features, the labels and the PCA scores, one per line.
print(X.shape, Y.shape, X_PCA.shape, sep='\n')

In [None]:
# Number of examples per mini-batch handed to the DataLoader.
BATCH_SIZE = 64

In [None]:
# Wrap the numpy features and labels as tensors. Features are cast to float32
# to match the default dtype of nn.Linear parameters; labels become integer
# class ids (the form index-based losses such as nn.CrossEntropyLoss expect).
train_target = torch.tensor(Y, dtype=torch.long)
train = torch.tensor(X, dtype=torch.float32)
train_tensor = TensorDataset(train, train_target)
# NOTE(review): there is no held-out split yet, so the "test" dataset wraps the
# same tensors as the training dataset — replace with a real split before
# reporting test metrics.
test_tensor = TensorDataset(train, train_target)

train_loader = DataLoader(dataset=train_tensor, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_tensor, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Peek at one mini-batch to confirm the feature shape. The loop variables are
# deliberately not named X / y: rebinding the global X to a batch tensor would
# break any re-run of the cells that build the dataset from it.
for batch_X, batch_y in train_loader:
    print(batch_X.shape)
    break

In [None]:
# Scratch cell: a 1-D tensor of nine ones (shown as the cell output).
torch.full((9,), 1)

In [None]:
# Choose the compute backend: CUDA if present, otherwise Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

# Define model

class NeuralNetwork(nn.Module):
    """Fully connected classifier: two hidden ReLU layers and a softmax output.

    Args:
        in_features: number of values per example after flattening
            (default 28*28, the original hard-coded size).
        hidden_features: width of both hidden layers.
        num_classes: number of output classes.

    NOTE(review): the network outputs probabilities, not logits. If it is
    trained with nn.CrossEntropyLoss (which applies log-softmax itself), the
    softmax would be applied twice — confirm against the training code.
    """

    def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
            # nn.Flatten always yields (batch, features), so the class axis is
            # dim 1. Stating it avoids the implicit-dim deprecation warning.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes)."""
        x = self.flatten(x)
        probabilities = self.linear_relu_stack(x)
        return probabilities

# Instantiate the network.
# NOTE(review): the instance is not moved to the `device` selected above —
# confirm whether the training code expects `.to(device)` here.
model = NeuralNetwork()