In [None]:
!pip install librosa



In [None]:
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
import torch as T
import torch.nn as nn
from torch.utils import data
import torch.optim as optim
import random
import time
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
class audio_train(data.Dataset):
    """Dataset that builds (noisy, clean) one-second training pairs on the fly.

    Every access picks a random music file and a random noise file from
    Google Drive, cuts a one-second excerpt from each at a random offset,
    and returns their sum together with the clean excerpt.

    Parameters
    ----------
    n : int
        Nominal dataset size reported by ``len()``; it only controls how many
        random pairs make up one epoch.
    """

    SAMPLE_RATE = 22050   # Hz, the rate every file is resampled to
    CLIP_SECONDS = 1      # duration of each training excerpt

    def __init__(self, n=100):
        self.n = n

    def __len__(self):
        return self.n

    @staticmethod
    def _fit_length(signal, length):
        """Return `signal` trimmed or zero-padded at the end to exactly `length` samples."""
        signal = signal[:length]
        missing = length - len(signal)
        if missing > 0:
            signal = np.pad(signal, (0, missing))
        return signal

    def __getitem__(self, idx):
        """Return a random ``(noised_audio, audio_data)`` pair.

        `idx` is ignored: the excerpt is drawn at random on every call.
        Both returned arrays always hold ``SAMPLE_RATE * CLIP_SECONDS`` samples.
        """
        clip_len = self.SAMPLE_RATE * self.CLIP_SECONDS

        # Music files are named 1.mp3 .. 100.mp3; adjust the range to the
        # number of files available. (Assign a fixed name here instead to
        # train on one specific file.)
        filename = random.randint(1, 100)
        self.filename = '/content/drive/MyDrive/FreeResearch/mp3/{0}.mp3'.format(filename)
        offset = random.uniform(1, 13)   # seconds; adjust to the song length
        audio_data, _ = librosa.load(self.filename, sr=self.SAMPLE_RATE, mono=True, offset=offset,
                                     duration=self.CLIP_SECONDS, res_type='kaiser_best')

        # Noise files are named 1.m4a .. 18.m4a; adjust the range to the
        # number of files available. (A fixed name can be used here as well.)
        noisename = random.randint(1, 18)
        self.noisename = '/content/drive/MyDrive/FreeResearch/noise/{0}.m4a'.format(noisename)
        offset_n = random.randrange(1, 300)   # seconds; adjust to the noise length
        noise_data, _ = librosa.load(self.noisename, sr=self.SAMPLE_RATE, mono=True, offset=offset_n,
                                     duration=self.CLIP_SECONDS, res_type='kaiser_best')

        # The loader returns fewer samples when the offset lands close to the
        # end of a file; force both clips to the same fixed length so they can
        # be added and always match the network's input width.
        audio_data = self._fit_length(audio_data, clip_len)
        noise_data = self._fit_length(noise_data, clip_len)

        # TODO: noise level (gain) control still needs to be added.
        noised_audio = audio_data + noise_data

        return noised_audio, audio_data

In [None]:
class Network_Model(nn.Module):
    """Fully connected autoencoder that maps a noisy waveform to a clean one.

    The input and output widths are ``length * 22050`` samples (``length``
    seconds at 22,050 Hz). The hidden layers narrow to a 300-unit bottleneck
    and widen back symmetrically, with a ReLU between every pair of linear
    layers and no activation after the last one.

    Parameters
    ----------
    length : int
        Clip duration in seconds.
    """

    # Hidden widths between the input and output layers (encoder -> decoder).
    _HIDDEN = (7000, 5000, 3000, 2000, 1000, 300, 1000, 2000, 3000, 5000, 7000)

    def __init__(self, length=1):

        super().__init__()  # run the parent class initialiser
        # int() keeps the layer width valid even for a fractional duration.
        self.length = int(length * 22050)

        widths = (self.length,) + self._HIDDEN + (self.length,)
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        layers.pop()  # no activation after the output layer

        # Same layer order as a hand-written Sequential, so state_dict keys
        # (fe.0, fe.2, ...) are unchanged.
        self.fe = nn.Sequential(*layers)

    def forward(self, x):
        """Denoise `x`.

        Returns a 1-D tensor of ``self.length`` samples when `x` holds a
        single clip (shape ``(length,)`` or ``(1, length)``), and a
        ``(batch, length)`` tensor when `x` holds several clips.
        """
        out = self.fe(x)
        if out.numel() == self.length:
            return out.reshape(self.length)   # single clip -> 1-D vector
        return out.reshape(-1, self.length)   # keep the batch dimension

In [None]:
def loss(result, answer):
    """Return the squared error summed per clip and averaged over clips.

    Parameters
    ----------
    result : torch.Tensor
        Network output.
    answer : torch.Tensor
        Clean target; must broadcast against `result`.

    Returns
    -------
    torch.Tensor
        Scalar tensor. For ``(batch, samples)`` inputs, dim 1 is the sample
        axis, so the squared error is summed over samples and then averaged
        over the batch.
    """
    diff = result - answer
    if diff.dim() < 2:
        # A lone 1-D clip has no dim 1 to sum over; treat it as a batch of one.
        diff = diff.reshape(1, -1)

    per_clip = T.sum(diff ** 2, dim=1)
    return T.mean(per_clip)

In [None]:
import warnings

# Install a blanket "ignore" filter: no category/module is given, so every
# warning raised after this cell runs is suppressed for the whole session.
# NOTE(review): presumably added to hide noisy audio-loading warnings during
# training — confirm, and consider narrowing the filter to that category.
warnings.filterwarnings(action = 'ignore')

In [None]:
def train():
    """Train the denoising autoencoder and save its weights to Google Drive.

    Runs 100 epochs of Adam (initial learning rate 1e-3, multiplied by 0.93
    after every epoch) over randomly generated (noisy, clean) clips, printing
    the mean loss, elapsed time and current learning rate after each epoch.

    Side effects
    ------------
    * Sets the module-level globals ``result`` (network output for the last
      training clip) and ``clean`` (the matching clean clip), both NumPy arrays.
    * Writes the model ``state_dict`` to ``/content/drive/MyDrive/model``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(result, clean)`` — the same arrays that are stored in the globals.
    """
    global result
    global clean

    # Fall back to the CPU when no GPU runtime is attached.
    device = 'cuda' if T.cuda.is_available() else 'cpu'
    model = Network_Model().to(device)

    learning_rate = 1e-3

    trainsets = audio_train()
    trainloader = data.DataLoader(trainsets, batch_size=1, shuffle=True)

    opt = optim.Adam(model.parameters(), lr=learning_rate)
    # Shrink the step size as training progresses so later updates are finer.
    # The decay has to go through the optimiser; rescaling a local variable
    # would leave the rate Adam actually uses untouched.
    scheduler = optim.lr_scheduler.ExponentialLR(opt, gamma=0.93)
    model.train(True)

    max_epoch = 100

    tt = time.time()

    for epoch in range(1, max_epoch+1):
        los = []
        for i, (noised_audio, audio_data) in enumerate(trainloader):
            # Explicit float32 tensors on the target device (this form also
            # avoids a warning during training).
            noised_audio = T.as_tensor(noised_audio, dtype=T.float32, device=device)
            audio_data = T.as_tensor(audio_data, dtype=T.float32, device=device)

            X_audio = model(noised_audio)

            losss = loss(X_audio, audio_data)
            opt.zero_grad()
            losss.backward()
            opt.step()

            los.append(losss.item())

        print()

        ls = np.mean(los)
        text = f' epoch: {epoch:4d}/{max_epoch:4d} | loss:{ls:0.4f}'
        print(text)
        print()
        # Parenthesised: '%' binds tighter than '-', so the subtraction must
        # happen before formatting.
        print('time : %0.2fs' % (time.time() - tt))
        print('learning rate : %0.4f' % opt.param_groups[0]['lr'])
        print()

        scheduler.step()

        print()

    # Keep the last prediction / target pair for listening tests.
    result = X_audio.detach().to('cpu').numpy()
    clean = audio_data.detach().to('cpu').numpy().reshape(-1)

    PATH = '/content/drive/MyDrive/model'
    T.save(model.state_dict(), PATH)

    return result, clean

In [None]:
# Run the training loop. train() publishes its last prediction/target pair in
# the globals `result` and `clean` and saves the weights to Google Drive.
train()

RuntimeError: ignored

In [None]:
# train() hands its last prediction/target pair over through the globals
# `result` and `clean`, so read them from there instead of unpacking the
# call's return value.
train()
X_audio, audio_data = result, clean

In [None]:
# Play `X_audio` at 22,050 Hz.
# NOTE(review): expected to be the network output assigned in the cell above — confirm.
ipd.Audio(X_audio, rate = 22050)

In [None]:
# Play `audio_data` at 22,050 Hz for comparison with the clip above.
# NOTE(review): expected to be the clean target clip assigned two cells up — confirm.
ipd.Audio(audio_data, rate = 22050)

In [None]:
class noise_canceler:
    """Apply a trained denoising network to an ``.m4a`` recording.

    Parameters
    ----------
    filename : str
        Path of the recording without the ``.m4a`` extension.
    model : torch.nn.Module, optional
        Network to use. When omitted, the module-level global ``model`` is
        looked up at call time.
    """

    def __init__(self, filename, model=None):
        self.filename = '{0}.m4a'.format(filename)
        self.model = model

        # Both waveforms are drawn on this figure.
        plt.figure(figsize=(15,4))

    def noise_canceling(self):
        """Denoise the recording, plot before/after, and write the result to disk.

        Loads up to the first 50 seconds at 22,050 Hz mono, plots the noisy
        waveform, runs the network over it one frame at a time, plots the
        cleaned waveform, and writes it to ``<filename>_unnoise.wav`` as
        24-bit PCM.

        Raises
        ------
        NameError
            If no model was passed to the constructor and no global ``model``
            exists.
        """
        net = self.model if self.model is not None else globals().get('model')
        if net is None:
            raise NameError(
                "name 'model' is not defined: pass model=... to noise_canceler "
                "or define a global `model` first")

        # sr: sample rate, offset: start position in the file, duration: how
        # many seconds to load (adjust to the recording length).
        data_noise, sample_rate = librosa.load(self.filename, sr=22050, mono=True, offset=0.0, duration=50, res_type='kaiser_best')

        # NOTE(review): `waveplot` appears to have been replaced by `waveshow`
        # in newer librosa releases — confirm against the installed version.
        librosa.display.waveplot(data_noise ,sr=22050, max_points=50000.0, x_axis='time', offset=0.0, max_sr=1000)

        data_clean = self._denoise(net, data_noise)

        librosa.display.waveplot(data_clean ,sr=22050, max_points=50000.0, x_axis='time', offset=0.0, max_sr=1000)

        # Writing can take a while for long recordings.
        sf.write('{0}_unnoise.wav'.format(self.filename), data_clean, sample_rate, subtype='PCM_24')

    @staticmethod
    def _denoise(net, samples):
        """Run `net` over `samples` frame by frame and return a NumPy array of equal length."""
        # The network takes fixed-width frames; use its own width when exposed.
        frame = int(getattr(net, 'length', 22050))
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else T.device('cpu')

        total = len(samples)
        n_frames = -(-total // frame)   # ceiling division
        # Zero-pad so the last, partial frame still has the full input width.
        padded = np.zeros(n_frames * frame, dtype=np.float32)
        padded[:total] = samples
        cleaned = np.empty_like(padded)

        with T.no_grad():   # inference only, no autograd graph needed
            for k in range(n_frames):
                lo, hi = k * frame, (k + 1) * frame
                x = T.as_tensor(padded[lo:hi], dtype=T.float32, device=device).unsqueeze(0)
                cleaned[lo:hi] = net(x).reshape(-1).cpu().numpy()

        return cleaned[:total]   # drop the padding again