# importação de bibliotecas

In [26]:
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
from scipy.signal import find_peaks, butter, filtfilt
from scipy.fft import fft, ifft
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Declaração de funções

In [2]:
def lowpass_filter(data, cutoff_freq, sample_rate, filter_order=5):
    nyquist_rate = 0.5 * sample_rate
    normalized_cutoff = cutoff_freq/ nyquist_rate
    b_coeficiente,a_coeficiente = butter(filter_order, normalized_cutoff, btype='low',analog=False)
    filtered_signal = filtfilt(b_coeficiente,a_coeficiente,data)
    return filtered_signal

def normalize_audio_int32(data):
    max_val = np.max(np.abs(data))
    normalized_data = data / max_val * 2147483647
    return normalized_data.astype(np.int32)

# Input do audio

In [3]:
sample_rate, audio_signal = wavfile.read('teste.wav')
time_axis = np.linspace(0, len(audio_signal) / sample_rate, len(audio_signal))

# Se o sinal tiver mais de um canal, selecionar apenas o primeiro canal
if len(audio_signal.shape) > 1:
    audio_signal = audio_signal[:, 0]

print(f"Sample Rate: {sample_rate}")
print(f"Audio Signal Shape: {audio_signal.shape}")
print(f"Max Value (Original Signal): {np.max(audio_signal)}")
print(f"Min Value (Original Signal): {np.min(audio_signal)}")

# Aplicar FFT ao sinal de áudio
fft_signal = fft(audio_signal)
frequencies = np.fft.fftfreq(len(audio_signal), 1 / sample_rate)

# Filtrar frequências acima de 1000 Hz
cutoff_frequency = 1000
fft_signal[np.abs(frequencies) > cutoff_frequency] = 0

# Transformar de volta ao domínio do tempo
filtered_signal = np.real(ifft(fft_signal))

# variáveis para treinamento da IA
threshold = np.mean(audio_signal) + 2 * np.std(audio_signal)
labels = (audio_signal > threshold).astype(int)
max_val = np.max(np.abs(audio_signal))
normalized_signal = audio_signal / max_val

windows_size = 100
overlap = 50
x =[]
y = []

for i in range(0,len(normalized_signal) - windows_size +1, windows_size - overlap):
    x.append(normalized_signal[i:i + windows_size])
    y.append(labels[i + windows_size//2])

X = np.array(x)
Y = np.array(y)

X_tensor = torch.tensor(X, dtype=torch.float32)
Y_tensor = torch.tensor(Y, dtype=torch.float32)

dataset = TensorDataset(X_tensor,Y_tensor)
dataLoader = DataLoader(dataset,batch_size=32, shuffle=True)

class PeakDetector(nn.Module):
    def __init__(self):
        super(PeakDetector, self).__init__()
        self.fc1 = nn.Linear(windows_size, 100) # teste com 5 fc e com 200 de de inicio deu errado
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50,10)
        self.fc4 = nn.Linear(10,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.sigmoid(self.fc4(x))
        return x
    
model = PeakDetector()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001) # teste com 0,0001 deu errado

num_epochs = 400
for epoch in range(num_epochs):
    for inputs, targets in dataLoader:
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Eposh {epoch+1}/{num_epochs}, loss: {loss.item()}')

with torch.no_grad():
    test_signal = normalized_signal[:windows_size]
    teste_tensor = torch.tensor(test_signal, dtype=torch.float32).unsqueeze(0)
    predicted_peak = model(teste_tensor).item()

    print(f'Probabilidade de pico na janela de teste: {predicted_peak}')

  sample_rate, audio_signal = wavfile.read('teste.wav')


Sample Rate: 96000
Audio Signal Shape: (1681979,)
Max Value (Original Signal): 0.9989989995956421
Min Value (Original Signal): -0.9989989995956421
Eposh 1/400, loss: 2.004200041483273e-06
Eposh 2/400, loss: 4.2192486944259144e-06
Eposh 3/400, loss: 2.9150794489396503e-07
Eposh 4/400, loss: 2.4866076273610815e-06
Eposh 5/400, loss: 0.0002540891873650253
Eposh 6/400, loss: 1.528683969809208e-05
Eposh 7/400, loss: 0.00017635001859162003
Eposh 8/400, loss: 2.957467870601249e-07
Eposh 9/400, loss: 1.1720426300598774e-06
Eposh 10/400, loss: 3.4242253605043516e-05
Eposh 11/400, loss: 0.00016208940360229462
Eposh 12/400, loss: 4.9534417456698066e-08
Eposh 13/400, loss: 1.601879340284995e-08
Eposh 14/400, loss: 7.0644162519784e-08
Eposh 15/400, loss: 4.627894156783441e-08
Eposh 16/400, loss: 9.57718682137454e-10
Eposh 17/400, loss: 2.286329390699393e-06
Eposh 18/400, loss: 5.560892532230355e-06
Eposh 19/400, loss: 0.12766791880130768
Eposh 20/400, loss: 2.7310366590427293e-07
Eposh 21/400, loss

In [4]:
# passar os picos detectados para um array
detected_peaks = []
with torch.no_grad():
    for i in range(len(normalized_signal) - windows_size):
        window = normalized_signal[i:i + windows_size]
        window_tensor = torch.tensor(window, dtype=torch.float32).unsqueeze(0)
        peak_prob = model(window_tensor).item()
        detected_peaks.append(peak_prob)

In [32]:
len_to_one_second = 0
for i in range(len(time_axis)):
    if time_axis[i] == 1:
        len_to_one_second = i
        break

aux = 0
tempos = []
for i in range(len(detected_peaks)):
    if detected_peaks[i] >= 0.8 and detected_peaks[i+1] >= 0.8:
        aux =+ 1
        if aux >= len_to_one_second:
            tempos.append(time_axis[i])
    elif detected_peaks[i] >= 0.8 and detected_peaks[i+1] < 0.8:
        aux = 0


In [37]:
tempos = np.int16(tempos)
teste = []

for i in range(len(tempos)):
    if tempos[i] not in teste:
        teste.append(tempos[i])

aux = []
for i in range(len(teste) -1):
    if teste[i] == teste[i+1] -1:
        aux.append(teste[i])

print(aux)
print(teste)


[1, 2, 3, 4, 11, 12, 13, 14]
[1, 2, 3, 4, 5, 11, 12, 13, 14, 15]
