In [15]:
import librosa
import os
import pickle
import numpy as np
import pandas as pd
import random
from tqdm import tqdm

from preprocessing import perform_vad, resample_wav, padding, cut_wav_into_clips, extract_features


# Feature extraction

In [9]:
# Root folder of the audio clips; entries in the list files (e.g. "silence/<clip>.wav")
# are relative to it. Built with os.path.join so the notebook is not tied to the
# Windows "\\" separator (the resulting string is unchanged on Windows).
wav_dir = os.path.join("train", "audio")

## `unknown` detection / `label` classification

### Training part

In [10]:
# Read the list file that drives the training feature extraction:
# one relative clip path per line, surrounding whitespace removed.
labels_path = 'train\\testing_list.txt'
with open(labels_path, "r") as list_file:
    lines = list_file.readlines()
file_paths_train = list(map(str.strip, lines))


In [11]:
# Build the training feature set as a list of [features, label] pairs.
features_train = []

# Every clip is preprocessed through the same scratch file, which is
# overwritten on each iteration.
wav_file2 = 'working_sample.wav'

for file in tqdm(file_paths_train, total=len(file_paths_train), leave=True):
    # List entries are relative to `wav_dir`. This must not be the builtin
    # `dir`, which makes os.path.join raise TypeError on a fresh kernel.
    wav_file = os.path.join(wav_dir, file)
    # The first path component of a list entry is used as the class label.
    label = file.split('/')[0]

    # Preprocess the data.
    # NOTE(review): presumably VAD trimming, padding to 1000 ms and resampling
    # to 8000 Hz -- confirm the units against preprocessing.py.
    perform_vad(wav_file, wav_file2)
    padding(wav_file2, wav_file2, 1000)
    resample_wav(wav_file2, wav_file2, 8000)

    # Extract features from the preprocessed scratch file.
    features = extract_features(wav_file2)

    # Add to the list
    features_train.append([features, label])

  1%|          | 73/6835 [00:03<05:46, 19.50it/s]


KeyboardInterrupt: 

In [15]:
# Persist the training features. The output folder is created first so that a
# long extraction run cannot fail at the very end on a missing directory.
features_training_path = os.path.join('extracted_features', 'features_training.pkl')
os.makedirs(os.path.dirname(features_training_path), exist_ok=True)
with open(features_training_path, 'wb') as f:
    pickle.dump(features_train, f)

### Validation part

In [12]:
# Read the list file that drives the validation feature extraction:
# one relative clip path per line, surrounding whitespace removed.
labels_path = 'train\\validation_list.txt'
with open(labels_path, "r") as list_file:
    lines = list_file.readlines()
file_paths_val = list(map(str.strip, lines))

In [13]:
# Build the validation feature set as a list of [features, label] pairs.
features_val = []

# Every clip is preprocessed through the same scratch file, which is
# overwritten on each iteration.
wav_file2 = 'working_sample.wav'

for file in tqdm(file_paths_val, total=len(file_paths_val), leave=True):
    # List entries are relative to `wav_dir`. This must not be the builtin
    # `dir`, which makes os.path.join raise TypeError on a fresh kernel.
    wav_file = os.path.join(wav_dir, file)
    # The first path component of a list entry is used as the class label.
    label = file.split('/')[0]

    # Preprocess the data.
    # NOTE(review): presumably VAD trimming, padding to 1000 ms and resampling
    # to 8000 Hz -- confirm the units against preprocessing.py.
    perform_vad(wav_file, wav_file2)
    padding(wav_file2, wav_file2, 1000)
    resample_wav(wav_file2, wav_file2, 8000)

    # Extract features from the preprocessed scratch file.
    features = extract_features(wav_file2)

    # Collect into the *validation* list. Appending to features_train here
    # would leave features_val empty and leak validation clips into training.
    features_val.append([features, label])

  1%|          | 69/6798 [00:04<06:36, 16.96it/s]


KeyboardInterrupt: 

In [4]:
# Persist the validation features. The output folder is created first so that
# a long extraction run cannot fail at the very end on a missing directory.
features_validation_path = os.path.join('extracted_features', 'features_validation.pkl')
os.makedirs(os.path.dirname(features_validation_path), exist_ok=True)
with open(features_validation_path, 'wb') as f:
    pickle.dump(features_val, f)

## `silence` detection

### Silence clips list creation

In [None]:
# Cut every background-noise recording into fixed-length clips that are stored
# in the "silence" folder next to the other class folders.
input_folder = os.path.join("train", "audio", "_background_noise_")
output_folder = os.path.join("train", "audio", "silence")

# exist_ok avoids the separate existence check and makes the cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(input_folder):
    # Only .wav files are cut; anything else in the folder is skipped.
    if filename.endswith(".wav"):
        cut_wav_into_clips(
            os.path.join(input_folder, filename),
            output_folder,
            filename,
            clip_duration_ms=1000,
        )

In [23]:
# Split the generated silence clips 50/50 into two list files whose entries use
# the "silence/<clip>" format (label as first path component).
SILENCE_SPLIT_SEED = 42

dir_path = os.path.join("train", "audio", "silence")

# os.listdir returns entries in arbitrary order; sorting first and shuffling
# with a dedicated seeded generator makes the split reproducible across runs
# without touching the global `random` state.
file_list = sorted(os.listdir(dir_path))
random.Random(SILENCE_SPLIT_SEED).shuffle(file_list)

# Split the list into two parts (the second part gets the extra clip when the
# count is odd).
split_index = len(file_list) // 2
part1 = file_list[:split_index]
part2 = file_list[split_index:]

# Define the output file paths
file1_path = os.path.join("train", "silence_testing_list.txt")
file2_path = os.path.join("train", "silence_validation_list.txt")

# Write one "silence/<clip>" entry per line to each list file
with open(file1_path, "w") as f1:
    f1.writelines("silence/" + file + "\n" for file in part1)

with open(file2_path, "w") as f2:
    f2.writelines("silence/" + file + "\n" for file in part2)

### Training part

In [34]:
# Training list for silence detection: the entries of testing_list.txt followed
# by the entries of silence_testing_list.txt.
# No output file is opened here: nothing was ever written to 'combined.txt', so
# opening it for writing only left an empty stray file behind.
train_list_path = os.path.join('train', 'testing_list.txt')
silence_train_list_path = os.path.join('train', 'silence_testing_list.txt')
with open(train_list_path, 'r') as file1, open(silence_train_list_path, 'r') as file2:
    lines = file1.readlines() + file2.readlines()
file_paths_train_silence = [line.strip() for line in lines]

In [35]:
# Build the silence-detection training set as a list of [features, label] pairs.
features_train_silence = []

# Every clip is preprocessed through the same scratch file, which is
# overwritten on each iteration.
wav_file2 = 'working_sample.wav'

for file in tqdm(file_paths_train_silence, total=len(file_paths_train_silence), leave=True):
    # List entries are relative to `wav_dir`. This must not be the builtin
    # `dir`, which makes os.path.join raise TypeError on a fresh kernel.
    wav_file = os.path.join(wav_dir, file)
    # The first path component of a list entry is used as the class label.
    label = file.split('/')[0]

    # Preprocess the data (without VAD): padding reads the original clip and
    # writes the scratch file, which is then resampled in place.
    # NOTE(review): presumably padding to 1000 ms and resampling to 8000 Hz --
    # confirm the units against preprocessing.py.
    padding(wav_file, wav_file2, 1000)
    resample_wav(wav_file2, wav_file2, 8000)

    # Extract features from the preprocessed scratch file.
    features = extract_features(wav_file2)

    # Add to the list
    features_train_silence.append([features, label])

100%|██████████| 7036/7036 [07:15<00:00, 16.14it/s]


In [36]:
# Persist the silence-detection training features. The output folder is created
# first so that a long extraction run cannot fail at the end on a missing directory.
silence_training_path = os.path.join('extracted_features', 'silence_detection_training.pkl')
os.makedirs(os.path.dirname(silence_training_path), exist_ok=True)
with open(silence_training_path, 'wb') as f:
    pickle.dump(features_train_silence, f)

### Validation part

In [37]:
# Validation list for silence detection: the entries of validation_list.txt
# followed by the entries of silence_validation_list.txt, whitespace-stripped.
lines = []
for list_path in ('train\\validation_list.txt', 'train\\silence_validation_list.txt'):
    with open(list_path, 'r') as list_file:
        lines.extend(list_file.readlines())
file_paths_val_silence = [entry.strip() for entry in lines]

In [38]:
# Build the silence-detection validation set as a list of [features, label] pairs.
features_val_silence = []

# Every clip is preprocessed through the same scratch file, which is
# overwritten on each iteration.
wav_file2 = 'working_sample.wav'

for file in tqdm(file_paths_val_silence, total=len(file_paths_val_silence), leave=True):
    # List entries are relative to `wav_dir`. This must not be the builtin
    # `dir`, which makes os.path.join raise TypeError on a fresh kernel.
    wav_file = os.path.join(wav_dir, file)
    # The first path component of a list entry is used as the class label.
    label = file.split('/')[0]

    # Preprocess the data (without VAD): padding reads the original clip and
    # writes the scratch file, which is then resampled in place.
    # NOTE(review): presumably padding to 1000 ms and resampling to 8000 Hz --
    # confirm the units against preprocessing.py.
    padding(wav_file, wav_file2, 1000)
    resample_wav(wav_file2, wav_file2, 8000)

    # Extract features from the preprocessed scratch file.
    features = extract_features(wav_file2)

    # Add to the list
    features_val_silence.append([features, label])

100%|██████████| 6999/6999 [07:52<00:00, 14.81it/s]


In [39]:
# Persist the silence-detection validation features. The output folder is created
# first so that a long extraction run cannot fail at the end on a missing directory.
silence_validation_path = os.path.join('extracted_features', 'silence_detection_validation.pkl')
os.makedirs(os.path.dirname(silence_validation_path), exist_ok=True)
with open(silence_validation_path, 'wb') as f:
    pickle.dump(features_val_silence, f)