# Automated Functionalities 


The code below reads `media/path.json` and copies each listed audio file into the folder for its traffic-density class (`idle`, `light`, `medium`, or `heavy`). Each copy is renamed to a unique ID generated with `uuid`.

In [4]:
# import json
# import shutil
# import uuid
# import os

# with open('media/path.json', 'r') as meta_data:
#     data = json.load(meta_data)
#     for item in data :
#         file_name = uuid.uuid4()
#         shutil.copy(item["audio"],  f'traffic_dataset/{["idle", "light", "medium", "heavy"][item["density"]]}/{file_name}.wav')

## Slice Files 
- The code below slices audio files into clips of a defined duration and writes each clip to the folder of its class.
- Put the files to slice in the input folder `BASE_INPUT_DIR`. The class of each file is read from its file name, which must contain `idle`, `light`, `medium`, or `heavy`; files without one of these words are skipped. Each file in the folder is split into clips, which are stored in a folder with the class name inside `BASE_OUTPUT_DIR`.
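- Clear `BASE_INPUT_DIR` and load it with new files before the next run (processed files are moved to the `backup` folder automatically). The class is read from each file name, so the `CLASS_NAME` variable no longer needs to be changed.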
- **\*\*\*\*TRY NOT TO RUN THE CODE MULTIPLE TIMES WITH THE SAME INPUT FILES\*\*\*\*** - every run writes new clips under fresh unique IDs, so the dataset would contain duplicate clips of the same audio.

In [8]:
import os
import librosa
import soundfile as sf
import uuid
import shutil

def slice_files(BASE_INPUT_DIR, BASE_OUTPUT_DIR, SLICE_LENGHT=5):
    """Split every class-labelled ``.wav`` file in a folder into fixed-length clips.

    The class of a file is the first of ``idle``, ``light``, ``medium``,
    ``heavy`` that occurs in its file name. Each clip is written to
    ``BASE_OUTPUT_DIR/<class>/<uuid4>.wav``. After all clips of a file have
    been written, the source file is moved into a ``backup`` folder in the
    current working directory so it is not sliced again on the next run.

    Args:
        BASE_INPUT_DIR: Folder containing the unprocessed ``.wav`` files.
        BASE_OUTPUT_DIR: Root folder of the dataset; it and the per-class
            sub-folders are created when missing (including parent folders).
        SLICE_LENGHT: Clip duration in seconds. Must be positive; fractional
            values are allowed.

    Raises:
        ValueError: If ``SLICE_LENGHT`` is not positive.

    Notes:
        * Files whose name contains no class keyword are skipped without being
          decoded and are left in ``BASE_INPUT_DIR``.
        * The last clip of a file may be shorter than ``SLICE_LENGHT``.
        * NOTE(review): ``librosa.load`` is called without ``sr``, so audio is
          resampled to librosa's default rate, while the prediction cell of
          this notebook loads audio at 8000 Hz -- confirm this is intended.
    """
    if SLICE_LENGHT <= 0:
        raise ValueError("SLICE_LENGHT must be a positive number of seconds")

    # Checked in this order so a name matching several keywords resolves
    # exactly as the original if/elif chain did.
    class_names = ("idle", "light", "medium", "heavy")

    # list files in the input directory
    files = [f for f in os.listdir(BASE_INPUT_DIR) if f.endswith('.wav')]

    # makedirs (not mkdir) so a nested output path works and re-runs are harmless
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)
    # backup folder that receives the processed source files
    os.makedirs("backup", exist_ok=True)

    for file in files:
        # Decide the class before decoding so unlabelled files cost nothing.
        CLASS_NAME = next((name for name in class_names if name in file), None)
        if CLASS_NAME is None:
            continue

        # sf.write does not create directories, so make sure the class folder exists.
        class_dir = os.path.join(BASE_OUTPUT_DIR, CLASS_NAME)
        os.makedirs(class_dir, exist_ok=True)

        source_path = os.path.join(BASE_INPUT_DIR, file)
        data, sr = librosa.load(source_path)

        # int() keeps range() valid when SLICE_LENGHT is fractional
        samples_per_slice = int(sr * SLICE_LENGHT)
        for start in range(0, len(data), samples_per_slice):
            segment = data[start:start + samples_per_slice]
            clip_id = uuid.uuid4()  # unique id so clips never overwrite each other
            sf.write(os.path.join(class_dir, f"{clip_id}.wav"), segment, sr)

        # move the processed file away so it is not sliced again
        shutil.move(source_path, "backup")


if __name__ == "__main__":
    # Run configuration. The class of each file is taken from its file name
    # inside slice_files, so no class name is configured here.
    SLICE_LENGHT = 5  # duration of every clip, in seconds
    BASE_OUTPUT_DIR = "traffic_dataset"  # root folder that receives the clips
    BASE_INPUT_DIR = "../full_audio_files"  # folder holding the unprocessed recordings

    slice_files(
        BASE_INPUT_DIR=BASE_INPUT_DIR,
        BASE_OUTPUT_DIR=BASE_OUTPUT_DIR,
        SLICE_LENGHT=SLICE_LENGHT,
    )

## Importing Model For testing

In [9]:
import librosa
import numpy as np
def extract_features(y, sr):
    """Summarise an audio signal as one flat feature vector.

    Args:
        y: Audio samples, passed straight to the librosa feature functions.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The array built by ``np.hstack`` from the per-coefficient means of
        13 MFCCs followed by the means of the spectral centroid, spectral
        rolloff, zero-crossing rate and RMS energy, in that order.
    """
    mfcc_matrix = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Per-frame tracks; index 0 selects the first row of each librosa result.
    frame_tracks = [
        librosa.feature.spectral_centroid(y=y, sr=sr)[0],
        librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
        librosa.feature.zero_crossing_rate(y)[0],
        librosa.feature.rms(y=y)[0],
    ]

    # One mean per MFCC coefficient, then one scalar mean per track.
    pieces = [np.mean(mfcc_matrix, axis=1)]
    for track in frame_tracks:
        pieces.append(np.mean(track))

    return np.hstack(pieces)


In [16]:
import pickle

# Settings for the test run.
SAMPLE_RATE = 8000  # Hz; the test recording is resampled to this rate on load
CLIP_SECONDS = 5    # duration of each clip handed to the model, in seconds

file_path = "../full_audio_files/test/light, main gate  22 Mar, 10.12 AM.wav"

# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load model.pkl / label_encoder.pkl from a source you trust.
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

with open("label_encoder.pkl", 'rb') as file:
    loaded_label = pickle.load(file)

# Extract one feature vector per clip of the input file.
# The last clip may be shorter than CLIP_SECONDS.
data, sr = librosa.load(file_path, sr=SAMPLE_RATE)
samples_per_clip = sr * CLIP_SECONDS
test_data = [
    extract_features(data[start:start + samples_per_clip], sr)
    for start in range(0, len(data), samples_per_clip)
]
if not test_data:
    raise ValueError(f"No audio samples found in {file_path!r}")

# Predict a class index for every clip.
preds = loaded_model.predict(test_data)

# Aggregate by majority vote instead of averaging the class indices.
# Presumably loaded_label is a scikit-learn LabelEncoder, whose indices follow
# alphabetical order of the labels rather than traffic density, so the mean of
# indices could name a class no clip was predicted as -- TODO confirm.
# On a tie the smallest class index wins (np.unique sorts, argmax takes the first).
class_indices, votes = np.unique(preds, return_counts=True)
aggregate_class_index = class_indices[np.argmax(votes)]
forecast = loaded_label.inverse_transform([aggregate_class_index])[0]
forecast

'medium'