In [25]:
from pathlib import Path
import librosa

# Analyze trimmed wavs

In [26]:
# Root directories: the trimmed clips and the untrimmed source clips.
trimmed_files = Path(".").joinpath("trimmed_files")
original_files = Path(".").joinpath("datasets")

# Count both trees with the same pattern that the duration cells below iterate
# with, so the two numbers describe exactly the files that get analysed.
# Note: "**/*/*.wav" only matches wav files that sit inside at least one
# sub-directory of the root.
WAV_PATTERN = "**/*/*.wav"
trimmed_count = len(list(trimmed_files.glob(WAV_PATTERN)))
original_count = len(list(original_files.glob(WAV_PATTERN)))

# The counts are computed outside the f-strings: reusing double quotes inside
# a double-quoted f-string is a SyntaxError on Python versions before 3.12.
print(f"trimmed files ... {trimmed_count}")
print(f"original files ... {original_count}")

trimmed files ... 2041
original files ... 2041


In [27]:
def _wav_durations(root):
    """Return the duration, in seconds, of every wav file found under ``root``.

    Args:
        root: Directory (``pathlib.Path``) searched with the pattern
            ``"**/*/*.wav"``.

    Returns:
        list of float: one duration per matched file, in glob order.
    """
    durations = []
    for audio_file in root.glob("**/*/*.wav"):
        # sr=None: load at the file's own sampling rate rather than resampling.
        audio_data, sampling_rate = librosa.load(audio_file, sr=None)
        durations.append(librosa.get_duration(y=audio_data, sr=sampling_rate))
    return durations


# Separate names per dataset so neither result overwrites the other.
trimmed_durations = _wav_durations(trimmed_files)
print(f"Shortest trimmed audio file: {min(trimmed_durations)}")

original_durations = _wav_durations(original_files)
print(f"Shortest original audio file: {min(original_durations)}")


Shortest trimmed audio file: 0.273
Shortest original audio file: 0.3933


### Get time difference between trimmed and original

In [28]:
# Collect the file lists under new names so that `trimmed_files` and
# `original_files` remain directory Paths; rebinding them to lists made this
# cell (and the duration cell above) fail with AttributeError when re-run.
trimmed_paths = sorted(trimmed_files.glob("**/*/*.wav"))
original_paths = sorted(original_files.glob("**/*/*.wav"))

differences = []
# Files are paired purely by sorted position, which assumes both trees sort
# into the same order -- TODO confirm the trimmed tree mirrors the original.
# strict=True raises ValueError if the two lists differ in length instead of
# silently dropping the unmatched tail.
for trimmed, original in zip(trimmed_paths, original_paths, strict=True):
    audio_data_trimmed, trimmed_rate = librosa.load(trimmed, sr=None)
    trimmed_len = librosa.get_duration(y=audio_data_trimmed, sr=trimmed_rate)

    audio_data_original, original_rate = librosa.load(original, sr=None)
    original_len = librosa.get_duration(y=audio_data_original, sr=original_rate)

    # Positive value = seconds removed by trimming.
    differences.append(original_len - trimmed_len)

print(f"Max diff: {max(differences)}, min diff: {min(differences)}")

Max diff: 0.21183999999999997, min diff: 0.0


In [4]:
import pandas as pd
import numpy as np

def compute_statistics(file_path):
    """Summarise the second column of a CSV file.

    Args:
        file_path: CSV location, handed straight to ``pd.read_csv``.

    Returns:
        tuple: ``(mean, standard deviation)`` of the column at positional
        index 1, as produced by ``np.mean`` and ``np.std`` with their
        default arguments.
    """
    # Positional lookup: the column at index 1, whatever its header is.
    values = pd.read_csv(file_path).iloc[:, 1]
    return np.mean(values), np.std(values)

# Example: summarise the second column of one results file.
# Replace the path below with your own CSV file.
file_path = 'results/290511d6-4bec-4915-b90b-10d0ab5cb884/results.csv'
average, std_deviation = compute_statistics(file_path)
print(f"Average: {average}", f"Standard Deviation: {std_deviation}", sep="\n")


Average: 0.8444983362019506
Standard Deviation: 0.004936863129041041


Check librosa load effect on results


In [10]:
import pandas as pd

def compute_stats(file_path):
    """Print and return the mean and standard deviation of each metric column.

    The first column of the CSV is skipped. Every remaining numeric column is
    summarised; non-numeric columns are ignored rather than raising.

    Args:
        file_path: CSV location (path or file-like object) handed to
            ``pd.read_csv``.

    Returns:
        dict: ``{column_name: {"mean": float, "std": float}}``. The standard
        deviation is pandas' default sample estimate (``ddof=1``).
    """
    df = pd.read_csv(file_path)

    # Exclude the first column
    df_excluded = df.iloc[:, 1:]

    # numeric_only=True: pandas >= 2 raises TypeError on string/object columns
    # otherwise, so restrict the reduction to columns that can be averaged.
    means = df_excluded.mean(numeric_only=True)
    std_devs = df_excluded.std(numeric_only=True)

    results = {}
    # Iterate over the reduced index so only summarised columns are reported.
    for col in means.index:
        print(f"Column: {col}")
        print(f"Mean: {means[col]}")
        print(f"Standard Deviation: {std_devs[col]}")
        results[col] = {"mean": float(means[col]), "std": float(std_devs[col])}

    # Previously `results` was created but never filled or returned.
    return results

# Example: report per-column statistics for one results file.
# Replace the path below with your own CSV file.
file_path = 'results/8c7956e8-6725-48fc-95a8-1fd5a7753ee6/results.csv'
compute_stats(file_path)

# Reload the same file to look at individual rows.
data = pd.read_csv(file_path)

# Rank on the column at positional index 1 and keep the row labels of its
# ten largest values.
second_column = data.iloc[:, 1]
top_10_indices = second_column.nlargest(10).index

# Fetch the complete rows behind those labels.
top_10_rows = data.loc[top_10_indices]

# Lift pandas' column-width limit so the "params" strings print untruncated.
pd.set_option("display.max_colwidth", None)
print("\nRows with the 10 highest values from the second column:\n", top_10_rows["params"])

Column: mean_test_accuracy
Mean: 0.832396528973035
Standard Deviation: 0.004756109074516514

Column: mean_test_recall
Mean: 0.819455737704918
Standard Deviation: 0.005852096080797522

Column: mean_test_specificity
Mean: 0.8669211462450591
Standard Deviation: 0.008891936101145092


Rows with the 10 highest values from the second column:
 64     {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', 'classifier__kernel': 'poly'}
898    {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', 'classifier__kernel': 'poly'}
109    {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', 'classifier__kernel': 'poly'}
333    {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', 'classifier__kernel': 'poly'}
691    {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', 'classifier__kernel': 'poly'}
104    {'classifier__C': 2600, 'classifier__degree': 5, 'classifier__gamma': 'auto', '