In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ks_2samp
import git
from pathlib import Path
import warnings
import os
import matplotlib.pyplot as plt
import seaborn as sns
# Repository root: search upward from the current directory for the enclosing git repo.
ROOT_DIR = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
# Move the working directory to <repo>/utilities before importing transform_audio
# (presumably so the module resolves from the current directory — confirm).
# The working directory is not restored afterwards, which is why later paths
# are built from ROOT_DIR instead of being relative.
os.chdir(os.path.join(ROOT_DIR, "utilities"))
# NOTE(review): wildcard import — the names it brings in are not visible here,
# so it may shadow names defined in this notebook; prefer explicit imports.
from transform_audio import *
# Silence UserWarnings attributed to numpy.lib.npyio.
warnings.filterwarnings("ignore", category=UserWarning, module='numpy.lib.npyio')

## Choose data file

In [2]:
# --- Options a reader may change ---------------------------------------------
transform = 'erb'  # one of: 'fft', 'erb', 'stft', 'cwt'
group = 'statement'  # one of: 'gender', 'emotional_intensity', 'statement'
BAND = 'raw'  # one of: 'geo', 'raw'
BAND_TYPE = 'raw-transformed-data'
SUBSAMPLE_SIZE = int(1e5)  # set to np.inf to disable subsampling

# Tag used in the export file names: "full" when nothing is dropped, otherwise
# the subsample size in compact scientific notation (e.g. 1e+05 -> 1e5).
if SUBSAMPLE_SIZE == np.inf:
    COMPRESSION = "full"
else:
    COMPRESSION = f"{SUBSAMPLE_SIZE:.0e}".replace("+0", "")

# --- Load the transformed coefficients ----------------------------------------
FULL_DATA_NAME = f'ravdess_oriPitch_{transform}_{group}'
raw_data_path = os.path.join(ROOT_DIR, "transformed-data-audio", "raw-transformed-data", f'{FULL_DATA_NAME}.pickle')
# NOTE: unpickling can execute arbitrary code; only load files you produced or trust.
transformed_data = pd.read_pickle(raw_data_path)
# Labels of the groups being compared; later cells use groups[0] and groups[1].
groups = list(transformed_data['Original-Pitch'].keys())

## Symmetrize and sort data

In [3]:
# Replace every coefficient array of the first group with a sorted array that is
# symmetric about zero: the magnitudes |x| together with their negatives.
# NOTE: the arrays are overwritten in place and come out twice as long, so
# re-running this cell without reloading the data doubles them again.
group_0_bands = transformed_data['Original-Pitch'][groups[0]]
for band_key in group_0_bands:  # bands
    band_parts = group_0_bands[band_key]
    for part_key in band_parts:  # real, imag
        magnitudes = np.abs(band_parts[part_key])
        band_parts[part_key] = np.sort(np.concatenate([-magnitudes, magnitudes]))

In [4]:
# Replace every coefficient array of the second group with a sorted array that is
# symmetric about zero: the magnitudes |x| together with their negatives.
# NOTE: the arrays are overwritten in place and come out twice as long, so
# re-running this cell without reloading the data doubles them again.
group_1_bands = transformed_data['Original-Pitch'][groups[1]]
for band_key in group_1_bands:  # bands
    band_parts = group_1_bands[band_key]
    for part_key in band_parts:  # real, imag
        magnitudes = np.abs(band_parts[part_key])
        band_parts[part_key] = np.sort(np.concatenate([-magnitudes, magnitudes]))

## Symmetrize and sort complex data

In [5]:
# Pool the 'real' and 'imag' arrays of each band of the first group into one
# sorted array. Bands are re-keyed 0, 1, 2, ... in iteration order.
group_0_bands = transformed_data['Original-Pitch'][groups[0]]
group_0_complex_coeffs = {
    band_idx: np.sort(
        np.concatenate((group_0_bands[band_key]['real'], group_0_bands[band_key]['imag']))
    )
    for band_idx, band_key in enumerate(group_0_bands)
}

In [6]:
# Pool the 'real' and 'imag' arrays of each band of the second group into one
# sorted array. Bands are re-keyed 0, 1, 2, ... in iteration order.
group_1_bands = transformed_data['Original-Pitch'][groups[1]]
group_1_complex_coeffs = {
    band_idx: np.sort(
        np.concatenate((group_1_bands[band_key]['real'], group_1_bands[band_key]['imag']))
    )
    for band_idx, band_key in enumerate(group_1_bands)
}

## Subsample coefficients per group

In [7]:
# Subsample the 'real' coefficients of the first group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in band iteration order.
group_0_real = {}
group_0_real_size = {}

group_0_bands = transformed_data['Original-Pitch'][groups[0]]
for band_idx, band_key in enumerate(group_0_bands):
    real_values = group_0_bands[band_key]['real']
    n_values = len(real_values)
    # Recorded size is half the stored length (the symmetrization step
    # concatenates -|x| and |x|, doubling each array).
    group_0_real_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_0_real[band_idx] = real_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_0_real[band_idx] = real_values


In [8]:
# Subsample the 'imag' coefficients of the first group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in band iteration order.
group_0_imag = {}
group_0_imag_size = {}

group_0_bands = transformed_data['Original-Pitch'][groups[0]]
for band_idx, band_key in enumerate(group_0_bands):
    imag_values = group_0_bands[band_key]['imag']
    n_values = len(imag_values)
    # Recorded size is half the stored length (the symmetrization step
    # concatenates -|x| and |x|, doubling each array).
    group_0_imag_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_0_imag[band_idx] = imag_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_0_imag[band_idx] = imag_values


In [9]:
# Subsample the pooled (real + imag) coefficients of the first group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in iteration order.
group_0_complex = {}
group_0_complex_size = {}

for band_idx, pooled_key in enumerate(group_0_complex_coeffs):
    pooled_values = group_0_complex_coeffs[pooled_key]
    n_values = len(pooled_values)
    # Recorded size is half the pooled length.
    # NOTE(review): the pooled array holds both the symmetrized real and imag
    # parts, so this equals the combined real + imag count — confirm intended.
    group_0_complex_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_0_complex[band_idx] = pooled_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_0_complex[band_idx] = pooled_values

In [10]:
# Subsample the 'real' coefficients of the second group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in band iteration order.
group_1_real = {}
group_1_real_size = {}

group_1_bands = transformed_data['Original-Pitch'][groups[1]]
for band_idx, band_key in enumerate(group_1_bands):
    real_values = group_1_bands[band_key]['real']
    n_values = len(real_values)
    # Recorded size is half the stored length (the symmetrization step
    # concatenates -|x| and |x|, doubling each array).
    group_1_real_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_1_real[band_idx] = real_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_1_real[band_idx] = real_values


In [11]:
# Subsample the 'imag' coefficients of the second group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in band iteration order.
group_1_imag = {}
group_1_imag_size = {}

group_1_bands = transformed_data['Original-Pitch'][groups[1]]
for band_idx, band_key in enumerate(group_1_bands):
    imag_values = group_1_bands[band_key]['imag']
    n_values = len(imag_values)
    # Recorded size is half the stored length (the symmetrization step
    # concatenates -|x| and |x|, doubling each array).
    group_1_imag_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_1_imag[band_idx] = imag_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_1_imag[band_idx] = imag_values


In [12]:
# Subsample the pooled (real + imag) coefficients of the second group, band by band.
# Both dictionaries are keyed 0, 1, 2, ... in iteration order.
group_1_complex = {}
group_1_complex_size = {}

for band_idx, pooled_key in enumerate(group_1_complex_coeffs):
    pooled_values = group_1_complex_coeffs[pooled_key]
    n_values = len(pooled_values)
    # Recorded size is half the pooled length.
    # NOTE(review): the pooled array holds both the symmetrized real and imag
    # parts, so this equals the combined real + imag count — confirm intended.
    group_1_complex_size[band_idx] = n_values // 2
    if n_values > SUBSAMPLE_SIZE:
        # Evenly spaced positions from the first to the last element;
        # fractional positions are truncated by dtype=int.
        keep = np.linspace(0, n_values - 1, SUBSAMPLE_SIZE, dtype=int)
        group_1_complex[band_idx] = pooled_values[keep]
    else:
        # Small enough already: store the array itself (no copy is made).
        group_1_complex[band_idx] = pooled_values

## Export to pickle

In [13]:
# Write the six subsampled coefficient dictionaries (complex / real / imag for
# each of the two groups) to <repo>/transformed-data-audio/subsample-data.
subsample_dir = os.path.join(ROOT_DIR, "transformed-data-audio", "subsample-data")
# to_pickle does not create missing parent folders, so make sure the target
# directory exists (e.g. on a fresh clone) instead of failing on the first write.
os.makedirs(subsample_dir, exist_ok=True)

# (payload, part tag used in the file name, group label)
subsample_exports = [
    (group_0_complex, "comp", groups[0]),
    (group_0_real, "re", groups[0]),
    (group_0_imag, "im", groups[0]),
    (group_1_complex, "comp", groups[1]),
    (group_1_real, "re", groups[1]),
    (group_1_imag, "im", groups[1]),
]
for coeffs_by_band, part_tag, group_label in subsample_exports:
    file_name = f"ravdess-{COMPRESSION}{BAND}{part_tag}-{transform}-{group_label}.pickle"
    pd.to_pickle(coeffs_by_band, os.path.join(subsample_dir, file_name))

In [14]:
# Write the six per-band size dictionaries (complex / real / imag for each of
# the two groups) to <repo>/transformed-data-audio/subsample-data, using the
# coefficient file names with a "-size" suffix.
subsample_dir = os.path.join(ROOT_DIR, "transformed-data-audio", "subsample-data")
# to_pickle does not create missing parent folders, so make sure the target
# directory exists (e.g. on a fresh clone) instead of failing on the first write.
os.makedirs(subsample_dir, exist_ok=True)

# (payload, part tag used in the file name, group label)
size_exports = [
    (group_0_complex_size, "comp", groups[0]),
    (group_0_real_size, "re", groups[0]),
    (group_0_imag_size, "im", groups[0]),
    (group_1_complex_size, "comp", groups[1]),
    (group_1_real_size, "re", groups[1]),
    (group_1_imag_size, "im", groups[1]),
]
for sizes_by_band, part_tag, group_label in size_exports:
    file_name = f"ravdess-{COMPRESSION}{BAND}{part_tag}-{transform}-{group_label}-size.pickle"
    pd.to_pickle(sizes_by_band, os.path.join(subsample_dir, file_name))