Figuring out how much noise to add to joint coordinates and wav files for the data augmentation

In [1]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import soundfile as sf

Mean Standard Deviation of each coordinate over studyseqs of all Conds (excluding Cond_M):

In [78]:
# Conditions whose motion is averaged over (Cond_M is deliberately left out).
Conds = ['Cond_BA', 'Cond_BT', 'Cond_N', 'Cond_SA', 'Cond_SB', 'Cond_SC', 'Cond_SD', 'Cond_SE']
Studyseqs = range(1, 41)

# Use the header of one file as the index of the accumulator.
cols = pd.read_csv('Dataset/Cond_BA/motion_coords/Studyseq1_joint_coords.csv').columns
mean_stds = pd.Series(np.zeros(len(cols)), index=cols)

for cond in Conds:
    for i in Studyseqs:
        coord_path = f'Dataset/{cond}/motion_coords/Studyseq{i}_joint_coords.csv'
        coords = pd.read_csv(coord_path)
        # Per-column population standard deviation. `np.std(coords)` forwards
        # axis=None and ddof=0 to DataFrame.std; newer pandas deprecates axis=None
        # there and will reduce over both axes to a single scalar, so both
        # arguments are spelled out explicitly to keep a per-coordinate Series.
        mean_stds = mean_stds + coords.std(axis=0, ddof=0)

# Average the accumulated per-file standard deviations over every (condition, sequence) pair.
mean_stds = mean_stds / (len(Conds) * len(Studyseqs))

In [79]:
# Display the per-coordinate mean standard deviations.
# Note: the Spine_X/Y/Z position values are not 0 because the ground truth (Cond_N)
# has the spine moving, whereas the synthesised conditions do not.
# This spine position is not used for the features, so the values reported for
# 'Spine' do not matter.
mean_stds

Spine_Xposition             0.712084
Spine_Yposition             0.110377
Spine_Zposition             1.662078
Spine1_Xposition            1.020507
Spine1_Yposition            0.138975
Spine1_Zposition            1.863815
Spine2_Xposition            1.350035
Spine2_Yposition            0.183486
Spine2_Zposition            2.085983
Spine3_Xposition            1.720974
Spine3_Yposition            0.239106
Spine3_Zposition            2.339472
LeftShoulder_Xposition      2.150518
LeftShoulder_Yposition      0.447093
LeftShoulder_Zposition      2.571958
LeftArm_Xposition           2.130681
LeftArm_Yposition           1.518511
LeftArm_Zposition           3.021876
LeftForeArm_Xposition       3.332970
LeftForeArm_Yposition       3.700959
LeftForeArm_Zposition       7.177045
LeftHand_Xposition          6.747509
LeftHand_Yposition         10.980588
LeftHand_Zposition          8.254881
RightShoulder_Xposition     2.150580
RightShoulder_Yposition     0.446836
RightShoulder_Zposition     2.571951
R

In [69]:
# Persist the per-coordinate mean standard deviations as a single-column CSV.
mean_stds.to_csv(
    path_or_buf='Synthesised_Dataset/mean_stds_of_joint_coords.csv',
    header=['Mean_std'],
)

Mean RMS energy across all audio clips (the average, over clips, of each clip's root-mean-square amplitude)

In [45]:
Studyseqs = range(1, 41)

# Root-mean-square amplitude of every clip, collected in sequence order.
clip_rms_values = []
for seq_number in Studyseqs:
    wav_path = 'Dataset/audio_files/Studyseq' + str(seq_number) + '.wav'
    samples, sample_rate = librosa.load(wav_path, sr=44100)
    clip_rms_values.append(np.sqrt(np.mean(samples**2)))

# Average of the per-clip RMS values (left-to-right sum starting from 0).
mean_rms = sum(clip_rms_values) / len(Studyseqs)
print(mean_rms)

0.025897790282033383


Data augmentation of audio (adding Gaussian noise), following this blog post: <https://medium.com/analytics-vidhya/adding-noise-to-audio-clips-5d8cee24ccb8>

In [51]:
# Fixed seed so that re-running this cell regenerates identical augmented clips.
NOISE_SEED = 0
# Standard deviation of the added noise, expressed as a fraction of `mean_rms`.
NOISE_TO_RMS_RATIO = 0.1
noise_rng = np.random.default_rng(NOISE_SEED)

for i in Studyseqs:
    audio_path = 'Dataset/audio_files/Studyseq' + str(i) + '.wav'
    # Augmented clips are numbered after the originals (offset by 40).
    augmented_path = 'Synthesised_Dataset/audio_files/Studyseq' + str(i+40) + '.wav'

    original_audio, sr = librosa.load(audio_path, sr=44100)
    # Zero-mean Gaussian noise, one value per audio sample.
    white_noise = noise_rng.normal(0, NOISE_TO_RMS_RATIO * mean_rms, original_audio.shape[0])

    augmented_audio = original_audio + white_noise
    # Write at the rate the clip was loaded with instead of repeating the literal.
    sf.write(augmented_path, augmented_audio, samplerate=sr)

# Comparing noise addition to bvh file vs joint coordinates

In [5]:
bvh_noise = pd.read_csv('GENEA2020/BVH_evaluation/Cond_BA/bvh/StudySeq1_noisy_gauss.csv')
bvh_noise_optimised = pd.read_csv('GENEA2020/BVH_evaluation/Cond_BA/bvh/StudySeq1_noisy_gauss2.csv')
# Left disabled, as in the original cell:
#joint_noise = pd.read_csv('Synthesised_Dataset/Cond_BA/motion_coords/StudySeq81_joint_coords.csv')
# NOTE(review): this file name is spelled 'StudySeq1' here but 'Studyseq' + n in the
# cell that computes mean_stds; on a case-sensitive filesystem only one can be right — confirm.
no_noise = pd.read_csv('Dataset/Cond_BA/motion_coords/StudySeq1_joint_coords.csv')

# Mean absolute deviation from the noise-free coordinates, one value per column.
# axis=0 is explicit because np.mean(DataFrame) reduces over both axes on
# pandas >= 2.0 and returns a scalar, which pd.concat cannot combine.
comparison = pd.concat(
    [
        (bvh_noise - no_noise).abs().mean(axis=0),
        (bvh_noise_optimised - no_noise).abs().mean(axis=0),
    ],
    axis=1,
)
comparison = comparison.set_axis(['bvh_noise - no_noise', 'bvh_noise_optimised - no_noise'], axis=1)
comparison

Unnamed: 0,bvh_noise - no_noise,bvh_noise_optimised - no_noise
Spine_Xposition,0.02367,0.023358
Spine_Yposition,0.022381,0.025264
Spine_Zposition,0.023919,0.023109
Spine1_Xposition,0.02528,0.024214
Spine1_Yposition,0.022378,0.025265
Spine1_Zposition,0.025639,0.024418
Spine2_Xposition,0.027584,0.026251
Spine2_Yposition,0.022418,0.025292
Spine2_Zposition,0.027848,0.027272
Spine3_Xposition,0.030538,0.029159


In [14]:
# NOTE(review): no active cell assigns `joint_noise` (the only assignment visible in
# this notebook is commented out), so this cell fails on a fresh kernel. The path
# below is taken from that commented-out line and is an assumption — TODO confirm.
joint_noise = pd.read_csv('Synthesised_Dataset/Cond_BA/motion_coords/StudySeq81_joint_coords.csv')

# Mean absolute deviation per coordinate column; `no_noise` is loaded by the
# comparison cell. axis=0 is explicit because np.mean(DataFrame) collapses to a
# single scalar on pandas >= 2.0.
(joint_noise - no_noise).abs().mean(axis=0)

Spine_Xposition            0.022258
Spine_Yposition            0.022979
Spine_Zposition            0.023636
Spine1_Xposition           0.021858
Spine1_Yposition           0.024576
Spine1_Zposition           0.023630
Spine2_Xposition           0.023020
Spine2_Yposition           0.023971
Spine2_Zposition           0.022784
Spine3_Xposition           0.023335
Spine3_Yposition           0.020685
Spine3_Zposition           0.022862
LeftShoulder_Xposition     0.022335
LeftShoulder_Yposition     0.021331
LeftShoulder_Zposition     0.022370
LeftArm_Xposition          0.023298
LeftArm_Yposition          0.020863
LeftArm_Zposition          0.022215
LeftForeArm_Xposition      0.023189
LeftForeArm_Yposition      0.022991
LeftForeArm_Zposition      0.024595
LeftHand_Xposition         0.023093
LeftHand_Yposition         0.022879
LeftHand_Zposition         0.023603
RightShoulder_Xposition    0.023173
RightShoulder_Yposition    0.024507
RightShoulder_Zposition    0.022507
RightArm_Xposition         0