## Build Speech data files

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display

%matplotlib inline

In [5]:
# Load the per-utterance audio features and keep only the rows whose label is
# one of the integer codes 0-7. `.copy()` detaches the filtered frame from the
# frame returned by read_csv, so the label assignment further down writes to an
# independent object instead of raising pandas' SettingWithCopyWarning.
df = pd.read_csv('data/pre-processed/audio_features.csv')
df = df[df['label'].isin([0, 1, 2, 3, 4, 5, 6, 7])].copy()
print(df.shape)
display(df.head())

# Collapse the eight original codes into six classes: codes 1 and 2 both become
# class 1, codes 3 and 4 both become class 2, and codes 5, 6, 7 are renumbered
# to 3, 4, 5 so the class ids stay contiguous.
# NOTE(review): going by the commented-out eight-entry emotion_dict elsewhere in
# this notebook, this presumably merges 'exc' into 'hap' and 'fru' into 'sad' --
# confirm against the feature-extraction step.
LABEL_REMAP = {0: 0, 1: 1, 2: 1, 3: 2, 4: 2, 5: 3, 6: 4, 7: 5}
df['label'] = df['label'].map(LABEL_REMAP)
df.head()

(14, 10)


Unnamed: 0,wav_file,label,sig_mean,sig_std,rmse_mean,rmse_std,silence,harmonic,pitch_mean,pitch_std
0,Ses01M_impro06_F000,7,0.003579,0.006817,0.00451,0.005104,0.008969,-0.00995,18.192109,45.468871
1,Ses01M_impro06_F001,7,0.00298,0.004733,0.003723,0.002918,0.0,-0.008699,16.478085,38.853069
2,Ses01M_impro06_F002,7,0.004494,0.007062,0.005439,0.004501,0.075,-0.005462,48.581025,54.993408
3,Ses01M_impro06_F003,3,0.003673,0.005732,0.004502,0.003538,0.024896,-0.004687,38.890432,48.527822
4,Ses01M_impro06_F004,3,0.005543,0.008848,0.006684,0.005794,0.265306,-0.008176,53.939736,48.179259


Unnamed: 0,wav_file,label,sig_mean,sig_std,rmse_mean,rmse_std,silence,harmonic,pitch_mean,pitch_std
0,Ses01M_impro06_F000,5,0.003579,0.006817,0.00451,0.005104,0.008969,-0.00995,18.192109,45.468871
1,Ses01M_impro06_F001,5,0.00298,0.004733,0.003723,0.002918,0.0,-0.008699,16.478085,38.853069
2,Ses01M_impro06_F002,5,0.004494,0.007062,0.005439,0.004501,0.075,-0.005462,48.581025,54.993408
3,Ses01M_impro06_F003,2,0.003673,0.005732,0.004502,0.003538,0.024896,-0.004687,38.890432,48.527822
4,Ses01M_impro06_F004,2,0.005543,0.008848,0.006684,0.005794,0.265306,-0.008176,53.939736,48.179259


In [16]:
# Snapshot the frame before any class rebalancing.
df.to_csv('data/no_sample_df.csv')

# Oversample the two under-represented classes by appending extra copies of
# their rows: 30 copies of the label-3 rows ("fear") and 10 copies of the
# label-4 rows.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# calling it in a loop re-copies the whole frame on every iteration. A single
# pd.concat yields the same rows in the same order (original rows, then the
# label-3 copies, then the label-4 copies) and keeps the original index values,
# exactly as the repeated appends did.
FEAR_EXTRA_COPIES = 30
SUR_EXTRA_COPIES = 10

fear_df = df[df['label'] == 3]
sur_df = df[df['label'] == 4]
df = pd.concat([df] + [fear_df] * FEAR_EXTRA_COPIES + [sur_df] * SUR_EXTRA_COPIES)

df.to_csv('data/modified_df.csv')
print(df)

               wav_file  label  sig_mean   sig_std  rmse_mean  rmse_std  \
0   Ses01M_impro06_F000      5  0.127996  0.290166   0.136884  0.575309   
1   Ses01M_impro06_F001      5  0.000000  0.000000   0.000000  0.136415   
2   Ses01M_impro06_F002      5  0.323647  0.324320   0.298537  0.454117   
3   Ses01M_impro06_F003      2  0.148180  0.139165   0.135488  0.260955   
4   Ses01M_impro06_F004      2  0.547803  0.573059   0.515079  0.713667   
5   Ses01M_impro06_F005      2  0.365054  0.378607   0.319302  0.546593   
6   Ses01M_impro06_F006      2  0.044136  0.037346   0.039773  0.165892   
7   Ses01M_impro06_F007      5  0.108649  0.107585   0.105635  0.232030   
8   Ses01M_impro06_F008      5  0.305311  0.439493   0.315098  0.681283   
9   Ses01M_impro06_F009      5  1.000000  1.000000   1.000000  1.000000   
10  Ses01M_impro06_F010      5  0.420405  0.414170   0.425995  0.475164   
11  Ses01M_impro06_F011      5  0.140655  0.041050   0.134685  0.000000   
12  Ses01M_impro06_F012  

In [17]:
# Emotion-name -> integer-code lookup restricted to four classes.
# NOTE(review): these codes do not line up with the six-class remap applied to
# df['label'] earlier in the notebook, and nothing in the visible cells reads
# this dict -- confirm which downstream consumer expects it.
emotion_dict = {'ang': 0, 'hap': 1, 'sad': 2, 'neu': 3}

# Reference only: the full name -> code table, kept commented out.
# emotion_dict = {'ang': 0,
#                 'hap': 1,
#                 'exc': 2,
#                 'sad': 3,
#                 'fru': 4,
#                 'fea': 5,
#                 'sur': 6,
#                 'neu': 7,
#                 'xxx': 8,
#                 'oth': 8}

# Rescale every column from the third one onwards into the [0, 1] range that
# MinMaxScaler produces by default; the first two columns are left untouched.
# NOTE(review): the scaler is fitted on the whole frame here, i.e. before the
# train/test split that follows in notebook order -- confirm that is intended.
scalar = MinMaxScaler()
feature_cols = df.columns[2:]
df[feature_cols] = scalar.fit_transform(df[feature_cols])
df.head()

Unnamed: 0,wav_file,label,sig_mean,sig_std,rmse_mean,rmse_std,silence,harmonic,pitch_mean,pitch_std
0,Ses01M_impro06_F000,5,0.127996,0.290166,0.136884,0.575309,0.027524,0.083336,0.032965,0.409892
1,Ses01M_impro06_F001,5,0.0,0.0,0.0,0.136415,0.0,0.215062,0.0,0.0
2,Ses01M_impro06_F002,5,0.323647,0.32432,0.298537,0.454117,0.230172,0.555776,0.617424,1.0
3,Ses01M_impro06_F003,2,0.14818,0.139165,0.135488,0.260955,0.076406,0.637308,0.431048,0.599414
4,Ses01M_impro06_F004,2,0.547803,0.573059,0.515079,0.713667,0.814215,0.270099,0.720486,0.577819


In [18]:
# Hold out 20% of the rows for testing. A fixed random_state makes the shuffle
# deterministic, so re-running the notebook regenerates the same two CSV files
# (and the same text splits that are later derived from x_train / x_test).
# NOTE(review): in notebook order the frame has already been oversampled and
# min-max scaled when it reaches this cell, so copies of one utterance can land
# in both splits -- consider splitting before oversampling/scaling.
SPLIT_SEED = 42
x_train, x_test = train_test_split(df, test_size=0.20, random_state=SPLIT_SEED)

x_train.to_csv('data/s2e/audio_train.csv', index=False)
x_test.to_csv('data/s2e/audio_test.csv', index=False)

print(x_train.shape, x_test.shape)

(11, 10) (3, 10)


## Define preprocessing functions for text

In [19]:
import unicodedata

def unicodeToAscii(s):
    """Return `s` with combining marks (accents, diacritics) removed.

    The string is decomposed with Unicode NFD normalisation, which splits an
    accented character into its base character followed by combining marks,
    and every character in category 'Mn' (nonspacing mark) is then dropped.
    Characters that have no such decomposition are returned unchanged, so the
    result is not guaranteed to be pure ASCII.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def normalizeString(s):
    """Lowercase, trim, and remove non-letter characters from `s`.

    Steps, in order:
      1. lowercase, strip surrounding whitespace, and drop combining marks
         via `unicodeToAscii`;
      2. insert a space before each '.', '!' or '?' so the punctuation mark is
         separated from the preceding word;
      3. replace every run of characters other than ASCII letters and '.', '!',
         '?' with a single space;
      4. strip again, because steps 2 and 3 can leave a space at either end
         (e.g. an input that starts with '.' or with a bracketed annotation).

    Returns the normalised string.
    """
    # Imported here because the notebook only imports `re` at module level in a
    # later cell than the one defining this function; the local import keeps
    # the function usable regardless of which cells have been run.
    import re

    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s.strip()

## Build Text data files

In [24]:
import re
import os
import pickle

# Matches the utterance code: the run of word characters at the very start of
# a transcript line.
useful_regex = re.compile(r'^(\w+)', re.IGNORECASE)

# Maps utterance code -> raw transcription text.
file2transcriptions = {}

for sess in range(1, 2):
    transcripts_path = '/home/Data/IEMOCAP_session_only/Session{}/dialog/transcriptions/'.\
        format(sess)
    transcript_files = [x for x in os.listdir(transcripts_path) if x.startswith("Ses")]
    for transcript_name in transcript_files:
        # Distinct names for the file name and the file handle; the handle is
        # closed as soon as the `with` block exits.
        with open(os.path.join(transcripts_path, transcript_name), 'r') as transcript_fh:
            for line in transcript_fh:
                code_match = useful_regex.match(line)
                if code_match is None:
                    # Blank lines, or lines that do not start with a word
                    # character, carry no utterance code -- skip them instead
                    # of failing on `None.group()`.
                    continue
                audio_code = code_match.group()
                # The transcription is everything after the FIRST colon, so a
                # colon inside the spoken text no longer truncates it. A line
                # with no colon at all is kept whole, as before.
                # NOTE(review): assumes the part of the line before the
                # transcription never contains a colon itself -- confirm
                # against the transcript files.
                _, colon, spoken_text = line.partition(':')
                transcription = (spoken_text if colon else line).strip()
                # Codes are assumed to be unique across files; if one repeats,
                # the later line silently overwrites the earlier one.
                file2transcriptions[audio_code] = transcription

# save dict
with open('data/t2e/audiocode2text.pkl', 'wb') as pkl_fh:
    pickle.dump(file2transcriptions, pkl_fh)
len(file2transcriptions)

1821

In [25]:
# Reload the utterance-code -> transcription mapping from the pickle file.
# The context manager closes the file handle once loading finishes; the
# original one-liner left it open.
# NOTE: pickle.load can execute arbitrary code, so only load a file you trust.
with open('data/t2e/audiocode2text.pkl', 'rb') as pkl_fh:
    audiocode2text = pickle.load(pkl_fh)

In [26]:
# Prepare text data
def build_text_split(audio_split):
    """Build the text counterpart of one audio split.

    Parameters
    ----------
    audio_split : pandas.DataFrame
        A frame with at least the columns 'wav_file' and 'label'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as `audio_split` and the columns
        'wav_file', 'label' and 'transcription', where the transcription is
        the normalised text looked up in `audiocode2text` by 'wav_file'.

    Raises
    ------
    KeyError
        If a 'wav_file' value has no entry in `audiocode2text`.
    """
    text_split = audio_split[['wav_file', 'label']].copy()
    text_split['transcription'] = [
        normalizeString(audiocode2text[code]) for code in audio_split['wav_file']
    ]
    return text_split


# Same rows as the audio splits, so the text and audio files stay aligned.
text_train = build_text_split(x_train)
text_test = build_text_split(x_test)

text_train.to_csv('data/t2e/text_train.csv', index=False)
text_test.to_csv('data/t2e/text_test.csv', index=False)

print(text_train.shape, text_test.shape)

(11, 3) (3, 3)
