In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Column names applied to each speaker's tab-separated transcript file.
column_names = ["audio", "text", "emotion"]
# Speaker IDs to load, as the two-digit strings "11" through "20".
speakers = [str(speaker_id) for speaker_id in range(11, 21)]
# For a quick single-speaker run, use instead: speakers = ["19"]
# Emotion labels; a label's position in this list is its slot in the one-hot vector.
all_emotions = ["Neutral", "Surprise", "Happy", "Sad", "Angry"]
# Start from an empty frame; the loading cell fills `df` with every speaker's rows.
df = pd.DataFrame()

In [3]:
def convert_to_vector(emotion):
    """Return the one-hot encoding of an emotion label as a list of floats.

    The result has one entry per label in the module-level ``all_emotions``
    list: 1.0 at the label's position and 0.0 everywhere else.

    Raises:
        ValueError: propagated from ``list.index`` when ``emotion`` is not in
            ``all_emotions``.
    """
    hot_slot = all_emotions.index(emotion)
    return [1.0 if slot == hot_slot else 0.0 for slot in range(len(all_emotions))]

In [4]:
def rename_wav(audio):
    """Turn an utterance ID into a relative ``.wav`` path.

    The first four characters of the ID become the leading path component,
    e.g. ``"0011_000001"`` -> ``"0011/0011_000001.wav"``.
    """
    prefix = audio[:4]
    filename = audio + ".wav"
    return "/".join((prefix, filename))

In [5]:
# Load every speaker's transcript/label file and stack the results into `df`.
#
# The per-speaker frames are collected in a list and concatenated once, so
# re-running this cell rebuilds `df` from scratch instead of appending another
# copy of the data to whatever `df` already holds.
#
# NOTE(review): the input path is an absolute, machine-specific location while
# the CSV outputs of this notebook are written relative to the working
# directory; consider moving it to a configurable data directory.
speaker_frames = []
for i in speakers:
    tmp_df = pd.read_csv(
        f"/home/xzodia/dev/emo-gan/data/ESD/00{i}/00{i}.txt",
        sep="\t",
        names=column_names,
    )

    # Derive the columns on a fresh frame rather than writing into a column
    # subset, then fix the column order to text, emotion, audio, speaker.
    tmp_df = tmp_df.assign(
        emotion=tmp_df["emotion"].apply(convert_to_vector),  # label -> one-hot list
        audio=tmp_df["audio"].apply(rename_wav),  # utterance ID -> relative wav path
        speaker=int(i),
    )[["text", "emotion", "audio", "speaker"]]

    speaker_frames.append(tmp_df)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# speakers are configured.
if speaker_frames:
    df = pd.concat(speaker_frames, axis=0).reset_index(drop=True)
else:
    df = pd.DataFrame()

In [6]:
# Preview the first rows of the combined frame to sanity-check the loaded columns.
df.head()

Unnamed: 0,text,emotion,audio,speaker
0,"The nine the eggs, I keep.","[1.0, 0.0, 0.0, 0.0, 0.0]",0011/0011_000001.wav,11
1,"I did go, and made many prisoners.","[1.0, 0.0, 0.0, 0.0, 0.0]",0011/0011_000002.wav,11
2,That I owe my thanks to you.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0011/0011_000003.wav,11
3,They went up to the dark mass job had pointed ...,"[1.0, 0.0, 0.0, 0.0, 0.0]",0011/0011_000004.wav,11
4,Clear than clear water!,"[1.0, 0.0, 0.0, 0.0, 0.0]",0011/0011_000005.wav,11


In [7]:
# Count the rows per one-hot emotion vector to see how the classes are balanced.
emotions_count = df["emotion"].value_counts()
print(emotions_count)

emotion
[1.0, 0.0, 0.0, 0.0, 0.0]    3500
[0.0, 0.0, 0.0, 0.0, 1.0]    3500
[0.0, 0.0, 1.0, 0.0, 0.0]    3500
[0.0, 0.0, 0.0, 1.0, 0.0]    3500
[0.0, 1.0, 0.0, 0.0, 0.0]    3500
Name: count, dtype: int64


In [8]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
train_df, valid_df = train_test_split(df, test_size=0.2, random_state=42)

# The validation file carries an `augment` column in place of `speaker`, set to 0
# for every row. It is written pipe-delimited, without a header or index column.
valid_df = valid_df.rename(columns={"speaker": "augment"}).assign(augment=0)
valid_df.to_csv("./data/ESD/validation.csv", index=False, header=False, sep="|")

In [9]:
# Replace the `speaker` column with an `augment` column that is 0 for every row.
train_df = train_df.rename(columns={"speaker": "augment"}).assign(augment=0)
print(train_df.shape)
train_df.head()

(14000, 4)


Unnamed: 0,text,emotion,audio,augment
8762,I have bunburyed all over goat on two separate...,"[1.0, 0.0, 0.0, 0.0, 0.0]",0016/0016_000013.wav,0
7540,I make her quite nervy at times.,"[0.0, 0.0, 0.0, 0.0, 1.0]",0015/0015_000541.wav,0
12270,As rich as Peter's son in law!,"[1.0, 0.0, 0.0, 0.0, 0.0]",0018/0018_000021.wav,0
10708,A raging fire was in his eyes.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0017/0017_000209.wav,0
3646,And there you'll find a snap dragon fly.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0013/0013_000147.wav,0


In [10]:
# Double the training set: every row appears once with augment=0 and once
# with augment=1.
#
# Only the augment==0 rows are used as the base, so re-running this cell
# rebuilds the same doubled frame instead of doubling it again. On a first
# run every row has augment==0, so the base is the whole frame.
train_base_df = train_df[train_df["augment"] == 0]
train_df_copy = train_base_df.assign(augment=1)
train_df = pd.concat([train_base_df, train_df_copy], axis=0).reset_index(drop=True)
train_df.head()

Unnamed: 0,text,emotion,audio,augment
0,I have bunburyed all over goat on two separate...,"[1.0, 0.0, 0.0, 0.0, 0.0]",0016/0016_000013.wav,0
1,I make her quite nervy at times.,"[0.0, 0.0, 0.0, 0.0, 1.0]",0015/0015_000541.wav,0
2,As rich as Peter's son in law!,"[1.0, 0.0, 0.0, 0.0, 0.0]",0018/0018_000021.wav,0
3,A raging fire was in his eyes.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0017/0017_000209.wav,0
4,And there you'll find a snap dragon fly.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0013/0013_000147.wav,0


In [11]:
# Write the training rows as a pipe-delimited CSV with no header row and no index column.
train_df.to_csv("./data/ESD/training.csv", index=False, header=False, sep="|")
print(train_df.shape)

(28000, 4)


In [12]:
# Recombine the training and validation rows, keep only the augment==0 rows,
# and turn the `augment` column back into `speaker`, recovered from characters
# 2-3 of the audio path (e.g. "0016/0016_000013.wav" -> 16).
#
# Everything is done in one chain that returns new frames, so no column is
# assigned onto a boolean-filtered slice and nothing is renamed in place.
# NOTE: this cell consumes the `augment` column of `train_df`, so it is meant
# to run once per kernel session, after the earlier cells.
train_df = (
    pd.concat([train_df, valid_df], axis=0)
    .reset_index(drop=True)
    .loc[lambda frame: frame["augment"] == 0]  # index labels are kept, not renumbered
    .rename(columns={"augment": "speaker"})
    .assign(speaker=lambda frame: frame["audio"].apply(lambda x: int(x[2:4])))
)
train_df.head()

Unnamed: 0,text,emotion,audio,speaker
0,I have bunburyed all over goat on two separate...,"[1.0, 0.0, 0.0, 0.0, 0.0]",0016/0016_000013.wav,16
1,I make her quite nervy at times.,"[0.0, 0.0, 0.0, 0.0, 1.0]",0015/0015_000541.wav,15
2,As rich as Peter's son in law!,"[1.0, 0.0, 0.0, 0.0, 0.0]",0018/0018_000021.wav,18
3,A raging fire was in his eyes.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0017/0017_000209.wav,17
4,And there you'll find a snap dragon fly.,"[1.0, 0.0, 0.0, 0.0, 0.0]",0013/0013_000147.wav,13


In [13]:
# Pair every Surprise utterance (emotion slot 1) with the utterances that share
# its text and speaker and have slot 2, 3 or 4 set, producing one "mixed"
# record per pair. The mixed emotion vector keeps slot 1 set and copies slots
# 2-4 from the partner row.
#
# Partner rows are grouped by (text, speaker) once up front, so each Surprise
# row is resolved with a dictionary lookup instead of re-filtering the whole
# frame (and re-evaluating the emotion test on every row) per iteration.
# Both passes walk the rows in frame order, so the record order is deterministic.
is_surprise = train_df["emotion"].apply(lambda x: x[1] == 1.0)
is_partner = train_df["emotion"].apply(
    lambda x: x[2] == 1.0 or x[3] == 1.0 or x[4] == 1.0
)
surprise_index = train_df[is_surprise].index

# (text, speaker) -> list of (emotion vector, audio path) for the partner rows.
partners_by_key = {}
partner_rows = train_df[is_partner]
for text, speaker, emotion, audio in zip(
    partner_rows["text"],
    partner_rows["speaker"],
    partner_rows["emotion"],
    partner_rows["audio"],
):
    # A missing text never compares equal to anything, so it can never pair up.
    if pd.isna(text):
        continue
    partners_by_key.setdefault((text, speaker), []).append((emotion, audio))

mixed_records = []
surprise_rows = train_df.loc[surprise_index]
for text, speaker, audio1 in zip(
    surprise_rows["text"], surprise_rows["speaker"], surprise_rows["audio"]
):
    for partner_emotion, audio2 in partners_by_key.get((text, speaker), []):
        mixed_records.append(
            {
                "text": text,
                "emotion": [0.0, 1.0] + partner_emotion[2:],
                "audio1": audio1,
                "audio2": audio2,
            }
        )

mixed_df = pd.DataFrame(mixed_records)

In [14]:
# Report how many mixed records were built and preview the first few.
print(mixed_df.shape)
mixed_df.head()

(10274, 4)


Unnamed: 0,text,emotion,audio1,audio2
0,"Chapter eighteen, the return journey.","[0.0, 1.0, 0.0, 0.0, 1.0]",0014/0014_001684.wav,0014/0014_000634.wav
1,"Chapter eighteen, the return journey.","[0.0, 1.0, 0.0, 1.0, 0.0]",0014/0014_001684.wav,0014/0014_001334.wav
2,"Chapter eighteen, the return journey.","[0.0, 1.0, 1.0, 0.0, 0.0]",0014/0014_001684.wav,0014/0014_000984.wav
3,I chose the right way.,"[0.0, 1.0, 1.0, 0.0, 0.0]",0020/0020_001441.wav,0020/0020_000741.wav
4,I chose the right way.,"[0.0, 1.0, 0.0, 0.0, 1.0]",0020/0020_001441.wav,0020/0020_000391.wav


In [15]:
# Split the mixed pairs 80/20 with a fixed seed, then write each part as a
# pipe-delimited CSV without a header row or index column.
mixed_train_df, mixed_valid_df = train_test_split(
    mixed_df, test_size=0.2, random_state=42
)

for csv_path, split_df in (
    ("./data/ESD/mixed_training.csv", mixed_train_df),
    ("./data/ESD/mixed_validation.csv", mixed_valid_df),
):
    split_df.to_csv(csv_path, index=False, header=False, sep="|")