In [6]:
import os
import glob
import random
import soundfile as sf
import shutil
import copy
from collections import defaultdict
import pickle

## Helper function to generate file list for Tacotron2

In [7]:
def emo_wav_generate(emotdir):
    """
    this function return a dictionary where the key is the wavname and value is the extracted emotion.
    each item is a utterance sample
    """
    emot_map = {}
    with open(emotdir, 'r') as file:
        while True:
            line = file.readline()
            if not line:
                break
            if line[0] == '[':
                t = line.split()
                emot_map[t[3]] = t[4]
    return emot_map


def generate_label(emotion): 
    if emotion == 'ang':
        return emotion
    elif emotion == 'sad': 
        return 'sad'
    elif emotion == 'hap' or emotion == 'exc':
        return 'hap'
    elif emotion == 'neu':
        return emotion
    elif emotion == 'fru':
        return emotion
    else: # This could be SURPRISED, FEARFUL, FRUSTRATED, DISGUSTED and OTHER
        return 'something'

In [8]:
session_list = ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']
rootdir = '/data3/IEMOCAP/IEMOCAP_full_release_spec'
emotion_path_dict = defaultdict(list)
path_emotion_dict = defaultdict(list)
for session in session_list:
    wav_dir = os.path.join(rootdir, session, 'sentences/wav/*')
    trans_dir = os.path.join(rootdir, session, 'dialog/transcriptions')
    for sess_path in sorted(glob.glob(wav_dir)):
        emoevl = '/'.join(sess_path.split('/')[:-3])
        emoevl = os.path.join(emoevl, 'dialog', 'EmoEvaluation') #/data3/IEMOCAP/IEMOCAP_full_release/Session4/dialog/EmoEvaluation
        sess = sess_path.split('/')[-1]
        emotdir = emoevl + '/' + sess + '.txt'
        emotdir = emotdir.replace('IEMOCAP_full_release_spec', 'IEMOCAP_full_release')
        trans_path = os.path.join(trans_dir, sess +'.txt')
        emot_map = emo_wav_generate(emotdir)
        for path in glob.glob(os.path.join(sess_path, '*.npy')):
            filename = path.split('/')[-1].split('.')[0]
            path = '..' + path
            emotion = generate_label(emot_map[filename])
            emotion_path_dict[emotion].append(path)
            path_emotion_dict[path].append(emotion)

path_emotion_dict = dict(path_emotion_dict)
emotion_path_dict = dict(emotion_path_dict)

## Statsitics

In [13]:
for emo in list(emotion_path_dict.keys()):
    print('Emotion: ', emo)
    print(len(list(emotion_path_dict[emo])))

Emotion:  fru
1849
Emotion:  something
2659
Emotion:  neu
1708
Emotion:  ang
1103
Emotion:  sad
1084
Emotion:  hap
1636


In [12]:
with open('../filelists/IEMOCAP/emotion_path_dict.pickle', 'wb') as handle:
    pickle.dump(emotion_path_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('../filelists/IEMOCAP/path_emotion_dict.pickle', 'wb') as handle:
    pickle.dump(path_emotion_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)


FileNotFoundError: [Errno 2] No such file or directory: '../filelists/IEMOCAP/emotion_path_dict.pickle'

In [None]:
with open('../filelists/IEMOCAP/emotion_path_dict.pickle', 'rb') as handle:
    b = pickle.load(handle)

emotion_path_dict == b

In [None]:
CUT = 12 # 12s
SR = 16000 

def check_audio_length(path):
    audio, _ = sf.read(path)
    if len(audio) < (CUT * SR):
        return True
    else:
        return False

In [None]:
# def random_sample_emotion(used_path, all_path, used_emo, emo_map):
#     if all_path == []:
#         return None 
#     else:
#         all_path.remove(used_path)
#         path = random.sample(all_path, 1)[0]   
#     while True:
#         path_wav_name = path.split('/')[-1].split('.')[0]
#         path_emo = emo_map[path_wav_name]
#         print('path_emo: ', path_emo)
#         if check_audio_length(path) == True and path_emo == used_emo:
#             return path
#         else:
#             return random_sample_emotion(path, all_path, used_emo, emo_map)


def random_sample_emotion(used_path, used_emo, emo_map):
    all_path = list(emo_map[used_emo])
    all_path.remove(used_path)
    return random.sample(all_path, 1)[0]
    
def random_sample_speaker(used_path, all_path, gender):
    if all_path == []:
        return None 
    elif len(all_path) == 1:
        path = all_path[0]
        all_path.remove(path)
    else:
        all_path.remove(used_path)
        path = random.sample(all_path, 1)[0]   
    while True:
        path_wav_name = path.split('/')[-1].split('.')[0]
        path_gender = path_wav_name.split('_')[-1][0]
        if check_audio_length(path) == True and path_gender == gender:
            return path
        else:
            return random_sample_speaker(path, all_path, gender)

def make_path_hyak(old_wav_npy_path, dot=False):
    old_wav_npy_path = old_wav_npy_path.replace('IEMOCAP_full_release', 'IEMOCAP_full_release_spec')
    old_wav_npy_path = old_wav_npy_path.replace('.wav', '.npy')
    if dot == True:
        new_wav_npy_path = '..' + old_wav_npy_path
    else:
        new_wav_npy_path = old_wav_npy_path
    return new_wav_npy_path


## Generate dataset dicts

In [None]:
rootdir = '/data3/IEMOCAP/IEMOCAP_full_release'


def emo_script(wav_dir, trans_dir):
    trans_map = {}
    skipped_cnt_length = 0
    skipped_cnt_speaker_embedding = 0
    skipped_cnt_emotion_embedding = 0
    skipped_cnt_emo = 0
    skipped_cnt_other = 0
    
    counted = 0
    for sess_path in sorted(glob.glob(wav_dir)):
        emoevl = '/'.join(sess_path.split('/')[:-3])
        emoevl = os.path.join(emoevl, 'dialog', 'EmoEvaluation') #/data3/IEMOCAP/IEMOCAP_full_release/Session4/dialog/EmoEvaluation
        sess = sess_path.split('/')[-1]
        emotdir = emoevl + '/' + sess + '.txt'
        trans_path = os.path.join(trans_dir, sess +'.txt')
        emot_map = emo_wav_generate(emotdir)
        with open(trans_path, 'r') as file:
            while True:
                line = file.readline()
                if not line:
                    break
                if line[0] == 'S':
                    split_line = line.split(':')
                    transcript = split_line[-1][1:-2]
                    wav_file_name = split_line[0].split()[0]
                    wav_name = wav_file_name +'.wav'
                    wav_path = str(os.path.join(sess_path, wav_name))
                    try: # filter out wavname like Ses03F_impro06_MXX1
                        # Ses03F_impro05_MXX0
                        x = int(wav_file_name[-3:])
                    except:
                        skipped_cnt_other += 1
                        continue
            
                    y, _ = sf.read(wav_path)
                    if len(y) >= SR * CUT:
                        skipped_cnt_length += 1
                        continue
                
                    emotion = emot_map[wav_file_name]
                    emotion = generate_label(emotion)
                    
#                     # build a new dataset with emotion name on the filename_emotion.npy
                    old_wav_npy_path = wav_path.replace('IEMOCAP_full_release', 'IEMOCAP_full_release_spec')
                    old_wav_npy_path = old_wav_npy_path.replace('.wav', '.npy')
                    hyak_new_wav_path = '..' + old_wav_npy_path
            
                    #check if the emotion is in the range:
                    if emotion not in ['hap', 'ang', 'neu', 'sad', 'fru']:
                        skipped_cnt_emo += 1
                        continue
                    # Resample one
                    speaker_sess_id = wav_name.split('.')[0][:-3]
                    speaker_sess_path = str(os.path.join(sess_path, speaker_sess_id))
                    all_same_wav_speaker = glob.glob(speaker_sess_path + '*.wav')
                    gender = wav_file_name.split('_')[-1][0]
                    all_same_wav_emo = glob.glob(wav_dir + '/*/*.wav')
                    sampled_path_speaker = random_sample_speaker(wav_path, all_same_wav_speaker, gender)
                    sampled_path_emo = random_sample_emotion(hyak_new_wav_path, emotion, emotion_path_dict)
                    if sampled_path_speaker == None:
                        skipped_cnt_speaker_embedding += 1
                        continue
                    if len(transcript) == 0:
                        continue
                    trans_map[hyak_new_wav_path] = [transcript, make_path_hyak(sampled_path_speaker, dot=True), 
                                                    sampled_path_emo]
                    counted += 1
    print('Number of Successfull files: ', counted)
    print('Number of skipped_cnt_length: ', skipped_cnt_length)
    print('Number of skipped_cnt_speaker_embedding: ', skipped_cnt_speaker_embedding)
    print('Number of skipped_cnt_emo_embedding: ', skipped_cnt_emotion_embedding)
    print('Number of skipped_cnt_emo: ', skipped_cnt_emo)
    print('Number of skipped_cnt_other: ', skipped_cnt_other)
    print('============================================================')
    return trans_map

def dict2list(test_dict, flag):
    output = []
    for key, value in test_dict.items():
        if flag == 0:
            s = key + '|' + value[0] + '\n'
        elif flag == 1: # speaker embedding
            s = value[1] + '|' + 'nothing is here' + '\n'
        else: # emotion embedding
            s = value[2] + '|' + 'nothing is here' + '\n'
        output.append(s)
    return output


# Test Code
# wav_dir = os.path.join(rootdir, 'Session4', 'sentences/wav/*')
# trans_dir = os.path.join(rootdir, 'Session4', 'dialog/transcriptions')
# trans_dict = emo_script(wav_dir, trans_dir)


## Generating .txt files for train, val, test

In [None]:
session_list = ['Session1', 'Session2', 'Session3', 'Session4', 'Session5']

for session in session_list:
    wav_dir = os.path.join(rootdir, session, 'sentences/wav/*')
    trans_dir = os.path.join(rootdir, session, 'dialog/transcriptions')
    if session == 'Session5':
        test_dict = emo_script(wav_dir, trans_dir)
        test_output = dict2list(test_dict, 0)
        test_output_speaker = dict2list(test_dict, 1)
        test_output_emotion = dict2list(test_dict, 2)
        with open('../filelists/IEMOCAP/test_filelist.txt', 'w') as file:
            file.writelines(test_output)
        with open('../filelists/IEMOCAP/test_filelist_speaker.txt', 'w') as file:
            file.writelines(test_output_speaker)
        with open('../filelists/IEMOCAP/test_filelist_emotion.txt', 'w') as file:
            file.writelines(test_output_emotion)

    else:
        train_dict = emo_script(wav_dir, trans_dir)
        train_output = dict2list(train_dict, 0)
        with open('../filelists/IEMOCAP/train_filelist.txt', 'a') as file:
            file.writelines(train_output)

## Quality Check

In [None]:
# original_path = '../filelists/IEMOCAP'
# for ftype in ['train', 'val', 'test']:
#     file_path = original_path + '/' + ftype + '_filelist.txt'
#     alt_path = original_path + '/' + ftype + '_filelist_alt.txt'
#     with open(file_path, 'r') as a, open(alt_path, 'r') as b:
#         content_a = a.readlines()
#         content_b = b.readlines()
#         for line_a, line_b in zip(content_a, content_b):
#             path_wav_a = line_a.split('|')[0]
#             path_wav_b = line_b.split('|')[0]
            
#             if path_wav_a.split('.')[0][-4] == path_wav_b.split('.')[0][-4] \
#                 and line_a.split('|')[0] != line_b.split('|')[0] and check_audio_length(path_wav_a) \
#                     and check_audio_length(path_wav_b):
#                 continue
#             else:
#                 print('==============================')
#                 print('something wrong')
#                 print('ftype', ftype)
#                 print(line_a.split('|')[0].split('.')[0][-4])
#                 print(line_b.split('|')[0].split('.')[0][-4])
#                 print(line_a.split('|')[0])
#                 print(line_b.split('|')[0])
#                 print('==============================')
# print('Finish Checking')
    

## Checking the number

In [None]:
# num_lines_train = sum(1 for line in open('../filelists/IEMOCAP/train_filelist.txt'))
# num_lines_test = sum(1 for line in open('../filelists/IEMOCAP/test_filelist.txt'))
# num_lines_test_alt = sum(1 for line in open('../filelists/IEMOCAP/test_filelist_alt.txt'))

# print('Check the numbers!')
# print(num_lines_train)
# print(num_lines_test)
# print('====================')
# print(num_lines_test_alt)

In [None]:
# Train count:  5788
# Valid count:  2103
# Test count:  2194
# total count:  10085


## Playground