In [6]:
import os
import re
import glob 
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import numpy as np
from os.path import basename
import audiosegment
from multiprocessing import Pool
modulePath = '../../lib/ChristiansPythonLibrary/src' 
import sys
import numpy
sys.path.append(modulePath)
import generalUtility
import dspUtil
import matplotlibUtil
import librosa
import pickle
import string
from random import *
import cv2
min_char = 8
max_char = 20
allchar = string.ascii_letters + string.digits

def generate_random_string():
     return "".join(choice(allchar) for x in range(randint(min_char, max_char)))



#default dpi
default_dpi = 500


#Augmentation factor
number_augmentated_per_image = 25
max_u_over_f = 3
u_over_f_values = numpy.arange(0.2, max_u_over_f, max_u_over_f / 40)
print(u_over_f_values)

scale_array = [ 1/ (np.abs((1-x)) + 0.1)    for x in u_over_f_values]
scale_array  = numpy.abs(scale_array)
print(scale_array)

#Constant
EMOTION_ANNOTATORS = {'anger': 0, 'happiness' : 1, 'sadness' : 2, 'neutral' : 3, 'frustration' : 4, 'excited': 5,
           'fear' : 6,'surprise' : 7,'disgust' : 8, 'other' : 9}

EMOTION = {'ang': 0, 'hap' : 1, 'sad' : 2, 'neu' : 3, 'fru' : 4, 'exc': 5,
           'fea' : 6,'sur' : 7,'dis' : 8, 'oth' : 9, 'xxx':10}


#Development mode. Only run with small data.
dev = False

#augment data bool
isAugmentData = True




[0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.  1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9
 2.  2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.  3.1 3.2 3.3 3.4 3.5 3.6 3.7
 3.8 3.9]
[ 1.11111111  1.25        1.42857143  1.66666667  2.          2.5
  3.33333333  5.         10.          5.          3.33333333  2.5
  2.          1.66666667  1.42857143  1.25        1.11111111  1.
  0.90909091  0.83333333  0.76923077  0.71428571  0.66666667  0.625
  0.58823529  0.55555556  0.52631579  0.5         0.47619048  0.45454545
  0.43478261  0.41666667  0.4         0.38461538  0.37037037  0.35714286
  0.34482759  0.33333333]


In [1]:
#Define class
class Input:
    ##spectral, prosody, erergy are dict type
    def __init__(self, spectral, prosody, energy, spectrogram):
        self.spectral = spectral
        self.prosody = prosody
        self.energy = energy
        self.spectrogram = spectrogram
        
    def print(self):
        print("spectral  features: ", spectral)
        print("prosody features: ", prosody)
        print("energy: ", energy)
        print("spectrogram: ", spectrogram)
        
    def input2Vec(self, onlySpectrogram):
        if (onlySpectrogram ==  False):
            features = []
            s = list(self.spectral.values())
            p = list(self.prosody.values())
            e = list(self.energy.values())
            [features.extend(x) for x in [s, p, e]]
            return features
        else :
            return self.spectrogram
    
class Output:
    def __init__(self, duration, code, category_origin, category_evaluation, attribute):
        self.duration = duration
        self.code = code
        self.category_origin = category_origin
        self.category_evaluation = category_evaluation
        self.attribute = attribute
        
     
    def print(self):
        print("duration: ", self.duration)
        print("code: ", self.code)
        print("category_origin: ", self.category_origin)
        print("category_evaluation: ", self.category_evaluation)
        print("attribute: ", self.attribute)
        
    def output2Vec(self):
        emotion = EMOTION[self.category_origin]
        return emotion
  


def spectrogramToImage(freqs, times, amplitudes, dpi):
    fig, ax = plt.subplots(dpi = dpi)
    ax.pcolormesh(times, freqs, amplitudes)
    ax.axis('off')

    fig.canvas.draw ()
    data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
    img = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close(fig)
    return img

def parallel_task(d0, d1):
    print("task...")
    
    
    def create_histrogram(file_in, file_out, folder):
        seg = audiosegment.from_file(file_in)
        freqs, times, amplitudes = seg.spectrogram(window_length_s=0.03, overlap=0.5)
        amplitudes = 10 * np.log10(amplitudes + 1e-9)
        
        img = spectrogramToImage(freqs, times, amplitudes, dpi=default_dpi)
        img = cv2.resize(img, (256, 256))
        #print("hahah:", folder + "/" + file_out + ".png")
        cv2.imwrite(folder + "/" + file_out + ".png", img)
        

        
        
       
        #plt.savefig(folder + "/" + file_out, transparent=True, dpi= default_dpi)
        
        if (isAugmentData == True) :
            print("Augmenting data....")
            for x in scale_array:
               # plt.savefig(folder + "/" + generate_random_string() + file_out, transparent=True, dpi= int(default_dpi * x))
                img = spectrogramToImage(freqs, times, amplitudes, dpi= int(default_dpi * x))
                img = cv2.resize(img, (256, 256))
                cv2.imwrite(folder + "/" + generate_random_string() + file_out +".png", img)
        
    # Each input diectory contains many file
    # This fucntion will walk through all valid 'wav'files in this directory and return the definite path of each file
    def parseInput(dir):
        dicts = {} 
        for f in os.listdir(dir):
            if not f.startswith(".") and os.path.splitext(f)[1] == ".wav":
                dicts[os.path.splitext(f)[0]] = dir + "/" + f


        return dicts
    
   # Get label of file
    def parseOutput(file):
        dict_namefile_output = {}
        # Open file to get all contents excepts the first line.
        f = open(file, 'r')
        content = ""
        index = 0
        for line in f:
            index = index + 1
            if index == 1:
                continue
            content  = content + line

        # Find all matched patterns in the content
        ps = re.findall(r'\[.*?\)\n\n', content, re.DOTALL)

        # Parse each matched pattern into  'Output' object
        try:
            for p in ps:
                ls = p.split("\n")
                ls = list(filter(lambda x: len(x) > 0 ,ls))

                # Split elements of the first line which looks like : 
                # [147.0300 - 151.7101]	Ses01F_impro02_M012	neu	[2.5000, 2.0000, 2.0000]
                ele_line0 = re.search(r'(\[.*?\])(\s)(.*?)(\s)(.*?)(\s)(\[.*?\])', ls[0]).groups()

                # Split time components which looks like:
                # [147.0300 - 151.7101]
                time_dur = ele_line0[0]
                ele_time_dur = re.findall(r"[-+]?\d*\.\d+|\d+", time_dur)
                ele_time_dur = [float(x) for x in ele_time_dur]

                # Get code and category_origin which looks like:
                # Code: Ses01F_impro02_M012
                # Category_origin: neu
                code = ele_line0[2]
                category_origin = ele_line0[4]

                # Split attribute components which looks like:
                # [2.5000, 2.0000, 2.0000]
                attribute = ele_line0[6]
                ele_attribute = re.findall(r"[-+]?\d*\.\d+|\d+", attribute)
                ele_attribute = [float(x) for x in ele_attribute]

                # Get categorial_evaluation:
                lines_categorical = list(filter(lambda x : x[0] == 'C', ls))
                rex = re.compile(r'C.*?:(\s)(.*?)(\s)\(.*?\)')

                category_evaluation = []
                for l in lines_categorical:
                    elements = rex.search(l).groups()
                    cat = elements[1]
                    cat = cat.split(";")
                    cat = map(lambda x: x.lstrip(), cat)
                    cat = list(filter(lambda x: len(x)>0, cat))
                    category_evaluation.extend(cat)


                # Make list distinct
                category_evaluation = list(set(category_evaluation))
                
                

                # Make dict {name_file : parsed_output}
                dict_namefile_output[code] = Output(ele_time_dur, code, category_origin, category_evaluation, ele_attribute)
            return dict_namefile_output
        except Exception as e:
            print(e)


    ### Parse input and output files and get input and output as vector
    dicts_in = parseInput(d0)
    dicts_out = parseOutput(d1)
    in_out = []
    
    keys = list(dicts_in.keys())
    for key in keys:
        if(dicts_out[key].category_origin != 'xxx' and dicts_out[key].category_origin != 'dis' and dicts_out[key].category_origin != 'oth'):
            create_histrogram(dicts_in[key], key, "processed-data/" + dicts_out[key].category_origin)
            
            


def createInput_Output():
    ### Get directories of input and output
    DATA_DIR = "../../IEMOCAP_full_release"
    NUM_SESSION = 5
    input_output = []
    for i in range (1, NUM_SESSION + 1):
        name_session = "Session" + str(i)
        root_dir_of_wav = DATA_DIR + "/" + name_session + "/sentences" + "/wav"
        root_dir_of_labels = DATA_DIR + "/" + name_session + "/dialog" + "/EmoEvaluation"

        for x in os.walk(root_dir_of_wav):
            if(x[0] == root_dir_of_wav):
                dirs_of_wav = x[1]
                index = -1
            else:
                index = index + 1
                input_output.append((x[0], root_dir_of_labels + "/" + dirs_of_wav[index] + ".txt"))
                
    
    ds = input_output
    in_out = []
    input = []
    out = []
    
    # Multi processing
    with Pool(processes=8) as pool:
        pool.starmap(parallel_task, ds)
   
    print("Finished create histogram into files")


createInput_Output()


NameError: name 'os' is not defined