In [31]:
import audiosegment
import pydub 
import numpy as np
import sys
modulePath = '../../ChristiansPythonLibrary/src' 
sys.path.append(modulePath)
import generalUtility
import dspUtil
import praatUtil
import generalUtility
import matplotlibUtil
import parselmouth 

# Get Jitter
def calculateJitter(data):
    """Return the relative mean absolute successive difference, in percent.

    Zero entries are dropped first. The result is the mean absolute
    difference between consecutive remaining values, divided by the mean of
    the remaining values, multiplied by 100.

    NOTE(review): the original docstring described ``data`` as a list of
    peak times. The formula itself only sees a series of numbers; if the
    intent is classic local jitter it should presumably be fed period
    lengths rather than raw time stamps -- confirm against callers.

    Args:
        data: sequence (list or array) of numbers.

    Returns:
        The ratio as a percentage, or NaN when fewer than two non-zero
        values remain (the ratio is undefined there; the previous
        implementation raised ZeroDivisionError).
    """
    values = np.asarray(data, dtype=float)
    values = values[values != 0]

    # At least two samples are needed to form one successive difference.
    if values.size < 2:
        return float("nan")

    mean_abs_diff = np.mean(np.abs(np.diff(values)))
    return 100 * (mean_abs_diff / np.mean(values))


#Get Shimmer
def calculateShimmer(data):
    """Return the relative mean absolute successive difference, in percent.

    Zero entries are dropped first. The result is the mean absolute
    difference between consecutive remaining values, divided by the mean of
    the remaining values, multiplied by 100.

    NOTE(review): presumably ``data`` is meant to be a series of per-cycle
    amplitudes; the function itself does not check this -- confirm against
    callers.

    Args:
        data: sequence (list or array) of numbers.

    Returns:
        The ratio as a percentage, or NaN when fewer than two non-zero
        values remain (the ratio is undefined there; the previous
        implementation raised ZeroDivisionError).
    """
    amplitudes = np.asarray(data, dtype=float)
    amplitudes = amplitudes[amplitudes != 0]

    # At least two samples are needed to form one successive difference.
    if amplitudes.size < 2:
        return float("nan")

    mean_abs_diff = np.mean(np.abs(np.diff(amplitudes)))
    return 100 * (mean_abs_diff / np.mean(amplitudes))

def getStatistic(numpy_arr):
    """Summarise the non-zero entries of a sequence with eight statistics.

    Zero entries are dropped before anything is computed.

    Args:
        numpy_arr: sequence (list or array) of numbers.

    Returns:
        A length-8 array ordered as
        ``[max, min, range, mean, median, 25th percentile,
        75th percentile, standard deviation]``.
        When no non-zero entries remain, all eight values are NaN so the
        result keeps its length (the previous implementation raised
        ValueError from ``np.max`` on the empty array).
    """
    values = np.asarray(numpy_arr)
    values = values[values != 0]

    if values.size == 0:
        return np.full(8, np.nan)

    max_v = np.max(values)
    min_v = np.min(values)
    return np.array([
        max_v,
        min_v,
        max_v - min_v,  # range, reusing the extrema computed above
        np.mean(values),
        np.median(values),
        np.percentile(values, 25),
        np.percentile(values, 75),
        np.std(values),
    ])

def getAllFeatures(file):
    """Build a flat feature vector for one sound file.

    The file is loaded with ``parselmouth.Sound`` and analysed with its
    pitch and Burg formant objects. Features are concatenated in this order:

    1. total energy (``snd.get_energy()``)
    2. ``getStatistic`` of the pitch ``'frequency'`` array
    3. ``getStatistic`` of the per-frame ``intensity`` values
    4. for formant 1, 2, 3 in turn: ``getStatistic`` of the formant values,
       then ``getStatistic`` of the formant bandwidths
    5. ``getStatistic`` of F2/(F1+1), then of F3/(F1+1)
    6. ``calculateJitter`` of the pitch frame times
    7. ``calculateShimmer`` of the pitch ``'strength'`` array
    8. sound duration
    9. fraction of pitch frames that are not voiced

    Args:
        file: value passed straight to ``parselmouth.Sound``.

    Returns:
        1-D NumPy array holding the features listed above.
    """
    # Pitch and formant analyses of the sound.
    snd = parselmouth.Sound(file)
    pitch = snd.to_pitch()
    formants = snd.to_formant_burg()

    # Frames are addressed by number, from 1 to num_frames inclusive.
    num_frames = pitch.get_number_of_frames()
    frame_numbers = range(1, num_frames + 1)
    frames = [pitch.get_frame(i) for i in frame_numbers]
    times = [pitch.get_time_from_frame_number(i) for i in frame_numbers]

    # Collect the pieces in order and join them once at the end instead of
    # re-allocating the array with np.append after every feature group.
    parts = [snd.get_energy()]

    # F0 statistics.
    parts.append(getStatistic(pitch.selected_array['frequency']))

    # Intensity statistics.
    parts.append(getStatistic([frame.intensity for frame in frames]))

    # Formant value and bandwidth statistics for F1..F3, sampled at the
    # pitch frame times.
    formant_tracks = {}
    for number in (1, 2, 3):
        values = [formants.get_value_at_time(number, t) for t in times]
        bandwidths = [formants.get_bandwidth_at_time(number, t) for t in times]
        parts.append(getStatistic(values))
        parts.append(getStatistic(bandwidths))
        formant_tracks[number] = np.array(values)

    # F2/F1 and F3/F1 statistics. The denominator is F1 + 1, presumably to
    # avoid dividing by zero -- TODO confirm the intent.
    f1_plus_one = formant_tracks[1] + 1
    parts.append(getStatistic(formant_tracks[2] / f1_plus_one))
    parts.append(getStatistic(formant_tracks[3] / f1_plus_one))

    # Jitter.
    # NOTE(review): this is computed from the pitch analysis frame times,
    # not from glottal pulse periods -- confirm this is the intended input.
    parts.append(calculateJitter(times))

    # Shimmer.
    # NOTE(review): this uses the pitch 'strength' array as the amplitude
    # series -- confirm this is the intended input.
    parts.append(calculateShimmer(pitch.selected_array['strength']))

    # Duration.
    parts.append(snd.duration)

    # Unvoiced fraction (0..1). The explicit float() keeps this a true
    # division on every Python version, like the casts used elsewhere here.
    parts.append((num_frames - pitch.count_voiced_frames()) / float(num_frames))

    return np.hstack(parts)

# Run the extractor on a sample recording; as the last expression of the
# notebook cell, the returned feature array is shown as the cell output.
getAllFeatures("ses01.wav")
    
    
    
    






array([2.58529631e-02, 3.71570411e+02, 1.08412615e+02, 2.63157796e+02,
       2.28677928e+02, 2.08839356e+02, 1.76934976e+02, 2.77358307e+02,
       6.37874451e+01, 9.81805935e-01, 3.70878705e-03, 9.78097148e-01,
       1.17096402e-01, 6.61155512e-02, 1.08809969e-02, 1.68900482e-01,
       1.44424772e-01, 1.83612776e+03, 8.66352812e+01, 1.74949248e+03,
       6.37707415e+02, 5.94848946e+02, 4.83234581e+02, 7.49649838e+02,
       2.41416990e+02, 3.32348056e+03, 1.76645669e+01, 3.30581599e+03,
       4.56923424e+02, 3.59151142e+02, 1.79887213e+02, 6.54173610e+02,
       3.71929064e+02, 2.72204603e+03, 9.36301111e+02, 1.78574492e+03,
       1.81386448e+03, 1.81239992e+03, 1.62942326e+03, 1.98822840e+03,
       2.86632028e+02, 4.15162054e+03, 4.05403181e+01, 4.11108022e+03,
       5.14294746e+02, 4.03147322e+02, 2.05185331e+02, 6.48096311e+02,
       4.88407785e+02, 3.92239373e+03, 1.82168834e+03, 2.10070539e+03,
       2.78976116e+03, 2.83199839e+03, 2.59254589e+03, 2.96369903e+03,
      

In [17]:
# Scratch check: adding a scalar to a NumPy array adds it to every element.
# b is defined but not used in this cell.
a = np.array([1,2,3])
b = np.array([1,2,3])
a +1

array([2, 3, 4])