# Raw Feature Extraction

In [1]:
from parselmouth.praat import call

import pandas as pd
import parselmouth

## Load `Train` and `Test` Data

In [2]:
train_csv = pd.read_csv('../data/train.csv')
train_csv

Unnamed: 0,dialog_id,speaker,transcript,da_tag,start_time,end_time,function,pronoun,ppron,i,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2005,A,okay,"fo_o_fw_""""_by_bc",0.00000,1.31597,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,1.000000,0.0,0.0,1.0,0.000000,0.0
1,sw2005,B,SIL,x,0.00000,10.94882,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
2,sw2005,A,uh first um i need to know uh how do you feel ...,qo,1.31597,10.93013,0.384615,0.076923,0.076923,0.038462,...,0.076923,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.230769,0.0
3,sw2005,A,SIL,x,10.93013,21.35084,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
4,sw2005,B,well of course it's you know it's one of the l...,sv,10.94882,22.28000,0.642857,0.285714,0.190476,0.000000,...,0.000000,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82223,sw4940,A,yeah,ny,294.59659,295.00000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,1.000000,0.0,0.0,1.0,0.000000,0.0
82224,sw4940,B,you can see that horrible horrible brown haze,sd,294.62349,297.66445,0.375000,0.250000,0.125000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
82225,sw4940,A,SIL,x,295.00000,298.58336,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
82226,sw4940,B,SIL,x,297.66445,299.88000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0


In [3]:
test_csv = pd.read_csv('../data/test.csv')
test_csv

Unnamed: 0,dialog_id,speaker,transcript,da_tag,start_time,end_time,function,pronoun,ppron,i,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2015,A,SIL,x,0.00000,2.36986,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
1,sw2015,B,SIL,x,0.00000,24.29833,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
2,sw2015,A,have you ever gotten one of those calls that i...,qy,2.36986,7.66596,0.619048,0.190476,0.047619,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
3,sw2015,A,and their either offering a service,"fo_o_fw_""""_by_bc",7.66596,11.57304,0.500000,0.166667,0.166667,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
4,sw2015,A,or they're introducing some new product in the...,"fo_o_fw_""""_by_bc",11.57304,16.74866,0.444444,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15402,sw4877,B,but uh i haven't had any good really very good...,sd,286.84000,299.04000,0.500000,0.071429,0.071429,0.071429,...,0.000000,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0
15403,sw4877,A,SIL,x,287.42550,289.12000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
15404,sw4877,A,yeah,b,289.12000,289.44587,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,1.000000,0.0,0.0,1.0,0.000000,0.0
15405,sw4877,A,SIL,x,289.44587,290.24000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0


## Remove Short Segments

Remove segments less than 0.5 seconds to reduce errors with feature extraction.

### `Train`

In [4]:
train_csv = test_csv[(test_csv['end_time'] - test_csv['start_time'] > 0.5)]
test_csv

Unnamed: 0,dialog_id,speaker,transcript,da_tag,start_time,end_time,function,pronoun,ppron,i,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2015,A,SIL,x,0.00000,2.36986,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
1,sw2015,B,SIL,x,0.00000,24.29833,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
2,sw2015,A,have you ever gotten one of those calls that i...,qy,2.36986,7.66596,0.619048,0.190476,0.047619,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
3,sw2015,A,and their either offering a service,"fo_o_fw_""""_by_bc",7.66596,11.57304,0.500000,0.166667,0.166667,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
4,sw2015,A,or they're introducing some new product in the...,"fo_o_fw_""""_by_bc",11.57304,16.74866,0.444444,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15402,sw4877,B,but uh i haven't had any good really very good...,sd,286.84000,299.04000,0.500000,0.071429,0.071429,0.071429,...,0.000000,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0
15403,sw4877,A,SIL,x,287.42550,289.12000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
15404,sw4877,A,yeah,b,289.12000,289.44587,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,1.000000,0.0,0.0,1.0,0.000000,0.0
15405,sw4877,A,SIL,x,289.44587,290.24000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0


### `Test`

In [5]:
test_csv = test_csv[(test_csv['end_time'] - test_csv['start_time'] > 0.5)]
test_csv

Unnamed: 0,dialog_id,speaker,transcript,da_tag,start_time,end_time,function,pronoun,ppron,i,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2015,A,SIL,x,0.00000,2.36986,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
1,sw2015,B,SIL,x,0.00000,24.29833,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
2,sw2015,A,have you ever gotten one of those calls that i...,qy,2.36986,7.66596,0.619048,0.190476,0.047619,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
3,sw2015,A,and their either offering a service,"fo_o_fw_""""_by_bc",7.66596,11.57304,0.500000,0.166667,0.166667,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
4,sw2015,A,or they're introducing some new product in the...,"fo_o_fw_""""_by_bc",11.57304,16.74866,0.444444,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15401,sw4877,A,you can come and go,bf,286.40000,287.42550,0.600000,0.200000,0.200000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
15402,sw4877,B,but uh i haven't had any good really very good...,sd,286.84000,299.04000,0.500000,0.071429,0.071429,0.071429,...,0.000000,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0
15403,sw4877,A,SIL,x,287.42550,289.12000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0
15405,sw4877,A,SIL,x,289.44587,290.24000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0


## Text Features

### `Train`

In [6]:
text_train_feats = train_csv.copy()
text_train_feats = text_train_feats.drop(['transcript'], axis=1)
text_train_feats.to_csv('../features/raw_train_text_features.csv')

text_train_feats.head(3)

Unnamed: 0,dialog_id,speaker,da_tag,start_time,end_time,function,pronoun,ppron,i,we,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2015,A,x,0.0,2.36986,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,sw2015,B,x,0.0,24.29833,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,sw2015,A,qy,2.36986,7.66596,0.619048,0.190476,0.047619,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### `Test`

In [7]:
text_test_feats = test_csv.copy()
text_test_feats = text_test_feats.drop(['transcript'], axis=1)
text_test_feats.to_csv('../features/raw_test_text_features.csv')

text_test_feats.head(3)

Unnamed: 0,dialog_id,speaker,da_tag,start_time,end_time,function,pronoun,ppron,i,we,...,home,money,relig,death,informal,swear,netspeak,assent,nonflu,filler
0,sw2015,A,x,0.0,2.36986,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,sw2015,B,x,0.0,24.29833,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,sw2015,A,qy,2.36986,7.66596,0.619048,0.190476,0.047619,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Speech Features

In [8]:
def get_acoustic_features(row, sound):
    sound = sound.extract_part(row['start_time'], row['end_time'])
    
    # Pitch
    pitch = call(sound, "To Pitch", 0.0, 75, 500)
    pitch_min = call(pitch, "Get minimum", 0, 0, "hertz", "Parabolic")
    pitch_max = call(pitch, "Get maximum", 0, 0, "hertz", "Parabolic")
    pitch_mean = call(pitch, "Get mean", 0, 0, "hertz")
    pitch_sd = call(pitch, "Get standard deviation", 0, 0, "hertz")

    # Intensity
    intensity = call(sound, "To Intensity", 75, 0.0, False)
    intensity_min = call(intensity, "Get minimum", 0, 0, "Parabolic")
    intensity_max = call(intensity, "Get maximum", 0, 0, "Parabolic")
    intensity_mean = call(intensity, "Get mean", 0, 0, "energy")
    intensity_sd = call(intensity, "Get standard deviation", 0, 0)

    # Speaking Rate
    duration = row['end_time'] - row['start_time']
    num_of_words = len(row['transcript'].split())
    speaking_rate = num_of_words / duration

    # Jitter and Shimmer
    point_process = call(sound, 'To PointProcess (periodic, cc)...', 75, 500)
    jitter = call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    shimmer = call([sound, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

    # HNR
    harmonicity = call(sound, 'To Harmonicity (cc)', 0.01, 75, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)

    # Return features.
    return [row['dialog_id'], row['speaker'], row['start_time'], row['end_time'], \
            pitch_min, pitch_max, pitch_mean, pitch_sd, \
            intensity_min, intensity_max, intensity_mean, intensity_sd, \
            speaking_rate, jitter, shimmer, hnr]

def get_speech_features(df):
    data = []
    columns = ['dialog_id', 'speaker', 'start_time', 'end_time', \
           'Min Pitch', 'Max Pitch', 'Mean Pitch', 'Sd Pitch', \
           'Min Intensity', 'Max Intensity', 'Mean Intensity', 'Sd Intensity', \
           'Speaking Rate', 'Jitter', 'Shimmer', 'HNR'] 
    grouped_df = df.groupby(['dialog_id', 'speaker'])
    
    for (i, (row, item)) in enumerate(grouped_df):
        dialog_id = row[0]
        speaker = row[1]
        
        sound_file = '../data/wav/{}_{}.wav'.format(dialog_id, speaker) 
        sound = parselmouth.Sound(sound_file)

        item.apply(lambda x: data.append(get_acoustic_features(x, sound)), axis=1)
        
        print("{} {} Completed ✅ ({:.2f}%)".format(dialog_id, speaker, i/len(grouped_df) * 100))
    
    return pd.DataFrame(data, columns=columns)

### `Train`

In [9]:
speech_train_feats = get_speech_features(train_csv)
speech_train_feats.to_csv('../features/raw_train_speech_features.csv')

speech_train_feats.head(3)

sw2015 A Completed ✅ (0.00%)
sw2015 B Completed ✅ (0.88%)
sw2020 A Completed ✅ (1.75%)
sw2020 B Completed ✅ (2.63%)
sw2022 A Completed ✅ (3.51%)
sw2022 B Completed ✅ (4.39%)
sw2024 A Completed ✅ (5.26%)
sw2024 B Completed ✅ (6.14%)
sw2025 A Completed ✅ (7.02%)
sw2025 B Completed ✅ (7.89%)
sw2038 A Completed ✅ (8.77%)
sw2038 B Completed ✅ (9.65%)
sw2041 A Completed ✅ (10.53%)
sw2041 B Completed ✅ (11.40%)
sw2060 A Completed ✅ (12.28%)
sw2060 B Completed ✅ (13.16%)
sw2061 A Completed ✅ (14.04%)
sw2061 B Completed ✅ (14.91%)
sw2120 A Completed ✅ (15.79%)
sw2120 B Completed ✅ (16.67%)
sw2130 A Completed ✅ (17.54%)
sw2130 B Completed ✅ (18.42%)
sw2145 A Completed ✅ (19.30%)
sw2145 B Completed ✅ (20.18%)
sw2154 A Completed ✅ (21.05%)
sw2154 B Completed ✅ (21.93%)
sw2226 A Completed ✅ (22.81%)
sw2226 B Completed ✅ (23.68%)
sw2260 A Completed ✅ (24.56%)
sw2260 B Completed ✅ (25.44%)
sw2362 A Completed ✅ (26.32%)
sw2362 B Completed ✅ (27.19%)
sw2393 A Completed ✅ (28.07%)
sw2393 B Completed ✅ (

Unnamed: 0,dialog_id,speaker,start_time,end_time,Min Pitch,Max Pitch,Mean Pitch,Sd Pitch,Min Intensity,Max Intensity,Mean Intensity,Sd Intensity,Speaking Rate,Jitter,Shimmer,HNR
0,sw2015,A,0.0,2.36986,174.046508,174.066746,174.054179,0.007531,27.397755,78.526451,61.632826,8.104074,0.421966,0.000111,0.030335,13.731803
1,sw2015,A,2.36986,7.66596,93.313373,256.38758,127.352474,35.21017,27.913918,62.486222,55.339087,9.229627,3.965182,0.016218,0.090979,10.927442
2,sw2015,A,7.66596,11.57304,75.651108,148.327094,117.17362,16.502379,27.797807,65.919881,55.229903,12.94173,1.535674,0.015932,0.075782,12.392446


### `Test`

In [10]:
speech_test_feats = get_speech_features(test_csv)
speech_test_feats.to_csv('../features/raw_test_speech_features.csv')

speech_test_feats.head(3)

sw2015 A Completed ✅ (0.00%)
sw2015 B Completed ✅ (0.88%)
sw2020 A Completed ✅ (1.75%)
sw2020 B Completed ✅ (2.63%)
sw2022 A Completed ✅ (3.51%)
sw2022 B Completed ✅ (4.39%)
sw2024 A Completed ✅ (5.26%)
sw2024 B Completed ✅ (6.14%)
sw2025 A Completed ✅ (7.02%)
sw2025 B Completed ✅ (7.89%)
sw2038 A Completed ✅ (8.77%)
sw2038 B Completed ✅ (9.65%)
sw2041 A Completed ✅ (10.53%)
sw2041 B Completed ✅ (11.40%)
sw2060 A Completed ✅ (12.28%)
sw2060 B Completed ✅ (13.16%)
sw2061 A Completed ✅ (14.04%)
sw2061 B Completed ✅ (14.91%)
sw2120 A Completed ✅ (15.79%)
sw2120 B Completed ✅ (16.67%)
sw2130 A Completed ✅ (17.54%)
sw2130 B Completed ✅ (18.42%)
sw2145 A Completed ✅ (19.30%)
sw2145 B Completed ✅ (20.18%)
sw2154 A Completed ✅ (21.05%)
sw2154 B Completed ✅ (21.93%)
sw2226 A Completed ✅ (22.81%)
sw2226 B Completed ✅ (23.68%)
sw2260 A Completed ✅ (24.56%)
sw2260 B Completed ✅ (25.44%)
sw2362 A Completed ✅ (26.32%)
sw2362 B Completed ✅ (27.19%)
sw2393 A Completed ✅ (28.07%)
sw2393 B Completed ✅ (

Unnamed: 0,dialog_id,speaker,start_time,end_time,Min Pitch,Max Pitch,Mean Pitch,Sd Pitch,Min Intensity,Max Intensity,Mean Intensity,Sd Intensity,Speaking Rate,Jitter,Shimmer,HNR
0,sw2015,A,0.0,2.36986,174.046508,174.066746,174.054179,0.007531,27.397755,78.526451,61.632826,8.104074,0.421966,0.000111,0.030335,13.731803
1,sw2015,A,2.36986,7.66596,93.313373,256.38758,127.352474,35.21017,27.913918,62.486222,55.339087,9.229627,3.965182,0.016218,0.090979,10.927442
2,sw2015,A,7.66596,11.57304,75.651108,148.327094,117.17362,16.502379,27.797807,65.919881,55.229903,12.94173,1.535674,0.015932,0.075782,12.392446
