In [60]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

### Preprocess the merged output csv from OpenFace

In [7]:
# Load the merged OpenFace output. As the preview shows, each row pairs one video
# clip with one Action Unit and its Frame_count_sum.
df = pd.read_csv("merged_output_openface.csv")
df.head()

Unnamed: 0,Subject,Category,Part,Video clip filename,File size(mb),Frame count,Duration(sec),Done?,Action Unit,Frame_count_sum,filename_index
0,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,,AU01,579,S01_1001-1.mp4
1,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,,AU02,31,S01_1001-1.mp4
2,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,,AU04,1438,S01_1001-1.mp4
3,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,,AU05,0,S01_1001-1.mp4
4,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,,AU06,0,S01_1001-1.mp4


In [10]:
# Total the per-clip frame counts for every (Subject, Category, Action Unit) triple,
# then spread the Action Units into columns: one row per Subject/Category, with 0
# for any Action Unit that has no rows in that group.
fau_totals = df.groupby(["Subject", "Category", "Action Unit"])["Frame_count_sum"].sum()
fau_wide = fau_totals.unstack(fill_value=0)
df_fau_sum = fau_wide.reset_index()
df_fau_sum.head()

Action Unit,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,606,8923,160,1662,956,1580,5113,2801,2930
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,854,15507,223,3111,862,1791,7929,5372,3932
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,345,7373,189,2534,591,953,3947,3271,1982
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,1625,26568,276,7935,3103,2822,18401,13489,8031
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,4458,9458,1182,5867,2824,2614,8300,6183,3801


In [11]:
# Inspect the column labels of the wide table: the two key columns followed by the AU columns.
df_fau_sum.columns

Index(['Subject', 'Category', 'AU01', 'AU02', 'AU04', 'AU05', 'AU06', 'AU07',
       'AU09', 'AU10', 'AU12', 'AU14', 'AU15', 'AU17', 'AU20', 'AU23', 'AU25',
       'AU26', 'AU45'],
      dtype='object', name='Action Unit')

In [13]:
# Persist the wide AU table (without the row index) so the next section can reload it.
df_fau_sum.to_csv("processed_au_openface.csv", index=False)

### Concatenate the duration of each part to the processed AU table

In [14]:
# Reload the wide AU table from disk.
# NOTE(review): this rebinds `df`, which earlier held the raw merged OpenFace rows.
df = pd.read_csv("processed_au_openface.csv")
df.head()

Unnamed: 0,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,606,8923,160,1662,956,1580,5113,2801,2930
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,854,15507,223,3111,862,1791,7929,5372,3932
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,345,7373,189,2534,591,953,3947,3271,1982
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,1625,26568,276,7935,3103,2822,18401,13489,8031
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,4458,9458,1182,5867,2824,2614,8300,6183,3801


In [16]:
# Load the per-clip metadata; the preview shows one row per video clip, including
# its Frame count and Duration(sec).
df_sliced_info = pd.read_csv("Sliced_video_info - Sliced_video_info.csv")
df_sliced_info.head()

Unnamed: 0,Subject,Category,Part,Video clip filename,File size(mb),Frame count,Duration(sec),Done?
0,S01,Anxiety,Part1,S01_1001-1.mp4,266,2147,17.91,
1,S01,Anxiety,Part1,S01_1001-2.mp4,53,467,3.9,
2,S01,Anxiety,Part1,S01_1001-3.mp4,58,491,4.1,
3,S01,Anxiety,Part1,S01_1001-4.mp4,53,486,4.05,
4,S01,Anxiety,Part1,S01_1001-5.mp4,67,592,4.94,


In [18]:
# Total clip duration in seconds for each (Subject, Category) pair.
duration_groups = df_sliced_info.groupby(["Subject", "Category"])
duration_totals = duration_groups[["Duration(sec)"]].sum()
df_sliced_info_duration = duration_totals.reset_index()
df_sliced_info_duration

Unnamed: 0,Subject,Category,Duration(sec)
0,S01,Anxiety,279.03
1,S01,Mild,362.69
2,S01,Moderate,235.54
3,S01,Severe,723.6
4,S02,Anxiety,256.79
5,S02,Mild,336.62
6,S02,Moderate,253.12
7,S02,Severe,668.21
8,S03,Anxiety,211.82
9,S03,Mild,295.43


In [19]:
# Attach the total duration of each (Subject, Category) group to its AU counts.
# The tables are matched on their keys rather than by row position, so a group that
# is missing from (or ordered differently in) one table can never be paired with
# another group's duration. `validate` raises if either side has duplicate keys.
df_joined = df.merge(
    df_sliced_info_duration[["Subject", "Category", "Duration(sec)"]],
    on=["Subject", "Category"],
    how="left",
    validate="one_to_one",
)
# Every AU row needs a duration because the per-second rates divide by it later.
assert df_joined["Duration(sec)"].notna().all(), "missing Duration(sec) for some Subject/Category"
df_joined

Unnamed: 0,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45,Duration(sec)
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,606,8923,160,1662,956,1580,5113,2801,2930,279.03
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,854,15507,223,3111,862,1791,7929,5372,3932,362.69
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,345,7373,189,2534,591,953,3947,3271,1982,235.54
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,1625,26568,276,7935,3103,2822,18401,13489,8031,723.6
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,4458,9458,1182,5867,2824,2614,8300,6183,3801,256.79
5,S02,Mild,7402,2717,31749,11,15124,27127,1520,25215,2853,13022,1166,6234,2962,2965,8882,6683,3399,336.62
6,S02,Moderate,3608,997,22196,0,9126,20236,634,15174,1276,9235,887,2853,1560,1821,6570,5075,1005,253.12
7,S02,Severe,11458,3227,52144,326,22601,46253,1397,40131,3623,14615,3012,12510,3769,4547,13853,11878,3503,668.21
8,S03,Anxiety,877,410,9474,0,860,5650,62,3252,991,18347,137,2709,935,776,2249,1908,885,211.82
9,S03,Mild,66,88,10447,0,0,5303,0,2697,2,30500,106,1217,252,734,768,771,328,295.43


In [26]:
# Persist the AU counts together with their durations (row index omitted).
df_joined.to_csv("processed_au_openface_duration.csv", index=False)

### Perform Vector Pattern Matching

In [29]:
# Action Unit labels used throughout the analysis, in the column order of the processed table.
FAU = [f"AU{unit:02d}" for unit in (1, 2, 4, 5, 6, 7, 9, 10, 12, 14, 15, 17, 20, 23, 25, 26, 45)]

In [107]:
# Reload the AU counts with their Duration(sec) column for the pattern-matching steps.
df_processed_au = pd.read_csv("processed_au_openface_duration.csv")
df_processed_au.head()

Unnamed: 0,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45,Duration(sec)
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,606,8923,160,1662,956,1580,5113,2801,2930,279.03
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,854,15507,223,3111,862,1791,7929,5372,3932,362.69
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,345,7373,189,2534,591,953,3947,3271,1982,235.54
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,1625,26568,276,7935,3103,2822,18401,13489,8031,723.6
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,4458,9458,1182,5867,2824,2614,8300,6183,3801,256.79


In [108]:
# Per-second rate of each FAU: the total frame count divided by the total duration,
# rounded up to a whole number and stored in a new "<AU>_avg" column.
seconds = df_processed_au["Duration(sec)"]
df_processed_au = df_processed_au.assign(
    **{fau + "_avg": np.ceil(df_processed_au[fau] / seconds) for fau in FAU}
)

df_processed_au.head()

Unnamed: 0,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,...,AU10_avg,AU12_avg,AU14_avg,AU15_avg,AU17_avg,AU20_avg,AU23_avg,AU25_avg,AU26_avg,AU45_avg
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,...,51.0,3.0,32.0,1.0,6.0,4.0,6.0,19.0,11.0,11.0
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,...,64.0,3.0,43.0,1.0,9.0,3.0,5.0,22.0,15.0,11.0
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,...,46.0,2.0,32.0,1.0,11.0,3.0,5.0,17.0,14.0,9.0
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,...,60.0,3.0,37.0,1.0,11.0,5.0,4.0,26.0,19.0,12.0
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,...,95.0,18.0,37.0,5.0,23.0,11.0,11.0,33.0,25.0,15.0


In [109]:
# Build one reference matrix per category: one row per df_processed_au row of that
# category, one column per FAU rate. The columns are selected by name (in FAU order)
# rather than as "the last 17 columns", so the matrices stay correct even if
# df_processed_au gains or reorders columns.
avg_columns = [fau + "_avg" for fau in FAU]


def _category_matrix(category):
    """Return the "<AU>_avg" values of every row labelled `category` as a 2-D array."""
    in_category = df_processed_au["Category"] == category
    return df_processed_au.loc[in_category, avg_columns].to_numpy()


df_anxiety = _category_matrix("Anxiety")
df_mild = _category_matrix("Mild")
df_moderate = _category_matrix("Moderate")
df_severe = _category_matrix("Severe")

In [110]:
# Inspect the Anxiety reference matrix (one row per reference vector, one column per FAU rate).
df_anxiety

array([[ 11.,   3.,  48.,   1.,   6.,   9.,   1.,  51.,   3.,  32.,   1.,
          6.,   4.,   6.,  19.,  11.,  11.],
       [ 20.,   6., 109.,   1.,  78., 100.,   4.,  95.,  18.,  37.,   5.,
         23.,  11.,  11.,  33.,  25.,  15.],
       [  5.,   2.,  45.,   0.,   5.,  27.,   1.,  16.,   5.,  87.,   1.,
         13.,   5.,   4.,  11.,  10.,   5.],
       [ 16.,   9.,  21.,   3.,   7.,  29.,   4.,  52.,   4.,  81.,   7.,
         15.,   6.,   3.,  24.,  16.,  13.],
       [ 10.,   4.,  73.,   2.,  25.,  52.,   3.,  56.,   7.,  50.,   5.,
         14.,   7.,   1.,  15.,   9.,  12.],
       [  7.,   2., 113.,   2.,   1.,  27.,   1.,  37.,   1.,   1.,   2.,
         23.,   7.,   1.,  30.,  13.,  14.],
       [  8.,   3.,  46.,   1.,  27.,  43.,   1.,  49.,  30.,  46.,   4.,
         10.,   3.,   2.,  11.,   7.,   7.]])

### Computing Cosine Similarity for Russell's video

In [111]:
# Load the OpenFace output for Russell's video and strip any surrounding whitespace
# from the header labels so columns can be looked up by their plain names.
df_russell = pd.read_csv("russell_test.csv").rename(columns=str.strip)
df_russell.head()

Unnamed: 0,frame,face_id,timestamp,confidence,success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,1,0,0.0,0.98,1,0.0,0.0,0.11,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0,0.033,0.98,1,0.0,0.0,0.38,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0,0.067,0.98,1,0.0,0.0,0.42,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0,0.1,0.98,1,0.0,0.0,0.23,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,0,0.133,0.98,1,0.0,0.0,0.07,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [112]:
# Use the timestamp of the last row as the duration of the video.
# NOTE(review): this assumes the rows are in chronological order and ignores the
# length of the final frame — confirm that is acceptable.
duration = df_russell["timestamp"].iloc[-1]
duration

4.767

In [113]:
# Flag, frame by frame, whether each FAU counts as present (1) or not (0).
# A frame counts only when the AU's "_c" column equals 1, its "_r" column is at
# least 0.5, the frame's confidence is at least 0.98 and `success` equals 1.
# The two frame-level checks do not depend on the AU, so they are evaluated once.
reliable_frame = (df_russell["confidence"] >= 0.98) & (df_russell["success"] == 1)
for au in FAU:
    au_active = (df_russell[au + "_c"] == 1) & (df_russell[au + "_r"] >= 0.5)
    df_russell[au] = (au_active & reliable_frame).astype(int)

df_russell

Unnamed: 0,frame,face_id,timestamp,confidence,success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,...,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45
0,1,0,0.000,0.98,1,0.00,0.0,0.11,0.00,0.00,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0.033,0.98,1,0.00,0.0,0.38,0.00,0.00,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0.067,0.98,1,0.00,0.0,0.42,0.00,0.00,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0.100,0.98,1,0.00,0.0,0.23,0.00,0.00,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0.133,0.98,1,0.00,0.0,0.07,0.00,0.00,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,140,0,4.633,0.93,1,1.92,0.0,0.00,0.27,0.75,...,0,0,0,0,0,0,0,0,0,0
140,141,0,4.667,0.88,1,1.35,0.0,0.00,0.15,0.47,...,0,0,0,0,0,0,0,0,0,0
141,142,0,4.700,0.03,0,0.82,0.0,0.00,0.06,0.33,...,0,0,0,0,0,0,0,0,0,0
142,143,0,4.733,0.77,1,0.13,0.0,0.00,0.06,0.34,...,0,0,0,0,0,0,0,0,0,0


In [114]:
# Per-second rate of each FAU in this video: the number of flagged frames divided by
# the duration, rounded up, and shaped as a single-row matrix for the pairwise metrics.
fau_frame_counts = df_russell[FAU].sum()
df_russell_fau = np.ceil(fau_frame_counts / duration).to_numpy().reshape(1, -1)
df_russell_fau

array([[ 5.,  3., 16.,  0.,  0.,  2.,  0.,  8.,  2., 16.,  2.,  9.,  1.,
         2.,  8.,  4.,  3.]])

In [116]:
# Cosine similarity between the input vector and all 28 reference vectors, grouped
# by category. Each result holds one score per reference row of that category.
references = (df_anxiety, df_mild, df_moderate, df_severe)
anxiety_vector, mild_vector, moderate_vector, severe_vector = (
    cosine_similarity(reference, df_russell_fau) for reference in references
)
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.tolist())

Anxiety: [[0.8870430164544639], [0.7112959140503119], [0.8768737667777622], [0.855209676354599], [0.8428710421109153], [0.7578149284846961], [0.7941285338752551]]
Mild: [[0.8904351808243383], [0.7720802042302645], [0.7770077011086519], [0.8514631552727685], [0.830190216949803], [0.7678749340599963], [0.7836836329497123]]
Moderate: [[0.908489829426176], [0.7569182786014468], [0.6387592531788006], [0.7839202343683823], [0.9026714406124379], [0.7276327892316894], [0.8142945168377199]]
Severe: [[0.8999947762654039], [0.7410618139614705], [0.8125283762808672], [0.8108928878898881], [0.8766215762169922], [0.7945503688393514], [0.7834837796511356]]


In [117]:
# Mean of the scores against each category's reference vectors.
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.mean())

Anxiety: 0.8178909825868576
Mild: 0.8103907179136478
Moderate: 0.7903837631795217
Severe: 0.8170190827293011


In [66]:
# Largest score against each category's reference vectors.
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.max())

Anxiety: 0.8870430164544639
Mild: 0.8904351808243383
Moderate: 0.908489829426176
Severe: 0.8999947762654039


In [61]:
# Euclidean distance between the input vector and all 28 reference vectors, grouped
# by category.
# NOTE(review): this rebinds the *_vector names that previously held the cosine
# similarities, so cells that read them depend on which of the two ran last.
references = (df_anxiety, df_mild, df_moderate, df_severe)
anxiety_vector, mild_vector, moderate_vector, severe_vector = (
    euclidean_distances(reference, df_russell_fau) for reference in references
)
for label, distances in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, distances.tolist())

Anxiety: [[59.34644049983116], [185.60711193270586], [81.81075723888638], [87.960218280766], [100.61808982484213], [109.68591523071684], [82.03048213926333]]
Mild: [[79.41662294507366], [144.30176714094668], [92.03260291874831], [87.79521627059188], [94.28149341201592], [110.72488428533127], [72.11102550927978]]
Moderate: [[54.101755978895916], [128.8681496724462], [35.91656999213594], [90.95603333479313], [98.91410415102591], [105.48459603183775], [47.40253157796533]]
Severe: [[77.20103626247513], [115.43829520570719], [152.76452467768817], [70.83784299369935], [106.39548862616309], [90.43782394551519], [72.09715667070374]]


In [63]:
# Variance of the values in each category's vector.
# NOTE(review): the earlier comment here said "mean", but the code computes `.var()`
# — confirm which statistic is intended.
print("Anxiety:", anxiety_vector.var())
print("Mild:", mild_vector.var())
print("Moderate:", moderate_vector.var())
print("Severe:", severe_vector.var())

Anxiety: 1409.5826346803656
Mild: 495.8376865912399
Moderate: 1027.373642624294
Severe: 753.1652994851878


### Compute Cosine Similarity for one of the subjects

In [71]:
# Preview the processed table again before selecting a single subject from it.
df_processed_au.head()

Unnamed: 0,Subject,Category,AU01,AU02,AU04,AU05,AU06,AU07,AU09,AU10,...,AU10_avg,AU12_avg,AU14_avg,AU15_avg,AU17_avg,AU20_avg,AU23_avg,AU25_avg,AU26_avg,AU45_avg
0,S01,Anxiety,2920,727,13324,19,1429,2238,128,13972,...,51.0,3.0,32.0,1.0,6.0,4.0,6.0,19.0,11.0,11.0
1,S01,Mild,3207,1267,21336,39,1005,5377,63,23070,...,64.0,3.0,43.0,1.0,9.0,3.0,5.0,22.0,15.0,11.0
2,S01,Moderate,1697,536,10505,78,914,2927,429,10748,...,46.0,2.0,32.0,1.0,11.0,3.0,5.0,17.0,14.0,9.0
3,S01,Severe,8749,2070,43618,5,858,6560,397,43168,...,60.0,3.0,37.0,1.0,11.0,5.0,4.0,26.0,19.0,12.0
4,S02,Anxiety,4890,1424,27748,176,19804,25563,883,24309,...,95.0,18.0,37.0,5.0,23.0,11.0,11.0,33.0,25.0,15.0


In [74]:
# Pull out S01's Anxiety row as the query, keeping only the per-second FAU rates.
# The columns are chosen by name (in FAU order) instead of "the last 17 columns",
# so the selection stays correct if df_processed_au gains or reorders columns.
# NOTE(review): this same row is also part of the Anxiety reference matrix, so
# comparing against that matrix includes a perfect self-match — consider excluding it.
is_s01_anxiety = (df_processed_au["Subject"] == "S01") & (df_processed_au["Category"] == "Anxiety")
df_s01 = df_processed_au.loc[is_s01_anxiety, [fau + "_avg" for fau in FAU]]
df_s01

Unnamed: 0,AU01_avg,AU02_avg,AU04_avg,AU05_avg,AU06_avg,AU07_avg,AU09_avg,AU10_avg,AU12_avg,AU14_avg,AU15_avg,AU17_avg,AU20_avg,AU23_avg,AU25_avg,AU26_avg,AU45_avg
0,11.0,3.0,48.0,1.0,6.0,9.0,1.0,51.0,3.0,32.0,1.0,6.0,4.0,6.0,19.0,11.0,11.0


In [75]:
# Convert the selection to a single-row 2-D array, the shape the pairwise metrics take.
df_s01 = np.reshape(np.array(df_s01), (1, -1))
df_s01

array([[11.,  3., 48.,  1.,  6.,  9.,  1., 51.,  3., 32.,  1.,  6.,  4.,
         6., 19., 11., 11.]])

In [76]:
# Cosine similarity between the S01 query vector and all 28 reference vectors,
# grouped by category.
references = (df_anxiety, df_mild, df_moderate, df_severe)
anxiety_vector, mild_vector, moderate_vector, severe_vector = (
    cosine_similarity(reference, df_s01) for reference in references
)
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.tolist())

Anxiety: [[0.9999999999999998], [0.8394089948651632], [0.7555572600831348], [0.8363220595058479], [0.916058814768512], [0.8119124834340238], [0.8655463024733929]]
Mild: [[0.9956943385093844], [0.8766892827879319], [0.6348794376785696], [0.8238902026076464], [0.9092751616882253], [0.7828229179634966], [0.8396721857801094]]
Moderate: [[0.9931744830617658], [0.852515824893015], [0.45811175120826264], [0.7736512631679305], [0.9425287269157215], [0.7594207222742997], [0.8677905103908489]]
Severe: [[0.9946029039782871], [0.851905102662749], [0.723151189868504], [0.7913869931232971], [0.9176484747411693], [0.8126525462562827], [0.8618756988875625]]


In [78]:
# Mean of the scores against each category's reference vectors.
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.mean())

Anxiety: 0.8606865593042964
Mild: 0.8375605038593378
Moderate: 0.8067418974159777
Severe: 0.8504604156454073


In [79]:
# Euclidean distance between the S01 query vector and all 28 reference vectors,
# grouped by category.
# NOTE(review): this rebinds the *_vector names that previously held the cosine
# similarities.
references = (df_anxiety, df_mild, df_moderate, df_severe)
anxiety_vector, mild_vector, moderate_vector, severe_vector = (
    euclidean_distances(reference, df_s01) for reference in references
)
for label, distances in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, distances.tolist())

Anxiety: [[0.0], [142.33060106667153], [69.14477565225012], [61.57109711544857], [57.810033731178535], [78.8098978555359], [51.73973328110612]]
Mild: [[22.293496809607955], [100.5236290630218], [87.49857141690943], [63.198101237299845], [53.58171329847526], [83.773504164503], [50.3189825016365]]
Moderate: [[10.44030650891055], [89.41476388158725], [74.22937423958254], [70.64700984472024], [53.25410782277739], [81.62720134857007], [41.46082488325576]]
Severe: [[20.97617696340303], [77.45966692414834], [128.33160171991932], [57.043842787806646], [63.40346993658943], [65.24568951279464], [46.8187996428785]]


In [81]:
# Spread (largest minus smallest value) within each category's vector.
for label, values in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, values.max() - values.min())

Anxiety: 142.33060106667153
Mild: 78.23013225341384
Moderate: 78.9744573726767
Severe: 107.35542475651629


In [84]:
# Load the OpenFace output for a single S01 clip and strip any surrounding whitespace
# from the header labels.
# NOTE(review): this rebinds `df_s01`, which previously held the S01 query vector.
df_s01 = pd.read_csv("S01_1001-1.csv")
df_s01.columns = df_s01.columns.str.strip()
df_s01.head()

Unnamed: 0,frame,face_id,timestamp,confidence,success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,1,0,0.0,0.98,1,1.4,1.52,1.4,0.0,0.52,...,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,2,0,0.008,0.98,1,1.2,0.72,1.16,0.0,0.68,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,3,0,0.017,0.98,1,1.25,0.7,1.08,0.0,0.72,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,4,0,0.025,0.88,1,1.39,0.95,1.07,0.0,0.81,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4,5,0,0.033,0.98,1,1.72,1.16,1.16,0.0,0.83,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [85]:
# Flag, frame by frame, whether each FAU counts as present (1) or not (0).
# A frame counts only when the AU's "_c" column equals 1, its "_r" column is at
# least 0.5, the frame's confidence is at least 0.98 and `success` equals 1.
# The two frame-level checks do not depend on the AU, so they are evaluated once.
reliable_frame = (df_s01["confidence"] >= 0.98) & (df_s01["success"] == 1)
for au in FAU:
    au_active = (df_s01[au + "_c"] == 1) & (df_s01[au + "_r"] >= 0.5)
    df_s01[au] = (au_active & reliable_frame).astype(int)

df_s01

Unnamed: 0,frame,face_id,timestamp,confidence,success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,...,AU10,AU12,AU14,AU15,AU17,AU20,AU23,AU25,AU26,AU45
0,1,0,0.000,0.98,1,1.40,1.52,1.40,0.0,0.52,...,1,1,1,0,0,0,1,0,0,1
1,2,0,0.008,0.98,1,1.20,0.72,1.16,0.0,0.68,...,1,0,1,0,0,0,1,0,0,1
2,3,0,0.017,0.98,1,1.25,0.70,1.08,0.0,0.72,...,1,0,1,0,0,0,0,0,0,1
3,4,0,0.025,0.88,1,1.39,0.95,1.07,0.0,0.81,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0.033,0.98,1,1.72,1.16,1.16,0.0,0.83,...,1,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2142,2143,0,17.868,0.98,1,0.00,0.00,1.05,0.0,0.74,...,1,0,1,0,0,0,0,0,0,0
2143,2144,0,17.876,0.98,1,0.06,0.01,1.04,0.0,0.82,...,1,0,1,0,0,0,0,0,0,0
2144,2145,0,17.885,0.98,1,0.06,0.01,0.99,0.0,0.77,...,1,0,1,0,0,0,0,0,0,0
2145,2146,0,17.893,0.98,1,0.11,0.01,1.17,0.0,0.78,...,1,0,1,0,0,0,0,0,0,0


In [90]:
# Per-second rate of each FAU for this clip, shaped as a single-row matrix.
# 17.91 matches the Duration(sec) listed for S01_1001-1.mp4 in the sliced-video info table.
clip_duration_sec = 17.91
df_s01_au = np.ceil(df_s01[FAU].sum() / clip_duration_sec).to_numpy().reshape(1, -1)
df_s01_au

array([[ 33.,   2.,  81.,   0.,   0.,   0.,   0., 118.,   1., 115.,   0.,
          1.,   2.,   4.,   2.,   1.,  35.]])

In [91]:
# Cosine similarity between the single-clip vector and every reference vector,
# grouped by category.
references = (df_anxiety, df_mild, df_moderate, df_severe)
anxiety_vector, mild_vector, moderate_vector, severe_vector = (
    cosine_similarity(reference, df_s01_au) for reference in references
)
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.tolist())

Anxiety: [[0.9162646391446748], [0.6611618438875491], [0.7971158443611593], [0.8673041387930371], [0.8163717817756625], [0.5885205194351353], [0.7900025542539406]]
Mild: [[0.9190334337023647], [0.7123378749421732], [0.7596797222010188], [0.8593420094744803], [0.7980932106459623], [0.5588020902651616], [0.7587010265472499]]
Moderate: [[0.9012656072108233], [0.6805624892844331], [0.6419749593040324], [0.8517487797350938], [0.866150265674824], [0.5430341371778626], [0.7829974609671241]]
Severe: [[0.8935478255469956], [0.6667814035243067], [0.7270895088261778], [0.7827052016346057], [0.850487596198053], [0.6108304374017619], [0.7830814099449528]]


In [93]:
# Variance of the scores within each category's vector.
for label, scores in (
    ("Anxiety:", anxiety_vector),
    ("Mild:", mild_vector),
    ("Moderate:", moderate_vector),
    ("Severe:", severe_vector),
):
    print(label, scores.var())

Anxiety: 0.011230798887496925
Mild: 0.011294783077548566
Moderate: 0.015299247733647315
Severe: 0.00844162646320738
