# Dynamic Bayesian Network

We have now constructed P(AU) and P(FT) as outlined in the theoretical model. Next, we compute the joint probability P(AU and FT) for each AU and each FT.

In [66]:
import os
import pandas as pd
import pprint 
import pickle
import numpy as np
pp = pprint.PrettyPrinter(indent=4)
from sklearn.model_selection import train_test_split
from pgmpy.models import BayesianNetwork, DynamicBayesianNetwork
from pgmpy.inference import DBNInference
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.base import DAG

In [67]:
# Load the pickled artefacts this notebook works with.
# NOTE: pickle.load can execute code embedded in the file, so these paths
# should only ever point at pickles produced by this project.

def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


probabilities = _load_pickle("markov_probs.pickle")
dataframes = _load_pickle("onesec_dataframes.pickle")
conditional_probabilities = _load_pickle("conditional_probs.pickle")
dfs_with_scores = _load_pickle("onesec_dataframes_sorted.pickle")
train_data = _load_pickle("train_data_combined.pickle")
test_dfs = _load_pickle("test_dfs.pickle")
    


In [68]:
# Sanity check: what container does `.values` give back for one recording?
sample_recording = '/home/roni/coding/mastersProject/src/csvOut/p_100/recording_3'
print(type(dataframes[sample_recording].values))

<class 'numpy.ndarray'>


In [69]:
# Preview every entry of `dfs_with_scores` and tally the total row count.
frame_sum = 0
for recording_name, scored_df in dfs_with_scores.items():
    print(recording_name)
    print(scored_df.head())
    frame_sum += len(scored_df)

print("total number of seconds:", frame_sum)

/home/roni/coding/mastersProject/src/csvOut/p_112
                      AU17_c   AU07_c   AU14_c   AU12_c   AU20_c  facetouch  \
 timestamp                                                                    
1970-01-01 00:00:00      0.0      0.0      0.0      0.0      0.0          0   
1970-01-01 00:00:01      0.0      0.0      0.0      0.0      0.0          0   
1970-01-01 00:00:02      0.0      0.0      0.0      0.0      0.0          0   
1970-01-01 00:00:03      0.0      0.0      0.0      0.0      0.0          0   
1970-01-01 00:00:04      0.0      0.0      0.0      0.0      0.0          0   

                     PHQ Score  GAD Score  
 timestamp                                 
1970-01-01 00:00:00       19.0       12.0  
1970-01-01 00:00:01       19.0       12.0  
1970-01-01 00:00:02       19.0       12.0  
1970-01-01 00:00:03       19.0       12.0  
1970-01-01 00:00:04       19.0       12.0  
/home/roni/coding/mastersProject/src/csvOut/p_76/recording_1
                      AU17_

In [70]:
# Column labels used to build the networks.
# The leading space in each AU label is part of the column name.
nodes = [
    ' AU17_c',
    ' AU07_c',
    ' AU14_c',
    ' AU12_c',
    ' AU20_c',
]
# One co-occurrence column per AU column: 'FT' followed by the AU label.
cooc = ['FT' + au_col for au_col in nodes]
res = ['GAD Score', 'PHQ Score']

In [100]:
# Add, for every AU, a column flagging same-second co-occurrence with a face touch.
def addCoocCol(df):
    """Add one 0/1 co-occurrence column per entry of `cooc` to `df`, in place.

    For a name such as 'FT AU17_c' the new column is 1 on rows where
    ``df['facetouch'] + df[' AU17_c']`` equals 2, and 0 otherwise.
    Nothing is returned; `df` itself is modified.
    """
    for cooc_name in cooc:
        # 'FT AU17_c' -> ' AU17_c' (AU columns carry a leading space)
        au_col = ' ' + cooc_name.split(' ')[1]
        both_active = (df['facetouch'] + df[au_col]) == 2
        df[cooc_name] = both_active.astype(int)
            
# Add the co-occurrence columns to the training frame (modifies `train_data` in place).
addCoocCol(train_data)

In [72]:
# Show only the training rows where the 'facetouch' column equals 1.
train_data[ train_data['facetouch']==1 ]

Unnamed: 0_level_0,AU17_c,AU07_c,AU14_c,AU12_c,AU20_c,facetouch,PHQ Score,GAD Score,FT AU17_c,FT AU07_c,FT AU14_c,FT AU12_c,FT AU20_c
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1970-01-01 00:00:21,0.0,0.0,0.0,0.0,0.0,1,2.0,1.0,0,0,0,0,0
1970-01-01 00:00:22,0.0,0.0,0.0,0.0,0.0,1,2.0,1.0,0,0,0,0,0
1970-01-01 00:00:23,0.0,0.0,0.0,0.0,0.0,1,2.0,1.0,0,0,0,0,0
1970-01-01 00:00:24,0.0,0.0,0.0,0.0,0.0,1,2.0,1.0,0,0,0,0,0
1970-01-01 00:00:58,0.0,1.0,1.0,1.0,1.0,1,0.0,0.0,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1970-01-01 00:03:26,1.0,0.0,0.0,0.0,1.0,1,0.0,1.0,1,0,0,0,1
1970-01-01 00:03:27,1.0,0.0,0.0,0.0,1.0,1,0.0,1.0,1,0,0,0,1
1970-01-01 00:03:29,1.0,0.0,0.0,0.0,0.0,1,0.0,1.0,1,0,0,0,0
1970-01-01 00:03:30,1.0,0.0,0.0,0.0,0.0,1,0.0,1.0,1,0,0,0,0


In [73]:
#filtered_dfs = {}
#for df in dfs_with_scores:
#    filtered_dfs[df] = dfs_with_scores[df][[ ' AU17_c', ' AU07_c',' AU14_c' , ' AU12_c',' AU20_c', 'facetouch',  'FT AU17_c', 'FT AU07_c','FT AU14_c' , 'FT AU12_c','FT AU20_c', 'GAD Score','PHQ Score' ]]

In [74]:
#p109 = filtered_dfs['/home/roni/coding/mastersProject/src/csvOut/p_109']

In [75]:
def makeBN(df):
    """Build the static Bayesian network and fit it to `df`.

    The structure is defined by the module-level `nodes`, `cooc` and `res`
    lists:

    * each AU column -> its co-occurrence column (``'FT' + au``)
    * ``'facetouch'`` -> every co-occurrence column
    * every co-occurrence column -> ``res[0]`` and ``res[1]``

    The structure does not depend on the data, so the edges are added once
    instead of being re-added for every row of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Data handed to ``bn.fit``. Presumably it needs one column per node
        of the network -- confirm against the pgmpy documentation.

    Returns
    -------
    BayesianNetwork
        The network after ``fit(df)`` has been called on it.
    """
    bn = BayesianNetwork()
    for i in cooc:  # mid layer
        for j in nodes:  # base layer
            # Link an AU only to its own co-occurrence column.
            if 'FT' + j == i:
                bn.add_edge(u=j, v=i)
        # Face touch feeds every co-occurrence column ...
        bn.add_edge(u='facetouch', v=i)
        # ... and every co-occurrence column feeds both score nodes.
        bn.add_edge(u=i, v=res[0])
        bn.add_edge(u=i, v=res[1])
    bn.fit(df)
    return bn
#print(bn.edges())

In [76]:
# Build the network and fit it on the combined training data.
bn = makeBN(train_data)

In [77]:
# Display the conditional probability tables attached to the fitted network.
bn.cpds

[<TabularCPD representing P( AU17_c:2) at 0x7fb0e2208250>,
 <TabularCPD representing P(FT AU17_c:2 |  AU17_c:2, facetouch:2) at 0x7fb0be9f2ad0>,
 <TabularCPD representing P(facetouch:2) at 0x7fb0e220b130>,
 <TabularCPD representing P(GAD Score:6 | FT AU07_c:2, FT AU12_c:2, FT AU14_c:2, FT AU17_c:2, FT AU20_c:2) at 0x7fb0e2bda710>,
 <TabularCPD representing P(PHQ Score:9 | FT AU07_c:2, FT AU12_c:2, FT AU14_c:2, FT AU17_c:2, FT AU20_c:2) at 0x7fb0e135d000>,
 <TabularCPD representing P( AU07_c:2) at 0x7fb0e135fb80>,
 <TabularCPD representing P(FT AU07_c:2 |  AU07_c:2, facetouch:2) at 0x7fb0be9b7880>,
 <TabularCPD representing P( AU14_c:2) at 0x7fb0beacb2e0>,
 <TabularCPD representing P(FT AU14_c:2 |  AU14_c:2, facetouch:2) at 0x7fb0be9b7f40>,
 <TabularCPD representing P( AU12_c:2) at 0x7fb0e135ecb0>,
 <TabularCPD representing P(FT AU12_c:2 |  AU12_c:2, facetouch:2) at 0x7fb0e135c730>,
 <TabularCPD representing P( AU20_c:2) at 0x7fb0e22080a0>,
 <TabularCPD representing P(FT AU20_c:2 |  AU2

In [102]:
# The model is fitted; take two test recordings to try prediction on.
# Copies are used so the column edits below leave `test_dfs` untouched.
t, t2 = (
    test_dfs[recording_key].copy()
    for recording_key in (
        '/home/roni/coding/mastersProject/src/csvOut/p_109',
        '/home/roni/coding/mastersProject/src/csvOut/p_70/recording_0',
    )
)

In [103]:
# Give both test frames the same co-occurrence columns as the training data.
for held_out in (t, t2):
    addCoocCol(held_out)

In [104]:
# First rows of the p_109 test frame, now including the co-occurrence columns.
t.head()

Unnamed: 0_level_0,AU17_c,AU07_c,AU14_c,AU12_c,AU20_c,facetouch,PHQ Score,GAD Score,FT AU17_c,FT AU07_c,FT AU14_c,FT AU12_c,FT AU20_c
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1970-01-01 00:00:00,0.0,0.0,0.0,0.0,0.0,0,17.0,15.0,0,0,0,0,0
1970-01-01 00:00:01,0.0,0.0,0.0,0.0,0.0,0,17.0,15.0,0,0,0,0,0
1970-01-01 00:00:02,0.0,0.0,0.0,0.0,0.0,0,17.0,15.0,0,0,0,0,0
1970-01-01 00:00:03,0.0,0.0,0.0,0.0,0.0,0,17.0,15.0,0,0,0,0,0
1970-01-01 00:00:04,0.0,0.0,0.0,0.0,0.0,0,17.0,15.0,0,0,0,0,0


In [105]:
# First rows of the p_70/recording_0 test frame, now including the co-occurrence columns.
t2.head()

Unnamed: 0_level_0,AU17_c,AU07_c,AU14_c,AU12_c,AU20_c,facetouch,PHQ Score,GAD Score,FT AU17_c,FT AU07_c,FT AU14_c,FT AU12_c,FT AU20_c
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1970-01-01 00:00:00,1.0,0.0,0.0,1.0,0.0,0,0.0,1.0,0,0,0,0,0
1970-01-01 00:00:01,1.0,0.0,1.0,1.0,0.0,0,0.0,1.0,0,0,0,0,0
1970-01-01 00:00:02,1.0,0.0,1.0,1.0,0.0,0,0.0,1.0,0,0,0,0,0
1970-01-01 00:00:03,1.0,0.0,1.0,1.0,0.0,0,0.0,1.0,0,0,0,0,0
1970-01-01 00:00:04,1.0,0.0,0.0,0.0,1.0,0,0.0,1.0,0,0,0,0,0


In [106]:
# Remove the two score columns from both test frames (in place).
for held_out in (t, t2):
    held_out.drop(columns=['GAD Score', 'PHQ Score'], inplace=True)


In [107]:
# Run prediction on the p_109 frame; the result is read below for its
# 'GAD Score' and 'PHQ Score' columns.
t_pred = bn.predict(t)

  0%|          | 0/49 [00:00<?, ?it/s]

In [108]:
# Write the p_109 predictions to a CSV file in the working directory.
t_pred.to_csv("prediction1.csv")

In [117]:
# Distinct predicted scores for p_109, plus the mean of those distinct values
# with the first one left out.
# NOTE(review): the matching summary for `t2_pred` averages all distinct values
# (no `[1:]`); confirm which of the two is intended.
for label, score_col in (("GAD pred:", 'GAD Score'), ("PHQ pred:", 'PHQ Score')):
    distinct_scores = t_pred[score_col].unique()
    print(label, distinct_scores, distinct_scores[1:].mean())

GAD pred: [ 0.  1.  2. 12. 19.] 8.5
PHQ pred: [ 0.  4. 19.  1. 18.] 10.5


In [110]:
# Run prediction on the p_70/recording_0 frame; the result is read below for
# its 'GAD Score' and 'PHQ Score' columns.
t2_pred = bn.predict(t2)

  0%|          | 0/16 [00:00<?, ?it/s]

In [114]:
# Distinct predicted scores for p_70/recording_0, plus the mean of those
# distinct values.
# NOTE(review): the matching summary for `t_pred` skips the first distinct
# value (`[1:]`) before averaging; confirm which of the two is intended.
for label, score_col in (("GAD pred:", 'GAD Score'), ("PHQ pred:", 'PHQ Score')):
    distinct_scores = t2_pred[score_col].unique()
    print(label, distinct_scores, distinct_scores.mean())


GAD pred: [ 0. 12.] 6.0
PHQ pred: [ 0. 19.] 9.5


In [14]:
def convertDfToOneRow(df):
    """Flatten a frame with T rows into a frame with a single row.

    The original index is discarded and rows are renumbered 0..T-1. The
    result has one column per ``(original column, row number)`` pair, laid
    out column by column (every row number of the first column, then of the
    second, and so on), and one row labelled 0 holding the matching cells.
    """
    renumbered = df.reset_index(drop=True)
    n_rows = len(renumbered)

    # Build the (column, t) labels in column-major order.
    flat_labels = []
    for name in list(renumbered.columns):
        for t in range(n_rows):
            flat_labels.append((name, t))

    # Empty frame with T * n_columns columns, then fill its only row.
    flat = pd.DataFrame(columns=flat_labels)
    flat.loc[0] = [renumbered[name].loc[t] for name, t in flat_labels]
    return flat

In [15]:
# Flatten the p_109 recording into a single row and display it.
# NOTE(review): `filtered_dfs` is only assigned in a commented-out cell earlier
# in this notebook, so this raises NameError on a fresh kernel -- confirm where
# it should come from.
convertDfToOneRow(filtered_dfs['/home/roni/coding/mastersProject/src/csvOut/p_109'])

Unnamed: 0,"( AU17_c, 0)","( AU17_c, 1)","( AU17_c, 2)","( AU17_c, 3)","( AU17_c, 4)","( AU17_c, 5)","( AU17_c, 6)","( AU17_c, 7)","( AU17_c, 8)","( AU17_c, 9)",...,"(PHQ Score, 567)","(PHQ Score, 568)","(PHQ Score, 569)","(PHQ Score, 570)","(PHQ Score, 571)","(PHQ Score, 572)","(PHQ Score, 573)","(PHQ Score, 574)","(PHQ Score, 575)","(PHQ Score, 576)"
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0,17.0


In [16]:
def makeGraph(df, T):
    """Build the time-unrolled DAG over `T` slices and return it.

    Every node is a ``(column_name, t)`` tuple with ``t`` in ``range(T)``.
    For each slice the graph contains:

    * ``(au, t) -> ('FT' + au, t)`` for every AU in `nodes`
    * ``('facetouch', t) -> (ft, t)`` for every co-occurrence name in `cooc`
    * ``(ft, t) -> (res[0], t)`` and ``(ft, t) -> (res[1], t)``
    * temporal edges ``(x, t) -> (x, t + 1)`` for every AU and every
      co-occurrence node, only while ``t + 1`` is still a valid slice

    Fixes relative to the previous version:

    * The temporal guard was ``t + 1 <= T``, which is always true inside
      ``range(T)`` and so created nodes for a non-existent slice ``T``.
    * The face-touch edge was guarded by ``j == 'facetouch'`` while ``j``
      iterated over `nodes`, which never contains ``'facetouch'``; the edge
      was therefore never added.
    * AU temporal edges were re-added once per entry of `cooc`.

    Parameters
    ----------
    df : unused
        Kept so existing calls keep working.
    T : int
        Number of time slices to unroll.

    Returns
    -------
    DAG
        The unrolled graph. `T` and the edge list are printed as before.
    """
    G = DAG()
    print(T)
    for t in range(T):
        # Slices run 0..T-1, so a step into t + 1 exists only when t + 1 < T.
        has_next = t + 1 < T

        if has_next:
            for j in nodes:  # base layer, carried forward in time
                G.add_edge(u=(j, t), v=(j, t + 1))

        for i in cooc:  # mid layer
            for j in nodes:  # base layer
                if "FT" + j == i:  # AU -> its own co-occurrence node
                    G.add_edge(u=(j, t), v=(i, t))
            # face touch -> co-occurrence node of the same slice
            G.add_edge(u=('facetouch', t), v=(i, t))
            # add PHQ/GAD edges
            G.add_edge(u=(i, t), v=(res[0], t))
            G.add_edge(u=(i, t), v=(res[1], t))
            if has_next:
                G.add_edge(u=(i, t), v=(i, t + 1))
    print(G.edges())
    return G
#pp.pprint(G.edges())



In [17]:
# Keep the flattened one-row version of the p_109 recording for the cells below.
# NOTE(review): `filtered_dfs` is only assigned in a commented-out cell earlier
# in this notebook, so this raises NameError on a fresh kernel -- confirm where
# it should come from.
example = convertDfToOneRow(filtered_dfs['/home/roni/coding/mastersProject/src/csvOut/p_109'])

In [None]:
# Print the flattened columns whose time step is 0 or 1.
first_two_steps = [
    label
    for label in example.columns
    if isinstance(label, tuple) and label[1] in [0, 1]
]
for label in first_two_steps:
    print(example[label])

0    0.0
Name: ( AU17_c, 0), dtype: float64
0    0.0
Name: ( AU17_c, 1), dtype: float64
0    0.0
Name: ( AU07_c, 0), dtype: float64
0    0.0
Name: ( AU07_c, 1), dtype: float64
0    0.0
Name: ( AU14_c, 0), dtype: float64
0    0.0
Name: ( AU14_c, 1), dtype: float64
0    0.0
Name: ( AU12_c, 0), dtype: float64
0    0.0
Name: ( AU12_c, 1), dtype: float64
0    0.0
Name: ( AU20_c, 0), dtype: float64
0    0.0
Name: ( AU20_c, 1), dtype: float64
0    0.0
Name: (facetouch, 0), dtype: float64
0    0.0
Name: (facetouch, 1), dtype: float64
0    0.0
Name: (FT AU17_c, 0), dtype: float64
0    0.0
Name: (FT AU17_c, 1), dtype: float64
0    0.0
Name: (FT AU07_c, 0), dtype: float64
0    0.0
Name: (FT AU07_c, 1), dtype: float64
0    0.0
Name: (FT AU14_c, 0), dtype: float64
0    0.0
Name: (FT AU14_c, 1), dtype: float64
0    0.0
Name: (FT AU12_c, 0), dtype: float64
0    0.0
Name: (FT AU12_c, 1), dtype: float64
0    0.0
Name: (FT AU20_c, 0), dtype: float64
0    0.0
Name: (FT AU20_c, 1), dtype: float64
0    15.

In [18]:

#print(example)
#x  = makeGraph(example, len(filtered_dfs['/home/roni/coding/mastersProject/src/csvOut/p_109']))
#print(len(x))
# Attempt to build a DynamicBayesianNetwork from the flattened one-row frame
# and fit it.
# NOTE(review): as written this cell ends in the ValueError recorded in its
# output ("Data contains unexpected states for variable:  AU17_c_1"), so the
# fit does not succeed. Check the column/time-slice layout pgmpy expects for
# DynamicBayesianNetwork.fit before reusing this.
xt = DynamicBayesianNetwork()

# Side effect: dumps the flattened frame to CSV in the working directory.
example.to_csv('p109filteredexample.csv')

# NOTE(review): `filtered_dfs` is only assigned in a commented-out cell earlier
# in this notebook. `T` is read only by the commented-out loop further down.
T = len(filtered_dfs['/home/roni/coding/mastersProject/src/csvOut/p_109'])

cols = example.columns.tolist()

# Each column label is a (name, t) tuple.
#   AU column            -> edge to ('FT' + name, t)
#   co-occurrence column -> edge from ('facetouch', t), and edges to both
#                           score nodes (res[0], t) and (res[1], t)
# Columns whose name is in neither `nodes` nor `cooc` add no edges themselves.
for i in cols:
    if( i[0] in nodes ):
        xt.add_edge( i, ( 'FT' + i[0], i[1] ) )
    elif( i[0] in cooc ):
        xt.add_edge( ('facetouch',i[1]), i )
        xt.add_edge( i, (res[0],i[1]) )
        xt.add_edge( i, (res[1],i[1]) )
        


#for t in range(T):
#    for i in cooc: # mid layer
#        for j in nodes: # base layer
#            #if(t+1 <= T):
#                #xt.add_edge( (j, t), (j, t+1))
#            xt.add_edge( (j, t), (i, t))    
#        #add PHQ/GAD edges
#        xt.add_edge((i, t), (res[0],t))
#        xt.add_edge( (i, t), (res[1],t))
#        #if(t+1 <= T):
#            #xt.add_edge( (i, t), (i,t+1) )
#est = MaximumLikelihoodEstimator(bn,p109)
# This is the call that raises the ValueError shown in the cell output.
xt.fit(example)

ValueError: Data contains unexpected states for variable:  AU17_c_1.

In [None]:
# Spot-check one flattened column: ' AU12_c' at time step 1.
# `c` is only read by the commented-out loop below.
c = example.columns.tolist()
example[(' AU12_c', 1)]
#for i in c:
#    if 'AU12' in i[0]:
#        print(i)

0    0.0
Name: ( AU12_c, 1), dtype: float64

In [None]:
# Scaffold for a per-recording model. Only the setup and the printing are
# live; the fitting steps further down in this cell are still commented out.
model = DynamicBayesianNetwork()

for video_name, df in dataframes.items():
    # A fresh, empty network per recording. `est` is created but not used by
    # any live code in this cell.
    temp = BayesianNetwork()    
    est = MaximumLikelihoodEstimator(temp,df)
    print(video_name)
    # NOTE(review): this prints the whole frame for every recording.
    print(df)
    #temp.fit(df,est)
    
    #infer = DBNInference(temp)
    #estimator = MaximumLikelihoodEstimator(model, temp)
    
    #temp.fit(probabilities[video_name], estimator)#, inference_algorithm=infer)
    
    #for node_name in nodes:
    #    model.add_node(node_name, temp.cpd(node_name))
    
    #if model.cardinality is None:
    #    model.cardinality = temp.cardinality
    #else:
    #    model.cardinality = np.maximum(model.cardinality, temp.cardinality)
    
    #model.add_transition(model.previous_timestep_nodes, nodes, transition_type='sequential')
    #model.previous_timestep_nodes = nodes
    
    
    
    #model.add_node(df, temp)
    
    #add temp to model



/home/roni/coding/mastersProject/src/csvOut/p_59/recording_0
                      AU17_c   AU07_c   AU14_c   AU12_c   AU20_c  \
 timestamp                                                         
1970-01-01 00:00:00      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:00:01      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:00:02      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:00:03      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:00:04      0.0      0.0      0.0      0.0      0.0   
...                      ...      ...      ...      ...      ...   
1970-01-01 00:05:08      0.0      1.0      0.0      0.0      0.0   
1970-01-01 00:05:09      0.0      0.0      0.0      0.0      1.0   
1970-01-01 00:05:10      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:05:11      0.0      0.0      0.0      0.0      0.0   
1970-01-01 00:05:12      1.0      0.0      0.0      0.0      1.0   

                     leftHandTouching  rightHandTouchi

In [None]:
# NOTE(review): `add_transition`, `start_nodes`, `previous_timestep_nodes` and
# `bake` are never defined or assigned on `model` anywhere in the visible
# notebook, and they do not look like part of pgmpy's DynamicBayesianNetwork
# API -- confirm this cell can run before relying on it.

# Set the first nodes as start nodes
model.add_transition(model.start_nodes, model.previous_timestep_nodes, transition_type='sequential')
    
# Add end nodes and set their transitions
end_nodes = []
for node_name in nodes:
    # One "<name>_end" node per AU column, linked from that AU column.
    end_node_name = f"{node_name}_end"
    end_nodes.append(end_node_name)
    model.add_node(end_node_name)
    model.add_transition([node_name], end_node_name, transition_type='static')
model.add_transition(model.previous_timestep_nodes, end_nodes, transition_type='sequential')

model.bake()