In [1]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat
import seaborn as sns
import rqa_functions as rqa
import os
from scipy import stats

In [2]:
# Specify the directory where the files are located
directory_path = "/data/Italo/correlation_distances"

# List all files and directories in the specified path
all_items = os.listdir(directory_path)

# Filter out files that start with "dyad_20"
matching_files = [filename for filename in all_items if filename.startswith("dyad_20")]

# Optionally, get the full paths if needed
full_paths = [os.path.join(directory_path, filename) for filename in matching_files]

# Print the list of matching file paths
for file_path in full_paths:
    print(file_path)

/data/Italo/correlation_distances/dyad_20221003_distances.pkl
/data/Italo/correlation_distances/dyad_20220713_distances.pkl
/data/Italo/correlation_distances/dyad_20220816_distances.pkl
/data/Italo/correlation_distances/dyad_20221005_distances.pkl
/data/Italo/correlation_distances/dyad_2022100401_distances.pkl
/data/Italo/correlation_distances/dyad_20220810_distances.pkl
/data/Italo/correlation_distances/dyad_2022100402_distances.pkl
/data/Italo/correlation_distances/dyad_20220811_distances.pkl
/data/Italo/correlation_distances/dyad_20220721_distances.pkl
/data/Italo/correlation_distances/dyad_20220808_distances.pkl
/data/Italo/correlation_distances/dyad_20220815_distances.pkl
/data/Italo/correlation_distances/dyad_20220804_distances.pkl


In [3]:
def find_indices_to_eliminate(subj1, subj2):
    """
    Calculate the indices to be eliminated based on the differences in trial data points
    for two subjects, ensuring that only the necessary data points are removed to align their sizes.

    Parameters:
    - list_indices_subj1: Numpy array of trial sizes for subject 1
    - list_indices_subj2: Numpy array of trial sizes for subject 2

    Returns:
    - index_to_eliminate_subj1: Indices to eliminate from subject 1 to align with subject 2
    - index_to_eliminate_subj2: Indices to eliminate from subject 2 to align with subject 1
    """

    list_indices_subj1 = np.array([i[1] for i in subj1])
    list_indices_subj2 = np.array([i[1] for i in subj2])

    cumsum_subj1 = [sum([x[1] for x in subj1[:i+1]]) for i in range(len(subj1))]
    cumsum_subj2 = [sum([x[1] for x in subj2[:i+1]]) for i in range(len(subj2))]

    index_differences_sub1 = list_indices_subj1 - list_indices_subj2
    index_differences_sub2 = list_indices_subj2 - list_indices_subj1

    index_to_eliminate_subj1 = []
    for i,n_points in enumerate(index_differences_sub1):
        if n_points>0:
            indexes = [j for j in range(cumsum_subj1[i]-index_differences_sub1[i],cumsum_subj1[i])]
            index_to_eliminate_subj1.extend(indexes)

    index_to_eliminate_subj2 = []
    for i,n_points in enumerate(index_differences_sub2):
        if n_points>0:
            indexes = [j for j in range(cumsum_subj2[i]-index_differences_sub2[i],cumsum_subj2[i])]
            index_to_eliminate_subj2.extend(indexes)

    return index_to_eliminate_subj1[::-1], index_to_eliminate_subj2[::-1]

def session_data_loading(file_path):

    session = (file_path.split('/')[-1]).split('_')[1]
    with open(file_path, 'rb') as file:
        # Load the object from the pickle file
        data = pickle.load(file)

    subj1 = data['subj1']['sizes']
    subj2 = data['subj2']['sizes']

    index_to_eliminate_subj1, index_to_eliminate_subj2 = find_indices_to_eliminate(subj1, subj2)

    file_order_size = []
    for i in range(len(subj1)):
        file_sub1,len_1 = subj1[i]
        file_sub2,len_2 = subj2[i]
        if len_1 < len_2:
            file_order_size.append((file_sub1,file_sub2,len_1))
        else:
            file_order_size.append((file_sub1,file_sub2,len_2))

    mat1 = data['subj1']['distances']
    for index in index_to_eliminate_subj1:
        mat1 = np.delete(np.delete(mat1, index, axis=0), index, axis=1)

    mat2 = data['subj2']['distances']
    for index in index_to_eliminate_subj2:
        mat2 = np.delete(np.delete(mat2, index, axis=0), index, axis=1)

    trial_len = [i[2] for i in file_order_size]
    start_points = list(np.cumsum(trial_len))
    end_points = [i-1 for i in start_points]
    start_points.insert(0,0)
    start_points.pop(-1)
    #print(start_points)
    #print(end_points)
    start_stop = list(zip(start_points,end_points))
    #print(start_stop)

    condition_dictionary = {1: 'Uncoupled', 2: '1_lead', 3: '2_lead', 4: 'Mutual'}
    type_dictionary = {1: 'Synchronization', 2: 'Syncopation'}

    # Initialize an empty list to store each row's data as a dictionary
    data = []

    for i, entry in enumerate(file_order_size):
        session = entry[0].split('/')[0]
        trial = entry[0].split('_')[2][:-4]
        length = entry[2]
        start, stop = start_stop[i]
        filename = '/data/Italo/finger_tapping_behavioral_data/clean_' + str(session) + '_bpchan.mat'
        beh_data = loadmat(filename)
        conditions = list(beh_data['conditions'][0])
        condition = condition_dictionary[conditions[int(trial)-1]]
        session_type = type_dictionary[beh_data['session'][0][0]]

        # Instead of printing, store the data in a dictionary
        row_data = {
            'session': session,
            'session_type': session_type,
            'condition': condition,
            'trial': trial,
            'start': start,
            'stop': stop
        }

        # Append the dictionary to the list
        data.append(row_data)

    # Convert the list of dictionaries to a pandas DataFrame
    metadata = pd.DataFrame(data)
    session_data = {'Subject 1': mat1,
                    'Subject 2': mat2,
                    'Metadata': metadata,
                    'Session Type': session_type}
    return session, session_data

def transform_tuples_to_symbols(tuple_sequence):
    """
    Transforms a sequence of tuples into a sequence of unique symbols (integer numbers).
    
    Parameters:
    - tuple_sequence: A sequence (e.g., list) of tuples.
    
    Returns:
    - A list of integers representing the sequence of symbols.
    """
    # Step 1: Create a mapping from each unique tuple to a unique integer
    unique_tuples = set(tuple_sequence)  # Find all unique tuples
    tuple_to_symbol_map = {t: i for i, t in enumerate(unique_tuples)}
    
    # Step 2: Transform the original sequence of tuples using the map
    symbol_sequence = [tuple_to_symbol_map[t] for t in tuple_sequence]
    
    return symbol_sequence

In [4]:
session_data = {}
for file_path in full_paths:
    session,data = session_data_loading(file_path)
    session_data[session] = data

In [5]:
def load_pickle_file(filename):
    """
    Load a pickle file.

    Parameters:
    - filename (str): The path to the pickle file to be loaded.

    Returns:
    - The Python object loaded from the pickle file.
    """
    try:
        with open(filename, 'rb') as file:
            data = pickle.load(file)
            return data
    except FileNotFoundError:
        print(f"Error: The file '{filename}' was not found.")
    except EOFError:
        print(f"Error: The file '{filename}' may be corrupted or empty.")
    except pickle.UnpicklingError:
        print(f"Error: The file '{filename}' could not be unpickled. It may not be a valid pickle file or may be corrupted.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        
session_clusterings = load_pickle_file('./clustering_data.pkl')

In [6]:
data = []
recurrence_plots = {}

for session_code in session_data:
    mat1,mat2,metadata,session_type =  session_data[session_code].values()
    sub1_seq,sub2_seq = session_clusterings[session_code]
    joint_seq = list(zip(sub1_seq,sub2_seq))
    joint_seq = transform_tuples_to_symbols(joint_seq)

    start_list = list(metadata['start'])
    stop_list = list(metadata['stop'])
    start_stop = list(zip(start_list,stop_list))
    conditions = list(metadata['condition'])

    recurrence_matrix = rqa.build_rp(joint_seq)
    recurrence_plots[session_code] = {'rp': recurrence_matrix,
                                      'symbol_sequence': joint_seq}
    for j,indices in enumerate(start_stop):
        condition = conditions[j]
        if condition == '1_lead' or condition == '2_lead':
            condition = 'Leader-Follower'
        start, stop = indices
        
        matrix = recurrence_matrix[start:stop,start:stop]

        vlines = rqa.find_lines(matrix, min_len=2, direction='vertical')
        vmean = np.mean(vlines)
        vvar = np.var(vlines)
        dlines = rqa.find_lines(matrix, min_len=2, direction='diagonal')
        dmean = np.mean(dlines)
        dvar = np.var(dlines)

        df_row = {'Session': session_code,
                'Session Type': session_type,
                'Condition': condition,
                'vmean': vmean,
                'vvar': vvar,
                'dmean': dmean,
                'dvar': dvar}
        data.append(df_row)

data_df = pd.DataFrame(data)
data_df

Unnamed: 0,Session,Session Type,Condition,vmean,vvar,dmean,dvar
0,20221003,Synchronization,Uncoupled,2.877586,1.231567,5.360656,161.525665
1,20221003,Synchronization,Leader-Follower,2.749616,1.115495,6.355140,318.266399
2,20221003,Synchronization,Mutual,2.422680,0.292132,3.109375,68.972412
3,20221003,Synchronization,Leader-Follower,3.294586,2.497614,5.825175,161.892513
4,20221003,Synchronization,Leader-Follower,2.199438,0.159663,2.088889,0.080988
...,...,...,...,...,...,...,...
139,20220804,Synchronization,Leader-Follower,4.000000,3.201365,11.746193,639.955887
140,20220804,Synchronization,Uncoupled,2.769585,0.724175,3.606897,57.797194
141,20220804,Synchronization,Leader-Follower,3.403207,2.412452,8.995098,532.897035
142,20220804,Synchronization,Leader-Follower,2.629888,1.710783,6.666667,278.065359
