## Twin Analysis

In [32]:
# Import required libraries/functions
from __future__ import print_function, division
import sys
import os
import csv
import numpy as  np

from scipy.stats.stats import pearsonr

# Import custom code from module om, including general functions and OO code for handling data
# NOTE(review): absolute, machine-specific path - the notebook only runs where this
#   directory exists; consider installing `om` or making the path configurable.
sys.path.append('/Users/thomasdonoghue/Documents/GitCode/omegamappin/')

from om.core.db import OMDB
from om.core.osc import Osc
from om.meg.single import MegData
from om.meg.twin import get_twin_data, match_twins

In [24]:
# Split the demographics file into MZ twins, DZ twins, all twins and non-twins
mz_twins, dz_twins, all_twins, not_twins = get_twin_data()

# Group the twin subjects into pairs (plus any subjects left without a match)
mz_id_pairs, mz_singles = match_twins(mz_twins, 1)
dz_id_pairs, dz_singles = match_twins(dz_twins, 1)

# Check how many pairs were extracted
print('There are', len(mz_id_pairs), 'MZ twin pairs.')
print('There are', len(dz_id_pairs), 'DZ twin pairs.')

There are 18 MZ twin pairs.
There are 19 DZ twin pairs.


In [3]:
# Get database object and set up database to use
db = OMDB()
# Data source label; passed to MegData when loading subjects in compare_twin_pair
dat_source = 'HCP'

In [4]:
# Check which FOOF data files are available for the selected data source.
# Uses the `dat_source` variable set above (rather than repeating the literal)
# so the availability check and the later data loading cannot drift apart.
# The second return value is not needed here.
foof_files, _ = db.check_dat_files('foof', dat_source=dat_source, verbose=False)

In [9]:
# Collect the twin subjects for which no FOOF data file is available
no_dat = [subj for subj in all_twins if subj not in foof_files]

print(len(no_dat), 'Subject Files Not Available: ')
print(no_dat)

33 Subject Files Not Available: 
[104012, 108323, 125525, 133019, 149741, 151526, 177746, 179245, 182840, 200109, 287248, 500222, 601127, 660951, 662551, 665254, 667056, 679770, 680957, 707749, 715950, 725751, 735148, 783462, 814649, 825048, 872764, 877168, 891667, 898176, 912447, 917255, 990366]


In [25]:
def check_complete_pairs(twin_ids, foof_dat):
    """Keep only the twin pairs for which both members have data available.

    Parameters
    ----------
    twin_ids : list of sequence
        Twin pairs; each pair is indexable, with the two subject IDs at
        positions 0 and 1.
    foof_dat : collection
        Subject IDs for which data files are available. Membership is
        tested with `in`.

    Returns
    -------
    complete_pairs : list
        The pairs from `twin_ids`, in their original order, for which both
        subject IDs are present in `foof_dat`.
    """

    # Test membership against the argument, not the notebook-level
    # `foof_files` global, so the function honours what the caller passes in.
    return [pair for pair in twin_ids
            if pair[0] in foof_dat and pair[1] in foof_dat]

In [26]:
# Restrict the MZ and DZ pairs to those where both twins have FOOF data available
mz_complete_pairs = check_complete_pairs(mz_id_pairs, foof_files)
dz_complete_pairs = check_complete_pairs(dz_id_pairs, foof_files)

In [27]:
# Display the MZ pairs that have data for both twins
mz_complete_pairs

[[164636, 214524],
 [181232, 191841],
 [195041, 204521],
 [116726, 257845],
 [233326, 352738]]

In [28]:
# Display the DZ pairs that have data for both twins
dz_complete_pairs

[[100307, 255639],
 [109123, 111514],
 [113922, 166438],
 [175237, 406836],
 [162026, 568963],
 [112920, 223929],
 [191437, 559053],
 [174841, 212823],
 [198653, 581450],
 [162935, 283543],
 [212318, 293748]]

In [90]:
# Hand-picked pairings of subjects who are NOT each other's twin, built from
# the subject IDs in the complete MZ / DZ pairs listed above. Used as a
# non-twin comparison set for the twin-pair correlations.
rand_pairs = [[181232, 166438], [212318, 175237], [204521, 255639],
              [191841, 293748], [214524, 352738], [223929, 111514]]

In [68]:
# Index into mz_complete_pairs of the pair to pull out
pair_ind = 4

# Set subject number to load
# NOTE(review): subj_1 / subj_2 are not referenced by any other cell visible
#   in this section - confirm whether this cell is still needed.
subj_1 = mz_complete_pairs[pair_ind][0]
subj_2 = mz_complete_pairs[pair_ind][1]

In [73]:
def compare_twin_pair(pair_inds):
    """Correlate band-specific oscillation data between two subjects.

    Loads FOOF results for both subjects, converts them to per-band data
    with `osc_bands_vertex`, and computes a Pearson correlation between the
    two subjects for each oscillation band.

    Relies on the notebook-level `db` and `dat_source` variables.

    Parameters
    ----------
    pair_inds : sequence
        The two subject identifiers to compare, at positions 0 and 1.
        Each is passed to `MegData.import_foof`.

    Returns
    -------
    corr_dat : 2d array, shape [n_bands, 2]
        One row per band, in the iteration order of `osc.bands`.
        Column 0 is the Pearson r, column 1 the associated p-value.

    Notes
    -----
    Only column 0 of each band's array is compared - presumably the centre
    frequency, with one row per vertex; TODO confirm against MegData.
    """

    # Set up oscillation band definition
    osc = Osc(default=True)

    # Load and process both subjects in exactly the same way
    meg_subjs = []
    for subj_id in (pair_inds[0], pair_inds[1]):
        meg_subj = MegData(db, dat_source, osc)
        meg_subj.import_foof(subj_id, get_demo=True)
        meg_subj.osc_bands_vertex()
        meg_subjs.append(meg_subj)
    meg_subj_1, meg_subj_2 = meg_subjs

    # Size the output from the band definition instead of hard-coding 4 bands
    bands = list(osc.bands)
    corr_dat = np.zeros([len(bands), 2])

    for ind, band in enumerate(bands):
        corr_dat[ind, 0], corr_dat[ind, 1] = pearsonr(meg_subj_1.oscs[band][:, 0],
                                                      meg_subj_2.oscs[band][:, 0])

    return corr_dat

In [82]:
# Per-pair correlation results for the complete MZ pairs: [n_pairs, n_bands, (r, p)]
# NOTE(review): the 4 is hard-coded and must match the number of band rows
#   returned by compare_twin_pair.
n_mz_pairs = len(mz_complete_pairs)
mz_corr_dat = np.zeros([n_mz_pairs, 4, 2])

for ind, pair in enumerate(mz_complete_pairs):

    mz_corr_dat[ind, :, :] = compare_twin_pair(pair)

In [84]:
# Per-pair correlation results for the complete DZ pairs: [n_pairs, n_bands, (r, p)]
# NOTE(review): the 4 is hard-coded and must match the number of band rows
#   returned by compare_twin_pair.
n_dz_pairs = len(dz_complete_pairs)
dz_corr_dat = np.zeros([n_dz_pairs, 4, 2])

for ind, pair in enumerate(dz_complete_pairs):

    dz_corr_dat[ind, :, :] = compare_twin_pair(pair)

In [91]:
# Per-pair correlation results for the non-twin pairs: [n_pairs, n_bands, (r, p)]
# NOTE(review): the 4 is hard-coded and must match the number of band rows
#   returned by compare_twin_pair.
n_rand_pairs = len(rand_pairs)
rand_corr_dat = np.zeros([n_rand_pairs, 4, 2])

for ind, pair in enumerate(rand_pairs):

    rand_corr_dat[ind, :, :] = compare_twin_pair(pair)

In [86]:
# Average across MZ pairs, per band: column 0 is mean r, column 1 is mean p.
# NOTE(review): a mean of p-values is not a valid combined significance test;
#   treat column 1 as descriptive only.
np.mean(mz_corr_dat, axis=0)

array([[  2.03199209e-01,   7.63443429e-02],
       [  2.20574075e-01,   2.28331086e-28],
       [  1.54657319e-01,   9.59576766e-04],
       [  1.82610148e-01,   5.48417021e-02]])

In [87]:
# Average across DZ pairs, per band: column 0 is mean r, column 1 is mean p.
# NOTE(review): a mean of p-values is not a valid combined significance test;
#   treat column 1 as descriptive only.
np.mean(dz_corr_dat, axis=0)

array([[  1.71742018e-01,   2.73093832e-06],
       [  1.29018269e-01,   4.89399153e-05],
       [  1.14158743e-01,   8.14871060e-03],
       [  1.04679615e-01,   1.45673227e-02]])

In [96]:
# Average across the non-twin pairs, per band: column 0 is mean r, column 1 is mean p.
# NOTE(review): a mean of p-values is not a valid combined significance test;
#   treat column 1 as descriptive only.
np.mean(rand_corr_dat, axis=0)

array([[ 0.1362881 ,  0.01306603],
       [ 0.10844563,  0.12799352],
       [ 0.00249754,  0.21088716],
       [ 0.01660769,  0.15087963]])

In [93]:
# Inspect the full per-pair, per-band (r, p) results for the non-twin pairs
rand_corr_dat

array([[[  1.82797929e-001,   2.32078417e-057],
        [  1.79056004e-001,   4.50561744e-055],
        [ -1.78025052e-001,   1.88521574e-054],
        [ -4.96219727e-002,   1.71322077e-005]],

       [[  2.02047367e-001,   6.24959373e-070],
        [  7.61525058e-002,   4.01585862e-011],
        [ -3.52850774e-002,   2.24146995e-003],
        [ -3.24054087e-002,   5.00580453e-003]],

       [[  1.30942729e-001,   4.84632592e-030],
        [  3.40766616e-003,   7.67944826e-001],
        [ -9.37929229e-003,   4.16703837e-001],
        [ -2.03498161e-002,   7.80296878e-002]],

       [[  2.03248111e-002,   7.83961624e-002],
        [  4.97468223e-002,   1.63142603e-005],
        [ -2.23751397e-003,   8.46377639e-001],
        [ -7.25802922e-002,   3.11608291e-010]],

       [[  6.30380942e-002,   4.66091912e-008],
        [  1.58297889e-001,   2.77862105e-043],
        [  1.08223543e-001,   5.53538035e-021],
        [  2.59485464e-003,   8.22225170e-001]],

       [[  2.18577687e-001,   

In [100]:
# p-values (column 1) for the band at index 3, one per non-twin pair
rand_corr_dat[:, 3, 1]

array([  1.71322077e-005,   5.00580453e-003,   7.80296878e-002,
         3.11608291e-010,   8.22225170e-001,   2.36322184e-127])