## Twin Analysis

The HCP database uses a twin sample, allowing for the comparison of monozygotic (MZ) vs. dizygotic (DZ) twins. 

This allows us to compare the heritability of oscillations.

In [1]:
# Import required libraries/functions
from __future__ import print_function, division
import sys
import random
import itertools
import numpy as  np

# Import custom code from module om, including general functions and OO code for handling data
sys.path.append('/Users/thomasdonoghue/Documents/GitCode/omegamappin/')

from om.core.db import OMDB
from om.core.osc import Osc
from om.meg.twin import get_twin_data, match_twins, check_complete_pairs, rm_twin_pairs, compare_pair

In [2]:
# Split the demographics file into MZ twins, DZ twins, all twins and non-twins
mz_twins, dz_twins, all_twins, not_twins = get_twin_data()

# Pair up twin IDs within each zygosity group; unmatched IDs are returned as singles
mz_id_pairs, mz_singles = match_twins(mz_twins, 1)
dz_id_pairs, dz_singles = match_twins(dz_twins, 1)

# Report how many pairs were extracted for each twin type
print('There are', len(mz_id_pairs), 'MZ twin pairs.')
print('There are', len(dz_id_pairs), 'DZ twin pairs.')

There are 18 MZ twin pairs.
There are 19 DZ twin pairs.


In [3]:
# Name of the dataset this notebook analyses
dat_source = 'HCP'

# Database object used to locate data files
db = OMDB()

In [4]:
# Not all HCP files have been processed. Check which subjects have not yet been run

# Check all FOOFed files from the selected database. The source is taken from
# `dat_source` (set in the setup cell) rather than repeating the literal, so
# the notebook only needs to be changed in one place to switch datasets
foof_files, _ = db.check_dat_files('foof', dat_source=dat_source, verbose=False)

# Collect subjects listed in the twin demographic information that are not yet
# FOOFed, preserving the order in which they appear in `all_twins`
no_dat = [subj for subj in all_twins if subj not in foof_files]

# Print out results
print(str(len(no_dat)), 'Subject Files Not Available: ')
print(no_dat)

33 Subject Files Not Available: 
[104012, 108323, 125525, 133019, 149741, 151526, 177746, 179245, 182840, 200109, 287248, 500222, 601127, 660951, 662551, 665254, 667056, 679770, 680957, 707749, 715950, 725751, 735148, 783462, 814649, 825048, 872764, 877168, 891667, 898176, 912447, 917255, 990366]


In [5]:
# Given the missing data, check which twin pairs are complete, i.e. keep only
# the pairs that check_complete_pairs accepts given the available FOOF files
mz_complete_pairs = check_complete_pairs(mz_id_pairs, foof_files)
dz_complete_pairs = check_complete_pairs(dz_id_pairs, foof_files)

# Print out number of available pairs by twin type
# (the second message previously mislabelled the DZ count as "MZ")
print('There are', str(len(mz_complete_pairs)), 'complete MZ twin pairs.')
print('There are', str(len(dz_complete_pairs)), 'complete DZ twin pairs.')

There are 5 complete MZ twin pairs.
There are 11 complete MZ twin pairs.


In [6]:
# Enumerate every unordered pairing of subjects that have FOOF data, then
# filter the known MZ and DZ twin pairings out via rm_twin_pairs, leaving
# the pool of non-twin pairs to compare against
all_pairs = list(itertools.combinations(foof_files, 2))
non_twin_pairs = rm_twin_pairs(
    all_pairs,
    mz_id_pairs + dz_id_pairs,
)

### Center Frequency Comparison

The following compares the center frequencies from within oscillatory bands. 

NOTE: As implemented, the center-frequency comparison is not specific to how subjects are similar/different. 
Subjects can differ in either:
- The spatial topography across which the oscillation band is found and/or
- The center frequencies within the vertices where the oscillatory band is found

These two sources of difference can/will be parcelled out in further analyses:
- A basic check is simply to look at the degree of overlap of oscillatory band topographies between subjects. 

In [7]:
# Compare MZ twins - Oscillatory Band Center Frequencies

# Pre-allocate one [4, 2] block of comparison output per complete MZ pair
n_mz_pairs = len(mz_complete_pairs)
mz_corr_dat = np.zeros((n_mz_pairs, 4, 2))

# Store the comparison output of each MZ twin pair in its own row
for row, twin_pair in enumerate(mz_complete_pairs):
    mz_corr_dat[row] = compare_pair(twin_pair)

# Average over pairs (axis 0), leaving one [4, 2] summary
mz_avg_corr = mz_corr_dat.mean(axis=0)

In [8]:
# Compare DZ twins - Oscillatory Band Center Frequencies

# Pre-allocate one [4, 2] block of comparison output per complete DZ pair
n_dz_pairs = len(dz_complete_pairs)
dz_corr_dat = np.zeros((n_dz_pairs, 4, 2))

# Store the comparison output of each DZ twin pair in its own row
for row, twin_pair in enumerate(dz_complete_pairs):
    dz_corr_dat[row] = compare_pair(twin_pair)

# Average over pairs (axis 0), leaving one [4, 2] summary
dz_avg_corr = dz_corr_dat.mean(axis=0)

In [9]:
# Compare non-twins - Oscillatory Band Center Frequencies

# Use a dedicated, seeded random generator so the non-twin sample (and the
# averages reported from it) are reproducible across kernel restarts, without
# touching the global `random` state. Change the seed to draw a new sample.
NON_TWIN_SEED = 0
non_twin_rng = random.Random(NON_TWIN_SEED)

# Get a random sample of non-twin pairs, matching the number of DZ twin pairs
n_non_twin = n_dz_pairs
rand_inds = non_twin_rng.sample(range(len(non_twin_pairs)), n_non_twin)
non_twin_samp = [non_twin_pairs[i] for i in rand_inds]
non_twin_corr_dat = np.zeros([n_non_twin, 4, 2])

# Loop through non-twin pairs, comparing them
for ind, pair in enumerate(non_twin_samp):
    non_twin_corr_dat[ind, :, :] = compare_pair(pair)

# Get average correlation within bands across pairs
non_twin_avg_corr = np.mean(non_twin_corr_dat, axis=0)

In [11]:
# Check Results

# Build an Osc object (constructed with default=True); the keys of its `bands`
# attribute are used as the row labels when the results are printed
osc = Osc(default=True)

def print_twin_results(corr_dat, labels):
    """Print one formatted line per label with the matching value from `corr_dat`.

    Parameters
    ----------
    corr_dat : 2d array
        Indexed as corr_dat[row, 0]; row `i` is printed next to the i-th label.
        Only column 0 is printed (presumably the correlation value - TODO confirm).
    labels : iterable of str
        Row labels, in row order. Any iterable works, including a dict keys view.
    """

    # Iterate over `labels` instead of indexing into it: dict.keys() is a
    # non-subscriptable view on Python 3, so `labels[i]` would raise TypeError
    for i, label in enumerate(labels):
        print('\t', label, ' : ', '{:5.4f}'.format(corr_dat[i, 0]))

# Report the averaged results for each group: a header line, then one line per band
for header, avg_corr in (
        ('MZ Twin Oscillatory Band Center Frequency Results: ', mz_avg_corr),
        ('DZ Twin Oscillatory Band Center Frequency Results: ', dz_avg_corr),
        ('Non-Twin Oscillatory Band Center Frequency Results: ', non_twin_avg_corr),
):
    print(header)
    print_twin_results(avg_corr, osc.bands.keys())

MZ Twin Oscillatory Band Center Frequency Results: 
	 Theta  :  0.2032
	 Alpha  :  0.1547
	 Beta  :  0.2206
	 LowGamma  :  0.1826
DZ Twin Oscillatory Band Center Frequency Results: 
	 Theta  :  0.1717
	 Alpha  :  0.1142
	 Beta  :  0.1290
	 LowGamma  :  0.1047
Non-Twin Oscillatory Band Center Frequency Results: 
	 Theta  :  0.1496
	 Alpha  :  0.0578
	 Beta  :  0.1176
	 LowGamma  :  0.0349


## TEST CODE

In [1]:
# Compare a hard-coded set of subject pairs.
# NOTE: `rand_pairs` must already be defined (it is assigned in a separate
# test cell) before this cell is run.
n_rand_pairs = len(rand_pairs)
rand_corr_dat = np.zeros([n_rand_pairs, 4, 2])

# Loop through the pairs, comparing them. Uses `compare_pair`, the function
# imported from om.meg.twin and used for the MZ/DZ/non-twin comparisons;
# the previous name `compare_twin_pair` is not imported or defined anywhere
# in this notebook and raised a NameError.
for ind, pair in enumerate(rand_pairs):
    rand_corr_dat[ind, :, :] = compare_pair(pair)

In [90]:
# Hard-coded subject-ID pairs used by the test comparison
rand_pairs = [
    [181232, 166438],
    [212318, 175237],
    [204521, 255639],
    [191841, 293748],
    [214524, 352738],
    [223929, 111514],
]

In [68]:
# Index of the complete MZ pair to inspect
pair_ind = 4

# Pull out the two subject numbers of that pair
subj_1, subj_2 = mz_complete_pairs[pair_ind][0], mz_complete_pairs[pair_ind][1]

In [10]:
# Average the MZ comparison output over pairs (axis 0)
mz_corr_dat.mean(axis=0)

array([[  2.03199209e-01,   7.63443429e-02],
       [  1.54657319e-01,   9.59576766e-04],
       [  2.20574075e-01,   2.28331086e-28],
       [  1.82610148e-01,   5.48417021e-02]])

In [11]:
# Average the DZ comparison output over pairs (axis 0)
dz_corr_dat.mean(axis=0)

array([[  1.71742018e-01,   2.73093832e-06],
       [  1.14158743e-01,   8.14871060e-03],
       [  1.29018269e-01,   4.89399153e-05],
       [  1.04679615e-01,   1.45673227e-02]])

In [96]:
# Average the hard-coded test pairs' comparison output over pairs (axis 0)
rand_corr_dat.mean(axis=0)

array([[ 0.1362881 ,  0.01306603],
       [ 0.10844563,  0.12799352],
       [ 0.00249754,  0.21088716],
       [ 0.01660769,  0.15087963]])

In [34]:
# Average the sampled non-twin comparison output over pairs (axis 0)
non_twin_corr_dat.mean(axis=0)

array([[ 0.11783372,  0.10345028],
       [ 0.07169276,  0.17776039],
       [ 0.09236982,  0.08019821],
       [ 0.09158019,  0.10449761]])

In [100]:
# Show, for every test pair, the second value (index 1) of the fourth row (index 3)
# NOTE(review): presumably the p-values for the last band - confirm against compare_pair
rand_corr_dat[:, 3, 1]

array([  1.71322077e-005,   5.00580453e-003,   7.80296878e-002,
         3.11608291e-010,   8.22225170e-001,   2.36322184e-127])