# Nurhayu et al., 2020 data to Laland et al., 1995 analysis

In [2]:
import pyreadr as pr
import pandas as pd
import numpy as np

## 1) Parents-offspring analysis

### Load individuals' (subjects') data

- Sampled individuals (ego, or focal) are identified in the file ego_individuals.RData
- In ego_individuals['handedness']: 0 = Right , 1 = Left

In [52]:
# Focal ("ego") individuals: read the 'Allego' object stored in the RData file.
ego = pr.read_r(
    '/Users/ronykarstadt/Desktop/handedness/nurhayu2020/data/Script_Table1/ego_individuals.RData'
)['Allego']

# print(ego.shape, "\n", ego.isna().sum(), "\n", 'handedness: \n', ego['handedness'].value_counts())
# ego

### Find and remove mock individuals

- Nurhayu et al.: "For computing the various family categories presented in Table 1 (sample's frequencies), a full pedigree was required, i.e. including some mock individuals. [...] The list of all added mock individuals is in the mock_individuals.RData file." (Script_for_data_in_Table1)

In [53]:
# IDs of mock individuals: first column of every row in 'mock_individuals'.
mock = [
    row[0]
    for row in pr.read_r(
        '/Users/ronykarstadt/Desktop/handedness/nurhayu2020/data/Script_Table1/mock_individuals.RData'
    )['mock_individuals'].to_numpy()
]

In [54]:
# Blank out (NaN) every mock ID, whether it appears as the focal individual,
# as the father, or as the mother.
for id_col in ('animal', 'Idfather', 'Idmother'):
    ego[id_col] = np.where(ego[id_col].isin(mock), np.nan, ego[id_col])

# print(ego.shape, "\n", ego.isna().sum())

### Insert parents' handedness 

- Nurhayu et al.: "All other categories (parents, grand-parents, child, sibs, spouse and spouse's family) are identified topologically in the pedigrees, from ego." (Script_for_data_in_Table1)
- Running "Script_for_data_in_Table1" creates multiple datasets for these categories.

- **Here, 'parents' is used for data on ego['animal'] parents' handedness.**



In [55]:
# Handedness records for the parents of the focal individuals ('parents' object).
parents = pr.read_r(
    '/Users/ronykarstadt/Desktop/handedness/nurhayu2020/data/Script_Table1/parents.RData'
)['parents']

In [56]:
# Attach each parent's handedness to the focal rows in `ego`.
# A parent may be referenced by several ego rows (several children), hence
# the left merge keyed on the parent ID.
# NOTE(review): if `parents` holds more than one row for the same 'animal',
# the left merge will repeat the matching ego rows -- confirm IDs are unique.

# Mothers: rows of `parents` whose ID appears in ego['Idmother'], reduced to
# (ID, handedness) and renamed so the columns line up with `ego`.
moms_p = (
    parents.loc[parents['animal'].isin(ego['Idmother']), ['animal', 'handedness']]
    .rename(columns={'animal': 'Idmother', 'handedness': 'mother_hand'})
)
ego = ego.merge(moms_p, how='left', on='Idmother')

# Fathers: same procedure, keyed on ego['Idfather'].
fa_p = (
    parents.loc[parents['animal'].isin(ego['Idfather']), ['animal', 'handedness']]
    .rename(columns={'animal': 'Idfather', 'handedness': 'father_hand'})
)
ego = ego.merge(fa_p, how='left', on='Idfather')

# print(ego.shape, "\n", ego.isna().sum(), "\n")
# ego

### Make table nice and tidy

- Missing values haven't been removed yet, so remove all of them now. **Current missing values:**

- Reset index after removing NaNs 

In [57]:
# Keep only complete rows, drop 'famille' categories that no longer occur,
# and renumber the rows. reset_index() keeps the old row labels in a new
# 'index' column.
ego = (
    ego.dropna()
    .assign(famille=lambda frame: frame['famille'].cat.remove_unused_categories())
    .reset_index()
)

# print("ego shape:", ego.shape,
#       "\n", "\n",
#       "NaN values:\n", ego.isna().sum(),
#       '\n', "\n", 
#       "Mothers:\n",
#       "    Unique values:", ego['Idmother'].nunique(), "\n",
#       "    mother_hand:\n", ego['mother_hand'].value_counts(), "\n",
#       "Fathers:\n", "\n", 
#       "    Unique values:", ego['Idfather'].nunique(), "\n",
#       "    father_hand:\n", ego['father_hand'].value_counts(), "\n",
#       "Focals:\n", ego['handedness'].value_counts())

# ego


### Frequency of Right/Left offspring by parental mating type (RxR, RxL, LxL)

Notes:
- In 'ego' table: 0 = Right ; 1 = Left
- RxL and LxR matings are treated as equivalent in Laland et al., 1995 and are therefore both referred to here as RxL
- Output ordered in Laland:  R|RxR  L|RxR  R|RxL  L|RxL  R|LxL  L|LxL

In [58]:

# Label every focal individual with "<child hand>|<parental mating>".
# Coding in `ego`: 0 = Right, 1 = Left. RxL and LxR matings are pooled as 'RxL'.
#
# A single dictionary lookup per row replaces the per-row
# `Series.replace(..., inplace=True)` calls: those rescanned the whole column
# on every iteration (O(n^2)) and relied on chained in-place mutation, which
# no longer updates `ego` under pandas Copy-on-Write.
#
# Keys are (mother_hand, father_hand, handedness).
child_parents_labels = {
    (0, 0, 0): 'R|RxR',
    (0, 0, 1): 'L|RxR',
    (0, 1, 0): 'R|RxL',
    (1, 0, 0): 'R|RxL',  # R|LxR pooled with R|RxL
    (0, 1, 1): 'L|RxL',
    (1, 0, 1): 'L|RxL',  # L|LxR pooled with L|RxL
    (1, 1, 0): 'R|LxL',
    (1, 1, 1): 'L|LxL',
}

hand_triples = zip(ego['mother_hand'], ego['father_hand'], ego['handedness'])

# Rows whose codes match no key keep their row position as a placeholder,
# exactly as before (the column used to be initialised with 0..n-1).
ego['child_parents'] = [
    child_parents_labels.get(triple, row_pos)
    for row_pos, triple in enumerate(hand_triples)
]

ego

Unnamed: 0,index,famille,animal,sex,Idfather,Idmother,handedness,mother_hand,father_hand,child_parents
0,0,1_bajawa,1_bajawaego,F,1_bajawafather,1_bajawamother,0.0,0.0,0.0,R|RxR
1,1,1_bajawa,1_bajawaego2,F,1_bajawahusband,1_bajawaego,0.0,0.0,0.0,R|RxR
2,2,3_bajawa,3_bajawaego,F,3_bajawafather,3_bajawamother,0.0,0.0,0.0,R|RxR
3,3,7_langa,7_langaego,M,7_langafather,7_langamother,1.0,0.0,1.0,L|RxL
4,4,8_langa,8_langaego,M,8_langafather,8_langamother,0.0,0.0,0.0,R|RxR
...,...,...,...,...,...,...,...,...,...,...
678,695,295_lbj,295_lbjego,M,295_lbjfather,295_lbjmother,0.0,0.0,0.0,R|RxR
679,696,296_lbj,296_lbjego,M,296_lbjfather,296_lbjmother,0.0,0.0,1.0,R|RxL
680,697,297_lbj,297_lbjego,M,297_lbjfather,297_lbjmother,0.0,0.0,1.0,R|RxL
681,698,298_lbj,298_lbjego,M,298_lbjfather,298_lbjmother,0.0,0.0,0.0,R|RxR


### A. All subjects (focal individuals) in ego (include multiple generations from the same family)

In [10]:
# One-row table: number of focal individuals in each child|mating category.
child_parents_sum = pd.DataFrame(
    [
        ego['child_parents'].value_counts(),
    ]
)
child_parents_sum

Unnamed: 0,R|RxR,L|RxR,R|RxL,L|RxL,R|LxL,L|LxL
child_parents,521,73,61,26,1,1


### B. Randomly choose one focal individual per family (for families sampled over multiple generations)

In [11]:
# Number of random one-per-family draws to generate.
sample_size = 1000

# Only the family ID and the child|mating label are needed for resampling.
fam = ego[['famille', 'child_parents']]

# Result buffer: one row per draw, one column per distinct child|mating label.
store = np.zeros((sample_size, ego['child_parents'].nunique()))
store.shape

(1000, 6)

In [12]:
# Draw `sample_size` random data sets, each containing exactly one focal
# individual per family, and record the child|mating counts of every draw.

# Column order follows Laland et al., 1995.
outcome_order = ['R|RxR', 'L|RxR', 'R|RxL', 'L|RxL', 'R|LxL', 'L|LxL']

# A seeded generator makes the draws reproducible across kernel restarts.
# One shared RandomState (rather than an int seed) is passed so that every
# iteration continues the stream instead of repeating the same draw.
sampling_rng = np.random.RandomState(2020)

for i in range(sample_size):  # one iteration per random draw
    # One randomly chosen row from each family.
    fam_rand = fam.groupby('famille').sample(n=1, random_state=sampling_rng)

    # Count once, then align to the fixed order. A category missing from this
    # draw becomes 0 instead of raising KeyError.
    fam_rand_array = (
        fam_rand['child_parents']
        .value_counts()
        .reindex(outcome_order, fill_value=0)
        .to_numpy()
    )
    store[i] = fam_rand_array

In [13]:
# Wrap the draws in a labelled table, cast the counts to integers, and keep
# each distinct count combination only once.
df_store = (
    pd.DataFrame(
        store,
        columns=['R|RxR', 'L|RxR', 'R|RxL', 'L|RxL', 'R|LxL', 'L|LxL'],
    )
    .astype(int)
    .drop_duplicates()
)
df_store

Unnamed: 0,R|RxR,L|RxR,R|RxL,L|RxL,R|LxL,L|LxL
0,496,67,51,23,1,1
1,497,65,52,23,1,1
2,498,64,51,24,1,1
3,493,67,55,22,1,1
4,496,66,52,23,1,1
...,...,...,...,...,...,...
847,493,70,50,24,1,1
877,492,67,55,23,1,1
886,496,68,49,24,1,1
982,491,70,53,23,1,1


## 2) Multiple generations - within-family analysis

### Re-arrange table from parents-child to multiple generations

In [12]:
# Full pedigree (the 'all' object), also exported as CSV without the row index.
ped = pr.read_r(
    '/Users/ronykarstadt/Desktop/handedness/nurhayu2020/data/Script_Table1/all.RData'
)['all']
ped.to_csv('ped.csv', index=False)

In [65]:
# x = ego.groupby('famille').size()
# x = x.reset_index()
# x = x.rename(columns = {0:'size'})
# y = x[x['size'] > 1] 
# r = x[x['size'] == 1]

In [66]:
# Show every pedigree row that belongs to the example family '95_dariwali'.
ped.loc[ped['famille'] == '95_dariwali']

Unnamed: 0_level_0,famille,animal,sex,Idfather,Idmother,handedness,id,duplicate
rownames,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1614,95_dariwali,95_dariwaliego,F,95_dariwalifather,95_dariwalimother,0.0,Focal,0.0
1624,95_dariwali,95_dariwalic1,M,95_dariwalihusband,95_dariwaliego,0.0,Childl,0.0
1625,95_dariwali,95_dariwalic2,M,95_dariwalihusband,95_dariwaliego,0.0,Childl,0.0
1626,95_dariwali,95_dariwalic3,F,95_dariwalihusband,95_dariwaliego,0.0,Childl,0.0
1627,95_dariwali,95_dariwalic4,M,95_dariwalihusband,95_dariwaliego,0.0,Childl,0.0
1609,95_dariwali,95_dariwalifather,M,95_dariwaliGP2,95_dariwaliGP1,0.0,Parents,0.0
1610,95_dariwali,95_dariwalimother,F,95_dariwaliGP3,95_dariwaliGP4,0.0,Parents,0.0
1613,95_dariwali,95_dariwalis1,M,95_dariwalifather,95_dariwalimother,0.0,Sibs,0.0
1615,95_dariwali,95_dariwalis3,M,95_dariwalifather,95_dariwalimother,0.0,Sibs,0.0
1616,95_dariwali,95_dariwalis4,F,95_dariwalifather,95_dariwalimother,0.0,Sibs,0.0


In [64]:
# ped_un = ped['famille'].unique() #unique families

# for i in ped_un:
#     x = ped[ped['famille'] == i]         #subset ped_un value from ped
#     print(i, '\n', x['id'].value_counts())
#     if x['id'].value_counts()['Focal'] > 1:     #families with more than 1 focal individual
#         print(i, '\n', x['id'].value_counts()['Focal'])
        
#     elif x['id'].value_counts()['Focal'] == 0:     #families with no focal individual
#         print(i, x['id'].value_counts()['Focal'])

