In [None]:
import os
import seaborn as sns
import pandas as pd
from scipy import stats
import scipy as sp
import numpy as np
import datetime as dt
%matplotlib inline
import matplotlib
# matplotlib.use('Agg')
# %matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib import gridspec
from itertools import groupby
from operator import itemgetter
import dabest
from nptdms import *
import math
from collections import Counter
import shutil
import progressbar
from svgutils.compose import *
from matplotlib.lines import Line2D
import random
import dabest

## Prepare Data for EBprot

### Calculate Z scores

In [None]:
## Accumulates one record per (ORN, condition, light intensity) combination
Z_scores = {'Di':[], 'Si':[], 'ORNs':[], 'Sex_Satiety_Wind':[], 'LightInt':[]}

## Every entry of this folder is treated as one ORN
rootDirectory = "C:/Users/tumkayat/Desktop/WALISAR_Paper/Figure2/Data/All_merged_intensity_wTSALE/"
ornList = os.listdir(rootDirectory)
bar = progressbar.ProgressBar()

## go thru the ORNs
for ORN in bar(ornList):
    rootDir = os.path.join(rootDirectory, ORN, "weighted_TSALE", "weighted_TSALE_values.pkl")

    ## NOTE(review): unpickling can execute arbitrary code - only load pickles you produced yourself
    df = pd.read_pickle(rootDir)
    df = df.assign(Sex_Satiety_Wind = df['Sex'] + '_' + df['Satiety'] + '_' + df['Wind status'])

    ## These do not change inside the condition/intensity loops, so compute them once per file
    genotypes = df['Genotype'].unique()
    intensities = df['Light Intensity(uW/mm2)'].unique()

    for condition in df['Sex_Satiety_Wind'].unique():
        for intensity in intensities:
            dfOI = df[(df['Sex_Satiety_Wind'] == condition) & (df['Light Intensity(uW/mm2)'] == intensity)]
            wTSALE = dfOI['weighted_TSALE_P10']

            ## Di: mean of the Offspring rows minus mean of the Parent rows
            ## (the mean of the pooled Parent rows is used, however many control groups there are)
            ctrl_wTSALE = wTSALE[dfOI['Status'] == 'Parent']
            exp_wTSALE = wTSALE[dfOI['Status'] == 'Offspring']
            Di = exp_wTSALE.mean() - ctrl_wTSALE.mean()

            ## Si: average of the per-genotype standard deviations (not the SD of the pooled data).
            ## Iterating over every genotype of the file replaces the hard-coded genotypes[0..2],
            ## which raised IndexError for files with fewer than three genotypes and ignored any extra ones.
            ## A genotype without rows in this subset yields a NaN SD, so Si becomes NaN and the record
            ## is removed by dropna() below.
            genotype_SDs = [wTSALE[dfOI['Genotype'] == genotype].std() for genotype in genotypes]
            Si = sum(genotype_SDs) / len(genotype_SDs)

            Z_scores['ORNs'].append(ORN)
            Z_scores['Sex_Satiety_Wind'].append(condition)
            Z_scores['LightInt'].append(intensity)
            Z_scores['Di'].append(Di)
            Z_scores['Si'].append(Si)

Z_scores_df = pd.DataFrame(Z_scores)
## .copy() gives an independent frame, so adding columns to it (here and in later cells)
## does not raise SettingWithCopyWarning
Z_scores_df_dropna = Z_scores_df.dropna().copy()

## Zi = Di / (a0 + Si), with a0 the 90th percentile of all Si values
a0 = np.percentile(Z_scores_df_dropna['Si'], 90)
Z_scores_df_dropna['Zi'] = Z_scores_df_dropna['Di'] / (a0 + Z_scores_df_dropna['Si'])

In [None]:
## Show (rows, columns) of the NaN-free Z score table
Z_scores_df_dropna.shape

In [None]:
## Tag = ORN name and Sex_Satiety_Wind condition joined by '_'.
## .assign returns a new frame instead of writing into the result of dropna(),
## which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
Z_scores_df_dropna = Z_scores_df_dropna.assign(Tag = Z_scores_df_dropna['ORNs'] + '_' + Z_scores_df_dropna['Sex_Satiety_Wind'])

In [None]:
## Rows of the 'male_fed_NoAir' condition, leaving out the Gr66a ORN
male_fed_noair = Z_scores_df_dropna[
    (Z_scores_df_dropna['Sex_Satiety_Wind'] == 'male_fed_NoAir')
    & (Z_scores_df_dropna['ORNs'] != 'Gr66a')
]

In [None]:
## Show (rows, columns) of the male_fed_NoAir subset
male_fed_noair.shape

In [None]:
## If you are using only a part of the dataset, i.e. Fed and NoAir, calculate the Z scores for that group only:
## a0 is the 90th percentile of this subset's Si values.
a0 = np.percentile(male_fed_noair['Si'], 90)
## male_fed_noair is a filtered slice of a larger frame; .assign builds a new frame rather than
## writing into the slice, which would raise SettingWithCopyWarning.
male_fed_noair = male_fed_noair.assign(Zi = male_fed_noair['Di'] / (a0 + male_fed_noair['Si']))

In [None]:
## Write the male_fed_NoAir subset to CSV.
## NOTE(review): the file name says 'NoWir', presumably a typo for 'NoAir' - left unchanged in case something reads this path; confirm.
male_fed_noair.to_csv('C:/Users/tumkayat/Desktop/Fed_and_NoWir_only_woutGr66a.csv')

In [None]:
## Leave the Gr66a rows out - EBprot blows up otherwise - and show the resulting (rows, columns)
a = Z_scores_df_dropna.loc[Z_scores_df_dropna['ORNs'].ne('Gr66a')]
a.shape

In [None]:
## Write the table without Gr66a rows to CSV
a.to_csv('C:/Users/tumkayat/Desktop/All_adult_data_woutGr66a_wCombos.csv')

### Calculate Z scores using data from DF

In [None]:
##!!!! Calculate Z scores for stand-alone dataframes
def calculate_Z(a0,Di,Si):
    """Return the Z scores ``Di / (a0 + Si)`` as a list.

    Parameters
    ----------
    a0 : number
        Constant added to every Si value before dividing.
    Di : pandas.Series
        Effect sizes. Missing entries are dropped.
    Si : pandas.Series
        Spread belonging to each effect size. Missing entries are dropped.

    Returns
    -------
    list
        One Z score per non-missing entry of ``Di``, in the order of ``Di``.

    Notes
    -----
    When the non-missing entries of ``Di`` and ``Si`` sit on the same index labels,
    they are paired one-to-one in order. When they do not (the NaNs fall on different
    rows), ``Si`` is re-paired to ``Di`` by index label and an effect size without a
    spread gets a NaN Z score. Pairing purely by position in that situation would
    combine values from different rows, raise IndexError, or drop trailing values.
    Series of equal length that share no index label at all are still paired by position.
    """
    Di = Di.dropna()
    Si = Si.dropna()

    same_rows = Di.index.equals(Si.index)
    unrelated_labels = len(Di) == len(Si) and not Di.index.isin(Si.index).any()
    if not (same_rows or unrelated_labels):
        # NaNs were dropped from different rows: look each spread up by the label of its effect size
        Si = Si.reindex(Di.index)

    # list() over the array keeps numpy scalars, so a zero denominator gives inf/nan rather than raising
    return list(Di.to_numpy() / (a0 + Si.to_numpy()))

In [None]:
## Load the delta-delta effect-size table; later cells read its 'ORNs' and 'LightInt' columns
## and, for each comparison, a '<comparison>_ES' and a '<comparison>_pooledSD' column
deltadelta_df = pd.read_csv('C:/Users/tumkayat/Desktop/WALISAR_Paper/Figure3/AllDeltaDelta/deltadelta_ES_ALL.csv')

In [None]:
## Load the screen table; later cells read its 'Effect_size' and 'Si' columns
xyz = pd.read_csv('C:/Users/tumkayat/Desktop/OATAScreennew.csv')

In [None]:
## Display the loaded table
xyz

In [None]:
## a0 for this table: 90th percentile of the available Si values.
## Missing values are dropped first because np.percentile returns NaN as soon as one input is NaN,
## which would turn every Z score into NaN.
a0_xyz = np.percentile(xyz['Si'].dropna(), 90)

In [None]:
## Z scores of the screen: Effect_size / (a0_xyz + Si), computed by calculate_Z.
## NOTE(review): calculate_Z drops missing effect sizes, so the list can be shorter than xyz.
Z_scores_xyz = calculate_Z(a0_xyz, xyz['Effect_size'], xyz['Si'])

In [None]:
## Store the Z scores as a new column; the list must hold exactly one value per row of xyz
xyz['Zscores'] = Z_scores_xyz

In [None]:
## Write the screen table, including the new 'Zscores' column, to CSV
xyz.to_csv('C:/Users/tumkayat/Desktop/OATA_Screen_Zscores.csv')

In [None]:
## List the column names of the delta-delta table
deltadelta_df.columns

In [None]:
## The eight pairwise comparisons; deltadelta_df holds a '<name>_ES' and a '<name>_pooledSD' column for each
comparisons = [
    'male_fed_air_V_male_fed_noair',
    'male_starved_air_V_male_starved_noair',
    'male_fed_noair_V_male_starved_noair',
    'male_fed_air_V_male_starved_air',
    'female_fed_noair_V_male_fed_noair',
    'female_fed_air_V_male_fed_air',
    'female_starved_noair_V_male_starved_noair',
    'female_starved_air_V_male_starved_air',
]

## a0 is shared by all comparisons: the 90th percentile of every available pooled SD
merge_Si = [deltadelta_df[name + '_pooledSD'].dropna() for name in comparisons]
all_model_Si = pd.concat(merge_Si, ignore_index=True)
a0 = np.percentile(all_model_Si, 90)

def _comparison_ZScores(name):
    """Z scores of one comparison, from its non-missing '_ES' and '_pooledSD' values and the shared a0."""
    return calculate_Z(a0, deltadelta_df[name + '_ES'].dropna(), deltadelta_df[name + '_pooledSD'].dropna())

## Wind
male_fed_air_V_male_fed_noair_ZScores = _comparison_ZScores('male_fed_air_V_male_fed_noair')
male_starved_air_V_male_starved_noair_ZScores = _comparison_ZScores('male_starved_air_V_male_starved_noair')

## Starvation
male_fed_noair_V_male_starved_noair_ZScores = _comparison_ZScores('male_fed_noair_V_male_starved_noair')
male_fed_air_V_male_starved_air_ZScores = _comparison_ZScores('male_fed_air_V_male_starved_air')

## Sex
female_fed_noair_V_male_fed_noair_ZScores = _comparison_ZScores('female_fed_noair_V_male_fed_noair')
female_fed_air_V_male_fed_air_ZScores = _comparison_ZScores('female_fed_air_V_male_fed_air')
female_starved_noair_V_male_starved_noair_ZScores = _comparison_ZScores('female_starved_noair_V_male_starved_noair')
female_starved_air_V_male_starved_air_ZScores = _comparison_ZScores('female_starved_air_V_male_starved_air')

In [None]:
def _zscore_frame(condition, z_scores):
    """Tabulate the Z scores of one comparison.

    Takes the rows of deltadelta_df whose '<condition>_ES' value is present, keeps their
    'ORNs' and 'LightInt' columns (and their index), then adds the given Z scores as
    'ZScores' and the comparison name as 'Condition'. z_scores must hold one value per kept row.
    """
    rows = deltadelta_df.dropna(subset=[condition + '_ES'])
    frame = rows[['ORNs', 'LightInt']].copy()
    frame['ZScores'] = z_scores
    frame['Condition'] = condition
    return frame

## One table per comparison (columns: ORNs, LightInt, ZScores, Condition).
## The empty '*_ZScore_dict' templates used previously are no longer needed and are not created.

## Wind
male_fed_air_V_male_fed_noair_ZScore_DF = _zscore_frame('male_fed_air_V_male_fed_noair', male_fed_air_V_male_fed_noair_ZScores)
male_starved_air_V_male_starved_noair_ZScore_DF = _zscore_frame('male_starved_air_V_male_starved_noair', male_starved_air_V_male_starved_noair_ZScores)

## Starvation
male_fed_noair_V_male_starved_noair_ZScore_DF = _zscore_frame('male_fed_noair_V_male_starved_noair', male_fed_noair_V_male_starved_noair_ZScores)
male_fed_air_V_male_starved_air_ZScore_DF = _zscore_frame('male_fed_air_V_male_starved_air', male_fed_air_V_male_starved_air_ZScores)

## Sex
female_fed_noair_V_male_fed_noair_ZScore_DF = _zscore_frame('female_fed_noair_V_male_fed_noair', female_fed_noair_V_male_fed_noair_ZScores)
female_fed_air_V_male_fed_air_ZScore_DF = _zscore_frame('female_fed_air_V_male_fed_air', female_fed_air_V_male_fed_air_ZScores)
female_starved_noair_V_male_starved_noair_ZScore_DF = _zscore_frame('female_starved_noair_V_male_starved_noair', female_starved_noair_V_male_starved_noair_ZScores)
female_starved_air_V_male_starved_air_ZScore_DF = _zscore_frame('female_starved_air_V_male_starved_air', female_starved_air_V_male_starved_air_ZScores)

In [None]:
## Stack the eight per-comparison tables into one long table with a fresh 0..n-1 index
merge = [
    male_fed_air_V_male_fed_noair_ZScore_DF,
    male_starved_air_V_male_starved_noair_ZScore_DF,
    male_fed_noair_V_male_starved_noair_ZScore_DF,
    male_fed_air_V_male_starved_air_ZScore_DF,
    female_fed_noair_V_male_fed_noair_ZScore_DF,
    female_fed_air_V_male_fed_air_ZScore_DF,
    female_starved_noair_V_male_starved_noair_ZScore_DF,
    female_starved_air_V_male_starved_air_ZScore_DF,
]

deltadelta_ZScore_Table = pd.concat(objs=merge, ignore_index=True)

In [None]:
## Wind effect: rows of the two air-vs-noair comparisons
airEffect = deltadelta_ZScore_Table[deltadelta_ZScore_Table['Condition'].isin(['male_fed_air_V_male_fed_noair', 'male_starved_air_V_male_starved_noair'])]
## Rename each ORN to '<ORN>_<Condition>'. .assign returns a new frame; writing the column
## into the filtered slice directly raises SettingWithCopyWarning.
airEffect = airEffect.assign(ORNs = airEffect['ORNs'] + '_' + airEffect['Condition'])
airEffect.to_csv("C:/Users/tumkayat/Desktop/deltadelta_ZScores_Air_Effect_Only.csv")

In [None]:
## Starvation effect: rows of the two fed-vs-starved comparisons
starvationEffect = deltadelta_ZScore_Table[deltadelta_ZScore_Table['Condition'].isin(['male_fed_air_V_male_starved_air', 'male_fed_noair_V_male_starved_noair'])]
## Rename each ORN to '<ORN>_<Condition>'. .assign returns a new frame; writing the column
## into the filtered slice directly raises SettingWithCopyWarning.
starvationEffect = starvationEffect.assign(ORNs = starvationEffect['ORNs'] + '_' + starvationEffect['Condition'])
starvationEffect.to_csv("C:/Users/tumkayat/Desktop/deltadelta_ZScores_Starvation_Effect_Only.csv")

In [None]:
## Sex effect: rows of the four female-vs-male comparisons.
## The third name was previously 'female_starved_noair_V_male_starved_air', which is not the
## 'Condition' value of any per-comparison table, so the female_starved_air_V_male_starved_air rows
## were silently missing from the exported file.
sexEffect = deltadelta_ZScore_Table[deltadelta_ZScore_Table['Condition'].isin([
    'female_fed_air_V_male_fed_air',
    'female_fed_noair_V_male_fed_noair',
    'female_starved_air_V_male_starved_air',
    'female_starved_noair_V_male_starved_noair',
])]
## Rename each ORN to '<ORN>_<Condition>'. .assign returns a new frame; writing the column
## into the filtered slice directly raises SettingWithCopyWarning.
sexEffect = sexEffect.assign(ORNs = sexEffect['ORNs'] + '_' + sexEffect['Condition'])
sexEffect.to_csv("C:/Users/tumkayat/Desktop/deltadelta_ZScores_Sex_Effect_Only.csv")

In [None]:
## Rename every ORN of the full table to '<ORN>_<Condition>' and write the table to CSV.
## NOTE(review): 'ORNs' is overwritten in place, so running this cell a second time appends the condition again.
## The Air / Starvation / Sex cells read 'ORNs' from this same table, so they have to run before this cell.
deltadelta_ZScore_Table['ORNs'] = deltadelta_ZScore_Table['ORNs'] + '_' + deltadelta_ZScore_Table['Condition']
deltadelta_ZScore_Table.to_csv("C:/Users/tumkayat/Desktop/deltadelta_ZScores_ALL.csv")

In [None]:
## Display the combined Z score table
deltadelta_ZScore_Table

In [None]:
## Display `a` (the Z score table without Gr66a rows, built in an earlier cell)
a

## Prepare EBprot RESULTS

In [None]:
## Load a results table (EBprot results, per the section heading); later cells read its 'ZI.Protein' and 'ZI.BFDR' columns
allData_meanFixed_df = pd.read_csv("C:/Users/tumkayat/Desktop/allData_Z_Gr66aREMOVED_CombosADDED_RESULTS.csv")

In [None]:
## Split each 'ZI.Protein' tag on '_' once: the first piece becomes 'ORNs', and the next three
## pieces, re-joined with '_', become 'Condition'.
## Vectorised .str methods replace three row-wise lambdas; 'Condition' never holds intermediate
## Python lists, and a missing tag propagates as NaN instead of raising AttributeError.
## A tag with fewer than four pieces gets a correspondingly shorter 'Condition' rather than raising IndexError.
tag_parts = allData_meanFixed_df['ZI.Protein'].str.split('_')
allData_meanFixed_df['Condition'] = tag_parts.str[1:4].str.join('_')
allData_meanFixed_df['ORNs'] = tag_parts.str[0]

In [None]:
## Hits: rows whose ZI.BFDR is at most 0.2, ordered by condition.
## (To look at a single condition, additionally require Condition == 'male_fed_Air'.)
listOfHits = allData_meanFixed_df.loc[allData_meanFixed_df["ZI.BFDR"].le(0.2)].sort_values(by='Condition')
listOfHits

In [None]:
## List the distinct condition labels present in the results
allData_meanFixed_df['Condition'].unique()

In [None]:
## Write the hit list to CSV
listOfHits.to_csv("C:/Users/tumkayat/Desktop/AllData_Combos_NoGr66a_BFDR_0.2_results.csv")

In [None]:
## Load a second results table; later cells read its 'ZI.Protein' and 'ZI.BFDR' columns.
## NOTE(review): the variable name suggests this run was made without fixing the mean - confirm.
allData_meanFixedFALSE_df = pd.read_csv("C:/Users/tumkayat/Desktop/allData_Gr66aREMOVED_CombosADDED_RESULTS.csv")

In [None]:
## Split each 'ZI.Protein' tag on '_' once: the first piece becomes 'ORNs', and the next three
## pieces, re-joined with '_', become 'Condition'.
## Vectorised .str methods replace three row-wise lambdas; 'Condition' never holds intermediate
## Python lists, and a missing tag propagates as NaN instead of raising AttributeError.
## A tag with fewer than four pieces gets a correspondingly shorter 'Condition' rather than raising IndexError.
tag_parts = allData_meanFixedFALSE_df['ZI.Protein'].str.split('_')
allData_meanFixedFALSE_df['Condition'] = tag_parts.str[1:4].str.join('_')
allData_meanFixedFALSE_df['ORNs'] = tag_parts.str[0]

In [None]:
## Display the 'male_fed_Air' rows whose ZI.BFDR is at most 0.3
allData_meanFixedFALSE_df[(allData_meanFixedFALSE_df["ZI.BFDR"] <= 0.3) & (allData_meanFixedFALSE_df["Condition"] == 'male_fed_Air')]

In [None]:
## Display every row whose 'ZI.Protein' tag contains 'Or42b'
allData_meanFixedFALSE_df[allData_meanFixedFALSE_df['ZI.Protein'].str.contains('Or42b')]

## Calculate delta-delta Z scores

In [None]:
## Display the NaN-free Z score table built at the top of the notebook
Z_scores_df_dropna