In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Strain labels written as protein pairs.
# NOTE: the name `strains` is rebound later in this notebook from the
# screening dataframe's columns, so this list is not what the aggregation
# loops iterate over.
strains = [
    # CRY2 / CIB1 variants
    "CRY2(535)/CIB1",
    "CRY2FL/CIB1",
    "CRY2(L348F)/CIB1",
    "CRY2PHR/CIB1",
    "CRY2(W349R)/CIB1",
    # EL222 (base entry)
    "EL222",
    # eMag variants
    "eMagA/eMagB",
    "eMagA/eMagB HH",
    "eMagA/eMagB LH",
    "eMagA/eMagBM",
    "eMagAF/eMagB",
    "eMagAF/eMagBF",
    "eMagAF/eMagBM",
    # EL222 variants
    "EL222(AQTrip)",
    "EL222(A79Q)",
]

# Columns (and their order) written to every exported CSV:
# the three light-condition features followed by one efficacy column per strain.
columns_keep = [
    # features
    "period_scaled",
    "duty_cycle",
    "intensity",
    # CRY2 variants
    "CRY2(535)",
    "CRY2FL",
    "CRY2L348F",
    "CRY2PHR",
    "CRY2W349R",
    # EL222 (base entry)
    "EL222",
    # eMag variants
    "eMagAB",
    "eMagAB HH",
    "eMagAB LH",
    "eMagABM",
    "eMagAFB",
    "eMagAFBF",
    "eMagAFBM",
    # EL222 variants
    "EL222(AQTrip)b",
    "EL222A79Q",
]

In [3]:
# --- Screening experiments -------------------------------------------------
df_screen_0 = pd.read_csv("data/optogenetics_data_4-2023.csv")

# Drop every row of exp586 (outlier efficacy of eMagAB).
exp586ind = df_screen_0["exp"].values == "exp586"
df_screen_0 = df_screen_0[~exp586ind].copy()

# Second screening experiment, covering more strains.
df_screen_1 = pd.read_csv("data/230824-Lightsweep1-p2.csv")

# --- Validation ------------------------------------------------------------
df_val = pd.read_csv("data/230907-validation3-output.csv")

# --- Follow-up experiments -------------------------------------------------
# The first follow-up set is spread over two files; values missing after
# stacking them are replaced by 0.
df_bo_1 = pd.concat(
    [
        pd.read_csv(csv_path)
        for csv_path in ("data/optogenetics_data_6_30_23.csv",
                         "data/230720_opto_data.csv")
    ]
).fillna(0.)
df_bo_2 = pd.read_csv("data/230904-Hardval-output_CORRECTED.csv")

In [4]:
# Format screening data, validation, and exploitation data
df = pd.concat((df_screen_0, df_screen_1, df_val)).fillna(0.)

# remove control conditions, i.e. rows whose (period, duty_cycle, intensity)
# equals (60, 1, 0); `control_inds` is True for the rows that are KEPT
control_condition = np.array([60., 1., 0.])
control_inds = ~np.all(
    df[["period", "duty_cycle", "intensity"]].values == control_condition, axis=1
)
df = df.iloc[control_inds].copy()

# add scaled period feature
df['period_scaled'] = df['period'] / 240.

# define features and target variable
features = ["period_scaled", "duty_cycle", "intensity"]
# NOTE(review): strain columns are selected by position (column 8 up to, but
# excluding, the last column - presumably the 'period_scaled' column added
# above). This assumes a fixed column layout in the input CSVs - TODO confirm.
strains = list(df.columns.values[8:-1])
response = ["efficacy"]

# for each unique combination of features, identify efficacy of each transcription factor
unique_ftrs, counts = np.unique(df[features].values, axis=0, return_counts=True)

# hoisted out of the loop: the feature matrix does not change between iterations
feature_values = df[features].values

# collect one row per feature combination and build the frame once at the end
# (growing a DataFrame with pd.concat inside the loop copies it every time)
rows = []
for unique_ftr in unique_ftrs:

    # rows of df that share this exact feature set
    inds = np.all(feature_values == unique_ftr, axis=1)
    df_slice = df.iloc[inds]
    df_efficacies = df_slice['efficacy'].values

    # NaN marks strains that were not measured under this condition
    sorted_efficacies = np.full(len(strains), np.nan)
    for i, factor in enumerate(strains):
        # a non-zero entry in the strain column flags samples of that strain
        factor_inds = np.array(df_slice[factor].values, bool)
        if factor_inds.any():
            # store average of replicates
            sorted_efficacies[i] = np.mean(df_efficacies[factor_inds])

    rows.append([*unique_ftr, *sorted_efficacies])

# an empty `rows` still yields a frame with the expected columns
multi_DF = pd.DataFrame(rows, columns=features + strains, dtype=float)

In [5]:
# write the selected feature and strain columns of the screening table to disk
multi_DF.loc[:, columns_keep].to_csv("data/universal_df.csv", index=False)

In [6]:
# set up dataframe with same columns as universal dataframe but with data from designed exp;
# columns the designed experiment lacks are filled with 0, and columns only the
# designed experiment has are kept after the existing ones
# (assumes column labels are unique in both frames)
extra_columns = [col for col in df_bo_1.columns if col not in df.columns]
df = df_bo_1.reindex(columns=list(df.columns) + extra_columns).fillna(0.)

# add scaled period feature
df['period_scaled'] = df['period'] / 240.

# for each unique combination of features, identify efficacy of each transcription factor
unique_ftrs, counts = np.unique(df[features].values, axis=0, return_counts=True)

# hoisted out of the loop: the feature matrix does not change between iterations
feature_values = df[features].values

# collect one row per feature combination and build the frame once at the end
# (growing a DataFrame with pd.concat inside the loop copies it every time)
rows = []
for unique_ftr in unique_ftrs:

    # rows of df that share this exact feature set
    inds = np.all(feature_values == unique_ftr, axis=1)
    df_slice = df.iloc[inds]
    df_efficacies = df_slice['efficacy'].values

    # NaN marks strains that were not measured under this condition
    sorted_efficacies = np.full(len(strains), np.nan)
    for i, factor in enumerate(strains):
        # a non-zero entry in the strain column flags samples of that strain
        factor_inds = np.array(df_slice[factor].values, bool)
        if factor_inds.any():
            # store average of replicates
            sorted_efficacies[i] = np.mean(df_efficacies[factor_inds])

    rows.append([*unique_ftr, *sorted_efficacies])

# an empty `rows` still yields a frame with the expected columns
multi_DF = pd.DataFrame(rows, columns=features + strains, dtype=float)

In [7]:
# write the selected feature and strain columns of the first designed-experiment table to disk
multi_DF.loc[:, columns_keep].to_csv("data/design_1_df.csv", index=False)

In [8]:
# set up dataframe with same columns as universal dataframe but with data from designed exp;
# columns the designed experiment lacks are filled with 0, and columns only the
# designed experiment has are kept after the existing ones
# (assumes column labels are unique in both frames)
extra_columns = [col for col in df_bo_2.columns if col not in df.columns]
df = df_bo_2.reindex(columns=list(df.columns) + extra_columns).fillna(0.)

# add scaled period feature
df['period_scaled'] = df['period'] / 240.

# for each unique combination of features, identify efficacy of each transcription factor
unique_ftrs, counts = np.unique(df[features].values, axis=0, return_counts=True)

# hoisted out of the loop: the feature matrix does not change between iterations
feature_values = df[features].values

# collect one row per feature combination and build the frame once at the end
# (growing a DataFrame with pd.concat inside the loop copies it every time)
rows = []
for unique_ftr in unique_ftrs:

    # rows of df that share this exact feature set
    inds = np.all(feature_values == unique_ftr, axis=1)
    df_slice = df.iloc[inds]
    df_efficacies = df_slice['efficacy'].values

    # NaN marks strains that were not measured under this condition
    sorted_efficacies = np.full(len(strains), np.nan)
    for i, factor in enumerate(strains):
        # a non-zero entry in the strain column flags samples of that strain
        factor_inds = np.array(df_slice[factor].values, bool)
        if factor_inds.any():
            # store average of replicates
            sorted_efficacies[i] = np.mean(df_efficacies[factor_inds])

    rows.append([*unique_ftr, *sorted_efficacies])

# an empty `rows` still yields a frame with the expected columns
multi_DF = pd.DataFrame(rows, columns=features + strains, dtype=float)

In [9]:
# write the selected feature and strain columns of the second designed-experiment table to disk
multi_DF.loc[:, columns_keep].to_csv("data/design_2_df.csv", index=False)