In [None]:
import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore, f_oneway
from patsy import dmatrices
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.multitest import multipletests
from matplotlib.lines import Line2D
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.ticker import LinearLocator
plt.rc('font', family='arial')

In [None]:
# Function to fit mixed-effects models
def fit_mixed_effects_model(df, dependent_var, independent_vars, fixed, interaction_term, func):
    """Fit a random-intercept linear mixed-effects model and compute VIFs.

    The model formula is ``dependent_var ~ independent_vars + fixed`` with
    ``interaction_term`` appended when it is non-empty. A random intercept is
    fitted for each value of the 'Record ID' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Timepoint', 'Randomization', 'Record ID' and every
        column referenced by the formula. The frame is not modified.
    dependent_var : str
        Left-hand side of the formula.
    independent_vars : list of str
        Predictor terms placed first on the right-hand side.
    fixed : list of str
        Additional fixed-effect terms (covariates) appended after
        ``independent_vars``.
    interaction_term : str
        Extra formula term; pass '' (or None) to omit it.
    func : str
        Optimizer name forwarded to ``fit(method=...)``.

    Returns
    -------
    tuple
        ``(mixed_model, vif)`` where ``mixed_model`` is the fitted statsmodels
        result and ``vif`` is a DataFrame with columns 'VIF Factor' and
        'features', one row per design-matrix column.
    """
    # Work on a copy: converting dtypes on the caller's frame would silently
    # change it for later cells and can trigger SettingWithCopy warnings when
    # the caller passes a filtered slice.
    model_df = df.copy()
    model_df['Timepoint'] = model_df['Timepoint'].astype('category')
    model_df['Randomization'] = model_df['Randomization'].astype('category')

    # Right-hand side: predictors of interest first, then the covariates
    fixed_effects = ' + '.join(list(independent_vars) + list(fixed))
    formula = f"{dependent_var} ~ {fixed_effects}"
    if interaction_term:
        formula = f"{formula} + {interaction_term}"

    # Convergence warnings are deliberately silenced during fitting
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ConvergenceWarning)
        mixed_model = smf.mixedlm(
            formula, model_df, groups=model_df['Record ID'], re_formula="1"
        ).fit(method=func, maxiter=1000, full_output=True)

    # Rebuild the fixed-effects design matrix to assess collinearity
    _, design = dmatrices(formula, model_df, return_type='dataframe')
    vif = pd.DataFrame({
        "VIF Factor": [variance_inflation_factor(design.values, i) for i in range(design.shape[1])],
        "features": design.columns,
    })

    return mixed_model, vif

In [None]:
# Function to run models and plot results
def run_models_and_plot(data, dependent_vars, independent_vars, fixed, interaction_term, func, y_var_names):
    """Fit one mixed-effects model per dependent variable and plot the coefficients.

    For every entry of ``dependent_vars`` the model is fitted with
    ``fit_mixed_effects_model``, its summary and VIF table are printed, and the
    fitted values are stored in ``data`` under ``'<dependent_var>_predicted'``.
    A single figure then shows each coefficient (intercept and the last
    parameter excluded) with its confidence-interval half-width; markers are
    red for p < 0.05, green for p < 0.07 and black otherwise.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data. NOTE: a ``'<dependent_var>_predicted'`` column is added to
        this frame for every dependent variable.
    dependent_vars : list of str
        Outcome columns; one model is fitted per entry.
    independent_vars : list of str
        Predictor columns of interest.
    fixed : list of str
        Covariate terms added to every model.
    interaction_term : str
        Optional extra formula term ('' for none).
    func : str
        Optimizer name passed to the model fit.
    y_var_names : list of str
        Tick labels for the plotted coefficients, in coefficient order.

    Returns
    -------
    tuple
        ``(models, emmeans_data)``: ``models`` maps each dependent variable to
        ``{'model': result, 'VIF': vif}``; ``emmeans_data`` maps it to a frame
        holding 'Timepoint', the observed and predicted outcome and the
        independent variables.

    Raises
    ------
    ValueError
        If ``dependent_vars`` is empty or ``y_var_names`` does not have one
        label per plotted coefficient.
    """
    if len(dependent_vars) == 0:
        raise ValueError("dependent_vars must contain at least one variable name")

    models = {}
    emmeans_data = {}

    # Fit models for each dependent variable
    for dependent_var in dependent_vars:
        model, vif = fit_mixed_effects_model(data, dependent_var, independent_vars, fixed, interaction_term, func)
        models[dependent_var] = {'model': model, 'VIF': vif}

        # Print model summary and VIF results
        print(f"Model summary for {dependent_var}:")
        print(model.summary())
        print("VIF results:")
        print(vif)
        print("\n" + "="*80 + "\n")

        # Add predicted values to the data (aligned on the row index)
        predicted_col = f'{dependent_var}_predicted'
        data[predicted_col] = model.fittedvalues
        emmeans_data[dependent_var] = data[['Timepoint', dependent_var, predicted_col] + list(independent_vars)]

    # Collect one coefficient table per model, then concatenate once
    coef_frames = []
    for model_name, model_details in models.items():
        model = model_details['model']
        # Distance from the estimate to the lower confidence bound
        err_series = model.params - model.conf_int()[0]
        coef_frames.append(pd.DataFrame({
            'coef': model.params.values[1:],  # [1:] drops the intercept
            'pval': model.pvalues.values[1:],
            'err': err_series.values[1:],
            'varname': err_series.index.values[1:],
            'model': model_name,
        }))
    coef_df = pd.concat(coef_frames, ignore_index=True)

    # The last parameter name is excluded from the plot
    var_names = coef_df['varname'].unique()[:-1]
    if len(y_var_names) != len(var_names):
        raise ValueError(
            f"y_var_names has {len(y_var_names)} labels but {len(var_names)} "
            f"coefficients are plotted: {list(var_names)}"
        )

    # Set up plot parameters
    base_y = np.arange(len(var_names))
    width = 0.2
    marker_list = ['o', 's', 'D', '^', 'v']
    model_names = coef_df['model'].unique()

    # Create the plot
    fig, ax = plt.subplots(figsize=(8, 7))
    ax.grid(False)
    total_subjs = len(data['Record ID'].unique())
    ax.set_title(f"{total_subjs} subjects")

    for i, mod in enumerate(model_names):
        mod_df = coef_df[coef_df.model == mod].set_index('varname').reindex(var_names)
        Y = base_y + width*i

        # Invisible bars are used only to draw the horizontal error bars
        ax.barh(Y, mod_df['coef'], height=width, color='none', edgecolor='none', xerr=mod_df['err'], capsize=5)

        # Plot coefficients with color-coded p-values
        colors = ['red' if p < 0.05 else 'green' if p < 0.07 else 'black' for p in mod_df['pval']]
        for y, coef, color in zip(Y, mod_df['coef'], colors):
            ax.scatter(y=y, x=coef, marker=marker_list[i % len(marker_list)], s=50, color=color, zorder=3)

    # Add vertical line at x=0
    ax.axvline(x=0, linestyle='--', color='black', linewidth=1)

    # Set y-axis labels
    ax.set_yticks(base_y + width)
    ax.set_yticklabels(y_var_names, rotation=0, fontsize=16)

    # Set x-axis label
    ax.set_xlabel('Coefficient and 95% Confidence Interval', fontsize=16)

    # Add legend (one marker shape per model)
    legend_elements = [
        Line2D([0], [0], marker=marker_list[i % len(marker_list)], label=f'{model_names[i]}', color='k', markersize=8)
        for i in range(len(model_names))
    ]
    ax.legend(handles=legend_elements, loc='upper right', prop={'size': 16}, labelspacing=1.2)

    # Adjust layout and display plot
    plt.tight_layout()
    plt.show()

    return models, emmeans_data

In [None]:
# Function to calculate Cohen's f²
def calculate_cohens_f2(model, X):
    """Return Cohen's f² effect size, ``R² / (1 - R²)``, for a fitted model.

    Parameters
    ----------
    model : object
        Fitted regression result exposing an ``rsquared`` attribute.
    X : object
        Unused; kept so existing callers that pass the design matrix still work.

    Returns
    -------
    float
        Cohen's f². A perfect fit (``R² >= 1``) yields ``inf`` instead of
        dividing by zero.
    """
    r2 = model.rsquared
    if r2 >= 1:
        return float('inf')
    return r2 / (1 - r2)

# Function to plot interaction effects
def plot_interaction_effect(models, emmeans_data, y_var_names, plot_name):
    """Plot predicted outcome vs. each significant predictor, split by timepoint.

    For every model, terms with p < 0.07 are selected. Each selected term that
    is not in the skip list and exists as a column of the corresponding
    ``emmeans_data`` frame gets a 1x2 figure (Post_CBD, Post_Placebo) with a
    regression plot of the model-predicted outcome against the predictor. Each
    panel is annotated with the OLS slope p-value, R² and Cohen's f². The
    figure is saved as a PDF under ``05_Plots/LMM`` and shown.

    Parameters
    ----------
    models : dict
        Maps dependent-variable name to ``{'model': fitted_result, ...}``.
    emmeans_data : dict
        Maps dependent-variable name to a frame containing 'Timepoint',
        ``'<dependent_var>_predicted'`` and the predictor columns.
    y_var_names : list of str
        Unused; kept for interface compatibility.
    plot_name : str
        Prefix of the saved file name.
    """
    # Intercept, covariates and timepoint contrasts are never plotted
    skipped_terms = {
        'Intercept', 'Age', 'ADOS_Module',
        'C(Timepoint)[T.Post_CBD]', 'C(Timepoint)[T.Post_Placebo]', 'C(Timepoint)[T.Post_Wash]',
    }
    directory = os.path.join('05_Plots', 'LMM')
    file_format = 'pdf'
    first_color = sns.color_palette("Set2")[0]

    for dependent_var, model_info in models.items():
        model = model_info['model']
        pvalues = model.pvalues
        # Identify significant (and trending) variables (p < 0.07)
        significant_vars = pvalues[pvalues < 0.07].index.tolist()

        plot_data = emmeans_data[dependent_var]
        predicted_col = f'{dependent_var}_predicted'

        for sig_var in significant_vars:
            # Model terms such as categorical contrasts or the random-effect
            # variance have no matching data column and cannot be plotted.
            if sig_var in skipped_terms or sig_var not in plot_data.columns:
                continue

            # Create a 1x2 subplot
            fig, axs = plt.subplots(1, 2, figsize=(6, 6), sharey=False, sharex=False)

            # Plot for Post_CBD and Post_Placebo timepoints
            for i, timepoint in enumerate(['Post_CBD', 'Post_Placebo']):
                # Rows excluded from the model fit have NaN predictions; drop
                # them so the OLS below only sees complete cases.
                subset_data = plot_data[plot_data['Timepoint'] == timepoint].dropna(subset=[sig_var, predicted_col])

                axs[i].set_title(f'{timepoint}', fontsize=20, fontweight='bold')
                axs[i].set_xlabel(sig_var, fontsize=20, fontweight='bold')
                if i == 0:
                    axs[i].set_ylabel(f'Predicted {dependent_var}', fontsize=20, fontweight='bold')
                else:
                    axs[i].set_ylabel('')
                axs[i].tick_params(axis='both', which='major', labelsize=16)

                if subset_data.empty:
                    # Nothing to regress at this timepoint; leave the panel blank
                    continue

                # Create regression plot
                sns.regplot(data=subset_data, x=sig_var, y=predicted_col, ax=axs[i], color=first_color, scatter_kws={'edgecolor': 'none', 's': 100})
                # regplot sets its own axis labels; restore the formatted ones
                axs[i].set_xlabel(sig_var, fontsize=20, fontweight='bold')
                if i == 0:
                    axs[i].set_ylabel(f'Predicted {dependent_var}', fontsize=20, fontweight='bold')
                else:
                    axs[i].set_ylabel('')

                # Perform OLS regression of the predicted outcome on the predictor
                X = sm.add_constant(subset_data[sig_var])
                y = subset_data[predicted_col]
                est = sm.OLS(y, X).fit()

                # Calculate statistics
                raw_p_value = est.pvalues.iloc[1]  # p-value of the slope
                r2 = est.rsquared
                cohens_f2 = calculate_cohens_f2(est, X)

                # Annotate below the axes (axes-fraction coordinates)
                axs[i].text(0.5, -0.3, f'p = {raw_p_value:.3f}', fontsize=16, ha='center', transform=axs[i].transAxes)
                axs[i].text(0.5, -0.4, f'$R^2$: {r2:.3f}', fontsize=16, ha='center', transform=axs[i].transAxes)
                axs[i].text(0.5, -0.5, f'f²: {cohens_f2:.3f}', fontsize=16, ha='center', transform=axs[i].transAxes)

            # Set overall title
            model_p_value = model.pvalues[sig_var]
            plt.suptitle(f'Effect of {sig_var} on {dependent_var} (Stratified by Study Timepoint)\nModel p-value: {model_p_value:.3f}', fontsize=20)

            # Adjust layout
            plt.subplots_adjust(bottom=0.5)
            plt.tight_layout()

            # Save plot (create the output directory on first use)
            os.makedirs(directory, exist_ok=True)
            file_path = os.path.join(directory, f'{plot_name}_Effect_of_{sig_var}_on_{dependent_var}.{file_format}')
            plt.savefig(file_path, format=file_format)

            plt.show()
            # Release the figure; many figures may be produced in one call
            plt.close(fig)

In [None]:
def plot_pvalue_heatmap(models, dependent_var_names, heatmap_names, plot_name):
    """Draw a heatmap of fixed-effect p-values annotated with z-values.

    Rows are the dependent variables (one per model), columns are the model
    parameters of the first model. Cell colour encodes the p-value (0 to 0.07),
    the annotation is the z-value, and a marker is drawn for p <= 0.001 ('***'),
    <= 0.01 ('**'), <= 0.05 ('*') and <= 0.07 ('#'). The figure is saved as a
    PDF under ``05_Plots/LMM`` and shown.

    Parameters
    ----------
    models : dict
        Maps dependent-variable name to ``{'model': fitted_result, ...}``.
    dependent_var_names : list of str
        Row (y-axis) labels.
    heatmap_names : list of str
        Column (x-axis) labels. If it contains 'Group', that label and every
        column from its position onwards are dropped from the plot.
    plot_name : str
        Base name of the saved file.
    """
    z_columns = {}
    p_columns = {}

    # Read the statistics directly from the results. Parsing the printed
    # summary table would round p-values to three decimals, so e.g. 0.0504
    # would be read as 0.050 and wrongly marked as significant.
    for dep_var, model_info in models.items():
        result = model_info['model']
        # Only the first k_fe parameters are fixed effects; variance
        # components get NaN, matching the blank cells of the summary table.
        is_fixed_effect = np.arange(len(result.params)) < result.k_fe
        z_columns[dep_var] = result.tvalues.where(is_fixed_effect)
        p_columns[dep_var] = result.pvalues.where(is_fixed_effect)

    # Parameters of the first model define the columns of the heatmap
    first_result = next(iter(models.values()))['model']
    param_names = first_result.params.index

    # Dependent variables as rows, parameters as columns
    z_data = pd.DataFrame(z_columns, index=param_names).T
    p_data = pd.DataFrame(p_columns, index=param_names).T

    # Colormap: first "Set2" colour fading from opaque (p = 0) to transparent
    first_color = sns.color_palette("Set2")[0]
    colors = [(first_color[0], first_color[1], first_color[2], alpha) for alpha in np.linspace(1, 0, 256)]
    cmap = LinearSegmentedColormap.from_list('custom_set2', colors, N=256)

    # Drop the 'Group' label and the matching trailing column(s), if present
    if 'Group' in heatmap_names:
        group_index = heatmap_names.index('Group')
        heatmap_names = heatmap_names[:group_index]
        p_data = p_data.iloc[:, :group_index]
        z_data = z_data.iloc[:, :group_index]

    # Create figure and axes
    fig, ax = plt.subplots(figsize=(18, 16))

    # Plot heatmap with adjusted parameters and no grid
    heatmap = sns.heatmap(p_data, annot=z_data, fmt=".2f", cmap=cmap, vmin=0, vmax=0.07,
                cbar_kws={'label': 'p-value', 'use_gridspec': False, 'location': 'right'},
                linewidths=0, linecolor='none',
                annot_kws={"size": 16, "weight": "normal"},
                cbar=True, square=True, ax=ax)

    # Adjust colorbar
    cbar = heatmap.collections[0].colorbar
    cbar.ax.set_ylabel('p-value', rotation=270, labelpad=20)
    cbar.ax.yaxis.set_label_position("left")
    cbar.ax.set_position([0.92, ax.get_position().y0, 0.02, ax.get_position().height])

    # Significance markers, strictest threshold first; NaN matches none
    significance_levels = [(0.001, '***'), (0.01, '**'), (0.05, '*'), (0.07, '#')]
    for (row, col), p_value in np.ndenumerate(p_data.to_numpy()):
        marker = next((symbol for threshold, symbol in significance_levels if p_value <= threshold), None)
        if marker is not None:
            ax.text(col + 0.5, row + 0.35, marker, ha='center', va='bottom', color='black', fontweight='bold', fontsize=20)

    # Set labels and title (empty in this case)
    ax.set_title('', fontsize=16, color='black')
    ax.set_xlabel('', fontsize=20, color='black')
    ax.set_ylabel('', fontsize=20, color='black')

    # Set and format x-axis and y-axis labels
    ax.set_xticks(np.arange(len(heatmap_names)) + 0.5)
    ax.set_xticklabels(heatmap_names, rotation=45, ha='center', va='top', fontsize=14, color='black', fontweight='bold')
    ax.set_yticklabels(dependent_var_names, rotation=0, va='center', fontsize=20, fontweight='bold')

    # Remove ticks
    ax.tick_params(axis='both', which='both', length=0)

    # Adjust bottom margin to accommodate centered labels
    plt.subplots_adjust(bottom=0.2)

    # Save the plot as a PDF file (portable path; directory created if needed)
    directory = os.path.join('05_Plots', 'LMM')
    os.makedirs(directory, exist_ok=True)
    file_format = 'pdf'
    file_path = os.path.join(directory, f'{plot_name}.{file_format}')
    plt.savefig(file_path, format=file_format, bbox_inches='tight', dpi=300)

    # Display the plot
    plt.show()

In [None]:
def plot_pvalue_heatmap_beh(models, dependent_var_names, heatmap_names, plot_name):
    """Draw a p-value heatmap (annotated with z-values) for behavioural models.

    Rows are the dependent variables (one per model), columns are all
    parameters of the first model, including the random-effect variance, whose
    cell stays blank. Cell colour encodes the p-value (0 to 0.07), the
    annotation is the z-value, and a marker is drawn for p <= 0.001 ('***'),
    <= 0.01 ('**'), <= 0.05 ('*') and <= 0.07 ('#'). The figure is saved as a
    PDF under ``05_Plots/LMM`` and shown.

    Parameters
    ----------
    models : dict
        Maps dependent-variable name to ``{'model': fitted_result, ...}``.
    dependent_var_names : list of str
        Row (y-axis) labels.
    heatmap_names : list of str
        Column (x-axis) labels, applied to the first ``len(heatmap_names)``
        columns.
    plot_name : str
        Base name of the saved file.
    """
    z_columns = {}
    p_columns = {}

    # Read the statistics directly from the results. Parsing the printed
    # summary table would round p-values to three decimals, so e.g. 0.0504
    # would be read as 0.050 and wrongly marked as significant.
    for dep_var, model_info in models.items():
        result = model_info['model']
        # Only the first k_fe parameters are fixed effects; variance
        # components get NaN, matching the blank cells of the summary table.
        is_fixed_effect = np.arange(len(result.params)) < result.k_fe
        z_columns[dep_var] = result.tvalues.where(is_fixed_effect)
        p_columns[dep_var] = result.pvalues.where(is_fixed_effect)

    # Parameters of the first model define the columns of the heatmap
    first_result = next(iter(models.values()))['model']
    param_names = first_result.params.index

    # Dependent variables as rows, parameters as columns
    z_data = pd.DataFrame(z_columns, index=param_names).T
    p_data = pd.DataFrame(p_columns, index=param_names).T

    # Colormap: first "Set2" colour fading from opaque (p = 0) to transparent
    first_color = sns.color_palette("Set2")[0]
    colors = [(first_color[0], first_color[1], first_color[2], alpha) for alpha in np.linspace(1, 0, 256)]
    cmap = LinearSegmentedColormap.from_list('custom_set2', colors, N=256)

    # Create figure and axes
    fig, ax = plt.subplots(figsize=(18, 16))

    # Plot heatmap with adjusted parameters and no grid
    heatmap = sns.heatmap(p_data, annot=z_data, fmt=".2f", cmap=cmap, vmin=0, vmax=0.07,
                cbar_kws={'label': 'p-value', 'use_gridspec': False, 'location': 'right'},
                linewidths=0, linecolor='none',
                annot_kws={"size": 16, "weight": "normal"},
                cbar=True, square=True, ax=ax)

    # Adjust colorbar
    cbar = heatmap.collections[0].colorbar
    cbar.ax.set_ylabel('p-value', rotation=270, labelpad=20)
    cbar.ax.yaxis.set_label_position("left")
    cbar.ax.set_position([0.92, ax.get_position().y0, 0.02, ax.get_position().height])

    # Significance markers, strictest threshold first; NaN matches none
    significance_levels = [(0.001, '***'), (0.01, '**'), (0.05, '*'), (0.07, '#')]
    for (row, col), p_value in np.ndenumerate(p_data.to_numpy()):
        marker = next((symbol for threshold, symbol in significance_levels if p_value <= threshold), None)
        if marker is not None:
            ax.text(col + 0.5, row + 0.35, marker, ha='center', va='bottom', color='black', fontweight='bold', fontsize=20)

    # Set labels and title (empty in this case)
    ax.set_title('', fontsize=16, color='black')
    ax.set_xlabel('', fontsize=20, color='black')
    ax.set_ylabel('', fontsize=20, color='black')

    # Set and format x-axis and y-axis labels
    ax.set_xticks(np.arange(len(heatmap_names)) + 0.5)
    ax.set_xticklabels(heatmap_names, rotation=45, ha='center', va='top', fontsize=14, color='black', fontweight='bold')
    ax.set_yticklabels(dependent_var_names, rotation=0, va='center', fontsize=20, fontweight='bold')

    # Remove ticks
    ax.tick_params(axis='both', which='both', length=0)

    # Adjust bottom margin to accommodate centered labels
    plt.subplots_adjust(bottom=0.2)

    # Save the plot as a PDF file (portable path; directory created if needed)
    directory = os.path.join('05_Plots', 'LMM')
    os.makedirs(directory, exist_ok=True)
    file_format = 'pdf'
    file_path = os.path.join(directory, f'{plot_name}.{file_format}')
    plt.savefig(file_path, format=file_format, bbox_inches='tight', dpi=300)

    # Display the plot
    plt.show()

In [None]:
# Frequency bands definition: name -> (low, high) bounds
BANDS = {'Delta': (1, 4), 'Theta': (4, 8), 'Alpha': (8, 13)}

# Preload channel names
CHANNELS = ['F7', 'Fp1', 'Fp2', 'F8', 'F3', 'Fz', 'F4', 'C3', 'Cz', 'P8', 'P7', 'Pz', 'P4', 'T3', 'P3', 'O1', 'O2', 'C4', 'T4']

# Find the index location of channels in anatomical groups
All_Channel_indices = np.arange(len(CHANNELS))
Occipital_Channel_indices = [CHANNELS.index(name) for name in ('O1', 'O2')]
Frontal_Channel_indices = [CHANNELS.index(name) for name in ('F7', 'F3', 'Fz', 'F4', 'F8')]
Central_Channel_indices = [CHANNELS.index(name) for name in ('Cz', 'C3', 'C4')]

# Name of anatomical channel groups and channel indices
Channel_Groups = {
    'All': All_Channel_indices,
    'Occipital': Occipital_Channel_indices,
    'Frontal': Frontal_Channel_indices,
    'Central': Central_Channel_indices,
}

# Directory paths
csv_path = '04_Features_EEG/Range_13_10_Sec_Epoch_Fixed_Merged.csv'

# Load dataframe
all_data = pd.read_csv(csv_path)

# Keep the four study timepoints, then only subjects (Record IDs) observed at
# three or more distinct timepoints. The explicit .copy() makes filt_data an
# independent frame, so the column assignments below (and in later cells)
# never write into a view of all_data.
filt_data = all_data[all_data['Timepoint'].isin(['Baseline', 'Post_Wash', 'Post_Placebo', 'Post_CBD'])]
filt_data = filt_data.groupby('Record ID').filter(lambda x: len(x['Timepoint'].unique()) >= 3).copy()

# Z-score normalize metabolite levels across all remaining rows
for metabolite in ['CBD', 'OHCBD', 'COOHCBD', 'AEA']:
    filt_data[f'{metabolite}_Z_score'] = zscore(filt_data[metabolite])

# Combined score: mean of the CBD, 7-OH-CBD and 7-COOH-CBD z-scores
# (reuses the columns computed above instead of recomputing each z-score)
filt_data['Combined_score'] = (filt_data['CBD_Z_score'] + filt_data['OHCBD_Z_score'] + filt_data['COOHCBD_Z_score']) / 3

# Convert categorical variables to numeric codes
for col in ['Timepoint', 'Randomization', 'ADOS_Module']:
    filt_data[f'{col}_numeric'] = filt_data[col].astype('category').cat.codes

# Sort by subject (ascending), then timepoint (descending)
filt_data = filt_data.sort_values(by=['Record ID', 'Timepoint'], ascending=[True, False])

# Print summary information
print(f"Number of unique 'Record ID' in the comparisons: n = {len(filt_data['Record ID'].unique())}")
print(filt_data['Record ID'].unique())
# Function to calculate statistics for each group
def calculate_stats(group):
    """Summarise one group of rows.

    Returns a Series with the mean, standard error, minimum and maximum of the
    'Age' column plus the number of distinct 'Record ID' values in the group.
    """
    ages = group['Age']
    summary = {}
    summary['Mean Age'] = ages.mean()
    summary['SEM Age'] = ages.sem()
    summary['Min Age'] = ages.min()
    summary['Max Age'] = ages.max()
    summary['Unique Record ID'] = group['Record ID'].nunique()
    return pd.Series(summary)

# Age summary and subject count for each timepoint
rows_by_timepoint = filt_data.groupby('Timepoint')
result = rows_by_timepoint.apply(calculate_stats).reset_index()
print(result)

In [None]:
def plot_single_violin_ANOVA(data, channels, y_vars, y_var_names, timepoint_col='Timepoint', subject_col='Record ID', figsize=(25, 10), font=20, x_tick_labels=None):
    """Plot each variable by timepoint (box + strip plot) with a one-way ANOVA.

    One panel is drawn per entry of ``y_vars``. For each panel a one-way ANOVA
    (``scipy.stats.f_oneway``) compares the values across timepoints; the
    p-values are Bonferroni-corrected across panels and reported in the panel
    titles. The figure is saved to ``05_Plots/Descriptive.pdf`` and shown.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``subject_col``, ``timepoint_col`` and the plotted columns.
    channels : list of str
        If non-empty, only ``channels[0]`` is used: each plotted column is
        ``f"{channels[0]}_{y_var}"``. If empty, ``y_vars`` are used as-is.
    y_vars : list of str
        Variables to plot.
    y_var_names : list of str
        Y-axis label for each variable.
    timepoint_col, subject_col : str
        Column names for the timepoint and the subject identifier.
    figsize : tuple
        Figure size passed to matplotlib.
    font : int
        Font size for labels, ticks and titles.
    x_tick_labels : list of str, optional
        Replacement x tick labels.
    """
    # NOTE(review): f_oneway treats the timepoints as independent groups even
    # though the same subjects appear at several timepoints — confirm that this
    # is the intended test.

    # Determine the grid of panels
    n_cols = len(y_vars)
    n_rows = len(channels) if len(channels) != 0 else 1

    # squeeze=False always returns a 2-D array, so flatten() also works for a
    # single panel (plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    axs = axs.flatten()
    p_values = []

    order = ['Baseline', 'Post_Placebo', 'Post_CBD', 'Post_Wash']
    # One colour per timepoint; hue is mapped to the x variable below because
    # passing `palette` without `hue` is deprecated in seaborn 0.13.
    palette = sns.color_palette("Set2", n_colors=len(order))

    # Loop through each variable to plot
    for idx, var_group in enumerate(y_vars):
        var = f"{channels[0]}_{var_group}" if len(channels) != 0 else var_group

        # Filter out rows with missing data
        plot_df = data[[subject_col, timepoint_col, var]].dropna()

        # Collect non-empty groups for ANOVA (a categorical timepoint column
        # can produce empty groups for unobserved levels)
        groups = [group[var] for _, group in plot_df.groupby(timepoint_col) if len(group) > 0]

        # Perform one-way ANOVA across all observed timepoints
        if len(groups) > 1:
            _, p_value = f_oneway(*groups)
            p_values.append(p_value)
        else:
            p_values.append(1)  # Non-significant p-value if not enough groups

        # Create box and strip plots
        sns.boxplot(ax=axs[idx], data=plot_df, x=timepoint_col, y=var, hue=timepoint_col,
                    order=order, hue_order=order, legend=False,
                    gap=.001, fill=False, showfliers=False, palette=palette)
        sns.stripplot(ax=axs[idx], data=plot_df, x=timepoint_col, y=var, hue=timepoint_col,
                      order=order, hue_order=order, legend=False,
                      dodge=False, edgecolor='white', size=10, alpha=.5, palette=palette)

        # Set labels and format axes
        axs[idx].set_ylabel(y_var_names[idx], fontsize=font, fontweight='bold')
        axs[idx].set_xlabel('', fontsize=font, fontweight='bold')
        axs[idx].tick_params(axis='both', which='major', labelsize=font, labelrotation=45)
        axs[idx].grid(False)

        # Set custom x-tick labels if provided
        if x_tick_labels:
            axs[idx].set_xticklabels(x_tick_labels)

    # Bonferroni-correct across panels and write the titles; this also runs
    # when only one variable is plotted so that its panel still gets a title.
    if p_values:
        corrected_p_values = multipletests(p_values, alpha=0.05, method='bonferroni')[1]
        for idx, (raw_p, bonf_p) in enumerate(zip(p_values, corrected_p_values)):
            if len(channels) != 0:
                axs[idx].set_title(f'{channels[0]} Electrodes\nANOVA p = {raw_p:.4f}, Bonf. p = {bonf_p:.4f}', fontsize=font, fontweight='bold')
            else:
                axs[idx].set_title(f'All Subjects \nANOVA p = {raw_p:.4f}', fontsize=font, fontweight='bold')

    # Hide unused axes
    for ax in axs[len(y_vars):]:
        ax.set_visible(False)

    # Adjust layout and save figure (create the output directory if needed)
    plt.tight_layout()
    directory = '05_Plots'
    os.makedirs(directory, exist_ok=True)
    file_format = 'pdf'
    file_path = os.path.join(directory, f'Descriptive.{file_format}')
    plt.savefig(file_path, format=file_format)

    plt.show()

# Descriptive plots: recording duration and metabolite levels per timepoint
channel_groups = []

# Column name -> y-axis label, in panel order
descriptive_panels = {
    'Duration_Secs': 'rsEEG Duration\n(seconds)',
    'CBD': 'CBD\n(ng/mL)',
    'OHCBD': '7-OHCBD\n(ng/mL)',
    'COOHCBD': '7-COOHCBD\n(ng/mL)',
}
y_vars = list(descriptive_panels.keys())
y_var_names = list(descriptive_panels.values())

titles = ['All Subjects']

# Display labels for the four timepoints on the x-axis
x_tick_labels = ['Baseline', 'Post-Placebo', 'Post-CBD', 'Post-Wash']

# An empty channel list means the variables are used without a channel prefix
plot_single_violin_ANOVA(
    data=filt_data,
    channels=[],
    y_vars=y_vars,
    y_var_names=y_var_names,
    timepoint_col='Timepoint',
    subject_col='Record ID',
    x_tick_labels=x_tick_labels,
)

In [None]:
# Define parameters for the statistical model and visualization

# Fixed effects for the mixed-effects model
fixed = ['Age', 'ADOS_Module', 'C(Timepoint)', 'C(Randomization)']

# Optimization method for the mixed-effects model
func = 'nm'  # Nelder-Mead optimization algorithm

# No interaction term specified
interaction_term = ''

# Labels for y-axis in the coefficient plot. They are applied positionally, so
# they must follow the coefficient order of the fitted model: categorical
# contrasts first, then the numeric terms in formula order (EEG features,
# Age, ADOS_Module). Age therefore comes before ADOS, as in heatmap_names below.
y_var_names = [
    'CBD Timepoint',
    'Placebo Timepoint',
    'Wash Timepoint',
    'Randomization',
    'Offset\n(uV²)',
    'Exponent\n(uV² / Hz)',
    'Delta\n(uV²)',
    'Theta\n(uV²)',
    'Alpha\n(uV²)',
    'Age\n(years)',
    'ADOS\n(module)'
]

# Labels for heatmap columns (same positional order, with the intercept first
# and the random-effect 'Group' entry last)
heatmap_names = [
    'Intercept',
    'Post-CBD',
    'Post-Placebo',
    'Post-Wash',
    'Randomization',
    'Offset\n(uV²)',
    'Exponent\n(uV²/Hz)',
    'Delta SNR\n(uV²/Hz)',
    'Theta SNR\n(uV²/Hz)',
    'Alpha SNR\n(uV²/Hz)',
    'Age',
    'ADOS',
    'Group']

# Dependent variable name (metabolite)
dependent_var_name = ['7-COOH-CBD (z)']

# Names of independent variables for visualization
independent_var_name = [
    'Int',
    'Post-CBD',
    'Post-Placebo',
    'Post-Wash',
    'Randomization',
    'Offset\n(uV²)',
    'Exponent\n(uV² / Hz)',
    'Delta\n(uV²)',
    'Theta\n(uV²)',
    'Alpha\n(uV²)',
    'Age\n(years)',
    'ADOS\n(module)'
]

# Dependent variables for the model
dependent_vars = ['COOHCBD_Z_score']

In [None]:
# Per-group outputs are collected here
all_results = []

# One modelling and plotting pass per anatomical channel group
for group_name in Channel_Groups:
    # Predictors: offset and exponent first, then one SNR term per band
    independent_vars = [f"{group_name}_Offset", f"{group_name}_Exponent"]
    independent_vars += [f"{group_name}_{band_name}_SNR" for band_name in BANDS]

    # Fit the models and draw the coefficient plot
    print(f"Running models and plotting for group: {group_name}")
    models, emmeans_results = run_models_and_plot(
        filt_data, dependent_vars, independent_vars, fixed, interaction_term, func, y_var_names
    )

    # Regression plots for significant predictors
    plot_name = f"{group_name}"
    plot_interaction_effect(models, emmeans_results, y_var_names, plot_name)

    # p-value heatmap
    plot_name = f"{group_name}_heatmap"
    plot_pvalue_heatmap(models, dependent_var_name, heatmap_names, plot_name)

    # Keep everything produced for this group
    group_result = dict(
        group=group_name,
        models=models,
        emmeans_results=emmeans_results,
        independent_vars=independent_vars,
        dependent_vars=dependent_vars,
    )
    all_results.append(group_result)

In [None]:
# Model parameters
func = 'nm'  # Optimization method (likely Nelder-Mead)
interaction_term = ''  # No interaction term specified
fixed = ['Age', 'ADOS_Module', 'C(Timepoint)', 'C(Randomization)']  # Fixed effects

# Dependent variable (CBD metabolite)
dependent_vars = ['COOHCBD_Z_score']  # Focusing on 7-COOH-CBD z-scores

# Independent variables (various cognitive and behavioral assessments)
independent_vars = [
    'rbs_total_score', 'ppvt_raw_score', 'toni4_raw_score', 'eowpvt4_raw_score',
    'beery_vmi_raw_score', 'beery_vp_raw_score', 'beery_mc_raw_score'
]

# Labels for y-axis in visualization
y_var_names_constant = [
    'CBD Timepoint', 'Placebo Timepoint', 'Wash Timepoint', 'Randomization', 'Age', 'ADOS'
]

# One display label per entry of independent_vars (same order)
y_var_names_append = [
    'Total RBS\nScore', 'PPVT\nRaw Score', 'TONI-4\nRaw Score', 'EOWPVT-4\nRaw Score',
    'Beery VMI\nRaw Score', 'Beery VP\nRaw Score', 'Beery MC\nRaw Score',
]

# Dependent variable name for visualization
dependent_var_name = ['7-COOH-CBD (z)']

# Fit one model per behavioural measure
for ind_var, y_var_name in zip(independent_vars, y_var_names_append):
    # Prepare y-axis labels: the measure sits between Randomization and Age
    y_var_names = y_var_names_constant[:4] + [y_var_name] + y_var_names_constant[4:]
    print(y_var_names)

    # Heatmap column labels for this measure. The measure's own label is used
    # (a fixed 'TONI-4' label would mislabel every other measure).
    heatmap_names = [
        'Intercept', 'Post-CBD', 'Post-Placebo', 'Post-Wash', 'Randomization',
        y_var_name, 'Age', 'ADOS', 'Group'
    ]

    # Prepare data: drop rows missing the measure, keep subjects that still
    # have at least 3 timepoints. The .copy() gives an independent frame, since
    # run_models_and_plot adds prediction columns to the data it receives.
    df_beh_filt = (
        filt_data
        .dropna(subset=[ind_var])
        .groupby('Record ID')
        .filter(lambda x: len(x['Timepoint'].unique()) >= 3)
        .copy()
    )

    # Calculate and print statistics for each timepoint
    result = df_beh_filt.groupby('Timepoint').apply(calculate_stats).reset_index()
    print(ind_var)
    print(result)

    # Run models and generate plots
    models, emmeans_results = run_models_and_plot(df_beh_filt, dependent_vars, [ind_var], fixed, interaction_term, func, y_var_names)

    # Generate interaction effect plot
    plot_name = f"{ind_var}_beh_reg"
    plot_interaction_effect(models, emmeans_results, y_var_names, plot_name)

    # Generate heatmap
    plot_name = f"{ind_var}_beh_heat"
    plot_pvalue_heatmap_beh(models, dependent_var_name, heatmap_names, plot_name)

In [None]:
# Beery VMI, VP, MC analysis

# Dependent variable (CBD metabolite)
dependent_vars = ['COOHCBD_Z_score']  # Focusing on 7-COOH-CBD z-scores

# Fixed effects for the model
fixed = ['Age', 'ADOS_Module', 'C(Timepoint)', 'C(Randomization)']

# Independent variables (Beery subtests)
independent_vars = ['beery_vmi_raw_score', 'beery_vp_raw_score', 'beery_mc_raw_score']

# Labels for y-axis in visualization
y_var_names = [
    'Session_CBD', 'Session_Placebo', 'Session_Wash', 'Randomization',
    'Total Beery VMI\nRaw Score', 'Total Beery VP\nRaw Score', 'Total Beery MC\nRaw Score',
    'Age', 'ADOS'
]

# Labels for heatmap. plot_pvalue_heatmap_beh draws one column per model
# parameter, including the trailing random-effect entry, hence 'Group'.
heatmap_names = [
    'Intercept', 'Post-CBD', 'Post-Placebo', 'Post-Wash', 'Randomization',
    'VMI\nScore', 'VP\nScore', 'MC\nScore', 'Age', 'ADOS', 'Group'
]

# Dependent variable name for visualization
dependent_var_name = ['7-COOH-CBD (z)']

# Data preparation: keep rows with all three Beery scores, then subjects that
# still have at least 3 timepoints. The .copy() gives an independent frame,
# since run_models_and_plot adds prediction columns to the data it receives.
df = filt_data.dropna(subset=independent_vars)
df = df.groupby('Record ID').filter(lambda x: len(x['Timepoint'].unique()) >= 3).copy()

# Calculate and print statistics for each timepoint
result = df.groupby('Timepoint').apply(calculate_stats).reset_index()
# Print the variables analysed in this cell (previously this printed a loop
# variable left over from the preceding cell)
print(independent_vars)
print(result)

# Run models and generate plots
models, emmeans_results = run_models_and_plot(df, dependent_vars, independent_vars, fixed, interaction_term, func, y_var_names)

# Generate interaction effect plot
plot_name = 'beery_beh_reg'
plot_interaction_effect(models, emmeans_results, y_var_names, plot_name)

# Generate heatmap
plot_name = 'beery_beh_heat'
plot_pvalue_heatmap_beh(models, dependent_var_name, heatmap_names, plot_name)

# Print statistics again
result = df.groupby('Timepoint').apply(calculate_stats).reset_index()
print(result)