# stats.ipynb

Perform the main statistical analyses

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import analysis
from metadata import METADATA, STIMULUS_METADATA

In [2]:
# Progress bars for apply
tqdm.pandas()

## Set up R

In [3]:
%load_ext rpy2.ipython

In [4]:
%%R
library(tidyverse)
library(ggplot2)
library(lme4)
library(broom)
library(multcomp)

R[write to console]: Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang

R[write to console]: Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2

R[write to console]: ── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──

R[write to console]: [32m✔[39m [34mggplot2[39m 3.1.1       [32m✔[39m [34mpurrr  [39m 0.3.2  
[32m✔[39m [34mtibble [39m 2.1.1       [32m✔[39m [34mdplyr  [39m 0.8.0.[31m1[39m
[32m✔[39m [34mtidyr  [39m 0.8.3       [32m✔[39m [34mstringr[39m 1.4.0  
[32m✔[39m [34mreadr  [39m 1.3.1       [32m✔[39m [34mforcats[39m 0.4.0  

R[write to console]: ── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[

## Build contrast matrices

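NOTE: The order of the terms (columns) in each contrast matrix is hardcoded. It must match the order of the fixed-effect terms in the fitted R model object; `run_lme` asserts this after each fit.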

In [5]:
def rename_terms(model):
    """Return the fixed-effect term names with the factor-name prefixes removed.

    Selects the rows of the tidied model table whose ``group`` is ``"fixed"``
    and removes the substrings ``"area"``, ``"layer"`` and ``"stimulus_type"``
    from their ``term`` values, e.g. ``"layerL4:stimulus_typenatural"``
    becomes ``"L4:natural"``.
    """
    is_fixed = model["group"] == "fixed"
    stripped = model.loc[is_fixed, "term"]
    for factor_name in ("area", "layer", "stimulus_type"):
        stripped = stripped.str.replace(factor_name, "")
    return stripped

In [6]:
# Fixed-effect terms, in the order expected from the R model object
terms = [
    "(Intercept)",
    "L4",
    "L5",
    "natural",
    "L4:natural",
    "L5:natural",
]
# Terms with coefficient 1 in each contrast. For a non-reference layer X:
#   X:natural - X:artificial
#     == (intercept + X + natural + X:natural) - (intercept + X)
#     == natural + X:natural
# For L2/3 the difference is (intercept + natural) - (intercept) == natural.
contrast_terms = {
    "L2/3:natural - L2/3:artificial": ["natural"],
    "L4:natural - L4:artificial": ["natural", "L4:natural"],
    "L5:natural - L5:artificial": ["natural", "L5:natural"],
}
contrasts = list(contrast_terms)
LAYER_CONTRAST_MATRIX = pd.DataFrame(
    np.zeros((len(contrasts), len(terms)), dtype=int),
    index=contrasts,
    columns=terms,
)
for contrast, nonzero_terms in contrast_terms.items():
    LAYER_CONTRAST_MATRIX.loc[contrast, nonzero_terms] = 1

LAYER_CONTRAST_MATRIX

Unnamed: 0,(Intercept),L4,L5,natural,L4:natural,L5:natural
L2/3:natural - L2/3:artificial,0,0,0,1,0,0
L4:natural - L4:artificial,0,0,0,1,1,0
L5:natural - L5:artificial,0,0,0,1,0,1


In [7]:
# Fixed-effect terms, in the order expected from the R model object
terms = [
    "(Intercept)",
    "LM",
    "AL",
    "PM",
    "AM",
    "natural",
    "LM:natural",
    "AL:natural",
    "PM:natural",
    "AM:natural",
]
contrasts = [
    "V1:natural - V1:artificial",
    "LM:natural - LM:artificial",
    "AL:natural - AL:artificial",
    "PM:natural - PM:artificial",
    "AM:natural - AM:artificial",
]
# Size the matrix from the lists above so the two cannot drift apart
AREA_CONTRAST_MATRIX = pd.DataFrame(
    np.zeros([len(contrasts), len(terms)], dtype=int),
    columns=terms,
    index=contrasts,
)
# V1 has no term of its own in the model, so:
# V1:natural - V1:artificial == 0
#   (intercept + natural) - (intercept) == 0
#   natural == 0
AREA_CONTRAST_MATRIX.loc["V1:natural - V1:artificial", ["natural"]] = 1
# LM:natural - LM:artificial == 0
#   (intercept + natural + LM + LM:natural) - (intercept + LM) == 0
#   natural + LM:natural == 0
AREA_CONTRAST_MATRIX.loc["LM:natural - LM:artificial", ["natural", "LM:natural"]] = 1
# AL:natural - AL:artificial == 0
#   (intercept + natural + AL + AL:natural) - (intercept + AL) == 0
#   natural + AL:natural == 0
AREA_CONTRAST_MATRIX.loc["AL:natural - AL:artificial", ["natural", "AL:natural"]] = 1
# PM:natural - PM:artificial == 0
#   (intercept + natural + PM + PM:natural) - (intercept + PM) == 0
#   natural + PM:natural == 0
AREA_CONTRAST_MATRIX.loc["PM:natural - PM:artificial", ["natural", "PM:natural"]] = 1
# AM:natural - AM:artificial == 0
#   (intercept + natural + AM + AM:natural) - (intercept + AM) == 0
#   natural + AM:natural == 0
AREA_CONTRAST_MATRIX.loc["AM:natural - AM:artificial", ["natural", "AM:natural"]] = 1

AREA_CONTRAST_MATRIX

Unnamed: 0,(Intercept),LM,AL,PM,AM,natural,LM:natural,AL:natural,PM:natural,AM:natural
V1:natural - V1:artificial,0,0,0,0,0,1,0,0,0,0
LM:natural - LM:artificial,0,0,0,0,0,1,1,0,0,0
AL:natural - AL:artificial,0,0,0,0,0,1,0,1,0,0
PM:natural - PM:artificial,0,0,0,0,0,1,0,0,1,0
AM:natural - AM:artificial,0,0,0,0,0,1,0,0,0,1


## LME functions

In [8]:
%%R
compare_lme_models = function(data, formula, reduced_formula, contrast_matrix, posthoc_alternative) {
    # Fit a full and a reduced mixed model, compare them, and run post hoc
    # contrasts on the full model.
    #
    # Arguments:
    #   data:                data passed to both `lmer` fits
    #   formula:             formula of the full model
    #   reduced_formula:     formula of the reduced model
    #   contrast_matrix:     contrasts handed to `glht` (coerced with as.matrix)
    #   posthoc_alternative: `alternative` argument of `glht`
    #
    # Returns a list holding four tidy tables ("model", "reduced_model",
    # "comparison", "posthoc") followed by the captured printouts of the
    # model comparison and of the post hoc summary, in that order.

    # Estimate models (REML=FALSE)
    model = lmer(formula, data=data, REML=FALSE)
    reduced_model = lmer(reduced_formula, data=data, REML=FALSE)
    # Compare models
    model_comparison = anova(model, reduced_model)

    # Run post hoc tests.
    # Summarise only once and reuse the result: the adjusted p-values are
    # approximated numerically, so two separate summary() calls are not
    # guaranteed to agree exactly, and the tidy table must match the printout.
    posthoc_model = glht(model, as.matrix(contrast_matrix), alternative = posthoc_alternative)
    posthoc_summary = summary(posthoc_model)

    tidy_model = tidy(model)
    tidy_reduced_model = tidy(reduced_model)
    tidy_comparison = tidy(model_comparison)
    tidy_posthoc = tidy(posthoc_summary)

    # Label the rows so the two tables can be told apart after merging
    tidy_comparison$test_type = "likelihood_ratio"
    tidy_posthoc$test_type = "posthoc"

    # Return output in tidy form
    list(
        "model" = tidy_model,
        "reduced_model" = tidy_reduced_model,
        "comparison" = tidy_comparison,
        "posthoc" = tidy_posthoc,
        "comparison_description" = capture.output(print(model_comparison)),
        "posthoc_description" = capture.output(print(posthoc_summary))
    )
}

In [9]:
import rpy2.robjects as ro


def lme_result_to_python(lme_result):
    """Convert the output of the R function `compare_lme_models` to Python objects.

    Arguments:
        lme_result: The R list returned by `compare_lme_models`. Its first
            four items are converted with the pandas converter; items 4 and 5
            are sequences of text lines.

    Returns:
        A dictionary with the converted tables under "model", "reduced_model",
        "comparison" and "posthoc", and the two printouts joined into single
        strings under "comparison_description" and "posthoc_description".
    """
    # Import the submodule explicitly instead of relying on some other import
    # (e.g. the rpy2 IPython extension) having made `ro.pandas2ri` available.
    from rpy2.robjects import pandas2ri

    table_names = ["model", "reduced_model", "comparison", "posthoc"]
    with ro.conversion.localconverter(ro.default_converter + pandas2ri.converter):
        tables = [
            ro.conversion.rpy2py(r_table)
            for r_table in lme_result[: len(table_names)]
        ]
    output = dict(zip(table_names, tables))
    output["comparison_description"] = "\n".join(lme_result[4])
    output["posthoc_description"] = "\n".join(lme_result[5])
    return output

In [10]:
def run_lme(data, formula, reduced_formula, contrast_matrix, posthoc_alternative):
    """Fit and compare the LME models in R, then validate the contrast matrix terms.

    Passes all arguments to the R function `compare_lme_models` through the
    `%R` line magic, converts the result with `lme_result_to_python`, and
    checks that the columns of `contrast_matrix` equal the fixed-effect terms
    of the fitted model as renamed by `rename_terms` (same names, same order).

    Arguments:
        data: The data passed to R as `data`.
        formula: Formula of the full model.
        reduced_formula: Formula of the reduced model.
        contrast_matrix: Post-hoc contrasts; its columns are the expected terms.

    Keyword Arguments:
        posthoc_alternative: The type of post-hoc test to perform ('two.sided', 'less', 'greater')

    Returns:
        The dictionary produced by `lme_result_to_python`.

    Raises:
        AssertionError: If the contrast matrix columns differ from the model terms.
    """
    # Each `-i NAME` makes the Python variable NAME available to the R call
    lme_result = %R -i data -i formula -i reduced_formula -i contrast_matrix -i posthoc_alternative compare_lme_models(data, formula, reduced_formula, contrast_matrix, posthoc_alternative)
    lme_result = lme_result_to_python(lme_result)
    # Check that contrast matrix terms are as expected
    expected_terms = list(contrast_matrix.columns)
    actual_terms = list(rename_terms(lme_result['model']))
    assert expected_terms == actual_terms, (
        'Unexpected terms in the posthoc model; check the contrast matrix!\n'
        f'  Expected: {expected_terms}\n'
        f'    Actual: {actual_terms}'
    )
    return lme_result

In [11]:
# Number of rows `run_stats` asserts its input has; reassigned before the Figure S4 analysis
EXPECTED_NUMBER_OF_OBSERVATIONS = 2200

In [12]:
def _fit_interaction(
    data, interaction, formula, reduced_formula, contrast_matrix, posthoc_alternative
):
    """Run the LME analysis for one interaction and tabulate its tests.

    Returns a tuple ``(result, result_df)``: the dictionary returned by
    `run_lme`, and the outer merge of its "comparison" and "posthoc" tables
    labelled with the interaction name and both formulas.
    """
    result = run_lme(
        data,
        formula,
        reduced_formula,
        contrast_matrix,
        posthoc_alternative=posthoc_alternative,
    )
    result_df = result["comparison"].merge(result["posthoc"], how="outer")
    result_df["interaction"] = interaction
    result_df["formula"] = formula
    result_df["reduced_formula"] = reduced_formula
    return result, result_df


def _report_section(title, result):
    """Return the report lines for one interaction: a ruled title and both printouts."""
    rule = "=" * 80
    return [
        rule,
        title,
        rule,
        result["comparison_description"],
        result["posthoc_description"],
    ]


def run_stats(data, output_path, prefix=""):
    """Run the layer and area analyses on `data` and write a text report.

    The formulas and contrast matrices are read from the module-level names
    LAYER_FORMULA, LAYER_REDUCED_FORMULA, LAYER_CONTRAST_MATRIX and their
    AREA_* counterparts at call time. Post hoc tests use the "greater"
    alternative.

    Arguments:
        data: The observations; must have exactly
            EXPECTED_NUMBER_OF_OBSERVATIONS rows.
        output_path: Path object of the report file to (over)write.

    Keyword Arguments:
        prefix: Text written at the start of the report.

    Returns:
        A DataFrame concatenating the layer and area test tables, with a
        "posthoc_alternative" column added.

    Raises:
        AssertionError: If `data` has an unexpected number of rows.
    """
    assert (
        data.shape[0] == EXPECTED_NUMBER_OF_OBSERVATIONS
    ), "Unexpected number of observations, check parameters!"

    posthoc_alternative = "greater"

    layer_result, layer_result_df = _fit_interaction(
        data,
        "layer",
        LAYER_FORMULA,
        LAYER_REDUCED_FORMULA,
        LAYER_CONTRAST_MATRIX,
        posthoc_alternative,
    )
    area_result, area_result_df = _fit_interaction(
        data,
        "area",
        AREA_FORMULA,
        AREA_REDUCED_FORMULA,
        AREA_CONTRAST_MATRIX,
        posthoc_alternative,
    )

    result_df = pd.concat([layer_result_df, area_result_df], axis="rows")
    result_df["posthoc_alternative"] = posthoc_alternative

    report_lines = (
        _report_section("Layer", layer_result)
        + [""]
        + _report_section("Area", area_result)
    )
    # The R printouts contain non-ASCII characters (e.g. the quotes in the
    # "Signif. codes" legend), so do not depend on the locale's default encoding.
    with output_path.open(mode="wt", encoding="utf-8") as f:
        f.write(prefix + "\n".join(report_lines))

    return result_df

# Spectral differentiation

## Load data

In [13]:
df = pd.read_parquet("results/sensitivity_analysis.parquet")

# Re-create the factor columns as unordered Categoricals (same categories),
# since rpy2 has issues with them otherwise
for column in ["layer", "area"]:
    df[column] = pd.Categorical(
        df[column], categories=df[column].cat.categories, ordered=False
    )

In [14]:
response = "log(normalized differentiation)"

In [15]:
# R should receive missing values rather than infinities: replace Inf with NaN
is_infinite = np.isinf(df[response])
df.loc[is_infinite, response] = np.nan

## Estimate models

### Figures 3, S5, S6, & S7

In [16]:
# Full model: layer, stimulus type and their interaction, with a random intercept per session
LAYER_FORMULA = f"`{response}` ~ 1 + layer * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
LAYER_REDUCED_FORMULA = f"`{response}` ~ 1 + layer + stimulus_type + (1 | session)"
print(LAYER_FORMULA, LAYER_REDUCED_FORMULA, sep="\n")

`log(normalized differentiation)` ~ 1 + layer * stimulus_type + (1 | session)
`log(normalized differentiation)` ~ 1 + layer + stimulus_type + (1 | session)


In [17]:
# Full model: area, stimulus type and their interaction, with a random intercept per session
AREA_FORMULA = f"`{response}` ~ 1 + area * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
AREA_REDUCED_FORMULA = f"`{response}` ~ 1 + area + stimulus_type + (1 | session)"
print(AREA_FORMULA, AREA_REDUCED_FORMULA, sep="\n")

`log(normalized differentiation)` ~ 1 + area * stimulus_type + (1 | session)
`log(normalized differentiation)` ~ 1 + area + stimulus_type + (1 | session)


In [18]:
OUTPUT_DIR = Path('results/stats')
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [19]:
GROUPING_PARAMETERS = [
    "state_length",
    "metric",
    "window",
    "window_param",
    "overlap",
    "log_frequency",
]


def run_all_stats(group):
    """Run `run_stats` on one parameter group and write its report file.

    `group.name` supplies the values of GROUPING_PARAMETERS for this group.
    They determine the report's file name (via `analysis.make_filename`) and
    are written at the top of the report.
    """
    params = {name: value for name, value in zip(GROUPING_PARAMETERS, group.name)}
    report_name = analysis.make_filename(params) + ".txt"
    header = f"Parameters:\n{params}\n"
    return run_stats(group, OUTPUT_DIR / report_name, prefix=header)

In [20]:
# Unscrambled vs. scrambled: keep the rows flagged as belonging to a scrambled pair
subset = df[df["stimulus_is_scrambled_pair"]]

In [21]:
# One `run_all_stats` call per combination of the grouping parameters;
# dropna=False keeps the groups whose key contains a missing value
parameter_groups = subset.reset_index().groupby(GROUPING_PARAMETERS, dropna=False)
stats = parameter_groups.progress_apply(run_all_stats)
# Drop the unnamed index level (presumably the row index of each per-group
# result), leaving only the grouping parameters
stats.index = stats.index.droplevel(None)

  0%|          | 0/108 [00:00<?, ?it/s]

In [22]:
# Remember the index levels so the index can be restored at the end of the cell
index = stats.index.names
stats = stats.reset_index()

# Remap values for display
stats["window"] = stats["window"].replace({None: "rectangular"})
stats["overlap"] = stats["overlap"].fillna(0)

# Short label for each posthoc contrast, per interaction
renames = {
    "layer": {
        f"{layer}:natural - {layer}:artificial": layer
        for layer in ["L2/3", "L4", "L5"]
    },
    "area": {
        f"{area}:natural - {area}:artificial": area
        for area in ["V1", "LM", "AL", "PM", "AM"]
    },
}
# Add one ordered Categorical column per interaction, holding the short label
# of the contrast named in "lhs" (missing where the contrast is not listed)
for interaction, rename in renames.items():
    stats[interaction] = pd.Categorical(
        stats["lhs"].map(rename), categories=rename.values(), ordered=True
    )

# Restore index
stats = stats.set_index(index)

In [23]:
stats.to_parquet("results/stats.parquet")

## Examine stats

In [24]:
pd.options.display.max_rows = 256

In [25]:
ALPHA = 0.05

In [26]:
def significance(p):
    """Return a star string for the p-value `p`.

    One "*" is added for each of the thresholds 0.05, 0.01 and 0.001 that
    `p` does not exceed, so the result is "", "*", "**" or "***". A NaN
    p-value yields "".
    """
    thresholds = (0.05, 0.01, 0.001)
    return "".join("*" for threshold in thresholds if p <= threshold)

In [27]:
stats["significant"] = stats["p.value"].map(significance)

### Likelihood ratio

In [28]:
# Rows of the likelihood-ratio table that belong to the full model ("model")
is_full_model_test = stats["test_type"].eq("likelihood_ratio") & stats["term"].eq("model")
likelihood_ratio = (
    stats[is_full_model_test]
    .reset_index()
    .set_index(["interaction"] + index)
    .sort_index()
    [["significant", "p.value"]]
)

In [29]:
print("Layer:")
likelihood_ratio.loc["layer"]

Layer:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,significant,p.value
state_length,metric,window,window_param,overlap,log_frequency,Unnamed: 6_level_1,Unnamed: 7_level_1
0.2,chebyshev,kaiser,14.0,0.5,False,**,0.002532
0.2,chebyshev,kaiser,14.0,0.5,True,**,0.00286
0.2,chebyshev,rectangular,,0.0,False,**,0.002203
0.2,chebyshev,rectangular,,0.0,True,**,0.002174
0.2,chebyshev,tukey,0.25,0.125,False,**,0.002459
0.2,chebyshev,tukey,0.25,0.125,True,**,0.002518
0.2,cityblock,kaiser,14.0,0.5,False,**,0.009382
0.2,cityblock,kaiser,14.0,0.5,True,**,0.009248
0.2,cityblock,rectangular,,0.0,False,**,0.008819
0.2,cityblock,rectangular,,0.0,True,**,0.008121


In [30]:
print("Area:")
likelihood_ratio.loc["area"]

Area:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,significant,p.value
state_length,metric,window,window_param,overlap,log_frequency,Unnamed: 6_level_1,Unnamed: 7_level_1
0.2,chebyshev,kaiser,14.0,0.5,False,**,0.006179
0.2,chebyshev,kaiser,14.0,0.5,True,**,0.006704
0.2,chebyshev,rectangular,,0.0,False,**,0.006981
0.2,chebyshev,rectangular,,0.0,True,**,0.007028
0.2,chebyshev,tukey,0.25,0.125,False,**,0.006616
0.2,chebyshev,tukey,0.25,0.125,True,**,0.006718
0.2,cityblock,kaiser,14.0,0.5,False,*,0.018778
0.2,cityblock,kaiser,14.0,0.5,True,*,0.018687
0.2,cityblock,rectangular,,0.0,False,*,0.019166
0.2,cityblock,rectangular,,0.0,True,*,0.019284


### Posthoc

In [31]:
significant = likelihood_ratio.loc[:, "p.value"] <= ALPHA

In [32]:
# Posthoc contrasts, indexed like `likelihood_ratio` (interaction + parameters)
posthoc = (
    stats.loc[
        (stats["test_type"] == "posthoc")
    ]
    .reset_index()
    .set_index(["interaction"] + index)
    .sort_index()
    # Drop the columns that are entirely missing for the posthoc rows
    .dropna(axis="columns", how="all")
    .round({"p.value": 4})
    # Keep only the rows selected by `significant` (likelihood-ratio p <= ALPHA)
    .loc[significant]
)

In [33]:
def _pivot_posthoc(posthoc, interaction, index):
    """Reshape the posthoc table of one interaction into a wide display table.

    Arguments:
        posthoc: The posthoc table whose first index level is the interaction.
        interaction: Name of the interaction ("layer" or "area"); also the
            name of the column holding the short contrast labels.
        index: Names of the parameter columns used as the row index.

    Returns:
        A table with one row per parameter combination and, for each contrast
        label, a "significant" and a "p.value" column.
    """
    pivoted = (
        posthoc.loc[interaction]
        .reset_index()
        .pivot(index=index, columns=[interaction], values=["significant", "p.value"])
        .T
        .reset_index()
        .set_index([interaction, "level_0"])
        .sort_index(level=interaction, sort_remaining=False)
    )
    # Blank out the name of the value level ("level_0") without mutating in place
    pivoted.index = pivoted.index.set_names("", level=1)
    return pivoted.T


layer_posthoc = _pivot_posthoc(posthoc, "layer", index)

area_posthoc = _pivot_posthoc(posthoc, "area", index)

In [34]:
print("Layer:")
layer_posthoc

Layer:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,layer,L2/3,L2/3,L4,L4,L5,L5
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,significant,p.value,significant,p.value,significant,p.value
state_length,metric,window,window_param,overlap,log_frequency,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
0.2,chebyshev,kaiser,14.0,0.5,False,***,0.0001,,0.7489,,0.9875
0.2,chebyshev,kaiser,14.0,0.5,True,***,0.0001,,0.7481,,0.9869
0.2,chebyshev,rectangular,,0.0,False,***,0.0001,,0.7876,,0.9853
0.2,chebyshev,rectangular,,0.0,True,***,0.0001,,0.7855,,0.9854
0.2,chebyshev,tukey,0.25,0.125,False,***,0.0001,,0.793,,0.9851
0.2,chebyshev,tukey,0.25,0.125,True,***,0.0001,,0.7931,,0.9851
0.2,cityblock,kaiser,14.0,0.5,False,***,0.0006,,0.6976,,0.983
0.2,cityblock,kaiser,14.0,0.5,True,***,0.0006,,0.6956,,0.9831
0.2,cityblock,rectangular,,0.0,False,***,0.0005,,0.6895,,0.9814
0.2,cityblock,rectangular,,0.0,True,***,0.0004,,0.6823,,0.9823


In [35]:
print("Area:")
area_posthoc

Area:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,area,AL,AL,AM,AM,LM,LM,PM,PM,V1,V1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,significant,p.value,significant,p.value,significant,p.value,significant,p.value,significant,p.value
state_length,metric,window,window_param,overlap,log_frequency,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
0.2,chebyshev,kaiser,14.0,0.5,False,**,0.0079,,0.0524,,0.8445,,1.0,,0.5926
0.2,chebyshev,kaiser,14.0,0.5,True,**,0.0086,,0.0524,,0.847,,1.0,,0.5995
0.2,chebyshev,rectangular,,0.0,False,**,0.0067,,0.0601,,0.8462,,1.0,,0.6244
0.2,chebyshev,rectangular,,0.0,True,**,0.0068,,0.0612,,0.8411,,1.0,,0.6147
0.2,chebyshev,tukey,0.25,0.125,False,**,0.0071,,0.0581,,0.8603,,1.0,,0.6296
0.2,chebyshev,tukey,0.25,0.125,True,**,0.0071,,0.0591,,0.859,,1.0,,0.6321
0.2,cityblock,kaiser,14.0,0.5,False,*,0.0221,,0.0796,,0.7599,,1.0,,0.6738
0.2,cityblock,kaiser,14.0,0.5,True,*,0.0218,,0.079,,0.762,,1.0,,0.6713
0.2,cityblock,rectangular,,0.0,False,*,0.0182,,0.08,,0.7723,,1.0,,0.6622
0.2,cityblock,rectangular,,0.0,True,*,0.0172,,0.08,,0.7717,,1.0,,0.659


# Mean centroid distance

### Figure 5

In [36]:
df = pd.read_parquet("results/mean_centroid_distance.parquet")

# Re-create the factor columns as unordered Categoricals using the categories
# from METADATA, since rpy2 has issues with them otherwise
for column in ["layer", "area"]:
    df[column] = pd.Categorical(
        df[column], categories=METADATA[column].cat.categories, ordered=False
    )

In [37]:
response = "log(mean_centroid_distance)"

In [38]:
# R should receive missing values rather than infinities: replace Inf with NaN
is_infinite = np.isinf(df[response])
df.loc[is_infinite, response] = np.nan

In [39]:
# Full model: layer, stimulus type and their interaction, with a random intercept per session
LAYER_FORMULA = f"`{response}` ~ 1 + layer * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
LAYER_REDUCED_FORMULA = f"`{response}` ~ 1 + layer + stimulus_type + (1 | session)"
print(LAYER_FORMULA, LAYER_REDUCED_FORMULA, sep="\n")

`log(mean_centroid_distance)` ~ 1 + layer * stimulus_type + (1 | session)
`log(mean_centroid_distance)` ~ 1 + layer + stimulus_type + (1 | session)


In [40]:
# Full model: area, stimulus type and their interaction, with a random intercept per session
AREA_FORMULA = f"`{response}` ~ 1 + area * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
AREA_REDUCED_FORMULA = f"`{response}` ~ 1 + area + stimulus_type + (1 | session)"
print(AREA_FORMULA, AREA_REDUCED_FORMULA, sep="\n")

`log(mean_centroid_distance)` ~ 1 + area * stimulus_type + (1 | session)
`log(mean_centroid_distance)` ~ 1 + area + stimulus_type + (1 | session)


In [41]:
# Unscrambled vs. scrambled: keep the rows flagged as belonging to a scrambled
# pair, and turn the index into ordinary columns
is_scrambled_pair = df["stimulus_is_scrambled_pair"]
subset = df[is_scrambled_pair].reset_index()

In [42]:
output_path = OUTPUT_DIR / "mean_centroid_distance.txt"

In [43]:
stats = run_stats(subset, output_path)

In [44]:
cat {output_path}

Layer
Data: data
Models:
reduced_model: `log(mean_centroid_distance)` ~ 1 + layer + stimulus_type + (1 | 
reduced_model:     session)
model: `log(mean_centroid_distance)` ~ 1 + layer * stimulus_type + (1 | 
model:     session)
              Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
reduced_model  6 -4777.7 -4743.6 2394.9  -4789.7                             
model          8 -4790.9 -4745.3 2403.4  -4806.9 17.103      2  0.0001933 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	 Simultaneous Tests for General Linear Hypotheses

Fit: lmer(formula = `log(mean_centroid_distance)` ~ 1 + layer * stimulus_type + 
    (1 | session), data = data, REML = FALSE)

Linear Hypotheses:
                                     Estimate Std. Error z value   Pr(>z)    
L2/3:natural - L2/3:artificial <= 0  0.028892   0.005816   4.968 1.02e-06 ***
L4:natural - L4:artificial <= 0      0.009355   0.005816   1.609    0.153    
L5:natural - L5:artificial <= 0     -0

# Figure S4

In [45]:
main = pd.read_parquet('results/main.parquet')

# Re-create the factor columns as unordered Categoricals (same categories),
# since rpy2 has issues with them otherwise
for column in ["layer", "area"]:
    main[column] = pd.Categorical(
        main[column], categories=main[column].cat.categories, ordered=False
    )

In [46]:
response = "log(normalized differentiation)"

In [47]:
# Full model: layer, stimulus type and their interaction, with a random intercept per session
LAYER_FORMULA = f"`{response}` ~ 1 + layer * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
LAYER_REDUCED_FORMULA = f"`{response}` ~ 1 + layer + stimulus_type + (1 | session)"
print(LAYER_FORMULA, LAYER_REDUCED_FORMULA, sep="\n")

`log(normalized differentiation)` ~ 1 + layer * stimulus_type + (1 | session)
`log(normalized differentiation)` ~ 1 + layer + stimulus_type + (1 | session)


In [48]:
# Full model: area, stimulus type and their interaction, with a random intercept per session
AREA_FORMULA = f"`{response}` ~ 1 + area * stimulus_type + (1 | session)"
# Reduced model: the same formula without the interaction
AREA_REDUCED_FORMULA = f"`{response}` ~ 1 + area + stimulus_type + (1 | session)"
print(AREA_FORMULA, AREA_REDUCED_FORMULA, sep="\n")

`log(normalized differentiation)` ~ 1 + area * stimulus_type + (1 | session)
`log(normalized differentiation)` ~ 1 + area + stimulus_type + (1 | session)


In [49]:
# Keep only the rows flagged by the `stimulus_is_block` column
subset = main[main['stimulus_is_block']]

In [50]:
output_path = OUTPUT_DIR / "fig-s4.txt"

In [51]:
# The Figure S4 subset has a different size, so update the row count that `run_stats` checks
EXPECTED_NUMBER_OF_OBSERVATIONS = 5280

In [52]:
result = run_stats(subset, output_path)

In [53]:
cat {output_path}

Layer
Data: data
Models:
reduced_model: `log(normalized differentiation)` ~ 1 + layer + stimulus_type + 
reduced_model:     (1 | session)
model: `log(normalized differentiation)` ~ 1 + layer * stimulus_type + 
model:     (1 | session)
              Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
reduced_model  6 6687.4 6726.8 -3337.7   6675.4                             
model          8 6675.0 6727.6 -3329.5   6659.0 16.343      2  0.0002827 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

	 Simultaneous Tests for General Linear Hypotheses

Fit: lmer(formula = `log(normalized differentiation)` ~ 1 + layer * 
    stimulus_type + (1 | session), data = data, REML = FALSE)

Linear Hypotheses:
                                    Estimate Std. Error z value   Pr(>z)    
L2/3:natural - L2/3:artificial <= 0  0.11085    0.02229   4.974 9.82e-07 ***
L4:natural - L4:artificial <= 0     -0.01002    0.02229  -0.450 0.965212    
L5:natural - L5:artificial <= 0 