In [None]:
# If you have not installed `wiscs` locally, run this cell
!pip install git+https://github.com/w-decker/wiscs.git --quiet # REQUIRED FOR THIS NOTEBOOK
!pip install git+https://github.com/w-decker/rinterface.git --quiet # REQUIRED FOR THIS NOTEBOOK

In [1]:
# always run this cell, no matter which method you choose
import wiscs
from wiscs.simulate import DataGenerator
from wiscs.formula import Formula

from rinterface.rinterface import rinterface as R
from rinterface.utils import to_r

import numpy as np
import pandas as pd

# Make baseline dataset

In [39]:
np.random.seed(2027)

n_subject = 8
n_item = 30
n_question = 2
word_task = np.array([100, 100])
hyp = "shared"
if hyp == "shared":
    image_task = np.array([100, 100])
else:
    image_task = np.array([100, 105])

re_formula = Formula("(1 + question | subject) + (1 + question | item)")
question_sd = [10, 12, 15, 18, 11] # must be n_q - 1
params = {'word.perceptual': 100, 'image.perceptual': 95, 'word.conceptual': 100, 'image.conceptual': 100, 'word.task': word_task, 'image.task': image_task,
        # noise parameters     
        'sd.item': 40,     'sd.question': question_sd[:n_question-1],    'sd.subject': 35,       "sd.modality": 30, "sd.error": 50, "sd.re_formula": str(re_formula),
        # correlations among random effects    
        "corr.subject": np.eye(n_question), 'corr.item':np.eye(n_question),
        # design parameters
        'n.subject': n_subject, 'n.question': n_question, 'n.item': n_item
}
wiscs.set_params(params, verbose=False)

DG = DataGenerator()
DG.fit_transform(seed=2027, verbose=False)
df = DG.to_pandas()

# Make `tag`
`tag` is fully counterbalanced. 

`tag == 1` $\rightarrow$ `Q1-I : Q2-I : Q1-W : Q2-W` \
`tag == 2` $\rightarrow$ `Q2-I : Q1-I : Q2-W : Q1-W` \
`tag == 3` $\rightarrow$ `Q2-W : Q1-W : Q2-I : Q1-I` \
`tag == 4` $\rightarrow$ `Q1-W : Q2-W : Q1-I : Q2-I`

In [40]:
group_tags = [1, 2, 3, 4]
subject_ids = np.arange(n_subject)
tags = np.tile(group_tags, int(n_subject / len(group_tags)))

# In case n_subjects is not divisible by 4
np.random.shuffle(tags)
subject_tag_map = dict(zip(subject_ids, tags))
df['tag'] = df['subject'].map(subject_tag_map)

In [41]:
# find how many subjects are in each tag
tag_counts = df['tag'].value_counts().sort_index()
print("Tag counts:")
print(tag_counts)

Tag counts:
tag
1    240
2    240
3    240
4    240
Name: count, dtype: int64


# Make block order

In [42]:
# function that is applied row-wise to create the block order variable

def make_block_order(row):
    tag = row['tag']
    modality = row['modality']
    question = row['question']

    if tag == 1:
        if modality == 'image' and question == 0: return 1
        if modality == 'image' and question == 1: return 2
        if modality == 'word' and question == 0: return 3
        if modality == 'word' and question == 1: return 4

    elif tag == 2:
        if modality == 'image' and question == 0: return 2
        if modality == 'image' and question == 1: return 1
        if modality == 'word' and question == 0: return 4
        if modality == 'word' and question == 1: return 3
    
    elif tag == 3:
        if modality == 'image' and question == 0: return 3
        if modality == 'image' and question == 1: return 4
        if modality == 'word' and question == 0: return 1
        if modality == 'word' and question == 1: return 2

    elif tag == 4:
        if modality == 'image' and question == 0: return 4
        if modality == 'image' and question == 1: return 3
        if modality == 'word' and question == 0: return 2
        if modality == 'word' and question == 1: return 1
    else:
        raise ValueError(f"Invalid tag: {tag}")

In [43]:
df['block_order'] = df.apply(make_block_order, axis=1)

In [44]:
df.head()

Unnamed: 0,subject,rt,question,item,modality,tag,block_order
0,0,293.943622,0,0,image,2,2
1,0,377.888696,0,1,image,2,2
2,0,329.188892,0,2,image,2,2
3,0,333.048233,0,3,image,2,2
4,0,276.623143,0,4,image,2,2


# Test whether `block_order` is okay

In [45]:
code = f"""


suppressMessages(library(lme4))
suppressMessages(library(lmerTest))
suppressMessages(library(tidyverse))

control <- lmerControl(optimizer = "bobyqa", check.conv.singular = "ignore") # suppress singular fit warnings

all_data <- {to_r(df)} %>%
  mutate(
    question = as.factor(question),
    subject = as.factor(subject),
    item = as.factor(item),
    tag = as.factor(tag),
    modality = factor(modality, levels = c("word", "image")), 
    block = as.factor(block_order)
  )

# set reference levels
contrasts(all_data$modality) <- c(-0.5, 0.5)
contrasts(all_data$question) <- c(-0.5, 0.5)
contrasts(all_data$block) <- contr.sum(4)

model <- lmer(rt ~ modality * question + block +
                (1 + question | subject) +
                (1 + question | item),
              data = all_data, REML = FALSE, control = control)

summary(model)
"""

In [46]:
R(code)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: rt ~ modality * question + block + (1 + question | subject) +  
    (1 + question | item)
   Data: all_data
Control: control

      AIC       BIC    logLik -2*log(L)  df.resid 
  10388.7   10456.8   -5180.3   10360.7       946 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3872 -0.6673  0.0036  0.6397  3.1321 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 item     (Intercept) 1785.8   42.26         
          question1    276.2   16.62    0.13 
 subject  (Intercept)  955.2   30.91         
          question1    116.0   10.77    -0.54
 Residual             2440.0   49.40         
Number of obs: 960, groups:  item, 30; subject, 8

Fixed effects:
                    Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)          313.207     13.471  16.268  23.250 6.52e-14 ***
modality1             -7.559      3.188 885.220  -2.371    0.0

So no rank-deficient warnings!

# Make question order

In [47]:
# function that is applied row-wise to create `question_order` variable

def make_question_order(row):
    tag = row['tag']
    question = row['question']

    if tag == 1:
        if question == 0: return 1
        if question == 1: return 2
    elif tag == 2:
        if question == 0: return 2
        if question == 1: return 1
    elif tag == 3:
        if question == 0: return 2
        if question == 1: return 1
    elif tag == 4:
        if question == 0: return 1
        if question == 1: return 2

In [48]:
df ['question_order'] = df.apply(make_question_order, axis=1)

In [49]:
df.head()

Unnamed: 0,subject,rt,question,item,modality,tag,block_order,question_order
0,0,293.943622,0,0,image,2,2,2
1,0,377.888696,0,1,image,2,2,2
2,0,329.188892,0,2,image,2,2,2
3,0,333.048233,0,3,image,2,2,2
4,0,276.623143,0,4,image,2,2,2


# Test whether `question_order` is okay

In [None]:
code = f"""

suppressMessages(library(lme4))
suppressMessages(library(lmerTest))
suppressMessages(library(tidyverse))

control <- lmerControl(optimizer = "bobyqa", check.conv.singular = "ignore") # suppress singular fit warnings

all_data <- {to_r(df)} %>%
  mutate(
    question = as.factor(question),
    subject = as.factor(subject),
    item = as.factor(item),
    tag = as.factor(tag),
    modality = factor(modality, levels = c("word", "image")), 
    block = as.factor(block_order),
    question_order = as.factor(question_order)
  )

# set reference levels
contrasts(all_data$modality) <- c(-0.5, 0.5)
contrasts(all_data$question) <- c(-0.5, 0.5)
contrasts(all_data$block) <- contr.sum(4)
contrasts(all_data$question_order) <- c(-0.5, 0.5)

model <- lmer(rt ~ modality * question + question_order +
                (1 + question | subject) +
                (1 + question | item),
              data = all_data, REML = FALSE, control = control)

summary(model)

# print(model.matrix(model))
"""

In [57]:
R(code)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: rt ~ modality * question + block + question_order + (1 + question |  
    subject) + (1 + question | item)
   Data: all_data
Control: control

      AIC       BIC    logLik -2*log(L)  df.resid 
  10390.5   10463.5   -5180.3   10360.5       945 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3809 -0.6708  0.0028  0.6355  3.1433 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 item     (Intercept) 1786.86  42.271        
          question1    275.81  16.607   0.13 
 subject  (Intercept)  956.18  30.922        
          question1     97.97   9.898   -0.43
 Residual             2439.99  49.396        
Number of obs: 960, groups:  item, 30; subject, 8

Fixed effects:
                    Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)         313.2072    13.4769  16.2499  23.240 6.73e-14 ***
modality1            -7.5590     3.1885 885

So `question_order` works alone! Let's try `question_order:modality` term.

In [None]:
code = f"""

suppressMessages(library(lme4))
suppressMessages(library(lmerTest))
suppressMessages(library(tidyverse))

control <- lmerControl(optimizer = "bobyqa", check.conv.singular = "ignore") # suppress singular fit warnings

all_data <- {to_r(df)} %>%
  mutate(
    question = as.factor(question),
    subject = as.factor(subject),
    item = as.factor(item),
    tag = as.factor(tag),
    modality = factor(modality, levels = c("word", "image")), 
    block = as.factor(block_order),
    question_order = as.factor(question_order)
  )

# set reference levels
contrasts(all_data$modality) <- c(-0.5, 0.5)
contrasts(all_data$question) <- c(-0.5, 0.5)
contrasts(all_data$block) <- contr.sum(4)
contrasts(all_data$question_order) <- c(-0.5, 0.5)

model <- lmer(rt ~ modality * question  + question_order:modality +
                (1 + question | subject) +
                (1 + question | item),
              data = all_data, REML = FALSE, control = control)

summary(model)

# print(model.matrix(model))
"""

In [60]:
R(code)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: rt ~ modality * question + block + question_order:modality +  
    (1 + question | subject) + (1 + question | item)
   Data: all_data
Control: control

      AIC       BIC    logLik -2*log(L)  df.resid 
  10390.5   10463.5   -5180.3   10360.5       945 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3809 -0.6708  0.0028  0.6355  3.1433 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 item     (Intercept) 1786.86  42.271        
          question1    275.81  16.607   0.13 
 subject  (Intercept)  956.18  30.922        
          question1     97.97   9.898   -0.43
 Residual             2439.99  49.396        
Number of obs: 960, groups:  item, 30; subject, 8

Fixed effects:
                             Estimate Std. Error      df t value Pr(>|t|)    
(Intercept)                   313.207     13.477  16.250  23.240 6.73e-14 ***
modality1         

fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
fixed-effect model matrix is rank deficient so dropping 1 column / coefficient


The `question:modality` interaction term does not work and introduces rank-deficient warnings.

# Let's try `block_order` _and_ `question_order` at the same time

In [68]:
code = f"""

suppressMessages(library(lme4))
suppressMessages(library(lmerTest))
suppressMessages(library(tidyverse))

control <- lmerControl(optimizer = "bobyqa", check.conv.singular = "ignore") # suppress singular fit warnings

all_data <- {to_r(df)} %>%
  mutate(
    question = as.factor(question),
    subject = as.factor(subject),
    item = as.factor(item),
    tag = as.factor(tag),
    modality = factor(modality, levels = c("word", "image")), 
    block = as.factor(block_order),
    question_order = as.factor(question_order)
  )

# set reference levels
contrasts(all_data$modality) <- c(-0.5, 0.5)
contrasts(all_data$question) <- c(-0.5, 0.5)
contrasts(all_data$block) <- contr.sum(4)
contrasts(all_data$question_order) <- c(-0.5, 0.5)

model <- lmer(rt ~ modality * question + block + question_order +
                (1 + question | subject) +
                (1 + question | item),
              data = all_data, REML = FALSE, control = control)

summary(model)

# print(model.matrix(model))
"""

In [69]:
R(code)

Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: rt ~ modality * question + block + question_order + (1 + question |  
    subject) + (1 + question | item)
   Data: all_data
Control: control

      AIC       BIC    logLik -2*log(L)  df.resid 
  10390.5   10463.5   -5180.3   10360.5       945 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.3809 -0.6708  0.0028  0.6355  3.1433 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 item     (Intercept) 1786.86  42.271        
          question1    275.81  16.607   0.13 
 subject  (Intercept)  956.18  30.922        
          question1     97.97   9.898   -0.43
 Residual             2439.99  49.396        
Number of obs: 960, groups:  item, 30; subject, 8

Fixed effects:
                    Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)         313.2072    13.4769  16.2499  23.240 6.73e-14 ***
modality1            -7.5590     3.1885 885

Everything works here!