In [1]:
# If you have not installed `wiscs` locally, run this cell
!pip install git+https://github.com/w-decker/wiscs.git --quiet # REQUIRED FOR THIS NOTEBOOK
!pip install git+https://github.com/w-decker/rinterface.git --quiet # REQUIRED FOR THIS NOTEBOOK

In [2]:
# always run this cell, no matter which method you choose
import wiscs
from wiscs.simulate import DataGenerator
from wiscs.formula import Formula

import numpy as np
import pandas as pd

# Generate data without order effects

In [3]:
# Seed NumPy's global RNG before the simulation parameters are built.
np.random.seed(2027)

# --- design sizes ---
n_subject = 180
n_item = 30
n_question = 2

# --- task parameters ---
# With hyp == "shared" the image task values equal the word task values;
# any other value of `hyp` makes the second image task value 105.
hyp = "shared"
word_task = np.array([100, 100])
image_task = np.array([100, 100]) if hyp == "shared" else np.array([100, 105])

# --- random-effects structure ---
re_formula = Formula("(1 + question | subject) + (1 + question | item)")
# Only the first n_question - 1 entries of this list are passed on as 'sd.question'.
question_sd = [10, 12, 15, 18, 11]

params = {
    'word.perceptual': 100,
    'image.perceptual': 95,
    'word.conceptual': 100,
    'image.conceptual': 100,
    'word.task': word_task,
    'image.task': image_task,
    # noise parameters
    'sd.item': 40,
    'sd.question': question_sd[:n_question - 1],
    'sd.subject': 35,
    "sd.modality": 30,
    "sd.error": 50,
    "sd.re_formula": str(re_formula),
    # correlations among random effects (identity matrices here)
    "corr.subject": np.eye(n_question),
    'corr.item': np.eye(n_question),
    # design parameters
    'n.subject': n_subject,
    'n.question': n_question,
    'n.item': n_item,
}
wiscs.set_params(params, verbose=False)

# Generate the data set and expose it as a DataFrame for the cells below.
DG = DataGenerator()
DG.fit_transform(seed=2027, verbose=False)
df = DG.to_pandas()

In [5]:
# df.to_csv("~/Desktop/test.csv", index=False)

In [4]:
# Sanity check of the tag variable:
#   - does this dataset follow the Exp2 design?
#   - how can the order variables be recoded to match the Exp1 and Exp2 designs?

# Work on a copy so the generated `df` stays untouched.
df2 = df.copy()

np.random.seed(44)
subjects = df2['subject'].unique()
half_size = len(subjects) // 2
n_tag_one = len(subjects) - half_size  # takes the extra subject when the count is odd

# Start from an ordered 0...0 1...1 vector, then shuffle it in place so the
# assignment of tags to subjects is random.
tags = np.array([0] * half_size + [1] * n_tag_one)
np.random.shuffle(tags)

subject_tags = {subject: tag for subject, tag in zip(subjects, tags)}
df2['tag'] = df2['subject'].map(subject_tags)

In [5]:
# One row per subject with the tag it was assigned, ordered by subject id.
subject_tags_list = (
    df2[['subject', 'tag']]
    .drop_duplicates()
    .sort_values(by='subject')
)
# Distinct tag values present in the data.
unique_tags = df2['tag'].unique()
# Number of distinct subjects that carry each tag.
unique_subjects_per_tag = df2.groupby('tag')['subject'].nunique()

print(subject_tags_list)

print(f"Unique tags: {unique_tags}")
print(f"Unique subjects per tag: {unique_subjects_per_tag}")

       subject  tag
0            0    0
60           1    0
120          2    0
180          3    1
240          4    1
...        ...  ...
10500      175    1
10560      176    0
10620      177    1
10680      178    1
10740      179    0

[180 rows x 2 columns]
Unique tags: [0 1]
Unique subjects per tag: tag
0    90
1    90
Name: subject, dtype: int64


# Generate data with order effects

In [None]:
# Rebuild df2 from the generated data so this section does not depend on
# whatever an earlier cell did to df2.
df2 = df.copy()

# Randomly split the subjects into two tag groups (0 / 1) of equal size;
# with an odd number of subjects, tag 1 receives the extra one.
np.random.seed(44)
subjects = df2['subject'].unique()
half_size = len(subjects) // 2
tags = np.array(
    [0] * half_size
    + [1] * (len(subjects) - half_size)
)
np.random.shuffle(tags)  # in-place permutation, driven by the seed above

# Look-up table subject -> tag, broadcast to every row of that subject.
subject_tags = dict(zip(subjects, tags))
df2['tag'] = df2['subject'].map(subject_tags)

# Add `qorder`

# Define the recoding function
def recode_qorder(row):
    """Return the presentation-order code (1-4) for one trial.

    The code is built from two parts:

    * block offset: 0 when the row's modality is the tag's leading modality
      ("word" for tag 0, "image" for any other tag), otherwise 2;
    * position within the block: 1 when the row's question equals the tag's
      leading question (1 for tag 0, 2 for any other tag), otherwise 2.

    Parameters
    ----------
    row : mapping with keys "tag", "modality" and "question"
        e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    int
        A value in {1, 2, 3, 4}.

    NOTE(review): this assumes `question` is coded 1/2. Other cells in this
    notebook compare `question` directly with the 0/1 `tag` -- confirm the
    coding used by the generated data.
    """
    if row["tag"] == 0:
        lead_modality, lead_question = "word", 1
    else:
        lead_modality, lead_question = "image", 2

    block_offset = 0 if row["modality"] == lead_modality else 2
    position_in_block = 1 if row["question"] == lead_question else 2
    return block_offset + position_in_block

# Apply the recoding row by row (axis=1 passes each row to recode_qorder)
# and store the resulting 1-4 order code in a new `qorder` column of df2.
df2["qorder"] = df2.apply(recode_qorder, axis=1)

In [9]:
# apply order effects

def shift_rt(row, shift=300, noise_sd=50):
    """Return the row's `rt`, slowed down if the row is in the first image block.

    A row counts as "first image block" when its modality is 'image' and
    either (tag == 0 and qorder == 3) or (tag == 1 and qorder == 1).

    Parameters
    ----------
    row : mapping with keys 'modality', 'tag', 'qorder' and 'rt'
    shift : constant added to `rt` for affected rows
    noise_sd : standard deviation of the zero-mean normal noise added on top
        of `shift`; drawn from NumPy's global RNG, for affected rows only

    Returns
    -------
    `rt + shift + noise` for affected rows, the unchanged `rt` otherwise.
    """
    in_first_image_block = row['modality'] == 'image' and (
        (row['tag'] == 0 and row['qorder'] == 3)
        or (row['tag'] == 1 and row['qorder'] == 1)
    )
    if not in_first_image_block:
        return row['rt']
    return row['rt'] + shift + np.random.normal(0, noise_sd)

# Overwrite `rt` with the shifted values (row-wise apply of shift_rt).
# NOTE: this is not idempotent -- running the cell again without rebuilding
# df2 adds the shift a second time to the rows that were already shifted.
df2['rt'] = df2.apply(shift_rt, axis=1)


In [13]:
# Flag (1/0) the rows that satisfy the tag/qorder condition used by shift_rt:
# qorder 3 for tag 0, qorder 1 for tag 1.
tag0_first_image = df2['tag'].eq(0) & df2['qorder'].eq(3)
tag1_first_image = df2['tag'].eq(1) & df2['qorder'].eq(1)
df2['first_image_block'] = (tag0_first_image | tag1_first_image).astype(int)

In [14]:
# Save the order-effect data set as CSV (relative path, so it lands in the
# current working directory); index=False leaves the row index out of the file.
df2.to_csv("wiscs_data_ordere.csv", index=False)

## Only generating order effects for a proportion of subjects

In [None]:
# Fresh copy of the generated data for the "partial order effect" variant.
df3 = df.copy()

# Randomly split the subjects into two tag groups (0 / 1) of equal size;
# with an odd number of subjects, tag 1 receives the extra one.
np.random.seed(44)
subjects = df3['subject'].unique()
half_size = len(subjects) // 2
tags = np.array([0] * half_size + [1] * (len(subjects) - half_size))
np.random.shuffle(tags)  # Shuffle to randomize the order
subject_tags = dict(zip(subjects, tags))
df3['tag'] = df3['subject'].map(subject_tags)

# In this section `qorder` is a binary flag: 0 where the row's question equals
# the subject's tag, 1 otherwise.
# The comparison uses df3's own `tag` column. It previously read df2['tag'],
# which made this cell depend on an earlier cell having been run.
# NOTE(review): this compares `question` with the 0/1 tag, whereas
# recode_qorder tests `question` against 1 and 2 -- confirm the coding of
# `question` in the generated data.
df3['qorder'] = np.where(df3['question'] == df3['tag'], 0, 1)

# Subset of subjects that receive the order effect.
proportion_with_effect = 0.35
# Round before converting: 0.35 * 180 evaluates to 62.99999999999999 in
# floating point, so a bare int() would truncate to 62 instead of 63.
n_effect = int(round(proportion_with_effect * len(subjects)))
np.random.seed(99)
# Sample without replacement; keep as a set for fast membership tests.
subjects_with_effect = set(
    np.random.choice(subjects, size=n_effect, replace=False)
)

# function to apply order effect only to some participants
def shift_rt(row, shift=300, noise_sd=50, effect_subjects=None):
    """Return the row's `rt`, slowed down only for selected subjects.

    A row is affected when all of the following hold:
    its subject is in `effect_subjects`, its modality is 'image', and its
    `question` equals its `tag`.

    NOTE: this definition replaces the earlier `shift_rt` of the same name
    once the cell has run.

    Parameters
    ----------
    row : mapping with keys 'subject', 'modality', 'question', 'tag', 'rt'
    shift : constant added to `rt` for affected rows
    noise_sd : standard deviation of the zero-mean normal noise added on top
        of `shift`; drawn from NumPy's global RNG, for affected rows only
    effect_subjects : container of subject ids, optional
        Subjects that receive the effect. When omitted (None), the
        module-level `subjects_with_effect` is used, so
        ``df.apply(shift_rt, axis=1)`` keeps working unchanged.

    Returns
    -------
    `rt + shift + noise` for affected rows, the unchanged `rt` otherwise.
    """
    if effect_subjects is None:
        # Fall back to the notebook-level selection made before this function.
        effect_subjects = subjects_with_effect

    affected = (
        row['subject'] in effect_subjects and
        row['modality'] == 'image' and
        row['question'] == row['tag']
    )
    if affected:
        return row['rt'] + shift + np.random.normal(0, noise_sd)
    return row['rt']

# Apply the order effect row by row; `rt` is overwritten in place, so
# re-running this line without rebuilding df3 shifts the affected rows again.
df3['rt'] = df3.apply(shift_rt, axis=1)
# Save as CSV in the current working directory, without the row index.
df3.to_csv("wiscs_data_ordere_prop.csv", index=False)