# Generate Experiment Stimuli

In [15]:
# --- Imports ------------------------------------------------------------------

from itertools import product

import numpy as np
import pandas as pd


## Generate Stimuli by Sampling

In [18]:
# Enumerate every distinct (part size, alignment) condition for a part that
# occupies `selectedPart` percent of a 0-100 range and starts at
# `startPosition`, then collect the conditions into the DataFrame `df`.

# Part sizes treated as anchors, and the sizes counted as "near" an anchor.
# NOTE(review): nearAnchors stops at 49, so sizes 51-54 are labelled
# "far-anchor" although selectedPart runs up to 55 — confirm this is intended.
anchors = [25, 50]
nearAnchors = [21, 22, 23, 24, 26, 27, 28, 29, 46, 47, 48, 49]

# Reference marks that the start edge / end edge of a part can align with.
startPositions = [0, 25, 50, 75]
endPositions = [25, 50, 75, 100]

# Parallel columns, one entry per unique condition.
values = []
alignmentCategories = []
alignmentDistances = []
alignmentTypes = []
valueAlignments = []
anchorCategories = []
anchorDistances = []
round5Distances = []
positions = []

# Set mirror of valueAlignments so the duplicate check is O(1) per condition
# instead of scanning the whole list each time.
seenValueAlignments = set()

for selectedPart in range(15, 56):
    # The next three quantities depend only on the part size, so they are
    # computed once per size rather than once per start position.

    # determine anchor category
    anchorDistance = min(abs(selectedPart - anchor) for anchor in anchors)
    if selectedPart in anchors:
        anchorCategory = "anchor"
    elif selectedPart in nearAnchors:
        anchorCategory = "near-anchor"
    else:
        anchorCategory = "far-anchor"

    # determine distance to the nearest 5
    round5Distance = abs(selectedPart - round(selectedPart / 5) * 5)

    # Only start positions that keep the end of the part at or below 100 are
    # possible, so the range stops there.
    for startPosition in range(0, 100 - selectedPart + 1):
        endPosition = startPosition + selectedPart

        # distance of each edge to every reference mark it could align with
        startDists = [abs(startPosition - mark) for mark in startPositions]
        endDists = [abs(endPosition - mark) for mark in endPositions]

        # np.argmin returns the first minimum, so ties go to the lower mark
        startAligned = int(np.argmin(startDists))
        endAligned = int(np.argmin(endDists))
        startAlignment = startDists[startAligned]
        endAlignment = endDists[endAligned]

        # determine which end is closer to alignment
        if startAlignment < endAlignment:
            alignmentSide = "start"
            alignmentPosition = startPositions[startAligned]
        elif startAlignment > endAlignment:
            alignmentSide = "end"
            alignmentPosition = endPositions[endAligned]
        else:
            # Both edges are equally close: report both marks as text.
            alignmentSide = "both ends"
            alignmentPosition = (
                f"{startPositions[startAligned]} and {endPositions[endAligned]}"
            )

        # determine the alignment category
        alignmentDistance = min(startAlignment, endAlignment)
        if alignmentDistance == 0:
            alignmentCategory = "aligned"
        elif alignmentDistance < 5:
            alignmentCategory = "near-align"
        else:
            alignmentCategory = "far-align"

        # The description doubles as the de-duplication key: only the first
        # start position that yields a given description is kept.
        valueAlignment = f"Value {selectedPart} with {alignmentSide} {alignmentDistance} off alignment with {alignmentPosition}."
        if valueAlignment in seenValueAlignments:
            continue
        seenValueAlignments.add(valueAlignment)

        valueAlignments.append(valueAlignment)
        values.append(selectedPart)
        alignmentCategories.append(alignmentCategory)
        anchorCategories.append(anchorCategory)
        alignmentDistances.append(alignmentDistance)
        anchorDistances.append(anchorDistance)
        round5Distances.append(round5Distance)
        alignmentTypes.append(alignmentPosition)
        positions.append(startPosition)

# Note the column naming: "alignmentType" holds the mark(s) the part aligns
# with, while "alignmentPosition" holds the part's start position.
df = pd.DataFrame({
    # "Description": valueAlignments,
    "selectedPart": values,
    "alignmentCategory": alignmentCategories,
    "anchorCategory": anchorCategories,
    "anchorDistance": anchorDistances,
    "alignmentDistance": alignmentDistances,
    "round5Distance": round5Distances,
    "alignmentType": alignmentTypes,
    "alignmentPosition": positions
})
df

Unnamed: 0,selectedPart,alignmentCategory,anchorCategory,anchorDistance,alignmentDistance,round5Distance,alignmentType,alignmentPosition
0,15,aligned,far-anchor,10,0,0,0,0
1,15,near-align,far-anchor,10,1,0,0,1
2,15,near-align,far-anchor,10,2,0,0,2
3,15,near-align,far-anchor,10,3,0,0,3
4,15,near-align,far-anchor,10,4,0,0,4
...,...,...,...,...,...,...,...,...
2297,55,near-align,far-anchor,5,4,0,100,41
2298,55,near-align,far-anchor,5,3,0,100,42
2299,55,near-align,far-anchor,5,2,0,100,43
2300,55,near-align,far-anchor,5,1,0,100,44


In [None]:
# Sample a per-participant set of stimuli from the table of possible
# conditions, tagging every sampled row with a chart type and participant id.
#
# Reads `allConditions`, which must already exist when this cell runs. It is
# not defined in this cell; presumably it is the condition table built by the
# enumeration cell above (columns anchorCategory, alignmentCategory,
# selectedPart, ...) — TODO confirm how it is loaded into R.
library(dplyr)

# Anchor categories to sample from (the names drive the outer loop).
# NOTE(review): the counts stored here are not used to size any sample below;
# confirm whether they were meant to cap the rows drawn per anchor category.
anchorCounts <- list(
  "anchor" = 12,
  "near-anchor" = 36,
  "far-anchor" = 48
)

# Rows drawn for each alignment category, per anchor category and chart type.
# The names must equal the values stored in `alignmentCategory`; the
# enumeration cell writes "aligned", "near-align" and "far-align".
# NOTE(review): as written, every anchor x chart combination draws
# 24 + 36 + 36 = 96 rows, i.e. 3 x 2 x 96 = 576 rows per participant —
# confirm this is the intended design.
alignmentCounts <- list(
  "aligned" = 24,
  "near-align" = 36,
  "far-align" = 36
)

participantIDs <- 1:100
chartTypes <- c("line", "pie")
set.seed(123)

# Draw `n` rows from `pool` with replacement and tag them with `chart`.
# Warns (using `description` in the message) when the pool is smaller than
# `n`; an empty pool yields zero rows instead of attempting to sample.
draw_conditions <- function(pool, n, chart, description) {
  if (nrow(pool) < n) {
    warning(paste("Not enough samples for:", description))
  }
  if (nrow(pool) == 0) {
    return(mutate(pool, chartType = chart))
  }
  pool %>%
    slice_sample(n = n, replace = TRUE) %>%
    mutate(chartType = chart)
}

participantStimuli <- list()

for (pid in participantIDs) {
  # Collect the draws in a list and bind once, rather than growing a
  # data frame inside the loops.
  draws <- list()

  # Odd-numbered participants give any odd leftover row to anchor value 25,
  # even-numbered participants to 50.
  give_extra_to_25 <- (pid %% 2 == 1)

  for (anchor in names(anchorCounts)) {
    for (chart in chartTypes) {
      for (align in names(alignmentCounts)) {
        n_to_sample <- alignmentCounts[[align]]

        # `.env$` makes explicit that these are the loop variables, not
        # columns of allConditions.
        category_pool <- allConditions %>%
          filter(anchorCategory == .env$anchor,
                 alignmentCategory == .env$align)

        if (anchor == "anchor") {
          # Split n_to_sample evenly between anchor values 25 and 50
          n_each_anchor <- n_to_sample %/% 2
          leftover <- n_to_sample %% 2  # 1 when n_to_sample is odd

          for (anchorValue in c(25, 50)) {
            gets_extra <- (anchorValue == 25) == give_extra_to_25
            n_this <- n_each_anchor + if (gets_extra) leftover else 0

            # Anchor rows are the ones whose part size equals the anchor
            # value, so restrict the pool on selectedPart.
            sample_pool <- category_pool %>%
              filter(selectedPart == .env$anchorValue)

            draws[[length(draws) + 1]] <- draw_conditions(
              sample_pool, n_this, chart,
              paste(anchor, align, chart, anchorValue)
            )
          }
        } else {
          draws[[length(draws) + 1]] <- draw_conditions(
            category_pool, n_to_sample, chart,
            paste(anchor, align, chart)
          )
        }
      }
    }
  }

  participantDF <- bind_rows(draws)
  participantDF$UID <- pid
  participantStimuli[[pid]] <- participantDF
}

stimuli <- bind_rows(participantStimuli)

# Give every stimulus a random label (A-G) for its selected part.
stimuli$selectedLabel <- sample(LETTERS[1:7], nrow(stimuli), replace = TRUE)
stimuli

In [None]:
# Fix the RNG state so the part sizes and label shuffles generated in this
# cell are reproducible.
set.seed(123)

# Labels of the seven parts in every stimulus.
labels <- LETTERS[1:7]  # A to G

# Helper to get 'n' positive integers summing to 'target'.
#
# Shares are drawn from Beta(2, 10), rescaled so they sum to `target`, and
# rounded. Any rounding drift is added to the first element, and the whole
# draw is repeated until every element is positive.
#
# n:      number of parts to produce (whole number >= 0).
# target: total the parts must add up to.
# Returns a numeric vector of length `n` whose elements are > 0 and sum to
# `target`. Stops with an error when no such vector exists (target < n, or
# n == 0 with a non-zero target) instead of retrying forever.
sample_partition <- function(n, target) {
  if (n == 0) {
    if (target != 0) {
      stop("cannot split ", target, " into 0 parts", call. = FALSE)
    }
    return(numeric(0))
  }
  if (target < n) {
    stop(
      "cannot split ", target, " into ", n, " positive integers",
      call. = FALSE
    )
  }

  repeat {
    raw <- rbeta(n, 2, 10)
    scaled <- round(raw / sum(raw) * target)

    # Rounding can leave the total off by a little; the first part absorbs it.
    diff <- target - sum(scaled)
    if (diff != 0) scaled[1] <- scaled[1] + diff

    if (all(scaled > 0)) return(scaled)
  }
}

# Build the seven part sizes and their left-to-right order for one stimulus.
#
# row: a single stimulus record exposing `selectedPart`, `selectedLabel` and
#      `alignmentPosition` through `[[`.
# Uses the global `labels` vector and `sample_partition()`.
# Returns a named list with one size per label followed by one
# `<label>_ind` entry per label holding that part's zero-based position.
generate_parts_row <- function(row) {
  target_size <- as.numeric(row[["selectedPart"]])
  target_label <- as.character(row[["selectedLabel"]])
  space_before <- as.numeric(row[["alignmentPosition"]])
  space_after <- 100 - target_size - space_before

  # Share the six filler parts between the two sides in proportion to the
  # space available on each side.
  n_before <- round(6 * (space_before / (space_before + space_after)))
  n_after <- 6 - n_before

  # A side with room must receive at least one filler part.
  if (n_before == 0 && space_before > 0) {
    n_before <- 1
    n_after <- 5
  }
  if (n_after == 0 && space_after > 0) {
    n_after <- 1
    n_before <- 5
  }

  # Sizes are drawn left side first, then right side.
  sizes_before <- numeric(0)
  if (n_before > 0) {
    sizes_before <- sample_partition(n_before, space_before)
  }
  sizes_after <- numeric(0)
  if (n_after > 0) {
    sizes_after <- sample_partition(n_after, space_after)
  }

  # Shuffle the six remaining labels; the first n_before go to the left of
  # the selected part and the next n_after to its right.
  shuffled <- sample(setdiff(labels, target_label))
  ordered_labels <- c(
    shuffled[seq_len(n_before)],
    target_label,
    shuffled[n_before + seq_len(n_after)]
  )
  ordered_sizes <- c(sizes_before, target_size, sizes_after)

  # Fill the per-label outputs in one step each, keyed by label name.
  values <- setNames(rep(NA_integer_, 7), labels)
  values[ordered_labels] <- ordered_sizes

  indexMap <- setNames(rep(NA_integer_, 7), paste0(labels, "_ind"))
  indexMap[paste0(ordered_labels, "_ind")] <- 0:6

  as.list(c(values, indexMap))
}

# Generate the part layout for every stimulus row and append it to `stimuli`
# as numeric columns A..G (part sizes) and A_ind..G_ind (zero-based order).
part_names <- c(LETTERS[1:7], paste0(LETTERS[1:7], "_ind"))

# seq_len() yields an empty sequence for a table with no rows, whereas
# 1:nrow(stimuli) would iterate over c(1, 0).
newParts <- lapply(
  seq_len(nrow(stimuli)),
  function(i) generate_parts_row(stimuli[i, ])
)

# One column of the matrix per stimulus; vapply() guarantees every layout is
# exactly length(part_names) numbers, selected by name so the column order
# is fixed.
part_matrix <- vapply(
  newParts,
  function(parts) as.numeric(unlist(parts)[part_names]),
  numeric(length(part_names))
)
newParts_df <- as.data.frame(t(part_matrix))
names(newParts_df) <- part_names

stimuli <- cbind(stimuli, newParts_df)
stimuli

In [None]:
# Step 1: Names of the part-size columns and of their matching index columns
part_cols <- LETTERS[seq_len(7)]                  # "A" .. "G"
index_cols <- paste(part_cols, "ind", sep = "_")  # "A_ind" .. "G_ind"

# Step 2: Define a function to get pre-selected part sum and compare to alignmentPosition
#
# row: one stimulus record — either the named character vector that
#      `apply(df, 1, ...)` passes, or a one-row data frame / list — holding
#      `selectedLabel`, `alignmentPosition`, the part-size columns named in
#      `part_cols` and the index columns named in `index_cols`.
# Uses the global `part_cols` and `index_cols` vectors.
# Returns TRUE when the sizes of all parts placed before the selected part
# add up to `alignmentPosition`, otherwise FALSE.
check_alignment <- function(row) {
  # `[[` plus as.character()/unlist() lets the same code read a character
  # vector, a list, or a one-row data frame (including a factor label).
  selected_label <- as.character(row[[ "selectedLabel" ]])
  selected_index <- as.numeric(row[[paste0(selected_label, "_ind")]])
  alignment_pos <- as.numeric(row[["alignmentPosition"]])

  # Extract all part indices and sizes from the row
  part_indices <- as.numeric(unlist(row[index_cols]))
  part_values <- as.numeric(unlist(row[part_cols]))

  # Find parts that are before the selected part index
  preceding_values <- part_values[part_indices < selected_index]

  # The sum of preceding values should equal alignmentPosition
  sum(preceding_values) == alignment_pos
}

# Step 3: Apply the check across all rows
alignment_correct <- apply(stimuli, 1, check_alignment)

# Step 4: Count how many rows fail the check, and flag a non-zero (or
# undetermined) count so it is not overlooked before the file is written.
n_misaligned <- sum(!alignment_correct)
if (!isTRUE(n_misaligned == 0)) {
  warning(
    "Alignment check did not pass for all stimuli; failing rows: ",
    n_misaligned,
    call. = FALSE
  )
}
n_misaligned

# Shuffle the trial order within each participant.
stimuli <- stimuli %>%
  group_by(UID) %>%
  slice_sample(prop = 1) %>%
  ungroup()

# Create the output directory when it is missing so write.csv() cannot fail
# on a fresh checkout.
output_path <- file.path("data", "stimuli.csv")
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
write.csv(stimuli, output_path, row.names = FALSE)