In [149]:
# Import libraries
library(ggplot2)
library(dplyr)
library(ggdist)
library(patchwork)

In [150]:
# Load data
allConditions <- read.csv("data/allConditions.csv", header=T)
allConditions

selectedPart,alignmentCategory,anchorCategory,anchorDistance,alignmentDistance,round5Distance,alignmentType,alignmentPosition
<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<int>
15,aligned,far-anchor,10,0,0,0,0
15,near-aligned,far-anchor,10,1,0,0,1
15,near-aligned,far-anchor,10,2,0,0,2
15,near-aligned,far-anchor,10,3,0,0,3
15,near-aligned,far-anchor,10,4,0,0,4
15,far-from-aligned,far-anchor,10,5,0,0 and 25,5
15,near-aligned,far-anchor,10,4,0,25,6
15,near-aligned,far-anchor,10,3,0,25,7
15,near-aligned,far-anchor,10,2,0,25,8
15,near-aligned,far-anchor,10,1,0,25,9


In [151]:
library(dplyr)

participantIDs <- 1:100

# Define anchor categories and alignment categories
anchorCategories <- c("anchor", "near-anchor", "far-anchor")
alignmentCategories <- c("aligned", "near-aligned", "far-from-aligned")
chartTypes <- c("line", "pie")

# Sampling targets per anchor category
anchorCounts <- list(
  anchor = 12, 
  `near-anchor` = 36,
  `far-anchor` = 48
)

set.seed(123)
participantStimuli <- list()

for (pid in participantIDs) {
  participantDF <- data.frame()

  for (anchor in anchorCategories) {
    total <- anchorCounts[[anchor]]
    per_chart <- total / 2  # per chartType

    # Temporary variables for easier reference
    n_aligned <- ceiling(per_chart * 0.25)
    n_remaining <- per_chart - n_aligned
    n_near <- floor(n_remaining / 2)
    n_far <- per_chart - n_aligned - n_near

    alignmentCounts <- c(
      aligned = n_aligned,
      `near-aligned` = n_near,
      `far-from-aligned` = n_far
    )

    for (chart in chartTypes) {
      for (align in names(alignmentCounts)) {
        n_to_sample <- alignmentCounts[[align]]

        if (anchor == "anchor") {
          # Split n_to_sample evenly between anchorValue 25 and 50
          n_each_anchor <- floor(n_to_sample / 2)
          leftover <- n_to_sample - 2 * n_each_anchor  # In case of odd number

          for (anchorValue in c(25, 50)) {
            give_extra_to_25 <- (pid %% 2 == 1)
            n_this <- n_each_anchor + ifelse((anchorValue == 25) == give_extra_to_25, leftover, 0)

            sample_pool <- allConditions %>%
              filter(anchorCategory == anchor,
                     alignmentCategory == align,
                     anchorValue == anchorValue)

            if (nrow(sample_pool) < n_this) {
              warning(paste("Not enough samples for:", anchor, align, chart, anchorValue))
            }

            sampled <- sample_pool %>%
              sample_n(n_this, replace = TRUE) %>%
              mutate(chartType = chart)

            participantDF <- bind_rows(participantDF, sampled)
          }

        } else {
          sample_pool <- allConditions %>%
            filter(anchorCategory == anchor,
                   alignmentCategory == align)

          if (nrow(sample_pool) < n_to_sample) {
            warning(paste("Not enough samples for:", anchor, align, chart))
          }

          sampled <- sample_pool %>%
            sample_n(n_to_sample, replace = TRUE) %>%
            mutate(chartType = chart)

          participantDF <- bind_rows(participantDF, sampled)
        }
      }
    }
  }

  participantDF$UID <- pid
  participantStimuli[[pid]] <- participantDF
}

stimuli <- bind_rows(participantStimuli)
stimuli$selectedLabel <- sample(LETTERS[1:7], nrow(stimuli), replace = TRUE)
stimuli

selectedPart,alignmentCategory,anchorCategory,anchorDistance,alignmentDistance,round5Distance,alignmentType,alignmentPosition,chartType,UID,selectedLabel
<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<int>,<chr>,<int>,<chr>
50,aligned,anchor,0,0,0,50 and 100,50,line,1,B
50,aligned,anchor,0,0,0,50 and 100,50,line,1,G
50,near-aligned,anchor,0,3,0,0 and 50,3,line,1,C
25,near-aligned,anchor,0,3,0,75 and 100,72,line,1,D
25,far-from-aligned,anchor,0,7,0,0 and 25,7,line,1,D
50,far-from-aligned,anchor,0,11,0,25 and 75,14,line,1,C
25,aligned,anchor,0,0,0,25 and 50,25,pie,1,D
50,aligned,anchor,0,0,0,25 and 75,25,pie,1,D
25,near-aligned,anchor,0,2,0,50 and 75,48,pie,1,E
25,near-aligned,anchor,0,4,0,25 and 50,21,pie,1,F


In [152]:
set.seed(123)
labels <- LETTERS[1:7]  # A to G

set.seed(123)
labels <- LETTERS[1:7]  # A to G

# Helper to get 'n' positive integers summing to 'target'
sample_partition <- function(n, target) {
  repeat {
    raw <- rbeta(n, 2, 10)
    scaled <- round(raw / sum(raw) * target)
    diff <- target - sum(scaled)
    if (diff != 0) scaled[1] <- scaled[1] + diff
    if (all(scaled > 0)) return(scaled)
  }
}

generate_parts_row <- function(row) {
  selectedPart <- as.numeric(row[["selectedPart"]])
  selectedLabel <- as.character(row[["selectedLabel"]])
  alignmentPos <- as.numeric(row[["alignmentPosition"]])

  otherLabels <- setdiff(labels, selectedLabel)
  remainingSpace <- 100 - selectedPart
  leftSpace <- alignmentPos
  rightSpace <- 100 - selectedPart - alignmentPos

  # Determine how many go left vs right
  leftCount <- round(6 * (leftSpace / (leftSpace + rightSpace)))
  rightCount <- 6 - leftCount

  # Adjust for edge cases
  if (leftCount == 0 && leftSpace > 0) {
    leftCount <- 1
    rightCount <- 5
  }
  if (rightCount == 0 && rightSpace > 0) {
    rightCount <- 1
    leftCount <- 5
  }

  # Sample values for each side
  leftVals <- if (leftCount > 0) sample_partition(leftCount, leftSpace) else numeric(0)
  rightVals <- if (rightCount > 0) sample_partition(rightCount, rightSpace) else numeric(0)

  # Combine full part values in left → selected → right order
  partValues <- c(leftVals, selectedPart, rightVals)

  # Assign labels and indices
  shuffledLabels <- sample(otherLabels)
  leftLabels <- if (leftCount > 0) shuffledLabels[1:leftCount] else character(0)
  rightLabels <- if (rightCount > 0) shuffledLabels[(leftCount + 1):(leftCount + rightCount)] else character(0)
  orderedLabels <- c(leftLabels, selectedLabel, rightLabels)

  values <- setNames(rep(NA_integer_, 7), labels)
  indexMap <- setNames(rep(NA_integer_, 7), paste0(labels, "_ind"))

  for (i in 0:6) {
    lab <- orderedLabels[i + 1]
    values[lab] <- partValues[i + 1]
    indexMap[paste0(lab, "_ind")] <- i
  }

  return(as.list(c(values, indexMap)))
}

newParts <- lapply(1:nrow(stimuli), function(i) generate_parts_row(stimuli[i, ]))
newParts_df <- as.data.frame(do.call(rbind, newParts), stringsAsFactors = FALSE)

for (col in c(LETTERS[1:7], paste0(LETTERS[1:7], "_ind"))) {
  newParts_df[[col]] <- as.numeric(newParts_df[[col]])
}

stimuli <- cbind(stimuli, newParts_df)
stimuli

selectedPart,alignmentCategory,anchorCategory,anchorDistance,alignmentDistance,round5Distance,alignmentType,alignmentPosition,chartType,UID,⋯,E,F,G,A_ind,B_ind,C_ind,D_ind,E_ind,F_ind,G_ind
<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<int>,<chr>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
50,aligned,anchor,0,0,0,50 and 100,50,line,1,⋯,19,7,9,3,6,0,1,2,4,5
50,aligned,anchor,0,0,0,50 and 100,50,line,1,⋯,11,13,50,4,3,2,1,0,5,6
50,near-aligned,anchor,0,3,0,0 and 50,3,line,1,⋯,9,3,21,0,2,1,6,4,5,3
25,near-aligned,anchor,0,3,0,75 and 100,72,line,1,⋯,3,19,4,0,1,4,5,6,3,2
25,far-from-aligned,anchor,0,7,0,0 and 25,7,line,1,⋯,14,11,7,4,5,3,1,2,6,0
50,far-from-aligned,anchor,0,11,0,25 and 75,14,line,1,⋯,8,4,3,5,1,2,6,0,3,4
25,aligned,anchor,0,0,0,25 and 50,25,pie,1,⋯,24,7,4,0,6,1,2,5,3,4
50,aligned,anchor,0,0,0,25 and 75,25,pie,1,⋯,10,8,9,6,4,1,3,2,0,5
25,near-aligned,anchor,0,2,0,50 and 75,48,pie,1,⋯,25,18,19,2,3,6,1,4,5,0
25,near-aligned,anchor,0,4,0,25 and 50,21,pie,1,⋯,7,25,12,1,4,0,6,5,2,3


In [153]:
# # Make sure A to G are numeric
# part_cols <- LETTERS[1:7]

# stimuli[part_cols] <- lapply(stimuli[part_cols], as.numeric)

# # Now compute row sums safely
# stimuli$partSum <- rowSums(stimuli[part_cols])

# # Count how many do not sum to 100
# sum(stimuli$partSum != 100)


In [154]:
# Step 1: Get part value columns and their index columns
part_cols <- LETTERS[1:7]             # A to G
index_cols <- paste0(part_cols, "_ind")  # A_ind to G_ind

# Step 2: Define a function to get pre-selected part sum and compare to alignmentPosition
check_alignment <- function(row) {
  selected_label <- row["selectedLabel"]
  selected_index <- as.numeric(row[paste0(selected_label, "_ind")])
  alignment_pos <- as.numeric(row["alignmentPosition"])
  
  # Extract all part indices from the row
  part_indices <- as.numeric(row[index_cols])
  part_values <- as.numeric(row[part_cols])
  
  # Find parts that are before the selected part index
  preceding_values <- part_values[part_indices < selected_index]
  
  # Return the sum of preceding values (should equal alignmentPosition)
  sum(preceding_values) == alignment_pos
}

# Step 3: Apply the check across all rows
alignment_correct <- apply(stimuli, 1, check_alignment)

# Step 4: Count how many rows fail the check
sum(!alignment_correct)

In [155]:
sum(stimuli$selectedPart != mapply(function(label, row) stimuli[row, label], 
                                        stimuli$selectedLabel, 
                                        seq_len(nrow(stimuli))))


In [156]:
library(dplyr)

stimuli <- stimuli %>%
  group_by(UID) %>%
  slice_sample(prop = 1) %>%
  ungroup()


In [157]:
stimuli

selectedPart,alignmentCategory,anchorCategory,anchorDistance,alignmentDistance,round5Distance,alignmentType,alignmentPosition,chartType,UID,⋯,E,F,G,A_ind,B_ind,C_ind,D_ind,E_ind,F_ind,G_ind
<int>,<chr>,<chr>,<int>,<int>,<int>,<chr>,<int>,<chr>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
40,aligned,far-anchor,10,0,0,75,35,pie,1,⋯,9,16,40,5,1,2,6,3,0,4
22,aligned,near-anchor,3,0,2,0,0,line,1,⋯,19,22,11,6,5,3,4,1,0,2
22,aligned,near-anchor,3,0,2,0,0,pie,1,⋯,15,9,14,4,1,0,5,2,6,3
50,aligned,anchor,0,0,0,50 and 100,50,line,1,⋯,19,7,9,3,6,0,1,2,4,5
25,far-from-aligned,anchor,0,7,0,25 and 50,18,pie,1,⋯,7,25,10,2,6,0,4,3,1,5
16,near-aligned,far-anchor,9,1,1,100,83,pie,1,⋯,1,32,12,5,0,2,1,6,4,3
17,aligned,far-anchor,8,0,2,50,50,line,1,⋯,13,14,2,5,4,6,1,0,2,3
52,near-aligned,far-anchor,2,3,2,25,28,line,1,⋯,3,9,11,0,1,4,5,3,6,2
20,aligned,far-anchor,5,0,0,0,0,line,1,⋯,22,19,22,2,6,0,4,5,3,1
22,far-from-aligned,near-anchor,3,5,2,75,58,pie,1,⋯,20,9,3,4,1,6,5,2,3,0


In [158]:
write.csv(stimuli, "data/stimuli.csv", row.names = FALSE)