In [39]:
# Read chengyu list from txt file, keep only 4-character entries
chengyu_txt_path = "/Users/yufang/WM_load/word_database_txt/four_3_syllable_words.txt"
with open(chengyu_txt_path, "r", encoding="utf-8") as f:
    chengyu_list = [line.strip() for line in f if len(line.strip()) == 3]

# Combine chengyu_list and four_char_words without duplication
combined_list = chengyu_list

print(f"Total number of unique 4-character words (chengyu + four_char_words): {len(combined_list)}")

Total number of unique 4-character words (chengyu + four_char_words): 159


In [40]:
import random
import pandas as pd

# ----------------------
# HYPERPARAMETERS
# ----------------------
N_positions = 3                # Number of possible cue positions in a word (e.g., 4 for 4-char words)
N_trials_per_position = 41     # Number of trials for each cue position type
N_words_each_trial = 4         # Number of words in each trial (e.g., 3)
N_words = len(chengyu_list)    # Total number of available words

words_list = chengyu_list.copy()
random.shuffle(words_list)

# ----------------------
# COLUMN NAMES
# ----------------------
word_columns = [f"W{i+1}" for i in range(N_words_each_trial)]

# ----------------------
# CONSTRAINTS
# ----------------------
# - No trial should have duplicated words
# - Each (word, cue position) combination can appear at most once
# - Don't cue position where the index of character and index of word is the same (i.e., don't cue 1st char in 1st word, 2nd char in 2nd word, etc.)

# ----------------------
# TRACKERS
# ----------------------
used_word_cuepos = set()  # (word, cue_pos) pairs used
used_trials = set()       # frozenset of words in a trial, to avoid duplicate trials

trials = []

# ----------------------
# STEP 1: Generate trials for each cue position type
# ----------------------
# The original code creates 40 trials for (0,1) and then for every (word_idx, char_idx) where word_idx != char_idx,
# it creates 40 more for each, including (1,1), (2,2), (3,3) -- but those are excluded by the if word_idx != char_idx.
# So, let's print out the cue_types to see what's happening.

cue_types = []
# First N_trials_per_position: cue second char of first word
cue_types.extend([(0, 1)] * N_trials_per_position)

# For the rest, enumerate all valid (word_idx, char_idx) pairs, except where word_idx == char_idx
for word_idx in range(N_words_each_trial):
    for char_idx in range(N_positions):
        if word_idx != char_idx:
            cue_types.extend([(word_idx, char_idx)] * N_trials_per_position)

# Let's check how many times (0,1) appears in cue_types
from collections import Counter
cue_type_counts = Counter(cue_types)
print("Cue type counts:")
for k, v in sorted(cue_type_counts.items()):
    print(f"{k}: {v}")

# The reason (0,1) appears 80 times is:
# - First, you explicitly add 40 of (0,1)
# - Then, in the nested loop, for word_idx=0, char_idx=1, you add another 40 of (0,1)
#   (since word_idx != char_idx, so (0,1) is valid and added again)
# So, (0,1) is added twice, for a total of 80.

# To fix this, you should not add (0,1) in both places.
# Solution: Only add (0,1) in the nested loop, and remove the explicit first 40.
# Or, if you want the first 40 to be "special", then in the nested loop, skip (0,1).

# Here's the corrected code:

cue_types = []
# First N_trials_per_position: cue second char of first word
cue_types.extend([(0, 1)] * N_trials_per_position)

# For the rest, enumerate all valid (word_idx, char_idx) pairs, except where word_idx != char_idx and (word_idx, char_idx) != (0,1)
for word_idx in range(N_words_each_trial):
    for char_idx in range(N_positions):
        if word_idx != char_idx and not (word_idx == 0 and char_idx == 1):
            cue_types.extend([(word_idx, char_idx)] * N_trials_per_position)

# Now, (0,1) will only appear 40 times.

# Let's check again
cue_type_counts = Counter(cue_types)
print("Corrected cue type counts:")
for k, v in sorted(cue_type_counts.items()):
    print(f"{k}: {v}")

# Total number of trials to generate
N_total_trials = len(cue_types)

# ----------------------
# STEP 2: Generate trials
# ----------------------
# To ensure we can always find enough valid trials, we may need to try multiple times
attempts = 0
max_attempts = N_total_trials * 100

while len(trials) < N_total_trials and attempts < max_attempts:
    attempts += 1
    # Randomly sample N_words_each_trial distinct words
    trial_words = tuple(random.sample(words_list, N_words_each_trial))
    if len(set(trial_words)) < N_words_each_trial:
        continue  # skip if not all words are unique

    trial_set = frozenset(trial_words)
    if trial_set in used_trials:
        continue  # skip duplicate trial

    # Get the cue type for this trial
    trial_idx = len(trials)
    word_idx, char_idx = cue_types[trial_idx]

    word_for_cue = trial_words[word_idx]
    # Check if this (word, cue_pos) has been used
    if (word_for_cue, char_idx) in used_word_cuepos:
        continue

    # All constraints satisfied, add trial
    used_trials.add(trial_set)
    used_word_cuepos.add((word_for_cue, char_idx))
    cue_char = word_for_cue[char_idx]
    trial_dict = {col: w for col, w in zip(word_columns, trial_words)}
    trial_dict['Cue'] = cue_char
    trial_dict['Cue_Word'] = word_idx + 1  # 1-based index
    trial_dict['Cue_Pos'] = char_idx
    trials.append(trial_dict)

if len(trials) < N_total_trials:
    raise RuntimeError(f"Could not generate enough valid trials ({len(trials)}/{N_total_trials}) with the given constraints.")

# ----------------------
# STEP 3: Save to DataFrame
# ----------------------
results = pd.DataFrame(trials)
# Reorder columns for clarity
cols = word_columns + ['Cue_Word', 'Cue_Pos', 'Cue']
results = results[cols]

results.to_csv("/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_3_syllable_trials.csv", index=False, encoding="utf-8-sig")

print(f"Saved {len(results)} trials")
print("First few trials:")
print(results.head())


Cue type counts:
(0, 1): 82
(0, 2): 41
(1, 0): 41
(1, 2): 41
(2, 0): 41
(2, 1): 41
(3, 0): 41
(3, 1): 41
(3, 2): 41
Corrected cue type counts:
(0, 1): 41
(0, 2): 41
(1, 0): 41
(1, 2): 41
(2, 0): 41
(2, 1): 41
(3, 0): 41
(3, 1): 41
(3, 2): 41
Saved 369 trials
First few trials:
    W1   W2   W3   W4  Cue_Word  Cue_Pos Cue
0  重要性  运输车  科学馆  销售额         1        1   要
1  保险箱  高利贷  纪念碑  参谋掌         1        1   险
2  营业员  太阳能  神经元  责任感         1        1   业
3  抗生素  保险箱  黑板报  营业员         1        1   生
4  锦标赛  主人翁  自来水  科学家         1        1   标


In [None]:
import csv
import random

# File paths
input_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_4_syllable_exp.csv"
output_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_4_syllable_exp_with_blocks.csv"
input_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp.csv"
output_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp_with_blocks.csv"
input_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_3_syllable_exp.csv"
output_csv = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_3_syllable_exp_with_blocks.csv"

# Number of trials per block
N = 41

# Read all rows from the input CSV
with open(input_csv, 'r', encoding='utf-8') as infile:
    reader = list(csv.reader(infile))
    header = reader[0]
    data_rows = reader[1:]

# Shuffle the data rows randomly
random.shuffle(data_rows)

# Assign block numbers
for idx, row in enumerate(data_rows):
    block_num = idx // N + 1
    row.append(str(block_num))

# Write to the new CSV with the 'block' column
with open(output_csv, 'w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile)
    # Write header with new 'block' column
    writer.writerow(header + ['block'])
    # Write data rows with block numbers
    writer.writerows(data_rows)

print(f"Randomized and added 'block' column to {output_csv}")


Randomized and added 'block' column to /Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_3_syllable_trials_with_blocks.csv


# Three Syllable words processing

In [None]:
import csv

def extract_distinct_words(csv_file, output_file):
    """
    Read CSV file and extract all distinct words from W1, W2, W3 columns.
    Save the distinct words to a text file.
    """
    distinct_words = set()
    
    # Read the CSV file
    with open(csv_file, 'r', encoding='utf-8') as file:
        # Skip the header row
        next(file)
        
        csv_reader = csv.reader(file)
        
        for row in csv_reader:
            if len(row) >= 3:  # Ensure row has at least 3 columns
                # Extract W1, W2, W3 (columns 0, 1, 2)
                w1 = row[0].strip()
                w2 = row[1].strip()
                w3 = row[2].strip()
                
                # Add non-empty words to the set
                if w1:
                    distinct_words.add(w1)
                if w2:
                    distinct_words.add(w2)
                if w3:
                    distinct_words.add(w3)
    
    # Sort the distinct words for better readability
    sorted_words = sorted(distinct_words)
    
    # Write to output file
    with open(output_file, 'w', encoding='utf-8') as file:
        for word in sorted_words:
            file.write(word + '\n')
    
    print(f"Extracted {len(distinct_words)} distinct words.")
    print(f"Words saved to: {output_file}")
    
    return sorted_words


# csv_filename = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp.csv" 
# output_filename = "/Users/yufang/WM_load/word_database_txt/three_3_syllable_words.txt"

csv_filename = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_syllable_trials.csv" 
output_filename = "/Users/yufang/WM_load/word_database_txt/three_4_syllable_words.txt"

csv_filename = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/four_3_syllable_trials.csv"
output_filename = "/Users/yufang/WM_load/word_database_txt/four_3_syllable_words.txt"

try:
    words = extract_distinct_words(csv_filename, output_filename)
    print("\nDistinct words:")
    for word in words:
        print(word)
except FileNotFoundError:
    print(f"Error: File '{csv_filename}' not found.")
except Exception as e:
    print(f"Error: {e}")

Extracted 146 distinct words.
Words saved to: /Users/yufang/WM_load/word_database_txt/three_4_syllable_words.txt

Distinct words:
一举两得
一往无前
一成不变
一诺千金
一败涂地
七步成诗
万无一失
万紫千红
三顾茅庐
东施效颦
举一反三
举案齐眉
乐善好施
五湖四海
仁至义尽
信守不渝
光明正大
光明磊落
凿壁偷光
刮目相看
刻舟求剑
前功尽弃
力争上游
功亏一篑
功败垂成
势不可挡
勇往直前
十拿九稳
千军万马
千载难逢
半途而废
博古通今
卧薪尝胆
叶公好龙
同甘共苦
后生可畏
唇亡齿寒
唇枪舌剑
喜出望外
囊萤映雪
四面楚歌
囫囵吞枣
围魏救赵
国泰民安
声势浩大
夜以继日
大公无私
大张旗鼓
大雪纷飞
天作之合
奋发图强
如释重负
孜孜不倦
学富五车
学而不厌
守口如瓶
守株待兔
安居乐业
完璧归赵
家喻户晓
对牛弹琴
山崩地裂
山清水秀
废寝忘食
得不偿失
心心相印
心花怒放
忠心耿耿
患难与共
悬梁刺股
惊弓之鸟
成竹在胸
所向披靡
才高八斗
持之以恒
指鹿为马
排山倒海
掩耳盗铃
揭竿而起
旗开得胜
日出而作
春暖花开
望梅止渴
欣欣向荣
比翼双飞
气势磅礴
水滴石穿
波澜壮阔
洛阳纸贵
浩浩荡荡
海誓山盟
温故知新
滥竽充数
滥竽充术
班门弄斧
琴瑟和鸣
画蛇添足
画龙点睛
疾如雷霆
白头偕老
白雪皑皑
目不暇接
盲人摸象
相濡以沫
破釜沉舟
硕果累累
秋高气爽
程门立雪
笨鸟先飞
精益求精
繁花似锦
纸上谈兵
绿树成荫
翻天覆地
肝胆相照
背水一战
胸有成竹
自相矛盾
至死不渝
舍己为人
舍生取义
草木皆兵
草船借箭
触类旁通
言而有信
负荆请罪
赤壁鏖兵
赤胆忠心
走马观花
运筹帷幄
退避三舍
釜底抽薪
铁杵成针
锐不可当
门可罗雀
闻鸡起舞
雷厉风行
雷霆万钧
青出于蓝
风和日丽
风起云涌
风驰电掣
马到成功
鸟语花香
鸾凤和鸣
鹤立鸡群


In [29]:
import pandas as pd
import re

def extract_cue_position(row):
    """
    Find the position of the cue character in W1.
    Returns (word_position, char_position) where:
    - word_position: 1 for W1, 2 for W2, 3 for W3
    - char_position: 0-based index of character within the word
    """
    cue = row['Cue']
    
    # Check W1 first
    if cue in row['W1']:
        return 1, row['W1'].index(cue)
    # Check W2
    elif cue in row['W2']:
        return 2, row['W2'].index(cue)
    # Check W3
    elif cue in row['W3']:
        return 3, row['W3'].index(cue)
    
    return None, None

def transform_csv(input_file, output_file):
    # Read the CSV, skipping empty columns
    df = pd.read_csv(input_file)
    
    # Remove unnamed/empty columns
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    
    # Keep only the necessary columns
    df = df[['W1', 'W2', 'W3', 'Cue']]
    
    # Remove any rows with missing values
    df = df.dropna()
    
    # Extract cue position information
    df[['Cue_Word', 'Cue_Pos']] = df.apply(
        lambda row: pd.Series(extract_cue_position(row)), 
        axis=1
    )
    
    # Reorder columns to match desired output
    df = df[['W1', 'W2', 'W3', 'Cue_Word', 'Cue_Pos', 'Cue']]
    
    # Save to new CSV
    df.to_csv(output_file, index=False)
    print(f"Transformation complete! Output saved to {output_file}")
    print(f"\nProcessed {len(df)} rows")

# Usage
if __name__ == "__main__":
    input_file = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp.csv"  # Change to your input file name
    output_file = "/Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp_new.csv"  # Change to your desired output file name
    
    transform_csv(input_file, output_file)

Transformation complete! Output saved to /Users/yufang/WM_load/Exp1_Syllable_Sequence_and_Cueing_Syllable_List/three_3_syllable_exp_new.csv

Processed 432 rows
