In [2]:
# Plain-text template for the compensation email. The square-bracket
# placeholders ([Study title], [Code], ...) are stored verbatim; nothing in
# this cell substitutes values into them.
content = """
Dear participant,

Thank you for participating in our study titled "[Study title]"! Below [is/are] the [Number of gift card][$ Amount & Retailer] eGift card [code/codes] as compensation for your time:

[Code]

Please email us back when you have received [this code/these codes], so we can keep track for our records. If you have any trouble accessing or using [this code/these codes], please let us know. 

Best,
The Brain Bridge Lab
"""

# Save the template to disk. The encoding is pinned to UTF-8 so the bytes
# written do not depend on the platform's default locale encoding.
with open('compensation_message.txt', 'w', encoding='utf-8') as file:
    file.write(content)


In [1]:
import pandas as pd
import random
import string

def generate_code(segment_lengths=(4, 6, 4)):
    """Return a random sample eGift card code, e.g. ``AB12-CD34EF-GH56``.

    The code is made of dash-separated segments of uppercase ASCII letters
    and digits. It is built with the ``random`` module, which is fine for
    sample data but is not a cryptographically secure source.

    Args:
        segment_lengths: Number of characters in each segment, in order.
            The default ``(4, 6, 4)`` yields the 4-6-4 layout.

    Returns:
        The generated code as a string.
    """
    alphabet = string.ascii_uppercase + string.digits
    # One random.choices call per segment, in order, so the default layout
    # draws from the random stream with the same sequence of calls (k=4, 6, 4).
    return '-'.join(
        ''.join(random.choices(alphabet, k=length)) for length in segment_lengths
    )

# Build the sample gift-card table: each row gets a freshly generated code,
# a fixed amount and retailer, blank tracking fields, and Sent set to 0.
num_rows = 10  # how many sample cards to create
data = [
    {
        "Code": generate_code(),
        "Amount": 5,
        "Retailer": "Amazon",
        "Experiment": "",
        "Subject ID": "",
        "Experimenter": "",
        "Date Given": "",
        "Sent": 0,
    }
    for _ in range(num_rows)
]

# Turn the row dicts into a table and store it as Parquet without the index.
df = pd.DataFrame(data)
df.to_parquet('egift_cards.parquet', index=False)

print("Parquet file generated: egift_cards.parquet")


Parquet file generated: egift_cards.parquet


In [3]:
import pandas as pd
import random
import string

def generate_consent():
    """Return a randomly chosen consent flag, either 0 or 1."""
    consent_flags = [0, 1]
    return random.choice(consent_flags)

def generate_age():
    """Return a random integer age from 14 to 99, both ends included."""
    youngest, oldest = 14, 99
    return random.randint(youngest, oldest)

def generate_sex():
    """Return one of the three sex categories, chosen at random."""
    sex_options = ['Female', 'Male', 'Other/No Response']
    return random.choice(sex_options)

def generate_ethnicity():
    """Return one of the three ethnicity categories, chosen at random."""
    ethnicity_options = ['Hispanic/Latino', 'Not Hispanic/Latino', 'No Response']
    return random.choice(ethnicity_options)

def generate_race():
    """Return one of the seven race categories, chosen at random."""
    race_options = [
        'American Indian/Alaska Native',
        'Asian',
        'Native Hawaiian/Pacific Islander',
        'Black/African American',
        'White/Caucasian',
        'More than one race',
        'No Response',
    ]
    return random.choice(race_options)

def generate_vviq_responses():
    """Return a list of 16 random scores, each an integer from 1 to 4 inclusive."""
    scores = []
    for _ in range(16):
        scores.append(random.randint(1, 4))
    return scores

def generate_ideal_participation():
    """Return a randomly chosen ideal-participation flag, either 0 or 1."""
    participation_flags = [0, 1]
    return random.choice(participation_flags)

def generate_attention_check_task():
    """Return one of the five attention-check task labels, chosen at random."""
    task_options = ['Pictures', 'Abstract words', 'Concrete words', 'Numbers', 'Symbols']
    return random.choice(task_options)

def generate_parquet_file(emails=None, output_path='participants_data.parquet'):
    """Write a Parquet file of randomly generated participant records.

    One row is produced per email address. Each row holds the email, a
    consent flag, age, sex, ethnicity, race, 16 VVIQ item scores, an
    ideal-participation flag and an attention-check task label, all drawn
    from the ``generate_*`` helpers defined in this notebook.

    Args:
        emails: Iterable of email addresses, one row each. When ``None``
            (the default) the two built-in sample addresses are used.
        output_path: Where to write the Parquet file. Defaults to
            ``'participants_data.parquet'``.

    Returns:
        None. The file is written and a confirmation line is printed.
    """
    if emails is None:
        emails = ['ymuhannah@uchicago.edu', 'aliceluo@uchicago.edu']

    columns = (
        ['EmailAddress', 'FutureContactConsent', 'Age', 'Sex', 'Ethnicity', 'Race']
        + [f'VVIQ_Q{i + 1}' for i in range(16)]
        + ['IdealParticipation', 'AttentionCheckTask']
    )

    data = []
    for email in emails:
        # The helpers are called in column order, so each value lands under
        # its matching column name.
        row = (
            [email, generate_consent(), generate_age(), generate_sex(),
             generate_ethnicity(), generate_race()]
            + generate_vviq_responses()
            + [generate_ideal_participation(), generate_attention_check_task()]
        )
        data.append(row)

    df = pd.DataFrame(data, columns=columns)

    # index=False keeps the DataFrame index out of the stored file.
    df.to_parquet(output_path, index=False)

    print(f"Parquet file generated: {output_path}")

# Generate the sample participants Parquet file. No random seed is set in this
# cell, so the generated values differ from run to run.
generate_parquet_file()



Parquet file generated: participants_data.parquet


In [2]:
import boto3

# Create S3 client. No region or credentials are passed here, so boto3 uses
# whatever configuration it finds in the environment.
s3 = boto3.client('s3')

# Define bucket names. bucket1 receives the gift-card table and the message
# template; bucket2 receives the participants table (see the upload calls at
# the end of this cell).
bucket1 = 'sending-egift-cards-parquet'
bucket2 = 'testing-exp-data-parquet'

# Define file paths. These are relative local file names; the upload helper
# in this cell also uses each one as the S3 object key.
egift_cards_file = 'egift_cards.parquet'
participants_data_file = 'participants_data.parquet'
compensation_message_file = 'compensation_message.txt'

def create_bucket(bucket_name):
    """Create the S3 bucket ``bucket_name`` using the module-level ``s3`` client.

    S3 rejects a create request that has no location constraint when the
    client targets a region other than us-east-1. The client's own region is
    therefore passed as ``LocationConstraint`` in that case. For us-east-1
    (or when no specific region is resolved) the plain request is sent.

    Failures are reported with ``print`` and not re-raised, so the calling
    cell keeps running. Nothing is returned.

    Args:
        bucket_name: Name of the bucket to create.
    """
    try:
        region = s3.meta.region_name
        if region in (None, 'us-east-1', 'aws-global'):
            s3.create_bucket(Bucket=bucket_name)
        else:
            s3.create_bucket(
                Bucket=bucket_name,
                CreateBucketConfiguration={'LocationConstraint': region},
            )
        print(f"Bucket '{bucket_name}' created successfully.")
    except Exception as e:
        print(f"Error creating bucket '{bucket_name}': {e}")

def upload_file_to_bucket(bucket_name, file_name):
    """Upload the local file ``file_name`` to ``bucket_name`` under the same key.

    Any exception raised during the upload is caught and reported with
    ``print``; nothing is re-raised and nothing is returned.
    """
    object_key = file_name  # the object is stored under the local file name
    try:
        s3.upload_file(Filename=file_name, Bucket=bucket_name, Key=object_key)
        print(f"File '{file_name}' uploaded to bucket '{bucket_name}' successfully.")
    except Exception as err:
        print(f"Error uploading file '{file_name}' to bucket '{bucket_name}': {err}")

# Create two S3 buckets. create_bucket prints any error instead of raising,
# so the uploads below are attempted even if a bucket could not be created.
create_bucket(bucket1)
create_bucket(bucket2)

# Upload files to respective S3 buckets: the gift-card table and the message
# template go to bucket1, the participants table goes to bucket2.
upload_file_to_bucket(bucket1, egift_cards_file)
upload_file_to_bucket(bucket1, compensation_message_file)
upload_file_to_bucket(bucket2, participants_data_file)


Bucket 'sending-egift-cards-parquet' created successfully.
Bucket 'testing-exp-data-parquet' created successfully.
File 'egift_cards.parquet' uploaded to bucket 'sending-egift-cards-parquet' successfully.
File 'compensation_message.txt' uploaded to bucket 'sending-egift-cards-parquet' successfully.
File 'participants_data.parquet' uploaded to bucket 'testing-exp-data-parquet' successfully.


In [3]:
import boto3
import pandas as pd
from io import BytesIO

# Create S3 client
s3 = boto3.client('s3')

# Define bucket name and file path
bucket_name = 'testing-exp-data-parquet'
file_name = 'participants_data.parquet'

# Download the existing Parquet file from S3
obj = s3.get_object(Bucket=bucket_name, Key=file_name)
data = obj['Body'].read()

# Read the Parquet file into a DataFrame
df = pd.read_parquet(BytesIO(data))

# Add the tracking column only when it is missing. Assigning it
# unconditionally would reset every row to 0 each time this cell is re-run,
# overwriting any values already stored in the column.
if 'egift-card-sent' not in df.columns:
    df['egift-card-sent'] = 0

# Save the DataFrame back to Parquet in an in-memory buffer
buffer = BytesIO()
df.to_parquet(buffer, index=False)
buffer.seek(0)

# Upload the Parquet bytes back to S3 under the same key, replacing the object
s3.put_object(Bucket=bucket_name, Key=file_name, Body=buffer.getvalue())

print("Parquet file updated and uploaded successfully.")


Parquet file updated and uploaded successfully.
