In [3]:
import os 

# Move to the survey results directory 
data_dir = '../../data'
os.chdir(data_dir)

ai_study = "AI_Study_Finalized.csv"
track_a_file = "track_a.csv"
track_b_file = "track_b.csv"



In [5]:
########
#  A.1.2 
########

import pandas as pd
import plotly.express as px

# Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Skip the first two rows
df = df.iloc[2:].reset_index(drop=True)

# Function to split responses, keeping special categories together
def split_responses(response):
    if pd.isna(response):
        return []
    parts = str(response).split(',')
    result = []
    i = 0
    chatbot_category_added = False
    while i < len(parts):
        if i+1 < len(parts) and parts[i].strip() == "Other" and parts[i+1].strip() == "please explain":
            result.append("Other, please explain")
            i += 2
        elif i+1 < len(parts) and parts[i].strip() == "Programming analysis (e.g." and parts[i+1].strip().startswith("Code completion or code generation"):
            result.append("Programming analysis (e.g., Code completion or code generation)")
            i += 2
        elif not chatbot_category_added and (parts[i].strip() in ["Chatbots", "Personal Assistants or Recommender Systems"] or (i+1 < len(parts) and parts[i].strip() == "Personal Assistants" and parts[i+1].strip() == "or Recommender Systems")):
            result.append("Chatbots, Personal Assistants or Recommender Systems")
            chatbot_category_added = True
            i += 2 if parts[i].strip() == "Personal Assistants" else 1
        elif chatbot_category_added and parts[i].strip() in ["Chatbots", "Personal Assistants", "Recommender Systems", "Personal Assistants or Recommender Systems"]:
            i += 1  # Skip this part as we've already added the category
        else:
            result.append(parts[i].strip())
            i += 1
    return result

# Apply the split_responses function to the 'A.1.2' column
df['A.1.2_split'] = df['A.1.2'].apply(split_responses)

# Count the number of responses for each option
option_counts = df['A.1.2_split'].explode().value_counts()

# Calculate the total number of valid responses (rows with non-empty lists)
total_valid_responses = df['A.1.2_split'].apply(len).ne(0).sum()

# Calculate percentages
option_percentages = (option_counts / total_valid_responses * 100).round(2)

# Create a DataFrame for plotting
plot_df = pd.DataFrame({
    'Option': option_counts.index,
    'Count': option_counts.values,
    'Percentage': option_percentages.values
})

# Sort the DataFrame by Count in descending order
plot_df = plot_df.sort_values('Count', ascending=False)

# Create the bar plot
fig = px.bar(plot_df, x='Option', y='Count',
             title='Survey Results for A.1.2',
             labels={'Option': 'AI Application', 'Count': 'Number of Selections'},
             color='Percentage',
             color_continuous_scale='Blues',
             text=plot_df['Percentage'].apply(lambda x: f'{x:.1f}%'))

# Adjust the layout for better readability
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title="AI Application",
    yaxis_title="Number of Selections",
    uniformtext_minsize=8,
    uniformtext_mode='hide'
)

# Update traces to position and style the text
fig.update_traces(texttemplate='%{text}', textposition='inside')

# Adjust y-axis to ensure all labels are visible
fig.update_layout(yaxis_range=[0, plot_df['Count'].max() * 1.1])

# Show the plot
fig.show(renderer="browser")

# Statistical Analysis
total_options = len(plot_df)
average_percentage = plot_df['Percentage'].mean()
most_common = plot_df.iloc[0]
least_common = plot_df.iloc[-1]

print("A.1.2: What are the primary applications of AI in your products that you see today and envision for the near future? Select all that apply.")
print(f"Total valid responses: {total_valid_responses}")
print(f"Total unique AI applications mentioned: {total_options}")
print(f"Average percentage per application: {average_percentage:.2f}%")
print(f"Most common application: {most_common['Option']} ({most_common['Count']} selections, {most_common['Percentage']:.2f}% of respondents)")
print(f"Least common application: {least_common['Option']} ({least_common['Count']} selections, {least_common['Percentage']:.2f}% of respondents)")

print("\nBreakdown of responses:")
for _, row in plot_df.iterrows():
    print(f"{row['Option']}: {row['Count']} selections ({row['Percentage']:.2f}% of respondents)")

A.1.2: What are the primary applications of AI in your products that you see today and envision for the near future? Select all that apply.
Total valid responses: 107
Total unique AI applications mentioned: 18
Average percentage per application: 35.36%
Most common application: Chatbots, Personal Assistants or Recommender Systems (79 selections, 73.83% of respondents)
Least common application: Other, please explain (3 selections, 2.80% of respondents)

Breakdown of responses:
Chatbots, Personal Assistants or Recommender Systems: 79 selections (73.83% of respondents)
Translation or Text Generation: 60 selections (56.07% of respondents)
Customer Service: 58 selections (54.21% of respondents)
Code Completion or Code Generation): 51 selections (47.66% of respondents)
Programming Analysis (e.g.: 51 selections (47.66% of respondents)
Predictive Analysis: 46 selections (42.99% of respondents)
Healthcare: 43 selections (40.19% of respondents)
Financial Services: 38 selections (35.51% of respond

In [10]:
import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.1.3'
df = df[df['A.1.3'].notna()]  # Remove NaNs
df['A.1.3'] = df['A.1.3'].astype(str).apply(lambda x: x.strip())
df = df[df['A.1.3'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['A.1.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
fig = px.pie(response_counts, names='Response', values='Count',
             title='Frequency of AI Tool Usage (A.1.3)',
             color_discrete_sequence=px.colors.sequential.Blues,
             labels={'Response': 'Usage Frequency', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Print summary results
total_responses = response_counts['Count'].sum()
total_options = len(response_counts)
average_responses = total_responses / total_options
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

question_text = "A.1.3 Frequency of AI Tool Usage"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total unique frequency options: {total_options}")
print(f"Average responses per option: {average_responses:.2f}")
print(f"Most common frequency: {most_common['Response']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common frequency: {least_common['Response']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in response_counts.iterrows():
    print(f"{row['Response']}: {row['Count']} responses ({row['Percentage']}% of respondents)")


A.1.3 Frequency of AI Tool Usage
Total valid responses: 107
Total unique frequency options: 6
Average responses per option: 17.83
Most common frequency: Daily (36 responses, 33.64% of respondents)
Least common frequency: Prefer not to say (1 responses, 0.93% of respondents)

Breakdown of responses:
Daily: 36 responses (33.64% of respondents)
4-6 times a week: 30 responses (28.04% of respondents)
1-3 times a week: 22 responses (20.56% of respondents)
Rarely: 17 responses (15.89% of respondents)
Never: 1 responses (0.93% of respondents)
Prefer not to say: 1 responses (0.93% of respondents)


In [11]:
####################################
#  A.1.4
####################################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.1.4'
df = df[df['A.1.4'].notna()]  # Remove NaNs
df['A.1.4'] = df['A.1.4'].astype(str).apply(lambda x: x.strip())
df = df[df['A.1.4'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['A.1.4'] = df['A.1.4'].apply(lambda x: re.split(r',\s*', x))

# Flatten the list of responses and count occurrences
all_responses = df['A.1.4'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "A.1.4 How do AI-enhanced tools affect your development process and/or your company's workflow?"
fig = px.bar(all_responses, x='Option', y='Count',
             title=question_text,
             labels={'Option': 'Effects', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Effects', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")

# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all effects: {total_responses}")
print(f"Total unique effects: {total_options}")
print(f"Average responses per effect: {average_responses:.2f}")
print(f"Most common effect: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common effect: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")

A.1.4 How do AI-enhanced tools affect your development process and/or your company's workflow?
Total valid responses: 107
Total responses across all effects: 377
Total unique effects: 13
Average responses per effect: 29.00
Most common effect: Increased Efficiency (76 responses, 71.03% of respondents)
Least common effect: Prefer not to say (1 responses, 0.93% of respondents)

Breakdown of responses:
Increased Efficiency: 76 responses (71.03% of respondents)
Improved Accuracy: 44 responses (41.12% of respondents)
Innovation and Creativity: 43 responses (40.19% of respondents)
Enhanced Decision Making: 42 responses (39.25% of respondents)
Improved Research Quality: 42 responses (39.25% of respondents)
Training and Skill Development: 38 responses (35.51% of respondents)
Cost Reduction: 33 responses (30.84% of respondents)
Streamlined Workflows: 29 responses (27.1% of respondents)
Security and Privacy Concerns: 17 responses (15.89% of respondents)
No Significant Impact: 8 responses (7.48% o

In [4]:
####################################
#  A.1.5 with Grouped Location and Role Analysis
####################################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.1.5'
df = df[df['A.1.5'].notna()]  # Remove NaNs
df['A.1.5'] = df['A.1.5'].astype(str).apply(lambda x: x.strip())
df = df[df['A.1.5'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['A.1.5'] = df['A.1.5'].apply(lambda x: re.split(r',\s*', x))

# Add location data and group it
location_mapping = {
    'North America': 'US',
    'Central/South America': 'Other',
    'EU/UK/EEA': 'Europe',
    'Europe - Outside of EU/UK/EEA': 'Europe',
    'Africa': 'Other',
    'Middle East': 'Other',
    'Asia': 'Other',
    'Australia and Oceania': 'Other',
    'Prefer not to say': 'Other',
    'Other, please specify': 'Other'
}
df['Location'] = df.iloc[:, 28].map(location_mapping)

# Add role data and group it
role_mapping = {
    'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
    'AI Manager': 'AI Manager',
    'Requirements Analyst or Engineer': 'Requirements analyst',
    'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
    'AI Engineer or Developer': 'AI developers',
    '(Software) Developer, Designer, or Architect': 'AI developers',
    'Data Scientist or Data Analyst': 'AI developers',
    'Information Security Analyst or Engineer': 'Security/Privacy',
    'Information Privacy Analyst or Engineer': 'Security/Privacy',
    'AI Ethicist': 'Other',
    'AI Researcher': 'AI Researcher',
    '(Software) Quality Assurance Engineer or Tester': 'QA and Maintanence',
    'Other, please specify': 'Other'
}
df['Role'] = df.iloc[:, 30].map(role_mapping)

# Flatten the list of responses and count occurrences
all_responses = df['A.1.5'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title="Factors Limiting Company's Involvement with AI A.1.5",
             labels={'Option': 'Limiting Factors', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Limiting Factors', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")

# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print("A.1.5: How do AI-enhanced tools affect your development process and/or your company's workflow? Select all that apply.")
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all factors: {total_responses}")
print(f"Total unique limiting factors: {total_options}")
print(f"Average responses per factor: {average_responses:.2f}")
print(f"Most common factor: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common factor: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")

# Grouped Location-based analysis
print("\nGrouped Location-based Analysis:")
grouped_locations = ['US', 'Europe', 'Other']

for location in grouped_locations:
    print(f"\nLocation Group: {location}")
    location_df = df[df['Location'] == location]
    location_responses = location_df['A.1.5'].explode().value_counts().reset_index()
    location_responses.columns = ['Option', 'Count']
    location_total = location_df.shape[0]
    location_responses['Percentage'] = (location_responses['Count'] / location_total * 100).round(2)
    
    print(f"Total responses from this location group: {location_total}")
    print("Breakdown of responses:")
    for _, row in location_responses.iterrows():
        print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents from this location group)")
    
    if not location_responses.empty:
        location_most_common = location_responses.iloc[0]
        location_least_common = location_responses.iloc[-1]
        print(f"Most common factor: {location_most_common['Option']} ({location_most_common['Count']} responses, {location_most_common['Percentage']}% of respondents from this location group)")
        print(f"Least common factor: {location_least_common['Option']} ({location_least_common['Count']} responses, {location_least_common['Percentage']}% of respondents from this location group)")
    else:
        print("No responses from this location group.")

# Role-based analysis
print("\nRole-based Analysis:")
grouped_roles = list(set(role_mapping.values()))  # Get unique role groups

for role in grouped_roles:
    print(f"\nRole Group: {role}")
    role_df = df[df['Role'] == role]
    role_responses = role_df['A.1.5'].explode().value_counts().reset_index()
    role_responses.columns = ['Option', 'Count']
    role_total = role_df.shape[0]
    role_responses['Percentage'] = (role_responses['Count'] / role_total * 100).round(2)
    
    print(f"Total responses from this role group: {role_total}")
    print("Breakdown of responses:")
    for _, row in role_responses.iterrows():
        print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents from this role group)")
    
    if not role_responses.empty:
        role_most_common = role_responses.iloc[0]
        role_least_common = role_responses.iloc[-1]
        print(f"Most common factor: {role_most_common['Option']} ({role_most_common['Count']} responses, {role_most_common['Percentage']}% of respondents from this role group)")
        print(f"Least common factor: {role_least_common['Option']} ({role_least_common['Count']} responses, {role_least_common['Percentage']}% of respondents from this role group)")
    else:
        print("No responses from this role group.")

A.1.5: How do AI-enhanced tools affect your development process and/or your company's workflow? Select all that apply.
Total valid responses: 104
Total responses across all factors: 254
Total unique limiting factors: 11
Average responses per factor: 23.09
Most common factor: Privacy and Security Concerns (47 responses, 45.19% of respondents)
Least common factor: please explain (3 responses, 2.88% of respondents)

Breakdown of responses:
Privacy and Security Concerns: 47 responses (45.19% of respondents)
Lack of Trust: 40 responses (38.46% of respondents)
Lack of Expertise: 37 responses (35.58% of respondents)
Financial: 31 responses (29.81% of respondents)
Regulations: 28 responses (26.92% of respondents)
Technology: 22 responses (21.15% of respondents)
Proprietary Reasons: 21 responses (20.19% of respondents)
Human Resources: 17 responses (16.35% of respondents)
Prefer not to say: 5 responses (4.81% of respondents)
Other: 3 responses (2.88% of respondents)
please explain: 3 responses 

In [6]:
################################
# A.2.2 with Grouped Location and Role Analysis
################################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study, encoding='latin-1')

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.2.2'
df = df[df['A.2.2'].notna()]  # Remove NaNs
df['A.2.2'] = df['A.2.2'].astype(str).apply(lambda x: x.strip())
df = df[df['A.2.2'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['A.2.2'] = df['A.2.2'].apply(lambda x: re.split(r',\s*', x))

# Add location data and group it
location_mapping = {
    'North America': 'US',
    'Central/South America': 'Other',
    'EU/UK/EEA': 'Europe',
    'Europe - Outside of EU/UK/EEA': 'Europe',
    'Africa': 'Other',
    'Middle East': 'Other',
    'Asia': 'Other',
    'Australia and Oceania': 'Other',
    'Prefer not to say': 'Other',
    'Other, please specify': 'Other'
}
df['Location'] = df.iloc[:, 28].map(location_mapping)

# Add role data and group it
role_mapping = {
    'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
    'AI Manager': 'AI Manager',
    'Requirements Analyst or Engineer': 'Requirements analyst',
    'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
    'AI Engineer or Developer': 'AI developers',
    '(Software) Developer, Designer, or Architect': 'AI developers',
    'Data Scientist or Data Analyst': 'AI developers',
    'Information Security Analyst or Engineer': 'Security/Privacy',
    'Information Privacy Analyst or Engineer': 'Security/Privacy',
    'AI Ethicist': 'Other',
    'AI Researcher': 'AI Researcher',
    '(Software) Quality Assurance Engineer or Tester': 'QA and Maintanence',
    'Other, please specify': 'Other'
}
df['Role'] = df.iloc[:, 30].map(role_mapping)

# Flatten the list of responses and count occurrences
all_responses = df['A.2.2'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title='AI Ethics Principles at Risk Due to Current AI Systems A.2.2',
             labels={'Option': 'Ethics Principles', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Ethics Principles', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")

# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print("A.2.2: Which AI ethics principles do you consider to be at risk due to current AI systems? Select all that apply.")
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all principles: {total_responses}")
print(f"Total unique ethics principles: {total_options}")
print(f"Average responses per principle: {average_responses:.2f}")
print(f"Most common principle: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common principle: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")

# Grouped Location-based analysis
print("\nGrouped Location-based Analysis:")
grouped_locations = ['US', 'Europe', 'Other']

for location in grouped_locations:
    print(f"\nLocation Group: {location}")
    location_df = df[df['Location'] == location]
    location_responses = location_df['A.2.2'].explode().value_counts().reset_index()
    location_responses.columns = ['Option', 'Count']
    location_total = location_df.shape[0]
    location_responses['Percentage'] = (location_responses['Count'] / location_total * 100).round(2)
    
    print(f"Total responses from this location group: {location_total}")
    print("Breakdown of responses:")
    for _, row in location_responses.iterrows():
        print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents from this location group)")
    
    if not location_responses.empty:
        location_most_common = location_responses.iloc[0]
        location_least_common = location_responses.iloc[-1]
        print(f"Most common principle: {location_most_common['Option']} ({location_most_common['Count']} responses, {location_most_common['Percentage']}% of respondents from this location group)")
        print(f"Least common principle: {location_least_common['Option']} ({location_least_common['Count']} responses, {location_least_common['Percentage']}% of respondents from this location group)")
    else:
        print("No responses from this location group.")

# Role-based analysis
print("\nRole-based Analysis:")
grouped_roles = list(set(role_mapping.values()))  # Get unique role groups

for role in grouped_roles:
    print(f"\nRole Group: {role}")
    role_df = df[df['Role'] == role]
    role_responses = role_df['A.2.2'].explode().value_counts().reset_index()
    role_responses.columns = ['Option', 'Count']
    role_total = role_df.shape[0]
    role_responses['Percentage'] = (role_responses['Count'] / role_total * 100).round(2)
    
    print(f"Total responses from this role group: {role_total}")
    print("Breakdown of responses:")
    for _, row in role_responses.iterrows():
        print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents from this role group)")
    
    if not role_responses.empty:
        role_most_common = role_responses.iloc[0]
        role_least_common = role_responses.iloc[-1]
        print(f"Most common principle: {role_most_common['Option']} ({role_most_common['Count']} responses, {role_most_common['Percentage']}% of respondents from this role group)")
        print(f"Least common principle: {role_least_common['Option']} ({role_least_common['Count']} responses, {role_least_common['Percentage']}% of respondents from this role group)")
    else:
        print("No responses from this role group.")

A.2.2: Which AI ethics principles do you consider to be at risk due to current AI systems? Select all that apply.
Total valid responses: 92
Total responses across all principles: 255
Total unique ethics principles: 11
Average responses per principle: 23.18
Most common principle: Data Protection and Right to Privacy (66 responses, 71.74% of respondents)
Least common principle: None (1 responses, 1.09% of respondents)

Breakdown of responses:
Data Protection and Right to Privacy: 66 responses (71.74% of respondents)
Transparency and Explainability of AI Systems: 39 responses (42.39% of respondents)
Accountability and Responsibility: 31 responses (33.7% of respondents)
Fairness and Justice: 24 responses (26.09% of respondents)
Harm Prevention and Beneficence: 22 responses (23.91% of respondents)
Non-Discrimination and Freedom of Privileges: 20 responses (21.74% of respondents)
Respect for Human Rights: 17 responses (18.48% of respondents)
Democracy and Rule of Law: 15 responses (16.3% of 

In [136]:
#########################
# A.4.1 statistics
#########################

import pandas as pd

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.4.1'
df = df[df['A.4.1'].notna()]  # Remove NaNs
df['A.4.1'] = df['A.4.1'].astype(str).apply(lambda x: x.strip())
df = df[df['A.4.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['A.4.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Print results in clear text
print("Statistics for 'A.4.1': Can you envision a scenario where your company may integrate AI into future operations?")
for index, row in response_counts.iterrows():
    print(f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)")

print(f"Total valid responses: {total_valid_responses}")


Statistics for 'A.4.1': Can you envision a scenario where your company may integrate AI into future operations?
Yes: 64 responses (59.81%)
Maybe: 36 responses (33.64%)
No: 6 responses (5.61%)
Prefer not to say: 1 responses (0.93%)
Total valid responses: 107


In [10]:
#############################
# A.4.2
###################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.4.2'
df = df[df['A.4.2'].notna()]  # Remove NaNs
df['A.4.2'] = df['A.4.2'].astype(str).apply(lambda x: x.strip())
df = df[df['A.4.2'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['A.4.2'] = df['A.4.2'].apply(lambda x: re.split(r',\s*', x))

# Flatten the list of responses and count occurrences
all_responses = df['A.4.2'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title='AI Ethics Principles to Consider for Future Operations',
             labels={'Option': 'Ethics Principles', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Ethics Principles', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")

# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

question_text = "A.4.2 AI Ethics Principles to Consider for Future Operations"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all principles: {total_responses}")
print(f"Total unique ethics principles: {total_options}")
print(f"Average responses per principle: {average_responses:.2f}")
print(f"Most common principle: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common principle: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")


In [138]:
#################
# A.4.3
#################


import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'A.4.3'
df = df[df['A.4.3'].notna()]  # Remove NaNs
df['A.4.3'] = df['A.4.3'].astype(str).apply(lambda x: x.strip())
df = df[df['A.4.3'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['A.4.3'] = df['A.4.3'].apply(lambda x: re.split(r',\s*', x))

# Flatten the list of responses and count occurrences
all_responses = df['A.4.3'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title='A.4.3 Reasons for Not Integrating AI Systems',
             labels={'Option': 'Reasons for Not Integrating AI', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Reasons', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")


# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

question_text = "A.4.3 Reasons for Not Integrating AI Systems"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all reasons: {total_responses}")
print(f"Total unique reasons: {total_options}")
print(f"Average responses per reason: {average_responses:.2f}")
print(f"Most common reason: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common reason: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")


A.4.3 Reasons for Not Integrating AI Systems
Total valid responses: 61
Total responses across all reasons: 183
Total unique reasons: 10
Average responses per reason: 18.30
Most common reason: Data Protection and Right to Privacy (37 responses, 60.66% of respondents)
Least common reason: Democracy and Rule of Law (7 responses, 11.48% of respondents)

Breakdown of responses:
Data Protection and Right to Privacy: 37 responses (60.66% of respondents)
Accountability and Responsibility: 26 responses (42.62% of respondents)
Transparency and Explainability of AI Systems: 21 responses (34.43% of respondents)
Respect for Human Rights: 19 responses (31.15% of respondents)
All: 18 responses (29.51% of respondents)
Harm Prevention and Beneficence: 16 responses (26.23% of respondents)
Non-Discrimination and Freedom of Privileges: 14 responses (22.95% of respondents)
Fairness and Justice: 13 responses (21.31% of respondents)
Environment and Social Responsibility: 12 responses (19.67% of respondents)


In [4]:
#############################
# B.1.2
###################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses, keeping special categories together
def split_responses(response):
    if pd.isna(response):
        return []
    parts = str(response).split(',')
    result = []
    i = 0
    chatbot_category_added = False
    while i < len(parts):
        if i+1 < len(parts) and parts[i].strip() == "Other" and parts[i+1].strip() == "please explain":
            result.append("Other, please explain")
            i += 2
        elif i+1 < len(parts) and parts[i].strip() == "Programming Analysis (e.g." and parts[i+1].strip().startswith("Code Completion or Code Generation"):
            result.append("Programming analysis (e.g., Code completion or code generation)")
            i += 2
        elif not chatbot_category_added and (parts[i].strip() in ["Chatbots", "Personal Assistants or Recommender Systems"] or (i+1 < len(parts) and parts[i].strip() == "Personal Assistants" and parts[i+1].strip() == "or Recommender Systems")):
            result.append("Chatbots, Personal Assistants or Recommender Systems")
            chatbot_category_added = True
            i += 2 if parts[i].strip() == "Personal Assistants" else 1
        elif chatbot_category_added and parts[i].strip() in ["Chatbots", "Personal Assistants", "Recommender Systems", "Personal Assistants or Recommender Systems"]:
            i += 1  # Skip this part as we've already added the category
        else:
            result.append(parts[i].strip())
            i += 1
    return result

# Step 3: Process the responses using the split_responses function
df['B.1.2'] = df['B.1.2'].apply(split_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.1.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df[df['B.1.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title='B.1.2 Primary Applications of AI in Products',
             labels={'Option': 'AI Application', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='AI Application', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")


# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

question_text = "B.1.2 Primary Applications of AI in Products"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all applications: {total_responses}")
print(f"Total unique AI applications: {total_options}")
print(f"Average responses per application: {average_responses:.2f}")
print(f"Most common application: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common application: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")

B.1.2 Primary Applications of AI in Products
Total valid responses: 303
Total responses across all applications: 1632
Total unique AI applications: 18
Average responses per application: 90.67
Most common application: Chatbots, Personal Assistants or Recommender Systems (193 responses, 63.7% of respondents)
Least common application: Prefer not to say (1 responses, 0.33% of respondents)

Breakdown of responses:
Chatbots, Personal Assistants or Recommender Systems: 193 responses (63.7% of respondents)
Customer Service: 145 responses (47.85% of respondents)
Programming analysis (e.g., Code completion or code generation): 132 responses (43.56% of respondents)
Translation or Text Generation: 129 responses (42.57% of respondents)
Predictive Analysis: 123 responses (40.59% of respondents)
Robotics and Automation: 120 responses (39.6% of respondents)
Cybersecurity: 113 responses (37.29% of respondents)
Computer Vision: 101 responses (33.33% of respondents)
Financial Services: 97 responses (32.0

In [8]:
####################
# B.1.3
######################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.1.3'
df = df[df['B.1.3'].notna()]  # Remove NaNs
df['B.1.3'] = df['B.1.3'].astype(str).apply(lambda x: x.strip())
df = df[df['B.1.3'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.1.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
fig = px.pie(response_counts, names='Response', values='Count',
             title='Frequency of AI-Enhanced Tool Usage',
             color_discrete_sequence=px.colors.sequential.Blues,
             labels={'Response': 'Usage Frequency', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

total_responses = response_counts['Count'].sum()
total_options = len(response_counts)
average_responses = total_responses / total_options
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

question_text = "B.1.3 Frequency of AI-Enhanced Tool Usage"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total unique frequency options: {total_options}")
print(f"Average responses per option: {average_responses:.2f}")
print(f"Most common frequency: {most_common['Response']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common frequency: {least_common['Response']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in response_counts.iterrows():
    print(f"{row['Response']}: {row['Count']} responses ({row['Percentage']}% of respondents)")


B.1.3 Frequency of AI-Enhanced Tool Usage
Total valid responses: 362
Total unique frequency options: 7
Average responses per option: 51.71
Most common frequency: Daily (179 responses, 49.45% of respondents)
Least common frequency: How often do you use AI-enhanced tools in your work? (1 responses, 0.28% of respondents)

Breakdown of responses:
Daily: 179 responses (49.45% of respondents)
4-6 times a week: 88 responses (24.31% of respondents)
1-3 times a week: 71 responses (19.61% of respondents)
Rarely: 11 responses (3.04% of respondents)
Prefer not to say: 7 responses (1.93% of respondents)
Never: 5 responses (1.38% of respondents)
How often do you use AI-enhanced tools in your work?: 1 responses (0.28% of respondents)


In [12]:
##################
# B.1.4
##################  
import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.1.4'
df = df[df['B.1.4'].notna()]  # Remove NaNs
df['B.1.4'] = df['B.1.4'].astype(str).apply(lambda x: x.strip())
df = df[df['B.1.4'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['B.1.4'] = df['B.1.4'].apply(lambda x: re.split(r',\s*', x))

# Flatten the list of responses and count occurrences
all_responses = df['B.1.4'].explode().value_counts().reset_index()
all_responses.columns = ['Option', 'Count']

# Step 5: Calculate total number of valid responses (non-empty)
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Option', y='Count',
             title='B.1.4 Effect of AI-Enhanced Tools on Development Process and Workflow',
             labels={'Option': 'Impact of AI-Enhanced Tools', 'Count': 'Number of Responses'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}%'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Impact', yaxis_title='Number of Responses')

# Step 8: Show the figure
fig.show(renderer="browser")


# Statistical Analysis
total_responses = all_responses['Count'].sum()
total_options = len(all_responses)
average_responses = total_responses / total_options
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

question_text = "Track B: Effect of AI-Enhanced Tools on Development Process and Workflow"
print(question_text)
print(f"Total valid responses: {total_valid_responses}")
print(f"Total responses across all impacts: {total_responses}")
print(f"Total unique impacts: {total_options}")
print(f"Average responses per impact: {average_responses:.2f}")
print(f"Most common impact: {most_common['Option']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common impact: {least_common['Option']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Option']}: {row['Count']} responses ({row['Percentage']}% of respondents)")


Track B: Effect of AI-Enhanced Tools on Development Process and Workflow
Total valid responses: 302
Total responses across all impacts: 1287
Total unique impacts: 13
Average responses per impact: 99.00
Most common impact: Increased Efficiency (234 responses, 77.48% of respondents)
Least common impact: Prefer not to say (3 responses, 0.99% of respondents)

Breakdown of responses:
Increased Efficiency: 234 responses (77.48% of respondents)
Improved Accuracy: 167 responses (55.3% of respondents)
Cost Reduction: 160 responses (52.98% of respondents)
Innovation and Creativity: 141 responses (46.69% of respondents)
Improved Research Quality: 140 responses (46.36% of respondents)
Enhanced Decision Making: 120 responses (39.74% of respondents)
Training and Skill Development: 112 responses (37.09% of respondents)
Streamlined Workflows: 111 responses (36.75% of respondents)
Security and Privacy Concerns: 70 responses (23.18% of respondents)
No Significant Impact: 19 responses (6.29% of responden

In [7]:
import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split multiple responses (assumes responses are comma-separated)
def split_responses(response):
    if pd.isna(response):
        return []
    # Split by comma and strip any extra whitespace
    return [part.strip() for part in str(response).split(',')]

# Step 3: Process the responses for b.1.5 using the split_responses function
df['B.1.5'] = df['B.1.5'].apply(split_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.1.5'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Tool or Technology', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.1.5'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each tool/technology
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Print quantitative analysis
print("Quantitative Analysis for B.1.5: Tools and Technologies used to develop or deploy your products")
print(f"Total valid responses: {total_valid_responses}")

print("\nBreakdown of responses:")
breakdown = ", ".join([
    f"{row['Tool or Technology']}: {row['Count']} mentions ({row['Percentage']}% of respondents)"
    for _, row in all_responses.iterrows()
])
print(breakdown)

# Calculate and print summary statistics
total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
summary_stats = [
    f"Total mentions across all tools/technologies: {total_mentions}",
    f"Average mentions per tool/technology: {average_mentions:.2f}",
    f"Most common tool/technology: {all_responses.iloc[0]['Tool or Technology']} ({all_responses.iloc[0]['Count']} mentions, {all_responses.iloc[0]['Percentage']}% of respondents)",
    f"Least common tool/technology: {all_responses.iloc[-1]['Tool or Technology']} ({all_responses.iloc[-1]['Count']} mentions, {all_responses.iloc[-1]['Percentage']}% of respondents)"
]
print("\n" + ", ".join(summary_stats))

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(
    all_responses, 
    x='Tool or Technology', 
    y='Count',
    title='B.1.5: Tools and Technologies used in Product Development/Deployment',
    labels={'Tool or Technology': 'Tool/Technology', 'Count': 'Number of Mentions'},
    color='Count',
    color_continuous_scale='Blues',
    text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents')
)

# Update layout to enhance readability
fig.update_layout(xaxis_tickangle=45, xaxis_title='Tool/Technology', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")


Quantitative Analysis for B.1.5: Tools and Technologies used to develop or deploy your products
Total valid responses: 300

Breakdown of responses:
Google Cloud AI: 148 mentions (49.33% of respondents), Model provider APIs: 128 mentions (42.67% of respondents), Anthropic: 128 mentions (42.67% of respondents), DeepInfra: 128 mentions (42.67% of respondents), OpenAI: 128 mentions (42.67% of respondents), i.e.: 128 mentions (42.67% of respondents), LLama: 128 mentions (42.67% of respondents), Azure ML: 97 mentions (32.33% of respondents), PyTorch: 89 mentions (29.67% of respondents), AWS SageMaker: 75 mentions (25.0% of respondents), Pandas: 73 mentions (24.33% of respondents), TensorFlow / Keras: 72 mentions (24.0% of respondents), Convolutional Neural Network (CNN): 66 mentions (22.0% of respondents), HuggingFace: 64 mentions (21.33% of respondents), Sci-Kit Learn / NLTK: 62 mentions (20.67% of respondents), OpenCV: 58 mentions (19.33% of respondents), Transformer Based LLM: 53 mentions

In [12]:
import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.2.2'
df = df[df['B.2.2'].notna()]  # Remove NaNs
df['B.2.2'] = df['B.2.2'].astype(str).apply(lambda x: x.strip())
df = df[df['B.2.2'] != ""]  # Remove empty strings

# Step 4: Process the responses
# Split the strings by comma and normalize by stripping whitespace
df['B.2.2'] = df['B.2.2'].apply(lambda x: [item.strip() for item in x.split(',')])

# Step 5: Flatten the list of responses and count occurrences
all_responses = df['B.2.2'].explode().value_counts().reset_index()
all_responses.columns = ['Principle', 'Count']

# Step 6: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.2.2'].apply(len) > 0].shape[0]

# Step 7: Calculate the percentage of rows that mention each principle
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 8: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Principle', y='Count',
             title='B.2.2 Principles Considered in Product Development or Deployment',
             labels={'Principle': 'Ethical Principle', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'<b><span style="font-weight:900;">{x}</span></b>'))

# Update layout to match the visual
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Ethical Principle',
    yaxis_title='Number of Mentions',
    title={
        'text': 'B.2.2 Principles Considered in Product Development or Deployment',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font=dict(size=12),
    margin=dict(t=150),
    yaxis=dict(range=[0, all_responses['Count'].max() * 1.15]),
    coloraxis_showscale=False
)

# Update traces to position text just above the bars and ensure uniform text size
fig.update_traces(
    textposition='outside',
    textfont=dict(size=16, family='sans-serif', weight='bold'),
    cliponaxis=False
)

# Step 9: Show the figure
fig.show(renderer="browser")

In [7]:
import pandas as pd
import os
import re

# Define file path
file_path = 'AI_Study_Accepted_with_replacement_codes.csv'

# Demographic columns and their mappings (using column numbers)
demographics = {
    'Location': {
        'column': 28,  # Column number for Location
        'mapping': {
            'North America': 'US',
            'Central/South America': 'Other',
            'EU/UK/EEA': 'Europe',
            'Europe - Outside of EU/UK/EEA': 'Europe',
            'Africa': 'Other',
            'Middle East': 'Other',
            'Asia': 'Other',
            'Australia and Oceania': 'Other',
            'Prefer not to say': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Company Type': {
        'column': 26,  # Column number for Company Type
        'mapping': {
            'Multi-national Corporate': 'Multi-national',
            'Startup/Small Business': 'Startup/Small',
            'Academic Institution/Research Center': 'Academic/Research',
            'Government': 'Government',
            'Individual': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Role': {
        'column': 30,  # Column number for Role
        'mapping': {
            'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
            'AI Manager': 'AI Manager',
            'Requirements Analyst or Engineer': 'Requirements analyst',
            'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
            'AI Engineer or Developer': 'AI developers',
            '(Software) Developer, Designer, or Architect': 'AI developers',
            'Data Scientist or Data Analyst': 'AI developers',
            'Information Security Analyst or Engineer': 'Security/Privacy',
            'Information Privacy Analyst or Engineer': 'Security/Privacy',
            'AI Ethicist': 'Other',
            'AI Researcher': 'AI Researcher',
            '(Software) Quality Assurance Engineer or Tester': 'QA and Maintenance',
            'Other, please specify': 'Other'
        }
    },
    'Education': {
        'column': 21,  # Column number for Education
        'mapping': {
            "High School Degree": "High School Degree",
            "Bachelor's Degree": "Bachelor's Degree",
            "Master's Degree (i.e., MSc., M.A., etc.)": "Graduate Education",
            "MBA (Master of Business Administration)": "Graduate Education",
            "Graduate Certificates": "Graduate Education",
            "Ph.D.": "Ph.D.",
            "Other, please specify": "Other"
        }
    },
    'Dev Experience': {
        'column': 32,  # Column number for Dev Experience
        'mapping': {
            'None': 'None',
            '1-2 Years': '1-2 Years',
            '2-5 Years': '2-5 Years',
            '5-10 Years': '5-10 Years',
            '10+ Years': '10+ Years'
        }
    },
    'Gender': {
        'column': 19,  # Column number for Gender
        'mapping': {  # No predefined mapping for gender; use unique values directly
            'Male': 'Male',
            'Female': 'Female',
            'Non-binary / Third gender': 'Non-binary / Third gender',
            'Prefer not to say': 'Prefer not to say',
            'Other, please specify': 'Other'
        }
    },
    'Company Size': {
        'column': 25,  # Column number for Company Size
        'mapping': {
            '1-5 Employees': '1-5 Employees',
            '6-20 Employees': '6-20 Employees',
            '21-50 Employees': '21-50 Employees',
            '51-100 Employees': '51-100 Employees',
            '101+ Employees': '100+ Employees'
        }
    }
}

def read_and_clean_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        print(f"Successfully read {file_path}.")
        return df
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with utf-8 encoding.")
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

def process_data(df, demographic):
    try:
        if demographics[demographic]['mapping']:
            categories = set(demographics[demographic]['mapping'].values())
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']].map(demographics[demographic]['mapping'])
        else:
            categories = df.iloc[:, demographics[demographic]['column']].dropna().unique()
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']]

        all_results = {}

        for category in categories:
            df_category = df[df['GroupedCategory'] == category]

            # Step 2: Exclude the first two rows
            df_category = df_category.iloc[2:]

            # Step 3: Remove NaNs or empty values in 'B.2.2'
            df_category = df_category[df_category['B.2.2'].notna()]  # Remove NaNs
            df_category['B.2.2'] = df_category['B.2.2'].astype(str).apply(lambda x: x.strip())
            df_category = df_category[df_category['B.2.2'] != ""]  # Remove empty strings

            # Step 4: Process the responses
            df_category['B.2.2'] = df_category['B.2.2'].apply(lambda x: [item.strip() for item in x.split(',')])

            # Step 5: Flatten the list of responses and count occurrences
            all_responses = df_category['B.2.2'].explode().value_counts().reset_index()
            all_responses.columns = ['Principle', 'Count']

            # Step 6: Calculate total number of valid responses (rows with non-empty lists)
            total_valid_responses = df_category[df_category['B.2.2'].apply(len) > 0].shape[0]

            # Step 7: Calculate the percentage of rows that mention each principle
            all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

            all_results[category] = all_responses

        # Combine and rank results
        combined_results = pd.concat(all_results, names=[demographic, 'Category'])
        combined_results = combined_results.reset_index()

        ranked_results = combined_results.groupby(['Principle', demographic])['Percentage'].mean().reset_index()
        ranked_results = ranked_results.sort_values(by=['Principle', 'Percentage'], ascending=[True, False])

        return ranked_results
    except KeyError as e:
        print(f"Error: Column '{e}' not found in the DataFrame. Check your demographics dictionary and CSV file.")
        return pd.DataFrame()  # Return an empty DataFrame to avoid further errors

# Main execution
df = read_and_clean_csv(file_path)
if df is not None:
    print(df.columns)  # Print the columns for debugging
    for demographic in demographics:
        print(f"Processing {demographic}...")
        ranked_results = process_data(df, demographic)  # Call the updated function name

        # Print the ranked results
        print(f"\nRanked Results for {demographic}:\n")
        print(ranked_results.to_markdown(index=False, numalign="left", stralign="left"))

Successfully read AI_Study_Accepted_with_replacement_codes.csv.
Index(['Remove? (Yes/No/Maybe)', 'StartDate', 'EndDate', 'Finished',
       'RecordedDate', 'Unnamed: 5', 'ResponseId', 'UserLanguage',
       'Q_RecaptchaScore', 'Q_RelevantIDDuplicate',
       ...
       'C.1 _5', 'C.1 _6', 'C.1 _7', 'C.1 _8', 'C.2', 'C.3.a', 'C.3_1_TEXT',
       'C.3_2_TEXT', 'C.3_3_TEXT', 'PROLIFIC_PID'],
      dtype='object', length=234)
Processing Location...

Ranked Results for Location:

| Principle                                     | Location   | Percentage   |
|:----------------------------------------------|:-----------|:-------------|
| Accountability and Responsibility             | Other      | 42.42        |
| Accountability and Responsibility             | US         | 37.41        |
| Accountability and Responsibility             | Europe     | 37.18        |
| All                                           | US         | 21.58        |
| All                                           | Ot

In [10]:
import pandas as pd

# Define file path
file_path = 'AI_Study_Accepted_with_replacement_codes.csv'

# Demographic columns and their mappings (using column numbers)
demographics = {
    'Location': {
        'column': 28,  # Column number for Location
        'mapping': {
            'North America': 'US',
            'Central/South America': 'Other',
            'EU/UK/EEA': 'Europe',
            'Europe - Outside of EU/UK/EEA': 'Europe',
            'Africa': 'Other',
            'Middle East': 'Other',
            'Asia': 'Other',
            'Australia and Oceania': 'Other',
            'Prefer not to say': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Company Type': {
        'column': 26,  # Column number for Company Type
        'mapping': {
            'Multi-national Corporate': 'Multi-national',
            'Startup/Small Business': 'Startup/Small',
            'Academic Institution/Research Center': 'Academic/Research',
            'Government': 'Government',
            'Individual': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Role': {
        'column': 30,  # Column number for Role
        'mapping': {
            'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
            'AI Manager': 'AI Manager',
            'Requirements Analyst or Engineer': 'Requirements analyst',
            'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
            'AI Engineer or Developer': 'AI developers',
            '(Software) Developer, Designer, or Architect': 'AI developers',
            'Data Scientist or Data Analyst': 'AI developers',
            'Information Security Analyst or Engineer': 'Security/Privacy',
            'Information Privacy Analyst or Engineer': 'Security/Privacy',
            'AI Ethicist': 'Other',
            'AI Researcher': 'AI Researcher',
            '(Software) Quality Assurance Engineer or Tester': 'QA and Maintenance',
            'Other, please specify': 'Other'
        }
    },
    'Education': {
        'column': 21,  # Column number for Education
        'mapping': {
            "High School Degree": "High School Degree",
            "Bachelor's Degree": "Bachelor's Degree",
            "Master's Degree (i.e., MSc., M.A., etc.)": "Graduate Education",
            "MBA (Master of Business Administration)": "Graduate Education",
            "Graduate Certificates": "Graduate Education",
            "Ph.D.": "Ph.D.",
            "Other, please specify": "Other"
        }
    },
    'Dev Experience': {
        'column': 32,  # Column number for Dev Experience
        'mapping': {
            'None': 'None',
            '1-2 Years': '1-2 Years',
            '2-5 Years': '2-5 Years',
            '5-10 Years': '5-10 Years',
            '10+ Years': '10+ Years'
        }
    },
    'Gender': {
        'column': 19,  # Column number for Gender
        'mapping': {  # No predefined mapping for gender; use unique values directly
            'Male': 'Male',
            'Female': 'Female',
            'Non-binary / Third gender': 'Non-binary / Third gender',
            'Prefer not to say': 'Prefer not to say',
            'Other, please specify': 'Other'
        }
    },
    'Company Size': {
        'column': 25,  # Column number for Company Size
        'mapping': {
            '1-5 Employees': '1-5 Employees',
            '6-20 Employees': '6-20 Employees',
            '21-50 Employees': '21-50 Employees',
            '51-100 Employees': '51-100 Employees',
            '101+ Employees': '100+ Employees'
        }
    }
}

def read_and_clean_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        print(f"Successfully read {file_path}.")
        return df
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with utf-8 encoding.")
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

def process_data(df, demographic):
    try:
        if demographics[demographic]['mapping']:
            categories = set(demographics[demographic]['mapping'].values())
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']].map(demographics[demographic]['mapping'])
        else:
            categories = df.iloc[:, demographics[demographic]['column']].dropna().unique()
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']]

        all_results = {}

        for category in categories:
            df_category = df[df['GroupedCategory'] == category]

            # Step 2: Exclude the first two rows
            df_category = df_category.iloc[2:]

            # Step 3: Remove NaNs or empty values in 'B.2.3'
            df_category = df_category[df_category['B.2.3'].notna()]  # Remove NaNs
            df_category['B.2.3'] = df_category['B.2.3'].astype(str).apply(lambda x: x.strip())
            df_category = df_category[df_category['B.2.3'] != ""]  # Remove empty strings

            # Step 4: Count occurrences of each response
            response_counts = df_category['B.2.3'].value_counts().reset_index()
            response_counts.columns = ['Response', 'Count']

            # Step 5: Calculate total number of valid responses
            total_valid_responses = df_category.shape[0]

            # Step 6: Calculate the percentage of each option based on valid responses
            response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

            # --- Calculate percentage considering "at least sometimes" ---
            at_least_sometimes_percentage = response_counts[response_counts['Response'].isin(['Sometimes', 'Often', 'Always'])][
                'Percentage'
            ].sum()

            all_results[category] = pd.DataFrame({'Response': ['At Least Sometimes'], 'Percentage': [at_least_sometimes_percentage]})

        # Combine and rank results
        combined_results = pd.concat(all_results, names=[demographic, 'Category'])
        combined_results = combined_results.reset_index()

        ranked_results = combined_results.groupby([demographic])['Percentage'].mean().reset_index()

        # Sort by average percentage in descending order
        ranked_results = ranked_results.sort_values(by=['Percentage'], ascending=[False])

        return ranked_results
    except KeyError as e:
        print(f"Error: Column '{e}' not found in the DataFrame. Check your demographics dictionary and CSV file.")
        return pd.DataFrame()  # Return an empty DataFrame to avoid further errors

# Main execution
df = read_and_clean_csv(file_path)
if df is not None:
    print(df.columns)  # Print the columns for debugging
    for demographic in demographics:
        print(f"Processing {demographic}...")
        ranked_results = process_data(df, demographic)

        # Print the ranked results
        print(f"\nRanked Results for {demographic}:\n")
        print(ranked_results.to_markdown(index=False, numalign="left", stralign="left"))

Successfully read AI_Study_Accepted_with_replacement_codes.csv.
Index(['Remove? (Yes/No/Maybe)', 'StartDate', 'EndDate', 'Finished',
       'RecordedDate', 'Unnamed: 5', 'ResponseId', 'UserLanguage',
       'Q_RecaptchaScore', 'Q_RelevantIDDuplicate',
       ...
       'C.1 _5', 'C.1 _6', 'C.1 _7', 'C.1 _8', 'C.2', 'C.3.a', 'C.3_1_TEXT',
       'C.3_2_TEXT', 'C.3_3_TEXT', 'PROLIFIC_PID'],
      dtype='object', length=234)
Processing Location...

Ranked Results for Location:

| Location   | Percentage   |
|:-----------|:-------------|
| Other      | 89.55        |
| US         | 89.19        |
| Europe     | 79.75        |
Processing Company Type...

Ranked Results for Company Type:

| Company Type      | Percentage   |
|:------------------|:-------------|
| Government        | 95.65        |
| Academic/Research | 90.62        |
| Multi-national    | 86.28        |
| Startup/Small     | 84.54        |
| Other             | 82.86        |
Processing Role...

Ranked Results for Role:

| R

In [25]:
import pandas as pd
import plotly.express as px
import os
import re

# Define file path
file_path = 'AI_Study_Accepted_with_replacement_codes.csv'

# Create a folder named 'graphs/b.2.4' if it doesn't exist
if not os.path.exists('graphs/b.2.4'):
    os.makedirs('graphs/b.2.4')

# Demographic columns and their mappings (using column numbers)
demographics = {
    'Location': {
        'column': 28,  # Column number for Location
        'mapping': {
            'North America': 'US',
            'Central/South America': 'Other',
            'EU/UK/EEA': 'Europe',
            'Europe - Outside of EU/UK/EEA': 'Europe',
            'Africa': 'Other',
            'Middle East': 'Other',
            'Asia': 'Other',
            'Australia and Oceania': 'Other',
            'Prefer not to say': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Company Type': {
        'column': 26,  # Column number for Company Type
        'mapping': {
            'Multi-national Corporate': 'Multi-national',
            'Startup/Small Business': 'Startup/Small',
            'Academic Institution/Research Center': 'Academic/Research',
            'Government': 'Government',
            'Individual': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Role': {
        'column': 30,  # Column number for Role
        'mapping': {
            'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
            'AI Manager': 'AI Manager',
            'Requirements Analyst or Engineer': 'Requirements analyst',
            'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
            'AI Engineer or Developer': 'AI developers',
            '(Software) Developer, Designer, or Architect': 'AI developers',
            'Data Scientist or Data Analyst': 'AI developers',
            'Information Security Analyst or Engineer': 'Security/Privacy',
            'Information Privacy Analyst or Engineer': 'Security/Privacy',
            'AI Ethicist': 'Other',
            'AI Researcher': 'AI Researcher',
            '(Software) Quality Assurance Engineer or Tester': 'QA and Maintenance',
            'Other, please specify': 'Other'
        }
    },
    'Education': {
        'column': 21,  # Column number for Education
        'mapping': {
            "High School Degree": "High School Degree",
            "Bachelor's Degree": "Bachelor's Degree",
            "Master's Degree (i.e., MSc., M.A., etc.)": "Graduate Education",
            "MBA (Master of Business Administration)": "Graduate Education",
            "Graduate Certificates": "Graduate Education",
            "Ph.D.": "Ph.D.",
            "Other, please specify": "Other"
        }
    },
    'Dev Experience': {
        'column': 32,  # Column number for Dev Experience
        'mapping': {
            'None': 'None',
            '1-2 Years': '1-2 Years',
            '2-5 Years': '2-5 Years',
            '5-10 Years': '5-10 Years',
            '10+ Years': '10+ Years'
        }
    },
    'Gender': {
        'column': 19,  # Column number for Gender
        'mapping': {  # No predefined mapping for gender; use unique values directly
            'Male': 'Male',
            'Female': 'Female',
            'Non-binary / Third gender': 'Non-binary / Third gender',
            'Prefer not to say': 'Prefer not to say',
            'Other, please specify': 'Other'
        }
    },
    'Company Size': {
        'column': 25,  # Column number for Company Size
        'mapping': {
            '1-5 Employees': '1-5 Employees',
            '6-20 Employees': '6-20 Employees',
            '21-50 Employees': '21-50 Employees',
            '51-100 Employees': '51-100 Employees',
            '101+ Employees': '100+ Employees'
        }
    }
}

def read_and_clean_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        print(f"Successfully read {file_path}.")
        return df
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with utf-8 encoding.")
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

def process_data(df):
    # Step 2: Exclude the first two rows
    df = df.iloc[2:]

    # Step 3: Remove NaNs or empty values in 'B.2.4'
    df = df[df['B.2.4'].notna()]  # Remove NaNs
    df['B.2.4'] = df['B.2.4'].astype(str).apply(lambda x: x.strip())
    df = df[df['B.2.4'] != ""]  # Remove empty strings

    # Step 4: Process the responses
    df['B.2.4'] = df['B.2.4'].apply(lambda x: [item.strip() for item in x.split(',')])

    # Step 5: Flatten the list of responses and count occurrences
    all_responses = df['B.2.4'].explode().value_counts().reset_index()
    all_responses.columns = ['Principle', 'Count']

    # Step 6: Calculate total number of valid responses (rows with non-empty lists)
    total_valid_responses = df[df['B.2.4'].apply(len) > 0].shape[0]

    # Step 7: Calculate the percentage of rows that mention each principle
    all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

    # --- Rank demographics by principle ---

    all_results = {}
    for demographic in demographics:
        if demographics[demographic]['mapping']:
            categories = set(demographics[demographic]['mapping'].values())
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']].map(demographics[demographic]['mapping'])
        else:
            categories = df.iloc[:, demographics[demographic]['column']].dropna().unique()
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']]

        demographic_results = {}
        for category in categories:
            df_category = df[df['GroupedCategory'] == category]
            category_responses = df_category['B.2.4'].explode().value_counts().reset_index()
            category_responses.columns = ['Principle', 'Count']
            category_responses['Percentage'] = (category_responses['Count'] / len(df_category) * 100).round(2)
            demographic_results[category] = category_responses

        combined_results = pd.concat(demographic_results, names=[demographic, 'Category'])
        combined_results = combined_results.reset_index()

        ranked_results = combined_results.groupby(['Principle', demographic])['Percentage'].mean().reset_index()
        ranked_results = ranked_results.sort_values(by=['Principle', 'Percentage'], ascending=[True, False])

        all_results[demographic] = ranked_results

    return all_results

# Main execution
df = read_and_clean_csv(file_path)
if df is not None:
    print(df.columns)  # Print the columns for debugging
    all_ranked_results = process_data(df)

    # Determine the maximum number of unique demographic options across all demographics
    max_demographic_options = max([len(result[demographic].unique()) for demographic, result in all_ranked_results.items()])

    # Create a color scale with more saturated blues and yellows
    blue_colors = px.colors.sequential.Blues[2:]  # Start from a darker blue
    yellow_colors = px.colors.sequential.YlOrRd[1:]  # Start from a darker yellow
    color_scale = blue_colors[:max_demographic_options//2] + yellow_colors[max_demographic_options//2:]

    for demographic, ranked_results in all_ranked_results.items():
        # --- Create graph for each demographic ---
        fig = px.bar(
            ranked_results,
            x='Principle',  # Principles on the x-axis
            y='Percentage',
            color=demographic,  # Demographic options as colors
            title=f'AI Ethics Principles at Risk by {demographic}',
            labels={'Percentage': 'Average Percentage of Mentions', 'Principle': 'Principle'},
            color_discrete_sequence=color_scale,  # Use the generated color scale
            barmode='group'  # Grouped bar chart
        )

        fig.update_layout(
            xaxis_tickangle=45,
            xaxis_title='Principle',  # X-axis label
            yaxis_title='Average Percentage of Mentions',
            title={
                'y': 0.95,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            },
            font=dict(size=12),
            margin=dict(t=150),
            yaxis_range=[0, 100]  # Set y-axis range to 0-100
        )

        # Show the figure in browser
        fig.show(renderer="browser")

Successfully read AI_Study_Accepted_with_replacement_codes.csv.
Index(['Remove? (Yes/No/Maybe)', 'StartDate', 'EndDate', 'Finished',
       'RecordedDate', 'Unnamed: 5', 'ResponseId', 'UserLanguage',
       'Q_RecaptchaScore', 'Q_RelevantIDDuplicate',
       ...
       'C.1 _5', 'C.1 _6', 'C.1 _7', 'C.1 _8', 'C.2', 'C.3.a', 'C.3_1_TEXT',
       'C.3_2_TEXT', 'C.3_3_TEXT', 'PROLIFIC_PID'],
      dtype='object', length=234)


In [None]:
####################
# B.2.5
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = [part.strip() for part in str(response).split(',')]
    result = []
    current_item = ""
    for part in parts:
        if current_item:
            current_item += ", " + part
            if "Biases" in part or "Measures" in part:
                result.append(current_item)
                current_item = ""
        elif "Clean Data" in part:
            current_item = part
        elif "Use or Implement" in part:
            current_item = part
        else:
            result.append(part)
    if current_item:
        result.append(current_item)
    return result

# Step 3: Process the responses using the split_and_merge_responses function
df['B.2.5'] = df['B.2.5'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.2.5'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Mitigation Strategy', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.2.5'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each strategy
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Print quantitative analysis
print("Quantitative Analysis for B.2.5: Strategies to Mitigate Risks Associated with AI Technologies")
print(f"Total valid responses: {total_valid_responses}")

print("\nBreakdown of responses:")
breakdown = ", ".join([f"{row['Mitigation Strategy']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()])
print(breakdown)

# Calculate and print summary statistics
total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
summary_stats = [
    f"Total mentions across all strategies: {total_mentions}",
    f"Average mentions per strategy: {average_mentions:.2f}",
    f"Most common strategy: {all_responses.iloc[0]['Mitigation Strategy']} ({all_responses.iloc[0]['Count']} mentions, {all_responses.iloc[0]['Percentage']}% of respondents)",
    f"Least common strategy: {all_responses.iloc[-1]['Mitigation Strategy']} ({all_responses.iloc[-1]['Count']} mentions, {all_responses.iloc[-1]['Percentage']}% of respondents)"
]
print("\n" + ", ".join(summary_stats))

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Mitigation Strategy', y='Count',
             title='B.2.5 Strategies to Mitigate Risks Associated with AI Technologies',
             labels={'Mitigation Strategy': 'Strategy', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))  # Add percentage text

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Mitigation Strategy', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

In [21]:
import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # First, protect the specific phrases we want to keep together
    response = response.replace("Clean Data to Remove, Mitigate, or Minimize Biases", "Clean Data to Remove Mitigate or Minimize Biases")
    response = response.replace("Others, please specify", "Others please specify")
    parts = [part.strip() for part in response.split(',')]
    # Now, restore the original phrases
    parts = [part.replace("Clean Data to Remove Mitigate or Minimize Biases", "Clean Data to Remove, Mitigate, or Minimize Biases") for part in parts]
    parts = [part.replace("Others please specify", "Others, please specify") for part in parts]
    return parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.2.5'] = df['B.2.5'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.2.5'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Mitigation Strategy', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.2.5'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each strategy
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Mitigation Strategy', y='Count',
             title='B.2.5 Strategies to Mitigate Risks Associated with AI Technologies',
             labels={'Mitigation Strategy': 'Strategy', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'<b><span style="font-size:12px;">{x}</span></b>'))

# Step 8: Update layout to match the visual style
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Mitigation Strategy',
    yaxis_title='Number of Mentions',
    title={
        'text': 'B.2.5 Strategies to Mitigate Risks Associated with AI Technologies',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font=dict(size=12),
    margin=dict(t=150),
    yaxis=dict(range=[0, all_responses['Count'].max() * 1.15]),
    coloraxis_showscale=False
)

# Step 9: Update traces to position text just above the bars and ensure uniform text size
fig.update_traces(
    textposition='outside',
    textfont=dict(size=16, family='sans-serif', weight='bold'),
    cliponaxis=False
)

# Step 10: Show the figure
fig.show(renderer="browser")

# Print summary statistics
print(f"\nTotal valid responses: {total_valid_responses}")
print("\nBreakdown of responses:")
for _, row in all_responses.iterrows():
    print(f"{row['Mitigation Strategy']}: {row['Count']} mentions ({row['Percentage']}% of respondents)")

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
print(f"\nSummary Statistics:")
print(f"Total mentions across all strategies: {total_mentions}")
print(f"Average mentions per strategy: {average_mentions:.2f}")
print(f"Most common strategy: {all_responses.iloc[0]['Mitigation Strategy']} ({all_responses.iloc[0]['Count']} mentions, {all_responses.iloc[0]['Percentage']}% of respondents)")
print(f"Least common strategy: {all_responses.iloc[-1]['Mitigation Strategy']} ({all_responses.iloc[-1]['Count']} mentions, {all_responses.iloc[-1]['Percentage']}% of respondents)")


Total valid responses: 300

Breakdown of responses:
Clean Data to Remove, Mitigate, or Minimize Biases: 162 mentions (54.0% of respondents)
Monitor AI System Performance: 158 mentions (52.67% of respondents)
Use AI Testing and Validation: 149 mentions (49.67% of respondents)
Invest in Training and Education: 143 mentions (47.67% of respondents)
Develop Ethical Guidelines: 131 mentions (43.67% of respondents)
Implement Robust Security Measures: 130 mentions (43.33% of respondents)
Conduct Regular Audits and Assessments: 129 mentions (43.0% of respondents)
Use or Implement Privacy Enhancing Tools or Measures: 115 mentions (38.33% of respondents)
Implement Transparent and Explainable Approaches: 93 mentions (31.0% of respondents)
Implement Feedback Mechanisms: 88 mentions (29.33% of respondents)
Adopt AI Governance Frameworks: 79 mentions (26.33% of respondents)
Open Design and Development of Models and Datasets: 78 mentions (26.0% of respondents)
Conduct Ethical and Privacy Impact Asses

In [26]:
import pandas as pd
import plotly.express as px
import os
import re

# Define file path
file_path = 'AI_Study_Accepted_with_replacement_codes.csv'

# Create a folder named 'graphs/b.2.5' if it doesn't exist
if not os.path.exists('graphs/b.2.5'):
    os.makedirs('graphs/b.2.5')

# Demographic columns and their mappings (using column numbers)
demographics = {
    'Location': {
        'column': 28,  # Column number for Location
        'mapping': {
            'North America': 'US',
            'Central/South America': 'Other',
            'EU/UK/EEA': 'Europe',
            'Europe - Outside of EU/UK/EEA': 'Europe',
            'Africa': 'Other',
            'Middle East': 'Other',
            'Asia': 'Other',
            'Australia and Oceania': 'Other',
            'Prefer not to say': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Company Type': {
        'column': 26,  # Column number for Company Type
        'mapping': {
            'Multi-national Corporate': 'Multi-national',
            'Startup/Small Business': 'Startup/Small',
            'Academic Institution/Research Center': 'Academic/Research',
            'Government': 'Government',
            'Individual': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Role': {
        'column': 30,  # Column number for Role
        'mapping': {
            'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
            'AI Manager': 'AI Manager',
            'Requirements Analyst or Engineer': 'Requirements analyst',
            'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
            'AI Engineer or Developer': 'AI developers',
            '(Software) Developer, Designer, or Architect': 'AI developers',
            'Data Scientist or Data Analyst': 'AI developers',
            'Information Security Analyst or Engineer': 'Security/Privacy',
            'Information Privacy Analyst or Engineer': 'Security/Privacy',
            'AI Ethicist': 'Other',
            'AI Researcher': 'AI Researcher',
            '(Software) Quality Assurance Engineer or Tester': 'QA and Maintenance',
            'Other, please specify': 'Other'
        }
    },
    'Education': {
        'column': 21,  # Column number for Education
        'mapping': {
            "High School Degree": "High School Degree",
            "Bachelor's Degree": "Bachelor's Degree",
            "Master's Degree (i.e., MSc., M.A., etc.)": "Graduate Education",
            "MBA (Master of Business Administration)": "Graduate Education",
            "Graduate Certificates": "Graduate Education",
            "Ph.D.": "Ph.D.",
            "Other, please specify": "Other"
        }
    },
    'Dev Experience': {
        'column': 32,  # Column number for Dev Experience
        'mapping': {
            'None': 'None',
            '1-2 Years': '1-2 Years',
            '2-5 Years': '2-5 Years',
            '5-10 Years': '5-10 Years',
            '10+ Years': '10+ Years'
        }
    },
    'Gender': {
        'column': 19,  # Column number for Gender
        'mapping': {  # No predefined mapping for gender; use unique values directly
            'Male': 'Male',
            'Female': 'Female',
            'Non-binary / Third gender': 'Non-binary / Third gender',
            'Prefer not to say': 'Prefer not to say',
            'Other, please specify': 'Other'
        }
    },
    'Company Size': {
        'column': 25,  # Column number for Company Size
        'mapping': {
            '1-5 Employees': '1-5 Employees',
            '6-20 Employees': '6-20 Employees',
            '21-50 Employees': '21-50 Employees',
            '51-100 Employees': '51-100 Employees',
            '101+ Employees': '100+ Employees'
        }
    }
}

def read_and_clean_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        print(f"Successfully read {file_path}.")
        return df
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with utf-8 encoding.")
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

def process_data(df):
    # Step 2: Exclude the first two rows
    df = df.iloc[2:]

    # Function to split responses and merge specific categories
    def split_and_merge_responses(response):
        if pd.isna(response):
            return []
        # First, protect the specific phrases we want to keep together
        response = response.replace("Clean Data to Remove, Mitigate, or Minimize Biases", "Clean Data to Remove Mitigate or Minimize Biases")
        response = response.replace("Others, please specify", "Others please specify")
        parts = [part.strip() for part in response.split(',')]
        # Now, restore the original phrases
        parts = [part.replace("Clean Data to Remove Mitigate or Minimize Biases", "Clean Data to Remove, Mitigate, or Minimize Biases") for part in parts]
        parts = [part.replace("Others please specify", "Others, please specify") for part in parts]
        return parts

    # Step 3: Process the responses using the split_and_merge_responses function
    df['B.2.5'] = df['B.2.5'].apply(split_and_merge_responses)

    # Step 4: Flatten the list of responses and count occurrences
    all_responses = df['B.2.5'].explode().dropna().value_counts().reset_index()
    all_responses.columns = ['Mitigation Strategy', 'Count']

    # Step 5: Calculate total number of valid responses (rows with non-empty lists)
    total_valid_responses = df[df['B.2.5'].apply(len) > 0].shape[0]

    # Step 6: Calculate the percentage of rows that mention each strategy
    all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

    # --- Rank demographics by principle ---

    all_results = {}
    for demographic in demographics:
        if demographics[demographic]['mapping']:
            categories = set(demographics[demographic]['mapping'].values())
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']].map(demographics[demographic]['mapping'])
        else:
            categories = df.iloc[:, demographics[demographic]['column']].dropna().unique()
            df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']]

        demographic_results = {}
        for category in categories:
            df_category = df[df['GroupedCategory'] == category]
            category_responses = df_category['B.2.5'].explode().value_counts().reset_index()
            category_responses.columns = ['Mitigation Strategy', 'Count']
            category_responses['Percentage'] = (category_responses['Count'] / len(df_category) * 100).round(2)
            demographic_results[category] = category_responses

        combined_results = pd.concat(demographic_results, names=[demographic, 'Category'])
        combined_results = combined_results.reset_index()

        ranked_results = combined_results.groupby(['Mitigation Strategy', demographic])['Percentage'].mean().reset_index()
        ranked_results = ranked_results.sort_values(by=['Mitigation Strategy', 'Percentage'], ascending=[True, False])

        all_results[demographic] = ranked_results

    return all_results

# Main execution
df = read_and_clean_csv(file_path)
if df is not None:
    print(df.columns)  # Print the columns for debugging
    all_ranked_results = process_data(df)

    # Determine the maximum number of unique demographic options across all demographics
    max_demographic_options = max([len(result[demographic].unique()) for demographic, result in all_ranked_results.items()])

    # Create a color scale with shades of blue and yellow
    blue_colors = px.colors.sequential.Blues[2:]  # Start from a darker blue
    yellow_colors = px.colors.sequential.YlOrRd[1:]  # Start from a darker yellow
    color_scale = blue_colors[:max_demographic_options//2] + yellow_colors[max_demographic_options//2:]

    for demographic, ranked_results in all_ranked_results.items():
        # --- Create graph for each demographic ---
        fig = px.bar(
            ranked_results,
            x='Mitigation Strategy',  # Mitigation strategies on the x-axis
            y='Percentage',
            color=demographic,  # Demographic options as colors
            title=f'Mitigation Strategies by {demographic}',
            labels={'Percentage': 'Average Percentage', 'Mitigation Strategy': 'Strategy'},
            color_discrete_sequence=color_scale,  # Use the generated color scale
            barmode='group'  # Grouped bar chart
        )

        fig.update_layout(
            xaxis_tickangle=45,
            xaxis_title='Mitigation Strategy',  # X-axis label
            yaxis_title='Average Percentage',
            title={
                'y': 0.95,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            },
            font=dict(size=12),
            margin=dict(t=150),
            yaxis_range=[0, 100]  # Set y-axis range to 0-100
        )

        # Show the figure in browser
        fig.show(renderer="browser")

        # Print the ranked results
        print(f"\nRanked Results for {demographic}:\n")
        print(ranked_results.to_markdown(index=False, numalign="left", stralign="left"))

Successfully read AI_Study_Accepted_with_replacement_codes.csv.
Index(['Remove? (Yes/No/Maybe)', 'StartDate', 'EndDate', 'Finished',
       'RecordedDate', 'Unnamed: 5', 'ResponseId', 'UserLanguage',
       'Q_RecaptchaScore', 'Q_RelevantIDDuplicate',
       ...
       'C.1 _5', 'C.1 _6', 'C.1 _7', 'C.1 _8', 'C.2', 'C.3.a', 'C.3_1_TEXT',
       'C.3_2_TEXT', 'C.3_3_TEXT', 'PROLIFIC_PID'],
      dtype='object', length=234)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/


Ranked Results for Location:

| Mitigation Strategy                                  | Location   | Percentage   |
|:-----------------------------------------------------|:-----------|:-------------|
| Adopt AI Governance Frameworks                       | Other      | 21.74        |
| Adopt AI Governance Frameworks                       | US         | 20.9         |
| Adopt AI Governance Frameworks                       | Europe     | 13.68        |
| Clean Data to Remove, Mitigate, or Minimize Biases   | Other      | 45.65        |
| Clean Data to Remove, Mitigate, or Minimize Biases   | Europe     | 41.03        |
| Clean Data to Remove, Mitigate, or Minimize Biases   | US         | 35.32        |
| Collaborate with Experts                             | Europe     | 26.5         |
| Collaborate with Experts                             | US         | 14.93        |
| Collaborate with Experts                             | Other      | 11.96        |
| Conduct Ethical and Privacy Impa

In [32]:
import pandas as pd
import os
import re

# Define file path
file_path = 'AI_Study_Accepted.csv'

# Demographic columns and their mappings (using column numbers)
demographics = {
    'Location': {
        'column': 28,  # Column number for Location
        'mapping': {
            'North America': 'US',
            'Central/South America': 'Other',
            'EU/UK/EEA': 'Europe',
            'Europe - Outside of EU/UK/EEA': 'Europe',
            'Africa': 'Other',
            'Middle East': 'Other',
            'Asia': 'Other',
            'Australia and Oceania': 'Other',
            'Prefer not to say': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Company Type': {
        'column': 26,  # Column number for Company Type
        'mapping': {
            'Multi-national Corporate': 'Multi-national',
            'Startup/Small Business': 'Startup/Small',
            'Academic Institution/Research Center': 'Academic/Research',
            'Government': 'Government',
            'Individual': 'Other',
            'Other, please specify': 'Other'
        }
    },
    'Role': {
        'column': 30,  # Column number for Role
        'mapping': {
            'Administrative role (CEO, Chief Technical Officer, Chief Operating Officer, Chief Information Officer)': 'AI Manager',
            'AI Manager': 'AI Manager',
            'Requirements Analyst or Engineer': 'Requirements analyst',
            'Scrum Master, Product Manager, or Project Manager': 'Requirements analyst',
            'AI Engineer or Developer': 'AI developers',
            '(Software) Developer, Designer, or Architect': 'AI developers',
            'Data Scientist or Data Analyst': 'AI developers',
            'Information Security Analyst or Engineer': 'Security/Privacy',
            'Information Privacy Analyst or Engineer': 'Security/Privacy',
            'AI Ethicist': 'Other',
            'AI Researcher': 'AI Researcher',
            '(Software) Quality Assurance Engineer or Tester': 'QA and Maintenance',
            'Other, please specify': 'Other'
        }
    },
    'Education': {
        'column': 21,  # Column number for Education
        'mapping': {
            "High School Degree": "High School Degree",
            "Bachelor's Degree": "Bachelor's Degree",
            "Master's Degree (i.e., MSc., M.A., etc.)": "Graduate Education",
            "MBA (Master of Business Administration)": "Graduate Education",
            "Graduate Certificates": "Graduate Education",
            "Ph.D.": "Ph.D.",
            "Other, please specify": "Other"
        }
    },
    'Dev Experience': {
        'column': 32,  # Column number for Dev Experience
        'mapping': {
            'None': 'None',
            '1-2 Years': '1-2 Years',
            '2-5 Years': '2-5 Years',
            '5-10 Years': '5-10 Years',
            '10+ Years': '10+ Years'
        }
    },
    'Gender': {
        'column': 19,  # Column number for Gender
        'mapping': {  # No predefined mapping for gender; use unique values directly
            'Male': 'Male',
            'Female': 'Female',
            'Non-binary / Third gender': 'Non-binary / Third gender',
            'Prefer not to say': 'Prefer not to say',
            'Other, please specify': 'Other'
        }
    },
    'Company Size': {
        'column': 25,  # Column number for Company Size
        'mapping': {
            '1-5 Employees': '1-5 Employees',
            '6-20 Employees': '6-20 Employees',
            '21-50 Employees': '21-50 Employees',
            '51-100 Employees': '51-100 Employees',
            '101+ Employees': '100+ Employees'
        }
    }
}

def read_and_clean_csv(file_path):
    try:
        df = pd.read_csv(file_path, encoding='utf-8')
        print(f"Successfully read {file_path}.")
        return df
    except UnicodeDecodeError:
        print(f"Failed to read {file_path} with utf-8 encoding.")
        return None
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None

def process_data(df, demographic):
    if demographics[demographic]['mapping']:
        categories = set(demographics[demographic]['mapping'].values())
        df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']].map(demographics[demographic]['mapping'])
    else:
        categories = df.iloc[:, demographics[demographic]['column']].dropna().unique()
        df['GroupedCategory'] = df.iloc[:, demographics[demographic]['column']]

    all_results = {}

    for category in categories:
        df_category = df[df['GroupedCategory'] == category]

        # Step 2: Exclude the first two rows
        df_category = df_category.iloc[2:]

        # Function to split responses and merge specific categories
        def split_and_merge_responses(response):
            if pd.isna(response):
                return []
            response = response.replace("Clean Data to Remove, Mitigate, or Minimize Biases", "Clean Data to Remove Mitigate or Minimize Biases")
            response = response.replace("Others, please specify", "Others please specify")
            parts = [part.strip() for part in response.split(',')]
            parts = [part.replace("Clean Data to Remove Mitigate or Minimize Biases", "Clean Data to Remove, Mitigate, or Minimize Biases") for part in parts]
            parts = [part.replace("Others please specify", "Others, please specify") for part in parts]
            return parts

        # Step 3: Process the responses using the split_and_merge_responses function
        df_category['B.2.5'] = df_category['B.2.5'].apply(split_and_merge_responses)

        # Step 4: Flatten the list of responses and count occurrences
        all_responses = df_category['B.2.5'].explode().dropna().value_counts().reset_index()
        all_responses.columns = ['Mitigation Strategy', 'Count']

        # Step 5: Calculate total number of valid responses (rows with non-empty lists)
        total_valid_responses = df_category[df_category['B.2.5'].apply(len) > 0].shape[0]

        # Step 6: Calculate the percentage of rows that mention each strategy
        all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

        all_results[category] = all_responses

    # Combine and rank results
    combined_results = pd.concat(all_results, names=[demographic, 'Category'])
    combined_results = combined_results.reset_index()

    ranked_results = combined_results.groupby(['Mitigation Strategy', demographic])['Percentage'].mean().reset_index()
    ranked_results = ranked_results.sort_values(by=['Mitigation Strategy', 'Percentage'], ascending=[True, False])

    return ranked_results

# Main execution
df = read_and_clean_csv(file_path)
if df is not None:
    print(df.columns)  # Print the columns for debugging

    for demographic in demographics:
        ranked_results = process_data(df, demographic)

        # --- Print ranked results for all strategies ---
        print(f"\nRanked Results for {demographic}:\n")
        for strategy in ranked_results['Mitigation Strategy'].unique():
            strategy_results = ranked_results[ranked_results['Mitigation Strategy'] == strategy]
            print(f"\nStrategy: {strategy}\n")
            print(strategy_results.to_markdown(index=False, numalign="left", stralign="left"))

        # --- Print overall ranked strategies for the demographic ---
        print(f"\nOverall Ranked Strategies for {demographic}:\n")
        overall_ranking = ranked_results.groupby('Mitigation Strategy')['Percentage'].mean().sort_values(ascending=False)
        for i, (strategy, percentage) in enumerate(overall_ranking.items()):
            print(f"{i+1}. {strategy}: {percentage:.2f}%") 

Successfully read AI_Study_Accepted.csv.
Index(['Remove? (Yes/No/Maybe)', 'StartDate', 'EndDate', 'Finished',
       'RecordedDate', 'Unnamed: 5', 'ResponseId', 'UserLanguage',
       'Q_RecaptchaScore', 'Q_RelevantIDDuplicate',
       ...
       'C.1 _5', 'C.1 _6', 'C.1 _7', 'C.1 _8', 'C.2', 'C.3.1', 'C.3_1_TEXT',
       'C.3_2_TEXT', 'C.3_3_TEXT', 'PROLIFIC_PID'],
      dtype='object', length=234)

Ranked Results for Location:


Strategy: Adopt AI Governance Frameworks

| Mitigation Strategy            | Location   | Percentage   |
|:-------------------------------|:-----------|:-------------|
| Adopt AI Governance Frameworks | Other      | 35.11        |
| Adopt AI Governance Frameworks | US         | 26.95        |
| Adopt AI Governance Frameworks | Europe     | 21.11        |

Strategy: Clean Data to Remove, Mitigate, or Minimize Biases

| Mitigation Strategy                                | Location   | Percentage   |
|:---------------------------------------------------|:-------

In [107]:
####################
# B.9.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.9.1'
df = df[df['B.9.1'].notna()]  # Remove NaNs
df['B.9.1'] = df['B.9.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.9.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.9.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
fig = px.pie(response_counts, names='Response', values='Count',
             title='B.9.1 Effectiveness of AI Ethics Principles Integration',
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Effectiveness Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print("B.9.1 Effectiveness of AI Ethics Principles Integration")

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

# Calculate additional statistics
total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all effectiveness levels: {total_mentions}, ", end="")
print(f"Average mentions per effectiveness level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.9.1 Effectiveness of AI Ethics Principles Integration
Total respondents: 65
Breakdown of responses: Moderately Effectively: 19 responses (29.23%), Extremely Effectively: 19 responses (29.23%), Somewhat Effectively: 17 responses (26.15%), Moderately Ineffectively: 6 responses (9.23%), Extremely Ineffectively: 3 responses (4.62%), Prefer not to say: 1 responses (1.54%)
Total mentions across all effectiveness levels: 65, Average mentions per effectiveness level: 10.83, Most common response: Moderately Effectively (19 mentions, 29.23% of respondents), Least common response: Prefer not to say (1 mentions, 1.54% of respondents)


In [111]:
####################
# B.9.2
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = [part.strip() for part in str(response).split(',')]
    return parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.9.2'] = df['B.9.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.9.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Action', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.9.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each action
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Action', y='Count',
             title='B.9.2 Actions Taken to Promote Ethical Practices in AI',
             labels={'Action': 'Action', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Action', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
question_text = "B.9.2 Which of the following actions has your company taken to promote ethical practices in AI? Select all that apply."
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Action']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all actions: {total_mentions}, ", end="")
print(f"Average mentions per action: {average_mentions:.2f}, ", end="")
print(f"Most common action: {most_common['Action']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common action: {least_common['Action']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.9.2 Which of the following actions has your company taken to promote ethical practices in AI? Select all that apply.
Total respondents: 64
Breakdown of responses: Provided AI ethics training for employees: 40 mentions (62.5% of respondents), Implemented ethical guidelines for AI development and deployment: 37 mentions (57.81% of respondents), Established an AI ethics committee: 35 mentions (54.69% of respondents), Engages with external experts on AI ethics: 34 mentions (53.12% of respondents), Regularly audits AI systems for ethical compliance: 32 mentions (50.0% of respondents), Prefer not to say: 3 mentions (4.69% of respondents)
Total mentions across all actions: 181, Average mentions per action: 30.17, Most common action: Provided AI ethics training for employees (40 mentions, 62.5% of respondents), Least common action: Prefer not to say (3 mentions, 4.69% of respondents)


In [109]:
####################
# B.9.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = [part.strip() for part in str(response).split(',')]
    return parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.9.3'] = df['B.9.3'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.9.3'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Responsible Party', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.9.3'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each responsible party
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
fig = px.bar(all_responses, x='Responsible Party', y='Count',
             title='B.9.3 Responsibility for Ensuring AI Ethics Principles are Followed',
             labels={'Responsible Party': 'Responsible Party', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Responsible Party', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
question_text = "B.9.3 Who is primarily responsible for ensuring AI ethics principles are followed in your company? Select all that applies."
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Responsible Party']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all responsible parties: {total_mentions}, ", end="")
print(f"Average mentions per responsible party: {average_mentions:.2f}, ", end="")
print(f"Most common responsible party: {most_common['Responsible Party']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common responsible party: {least_common['Responsible Party']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.9.3 Who is primarily responsible for ensuring AI ethics principles are followed in your company? Select all that applies.
Total respondents: 65
Breakdown of responses: AI Development Team: 43 mentions (66.15% of respondents), Upper Management: 39 mentions (60.0% of respondents), Internal Legal and Compliance Team: 28 mentions (43.08% of respondents), AI Ethics Committee: 26 mentions (40.0% of respondents), Prefer not to say: 1 mentions (1.54% of respondents)
Total mentions across all responsible parties: 137, Average mentions per responsible party: 27.40, Most common responsible party: AI Development Team (43 mentions, 66.15% of respondents), Least common responsible party: Prefer not to say (1 mentions, 1.54% of respondents)


In [110]:
####################
# B.9.4
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.9.4'
df = df[df['B.9.4'].notna()]  # Remove NaNs
df['B.9.4'] = df['B.9.4'].astype(str).apply(lambda x: x.strip())
df = df[df['B.9.4'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.9.4'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
fig = px.pie(response_counts, names='Response', values='Count',
             title='B.9.4 Frequency of AI Ethics Principles Influencing Business Decisions',
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Frequency', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
question_text = "B.9.4 How often do AI ethics principles influence business decisions in your company?"
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all frequency levels: {total_mentions}, ", end="")
print(f"Average mentions per frequency level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.9.4 How often do AI ethics principles influence business decisions in your company?
Total respondents: 64
Breakdown of responses: Often: 30 responses (46.88%), Sometimes: 15 responses (23.44%), Always: 10 responses (15.62%), Never: 6 responses (9.38%), Rarely: 3 responses (4.69%)
Total mentions across all frequency levels: 64, Average mentions per frequency level: 12.80, Most common response: Often (30 mentions, 46.88% of respondents), Least common response: Rarely (3 mentions, 4.69% of respondents)


In [113]:
####################
# B.10.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.10.1'
df = df[df['B.10.1'].notna()]  # Remove NaNs
df['B.10.1'] = df['B.10.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.10.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.10.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.10.1 How often do you include ethical considerations in your AI requirements documentation?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Frequency', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all frequency levels: {total_mentions}, ", end="")
print(f"Average mentions per frequency level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.10.1 How often do you include ethical considerations in your AI requirements documentation?
Total respondents: 35
Breakdown of responses: Often: 12 responses (34.29%), Always: 8 responses (22.86%), Sometimes: 7 responses (20.0%), Rarely: 6 responses (17.14%), Never: 2 responses (5.71%)
Total mentions across all frequency levels: 35, Average mentions per frequency level: 7.00, Most common response: Often (12 mentions, 34.29% of respondents), Least common response: Never (2 mentions, 5.71% of respondents)


In [114]:
####################
# B.10.2
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.10.2'
df = df[df['B.10.2'].notna()]  # Remove NaNs
df['B.10.2'] = df['B.10.2'].astype(str).apply(lambda x: x.strip())
df = df[df['B.10.2'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.10.2'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.10.2 How do ethical requirements impact the overall AI project lifecycle?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Impact', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all impact levels: {total_mentions}, ", end="")
print(f"Average mentions per impact level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.10.2 How do ethical requirements impact the overall AI project lifecycle?
Total respondents: 35
Breakdown of responses: Somewhat enhance the project outcomes: 17 responses (48.57%), Somewhat hinders the project outcomes: 9 responses (25.71%), Significantly enhance the project outcomes: 4 responses (11.43%), No impact at all on the project outcomes: 3 responses (8.57%), Significantly hinders the project outcomes: 1 responses (2.86%), Prefer not to say: 1 responses (2.86%)
Total mentions across all impact levels: 35, Average mentions per impact level: 5.83, Most common response: Somewhat enhance the project outcomes (17 mentions, 48.57% of respondents), Least common response: Prefer not to say (1 mentions, 2.86% of respondents)


In [115]:
####################
# B.10.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = [part.strip() for part in str(response).split(',')]
    return parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.10.3'] = df['B.10.3'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.10.3'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Principle', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.10.3'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each principle
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.10.3 Which AI ethics principles do you prioritize when defining AI requirements? Select all that apply."
fig = px.bar(all_responses, x='Principle', y='Count',
             title=question_text,
             labels={'Principle': 'AI Ethics Principle', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='AI Ethics Principle', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Principle']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all principles: {total_mentions}, ", end="")
print(f"Average mentions per principle: {average_mentions:.2f}, ", end="")
print(f"Most common principle: {most_common['Principle']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common principle: {least_common['Principle']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")####################
# B.10.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = [part.strip() for part in str(response).split(',')]
    return parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.10.3'] = df['B.10.3'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.10.3'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Principle', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.10.3'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each principle
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.10.3 Which AI ethics principles do you prioritize when defining AI requirements? Select all that apply."
fig = px.bar(all_responses, x='Principle', y='Count',
             title=question_text,
             labels={'Principle': 'AI Ethics Principle', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='AI Ethics Principle', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Principle']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all principles: {total_mentions}, ", end="")
print(f"Average mentions per principle: {average_mentions:.2f}, ", end="")
print(f"Most common principle: {most_common['Principle']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common principle: {least_common['Principle']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.10.3 Which AI ethics principles do you prioritize when defining AI requirements? Select all that apply.
Total respondents: 33
Breakdown of responses: Data Protection and Right to Privacy: 21 mentions (63.64% of respondents), Transparency and Explainability of AI Systems: 15 mentions (45.45% of respondents), Harm Prevention and Beneficence: 11 mentions (33.33% of respondents), Non-Discrimination and Freedom of Privileges: 10 mentions (30.3% of respondents), Fairness and Justice: 9 mentions (27.27% of respondents), Respect for Human Rights: 9 mentions (27.27% of respondents), Democracy and Rule of Law: 8 mentions (24.24% of respondents), Accountability and Responsibility: 6 mentions (18.18% of respondents), Environment and Social Responsibility: 4 mentions (12.12% of respondents), All: 4 mentions (12.12% of respondents)
Total mentions across all principles: 97, Average mentions per principle: 9.70, Most common principle: Data Protection and Right to Privacy (21 mentions, 63.64% of resp

In [2]:
####################
# B.11.1
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # Split by comma, but keep commas within quotes
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    # Clean up each part and merge "Other" with "please specify"
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.11.1'] = df['B.11.1'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.11.1'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Method', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.11.1'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each method
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.11.1 What methods do you use to mitigate biases in AI algorithms? Select all that apply."
fig = px.bar(all_responses, x='Method', y='Count',
             title=question_text,
             labels={'Method': 'Bias Mitigation Method', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer and set the title to bold
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Bias Mitigation Method',
    yaxis_title='Number of Mentions',
    title={
        'text': f"<b>{question_text}</b>",  # Wrap the title in <b> tags to make it bold
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Method']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all methods: {total_mentions}, ", end="")
print(f"Average mentions per method: {average_mentions:.2f}, ", end="")
print(f"Most common method: {most_common['Method']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common method: {least_common['Method']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.11.1 What methods do you use to mitigate biases in AI algorithms? Select all that apply.
Total respondents: 138
Breakdown of responses: Evaluating the results of the models: 83 mentions (60.14% of respondents), Ensure including diverse and representative training data: 67 mentions (48.55% of respondents), Regular bias audits and testing: 61 mentions (44.2% of respondents), Regular data cleaning: 60 mentions (43.48% of respondents), Peer reviews and collaborative development: 58 mentions (42.03% of respondents), User feedback and iterative improvements: 57 mentions (41.3% of respondents), Conducting regular ethics impact assessments: 48 mentions (34.78% of respondents), Identify and examine vulnerable groups in your AI system: 47 mentions (34.06% of respondents), Implementing fairness constraints in models: 46 mentions (33.33% of respondents), Using bias-aware algorithms: 39 mentions (28.26% of respondents), Finetuning decision boundaries: 35 mentions (25.36% of respondents), Prefer n

In [None]:
####################
# B.11.1 by Location
####################

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows and set the location column
df = df.iloc[2:]
df['Location'] = df.iloc[:, 28]  # Assuming the location is in column 29 (index 28)

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.11.1'] = df['B.11.1'].apply(split_and_merge_responses)

# Step 4: Create a new dataframe with location and response combinations
location_responses = df.explode('B.11.1')[['Location', 'B.11.1']].dropna()

# Step 5: Calculate percentages for each response within each location
location_percentages = location_responses.groupby('Location')['B.11.1'].value_counts(normalize=True).unstack().fillna(0) * 100

# Step 6: Create a subplot for each location
fig = make_subplots(rows=len(location_percentages), cols=1, 
                    subplot_titles=location_percentages.index,
                    vertical_spacing=0.05)

# Step 7: Add bar traces for each location
for i, location in enumerate(location_percentages.index, start=1):
    fig.add_trace(
        go.Bar(x=location_percentages.columns, y=location_percentages.loc[location],
               name=location, text=location_percentages.loc[location].round(1),
               textposition='outside'),
        row=i, col=1
    )

# Update layout
fig.update_layout(height=400*len(location_percentages), width=1200, 
                  title_text="B.11.1 Methods to Mitigate Biases in AI Algorithms by Location (Percentage)",
                  showlegend=False)

fig.update_xaxes(tickangle=45)
fig.update_yaxes(range=[0, 100])

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print("B.11.1 Methods to Mitigate Biases in AI Algorithms by Location")
print("\nPercentage of responses for each method by location:")
print(location_percentages.round(2).to_string())

print("\nMost common method for each location:")
for location in location_percentages.index:
    most_common = location_percentages.loc[location].idxmax()
    percentage = location_percentages.loc[location, most_common]
    print(f"{location}: {most_common} ({percentage:.2f}%)")

print("\nLeast common method for each location:")
for location in location_percentages.index:
    least_common = location_percentages.loc[location].idxmin()
    percentage = location_percentages.loc[location, least_common]
    print(f"{location}: {least_common} ({percentage:.2f}%)")

In [123]:
####################
# B.11.2
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.11.2'
df = df[df['B.11.2'].notna()]  # Remove NaNs
df['B.11.2'] = df['B.11.2'].astype(str).apply(lambda x: x.strip())
df = df[df['B.11.2'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.11.2'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.11.2 How important is it to you that the AI systems you develop are transparent and explainable?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Importance Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all importance levels: {total_mentions}, ", end="")
print(f"Average mentions per importance level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.11.2 How important is it to you that the AI systems you develop are transparent and explainable?
Total respondents: 140
Breakdown of responses: Extremely Important: 73 responses (52.14%), Moderately Important: 33 responses (23.57%), Somewhat Important: 22 responses (15.71%), Not Very Important: 7 responses (5.0%), Not At All Important: 5 responses (3.57%)
Total mentions across all importance levels: 140, Average mentions per importance level: 28.00, Most common response: Extremely Important (73 mentions, 52.14% of respondents), Least common response: Not At All Important (5 mentions, 3.57% of respondents)


In [124]:
####################
# B.11.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.11.3'
df = df[df['B.11.3'].notna()]  # Remove NaNs
df['B.11.3'] = df['B.11.3'].astype(str).apply(lambda x: x.strip())
df = df[df['B.11.3'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.11.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.11.3 How confident are you in making ethical decisions during AI development?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Confidence Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all confidence levels: {total_mentions}, ", end="")
print(f"Average mentions per confidence level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.11.3 How confident are you in making ethical decisions during AI development?
Total respondents: 140
Breakdown of responses: Moderately Confident: 54 responses (38.57%), Extremely Confident: 36 responses (25.71%), Somewhat Confident: 32 responses (22.86%), Not Very Confident: 11 responses (7.86%), Not At All Confident: 6 responses (4.29%), Prefer not to say: 1 responses (0.71%)
Total mentions across all confidence levels: 140, Average mentions per confidence level: 23.33, Most common response: Moderately Confident (54 mentions, 38.57% of respondents), Least common response: Prefer not to say (1 mentions, 0.71% of respondents)


In [23]:
####################
# B.11.4
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.11.4'] = df['B.11.4'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.11.4'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Resource', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.11.4'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each resource
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "AD: What types of training or resources would help you better integrate AI ethics<br>into your development work?"
fig = px.bar(all_responses, x='Resource', y='Count',
             labels={'Resource': 'Training or Resource Type', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'<b>{x:.1f}%</b>'))

# Update layout to make the chart clearer and set the title to bold
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Training or Resource Type',
    yaxis_title='Number of Mentions',
    title={
        'text': f"<b>{question_text}</b>",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font=dict(size=12)
)

# Update traces to make percentage text bold and bigger
fig.update_traces(textposition='inside', textfont=dict(size=14))

# Step 8: Show the figure
fig.show(renderer="browser")

# ... (rest of the code remains the same)

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Resource']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all resource types: {total_mentions}, ", end="")
print(f"Average mentions per resource type: {average_responses:.2f}, ", end="")
print(f"Most common resource: {most_common['Resource']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common resource: {least_common['Resource']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

AD: What types of training or resources would help you better integrate AI ethics<br>into your development work?
Total respondents: 139
Breakdown of responses: Access to ethical guidelines and best practices: 82 mentions (58.99% of respondents), Collaboration with AI ethicists and legal experts: 78 mentions (56.12% of respondents), Regular AI ethics workshops and training: 74 mentions (53.24% of respondents), Case studies of AI ethics implementation: 72 mentions (51.8% of respondents), Regular AI ethics reviews and feedback sessions: 66 mentions (47.48% of respondents), Support for continuous education related to AI ethics: 55 mentions (39.57% of respondents), Prefer not to say: 3 mentions (2.16% of respondents), Other: 1 mentions (0.72% of respondents)
Total mentions across all resource types: 431, Average mentions per resource type: 23.18, Most common resource: Access to ethical guidelines and best practices (82 mentions, 58.99% of respondents), Least common resource: Other (1 mentio

In [126]:
####################
# B.12.1
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # Split by comma, but keep commas within quotes
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    # Clean up each part and merge "Other" with "please specify"
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.12.1'] = df['B.12.1'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.12.1'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Technique', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.12.1'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each technique
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.12.1 What techniques do you use to identify biases in AI systems? Select all that apply."
fig = px.bar(all_responses, x='Technique', y='Count',
             title=question_text,
             labels={'Technique': 'Bias Identification Technique', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Bias Identification Technique', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Technique']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all techniques: {total_mentions}, ", end="")
print(f"Average mentions per technique: {average_mentions:.2f}, ", end="")
print(f"Most common technique: {most_common['Technique']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common technique: {least_common['Technique']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.12.1 What techniques do you use to identify biases in AI systems? Select all that apply.
Total respondents: 15
Breakdown of responses: Identifying and testing results for vulnerable groups in your AI system: 10 mentions (66.67% of respondents), User testing with diverse groups: 9 mentions (60.0% of respondents), Reviewing training data for diversity: 8 mentions (53.33% of respondents), Regular bias audits and testing: 7 mentions (46.67% of respondents), Analyzing model outputs for discriminatory patterns: 7 mentions (46.67% of respondents), Bias detection tools and software: 6 mentions (40.0% of respondents), Peer reviews and collaborative development: 4 mentions (26.67% of respondents), Evaluating the correct implementation of fairness constraints in models: 3 mentions (20.0% of respondents), Prefer not to say: 2 mentions (13.33% of respondents)
Total mentions across all techniques: 56, Average mentions per technique: 6.22, Most common technique: Identifying and testing results for 

In [127]:
####################
# B.12.2
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.12.2'
df = df[df['B.12.2'].notna()]  # Remove NaNs
df['B.12.2'] = df['B.12.2'].astype(str).apply(lambda x: x.strip())
df = df[df['B.12.2'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.12.2'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.12.2 How important is training on AI ethics for your role as QA or tester?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Importance Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all importance levels: {total_mentions}, ", end="")
print(f"Average mentions per importance level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.12.2 How important is training on AI ethics for your role as QA or tester?
Total respondents: 15
Breakdown of responses: Extremely Important: 6 responses (40.0%), Somewhat Important: 5 responses (33.33%), Not Very Important: 2 responses (13.33%), Moderately Important: 2 responses (13.33%)
Total mentions across all importance levels: 15, Average mentions per importance level: 3.75, Most common response: Extremely Important (6 mentions, 40.0% of respondents), Least common response: Moderately Important (2 mentions, 13.33% of respondents)


In [129]:
####################
# B.12.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.12.3'
df = df[df['B.12.3'].notna()]  # Remove NaNs
df['B.12.3'] = df['B.12.3'].astype(str).apply(lambda x: x.strip())
df = df[df['B.12.3'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.12.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.12.3 How confident are you in making ethical decisions during AI development?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Confidence Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all confidence levels: {total_mentions}, ", end="")
print(f"Average mentions per confidence level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.12.3 How confident are you in making ethical decisions during AI development?
Total respondents: 15
Breakdown of responses: Somewhat Confident: 6 responses (40.0%), Moderately Confident: 5 responses (33.33%), Extremely Confident: 4 responses (26.67%)
Total mentions across all confidence levels: 15, Average mentions per confidence level: 5.00, Most common response: Somewhat Confident (6 mentions, 40.0% of respondents), Least common response: Extremely Confident (4 mentions, 26.67% of respondents)


In [130]:
####################
# B.12.4
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # Split by comma, but keep commas within quotes
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    # Clean up each part and merge "Other" with "please specify"
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.12.4'] = df['B.12.4'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.12.4'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Method', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.12.4'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each method
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.12.4 How do you ensure data privacy is maintained during AI testing? Select all that apply."
fig = px.bar(all_responses, x='Method', y='Count',
             title=question_text,
             labels={'Method': 'Privacy Maintenance Method', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Privacy Maintenance Method', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Method']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all methods: {total_mentions}, ", end="")
print(f"Average mentions per method: {average_mentions:.2f}, ", end="")
print(f"Most common method: {most_common['Method']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common method: {least_common['Method']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.12.4 How do you ensure data privacy is maintained during AI testing? Select all that apply.
Total respondents: 15
Breakdown of responses: Using secure testing environments: 12 mentions (80.0% of respondents), Conducting privacy impact assessments: 7 mentions (46.67% of respondents), Anonymizing test data: 7 mentions (46.67% of respondents), Regularly updating privacy policies: 6 mentions (40.0% of respondents), Implementing access controls: 6 mentions (40.0% of respondents), Use of synthetic data: 2 mentions (13.33% of respondents), Prefer not to say: 1 mentions (6.67% of respondents)
Total mentions across all methods: 41, Average mentions per method: 5.86, Most common method: Using secure testing environments (12 mentions, 80.0% of respondents), Least common method: Prefer not to say (1 mentions, 6.67% of respondents)
