In [3]:
import os 

# Move to the survey results directory 
data_dir = '../../data'
os.chdir(data_dir)

ai_study = "AI_Study_Finalized.csv"
track_a_file = "track_a.csv"
track_b_file = "track_b.csv"



In [1]:
####################
# B.13.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.13.1'
df = df[df['B.13.1'].notna()]  # Remove NaNs
df['B.13.1'] = df['B.13.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.13.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.13.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.13.1 How do you believe AI technologies impact data privacy within your company?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Impact on Data Privacy', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all impact levels: {total_mentions}, ", end="")
print(f"Average mentions per impact level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.13.1 How do you believe AI technologies impact data privacy within your company?
Total respondents: 14
Breakdown of responses: Somewhat compromise data privacy: 5 responses (35.71%), Somewhat improve data privacy: 4 responses (28.57%), No impact on data privacy at all: 2 responses (14.29%), Significantly improve data privacy: 2 responses (14.29%), Significantly compromise data privacy: 1 responses (7.14%)
Total mentions across all impact levels: 14, Average mentions per impact level: 2.80, Most common response: Somewhat compromise data privacy (5 mentions, 35.71% of respondents), Least common response: Significantly compromise data privacy (1 mentions, 7.14% of respondents)


In [3]:
####################
# B.13.2
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # Split by comma, but keep commas within quotes and within the PETs option
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)(?=(?:[^(]*\([^)]*\))*[^()]*$)', response)
    # Clean up each part and handle special cases
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part.startswith('Integrating Privacy Enhancing Technologies'):
            cleaned_parts.append('Integrating Privacy Enhancing Technologies (PETs)')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.13.2'] = df['B.13.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.13.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Measure', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.13.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each measure
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.13.2 Which measures has your company implemented to mitigate AI-related security or privacy risks? Select all that apply."
fig = px.bar(all_responses, x='Measure', y='Count',
             title=question_text,
             labels={'Measure': 'Security/Privacy Measure', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Security/Privacy Measure', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Measure']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all measures: {total_mentions}, ", end="")
print(f"Average mentions per measure: {average_mentions:.2f}, ", end="")
print(f"Most common measure: {most_common['Measure']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common measure: {least_common['Measure']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.13.2 Which measures has your company implemented to mitigate AI-related security or privacy risks? Select all that apply.
Total respondents: 13
Breakdown of responses: Access controls and user authentication: 12 mentions (92.31% of respondents), Encryption of data used in AI models: 8 mentions (61.54% of respondents), Incident response plans specific to AI security/privacy breaches: 6 mentions (46.15% of respondents), Regular security and/or privacy audits of AI systems: 6 mentions (46.15% of respondents), Collaboration with AI developers to identify security/privacy risks: 5 mentions (38.46% of respondents), Integrating Privacy Enhancing Technologies (PETs): 4 mentions (30.77% of respondents), Prefer not to say: 1 mentions (7.69% of respondents)
Total mentions across all measures: 42, Average mentions per measure: 6.00, Most common measure: Access controls and user authentication (12 mentions, 92.31% of respondents), Least common measure: Prefer not to say (1 mentions, 7.69% of resp

In [4]:
####################
# B.13.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.13.3'
df = df[df['B.13.3'].notna()]
df['B.13.3'] = df['B.13.3'].astype(str).apply(lambda x: x.strip())
df = df[df['B.13.3'] != ""]

# Step 4: Count occurrences of each response
response_counts = df['B.13.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize with a Pie Chart
question_text = "B.13.3 How effectively does your company ensure compliance with data privacy regulations when using AI?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Effectiveness Level', 'Count': 'Number of Responses'})

fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show(renderer="browser")

# Step 8: Generate a statistical report
print(question_text)
print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, Average responses per option: {average_mentions:.2f}")
print(f"Most common: {most_common['Response']} ({most_common['Count']} responses, {most_common['Percentage']}% of respondents)")
print(f"Least common: {least_common['Response']} ({least_common['Count']} responses, {least_common['Percentage']}% of respondents)")

B.13.3 How effectively does your company ensure compliance with data privacy regulations when using AI?
Total respondents: 14
Breakdown of responses: Extremely Effectively: 6 responses (42.86%), Moderately Effectively: 6 responses (42.86%), Prefer not to say: 2 responses (14.29%)
Total responses: 14, Average responses per option: 4.67
Most common: Extremely Effectively (6 responses, 42.86% of respondents)
Least common: Prefer not to say (2 responses, 14.29% of respondents)


In [5]:
####################
# B.13.4
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.13.4'
df = df[df['B.13.4'].notna()]  # Remove NaNs
df['B.13.4'] = df['B.13.4'].astype(str).apply(lambda x: x.strip())
df = df[df['B.13.4'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.13.4'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.13.4 How involved are you in integrating privacy and security principles into the AI development process?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Involvement Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all involvement levels: {total_mentions}, ", end="")
print(f"Average mentions per involvement level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.13.4 How involved are you in integrating privacy and security principles into the AI development process?
Total respondents: 14
Breakdown of responses: Somewhat Involved: 5 responses (35.71%), Very Involved: 5 responses (35.71%), Rarely Involved: 2 responses (14.29%), Neutral: 1 responses (7.14%), Prefer not to say: 1 responses (7.14%)
Total mentions across all involvement levels: 14, Average mentions per involvement level: 2.80, Most common response: Somewhat Involved (5 mentions, 35.71% of respondents), Least common response: Prefer not to say (1 mentions, 7.14% of respondents)


In [6]:
####################
# B.14.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.14.1'
df = df[df['B.14.1'].notna()]  # Remove NaNs
df['B.14.1'] = df['B.14.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.14.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.14.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.14.1 How effectively do you believe your company implements AI ethics principles in its projects?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Effectiveness Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all effectiveness levels: {total_mentions}, ", end="")
print(f"Average mentions per effectiveness level: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.14.1 How effectively do you believe your company implements AI ethics principles in its projects?
Total respondents: 3
Breakdown of responses: Moderately Effectively: 2 responses (66.67%), Extremely Ineffectively: 1 responses (33.33%)
Total mentions across all effectiveness levels: 3, Average mentions per effectiveness level: 1.50, Most common response: Moderately Effectively (2 mentions, 66.67% of respondents), Least common response: Extremely Ineffectively (1 mentions, 33.33% of respondents)


In [7]:
####################
# B.14.2
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.14.2'] = df['B.14.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.14.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Action', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.14.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each action
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.14.2 Which of the following actions has your company taken to raise ethical awareness in your company? Select all that apply."
fig = px.bar(all_responses, x='Action', y='Count',
             title=question_text,
             labels={'Action': 'Ethical Awareness Action', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Ethical Awareness Action', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Action']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all actions: {total_mentions}, ", end="")
print(f"Average mentions per action: {average_mentions:.2f}, ", end="")
print(f"Most common action: {most_common['Action']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common action: {least_common['Action']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.14.2 Which of the following actions has your company taken to raise ethical awareness in your company? Select all that apply.
Total respondents: 3
Breakdown of responses: Promote AI ethicists' roles in the company: 2 mentions (66.67% of respondents), Uses case studies to show ethical dilemmas: 2 mentions (66.67% of respondents), Creates and distributes ethical guidelines: 1 mentions (33.33% of respondents), Encourages ongoing education and certification in AI ethics: 1 mentions (33.33% of respondents), Integrates AI ethics discussions in meetings: 1 mentions (33.33% of respondents), Provides regular AI ethics training for employees: 1 mentions (33.33% of respondents)
Total mentions across all actions: 8, Average mentions per action: 1.33, Most common action: Promote AI ethicists' roles in the company (2 mentions, 66.67% of respondents), Least common action: Provides regular AI ethics training for employees (1 mentions, 33.33% of respondents)


In [8]:
####################
# B.16.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.16.1'
df = df[df['B.16.1'].notna()]  # Remove NaNs
df['B.16.1'] = df['B.16.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.16.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.16.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.16.1 How often do you incorporate AI ethics principles in your research methodology?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Frequency', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total mentions across all frequencies: {total_mentions}, ", end="")
print(f"Average mentions per frequency: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.16.1 How often do you incorporate AI ethics principles in your research methodology?
Total respondents: 22
Breakdown of responses: Often: 9 responses (40.91%), Always: 8 responses (36.36%), Sometimes: 2 responses (9.09%), Rarely: 2 responses (9.09%), Never: 1 responses (4.55%)
Total mentions across all frequencies: 22, Average mentions per frequency: 4.40, Most common response: Often (9 mentions, 40.91% of respondents), Least common response: Never (1 mentions, 4.55% of respondents)


In [9]:
####################
# B.16.2
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('please specify'):
            cleaned_parts.append('Other')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.16.2'] = df['B.16.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.16.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Source', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.16.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each source
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.16.2 Where do you primarily seek guidance for AI ethics in your research? Select all that apply."
fig = px.bar(all_responses, x='Source', y='Count',
             title=question_text,
             labels={'Source': 'Guidance Source', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Guidance Source', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Source']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all sources: {total_mentions}, ", end="")
print(f"Average mentions per source: {average_mentions:.2f}, ", end="")
print(f"Most common source: {most_common['Source']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common source: {least_common['Source']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.16.2 Where do you primarily seek guidance for AI ethics in your research? Select all that apply.
Total respondents: 22
Breakdown of responses: Institutional guidelines and policies: 14 mentions (63.64% of respondents), Personal judgement and experience: 13 mentions (59.09% of respondents), Academic literature and journals: 11 mentions (50.0% of respondents), Professional organizations (e.g.: 9 mentions (40.91% of respondents), AAAI: 9 mentions (40.91% of respondents), ACM: 9 mentions (40.91% of respondents), ACL: 9 mentions (40.91% of respondents), IEEE: 9 mentions (40.91% of respondents), etc.): 9 mentions (40.91% of respondents), AI ethics workshops and conferences: 7 mentions (31.82% of respondents), Other: 1 mentions (4.55% of respondents), Prefer not to say: 1 mentions (4.55% of respondents)
Total mentions across all sources: 101, Average mentions per source: 8.42, Most common source: Institutional guidelines and policies (14 mentions, 63.64% of respondents), Least common source

In [12]:
####################
# B.16.2
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    # Split by comma, but keep commas within quotes and parentheses
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)(?=(?:[^(]*\([^)]*\))*[^()]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower().startswith('professional organizations'):
            cleaned_parts.append('Professional organizations (e.g., ACM, IEEE, ACL, AAAI, etc.)')
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part and not part.lower() == 'other':
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.16.2'] = df['B.16.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.16.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Source', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.16.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each source
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.16.2 Where do you primarily seek guidance for AI ethics in your research? Select all that apply."
fig = px.bar(all_responses, x='Source', y='Count',
             title=question_text,
             labels={'Source': 'Guidance Source', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Guidance Source', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Source']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all sources: {total_mentions}, ", end="")
print(f"Average mentions per source: {average_mentions:.2f}, ", end="")
print(f"Most common source: {most_common['Source']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common source: {least_common['Source']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.16.2 Where do you primarily seek guidance for AI ethics in your research? Select all that apply.
Total respondents: 22
Breakdown of responses: Institutional guidelines and policies: 14 mentions (63.64% of respondents), Personal judgement and experience: 13 mentions (59.09% of respondents), Academic literature and journals: 11 mentions (50.0% of respondents), Professional organizations (e.g., ACM, IEEE, ACL, AAAI, etc.): 9 mentions (40.91% of respondents), AI ethics workshops and conferences: 7 mentions (31.82% of respondents), please specify: 1 mentions (4.55% of respondents), Prefer not to say: 1 mentions (4.55% of respondents)
Total mentions across all sources: 56, Average mentions per source: 8.00, Most common source: Institutional guidelines and policies (14 mentions, 63.64% of respondents), Least common source: Prefer not to say (1 mentions, 4.55% of respondents)


In [13]:
####################
# B.16.4
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.16.4'] = df['B.16.4'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.16.4'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Method', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.16.4'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each method
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.16.4 How do you monitor adherence to the ethics statements in your research? Select all that apply."
fig = px.bar(all_responses, x='Method', y='Count',
             title=question_text,
             labels={'Method': 'Monitoring Method', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Monitoring Method', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Method']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all methods: {total_mentions}, ", end="")
print(f"Average mentions per method: {average_mentions:.2f}, ", end="")
print(f"Most common method: {most_common['Method']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common method: {least_common['Method']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.16.4 How do you monitor adherence to the ethics statements in your research? Select all that apply.
Total respondents: 22
Breakdown of responses: Regular peer reviews: 13 mentions (59.09% of respondents), Feedback from ethics committees: 12 mentions (54.55% of respondents), Internal audits and assessments: 12 mentions (54.55% of respondents), Testing the results of the models: 11 mentions (50.0% of respondents), Relying on personal or team accountability: 11 mentions (50.0% of respondents), Reviewing training data for any potential biases: 9 mentions (40.91% of respondents), Using checklists and guidelines: 8 mentions (36.36% of respondents), please specify: 1 mentions (4.55% of respondents)
Total mentions across all methods: 77, Average mentions per method: 9.62, Most common method: Regular peer reviews (13 mentions, 59.09% of respondents), Least common method: please specify (1 mentions, 4.55% of respondents)


In [14]:
####################
# B.16.5
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.16.5'] = df['B.16.5'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.16.5'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Challenge', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.16.5'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each challenge
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.16.5 What challenges do you encounter when writing ethics statements for your research papers? Select all that apply."
fig = px.bar(all_responses, x='Challenge', y='Count',
             title=question_text,
             labels={'Challenge': 'Ethical Challenge', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Ethical Challenge', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Challenge']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all challenges: {total_mentions}, ", end="")
print(f"Average mentions per challenge: {average_mentions:.2f}, ", end="")
print(f"Most common challenge: {most_common['Challenge']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common challenge: {least_common['Challenge']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.16.5 What challenges do you encounter when writing ethics statements for your research papers? Select all that apply.
Total respondents: 20
Breakdown of responses: Difficulty in addressing all ethical aspects: 12 mentions (60.0% of respondents), Lack of clear guidelines: 10 mentions (50.0% of respondents), Balancing ethical considerations with research goals: 8 mentions (40.0% of respondents), Limited understanding of ethical principles: 7 mentions (35.0% of respondents), Time constraints: 7 mentions (35.0% of respondents), please specify: 2 mentions (10.0% of respondents), Prefer not to say: 1 mentions (5.0% of respondents)
Total mentions across all challenges: 47, Average mentions per challenge: 6.71, Most common challenge: Difficulty in addressing all ethical aspects (12 mentions, 60.0% of respondents), Least common challenge: Prefer not to say (1 mentions, 5.0% of respondents)


In [15]:
####################
# B.17.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.17.1'
df = df[df['B.17.1'].notna()]  # Remove NaNs
df['B.17.1'] = df['B.17.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.17.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.17.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.17.1 Do you or your company collect, curate, and manage data for training purposes?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Answer', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, ", end="")
print(f"Average responses per option: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.17.1 Do you or your company collect, curate, and manage data for training purposes?
Total respondents: 229
Breakdown of responses: Yes: 178 responses (77.73%), No: 24 responses (10.48%), Unsure: 18 responses (7.86%), Prefer not to say: 9 responses (3.93%)
Total responses: 229, Average responses per option: 57.25, Most common response: Yes (178 mentions, 77.73% of respondents), Least common response: Prefer not to say (9 mentions, 3.93% of respondents)


In [16]:
####################
# B.17.2
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.17.2'
df = df[df['B.17.2'].notna()]  # Remove NaNs
df['B.17.2'] = df['B.17.2'].astype(str).apply(lambda x: x.strip())
df = df[df['B.17.2'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.17.2'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.17.2 Do you train your models in-house?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Answer', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, ", end="")
print(f"Average responses per option: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.17.2 Do you train your models in-house?
Total respondents: 178
Breakdown of responses: Yes: 149 responses (83.71%), No: 18 responses (10.11%), Prefer not to say: 6 responses (3.37%), Unsure: 5 responses (2.81%)
Total responses: 178, Average responses per option: 44.50, Most common response: Yes (149 mentions, 83.71% of respondents), Least common response: Unsure (5 mentions, 2.81% of respondents)


In [17]:
####################
# B.17.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.17.3'
df = df[df['B.17.3'].notna()]  # Remove NaNs
df['B.17.3'] = df['B.17.3'].astype(str).apply(lambda x: x.strip())
df = df[df['B.17.3'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.17.3'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.17.3 How much resources do you need to train these models?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Resource Level', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, ", end="")
print(f"Average responses per option: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.17.3 How much resources do you need to train these models?
Total respondents: 178
Breakdown of responses: Significant Resources: 74 responses (41.57%), Moderate Resources: 47 responses (26.4%), Extensive Resources: 38 responses (21.35%), Unsure: 8 responses (4.49%), Prefer not to say: 7 responses (3.93%), Minimal Resources: 4 responses (2.25%)
Total responses: 178, Average responses per option: 29.67, Most common response: Significant Resources (74 mentions, 41.57% of respondents), Least common response: Minimal Resources (4 mentions, 2.25% of respondents)


In [19]:
####################
# B.18.1
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'B.18.1'
df = df[df['B.18.1'].notna()]  # Remove NaNs
df['B.18.1'] = df['B.18.1'].astype(str).apply(lambda x: x.strip())
df = df[df['B.18.1'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['B.18.1'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "B.18.1 Do your AI models use personal data for training or testing AI systems?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.sequential.RdBu,
             labels={'Response': 'Answer', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, ", end="")
print(f"Average responses per option: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.18.1 Do your AI models use personal data for training or testing AI systems?
Total respondents: 229
Breakdown of responses: Yes: 101 responses (44.1%), No: 81 responses (35.37%), Unsure: 31 responses (13.54%), Prefer not to say: 16 responses (6.99%)
Total responses: 229, Average responses per option: 57.25, Most common response: Yes (101 mentions, 44.1% of respondents), Least common response: Prefer not to say (16 mentions, 6.99% of respondents)


In [20]:
####################
# B.18.2
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.18.2'] = df['B.18.2'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.18.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Method', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.18.2'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each method
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.18.2 How do you ensure their security and privacy while the data is at rest? Select all that apply."
fig = px.bar(all_responses, x='Method', y='Count',
             title=question_text,
             labels={'Method': 'Security/Privacy Method', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Security/Privacy Method', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Method']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all methods: {total_mentions}, ", end="")
print(f"Average mentions per method: {average_mentions:.2f}, ", end="")
print(f"Most common method: {most_common['Method']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common method: {least_common['Method']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.18.2 How do you ensure their security and privacy while the data is at rest? Select all that apply.
Total respondents: 101
Breakdown of responses: Use of Encryption: 62 mentions (61.39% of respondents), Access Control: 54 mentions (53.47% of respondents), Data Masking and Anonymization: 51 mentions (50.5% of respondents), Multi-Factor Authentication: 50 mentions (49.5% of respondents), Security Storage Solutions: 49 mentions (48.51% of respondents), Regular Audits and Monitoring: 48 mentions (47.52% of respondents), Data Retention Policies: 42 mentions (41.58% of respondents), Following Privacy Regulations and Standards: 40 mentions (39.6% of respondents), Backup and Recovery Protocols: 38 mentions (37.62% of respondents), Physical Security Measures: 37 mentions (36.63% of respondents), Prefer not to say: 2 mentions (1.98% of respondents), please specify: 1 mentions (0.99% of respondents)
Total mentions across all methods: 474, Average mentions per method: 39.50, Most common method: 

In [21]:
####################
# B.18.3
####################

import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to split responses and merge specific categories
def split_and_merge_responses(response):
    if pd.isna(response):
        return []
    parts = re.split(r',\s*(?=(?:[^"]*"[^"]*")*[^"]*$)', response)
    cleaned_parts = []
    for part in parts:
        part = part.strip().strip('"')
        if part.lower() == 'other':
            continue
        elif part.lower().startswith('other, please specify'):
            cleaned_parts.append('Other, please specify')
        elif part:
            cleaned_parts.append(part)
    return cleaned_parts

# Step 3: Process the responses using the split_and_merge_responses function
df['B.18.3'] = df['B.18.3'].apply(split_and_merge_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['B.18.3'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Method', 'Count']

# Step 5: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['B.18.3'].apply(len) > 0].shape[0]

# Step 6: Calculate the percentage of rows that mention each method
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Bar Chart using Plotly
question_text = "B.18.3 How do you ensure the data remains private and does not get leaked? Select all that apply."
fig = px.bar(all_responses, x='Method', y='Count',
             title=question_text,
             labels={'Method': 'Privacy/Security Method', 'Count': 'Number of Mentions'},
             color='Count',
             color_continuous_scale='Blues',
             text=all_responses['Percentage'].apply(lambda x: f'{x}% of respondents'))

# Update layout to make the chart clearer
fig.update_layout(xaxis_tickangle=45, xaxis_title='Privacy/Security Method', yaxis_title='Number of Mentions')

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Method']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all methods: {total_mentions}, ", end="")
print(f"Average mentions per method: {average_mentions:.2f}, ", end="")
print(f"Most common method: {most_common['Method']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common method: {least_common['Method']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

B.18.3 How do you ensure the data remains private and does not get leaked? Select all that apply.
Total respondents: 101
Breakdown of responses: Use of Encryption: 61 mentions (60.4% of respondents), Data Masking and Anonymization: 55 mentions (54.46% of respondents), Employee Training and Awareness: 51 mentions (50.5% of respondents), Multi-Factor Authentication: 50 mentions (49.5% of respondents), Use of Access Control: 49 mentions (48.51% of respondents), Intrusion Detection Systems (IDS) and Intrusion Prevention Systems (IPS): 47 mentions (46.53% of respondents), Network Security Measures: 46 mentions (45.54% of respondents), Data Loss Prevention (DLP) Tools: 46 mentions (45.54% of respondents), Use of Differential Privacy Algorithms: 45 mentions (44.55% of respondents), Regular Security and Privacy Audits: 45 mentions (44.55% of respondents), Secure Backup and Recovery Procedures: 44 mentions (43.56% of respondents), Following Privacy Regulations and Standards: 37 mentions (36.63%

In [1]:
import pandas as pd
import plotly.express as px
import re

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Function to process and clean responses
def clean_responses(response):
    if pd.isna(response):
        return []
    cleaned_response = response.strip()
    if cleaned_response.lower() == 'other':
        return []
    return [cleaned_response] if cleaned_response else []

# Function to truncate labels at the first occurrence of '-'
def truncate_label(label):
    return label.split('-')[0].strip()  # Split at '-' and take the first part

# Step 3: Process the responses using the clean_responses function
df['C.2'] = df['C.2'].apply(clean_responses)

# Step 4: Flatten the list of responses and count occurrences
all_responses = df['C.2'].explode().dropna().value_counts().reset_index()
all_responses.columns = ['Response', 'Count']

# Step 5: Truncate the responses to only show the part before the '-' 
all_responses['Truncated Response'] = all_responses['Response'].apply(truncate_label)

# Step 6: Calculate total number of valid responses (rows with non-empty lists)
total_valid_responses = df[df['C.2'].apply(len) > 0].shape[0]

# Step 7: Calculate the percentage of rows that mention each response
all_responses['Percentage'] = (all_responses['Count'] / total_valid_responses * 100).round(2)

# Step 8: Visualize the results with a Bar Chart using Plotly
question_text = ("C.2 How would you perceive the impact of AI regulations in shaping the "
                 "future of technology in your field?")

# Adding line breaks in the title to ensure it splits across multiple lines if needed
formatted_question_text = ("C.2 How would you perceive the impact of AI regulations<br>"
                           "in shaping the future of technology in your field?")

fig = px.bar(all_responses, x='Truncated Response', y='Count',
             labels={'Truncated Response': 'Perceived Impact', 'Count': 'Number of Mentions'},
             color='Count',  # Use color based on Count
             color_continuous_scale='Blues',  # Keep the blue color gradient
             text=all_responses['Percentage'].apply(lambda x: f'<b>{x:.1f}%</b>'))  # Bold and formatted percentage text

# Update layout to make the chart clearer and set the title to split across multiple lines
fig.update_layout(
    xaxis_tickangle=45,
    xaxis_title='Perceived Impact',
    yaxis_title='Number of Mentions',
    title={
        'text': f"<b>{formatted_question_text}</b>",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    font=dict(size=12),
    margin=dict(t=150),  # Increase top margin to create more space between the title and the graph
    yaxis=dict(range=[0, all_responses['Count'].max() * 1.15]),  # Extend y-axis range above the tallest bar
    coloraxis_showscale=False  # Remove the color key while keeping colors on the bars
)

# Update traces to position text just above the bars and ensure uniform text size
fig.update_traces(
    textposition='outside',  # Move text above the bars
    textfont=dict(size=16),  # Keep the font size uniform
    cliponaxis=False  # Ensure text is not clipped
)

# Step 9: Show the figure
fig.show(renderer="browser")

# Step 10: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Truncated Response']}: {row['Count']} mentions ({row['Percentage']}% of respondents)" for _, row in all_responses.iterrows()]))

total_mentions = all_responses['Count'].sum()
average_mentions = total_mentions / len(all_responses)
most_common = all_responses.iloc[0]
least_common = all_responses.iloc[-1]

print(f"Total mentions across all responses: {total_mentions}, ", end="")
print(f"Average mentions per response: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Truncated Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Truncated Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")


C.2 How would you perceive the impact of AI regulations in shaping the future of technology in your field?
Total respondents: 298
Breakdown of responses: Highly Positive: 107 mentions (35.91% of respondents), Somewhat Positive: 101 mentions (33.89% of respondents), Neutral: 40 mentions (13.42% of respondents), Somewhat Negative: 20 mentions (6.71% of respondents), Highly Negative: 14 mentions (4.7% of respondents), It Depends: 10 mentions (3.36% of respondents), Uncertain: 5 mentions (1.68% of respondents), No effect: 1 mentions (0.34% of respondents)
Total mentions across all responses: 298, Average mentions per response: 37.25, Most common response: Highly Positive (107 mentions, 35.91% of respondents), Least common response: No effect (1 mentions, 0.34% of respondents)


In [None]:
####################################################################
# Generate normalized stacked bar charts of the perception of AI regulations impact on future technology by location
# Survey Question (C.2) -> Column C.3.1, grouped by location (P8)
####################################################################

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Print the first few rows of the DataFrame to check the column names
print("DataFrame Columns:")
print(df.columns)

region_column = df.columns[28]

response_column = df.columns[228]

# Print the seleted columns to ensure they are correct
print(f"Region Column: {region_column}")
print(f"Response Column: {response_column}")

# Print the unique values in the response column to check the data
print("Unique values in the response column:")
print(df[response_column].unique())

# Define the mapping for the responses
response_mapping = {
    "Highly Positive - Regulations will ensure ethical AI systems": "Highly Positive",
    "Somewhat Positive - Regulations will provide helpful guidance towards ethical AI systems": "Somewhat Positive",
    "Neutral - Regulations will have minimal impact on AI development": "Neutral",
    "Somewhat Negative - Regulations will slow innovation and have little effect": "Somewhat Negative",
    "Highly Negative - Regulations will hinder AI advancement and competitiveness with little to no positive impact.": "Highly Negative",
    "It Depends - Impact will vary heavily based on administration and technological advancements": "It Depends",
    "Uncertain - Not able to predict at this time": "Uncertain",
    "No effect": "No Effect"
}

# Define the order of responses by certainty
response_order = [
    "Highly Positive",
    "Somewhat Positive",
    "Neutral",
    "Somewhat Negative",
    "Highly Negative",
    "It Depends",
    "Uncertain",
    "No Effect"
]

# Map the responses to the defined categories
df[response_column] = df[response_column].map(response_mapping)

# Convert the response column to a categorical type with the specified order
df[response_column] = pd.Categorical(df[response_column], categories=response_order, ordered=True)

# Drop rows with NaN values in the response column
df = df.dropna(subset=[response_column])

# Debugging: Print the first few rows of the DataFrame to check the mapped responses
print("Mapped Responses:")
print(df[[region_column, response_column]].head())

# Function to create and display normalized stacked bar chart with annotations
def plot_normalized_stacked_bar(data, title, xlabel):
    # Count the number of responses for each category by region
    counts = data.groupby([region_column, response_column]).size().unstack(fill_value=0)
    
    # Debugging: Print the counts to check the grouped data
    print("Counts:")
    print(counts.head())
    
    # Normalize the counts by the total number of respondents in each region
    counts = counts.div(counts.sum(axis=1), axis=0) * 100
    
    # Debugging: Print the normalized counts to check the data
    print("Normalized Counts:")
    print(counts.head())
    
    # Filter out columns with all zeros
    counts = counts.loc[:, (counts != 0).any(axis=0)]
    
    # Plot the normalized stacked bar chart
    ax = counts.plot(kind='bar', stacked=True, figsize=(20, 12), colormap='viridis')
    plt.title(title, fontsize=16)
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel('Percentage of Respondents', fontsize=14)
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.yticks(fontsize=12)
    
    # Annotate the bars with the percentages
    for container in ax.containers:
        labels = [f'{v.get_height():.1f}%' if v.get_height() > 0 else '' for v in container]
        ax.bar_label(container, labels=labels, label_type='center', fontsize=10)
    
    # Set the legend order
    handles, labels = ax.get_legend_handles_labels()
    sorted_handles_labels = sorted(zip(handles, labels), key=lambda x: response_order.index(x[1]))
    handles, labels = zip(*sorted_handles_labels)
    ax.legend(handles, labels, title='Response', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12, title_fontsize=14)
    
    plt.tight_layout()
    plt.show()

# Plot for all locations
plot_normalized_stacked_bar(df, 'Perception of AI Regulations Impact on Future Technology by Location', 'Region')

# Create a new DataFrame with grouped regions
def group_regions(region):
    if region in ['North America', 'EU/UK/EEA']:
        return region
    else:
        return 'Other'

grouped_df = df.copy()
grouped_df[region_column] = grouped_df[region_column].apply(group_regions)

# Plot for grouped regions
plot_normalized_stacked_bar(grouped_df, 'Perception of AI Regulations Impact on Future Technology by Grouped Regions', 'Grouped Region')

In [6]:
####################
# C.3
####################

import pandas as pd
import plotly.express as px

# Step 1: Read CSV file into a DataFrame
df = pd.read_csv(ai_study)

# Step 2: Exclude the first two rows
df = df.iloc[2:]

# Step 3: Remove NaNs or empty values in 'C.3'
df = df[df['C.3.a'].notna()]  # Remove NaNs
df['C.3.a'] = df['C.3.a'].astype(str).apply(lambda x: x.strip())
df = df[df['C.3.a'] != ""]  # Remove empty strings

# Step 4: Count occurrences of each response
response_counts = df['C.3.a'].value_counts().reset_index()
response_counts.columns = ['Response', 'Count']

# Step 5: Calculate total number of valid responses
total_valid_responses = df.shape[0]

# Step 6: Calculate the percentage of each option based on valid responses
response_counts['Percentage'] = (response_counts['Count'] / total_valid_responses * 100).round(2)

# Step 7: Visualize the results with a Pie Chart using Plotly
question_text = "C.3 Do you believe regulations for mitigating the AI ethics risks will negatively impact your company or industry?"
fig = px.pie(response_counts, names='Response', values='Count',
             title=question_text,
             color_discrete_sequence=px.colors.qualitative.Set3,
             labels={'Response': 'Answer', 'Count': 'Number of Responses'})

# Add percentage text to the pie slices
fig.update_traces(textposition='inside', textinfo='percent+label')

# Update layout to make the chart clearer
fig.update_layout(title_font_size=20, title_x=0.5)

# Step 8: Show the figure
fig.show(renderer="browser")

# Step 9: Generate a statistical report
print(question_text)

print(f"Total respondents: {total_valid_responses}")
print("Breakdown of responses:", end=" ")
print(", ".join([f"{row['Response']}: {row['Count']} responses ({row['Percentage']}%)" for _, row in response_counts.iterrows()]))

total_mentions = response_counts['Count'].sum()
average_mentions = total_mentions / len(response_counts)
most_common = response_counts.iloc[0]
least_common = response_counts.iloc[-1]

print(f"Total responses: {total_mentions}, ", end="")
print(f"Average responses per option: {average_mentions:.2f}, ", end="")
print(f"Most common response: {most_common['Response']} ({most_common['Count']} mentions, {most_common['Percentage']}% of respondents), ", end="")
print(f"Least common response: {least_common['Response']} ({least_common['Count']} mentions, {least_common['Percentage']}% of respondents)")

C.3 Do you believe regulations for mitigating the AI ethics risks will negatively impact your company or industry?
Total respondents: 394
Breakdown of responses: No: 185 responses (46.95%), Yes: 96 responses (24.37%), Unsure: 59 responses (14.97%), Prefer not to say: 54 responses (13.71%)
Total responses: 394, Average responses per option: 98.50, Most common response: No (185 mentions, 46.95% of respondents), Least common response: Prefer not to say (54 mentions, 13.71% of respondents)
