In [1]:
import pandas as pd
import os
os.environ.pop("MPLBACKEND", None)

import matplotlib.pyplot as plt
import seaborn as sns

df_survey = pd.read_csv("Student_AI_Survey_2023_Raw_data.csv", encoding='ISO-8859-1')

# Preview the column headers
print(df_survey.columns)


Index(['Unique Response Number', '1. I agree with the above information.',
       '2. Please select the appropriate age range.',
       '2.a. Which subject area are you studying?',
       '3. Have you used any Generative AI tools as part of your studies? (A generative AI tool will usually create new content from a prompt)',
       '4. Which of the following generative AI tools have you used?',
       '4.a. If you selected Other, please specify:',
       '5. What devices do you use for Generative AI?',
       '5.a. If you selected Other, please specify:',
       '6. What are your reasons for not using Generative AI tools in your studies?',
       '6.a. If you selected Other, please specify:',
       '7. How helpful are AI tools in the following tasks?',
       '7.1. Generating new content', '7.2. Summarising content',
       '7.3. Editing text', '7.4. Revising grammar and sentence structure',
       '7.5. Generating ideas', '7.6. Research', '7.7. Planning',
       '7.8. Inspiration', '7

In [2]:
from collections import Counter
import matplotlib.pyplot as plt

# Split multiple tools in a single cell (assuming comma-separated)
tools_raw = df_survey["4. Which of the following generative AI tools have you used?"].dropna()
tool_list = [tool.strip() for row in tools_raw for tool in str(row).split(',')]
tool_counts = Counter(tool_list)

# Plot
plt.figure(figsize=(10, 5))
plt.bar(tool_counts.keys(), tool_counts.values(), color='skyblue')
plt.title("Most Common Generative AI Tools Used by Students")
plt.xlabel("AI Tool")
plt.ylabel("Number of Users")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [3]:
# Clean column names
df_survey.columns = df_survey.columns.str.strip()

# Define columns
task_columns = [
    '7.1. Generating new content',
    '7.2. Summarising content',
    '7.3. Editing text',
    '7.4. Revising grammar and sentence structure',
    '7.5. Generating ideas',
    '7.6. Research',
    '7.7. Planning',
    '7.8. Inspiration',
    '7.9. Writing Communications',
    '7.10. Summarising notes',
    '7.11. As a discursive tool / collaborative partner'
]

# Apply mapping
likert_map = {
    "Very Helpful": 5,
    "Helpful": 4,
    "Neither helpful or unhelpful": 3,
    "Unhelpful": 2,
    "Not helpful at all": 1,
    "I have not used any": None,
    "": None,
    None: None
}

# Replace responses with numeric values
df_cleaned = df_survey[task_columns].replace(likert_map)
df_numeric = df_cleaned.apply(pd.to_numeric, errors='coerce')

# Compute means and plot
task_means = df_numeric.mean().sort_values()

# Plot
plt.figure(figsize=(10, 6))
sns.barplot(x=task_means.values, y=task_means.index, palette='viridis')
plt.title("Perceived Helpfulness of AI Tools by Task")
plt.xlabel("Average Usefulness Rating (1–5)")
plt.tight_layout()
plt.show()

  df_cleaned = df_survey[task_columns].replace(likert_map)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=task_means.values, y=task_means.index, palette='viridis')


In [4]:
# Replace with actual columns if names differ
attitude_cols = {
    'AI gives unfair advantage': '9.1. The use of AI in education may give an unfair advantage to those who use them.',
    'AI should be available to all': '9.2. AI tools should be available to ALL students to ensure equality.',
    'AI skills = future opportunity': '9.3. Students trained in the use of AI tools will have more opportunities in the future.',
    'Restrict AI to protect learning': '9.4. Access to AI systems should be restricted at the University so that they do not undermine learning.'
}

# Count response frequencies
attitudes = {k: df_survey[v].value_counts().sort_index() for k, v in attitude_cols.items()}

# Convert to DataFrame
df_attitudes = pd.DataFrame(attitudes).fillna(0)

# Plot
df_attitudes.plot(kind='bar', stacked=True, figsize=(10, 6), colormap='coolwarm')
plt.title("Student Attitudes Toward AI in Education")
plt.ylabel("Number of Students")
plt.xlabel("Agreement Level")
plt.xticks(rotation=45)
plt.legend(title="Statement", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [5]:
from wordcloud import WordCloud

comments = df_survey["14. If you have any other thoughts about the use of Generative AI, please write them below."].dropna()
text_blob = " ".join(comments)

wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text_blob)

plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.title("Student Sentiment: Common Words from Open Comments")
plt.show()


In [6]:
# Extract column of interest
tool_column = '4. Which of the following generative AI tools have you used?'
tools_raw = df_survey[tool_column].dropna()

# Split comma-separated tools and normalize
tool_list = [tool.strip() for row in tools_raw for tool in str(row).split(',') if tool.strip() != ""]
tool_counts = Counter(tool_list)

# Convert counts to % of total respondents
total_respondents = len(df_survey)
tool_df = pd.DataFrame({
    "Tool Used": list(tool_counts.keys()),
    "% of Respondents (approx.)": [f"{(count / total_respondents) * 100:.0f}%" for count in tool_counts.values()]
}).sort_values(by="% of Respondents (approx.)", ascending=False)

# Plot as table
fig, ax = plt.subplots(figsize=(9, len(tool_df)*0.5 + 1))
ax.axis('tight')
ax.axis('off')
tbl = ax.table(cellText=tool_df.values,
               colLabels=tool_df.columns,
               cellLoc='center',
               loc='center')
tbl.auto_set_font_size(False)
tbl.set_fontsize(12)
tbl.scale(1.1, 1.5)

plt.title("Generative AI Tool Usage Among Students (Survey-Based)", fontsize=14, pad=10)
plt.show()

In [7]:
# Column: 6. What are your reasons for not using Generative AI tools in your studies?
reason_col = '6. What are your reasons for not using Generative AI tools in your studies?'
non_use_raw = df_survey[reason_col].dropna()

# Split comma-separated responses
reasons = [reason.strip() for row in non_use_raw for reason in str(row).split(',')]
reason_counts = pd.Series(reasons).value_counts()
reason_percent = (reason_counts / len(df_survey)) * 100

# Display results
print("Top reasons for not using AI (% of total respondents):")
print(reason_percent.head(5).round(1).astype(str) + "%")


Top reasons for not using AI (% of total respondents):
I am concerned that using AI tools would be cheating    14.0%
I don't feel the need to use AI tools.                  13.2%
I don't know how to use any of the AI tools             10.1%
I feel that using AI tools would limit my creativity     9.3%
I am not aware of AI tools                               9.3%
Name: count, dtype: object


In [8]:
# Define attitude columns and labels
attitude_map = {
    "AI gives unfair advantage": '9.1. The use of AI in education may give an unfair advantage to those who use them.',
    "AI should be available to all": '9.2. AI tools should be available to ALL students to ensure equality.',
    "AI = Future opportunity": '9.3. Students trained in the use of AI tools will have more opportunities in the future.',
    "Restrict AI access": '9.4. Access to AI systems should be restricted at the University so that they do not undermine learning.'
}

# Calculate % who Agree or Strongly Agree
attitude_agree = {}
for label, col in attitude_map.items():
    agree_pct = df_survey[col].isin(["Agree", "Strongly agree"]).sum() / len(df_survey) * 100
    attitude_agree[label] = round(agree_pct, 1)

# Print result
print("AI Attitudes (% Agree or Strongly Agree):")
for k, v in attitude_agree.items():
    print(f"{k}: {v}%")


AI Attitudes (% Agree or Strongly Agree):
AI gives unfair advantage: 58.9%
AI should be available to all: 62.0%
AI = Future opportunity: 56.6%
Restrict AI access: 39.5%


In [9]:
# Column: 11.1. Not at all important vs Very Important
q_col = '11.1. Not at all important vs Very Important'
training_responses = pd.to_numeric(df_survey[q_col], errors='coerce')

important = (training_responses >= 4).sum()
neutral = (training_responses == 3).sum()
not_important = (training_responses <= 2).sum()

total = len(training_responses.dropna())
important_pct = round((important / total) * 100, 1)
neutral_pct = round((neutral / total) * 100, 1)
not_important_pct = round((not_important / total) * 100, 1)

print("Training Expectations:")
print(f"Important to Very Important (4–5): {important_pct}%")
print(f"Neutral (3): {neutral_pct}%")
print(f"Not Important (1–2): {not_important_pct}%")


Training Expectations:
Important to Very Important (4–5): 36.2%
Neutral (3): 23.6%
Not Important (1–2): 40.2%
