In [1]:
import numpy as np
import pandas as pd

# 1. Data Creation and Categorization
# Sample data set: ten integer grades used by every step of the analysis below.
grades = np.array([67, 45, 88, 50, 39, 72, 54, 33, 90, 47])

def categorize_grades(grades_array, threshold=50):
    """Label each grade as "Pass" or "Fail" against a threshold.

    Parameters
    ----------
    grades_array : array-like of numbers
        Grades to classify. NumPy arrays, pandas Series, lists and tuples
        are all accepted.
    threshold : number, default 50
        Minimum grade that counts as a pass; the comparison is inclusive,
        so a grade equal to the threshold passes.

    Returns
    -------
    pandas.Categorical
        Ordered categorical with categories ``['Fail', 'Pass']``
        (Fail < Pass), holding one label per input grade.
    """
    # Coerce up front so plain Python sequences support the element-wise
    # comparison below (a bare ``[67, 45] >= 50`` raises TypeError).
    scores = np.asarray(grades_array)
    labels = np.where(scores >= threshold, 'Pass', 'Fail')
    return pd.Categorical(labels, categories=['Fail', 'Pass'], ordered=True)

# Classify using the function's default cut-off (threshold = 50)
categories = categorize_grades(grades)

# 2. Data Structure Analysis
# One print call, one line per fact; sep='\n' keeps the output line-for-line.
print(
    f'Length of grades vector: {len(grades)}',
    f'Data type of grades vector: {grades.dtype}',
    f'Data type of categories vector: {categories.dtype}',
    f'Unique categories: {categories.categories.tolist()}',
    sep='\n',
)

# 3. Statistical Summary
mean_grade = grades.mean()
median_grade = np.median(grades)
# Mean of a boolean mask is the fraction of True entries; scale to percent.
pass_rate = 100 * np.mean(categories == 'Pass')
fail_count = np.sum(categories == 'Fail')

print(
    f'Mean grade: {mean_grade:.2f}',
    f'Median grade: {median_grade}',
    f'Pass rate: {pass_rate:.2f}%',
    f'Fail count: {fail_count}',
    sep='\n',
)

# 4. Interpretation Task: Adapt to different threshold
# Re-run the same classification with a stricter cut-off and compare.
new_threshold = 60
new_categories = categorize_grades(grades, threshold=new_threshold)
new_pass_rate = 100 * np.mean(new_categories == 'Pass')
new_fail_count = np.sum(new_categories == 'Fail')

print(
    f'\nWith threshold = {new_threshold}:',
    f'Pass rate: {new_pass_rate:.2f}%',
    f'Fail count: {new_fail_count}',
    sep='\n',
)


Length of grades vector: 10
Data type of grades vector: int64
Data type of categories vector: category
Unique categories: ['Fail', 'Pass']
Mean grade: 58.50
Median grade: 52.0
Pass rate: 60.00%
Fail count: 4

With threshold = 60:
Pass rate: 40.00%
Fail count: 6
