In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the student score records from disk into the working DataFrame.
df = pd.read_csv('student_scores.csv')

# Sanity-check the load: heading first, then the leading rows, one per line.
print("First 5 rows of the dataset:", df.head(), sep="\n")

Explore the Dataset Structure

In [None]:
# Report the dataset dimensions as a (rows, columns) tuple.
print(f"\nDataset shape: {df.shape}")

# Summarise the columns and their data types.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly: wrapping it in print() appends a stray "None" line
# after the report.
print("\nDataset information:")
df.info()

In [None]:
# Build the describe() summary once, then print it beneath its heading.
summary_stats = df.describe()
print("\nBasic statistics:", summary_stats, sep="\n")

Data Analysis Tasks

Calculate Average Scores

In [None]:
# Mean of each subject's score column, evaluated in Math, Science, English
# order and unpacked into one name per subject.
avg_math, avg_science, avg_english = (
    df[column].mean()
    for column in ('math_score', 'science_score', 'english_score')
)

# Report the three means, each rounded to two decimals, one per line.
print(
    f"Average Math Score: {avg_math:.2f}",
    f"Average Science Score: {avg_science:.2f}",
    f"Average English Score: {avg_english:.2f}",
    sep="\n",
)

Find Top Performers

In [None]:
# Rank every student by math score, highest first, then keep the first three rows.
math_ranking = df.sort_values(by='math_score', ascending=False)
top_math_students = math_ranking.iloc[:3]

# Show only the student name next to the score used for the ranking.
print("\nTop 3 Math Students:", top_math_students[['name', 'math_score']], sep="\n")

Create a New Column

In [None]:
# Per-student mean of the three subject scores, stored as a new column on df.
# Later cells read df['average_score'], so this cell must run before them.
subject_total = df['math_score'] + df['science_score'] + df['english_score']
df['average_score'] = subject_total / 3

# Preview the subject scores side by side with the derived average.
preview_columns = ['name', 'math_score', 'science_score', 'english_score', 'average_score']
print("\nDataFrame with average scores:", df[preview_columns].head(), sep="\n")

Filter Students

In [None]:
# A student qualifies only when BOTH conditions hold; each threshold is strict,
# so an average of exactly 85 or an attendance of exactly 90 is excluded.
strong_average = df['average_score'] > 85
strong_attendance = df['attendance_rate'] > 90
high_performers = df[strong_average & strong_attendance]

# List the qualifying students with the two values that were tested.
print(
    "\nHigh Performing Students:",
    high_performers[['name', 'average_score', 'attendance_rate']],
    sep="\n",
)

Data Visualization Tasks

Create a Bar Chart of Average Scores by Subject

In [None]:
# Subject label -> mean of its score column (dicts preserve insertion order,
# so the bars appear as Math, Science, English).
subject_means = {
    'Math': df['math_score'].mean(),
    'Science': df['science_score'].mean(),
    'English': df['english_score'].mean(),
}
subjects = list(subject_means)
averages = list(subject_means.values())

# Draw the bars on an explicit Axes rather than through pyplot's implicit state.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(subjects, averages, color=['blue', 'green', 'orange'])
ax.set_xlabel('Subject')
ax.set_ylabel('Average Score')
ax.set_title('Average Scores by Subject')
ax.set_ylim(0, 100)

# Annotate each bar with its value (one decimal), placed one unit above the top.
for position, mean_score in enumerate(averages):
    ax.text(position, mean_score + 1, f'{mean_score:.1f}', ha='center', va='bottom')

ax.grid(axis='y', alpha=0.3)
plt.show()

Create a Scatter Plot

In [None]:
# Scatter plot of weekly study hours against each student's average score,
# with a least-squares straight trend line overlaid.

# Keep only the rows where both values are present: np.polyfit cannot produce
# a usable fit from data containing NaN.
study_data = df[['hours_studied', 'average_score']].dropna()
hours = study_data['hours_studied']
scores = study_data['average_score']

# Create scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(hours, scores, color='purple', alpha=0.6, s=100)
plt.xlabel('Hours Studied per Week')
plt.ylabel('Average Score')
plt.title('Relationship Between Study Hours and Average Score')
plt.grid(True, alpha=0.3)

# Add a trend line (optional). A degree-1 fit is only meaningful when there
# are at least two distinct x values, so skip it (and the legend) otherwise.
if hours.nunique() >= 2:
    z = np.polyfit(hours, scores, 1)  # coefficients, highest power first
    p = np.poly1d(z)
    # Draw the line through its two end points only. Plotting it through every
    # sample in row order makes the path double back on itself, so the dashes
    # of the "r--" style overlap and the line renders unevenly.
    trend_x = np.array([hours.min(), hours.max()])
    plt.plot(trend_x, p(trend_x), "r--", alpha=0.8, label='Trend Line')
    plt.legend()

plt.show()

Create Multiple Subplots

In [None]:
# One figure with three side-by-side histograms: Math, Science and English scores.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# (score column, panel title, bar colour) for each panel, left to right.
panels = [
    ('math_score', 'Math Scores', 'blue'),
    ('science_score', 'Science Scores', 'green'),
    ('english_score', 'English Scores', 'orange'),
]

# Every panel shares the same binning, labels and grid; only the column,
# title and colour differ.
for ax, (column, title, colour) in zip(axes, panels):
    ax.hist(df[column], bins=8, color=colour, alpha=0.7, edgecolor='black')
    ax.set_xlabel('Score')
    ax.set_ylabel('Number of Students')
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.3)

# Overall title, then let matplotlib adjust the spacing between panels.
fig.suptitle('Score Distribution by Subject', fontsize=16)
fig.tight_layout()
plt.show()