In [1]:
import pandas as pd

# Step 1: Load the dataset
# Read the student records from a CSV file (path is relative to the
# notebook's working directory) into the DataFrame used by every later cell.
file_path = "students_data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [2]:
# Step 2: Preview the data
# Print a heading followed by the first rows of the frame (head() returns
# five rows by default); sep="\n" puts the table on its own line.
print("First few rows of the dataset:", df.head(), sep="\n")


First few rows of the dataset:
     Name   Age  Gender  Grade  Attendance
0   Alice  20.0  Female   85.0        95.0
1     Bob  22.0    Male   90.0         NaN
2     NaN  19.0    Male    NaN        85.0
3   Diana  21.0  Female   78.0        90.0
4  Edward   NaN    Male   92.0        80.0


In [3]:
# Step 3: Impute missing grades
# The mean is taken over the non-missing 'Grade' entries (NaN is skipped),
# then every missing 'Grade' is replaced with that value.
# Note: this runs before rows are dropped in Step 4, so the mean is computed
# over all rows currently in `df`.
mean_grade = df["Grade"].mean(skipna=True)
df["Grade"] = df["Grade"].fillna(value=mean_grade)

In [4]:
# Step 4: Discard rows without an attendance value
# Only the 'Attendance' column is inspected; rows missing values in other
# columns are kept. `df` is rebound to the filtered result.
df = df.dropna(axis="index", how="any", subset=["Attendance"])

In [5]:
# Step 5: Label missing names
# Any missing entry in 'Name' becomes the placeholder string "Unknown".
df["Name"] = df["Name"].fillna(value="Unknown")

# Confirmation message for the reader of the notebook output.
print("\nMissing names replaced with 'Unknown'.")


Missing names replaced with 'Unknown'.


In [6]:
# Step 6 (challenge): Keep only fully populated rows
# A row is removed if any of its columns is missing; the result is stored
# separately so `df` itself is left unchanged.
df_no_missing = df.dropna(axis="index", how="any")

# Report how many complete rows remain.
print(f"\nNew DataFrame with no missing values created. Total rows: {len(df_no_missing)}")


New DataFrame with no missing values created. Total rows: 5


In [7]:
# Step 7: Persist the amended data
# Write `df` (not `df_no_missing`) to a new CSV file; index=False leaves the
# row index out of the file.
output_file_path = "students_data_amended.csv"
df.to_csv(path_or_buf=output_file_path, index=False)

In [8]:
# Summary: show the outcome of the cleaning steps
# This cell only displays results; it does not re-run the earlier steps.
# `df` had 'Grade', 'Attendance' and 'Name' treated above, so other columns
# (e.g. 'Age') may still contain missing values; `df_no_missing` holds only
# the rows with no missing values at all.
print("\nData after handling missing values:", df.head(), sep="\n")

print("\nNew DataFrame with no missing values:", df_no_missing.head(), sep="\n")


Data after handling missing values:
      Name   Age  Gender      Grade  Attendance
0    Alice  20.0  Female  85.000000        95.0
2  Unknown  19.0    Male  86.285714        85.0
3    Diana  21.0  Female  78.000000        90.0
4   Edward   NaN    Male  92.000000        80.0
6    Grace  20.0  Female  88.000000        75.0

New DataFrame with no missing values:
      Name   Age  Gender      Grade  Attendance
0    Alice  20.0  Female  85.000000        95.0
2  Unknown  19.0    Male  86.285714        85.0
3    Diana  21.0  Female  78.000000        90.0
6    Grace  20.0  Female  88.000000        75.0
9     Jack  24.0    Male  86.285714        85.0
