# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the natality CSV that the rest of the script analyses.
file_path = r'/Users/andrewoh/Downloads/Natality2022.csv'

# Load the CSV. Each anticipated failure is translated into a user-facing
# message; the shared "report, pause, exit" handling happens once below
# instead of being repeated in every handler.
try:
    data = pd.read_csv(file_path)
except FileNotFoundError:
    load_error = f"File not found: {file_path}"
except PermissionError:
    load_error = f"Permission denied: {file_path}. Please check file permissions."
except pd.errors.EmptyDataError:
    load_error = f"No data: {file_path} is empty."
except pd.errors.ParserError:
    load_error = f"Parsing error: {file_path} could not be parsed."
else:
    load_error = None
    print("Data loaded successfully.")

if load_error is not None:
    print(load_error)
    # Pause so the message stays visible when the script is run from a
    # console window that closes on exit.
    input("Press Enter to exit...")
    # Raise SystemExit directly rather than calling the `exit()` helper:
    # that helper is injected by the `site` module for interactive use and is
    # not guaranteed to exist. A non-zero status marks the run as failed.
    raise SystemExit(1)

# Show the first rows of the loaded table as a quick sanity check.
print(data.head())

# Coerce both parent-age columns to numeric; unparseable values become NaN.
for age_column in ('mother_age', 'father_age'):
    data[age_column] = pd.to_numeric(data[age_column], errors='coerce')

# Keep only rows where both parents are aged 25 to 40 (bounds inclusive).
mother_in_range = data['mother_age'].between(25, 40)
father_in_range = data['father_age'].between(25, 40)
filtered_data = data.loc[mother_in_range & father_in_range]

# Discard any rows that still lack an age for either parent.
filtered_data = filtered_data.dropna(subset=['mother_age', 'father_age'])

# Restrict to rows where both parents have the same race/ethnicity value.
same_race = (
    filtered_data['mother_race_ethnicity_comb']
    == filtered_data['father_race_ethnicity_comb']
)
filtered_data = filtered_data.loc[same_race]

# Mean age per race/ethnicity group, as two-column frames ready for plotting.
mother_age_means = filtered_data.groupby('mother_race_ethnicity_comb')['mother_age'].mean()
father_age_means = filtered_data.groupby('father_race_ethnicity_comb')['father_age'].mean()
average_age_mother_by_race = mother_age_means.reset_index()
average_age_father_by_race = father_age_means.reset_index()

def _plot_average_age_by_race(averages, race_column, age_column, parent_label,
                              age_range=(25, 40)):
    """Draw and show a bar chart of average parental age per race/ethnicity.

    Args:
        averages: DataFrame with one row per group, holding the group label
            in ``race_column`` and the mean age in ``age_column``.
        race_column: Name of the column used for the x-axis categories.
        age_column: Name of the column holding the average age (bar height).
        parent_label: Singular parent name (e.g. 'Mother'); used in the title
            and the x-axis label.
        age_range: ``(low, high)`` ages used for the y-axis limits and the
            range quoted in the title.
    """
    low, high = age_range

    plt.figure(figsize=(12, 6))
    # NOTE(review): newer seaborn releases may warn about `palette` being
    # passed without `hue` - confirm against the installed version.
    sns.barplot(x=race_column, y=age_column, data=averages, palette='viridis')
    plt.title(f'Average Age of {parent_label}s by Race/Ethnicity (Ages {low}-{high})')
    plt.xlabel(f'{parent_label} Race/Ethnicity')
    plt.ylabel('Average Age')
    # Right-align the rotated labels so each one ends under its own tick.
    plt.xticks(rotation=45, ha='right')
    plt.ylim(low, high)  # Set y-axis limits to focus on the plotted age range

    # Write each average just above its bar. This assumes the bars are drawn
    # in the row order of `averages`, so the row position is the bar's x.
    for position, average in enumerate(averages[age_column]):
        plt.text(position, average + 0.2, f'{average:.1f}', ha='center')

    # Make room for the rotated category labels before displaying the figure.
    plt.tight_layout()
    plt.show()


# mother avg age
_plot_average_age_by_race(
    average_age_mother_by_race, 'mother_race_ethnicity_comb', 'mother_age', 'Mother'
)

# father avg age
_plot_average_age_by_race(
    average_age_father_by_race, 'father_race_ethnicity_comb', 'father_age', 'Father'
)