In [1]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Load the four source CSV files and preview each one.
from pathlib import Path

import pandas as pd

# Single place to change where the CSV files live. The default is the
# placeholder location this notebook has always used; point it at the
# directory that actually holds the four files before running.
DATA_DIR = Path('C:/Users/YourUsername/Desktop')

# Load the files
death_rate_df = pd.read_csv(DATA_DIR / 'death-rate.csv')
birth_rate_df = pd.read_csv(DATA_DIR / 'crude-birth-rate.csv')
primary_enrollment_df = pd.read_csv(DATA_DIR / 'gross-enrollment-ratio-in-primary-education.csv')
secondary_enrollment_df = pd.read_csv(DATA_DIR / 'gross-enrollment-ratio-in-secondary-education.csv')

# Display the first few rows of each file to understand the structure
death_rate_df.head(), birth_rate_df.head(), primary_enrollment_df.head(), secondary_enrollment_df.head()


In [None]:
# Give each value column a short name so the merged frame is easy to work with.
# Each rename only touches the one long header listed; other columns are kept.
death_rate_df = death_rate_df.rename(
    columns={"Death rate - Sex: all - Age: all - Variant: estimates": "Death_rate"}
)
birth_rate_df = birth_rate_df.rename(
    columns={"Birth rate - Sex: all - Age: all - Variant: estimates": "Birth_rate"}
)
primary_enrollment_df = primary_enrollment_df.rename(
    columns={"Gross enrolment ratio, primary, both sexes (%)": "Primary_enrollment_rate"}
)
secondary_enrollment_df = secondary_enrollment_df.rename(
    columns={"Gross enrolment ratio, secondary, both sexes (%)": "Secondary_enrollment_rate"}
)

# Inner-join all four frames on the shared identifier columns, so only
# (Entity, Code, Year) combinations present in every file survive.
join_keys = ["Entity", "Code", "Year"]
merged_df = (
    death_rate_df
    .merge(birth_rate_df, how="inner", on=join_keys)
    .merge(primary_enrollment_df, how="inner", on=join_keys)
    .merge(secondary_enrollment_df, how="inner", on=join_keys)
)

# Preview the merged result as a sanity check
merged_df.head()


In [None]:
import statsmodels.api as sm

# Explanatory variables shared by both models: the two enrollment rates,
# plus a constant column so each fit includes an intercept.
predictor_columns = ['Primary_enrollment_rate', 'Secondary_enrollment_rate']
X = sm.add_constant(merged_df[predictor_columns])

# Response variables, one per regression
Y_birth = merged_df['Birth_rate']
Y_death = merged_df['Death_rate']

# Ordinary least squares fits: birth rate and death rate on enrollment
model_birth = sm.OLS(Y_birth, X).fit()
model_death = sm.OLS(Y_death, X).fit()

# Keep the summaries in variables and show both as the cell output
model_birth_summary = model_birth.summary()
model_death_summary = model_death.summary()

model_birth_summary, model_death_summary

In [None]:
# Average growth rate of the primary and secondary enrollment rates.
#
# pct_change() compares each row with the row directly before it. merged_df
# holds many entities stacked together, so the change must be computed within
# each Entity (ordered by Year); otherwise the first row of one entity is
# compared with the last row of the previous one and a bogus "growth" value
# enters the average.
# NOTE(review): consecutive rows of an entity are treated as consecutive
# years; if the data has gaps in Year, the change spans more than one year
# - confirm whether that matters for this analysis.
rates_by_entity = merged_df.sort_values(['Entity', 'Year']).groupby('Entity')

for rate_column, growth_column in (
    ('Primary_enrollment_rate', 'Primary_growth'),
    ('Secondary_enrollment_rate', 'Secondary_growth'),
):
    growth = rates_by_entity[rate_column].pct_change()
    # A previous value of 0 yields +/-inf, which would make the mean infinite;
    # treat those observations as missing instead.
    growth = growth.replace([float('inf'), float('-inf')], float('nan'))
    # Assignment aligns on the row index, so the original row order is kept.
    merged_df[growth_column] = growth

# Dropping NaN values (first observation of each entity, and undefined growth)
growth_df = merged_df.dropna(subset=['Primary_growth', 'Secondary_growth'])

# Calculating average growth rates
primary_avg_growth = growth_df['Primary_growth'].mean()
secondary_avg_growth = growth_df['Secondary_growth'].mean()

primary_avg_growth, secondary_avg_growth

In [None]:
# Checking for any zero or very small values in enrollment rates that could cause infinity in growth calculations
print("Primary Enrollment Rate - Min Value:", merged_df['Primary_enrollment_rate'].min())
print("Secondary Enrollment Rate - Min Value:", merged_df['Secondary_enrollment_rate'].min())

# Keep only rows where both enrollment rates are above 1, so the percentage
# change never divides by a zero or near-zero value. The explicit .copy()
# makes filtered_df an independent frame: adding columns to a boolean-mask
# slice of merged_df triggers pandas' SettingWithCopyWarning.
keep_rows = (merged_df['Primary_enrollment_rate'] > 1) & (merged_df['Secondary_enrollment_rate'] > 1)
filtered_df = merged_df[keep_rows].copy()

# Recalculate growth within each Entity, ordered by Year. A plain
# pct_change() over the stacked frame would compare the first row of one
# entity with the last row of the previous entity.
# NOTE(review): rows removed by the filter leave gaps, so a change may span
# more than one year - confirm this is acceptable.
filtered_by_entity = filtered_df.sort_values(['Entity', 'Year']).groupby('Entity')
# Assignment aligns on the row index, so filtered_df keeps its row order.
filtered_df['Primary_growth'] = filtered_by_entity['Primary_enrollment_rate'].pct_change()
filtered_df['Secondary_growth'] = filtered_by_entity['Secondary_enrollment_rate'].pct_change()

# Dropping NaN values (first observation of each entity has no previous value)
filtered_growth_df = filtered_df.dropna(subset=['Primary_growth', 'Secondary_growth'])

# Calculating average growth rates after filtering
primary_avg_growth_filtered = filtered_growth_df['Primary_growth'].mean()
secondary_avg_growth_filtered = filtered_growth_df['Secondary_growth'].mean()

primary_avg_growth_filtered, secondary_avg_growth_filtered


In [None]:
# Project enrollment rates forward with the average growth rates, then feed
# the projections through the fitted regressions to get birth/death rates.

# Year the compounding starts from; the projection covers the 10 years after it.
# NOTE(review): 2023 is assumed to be the year of the starting observation
# below - confirm against the Year of that row.
BASE_YEAR = 2023
future_years = list(range(BASE_YEAR + 1, BASE_YEAR + 11))

# Starting values: the enrollment rates in the LAST ROW of filtered_df.
# NOTE(review): filtered_df stacks many entities, so the last row is whichever
# entity/year happens to come last in the frame, not necessarily the most
# recent year or the entity of interest - confirm this is the intended base.
last_primary_rate = filtered_df['Primary_enrollment_rate'].iloc[-1]
last_secondary_rate = filtered_df['Secondary_enrollment_rate'].iloc[-1]

# Compound the average growth once per year elapsed since BASE_YEAR
predicted_primary_rates = [
    last_primary_rate * (1 + primary_avg_growth_filtered) ** (year - BASE_YEAR)
    for year in future_years
]
predicted_secondary_rates = [
    last_secondary_rate * (1 + secondary_avg_growth_filtered) ** (year - BASE_YEAR)
    for year in future_years
]

# Creating a DataFrame for future predictions
future_data = pd.DataFrame({
    'Year': future_years,
    'Primary_enrollment_rate': predicted_primary_rates,
    'Secondary_enrollment_rate': predicted_secondary_rates
})

# Adding a constant term for intercept. has_constant='add' forces the column
# to be added: with the default ('skip'), add_constant leaves the frame
# untouched when an existing column is already constant (e.g. a growth rate
# of exactly 0), and predict() would then receive the wrong number of columns.
future_X = sm.add_constant(
    future_data[['Primary_enrollment_rate', 'Secondary_enrollment_rate']],
    has_constant='add',
)

# Using the existing regression models to predict future birth and death rates
future_data['Predicted_Birth_rate'] = model_birth.predict(future_X)
future_data['Predicted_Death_rate'] = model_death.predict(future_X)

# ace_tools is not a standard package and may be missing from the current
# environment; use it when present, otherwise fall back to a plain-text table
# so the cell does not end in an ImportError.
try:
    import ace_tools as tools
except ImportError:
    print(future_data.to_string(index=False))
else:
    tools.display_dataframe_to_user(name="Future Predictions of Birth and Death Rates", dataframe=future_data)
