In [None]:
#Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

In [None]:
# csv_file pathing
fertility_rate_path = "Resource/fred_fertility_rates.csv"
living_arrangements_path = "Resource/census_living_arrangements.csv"
marital_status_path = "Resource/marital status overtime.csv"
age_fertility_path = "Resource/Age_Fertility.csv"
education_path = "Resource/Female Education Attainment Level.csv"


In [None]:
# Raw DataFrame Prep: Fertility Rates

# Read csv, drop na
fertility_rate_df = pd.read_csv(fertility_rate_path).dropna()

# Clean data
fertility_rate_df['DATE'] = pd.to_datetime(fertility_rate_df['DATE'].str.strip(), format='%d/%m/%Y').dt.year
fertility_rate_df.rename(columns = {'SPDYNTFRTINUSA':'fertility_rate','DATE':'Year'}, inplace = True) 

# Print and display
print(f'{len(fertility_rate_df)} records')
fertility_rate_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# fertility_rate_df.to_csv("Output/fertility_rate.csv", index=False, header=True)


## Fertility Rate vs Living Arrangements

In [None]:
# Raw DataFrame Prep: Living Arrangements

# Read csv, drop na
living_arrangements_df = pd.read_csv(living_arrangements_path).dropna()

# Merge with fertility rates
merge_living_df = pd.merge(fertility_rate_df, living_arrangements_df, on = "Year", how = "outer").dropna()

# Print, display, export options
print(f'{len(merge_living_df)} records')
# merge_living_df.head()
# merge_living_df.to_csv("Output/living_arrangements.csv", index=False, header=True)

In [None]:
# Living Arrangements Bar Chart

# Clean and organize
other_ttl = merge_living_df['father_only'] + merge_living_df['other_relatives'] + merge_living_df['non_relatives']
pct_mother_only = merge_living_df['mother_only'] / merge_living_df['under_18'] 
pct_two_parents = merge_living_df['two_parents'] / merge_living_df['under_18'] 
pct_other = merge_living_df['other'] / merge_living_df['under_18'] 

merge_living_df['other']= other_ttl
merge_living_df['%_mother_only']= pct_mother_only.map("{:,.2%}".format)
merge_living_df['%_two_parents']= pct_two_parents.map("{:,.2%}".format)
merge_living_df['%_other']= pct_other.map("{:,.2%}".format)

# Create data frame w/specific chart data required
chart_data_df = merge_living_df[['Year', 'fertility_rate', 'under_18', 'mother_only', 'two_parents', 'other', '%_mother_only', '%_two_parents', '%_other']].reset_index(drop=True)
pct_mother_only = chart_data_df['mother_only'] / chart_data_df['under_18'] 
pct_two_parents = chart_data_df['two_parents'] / chart_data_df['under_18'] 
pct_other = chart_data_df['other'] / chart_data_df['under_18'] 

# Drop Outlier
chart_data_df = chart_data_df[chart_data_df.Year != 1960]
chart_data_df.head()

# Print output
chart_data_df.to_csv("Output/living_arrangements_chartdata.csv", index=False, header=True)

In [None]:
# Living Arrangements Line Chart

x_axis_data = chart_data_df['Year']
y_axis1 = chart_data_df['%_mother_only']
y_axis2 = chart_data_df['%_other']
y_axis3 = chart_data_df['%_two_parents']

plt.plot(x_axis_data, y_axis1)
plt.plot(x_axis_data, y_axis2)
plt.plot(x_axis_data, y_axis3)


In [None]:
# Living Arrangements Scatter Chart

fertility_rate = chart_data_df['fertility_rate']
mother_only = chart_data_df['%_mother_only']

plt.scatter(fertility_rate, mother_only, marker="o", facecolors="red", edgecolors="gray")

plt.xlabel('Fertility Rate')
plt.ylabel('Mother Only')

plt.show()


## Fertility Rate vs Marital Status

In [4]:
# Raw DataFrame Prep: Marital Status
import numpy as np
# Read csv, drop na
marital_status_df = pd.read_csv(marital_status_path).dropna()

# Merge with fertility rates
merge_marital_df = pd.merge(fertility_rate_df, marital_status_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_marital_df)} records')
merge_marital_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
merge_marital_df.to_csv("Output/marital_status.csv", index=False, header=True)
merge_marital_df.head()

61 records


Unnamed: 0,Year,fertility_rate,Men Total,Married Men,Unmarried Men Total,Never Married Men,Widowed Men,Divorced Men,Women Total,Married Women,Unmarried Women Total,Never Married Women,Widowed Women,Divorced Women
0,1960,3.654,60273.0,41781.0,18492.0,15274.0,2112.0,1106.0,64607.0,42583.0,22024.0,12252.0,8064.0,1708.0
1,1961,3.62,,,,,,,,,,,,
2,1962,3.461,,,,,,,,,,,,
3,1963,3.319,,,,,,,,,,,,
4,1964,3.19,,,,,,,,,,,,


In [5]:
#drop Nan
clean_marital_df=merge_marital_df.dropna(how="any")
clean_marital_df.head()

Unnamed: 0,Year,fertility_rate,Men Total,Married Men,Unmarried Men Total,Never Married Men,Widowed Men,Divorced Men,Women Total,Married Women,Unmarried Women Total,Never Married Women,Widowed Women,Divorced Women
0,1960,3.654,60273,41781,18492,15274,2112,1106,64607,42583,22024,12252,8064,1708
10,1970,2.48,70559,47109,23450,19832,2051,1567,77766,48148,29618,17167,9734,2717
20,1980,1.8395,81947,51813,30134,24227,1977,3930,89914,52965,36950,20226,10758,5966
30,1990,2.081,91955,55833,36121,27505,2333,6283,99838,56797,43040,22718,11477,8845
33,1993,2.0195,94854,56833,38021,28775,2468,6778,102400,57768,44631,23534,11214,9883


In [7]:
#get rid of warning
import warnings
warnings.filterwarnings('ignore')

#change column value
clean_maritial_df = clean_marital_df.apply(pd.to_numeric,errors = 'ignore')
clean_marital_df.dtypes

Year                        int64
fertility_rate            float64
Men Total                  object
Married Men                object
Unmarried Men Total        object
Never Married Men          object
Widowed Men                object
Divorced Men               object
Women Total                object
Married Women              object
Unmarried Women Total      object
Never Married Women        object
Widowed Women              object
Divorced Women             object
dtype: object

In [None]:
#line chart
x_axis_data = clean_marital_df['Year']
y_axis1 = clean_marital_df['fertility_rate']
y_axis2 = clean_marital_df['Married Men']
y_axis3 = clean_marital_df['Married Women']

plt.plot(x_axis_data, y_axis1)
plt.plot(x_axis_data, y_axis2)
plt.plot(x_axis_data, y_axis3)

## Fertility Rate vs Age

In [None]:
# Raw DataFrame Prep: Fertility by Age

# Read csv
age_fertility_df = pd.read_csv(age_fertility_path)

# Clean
age_fertility_df = age_fertility_df[['Year', 'Age 10-14 Years', 'Age 15-19 Years', 'Age 20-24 Years',
                           'Age 25-29 Years', 'Age 30-34 Years', 'Age 35-39 Years', 'Age 40-44 Years', 'Age 45-49 Years']]

# Merge with fertility rates
merge_age_df = pd.merge(fertility_rate_df, age_fertility_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_age_df)} records')
merge_age_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_age_df.to_csv("Output/fertility_age.csv", index=False, header=True)

## Fertility Rate vs Education

In [None]:
# Raw DataFrame Prep: Female Education Attainment Level

# Read csv and drop na
education_df = pd.read_csv(education_path).dropna()

# Clean
education_df.rename(columns = {'Years':'Year'}, inplace=True)

# Merge with fertility rates
merge_education_df = pd.merge(fertility_rate_df, education_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_education_df)} records')
merge_education_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_education_df.to_csv("Output/education.csv", index=False, header=True)