In [44]:
#Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [45]:
# csv_file pathing
fertility_rate_path = "Resource/fred_fertility_rates.csv"
living_arrangements_path = "Resource/census_living_arrangements.csv"
marital_status_path = "Resource/marital status overtime.csv"
age_fertility_path = "Resource/Age_Fertility.csv"
education_path = "Resource/Female Education Attainment Level.csv"

In [46]:
# Raw DataFrame Prep: Fertility Rates

fertility_rate_df = pd.read_csv(fertility_rate_path).dropna()
fertility_rate_df['DATE'] = pd.to_datetime(fertility_rate_df['DATE'].str.strip(), format='%d/%m/%Y').dt.year
fertility_rate_df.rename(columns = {'SPDYNTFRTINUSA':'fertility_rate','DATE':'Year'}, inplace = True) 

print(f'{len(fertility_rate_df)} records')
fertility_rate_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# fertility_rate_df.to_csv("Output/fertility_rate.csv", index=False, header=True)

59 records


Unnamed: 0,Year,fertility_rate
0,1960,3.654
1,1961,3.62
2,1962,3.461
3,1963,3.319
4,1964,3.19


In [47]:
# Raw DataFrame Prep: Living Arrangements

# Read csv, drop na
living_arrangements_df = pd.read_csv(living_arrangements_path).dropna()

# Merge with fertility rates
merge_living_df = pd.merge(fertility_rate_df, living_arrangements_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_living_df)} records')
merge_living_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_living_df.to_csv("Output/living_arrangements.csv", index=False, header=True)

60 records


Unnamed: 0,Year,fertility_rate,under_18,two_parents,mother_only,father_only,other_relatives,non_relatives
0,1960,3.654,63727.0,55877.0,5105.0,724.0,1601.0,420.0
1,1961,3.62,,,,,,
2,1962,3.461,,,,,,
3,1963,3.319,,,,,,
4,1964,3.19,,,,,,


In [48]:
# Raw DataFrame Prep: Marital Status

# Read csv, drop na
marital_status_df = pd.read_csv(marital_status_path).dropna()

# Merge with fertility rates
merge_marital_df = pd.merge(fertility_rate_df, marital_status_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_marital_df)} records')
merge_marital_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_marital_df.to_csv("Output/marital_status.csv", index=False, header=True)

61 records


Unnamed: 0,Year,fertility_rate,Men Total,Married Men,Unmarried Men Total,Never Married Men,Widowed Men,Divorced Men,Women Total,Married Women,Unmarried Women Total,Never Married Women,Widowed Women,Divorced Women
0,1960,3.654,60273.0,41781.0,18492.0,15274.0,2112.0,1106.0,64607.0,42583.0,22024.0,12252.0,8064.0,1708.0
1,1961,3.62,,,,,,,,,,,,
2,1962,3.461,,,,,,,,,,,,
3,1963,3.319,,,,,,,,,,,,
4,1964,3.19,,,,,,,,,,,,


In [29]:
# Raw DataFrame Prep: Fertility by Age

# Read csv
age_fertility_df = pd.read_csv(age_fertility_path)

# Clean
age_fertility_df = age_fertility_df[['Year', 'Age 10-14 Years', 'Age 15-19 Years', 'Age 20-24 Years',
                           'Age 25-29 Years', 'Age 30-34 Years', 'Age 35-39 Years', 'Age 40-44 Years', 'Age 45-49 Years']]

# Merge with fertility rates
merge_age_df = pd.merge(fertility_rate_df, age_fertility_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_age_df)} records')
merge_age_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_age_df.to_csv("Output/fertility_age.csv", index=False, header=True)

73 records


Unnamed: 0,Year,fertility_rate,Age 10-14 Years,Age 15-19 Years,Age 20-24 Years,Age 25-29 Years,Age 30-34 Years,Age 35-39 Years,Age 40-44 Years,Age 45-49 Years
0,1960,3.654,0.8,89.1,258.1,197.4,112.7,56.2,15.5,0.9
1,1961,3.62,0.9,88.6,251.9,197.5,113.2,55.6,15.6,0.9
2,1962,3.461,0.8,81.4,241.9,191.1,108.6,52.6,14.9,0.9
3,1963,3.319,0.9,76.7,229.1,185.1,105.8,51.2,14.2,0.9
4,1964,3.19,0.9,73.1,217.5,178.7,103.4,49.9,13.8,0.8


In [30]:
# Raw DataFrame Prep: Female Education Attainment Level

# Read csv and drop na
education_df = pd.read_csv(education_path).dropna()

# Clean
education_df.rename(columns = {'Years':'Year'}, inplace=True)

# Merge with fertility rates
merge_education_df = pd.merge(fertility_rate_df, education_df, on="Year", how="outer")

# Print and display
print(f'{len(merge_education_df)} records')
merge_education_df.head()

# Print to csv - remove hashmark in front of below code to export to csv
# merge_education_df.to_csv("Output/education.csv", index=False, header=True)

66 records


Unnamed: 0,Year,fertility_rate,Total # in thousands,e0 - 4,e5 - 8,hs0 - 3,hs4,c0 - 3,c4
0,1960,3.654,51468.0,7.30%,30.40%,19.70%,27.70%,9.00%,5.80%
1,1961,3.62,,,,,,,
2,1962,3.461,52381.0,6.90%,27.70%,17.90%,31.60%,9.30%,6.70%
3,1963,3.319,,,,,,,
4,1964,3.19,53447.0,6.20%,26.40%,18.50%,33.40%,8.80%,6.80%


In [52]:
#drop NaN
clean_martial_df=merge_marital_df.dropna(how="any")
clean_martial_df.dtypes

Year                        int64
fertility_rate            float64
Men Total                  object
Married Men                object
Unmarried Men Total        object
Never Married Men          object
Widowed Men                object
Divorced Men               object
Women Total                object
Married Women              object
Unmarried Women Total      object
Never Married Women        object
Widowed Women              object
Divorced Women             object
dtype: object

In [57]:
#change column type
clean_martial_df["Men Total"] = pd.to_numeric(clean_martial_df["Men Total"], errors='coerce')
clean_martial_df["Married Men"] = pd.to_numeric(clean_martial_df["Married Men"], errors='coerce')
clean_martial_df["Never Married Men"] = pd.to_numeric(clean_martial_df["Never Married Men"], errors='coerce')
clean_martial_df["Widowed Men"] = pd.to_numeric(clean_martial_df["Widowed Men"], errors='coerce')
clean_martial_df["Divorced Men"] = pd.to_numeric(clean_martial_df["Divorced Men"], errors='coerce')
clean_martial_df["Unmarried Men Total"] = pd.to_numeric(clean_martial_df["Unmarried Men Total"], errors='coerce')
clean_martial_df["Women Total"] = pd.to_numeric(clean_martial_df["Women Total"], errors='coerce')
clean_martial_df["Married Women"] = pd.to_numeric(clean_martial_df["Married Women"], errors='coerce')
clean_martial_df["Never Married Women"] = pd.to_numeric(clean_martial_df["Never Married Women"], errors='coerce')
clean_martial_df["Widowed Women"] = pd.to_numeric(clean_martial_df["Widowed Women"], errors='coerce')
clean_martial_df["Divorced Women"] = pd.to_numeric(clean_martial_df["Divorced Women"], errors='coerce')
#clean_martial_df["Unmarried Women Total"] = pd.to_numeric(clean_martial_df["Unmarried Women Total"], errors='coerce')
clean_martial_df.dtypes

Year                        int64
fertility_rate            float64
Men Total                 float64
Married Men               float64
Unmarried Men Total       float64
Never Married Men         float64
Widowed Men               float64
Divorced Men              float64
Women Total               float64
Married Women             float64
Unmarried Women Total      object
Never Married Women       float64
Widowed Women             float64
Divorced Women            float64
dtype: object

In [10]:
#men: calculate for percentage,married, unmarried,never married
married_man_per=clean_martial_df["Married Men"]/clean_martial_df["Men Total"]
unmarried_man_per=clean_martial_df["Unmarried Men Total"]/clean_martial_df["Men Total"]
never_married_man_per=clean_martial_df["Never Married Men"]/clean_martial_df["Men Total"]

#women:same as man
married_wm_per=clean_martial_df["Married Women"]/clean_martial_df["Women Total"]
unmarried_wm_per=clean_martial_df["Unmarried Women Total"]/clean_martial_df["Women Total"]
never_wm_man_per=clean_martial_df["Never Married Women"]/clean_martial_df["Women Total"]

#add to dataframe
clean_martial_df["Percentage of Married Men"]=married_wm_per
clean_martial_df["Percentage of Unmarried Men"]=unmarried_wm_per
clean_martial_df["Percentage of Never Married Men"]=never_married_wm_per
clean_martial_df["Percentage of Married Women"]=married_wm_per
clean_martial_df["Percentage of Unmarried Women"]=unmarried_wm_per
clean_martial_df["Percentage of Never Married Women"]=never_married_wm_per
#show new dataframe
clean_martial_df.head()

TypeError: unsupported operand type(s) for /: 'str' and 'str'