<a href="https://colab.research.google.com/github/vickidecastro/final-project-reproductive-health-MD/blob/main/final_project_BA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Final Project
## Using data from the Maryland Open Data Portal to examine teen birth rate, babies with low birth weight, and access to early prenatal care
### Note: the data appeared to be inconsistent across races. To address this issue, we only looked at the "all races/aggregated" data.

## Metrics
### - Early prenatal care: percentage of pregnant women who receive prenatal care beginning in the first trimester

### - Teen birth rate: rate of births to teens ages 15-19 years (per 1,000 population).

### - Babies with low birth weight: percentage of live births with a low birth weight (2500 grams or less)

# Vicki

In [134]:
# for data analysis
import pandas as pd
import numpy as np

# for data visualization
import plotly.express as px
from plotly.subplots import make_subplots 

# for exporting files
from google.colab import files


In [135]:
# Read the three indicator CSVs from the project's GitHub repository,
# in this order: teen birth rate, babies with low birth weight,
# early prenatal care. Each file becomes its own DataFrame.
df_teen, df_baby, df_pren = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://raw.githubusercontent.com/vickidecastro/final-project-reproductive-health-MD/main/SHIP_teen_birth_rate.csv",
        "https://raw.githubusercontent.com/vickidecastro/final-project-reproductive-health-MD/main/SHIP_babies_with_low_birth_weight.csv",
        "https://raw.githubusercontent.com/vickidecastro/final-project-reproductive-health-MD/main/SHIP%20early%20prenatal%20care.csv",
    )
)

## Data Cleaning and Aggregation



In [136]:
# preview the first rows of the teen birth rate (BR) data
df_teen.head()

Unnamed: 0,Jurisdiction,Value,Race/ ethnicity,Year,Measure
0,State,14.2,All races/ ethnicities (aggregated),2017,Teen birth rate
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,Teen birth rate
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,Teen birth rate
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,Teen birth rate
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,Teen birth rate


In [137]:
# preview the first rows of the babies-with-low-birth-weight data
df_baby.head()

Unnamed: 0,Jurisdiction,Value,Race/ ethnicity,Year,Measure
0,State,8.9,All races/ ethnicities (aggregated),2017,Babies with low birth weight
1,Allegany,6.6,All races/ ethnicities (aggregated),2017,Babies with low birth weight
2,Anne Arundel,7.8,All races/ ethnicities (aggregated),2017,Babies with low birth weight
3,Baltimore City,12.4,All races/ ethnicities (aggregated),2017,Babies with low birth weight
4,Baltimore County,9.5,All races/ ethnicities (aggregated),2017,Babies with low birth weight


In [138]:
# preview the first rows of the early prenatal care data
df_pren.head()

Unnamed: 0,Jurisdiction,Value,Race/ ethnicity,Year,Measure
0,State,69.6,All races/ ethnicities (aggregated),2017,Early prenatal care
1,Allegany,78.1,All races/ ethnicities (aggregated),2017,Early prenatal care
2,Anne Arundel,71.0,All races/ ethnicities (aggregated),2017,Early prenatal care
3,Baltimore City,67.1,All races/ ethnicities (aggregated),2017,Early prenatal care
4,Baltimore County,69.0,All races/ ethnicities (aggregated),2017,Early prenatal care


In [139]:
# Rename the generic "Value" column so it names the metric it holds.
# rename() returns a new DataFrame instead of modifying df_teen in place,
# so the result has to be assigned back to take effect.
df_teen = df_teen.rename(columns={"Value": "teen_birth_rate"})

In [140]:
# Rename the generic "Value" column so it names the metric it holds.
# rename() returns a new DataFrame instead of modifying df_baby in place,
# so the result has to be assigned back to take effect.
df_baby = df_baby.rename(columns={"Value": "babies_low_birth_weight"})


In [141]:
# Rename the generic "Value" column of df_pren so it names the metric it holds.
# rename() returns a new DataFrame instead of modifying df_pren in place,
# so the result has to be assigned back to take effect.
df_pren = df_pren.rename(columns={"Value": "early_prenatal_care"})

In [142]:
# preview df_teen after the rename: "Value" should now read "teen_birth_rate"
df_teen.head()

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,Measure
0,State,14.2,All races/ ethnicities (aggregated),2017,Teen birth rate
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,Teen birth rate
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,Teen birth rate
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,Teen birth rate
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,Teen birth rate


In [143]:
# preview df_pren after the rename: "Value" should now read "early_prenatal_care"
df_pren.head()

Unnamed: 0,Jurisdiction,early_prenatal_care,Race/ ethnicity,Year,Measure
0,State,69.6,All races/ ethnicities (aggregated),2017,Early prenatal care
1,Allegany,78.1,All races/ ethnicities (aggregated),2017,Early prenatal care
2,Anne Arundel,71.0,All races/ ethnicities (aggregated),2017,Early prenatal care
3,Baltimore City,67.1,All races/ ethnicities (aggregated),2017,Early prenatal care
4,Baltimore County,69.0,All races/ ethnicities (aggregated),2017,Early prenatal care


In [144]:
# preview df_baby after the rename: "Value" should now read "babies_low_birth_weight"
df_baby.head()

Unnamed: 0,Jurisdiction,babies_low_birth_weight,Race/ ethnicity,Year,Measure
0,State,8.9,All races/ ethnicities (aggregated),2017,Babies with low birth weight
1,Allegany,6.6,All races/ ethnicities (aggregated),2017,Babies with low birth weight
2,Anne Arundel,7.8,All races/ ethnicities (aggregated),2017,Babies with low birth weight
3,Baltimore City,12.4,All races/ ethnicities (aggregated),2017,Babies with low birth weight
4,Baltimore County,9.5,All races/ ethnicities (aggregated),2017,Babies with low birth weight


In [145]:
# number of rows in the low-birth-weight table
len(df_baby.index)

200

In [146]:
# number of rows in the teen birth rate table
len(df_teen.index)

200

In [147]:
# number of rows in the early prenatal care table
len(df_pren.index)

200

In [148]:
# Attach the early prenatal care values to the teen birth rate rows.
# Rows are matched on jurisdiction, year and race/ethnicity; the left join
# keeps every row of df_teen even when df_pren has no matching row.
df_pren_teen = df_teen.merge(
    df_pren,
    how="left",
    on=["Jurisdiction", "Year", "Race/ ethnicity"],
)

In [149]:
# Both inputs carried a "Measure" label column, which the merge suffixed
# with _x / _y. Neither is needed now that the value columns are named.
df_2 = df_pren_teen.drop(["Measure_x", "Measure_y"], axis=1)

In [150]:
# preview the merged teen birth rate / early prenatal care table
df_2.head()

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,early_prenatal_care
0,State,14.2,All races/ ethnicities (aggregated),2017,69.6
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,78.1
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,71.0
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,67.1
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,69.0


In [151]:
# preview the merged table again (same call as the previous cell)
df_2.head()

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,early_prenatal_care
0,State,14.2,All races/ ethnicities (aggregated),2017,69.6
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,78.1
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,71.0
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,67.1
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,69.0


In [152]:
# column dtypes and non-null counts of the merged table
df_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Jurisdiction         200 non-null    object 
 1   teen_birth_rate      200 non-null    float64
 2   Race/ ethnicity      200 non-null    object 
 3   Year                 200 non-null    int64  
 4   early_prenatal_care  200 non-null    float64
dtypes: float64(2), int64(1), object(2)
memory usage: 9.4+ KB


In [153]:
# column dtypes and non-null counts of the low-birth-weight table
df_baby.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Jurisdiction             200 non-null    object 
 1   babies_low_birth_weight  200 non-null    float64
 2   Race/ ethnicity          200 non-null    object 
 3   Year                     200 non-null    int64  
 4   Measure                  200 non-null    object 
dtypes: float64(1), int64(1), object(3)
memory usage: 7.9+ KB


In [154]:
# Store "Year" in df_2 as generic object dtype.
# NOTE(review): the info() outputs before this cell report "Year" as int64 in
# both df_2 and df_baby, so this cast makes the dtypes differ rather than
# match -- confirm whether it is still needed.
df_2 = df_2.astype({"Year": object})

In [155]:
# re-check dtypes of df_2 after the "Year" cast
df_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Jurisdiction         200 non-null    object 
 1   teen_birth_rate      200 non-null    float64
 2   Race/ ethnicity      200 non-null    object 
 3   Year                 200 non-null    object 
 4   early_prenatal_care  200 non-null    float64
dtypes: float64(2), object(3)
memory usage: 9.4+ KB


In [156]:
# Place the low-birth-weight table side by side with the merged
# teen birth rate / early prenatal care table.
# pd.concat(axis=1) pairs rows purely by index label; it never looks at the
# jurisdiction, year or race values. Check that those key columns agree row
# for row first, so a reordered or incomplete source file raises an error
# instead of silently pairing values from different jurisdictions or years.
_key_cols = ["Jurisdiction", "Year", "Race/ ethnicity"]
# Compare as strings: "Year" is object dtype in df_2 but int64 in df_baby.
if not df_2[_key_cols].astype(str).equals(df_baby[_key_cols].astype(str)):
    raise ValueError(
        "df_2 and df_baby are not aligned on Jurisdiction/Year/Race; "
        "refusing to concatenate them column-wise"
    )
df_all = pd.concat([df_2, df_baby], axis=1)

In [157]:
# preview the combined table; the key columns from df_baby appear a second time
df_all.head()

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,early_prenatal_care,Jurisdiction.1,babies_low_birth_weight,Race/ ethnicity.1,Year.1,Measure
0,State,14.2,All races/ ethnicities (aggregated),2017,69.6,State,8.9,All races/ ethnicities (aggregated),2017,Babies with low birth weight
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,78.1,Allegany,6.6,All races/ ethnicities (aggregated),2017,Babies with low birth weight
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,71.0,Anne Arundel,7.8,All races/ ethnicities (aggregated),2017,Babies with low birth weight
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,67.1,Baltimore City,12.4,All races/ ethnicities (aggregated),2017,Babies with low birth weight
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,69.0,Baltimore County,9.5,All races/ ethnicities (aggregated),2017,Babies with low birth weight


In [158]:
# Display df_all without its "Measure" column.
# drop() returns a new DataFrame and the result is not assigned here,
# so df_all itself still contains "Measure" after this cell.
df_all.drop("Measure", axis=1)

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,early_prenatal_care,Jurisdiction.1,babies_low_birth_weight,Race/ ethnicity.1,Year.1
0,State,14.2,All races/ ethnicities (aggregated),2017,69.6,State,8.9,All races/ ethnicities (aggregated),2017
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,78.1,Allegany,6.6,All races/ ethnicities (aggregated),2017
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,71.0,Anne Arundel,7.8,All races/ ethnicities (aggregated),2017
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,67.1,Baltimore City,12.4,All races/ ethnicities (aggregated),2017
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,69.0,Baltimore County,9.5,All races/ ethnicities (aggregated),2017
...,...,...,...,...,...,...,...,...,...
195,Somerset,31.6,All races/ ethnicities (aggregated),2010,73.6,Somerset,7.4,All races/ ethnicities (aggregated),2010
196,Talbot,20.4,All races/ ethnicities (aggregated),2010,79.4,Talbot,6.7,All races/ ethnicities (aggregated),2010
197,Washington,36.0,All races/ ethnicities (aggregated),2010,64.0,Washington,9.1,All races/ ethnicities (aggregated),2010
198,Wicomico,30.9,All races/ ethnicities (aggregated),2010,67.4,Wicomico,8.2,All races/ ethnicities (aggregated),2010


In [159]:
# list the column labels of df_all; several labels occur twice after the concat
df_all.columns

Index(['Jurisdiction', 'teen_birth_rate', 'Race/ ethnicity', 'Year',
       'early_prenatal_care', 'Jurisdiction', 'babies_low_birth_weight',
       'Race/ ethnicity', 'Year', 'Measure'],
      dtype='object')

In [160]:
# Relabel all ten columns by position. The duplicated key columns that came
# from df_baby get a "_1" suffix so they can be dropped by name afterwards.
df_all.columns = [
    # columns that came from df_2
    "Jurisdiction",
    "teen_birth_rate",
    "Race/ ethnicity",
    "Year",
    "early_prenatal_care",
    # columns that came from df_baby
    "Jurisdiction_1",
    "babies_low_birth_weight",
    "Race/ ethnicity_1",
    "Year_1",
    "Measure",
]
       

In [161]:
# Remove the duplicated key columns (suffixed "_1") and the "Measure" label,
# leaving one set of key columns plus the three metric columns.
df_all = df_all.drop(
    ["Jurisdiction_1", "Race/ ethnicity_1", "Year_1", "Measure"],
    axis=1,
)

In [162]:
# display the combined table after the duplicate columns were dropped
df_all

Unnamed: 0,Jurisdiction,teen_birth_rate,Race/ ethnicity,Year,early_prenatal_care,babies_low_birth_weight
0,State,14.2,All races/ ethnicities (aggregated),2017,69.6,8.9
1,Allegany,23.5,All races/ ethnicities (aggregated),2017,78.1,6.6
2,Anne Arundel,11.9,All races/ ethnicities (aggregated),2017,71.0,7.8
3,Baltimore City,28.9,All races/ ethnicities (aggregated),2017,67.1,12.4
4,Baltimore County,11.0,All races/ ethnicities (aggregated),2017,69.0,9.5
...,...,...,...,...,...,...
195,Somerset,31.6,All races/ ethnicities (aggregated),2010,73.6,7.4
196,Talbot,20.4,All races/ ethnicities (aggregated),2010,79.4,6.7
197,Washington,36.0,All races/ ethnicities (aggregated),2010,64.0,9.1
198,Wicomico,30.9,All races/ ethnicities (aggregated),2010,67.4,8.2


In [163]:
# Put the identifying columns first, followed by the three metrics.
df_all = df_all.loc[
    :,
    [
        "Jurisdiction",
        "Race/ ethnicity",
        "Year",
        "teen_birth_rate",
        "early_prenatal_care",
        "babies_low_birth_weight",
    ],
]

In [164]:
# display the combined table with the reordered columns
df_all

Unnamed: 0,Jurisdiction,Race/ ethnicity,Year,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
0,State,All races/ ethnicities (aggregated),2017,14.2,69.6,8.9
1,Allegany,All races/ ethnicities (aggregated),2017,23.5,78.1,6.6
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5
...,...,...,...,...,...,...
195,Somerset,All races/ ethnicities (aggregated),2010,31.6,73.6,7.4
196,Talbot,All races/ ethnicities (aggregated),2010,20.4,79.4,6.7
197,Washington,All races/ ethnicities (aggregated),2010,36.0,64.0,9.1
198,Wicomico,All races/ ethnicities (aggregated),2010,30.9,67.4,8.2


In [165]:
# Shorten the identifying column names used by the plots below.
df_all = df_all.rename(
    {
        "Jurisdiction": "jurisd",
        "Race/ ethnicity": "race/ethn",
        "Year": "yr",
    },
    axis="columns",
)

In [166]:
# preview df_all with the shortened column names
df_all.head()

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
0,State,All races/ ethnicities (aggregated),2017,14.2,69.6,8.9
1,Allegany,All races/ ethnicities (aggregated),2017,23.5,78.1,6.6
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5


In [167]:
# Keep only jurisdiction-level rows: the statewide "State" rows are excluded.
df_all_races = df_all.loc[df_all["jurisd"].ne("State")]

In [168]:
# display the table without the "State" rows
df_all_races

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
1,Allegany,All races/ ethnicities (aggregated),2017,23.5,78.1,6.6
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5
5,Calvert,All races/ ethnicities (aggregated),2017,8.1,77.1,7.8
...,...,...,...,...,...,...
195,Somerset,All races/ ethnicities (aggregated),2010,31.6,73.6,7.4
196,Talbot,All races/ ethnicities (aggregated),2010,20.4,79.4,6.7
197,Washington,All races/ ethnicities (aggregated),2010,36.0,64.0,9.1
198,Wicomico,All races/ ethnicities (aggregated),2010,30.9,67.4,8.2


## Data Visualizations

In [169]:
# Teen birth rate over time, one line per Maryland jurisdiction.
line_teen_all_races = px.line(
    df_all_races,
    x="yr",
    y="teen_birth_rate",
    color="jurisd",
    labels={"yr": "Year", "teen_birth_rate": "Teen Birth Rate"},
    title="Teen Birth Rate in Maryland, 2010-2017",
)
line_teen_all_races

In [121]:
# Early prenatal care over time, one line per Maryland jurisdiction.
line_pren_all_races = px.line(
    df_all_races,
    x="yr",
    y="early_prenatal_care",
    color="jurisd",
    labels={"yr": "Year", "early_prenatal_care": "Early Prenatal Care"},
    title="Access to Early Prenatal Care in Maryland, 2010-2017",
)
line_pren_all_races

In [122]:
# Babies with low birth weight over time, one line per Maryland jurisdiction.
line_babies_all_races = px.line(
    df_all_races,
    x="yr",
    y="babies_low_birth_weight",
    color="jurisd",
    labels={"yr": "Year", "babies_low_birth_weight": "Babies with Low Birth Weight"},
    title="Babies with Low Birth Weight in Maryland, 2010-2017",
)
line_babies_all_races

In [123]:
# show the last rows of the combined table
df_all.tail()

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
195,Somerset,All races/ ethnicities (aggregated),2010,31.6,73.6,7.4
196,Talbot,All races/ ethnicities (aggregated),2010,20.4,79.4,6.7
197,Washington,All races/ ethnicities (aggregated),2010,36.0,64.0,9.1
198,Wicomico,All races/ ethnicities (aggregated),2010,30.9,67.4,8.2
199,Worcester,All races/ ethnicities (aggregated),2010,26.6,73.4,5.3


In [124]:
# show the first rows of the combined table
df_all.head()

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
0,State,All races/ ethnicities (aggregated),2017,14.2,69.6,8.9
1,Allegany,All races/ ethnicities (aggregated),2017,23.5,78.1,6.6
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5


In [125]:
# display the jurisdiction-level table (no "State" rows)
df_all_races

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
1,Allegany,All races/ ethnicities (aggregated),2017,23.5,78.1,6.6
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5
5,Calvert,All races/ ethnicities (aggregated),2017,8.1,77.1,7.8
...,...,...,...,...,...,...
195,Somerset,All races/ ethnicities (aggregated),2010,31.6,73.6,7.4
196,Talbot,All races/ ethnicities (aggregated),2010,20.4,79.4,6.7
197,Washington,All races/ ethnicities (aggregated),2010,36.0,64.0,9.1
198,Wicomico,All races/ ethnicities (aggregated),2010,30.9,67.4,8.2


In [126]:
# Subset of rows for the five jurisdictions treated here as Maryland's largest.
# NOTE(review): "largest" is presumably by population -- confirm the criterion.
df_top5 = df_all_races.loc[
    df_all_races["jurisd"].isin(
        [
            "Montgomery",
            "Prince George's",
            "Baltimore County",
            "Baltimore City",
            "Anne Arundel",
        ]
    )
]
df_top5.head()

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
2,Anne Arundel,All races/ ethnicities (aggregated),2017,11.9,71.0,7.8
3,Baltimore City,All races/ ethnicities (aggregated),2017,28.9,67.1,12.4
4,Baltimore County,All races/ ethnicities (aggregated),2017,11.0,69.0,9.5
16,Montgomery,All races/ ethnicities (aggregated),2017,9.5,70.9,7.5
17,Prince George's,All races/ ethnicities (aggregated),2017,19.3,59.1,9.8


In [127]:
# Subset of rows for the five jurisdictions treated here as Maryland's smallest.
# NOTE(review): "smallest" is presumably by population -- confirm the criterion.
df_bot5 = df_all_races.loc[
    df_all_races["jurisd"].isin(
        [
            "Caroline",
            "Dorchester",
            "Garrett",
            "Somerset",
            "Kent",
        ]
    )
]
df_bot5.head()

Unnamed: 0,jurisd,race/ethn,yr,teen_birth_rate,early_prenatal_care,babies_low_birth_weight
6,Caroline,All races/ ethnicities (aggregated),2017,15.8,71.3,7.4
10,Dorchester,All races/ ethnicities (aggregated),2017,21.4,78.4,7.3
12,Garrett,All races/ ethnicities (aggregated),2017,17.9,78.9,11.0
15,Kent,All races/ ethnicities (aggregated),2017,8.5,80.3,10.4
20,Somerset,All races/ ethnicities (aggregated),2017,13.0,76.4,13.2


In [128]:
# Teen birth rate over time for the five largest counties (df_top5).
line_teen_top5 = px.line(
    df_top5,
    x="yr",
    y="teen_birth_rate",
    color="jurisd",
    labels={"yr": "Year", "teen_birth_rate": "Teen Birth Rate"},
    title="Teen Birth Rate for 5 Largest Counties in Maryland, 2010-2017",
)
line_teen_top5

In [129]:
# Teen birth rate over time for the five smallest counties (df_bot5).
line_teen_bot5 = px.line(
    df_bot5,
    x="yr",
    y="teen_birth_rate",
    color="jurisd",
    labels={"yr": "Year", "teen_birth_rate": "Teen Birth Rate"},
    title="Teen Birth Rate for 5 Smallest Counties in Maryland, 2010-2017",
)
line_teen_bot5

In [130]:
# Early prenatal care over time for the five largest counties (df_top5).
# The y-axis is pinned to 50-85, the same range used in the smallest-county chart.
line_pren_top5 = px.line(
    df_top5,
    x="yr",
    y="early_prenatal_care",
    color="jurisd",
    range_y=[50, 85],
    labels={"yr": "Year", "early_prenatal_care": "Early Prenatal Care"},
    title="Access to Early Prenatal Care in 5 Largest Counties in Maryland, 2010-2017",
)
line_pren_top5

In [131]:
# Early prenatal care over time for the five smallest counties (df_bot5).
# The y-axis is pinned to 50-85, the same range used in the largest-county chart.
line_pren_bot5 = px.line(
    df_bot5,
    x="yr",
    y="early_prenatal_care",
    color="jurisd",
    range_y=[50, 85],
    labels={"yr": "Year", "early_prenatal_care": "Early Prenatal Care"},
    title="Access to Early Prenatal Care in 5 Smallest Counties in Maryland, 2010-2017",
)
line_pren_bot5

In [132]:
# Babies with low birth weight over time for the five largest counties (df_top5).
line_babies_top5 = px.line(
    df_top5,
    x="yr",
    y="babies_low_birth_weight",
    color="jurisd",
    labels={"yr": "Year", "babies_low_birth_weight": "Babies with Low Birth Weight"},
    title="Babies with Low Birth Weight in 5 Largest Counties Maryland, 2010-2017",
)
line_babies_top5

In [133]:
# Babies with low birth weight over time for the five smallest counties.
# The data source must be df_bot5 (smallest counties) to match the chart title;
# df_top5 would redraw the largest-county chart under the wrong heading.
line_babies_bot5 = px.line(
    df_bot5,
    x="yr",
    y="babies_low_birth_weight",
    color="jurisd",
    title="Babies with Low Birth Weight in 5 Smallest Counties Maryland, 2010-2017",
    labels={"yr": "Year", "babies_low_birth_weight": "Babies with Low Birth Weight"},
)
line_babies_bot5