In [7]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import statsmodels.api as sm # to build a LOWESS model
import plotly.express as px # for data visualization 
import numpy as np

In [8]:
data = pd.read_csv("/workspaces/unifying-data-science-2023-project-team7/05_clean_data/processed_data_clean_final.csv")
data.head()

Unnamed: 0,date,total_activity,citation_issued,citation_rate,day_of_week,month,days_end_month,end_of_month,year,days_end_year,end_of_year,quarter,days_end_quarter,end_of_quarter,city,state
0,2010-01-01,27,27,1.0,5,1,30,False,2010,180.0,False,3.0,89.0,False,bakersfield,ca
1,2010-01-02,6,6,1.0,6,1,29,False,2010,179.0,False,3.0,88.0,False,bakersfield,ca
2,2010-01-03,13,13,1.0,7,1,28,False,2010,178.0,False,3.0,87.0,False,bakersfield,ca
3,2010-01-04,86,86,1.0,1,1,27,False,2010,177.0,False,3.0,86.0,False,bakersfield,ca
4,2010-01-05,60,60,1.0,2,1,26,False,2010,176.0,False,3.0,85.0,False,bakersfield,ca


In [9]:
lowess = sm.nonparametric.lowess # LOWESS model

In [10]:
def lowess_with_confidence_bounds(
    x, y, eval_x, N=200, conf_interval=0.95, lowess_kw=None
):
    """
    Perform Lowess regression and determine a confidence interval by bootstrap resampling
    """
    # Lowess smoothing
    smoothed = sm.nonparametric.lowess(exog=x, endog=y, xvals=eval_x, **lowess_kw)

    # Perform bootstrap resamplings of the data
    # and  evaluate the smoothing at a fixed set of points
    smoothed_values = np.empty((N, len(eval_x)))
    for i in range(N):
        sample = np.random.choice(len(x), len(x), replace=True)
        sampled_x = x[sample]
        sampled_y = y[sample]

        smoothed_values[i] = sm.nonparametric.lowess(
            exog=sampled_x, endog=sampled_y, xvals=eval_x, **lowess_kw
        )

    # Get the confidence interval
    sorted_values = np.sort(smoothed_values, axis=0)
    bound = int(N * (1 - conf_interval) / 2)
    bottom = sorted_values[bound - 1]
    top = sorted_values[-bound]

    return smoothed, bottom, top

## Cincinnati

In [11]:
data[data['city'] == 'cincinnati']["year"].unique()

array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017])

In [12]:
year = [[2009, 2010, 2011, 2012, 2013], [2014, 2015, 2016, 2017, 2018]]

fig = make_subplots(
            rows=2, cols=5,
            subplot_titles=("2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018"),
            vertical_spacing = 0.1)

row = 1
for y_row in year:
    col = 1
    for y_col in y_row:

        dat = data[(data['city'] == 'cincinnati') & (data['year'] == y_col)]

        # x values for LOWESS
        x=dat['days_end_month'].values 
        # y values for both
        y=dat['citation_issued'].values

        lowess_reg = lowess(y, x, frac=0.2)

        fig.add_trace(go.Scatter(x=x, y=y,showlegend=False,mode = 'markers', name=y_col),
                    row=row, col=col)
        if col == 1 and row == 1:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red')),
                       row=row, col=col)
        else:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red'),showlegend=False,),
                       row=row, col=col)

        col += 1
    row += 1

# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Cincinnati", 
                             font=dict(color='black')),
                
    )

fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()


Mean of empty slice.


invalid value encountered in scalar divide



In [13]:
year = 2017
dat = data[(data['city'] == 'cincinnati') & (data['year'] == year)]

# x values for LOWESS
x=dat['days_end_month'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 29, 30)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Cincinnati in 2017", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=5,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the month", 
                  yaxis_title="Number of citations for Cincinnati")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

In [14]:
city = 'cincinnati'
dat = data[(data['city'] == city)]

# x values for LOWESS
x=dat['days_end_year'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 365, 366)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text=f"Number of days until the end of the month v.s. Number of citations for {city} from 2009-2017", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=50,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the month", 
                  yaxis_title=f"Number of citations for {city}")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

## Seattle

In [15]:
data[data['city'] == 'seattle']["year"].unique()

array([2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015])

In [16]:
year = [[2006, 2007, 2008, 2009, 2010], [2011, 2012, 2013, 2014, 2015]]

fig = make_subplots(
            rows=2, cols=5,
            subplot_titles=("2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"),
            vertical_spacing = 0.1)

row = 1
for y_row in year:
    col = 1
    for y_col in y_row:

        dat = data[(data['city'] == 'seattle') & (data['year'] == y_col)]

        # x values for LOWESS
        x=dat['days_end_month'].values 
        # y values for both
        y=dat['citation_issued'].values

        lowess_reg = lowess(y, x, frac=0.2)

        fig.add_trace(go.Scatter(x=x, y=y,showlegend=False,mode = 'markers', name=y_col),
                    row=row, col=col)
        if col == 1 and row == 1:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red')),
                       row=row, col=col)
        else:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red'),showlegend=False,),
                       row=row, col=col)

        col += 1
    row += 1

# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Seattle", 
                             font=dict(color='black')),                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'))

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

In [17]:
year = 2006
dat = data[(data['city'] == 'seattle') & (data['year'] == year)]

# x values for LOWESS
x=dat['days_end_month'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 30, 31)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Seattle in 2006", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=5,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the month", 
                  yaxis_title="Number of citations for Seattle")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

## Madison

In [18]:
data[data['city'] == 'madison']["year"].unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016])

In [19]:
city = "madison"
dat = data[(data['city'] == city)]

# x values for LOWESS
x=dat['days_end_year'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 365, 366)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text=f"Number of days until the end of the year v.s. Number of citations for {city} from 2008-2016", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=50,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the year", 
                  yaxis_title=f"Number of citations for {city}")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

In [20]:
year = [[2007, 2008, 2009, 2010, 2011], [2012, 2013, 2014, 2015, 2016],[2017, 2018, 2019, 2020]]

fig = make_subplots(
            rows=3, cols=5,
            subplot_titles=("2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016","2017", "2018", "2019", "2020"),
            vertical_spacing = 0.1)

row = 1
for y_row in year:
    col = 1
    for y_col in y_row:

        dat = data[(data['city'] == 'madison') & (data['year'] == y_col)]

        # x values for LOWESS
        x=dat['days_end_month'].values 
        # y values for both
        y=dat['citation_issued'].values

        lowess_reg = lowess(y, x, frac=0.2)

        fig.add_trace(go.Scatter(x=x, y=y,showlegend=False,mode = 'markers', name=y_col),
                    row=row, col=col)
        if col == 1 and row == 1:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red')),
                       row=row, col=col)
        else:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red'),showlegend=False,),
                       row=row, col=col)

        col += 1
    row += 1

# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Madison", 
                             font=dict(color='black')),
                
    )

fig.update_layout(dict(plot_bgcolor = 'white'))

fig.update_xaxes(autorange="reversed")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()


Mean of empty slice.


invalid value encountered in scalar divide



In [21]:
year = 2015
dat = data[(data['city'] == 'madison') & (data['year'] == year)]

# x values for LOWESS
x=dat['days_end_month'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 30, 31)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Madison in 2015", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=5,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the month", 
                  yaxis_title="Number of citations for Madison")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

## Durham

In [22]:
data[data['city'] == 'durham']["year"].unique()

array([2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015])

In [23]:
year = [[2001, 2002, 2003, 2004, 2005], [2006, 2007, 2008, 2009, 2010],[2011, 2012, 2013, 2014, 2015]]

fig = make_subplots(
            rows=3, cols=5,
            subplot_titles=("2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010","2011", "2012", "2013", "2014", "2015"),
            vertical_spacing = 0.1)

row = 1
for y_row in year:
    col = 1
    for y_col in y_row:

        dat = data[(data['city'] == 'durham') & (data['year'] == y_col)]

        # x values for LOWESS
        x=dat['days_end_month'].values 
        # y values for both
        y=dat['citation_issued'].values

        lowess_reg = lowess(y, x, frac=0.2)

        fig.add_trace(go.Scatter(x=x, y=y,showlegend=False,mode = 'markers', name=y_col),
                    row=row, col=col)
        if col == 1 and row == 1:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red')),
                       row=row, col=col)
        else:
            fig.add_trace(go.Scatter(x=lowess_reg[:,0], y=lowess_reg[:,1], name='LOWESS', line=dict(color='red'),showlegend=False,),
                       row=row, col=col)

        col += 1
    row += 1

# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Durham", 
                             font=dict(color='black')),
                             xaxis1=dict(
                                tickmode='linear',
                                dtick=5,),
                            xaxis2=dict(
                                tickmode='linear',
                                dtick=5,),
                            xaxis3=dict(
                                tickmode='linear',
                                dtick=5,),
                            xaxis4=dict(
                                tickmode='linear',
                                dtick=5,),
                
    )

fig.update_layout(dict(plot_bgcolor = 'white'))

fig.update_xaxes(autorange="reversed")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()


Mean of empty slice.


invalid value encountered in scalar divide



In [24]:
year = 2013
dat = data[(data['city'] == 'durham') & (data['year'] == year)]

# x values for LOWESS
x=dat['days_end_month'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 30, 31)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the month v.s. Number of citations for Durham in 2013", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=5,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the month", 
                  yaxis_title="Number of citations for Durham")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

In [25]:
city = 'durham'
dat = data[(data['city'] == city)]

# x values for LOWESS
x=dat['days_end_year'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 365, 366)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text=f"Number of days until the end of the year v.s. Number of citations for {city} from 2002-2015", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=50,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the year", 
                  yaxis_title=f"Number of citations for {city}")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

In [26]:
city = 'durham'
dat = data[(data['city'] == city)]
# x values for LOWESS
x=dat['days_end_year'].values 
# y values for both
y=dat['citation_issued'].values

fig = px.scatter(dat, x=x, y=y, 
                opacity=0.8, color_discrete_sequence=['black'])

for year in data[data['city'] == 'durham']["year"].unique():

    x=dat[dat["year"] == year]['days_end_year'].values 
    # y values for both
    y=dat[dat["year"] == year]['citation_issued'].values

    eval_x = np.linspace(0, 365, 366)

    lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

    fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

    fig.add_trace(go.Scatter(x=eval_x, y = top,
                    line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
            
    fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                    line = dict(color='rgba(0,0,0,0)'),
                    fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                width=1000,
                title=dict(text=f"Number of days until the end of the year v.s. Number of citations for {city} from 2002-2015", 
                            font=dict(color='black')),
                            xaxis=dict(
                                tickmode='linear',
                                dtick=50,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                xaxis_title="Number of days until the end of the year", 
                yaxis_title=f"Number of citations for {city}")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()

# Aurora

In [27]:
dat = data[(data['city'] == 'aurora')]

# x values for LOWESS
x=dat['days_end_year'].values 
# y values for both
y=dat['citation_issued'].values

eval_x = np.linspace(0, 365, 366)

lowess_reg, bottom, top = lowess_with_confidence_bounds(x, y, eval_x, N=200, conf_interval=0.95, lowess_kw={"frac": 0.2})

fig = px.scatter(dat, x=x, y=y, 
                 opacity=0.8, color_discrete_sequence=['black'])

fig.add_traces(go.Scatter(x=eval_x, y=lowess_reg, name='LOWESS', line=dict(color='red')))

fig.add_trace(go.Scatter(x=eval_x, y = top,
                line = dict(color='rgba(0,0,0,0)'),showlegend=False,))
        
fig.add_trace(go.Scatter(x=eval_x, y = bottom,
                line = dict(color='rgba(0,0,0,0)'),
                fill='tonexty',name = '95% confidence interval',fillcolor = 'rgba(255, 0, 0, 0.2)'))


# Set figure title
fig.update_layout(height=750, 
                  width=1000,
                  title=dict(text="Number of days until the end of the year v.s. Number of citations for Aurora from 2012-2019", 
                             font=dict(color='black')),
                             xaxis=dict(
                                tickmode='linear',
                                dtick=50,),
                
    )
fig.update_xaxes(autorange="reversed")

fig.update_layout(dict(plot_bgcolor = 'white'),
                  xaxis_title="Number of days until the end of the year", 
                  yaxis_title="Number of citations for Aurora")

# Update axes lines
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')

fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                 zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                 showline=True, linewidth=1, linecolor='black')


# Update marker size
fig.update_traces(marker=dict(size=3))

fig.show()