In [91]:
import os

import dash
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [92]:
# Columns kept from the raw CSV; every other column in the file is discarded.
_PROTEST_COLUMNS = ['Location', 'Date', 'County', 'StateTerritory', 'City_Town']

# Historical default location of the data file. It is specific to one machine,
# so pass `csv_path` explicitly when running the notebook anywhere else.
_DEFAULT_PROTEST_CSV = "/home/lisettesolis/30122-project-project-protest/project_protests/count_data/police-data.csv"


def protest_data(csv_path=_DEFAULT_PROTEST_CSV):
    """
    Load the protest records and add calendar columns derived from the date.

    Parameters
    ----------
    csv_path : str or path-like, optional
        Location of the CSV file to read. Defaults to the path the notebook
        was originally written against, so existing ``protest_data()`` calls
        keep working unchanged.

    Returns
    -------
    pandas.DataFrame
        A frame restricted to the columns ``Location``, ``Date``, ``County``,
        ``StateTerritory`` and ``City_Town``, with ``Date`` converted to
        datetimes and two extra columns, ``Month`` and ``Year``, taken from it.
    """
    # on_bad_lines='skip' makes pandas drop rows it cannot parse instead of
    # aborting the whole read.
    raw = pd.read_csv(csv_path, on_bad_lines='skip')
    df = pd.DataFrame(raw, columns=_PROTEST_COLUMNS)
    df['Date'] = pd.to_datetime(df['Date'])
    df['Month'] = df['Date'].dt.month
    df['Year'] = df['Date'].dt.year

    return df

# Module-level copy of the loaded data. The plotting functions defined in this
# notebook call protest_data() themselves instead of reading this variable.
df = protest_data()

In [93]:
def count_all():
    """
    Plot the number of protests recorded in each year as a line chart.

    The data is loaded with ``protest_data()``, rows with a non-null ``Date``
    are counted per ``Year``, and the counts are drawn with Plotly Express.
    No ``x`` is passed to ``px.line``, so the frame's ``Year`` index is used
    for the horizontal axis.

    Returns
    -------
    The result of ``fig.show()``; the figure is displayed as a side effect.
    """
    df = protest_data()
    # Count per year directly. The earlier pivot_table(margins=True) added a
    # grand-total row that was dropped again on the next line.
    yearly_counts = df.groupby('Year')['Date'].count().rename('Count').to_frame()
    fig = px.line(yearly_counts, y='Count', template="simple_white")

    return fig.show()

# Display the per-year protest count chart.
count_all()

In [94]:
def month_counts():
    """
    Plot monthly protest counts with one line per year.

    The data is loaded with ``protest_data()``, rows are counted for each
    (``Year``, ``Month``) pair, month numbers are swapped for short labels,
    and the result is drawn with Plotly Express. The x-axis follows calendar
    order via ``category_orders``.

    Returns
    -------
    The result of ``show()`` on the figure; the chart is displayed as a side
    effect.
    """
    # Month number -> axis label. Insertion order is calendar order, so the
    # values double as the category order for the x-axis.
    month_labels = {
        1: 'Jan', 2: 'Feb', 3: 'March', 4: 'April', 5: 'May', 6: 'June',
        7: 'July', 8: 'Aug', 9: 'Sept', 10: 'Oct', 11: 'Nov', 12: 'Dec',
    }

    records = protest_data()
    monthly = (
        records.groupby(['Year', 'Month'])['Date']
        .count()
        .rename('Count')
        .reset_index()
    )
    monthly['Month'] = monthly['Month'].replace(month_labels)

    chart = px.line(
        monthly,
        x='Month',
        y='Count',
        color='Year',
        category_orders={'Month': list(month_labels.values())},
        template="simple_white",
        color_discrete_sequence=px.colors.sequential.Plasma_r,
    )

    return chart.show()
 
# Display the per-month protest counts, one line per year.
month_counts()

In [95]:
# Yearly protest totals drawn with plotly.graph_objects (`go`, imported in the
# notebook's import cell) instead of Plotly Express.
df = protest_data()
# reset_index(name='Count') labels the size column directly, so the frame no
# longer carries a column whose label is the integer 0.
df_pivot = df.groupby(['Year']).size().reset_index(name='Count')

fig = go.Figure()
fig.add_trace(go.Scatter(x=df_pivot["Year"], y=df_pivot["Count"], name="All", mode="lines"))
fig.update_layout(
    title="Number of Protests", template="simple_white"
)
fig.show()

In [None]:
def go_cities():
    """
    Plot yearly protest counts for the most active cities, with a dropdown
    that switches between all cities and a single city.

    The data is loaded with ``protest_data()``. Cities are selected from the
    ten largest (``Year``, ``City_Town``) groups; each selected city gets one
    line trace of its per-year row count. The dropdown's first entry, "All",
    shows every trace, which matches what the figure displays when it first
    renders; each remaining entry shows exactly one city.

    Returns
    -------
    The result of ``fig.show()``; the figure is displayed as a side effect.
    """
    df = protest_data()

    # NOTE(review): the ranking is over (Year, City_Town) pairs, not over
    # cities. A city that appears in several of the top ten pairs uses several
    # slots, so fewer than ten cities may be plotted. Confirm this is intended.
    top_pairs = (
        df.groupby(['Year', 'City_Town'])
        .size()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    cities = list(top_pairs['City_Town'].unique())

    # Rows per (year, city) for the selected cities only.
    yearly = (
        df.loc[df['City_Town'].isin(cities)]
        .groupby(['Year', 'City_Town'])
        .size()
        .reset_index(name='Count')
    )

    # One line trace per city, in ranking order.
    traces = []
    for city in cities:
        city_rows = yearly.loc[yearly['City_Town'] == city]
        traces.append(
            go.Scatter(x=city_rows["Year"], y=city_rows['Count'], name=city, mode="lines")
        )

    # The first button is the one the menu shows as selected on load. It is
    # "All" so the selected label agrees with the initial display and the
    # user can return to the full view after picking a city.
    buttons = [{
        'args': [{'visible': [True] * len(cities)}],
        'label': 'All',
        'method': 'update',
    }]
    for shown, city in enumerate(cities):
        buttons.append({
            'args': [{'visible': [pos == shown for pos in range(len(cities))]}],
            'label': city,
            'method': 'update',
        })

    initial_layout = go.Layout(
        updatemenus=[{
            'buttons': buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.1,
            'y': 1.2,
        }],
        xaxis=dict(title='Year'),
        yaxis=dict(title='Count'),
    )

    fig = go.Figure(data=traces, layout=initial_layout)
    fig.update_layout(
        title="Cities with the most Protests", template="simple_white"
    )

    return fig.show()

# Display the per-city yearly line chart with its city dropdown.
go_cities()


In [None]:
# not in final version 

def protest_cities_line():
    """
    Plot yearly protest counts for the ten cities with the most records,
    one coloured line per city.

    The data is loaded with ``protest_data()``, cities are ranked by total
    row count, the ten largest are kept, and their per-year counts are drawn
    with Plotly Express.

    Returns
    -------
    The result of ``show()`` on the figure; the chart is displayed as a side
    effect.
    """
    records = protest_data()

    # rank cities by number of rows and keep the ten largest
    city_totals = records.groupby(['City_Town']).size().sort_values(ascending=False)
    top_cities = city_totals.head(10).index.tolist()

    # every row is one protest; the constant column becomes the per-group count
    records['Count'] = 1
    top_records = records[records['City_Town'].isin(top_cities)]

    # per-year totals for each of the selected cities
    yearly = top_records.groupby(['Year', 'City_Town']).count().reset_index()
    chart = px.line(
        yearly,
        x='Year',
        y='Count',
        color='City_Town',
        template="simple_white",
        color_discrete_sequence=px.colors.sequential.Plasma_r,
    )

    return chart.show()

# Display the yearly line chart for the ten cities with the most records.
protest_cities_line()

In [None]:
# not in final version 

def protest_cities_hist():
    """
    Plot a per-year histogram of protests for the ten cities with the most
    records, coloured by city.

    The data is loaded with ``protest_data()``, cities are ranked by total
    row count and the ten largest are kept. Each remaining row carries
    ``Count = 1``; Plotly Express sums ``y`` within each bin by default when
    ``y`` is given, so bar heights are protest counts.

    Returns
    -------
    The result of ``fig.show()``; the figure is displayed as a side effect.
    """
    df = protest_data()
    top_cities = (
        df.groupby(['City_Town'])
        .size()
        .sort_values(ascending=False)
        .head(10)
        .index
    )

    # .assign returns a new frame, so the filtered slice is never mutated.
    top_df = df.loc[df['City_Town'].isin(top_cities)].assign(Count=1)

    # The pivot table previously built here was never used, and the
    # `df.apply(pd.to_numeric, errors='ignore')` result was discarded, so both
    # were removed without changing the figure.
    fig = px.histogram(top_df, x="Year", y="Count", color="City_Town",
            width=600, height=400,
            labels={ # replaces default labels by column name
                "Year": "Year",  "Count": "Count", "City_Town": "City"
            },
            template="simple_white", color_discrete_sequence= px.colors.sequential.Plasma_r
            )
    return fig.show()

# Display the per-year histogram coloured by city.
protest_cities_hist()