In [1]:
# DATA551 Worldwide suicide rates visualizations 

import pandas as pd
import plotly
import plotly.plotly as py

# Put plotly into offline notebook mode so the plotly.offline.iplot calls
# further down can render their figures inline in this notebook.
plotly.offline.init_notebook_mode(connected=True)

In [2]:
# Load the raw records: one row per country / year / sex / age group
suicide_csv_path = 'suicide_rates_overview_1985_to_2016.csv'
suicide_df = pd.read_csv(suicide_csv_path)

# Preview the first rows to confirm the columns were read as expected
suicide_df.head()


Unnamed: 0,country,year,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Albania,1987,male,15-24 years,21,312900,6.71,Albania1987,,2156624900,796,Generation X
1,Albania,1987,male,35-54 years,16,308000,5.19,Albania1987,,2156624900,796,Silent
2,Albania,1987,female,15-24 years,14,289700,4.83,Albania1987,,2156624900,796,Generation X
3,Albania,1987,male,75+ years,1,21800,4.59,Albania1987,,2156624900,796,G.I. Generation
4,Albania,1987,male,25-34 years,9,274300,3.28,Albania1987,,2156624900,796,Boomers


In [3]:
# Data wrangling: collapse the raw records into per-country and per-year summaries.
#
# The rate column is selected *before* aggregating. Aggregating every column
# and picking one afterwards is wasteful and, on pandas >= 2.0, fails as soon
# as a text column (e.g. 'country') reaches .mean().

# Yearly total per country: add up the per-100k rates reported for each
# sex/age group within a (country, year).
# NOTE(review): a sum of per-group rates is not a population-weighted rate;
# confirm this is the intended metric (vs. suicides_no / population).
suicide_year_sum = (
    suicide_df
    .groupby(['country', 'year'])['suicides/100k pop']
    .sum()
    .reset_index()
)

# Average of those yearly totals for each country (one row per country)
suicide_by_year = (
    suicide_year_sum
    .groupby('country')['suicides/100k pop']
    .mean()
    .reset_index()
)

# Average yearly total across all countries, indexed by year (a Series);
# used later as the global reference line.
global_year_avg = suicide_year_sum.groupby('year')['suicides/100k pop'].mean()




# suicide_by_year = pd.DataFrame(suicide_df.groupby(["country"]).mean()['suicides/100k pop'])
# suicide_by_year
# type(suicide_by_year)

# suicide_by_year.rename(columns = {'country':'country','suicides/100k pop' :'suicides/100k pop' }, inplace = True)
# suicide_by_year = suicide_by_year.reset_index()
# suicide_by_year.head()
# #suicide_by_year.columns.values()

In [4]:
# Size check on the per-country averages: unpack (rows, columns) in one step
count_row, count_col = suicide_by_year.shape

# Shown as the cell result
suicide_by_year.shape

(101, 2)

In [5]:
# Lookup table pairing each country name with its ISO alpha-3 code
country_codes_path = 'country_codes.csv'
country_codes = pd.read_csv(country_codes_path)

country_codes.head()

Unnamed: 0,Country,ISO ALPHA_3
0,Afghanistan,AFG
1,Åland Islands,ALA
2,Albania,ALB
3,Algeria,DZA
4,American Samoa,ASM


In [6]:
# Attach ISO alpha-3 codes to the per-country averages so plotly can place
# each country on the map.

# The join below is an exact match on the country name, so a country that is
# spelled differently in the two files would silently disappear from the map.
# List any such countries instead of dropping them unnoticed.
unmatched_countries = sorted(
    map(str, set(suicide_by_year['country']) - set(country_codes['Country']))
)
if unmatched_countries:
    print('No ISO code found for (dropped from the map):',
          ', '.join(unmatched_countries))

# Inner join on the name, then discard the duplicate name column from the
# code table.
df = (
    pd.merge(suicide_by_year, country_codes, left_on='country', right_on='Country')
    .drop(columns=['Country'])
)

df.head()

Unnamed: 0,country,suicides/100k pop,ISO ALPHA_3
0,Albania,42.034545,ALB
1,Antigua and Barbuda,6.634815,ATG
2,Argentina,125.631935,ARG
3,Armenia,39.0484,ARM
4,Aruba,114.037143,ABW


In [97]:
# We can export the data to have a look at our file

#df.to_csv("testmerge.csv")

In [7]:
# Trace definition for the world map: one choropleth coloured by the
# per-country average, on a white-to-black scale.

choropleth_trace = {
    'type': 'choropleth',
    'locations': df['ISO ALPHA_3'],
    'z': df['suicides/100k pop'],
    'text': df['country'],
    'colorscale': [[0, "rgb(255, 255, 255)"], [1, "rgb(0, 0, 0)"]],
    'autocolorscale': False,
    'reversescale': False,
    # thin grey outline around every country
    'marker': {'line': {'color': 'rgb(180,180,180)', 'width': 0.5}},
    'colorbar': {'title': 'suicides/100K pop'},
}

# plotly expects a list of traces
data = [choropleth_trace]


#  colorscale = [[0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],[0.5,"rgb(70, 100, 245)"],\
#             [0.6,"rgb(90, 120, 245)"],[0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],

In [8]:
# Layout for the world map: auto-sized figure, whole-world scope,
# coastlines drawn, no frame, Miller projection.

layout = {
    'autosize': True,
    'geo': {
        'scope': 'world',
        'showframe': False,
        'showcoastlines': True,
        'projection': {'type': 'miller'},
    },
}

In [9]:
# Figure 1: annual suicide rates — pair the trace list with its layout
fig = {'data': data, 'layout': layout}

# Write the map out as a standalone HTML page
plotly.offline.plot(fig, filename='test.html')


'file://C:\\Users\\levan\\DATA551-Project\\test.html'

In [10]:
import plotly.io as pio

#pio.write_image(fig, 'figure1_globalsuicides.svg')

For the second figure we want to create a scatterplot that shows the top five countries where the most suicides are committed and how that has changed over time.

In [11]:
# Average yearly total across all countries, as a two-column frame
# (year, avg suicides/100k pop); used later as the global reference line.
#
# The rate column is selected before .mean(): averaging the whole frame would
# also try to average the text 'country' column, which raises on pandas >= 2.0.
global_year_avg = (
    suicide_year_sum
    .groupby('year')['suicides/100k pop']
    .mean()
    .reset_index()
    .rename(columns={'suicides/100k pop': 'avg suicides/100k pop'})
)

global_year_avg.head()

Unnamed: 0,year,avg suicides/100k pop
0,1985,141.914375
1,1986,137.08
2,1987,139.730556
3,1988,152.512857
4,1989,154.548846


In [12]:
# Rank the countries by their average yearly total, highest first.
# The goal is a by-year plot for the countries at the top of this ranking.
suicide_by_year.sort_values(by='suicides/100k pop', ascending=False, inplace=True)
suicide_by_year.head(10)

# Country names transcribed by hand from the ranking above — re-check them
# if the underlying data changes. The top five are the leading slice.
top10 = [
    'Lithuania',
    'Sri Lanka',
    'Russian Federation',
    'Hungary',
    'Belarus',
    'Kazakhstan',
    'Latvia',
    'Slovenia',
    'Estonia',
    'Ukraine',
]
top5 = top10[:5]

# Keep only the yearly rows that belong to those ten countries
is_top10_country = suicide_year_sum['country'].isin(top10)
top_10_yearly_suicides = suicide_year_sum[is_top10_country]

top_10_yearly_suicides.shape

# Sanity check: only the ten listed countries should remain
top_10_yearly_suicides.country.unique()

array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
       'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
       'Ukraine'], dtype=object)

In [13]:
# Preview the first five yearly rows of the filtered frame
top_10_yearly_suicides.head(n=5)


Unnamed: 0,country,year,suicides/100k pop
266,Belarus,1990,274.37
267,Belarus,1991,297.64
268,Belarus,1992,314.24
269,Belarus,1993,365.11
270,Belarus,1994,403.58


In [14]:
# Ok now we have a dataframe with what we want and we can plot
# We're going to have to arrange our data in a very special way 

# Let's create a dataframe for each of the top 10 countries and sort ascending by year 
# This will help us make our traces 

# Belarus: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
belarus = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Belarus']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
belarus.head(10)



Unnamed: 0,index,country,year,suicides/100k pop
0,266,Belarus,1990,274.37
1,267,Belarus,1991,297.64
2,268,Belarus,1992,314.24
3,269,Belarus,1993,365.11
4,270,Belarus,1994,403.58
5,271,Belarus,1995,404.01
6,272,Belarus,1996,455.71
7,273,Belarus,1997,446.5
8,274,Belarus,1998,455.64
9,275,Belarus,1999,450.36


In [15]:
# Estonia: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
estonia = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Estonia']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
estonia.head(10)

Unnamed: 0,index,country,year,suicides/100k pop
0,697,Estonia,1995,543.19
1,698,Estonia,1996,528.72
2,699,Estonia,1997,502.06
3,700,Estonia,1998,427.52
4,701,Estonia,1999,486.47
5,702,Estonia,2000,364.64
6,703,Estonia,2001,371.68
7,704,Estonia,2002,358.3
8,705,Estonia,2003,338.24
9,706,Estonia,2004,299.46


In [16]:
# Hungary: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
hungary = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Hungary']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
hungary.head(10)

#hungary.country.unique()


Unnamed: 0,index,country,year,suicides/100k pop
0,948,Hungary,1991,575.0
1,949,Hungary,1992,570.26
2,950,Hungary,1993,539.28
3,951,Hungary,1994,535.81
4,952,Hungary,1995,493.42
5,953,Hungary,1996,482.74
6,954,Hungary,1997,470.44
7,955,Hungary,1998,436.78
8,956,Hungary,1999,483.88
9,957,Hungary,2000,464.56


In [17]:
# Kazakhstan: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
kazakhstan = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Kazakhstan']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
kazakhstan.head(10)

#kazakhstan.country.unique()


Unnamed: 0,index,country,year,suicides/100k pop
0,1146,Kazakhstan,1990,334.12
1,1147,Kazakhstan,1991,318.66
2,1148,Kazakhstan,1992,323.53
3,1149,Kazakhstan,1993,376.49
4,1150,Kazakhstan,1994,414.68
5,1151,Kazakhstan,1995,454.67
6,1152,Kazakhstan,1996,452.84
7,1153,Kazakhstan,1997,433.74
8,1154,Kazakhstan,1998,440.52
9,1155,Kazakhstan,1999,407.73


In [18]:
# Latvia: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
latvia = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Latvia']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
latvia.head(10)

#latvia.country.unique()

Unnamed: 0,index,country,year,suicides/100k pop
0,1234,Latvia,1995,545.62
1,1235,Latvia,1996,492.01
2,1236,Latvia,1997,492.57
3,1237,Latvia,1998,441.84
4,1238,Latvia,1999,420.41
5,1239,Latvia,2000,453.2
6,1240,Latvia,2001,383.49
7,1241,Latvia,2002,367.76
8,1242,Latvia,2003,361.63
9,1243,Latvia,2004,328.38


In [19]:
# Lithuania: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
lithuania = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Lithuania']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
lithuania.head(10)

#lithuania.country.unique()


Unnamed: 0,index,country,year,suicides/100k pop
0,1255,Lithuania,1995,639.3
1,1256,Lithuania,1996,595.61
2,1257,Lithuania,1997,565.44
3,1258,Lithuania,1998,566.36
4,1259,Lithuania,1999,561.53
5,1260,Lithuania,2000,571.8
6,1261,Lithuania,2001,568.98
7,1262,Lithuania,2002,555.62
8,1263,Lithuania,2003,530.52
9,1264,Lithuania,2004,503.37


In [20]:
# Russian Federation: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
russia = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Russian Federation']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
russia.head(10)

#russia.country.unique()

# array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
#        'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
#        'Ukraine'], dtype=object)

Unnamed: 0,index,country,year,suicides/100k pop
0,1747,Russian Federation,1989,398.82
1,1748,Russian Federation,1990,399.17
2,1749,Russian Federation,1991,396.4
3,1750,Russian Federation,1992,437.48
4,1751,Russian Federation,1993,522.06
5,1752,Russian Federation,1994,567.64
6,1753,Russian Federation,1995,547.38
7,1754,Russian Federation,1996,526.28
8,1755,Russian Federation,1997,508.09
9,1756,Russian Federation,1998,474.74


In [21]:
# Slovenia: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
slovenia = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Slovenia']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
slovenia.head(10)

#slovenia.country.unique()

# array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
#        'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
#        'Ukraine'], dtype=object)

Unnamed: 0,index,country,year,suicides/100k pop
0,1922,Slovenia,1995,393.87
1,1923,Slovenia,1996,421.11
2,1924,Slovenia,1997,410.1
3,1925,Slovenia,1998,413.6
4,1926,Slovenia,1999,393.36
5,1927,Slovenia,2000,404.12
6,1928,Slovenia,2001,423.4
7,1929,Slovenia,2002,354.3
8,1930,Slovenia,2003,376.56
9,1931,Slovenia,2004,298.48


In [22]:
# Sri Lanka: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
sri_lanka = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Sri Lanka']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
sri_lanka.head(10)

#sri_lanka.country.unique()


Unnamed: 0,index,country,year,suicides/100k pop
0,1994,Sri Lanka,1985,558.72
1,1995,Sri Lanka,1986,529.8
2,1996,Sri Lanka,1996,395.71
3,1997,Sri Lanka,1997,472.1
4,1998,Sri Lanka,1998,451.15
5,1999,Sri Lanka,1999,445.24
6,2000,Sri Lanka,2000,427.29
7,2001,Sri Lanka,2001,358.06
8,2002,Sri Lanka,2002,339.41
9,2003,Sri Lanka,2003,352.75


In [23]:
# Ukraine: yearly totals in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
ukraine = (
    top_10_yearly_suicides
    .loc[top_10_yearly_suicides['country'] == 'Ukraine']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
ukraine.head(10)

#ukraine.country.unique()


Unnamed: 0,index,country,year,suicides/100k pop
0,2175,Ukraine,1987,294.04
1,2176,Ukraine,1988,286.72
2,2177,Ukraine,1989,316.32
3,2178,Ukraine,1990,296.7
4,2179,Ukraine,1991,297.91
5,2180,Ukraine,1992,322.49
6,2181,Ukraine,1993,352.11
7,2182,Ukraine,1994,384.29
8,2183,Ukraine,1995,396.67
9,2184,Ukraine,1996,403.42


In [183]:
#type(belarus['suicides/100k pop'].values.tolist())

list

In [24]:
# Re-check the global yearly averages before plotting them
global_year_avg.head(n=5)

Unnamed: 0,year,avg suicides/100k pop
0,1985,141.914375
1,1986,137.08
2,1987,139.730556
3,1988,152.512857
4,1989,154.548846


In [27]:
# Figure 2: yearly totals of the highest-ranked countries against the global
# average, as a line chart.
# The per-country frames above were built by hand; for the dashboard this
# should become a loop over all countries.

import plotly.graph_objs as go


def _line_trace(frame, name, color=None, width=4,
                y_column='suicides/100k pop', **scatter_kwargs):
    """Build one line trace of ``y_column`` against ``year``.

    Parameters
    ----------
    frame : DataFrame
        Must contain a 'year' column and ``y_column``.
    name : str
        Legend label of the trace.
    color : str, optional
        Line colour; when omitted no colour is set on the trace.
    width : int
        Value passed as the line width.
    y_column : str
        Column plotted on the y axis.
    **scatter_kwargs
        Forwarded unchanged to ``go.Scatter`` (e.g. ``text=...``).

    Returns
    -------
    go.Scatter
    """
    line_style = dict(width=width)
    if color is not None:
        line_style['color'] = color
    return go.Scatter(
        x=frame['year'].values.tolist(),
        y=frame[y_column].values.tolist(),
        mode='lines',
        line=line_style,
        name=name,
        **scatter_kwargs
    )


# One trace per country; traces without an explicit colour get no colour set.
trace0 = _line_trace(belarus, 'Belarus', color='rgb(220, 220, 220)')
trace1 = _line_trace(estonia, 'Estonia')
trace2 = _line_trace(hungary, 'Hungary', color='rgb(192, 192, 192)')
trace3 = _line_trace(kazakhstan, 'Kazakhstan')
trace4 = _line_trace(latvia, 'Latvia')
trace5 = _line_trace(lithuania, 'Lithuania', color='rgb(0, 0, 0)', text='Lithuania')
# Legend label corrected: the country is the 'Russian Federation'
trace6 = _line_trace(russia, 'Russian Federation', color='rgb(169, 169, 169)')
trace7 = _line_trace(slovenia, 'Slovenia')
trace8 = _line_trace(sri_lanka, 'Sri Lanka', color='rgb(105,105,105)')
trace9 = _line_trace(ukraine, 'Ukraine')

# Global reference line: red and slightly thicker than the country lines
trace10 = _line_trace(
    global_year_avg,
    'Global Average',
    color='rgb(255, 0, 0)',
    width=5,
    y_column='avg suicides/100k pop',
)

# Only Lithuania, Sri Lanka, Russia and the global average are drawn;
# the remaining traces are kept for experimentation.
data1 = [trace5, trace8, trace6, trace10]

layout = go.Layout(
    xaxis=dict(
        showgrid=False,
        title='year',
        titlefont=dict(size=18),
    ),
    yaxis=dict(
        showgrid=False,
        title='suicides/100K population',
        titlefont=dict(size=18),
    ),
)

fig2 = go.Figure(data=data1, layout=layout)

# Render inline in the notebook ...
plotly.offline.iplot(fig2, filename='figure2_top5suicicderates.html')

# ... and also write a standalone HTML file
plotly.offline.plot(fig2, filename='figure2_top5suicicderates.html')

'file://C:\\Users\\levan\\DATA551-Project\\figure2_top5suicicderates.html'

In [28]:
# Save Figure 2 as a static SVG next to the notebook
pio.write_image(fig=fig2, file='figure2_top5suicicderates.svg')

In [191]:
# The line chart may be too busy, so try the same data as grouped bars.

# (legend label, per-country frame) in the order trace0 .. trace9.
# The label for russia is spelled 'Russian Federation' (was 'Russia Federation').
grouped_bar_sources = [
    ('Belarus', belarus),
    ('Estonia', estonia),
    ('Hungary', hungary),
    ('Kazakhstan', kazakhstan),
    ('Latvia', latvia),
    ('Lithuania', lithuania),
    ('Russian Federation', russia),
    ('Slovenia', slovenia),
    ('Sri Lanka', sri_lanka),
    ('Ukraine', ukraine),
]

# One bar trace per country: year on x, yearly total on y
(trace0, trace1, trace2, trace3, trace4,
 trace5, trace6, trace7, trace8, trace9) = [
    go.Bar(
        x=country_frame['year'].values.tolist(),
        y=country_frame['suicides/100k pop'].values.tolist(),
        name=legend_label,
    )
    for legend_label, country_frame in grouped_bar_sources
]

# Plot the top five only: Lithuania, Sri Lanka, Russia, Hungary, Belarus
data1 = [trace5, trace8, trace6, trace2, trace0]

layout = go.Layout(
    title='Yearly Suicide Rates per Country',
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1
)

fig = go.Figure(data=data1, layout=layout)

# Render inline, then also write a standalone HTML file
plotly.offline.iplot(fig, filename='btest.html')

plotly.offline.plot(fig, filename='btest.html')






# array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
#        'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
#        'Ukraine'], dtype=object)


# #py.iplot(data1, filename='line-mode')

# plotly.offline.iplot(data1, filename='btest.html')

# plotly.offline.plot(data1, filename='btest.html')

'file://C:\\Users\\levan\\DATA551-Project\\btest.html'

In [193]:
# Variant of the bar chart: stack the countries' bars for each year
# instead of placing them side by side.

# (legend label, per-country frame) in the order trace0 .. trace9.
# The label for russia is spelled 'Russian Federation' (was 'Russia Federation').
stacked_bar_sources = [
    ('Belarus', belarus),
    ('Estonia', estonia),
    ('Hungary', hungary),
    ('Kazakhstan', kazakhstan),
    ('Latvia', latvia),
    ('Lithuania', lithuania),
    ('Russian Federation', russia),
    ('Slovenia', slovenia),
    ('Sri Lanka', sri_lanka),
    ('Ukraine', ukraine),
]

# One bar trace per country: year on x, yearly total on y
(trace0, trace1, trace2, trace3, trace4,
 trace5, trace6, trace7, trace8, trace9) = [
    go.Bar(
        x=country_frame['year'].values.tolist(),
        y=country_frame['suicides/100k pop'].values.tolist(),
        name=legend_label,
    )
    for legend_label, country_frame in stacked_bar_sources
]

# Plot the top five only: Lithuania, Sri Lanka, Russia, Hungary, Belarus
data1 = [trace5, trace8, trace6, trace2, trace0]

layout = go.Layout(
    title='Yearly Suicide Rates per Country',
    barmode='stack',
)

fig = go.Figure(data=data1, layout=layout)

# Render inline, then also write a standalone HTML file
plotly.offline.iplot(fig, filename='bstack_test.html')

plotly.offline.plot(fig, filename='bstack_test.html')






# array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
#        'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
#        'Ukraine'], dtype=object)


# #py.iplot(data1, filename='line-mode')

# plotly.offline.iplot(data1, filename='btest.html')

# plotly.offline.plot(data1, filename='btest.html')

'file://C:\\Users\\levan\\DATA551-Project\\bstack_test.html'

In [96]:
# Proportion of suicides committed by men vs. women globally.
#
# Group by year and sex and total the per-100k rates of every row in each
# (year, sex) pair. The rate column is selected before .sum() so the other
# columns (including the text ones) are not aggregated for nothing.
suicide_year_sex_sum = (
    suicide_df
    .groupby(['year', 'sex'])['suicides/100k pop']
    .sum()
    .reset_index()
)
suicide_year_sex_sum.head()

Unnamed: 0,year,sex,suicides/100k pop
0,1985,female,1663.75
1,1985,male,5148.14
2,1986,female,1674.05
3,1986,male,4905.79
4,1987,female,1857.21


In [97]:
# Split the yearly totals into one frame per sex.

# female rows, in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
female = (
    suicide_year_sex_sum
    .loc[suicide_year_sex_sum['sex'] == 'female']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
female.head(10)

#female.sex.unique()

Unnamed: 0,index,year,sex,suicides/100k pop
0,0,1985,female,1663.75
1,2,1986,female,1674.05
2,4,1987,female,1857.21
3,6,1988,female,1859.08
4,8,1989,female,1927.17
5,10,1990,female,2268.29
6,12,1991,female,2314.74
7,14,1992,female,2437.37
8,16,1993,female,2352.19
9,18,1994,female,2545.93


In [98]:
# male rows, in chronological order
# (reset_index keeps the previous row labels in an 'index' column)
male = (
    suicide_year_sex_sum
    .loc[suicide_year_sex_sum['sex'] == 'male']
    .reset_index()
    .sort_values(by='year', ascending=True)
)
male.head(10)

#male.sex.unique()

Unnamed: 0,index,year,sex,suicides/100k pop
0,1,1985,male,5148.14
1,3,1986,male,4905.79
2,5,1987,male,5688.24
3,7,1988,male,5614.05
4,9,1989,male,6109.37
5,11,1990,male,7610.46
6,13,1991,male,8006.32
7,15,1992,male,8091.51
8,17,1993,male,8438.1
9,19,1994,male,8937.86


In [100]:
# Figure 3: stacked bars of the yearly summed rate, split by sex.

# (frame, legend label, fill colour): white bars for women, black for men.
sex_bar_specs = [
    (female, 'Female', 'rgb(255,255,255)'),
    (male, 'Male', 'rgb(0,0,0)'),
]

# Both traces share the same thin black outline so the white bars stay visible
trace0, trace1 = [
    go.Bar(
        x=sex_frame['year'].values.tolist(),
        y=sex_frame['suicides/100k pop'].values.tolist(),
        marker={
            'color': fill_colour,
            'line': {'color': 'rgb(0,0,0)', 'width': 0.8},
        },
        name=legend_label,
    )
    for sex_frame, legend_label, fill_colour in sex_bar_specs
]

data1 = [trace0, trace1]

axis_title_font = {'size': 18}
layout = go.Layout(
    barmode='stack',
    xaxis={
        'title': 'year',
        'titlefont': dict(axis_title_font),
    },
    yaxis={
        'showgrid': False,
        'title': 'sum of suicides/100K population',
        'titlefont': dict(axis_title_font),
    },
)

fig3 = go.Figure(data=data1, layout=layout)

# Render inline, then also write a standalone HTML file
plotly.offline.iplot(fig3, filename='suicides_gender.html')

plotly.offline.plot(fig3, filename='suicides_gender.html')


# array(['Belarus', 'Estonia', 'Hungary', 'Kazakhstan', 'Latvia',
#        'Lithuania', 'Russian Federation', 'Slovenia', 'Sri Lanka',
#        'Ukraine'], dtype=object)


# #py.iplot(data1, filename='line-mode')

# plotly.offline.iplot(data1, filename='btest.html')

# plotly.offline.plot(data1, filename='btest.html')

'file://C:\\Users\\levan\\DATA551-Project\\suicides_gender.html'

In [101]:
# Save Figure 3 as a static SVG next to the notebook
pio.write_image(fig=fig3, file='figure3_suicicderatesgender.svg')