In [104]:
import json

import pandas as pd
import plotly.graph_objects as go

In [105]:
# Read and parse the artists JSON file. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open for the
# lifetime of the kernel.
with open('Artists.json', encoding="utf8") as f:
    # json.load returns the parsed document as plain Python objects.
    data = json.load(f)

In [106]:
# Reduce every artist record to the three fields used below, in the order
# Nationality, Gender, BeginDate.
data = [
    [artist["Nationality"], artist["Gender"], artist["BeginDate"]]
    for artist in data
]

In [107]:
# Build the working frame from the [nationality, gender, begin-date] rows.
# pandas is imported in the notebook's top import cell, so the dependency is
# visible up front rather than buried mid-notebook.
df = pd.DataFrame(
    data=data,
    columns=["nationality", "gender", "year"],
)

In [108]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,nationality,gender,year
0,American,Male,1930
1,Spanish,Male,1936
2,American,Male,1941
3,American,Male,1946
4,Danish,Male,1941


In [109]:
# Discard rows whose year is 0 (presumably a placeholder for an unknown
# date -- TODO confirm against the data source).
df = df.loc[df["year"].ne(0)]

In [110]:
# Floor every year to the start of its decade (e.g. 1936 -> 1930).
# assign() returns a new frame instead of setting a column on the filtered
# frame, which avoids pandas' SettingWithCopyWarning, and the arithmetic is
# vectorized over the whole column rather than applied row by row.
df = df.assign(year=df.year - df.year % 10)

In [111]:
# Collect every distinct node label: nationalities, genders and years.
# dict.fromkeys de-duplicates while keeping first-seen order, so the label
# order (and therefore each node id) is the same on every run. A plain set()
# orders strings by a per-process randomized hash, which shuffled the ids
# between kernel restarts.
cols = list(dict.fromkeys([*df.nationality, *df.gender, *df.year]))

In [112]:
# Map each label to its position in `cols`; that position is the integer
# node id.
mapper = dict(zip(cols, range(len(cols))))

In [114]:
# Tally rows per (nationality, year) pair. count() reports the number of
# non-null entries in the remaining column, so the tally ends up in a column
# still named "gender".
count1 = (
    df
    .groupby(by=["nationality", "year"])
    .count()
    .reset_index(drop=False)
)
count1.head(n=5)

Unnamed: 0,nationality,year,gender
0,Afghan,1970,1
1,Albanian,1960,3
2,Albanian,1970,1
3,Algerian,1920,1
4,Algerian,1950,3


In [115]:
# Tally rows per (year, gender) pair. count() reports the number of non-null
# entries in the remaining column, so the tally ends up in a column still
# named "nationality".
count2 = (
    df
    .groupby(by=["year", "gender"])
    .count()
    .reset_index(drop=False)
)
count2.head(n=5)

Unnamed: 0,year,gender,nationality
0,1730,Male,1
1,1740,Male,1
2,1750,Male,1
3,1770,Male,1
4,1780,Male,4


In [116]:
# Keep only pairs with a tally above 2 (presumably to declutter the diagram --
# the threshold is a judgement call). The tally lives in the leftover column:
# "gender" for count1, "nationality" for count2.
# .copy() detaches each result from the frame it was filtered from, so later
# column assignments on count1/count2 do not trigger SettingWithCopyWarning.
count1 = count1[count1.gender > 2].copy()
count2 = count2[count2.nationality > 2].copy()

In [117]:
# Replace the label columns with their integer node ids from `mapper`.
# assign() builds new frames rather than setting columns on filtered frames,
# which avoids SettingWithCopyWarning. The lookup goes through
# mapper.__getitem__ (not the dict itself) so an unknown label still raises
# KeyError instead of silently turning into NaN.
# NOTE: this cell overwrites its own inputs; re-running it without re-running
# the cells above fails because the columns then hold ids, not labels.
count1 = count1.assign(
    year=count1.year.map(mapper.__getitem__),
    nationality=count1.nationality.map(mapper.__getitem__),
)
count2 = count2.assign(
    year=count2.year.map(mapper.__getitem__),
    gender=count2.gender.map(mapper.__getitem__),
)

In [118]:
# Concatenate the two link sets into flat, parallel lists:
#   first  -> link source ids (count1.nationality, then count2.year)
#   second -> link target ids (count1.year, then count2.gender)
#   third  -> link values     (the tallies held in the leftover columns)
first = [*count1.nationality, *count2.year]
second = [*count1.year, *count2.gender]
third = [*count1.gender, *count2.nationality]

In [119]:
# Build the Sankey figure.
# The hover templates previously referenced %{customdata} (and
# %{source.customdata} / %{target.customdata}), but no customdata was supplied
# for nodes or links, so those placeholders had nothing to resolve to. They
# now use the node labels, which are always present.
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        # Labels in node-id order: mapper assigns each key its enumeration
        # index, so key order matches the ids used in the link arrays.
        # str() makes numeric years and text labels a uniform type.
        label=[str(key) for key in mapper],
        hovertemplate='Node %{label} has total value %{value}<extra></extra>',
        color="blue",
    ),
    link=dict(
        source=first,   # source node id of each link
        target=second,  # target node id of each link
        value=third,    # link weight (tally for that pair)
        hovertemplate=(
            'Link from node %{source.label}<br />'
            'to node %{target.label}<br />'
            'has value %{value}<extra></extra>'
        ),
    ),
)])

In [120]:
# Set the figure title and base font size, then render the diagram.
fig.update_layout(
    title=dict(text="Basic Sankey Diagram"),
    font=dict(size=10),
)
fig.show()