In [7]:
%matplotlib notebook

In [8]:
# Dependencies and Setup
import itertools
import os
from functools import reduce

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import scipy.stats as st
from chord import Chord

# Chord Diagram using ChordPro

In [9]:
# Load the MET export from the notebook's working directory.
met_data = pd.read_csv(filepath_or_buffer="CHORD_MET.csv")

In [10]:
# Show the column labels of the loaded frame.
met_data.columns

Index(['Object Number', 'Is Public Domain', 'Object ID', 'Department',
       'AccessionYear', 'Object Name', 'Title', 'Culture',
       'Artist Display Name', 'Object End Date', 'Medium', 'Country',
       'Classification'],
      dtype='object')

In [11]:
# List the distinct country labels to spot spelling or casing variants.
met_data.loc[:, "Country"].unique()

array(['China', 'France', 'Germany', 'Italy', 'United Kingdom',
       'United States', 'Indonesia', 'Spain', 'Cameroon', "Cote d'Ivoire",
       'Mali', 'Nigeria', 'Papua New Guinea', 'Egypt', 'Japan', 'Mexico',
       'Netherlands', 'Congo', 'Austria', 'Canada', 'Bolivia',
       'Burkina Faso', 'Philippines', 'Colombia', 'Costa Rica', 'Ecuador',
       'Ghana', 'Guatemala', 'Panama', 'Peru', 'India', 'Benin', 'italy'],
      dtype=object)

In [12]:
# Fold the lower-case 'italy' variant into 'Italy' so both count as one country.
met_data["Country"] = met_data["Country"].replace({'italy': 'Italy'})

In [13]:
# Re-list the distinct country labels to confirm the casing fix took effect.
met_data.loc[:, "Country"].unique()

array(['China', 'France', 'Germany', 'Italy', 'United Kingdom',
       'United States', 'Indonesia', 'Spain', 'Cameroon', "Cote d'Ivoire",
       'Mali', 'Nigeria', 'Papua New Guinea', 'Egypt', 'Japan', 'Mexico',
       'Netherlands', 'Congo', 'Austria', 'Canada', 'Bolivia',
       'Burkina Faso', 'Philippines', 'Colombia', 'Costa Rica', 'Ecuador',
       'Ghana', 'Guatemala', 'Panama', 'Peru', 'India', 'Benin'],
      dtype=object)

In [14]:
# List the distinct classification labels to spot spelling or casing variants.
met_data.loc[:, "Classification"].unique()

array(['Accessory', 'Architectural', 'Barkcloth', 'Ceramics', 'Coins',
       'Containers', 'Costumes', 'Drawings', 'Enamels', 'Furniture',
       'Glass', 'Glass-Stained', 'Implements', 'Ivories', 'Jewelry',
       'Manuscripts and Illuminations', 'Metalwork',
       'Musical Instruments', 'musical instruments', 'Ornaments',
       'Paintings', 'Paper', 'Photographs', 'Pottery', 'Prints',
       'Sculpture', 'Silver', 'Textiles', 'Tiles', 'Vessels', 'Woodwork'],
      dtype=object)

In [15]:
# Fold the lower-case 'musical instruments' variant into the capitalised label,
# then list the distinct labels again to confirm only one spelling remains.
classification_fixes = {'musical instruments': 'Musical Instruments'}
met_data["Classification"] = met_data["Classification"].replace(classification_fixes)
met_data.loc[:, "Classification"].unique()

array(['Accessory', 'Architectural', 'Barkcloth', 'Ceramics', 'Coins',
       'Containers', 'Costumes', 'Drawings', 'Enamels', 'Furniture',
       'Glass', 'Glass-Stained', 'Implements', 'Ivories', 'Jewelry',
       'Manuscripts and Illuminations', 'Metalwork',
       'Musical Instruments', 'Ornaments', 'Paintings', 'Paper',
       'Photographs', 'Pottery', 'Prints', 'Sculpture', 'Silver',
       'Textiles', 'Tiles', 'Vessels', 'Woodwork'], dtype=object)

In [16]:
# Narrow the data down to the 20 most frequent countries, then to the 20 most
# frequent classifications among the rows that remain. The order matters: the
# classification ranking is computed after the country filter has been applied.
top_countries = met_data["Country"].value_counts().index[:20]
met_data = met_data[met_data["Country"].isin(top_countries)]

top_classifications = met_data["Classification"].value_counts().index[:20]
met_data = met_data[met_data["Classification"].isin(top_classifications)]

In [17]:
# Country labels ordered from most to least frequent; later used as the first
# half of `names`.
country_counts = met_data["Country"].value_counts()
left = country_counts.index.tolist()
pd.DataFrame(left)

Unnamed: 0,0
0,United States
1,Peru
2,France
3,Egypt
4,Mexico
5,Indonesia
6,United Kingdom
7,Papua New Guinea
8,China
9,Germany


In [18]:
# Classification labels ordered from most to least frequent; later used as the
# second half of `names`.
classification_counts = met_data["Classification"].value_counts()
right = classification_counts.index.tolist()
pd.DataFrame(right)

Unnamed: 0,0
0,Ceramics
1,Musical Instruments
2,Sculpture
3,Glass
4,Textiles
5,Ornaments
6,Implements
7,Furniture
8,Metalwork
9,Silver


In [19]:
# Keep only the (country, classification) pair of every remaining row. Going
# through `.values` yields a frame with default 0/1 column labels and a fresh
# positional index.
pair_columns = ["Country", "Classification"]
MET_country_class = pd.DataFrame(met_data[pair_columns].values)
MET_country_class
                                          
                                          

Unnamed: 0,0,1
0,France,Architectural
1,France,Architectural
2,France,Architectural
3,France,Architectural
4,France,Architectural
...,...,...
25664,Germany,Vessels
25665,Germany,Vessels
25666,Italy,Vessels
25667,Mexico,Vessels


In [20]:
# Labels for the co-occurrence matrix: every country (left side) followed by
# every classification (right side).
names = [*left, *right]
pd.DataFrame(names)

Unnamed: 0,0
0,United States
1,Peru
2,France
3,Egypt
4,Mexico
5,Indonesia
6,United Kingdom
7,Papua New Guinea
8,China
9,Germany


In [21]:
# Square frame of zeros labelled by `names` on both axes; it is filled with
# pair counts in a later cell.
matrix = pd.DataFrame(data=0, index=names, columns=names)
matrix

Unnamed: 0,United States,Peru,France,Egypt,Mexico,Indonesia,United Kingdom,Papua New Guinea,China,Germany,...,Containers,Architectural,Drawings,Paintings,Costumes,Jewelry,Enamels,Paper,Vessels,Glass-Stained
United States,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Peru,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
France,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Egypt,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Mexico,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Indonesia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
United Kingdom,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Papua New Guinea,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
China,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Germany,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
#populate the matrix by creating a list
# Each (country, classification) row is emitted twice: once as-is and once
# reversed, so that both matrix[country, class] and matrix[class, country] are
# counted and the resulting matrix is symmetric.
#
# The pairs are read straight from `met_data` instead of from the previous value
# of `MET_country_class`: this cell rebinds that name from a DataFrame to a
# list, so reading `.values` from it made the cell fail with AttributeError when
# it was run a second time.
country_class_pairs = met_data[["Country", "Classification"]].values
MET_country_class = [
    ordered_pair
    for pair in country_class_pairs
    for ordered_pair in (pair, pair[::-1])
]


In [23]:
#complete the matrix
# Count every ordered (row label, column label) pair once, instead of bumping
# one cell per pair with tens of thousands of scalar `.at` updates.
pair_counts = (
    pd.DataFrame(MET_country_class, columns=["row", "col"])
    .groupby(["row", "col"])
    .size()
)

# Fill a fresh zero frame rather than mutating `matrix` in place. This cell
# rebinds `matrix` to a plain list of lists (the form passed to Chord below), so
# incrementing the existing object made the cell fail, or double count, when it
# was run more than once.
cooccurrence = pd.DataFrame(0, index=names, columns=names)
for (row_label, col_label), count in pair_counts.items():
    cooccurrence.at[row_label, col_label] = count

matrix = cooccurrence.values.tolist()

In [24]:
# Render the list-of-lists matrix as a frame for a quick visual check.
pd.DataFrame(data=matrix)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,39
0,0,0,0,0,0,0,0,0,0,0,...,124,128,594,48,48,146,0,9,27,0
1,0,0,0,0,0,0,0,0,0,0,...,532,0,0,2,52,2,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,506,12,8,2,5,171,4,1,113
3,0,0,0,0,0,0,0,0,0,0,...,0,2,1,4,31,19,1,197,162,0
4,0,0,0,0,0,0,0,0,0,0,...,155,1,2,8,0,0,0,0,3,0
5,0,0,0,0,0,0,0,0,0,0,...,98,18,0,2,59,12,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,2,9,2,0,9,1,1,0,6
7,0,0,0,0,0,0,0,0,0,0,...,25,7,28,345,1,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,3,0,0,0,21,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,1,7,9,3,0,38,0,10,55


In [25]:
# Chord PRO credentials are read from the environment so that no secret is
# stored in the notebook. Set CHORD_USER (account email) and CHORD_KEY (licence
# key) before starting Jupyter.
# NOTE(review): a licence key was previously hardcoded in this cell; treat it as
# exposed and rotate it.
missing_credentials = [
    var_name
    for var_name in ("CHORD_USER", "CHORD_KEY")
    if not os.environ.get(var_name)
]
if missing_credentials:
    raise RuntimeError(
        "Missing Chord PRO credentials; set environment variable(s): "
        + ", ".join(missing_credentials)
    )

Chord.user = os.environ["CHORD_USER"]
Chord.key = os.environ["CHORD_KEY"]

In [26]:
# One hex colour per entry in `names` (40 in total), in the same order.
colors = [
    "#332626",
    "#4c1b13",
    "#ff9180",
    "#99574d",
    "#f2beb6",
    "#ff6600",
    "#993d00",
    "#d9986c",
    "#bfa38f",
    "#4c3213",
    "#f2a200",
    "#7f6c20",
    "#4d4939",
    "#ccc233",
    "#293300",
    "#afbf8f",
    "#3f731d",
    "#55f23d",
    "#6cd998",
    "#165943",
    "#b6f2de",
    "#39dae6",
    "#263033",
    "#5995b3",
    "#335566",
    "#79aaf2",
    "#000733",
    "#394173",
    "#2e1966",
    "#7736d9",
    "#e63df2",
    "#311a33",
    "#644d66",
    "#800077",
    "#e6acda",
    "#ff80d5",
    "#ff0066",
    "#4c001f",
    "#a6295b",
    "#e5001f",
]


In [28]:
# Divided chord diagram rendered inline: the first len(left) labels (countries)
# are separated from the remaining labels (classifications).
inline_chord_options = {
    "colors": colors,
    "wrap_labels": False,
    "width": 910,
    "margin": 40,
    "padding": 0.05,
    "font_size": "12px",
    "font_size_large": "12px",
    "noun": "Country and Classification",
    "title": "MET top 20 Countries and Item Classifications",
    "divide": True,
    "divide_idx": len(left),
    "divide_size": 0.6,
    "allow_download": True,
}
Chord(matrix, names, **inline_chord_options).show()



In [34]:
# Same divided diagram, with a smaller label font and wider margin, written to a
# standalone HTML file with the download button disabled.
divided_html_options = {
    "colors": colors,
    "wrap_labels": False,
    "width": 900,
    "margin": 100,
    "padding": 0.05,
    "font_size": "10px",
    "font_size_large": "12px",
    "noun": "Country and Classification",
    "title": "MET top 20 Countries and Item Classifications",
    "divide": True,
    "divide_idx": len(left),
    "divide_size": 0.6,
    "allow_download": False,
}
Chord(matrix, names, **divided_html_options).to_html("MET_div_chord.html")

In [107]:
# Undivided variant written to HTML. No `colors` and no `title` are passed here,
# so the library's own defaults apply for both (the original cell had them
# commented out: colors="d3.schemeSpectral()" and the "MET top 20 ..." title).
plain_html_options = {
    "wrap_labels": False,
    "width": 800,
    "margin": 100,
    "padding": 0.06,
    "font_size": "10px",
    "font_size_large": "12px",
    "noun": "Country and Classification",
    "allow_download": True,
}
Chord(matrix, names, **plain_html_options).to_html('MET_chord.html')