In [34]:
import pandas as pd
import plotly.graph_objects as go

# Minimum value of the "Frequency" column a row must have to be kept.
MIN_FREQUENCY = 10

# Load the word-frequency table from the CSV file and discard every row that
# contains a NaN in any column.
df = pd.read_csv('department_word_frequencies.csv')
df = df.dropna()

# Keep only rows whose "Frequency" is MIN_FREQUENCY or greater.
# .copy() makes the filtered result an independent frame, so adding the "ID"
# column below does not write into a slice of the unfiltered frame (which can
# trigger pandas' SettingWithCopyWarning).
df = df[df['Frequency'] >= MIN_FREQUENCY].copy()

# Build an ID for each row as "<Department Index>-<Word>"
df['ID'] = df['Department Index'] + '-' + df['Word']

print(df)

     Department Index          Word  Frequency               ID
1                  EN      Literary         12      EN-Literary
6                  EN    Literature         44    EN-Literature
7                  EN       Studies         32       EN-Studies
14                 EN       Fiction         11       EN-Fiction
19                 EN      American         23      EN-American
...               ...           ...        ...              ...
3341               LR       Russian         20       LR-Russian
3485               BB      Research         19      BB-Research
3486               BB  Biochemistry         19  BB-Biochemistry
3487               BB     Molecular         20     BB-Molecular
3488               BB       Biology         20       BB-Biology

[73 rows x 4 columns]


In [35]:
# Parent (root-level) nodes: one row per distinct department. Each has an
# empty parent string, a value of 0, and uses the department code as both
# its label and its id.
departments = df['Department Index'].unique()
n_departments = df['Department Index'].nunique()
df_parent = pd.DataFrame({
    'labels': departments,
    'parents': [''] * n_departments,
    'values': [0] * n_departments,
    'ids': departments,
})

# Child nodes: one row per (department, word) row of df. The word is the
# label, the department code is the parent, the frequency is the value and
# the combined "ID" column is the node id.
df_child = df[['Word', 'Department Index', 'Frequency', 'ID']].rename(
    columns={
        'Word': 'labels',
        'Department Index': 'parents',
        'Frequency': 'values',
        'ID': 'ids',
    }
)

In [36]:
# Stack the parent rows on top of the child rows to get a single node table
df_sunburst = pd.concat([df_parent, df_child])

# Build the sunburst figure: each Sunburst argument is fed from the column
# of df_sunburst that carries the same name.
fig = go.Figure()
fig.add_trace(go.Sunburst(
    **{field: df_sunburst[field]
       for field in ('ids', 'labels', 'parents', 'values')}
))

# Zero out all four layout margins (top, left, right, bottom)
fig.update_layout(margin={'t': 0, 'l': 0, 'r': 0, 'b': 0})

fig.show()