In [None]:
from pathlib import Path

### Paths
# Everything is resolved against the directory the notebook is run from.
data_path = Path.cwd()

# Manually coded annotation sheets (one for papers, one for patents).
analysis_dir = data_path / 'analysis'
paper_annotations_path = analysis_dir / 'manual_papers_coding.csv'
patent_annotations_path = analysis_dir / 'manual_patents_coding.csv'

# Output folder for exported figures; created on first run, reused afterwards.
figs_dir = data_path / 'figures'
figs_dir.mkdir(exist_ok=True)

In [None]:
### Imports
%load_ext autoreload
%autoreload 2
import os
import pandas as pd
from pandas import DataFrame
import plotly.express as px
import plotly
import plotly.io as pio; pio.renderers.default = "iframe"

In [None]:
### Load and clean data
# Read both manual-coding sheets and tag every row with its document type.
papers = pd.read_csv(paper_annotations_path).assign(Doc_type='paper')
patents = pd.read_csv(patent_annotations_path).assign(Doc_type='patent')

# Stack papers and patents into a single frame with a fresh 0..n-1 index.
docs = pd.concat([papers, patents], ignore_index=True)

# Collapse the verbose annotation wording into the short names used in figures.
# Several distinct "unspecified"-style answers are merged into one bucket.
label_renames = {
    'Traces of socially significant human data (e.g. location, friends, cultural identity, preferences)': 'Salient traces',
    'Traces of human data (e.g. an engineer\'s blueprints)': 'Unspecified',
    'Unspecified data (provides no indication of applications)': 'Unspecified',
    'Unspecified data (does not state whether can be used for human data)': 'Unspecified',
}
# NOTE: replace() is applied frame-wide, so any cell (in any column) that
# exactly equals one of the keys above is rewritten.
docs = docs.replace(label_renames)

# Display order of the categories (also the order of the pie slices).
categories = [
    'Human body parts',
    'Human bodies',
    'Human spaces',
    'Salient traces',
    'Unspecified',
    'Non-human data',
]

# Independent copy of the cleaned frame with the label exposed as 'Data'.
annotations = docs.assign(Data=docs['label'])

In [None]:
### Visualize pie chart
# Count how many annotated documents carry each data label.
# NOTE: value_counts() drops missing values, so unlabeled rows are not plotted.
label_counts = annotations['Data'].value_counts()

# Fail early, naming the offending values, if a label is not one of the known
# categories (otherwise the ordering step below fails with a bare
# "x is not in list" error).
unexpected_labels = [label for label in label_counts.index if label not in categories]
if unexpected_labels:
    raise ValueError(f'Labels missing from `categories`: {unexpected_labels}')

counts = pd.DataFrame({'Data': label_counts.index, 'Count': label_counts.values})

# Sort order of pie slices: follow the order of `categories`, not the counts.
counts['data_index'] = counts['Data'].map(categories.index)
counts = counts.sort_values('data_index')

# Pin each colour to its category by name. A purely positional colour sequence
# shifts every later slice to the wrong colour whenever a category is absent
# from the data (e.g. 'Unspecified' would stop being white).
palette = ['#AA1600', '#FF4000', '#FF7C00', '#FFA200', 'white', 'black']
category_colors = dict(zip(categories, palette))

# Visualize
fig = px.pie(
    counts, names='Data', values='Count', hover_name='Data',
    color='Data', color_discrete_map=category_colors,
    width=500, height=500)
# Transparent backgrounds so the exported image can sit on any page colour.
fig.update_layout(
    showlegend=False, font_size=15,
    paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)')
# sort=False keeps the slice order set above instead of re-sorting by size;
# the thin white outline separates adjacent slices.
fig.update_traces(
    textinfo='percent+label', rotation=0, insidetextorientation='horizontal',
    textfont_size=14, sort=False,
    marker=dict(line=dict(color='white', width=1)))

# scale=10 multiplies the 500x500 layout size to get a high-resolution PNG.
fig.write_image(str(figs_dir / 'pie.png'), scale=10)
display(fig)