In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as plt

# https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html
#
# Categories are ordered by descending frequency in dataset so that
# setting unsorted=True makes graphs come out correctly where
# the category is the primary category of the population.
#
# To get the frequency of, say, 'language', run
# src/explore/table/histogram-table.py -i data/database/20220612.yaml language

# Ordered categorical dtype; entries are listed by descending frequency
# in the dataset (see the header comment of this cell).
categories_type = pd.CategoricalDtype(
    ["M/M", "Gen", "F/M", "F/F", "Multi", "No category", "Other"],
    ordered=True,
)

# Ordered categorical dtype for archive warnings.
# The list order is the frequency order in the dataset.
warnings_type = pd.CategoricalDtype(
    [
        "No Archive Warnings Apply",
        "Choose Not To Use Archive Warnings",
        "Graphic Depictions Of Violence",
        "Major Character Death",
        "Rape/Non-Con",
        "Underage",
    ],
    ordered=True,
)

# Ordered categorical dtype for ratings.
# The list order is the frequency order in the dataset.
rating_type = pd.CategoricalDtype(
    [
        "General Audiences",
        "Teen And Up Audiences",
        "Explicit",
        "Mature",
        "Not Rated",
    ],
    ordered=True,
)

# Ordered categorical dtype for language codes; 'en' is the first (lowest) category.
language_type = pd.CategoricalDtype(
    [
        "en", "ru", "de", "zh-Hans",
        "it", "pt-br", "ko", "fr",
        "es", "cy", "pl", "cs",
        "ja", "he", "tlh-Latn", "nl",
    ],
    ordered=True,
)

# Column -> dtype mapping handed to pd.read_csv.
# 'id' uses plain int64; the other integer columns use the nullable 'Int64'
# extension dtype. 'language' and 'rating' use the ordered categorical
# dtypes defined earlier in this cell.
# Each key appears exactly once: a repeated key in a dict literal is
# silently overwritten by its last occurrence.
dtypes = {
    'id': 'int64',
    'author': 'string',
    'chapter': 'Int64',
    'chapters': 'Int64',
    'comments': 'Int64',
    'complete': 'bool',
    'filename': 'string',
    'hits': 'Int64',
    'kudos': 'Int64',
    'language': language_type,
    'summary': 'string',
    'title': 'string',
    'userid': 'Int64',
    'words': 'Int64',
    'rating': rating_type,
}

# Load data from CSV into a pandas DataFrame indexed by the 'id' column.
# See https://pbpython.com/pandas_dtypes.html for background on the dtype mapping.
df = pd.read_csv('data/database/20220612.csv', dtype=dtypes).set_index('id')

# Convert to pandas datetime so the column can be compared with a Timestamp.
df['publicationdate'] = pd.to_datetime(df['publicationdate'])

# Only publications on or after 2010-01-01
dawn = pd.Timestamp('2010-01-01')
df = df[df['publicationdate'] >= dawn]

# Only English
df = df[df['language'] == 'en']

# Complete works ('complete' is read as bool, so it can be used as a mask directly)
df = df[df['complete']]



In [2]:
# Ordered categorical dtype for the 'sex' column of the character table.
sex_type = pd.CategoricalDtype(
    ["Male", "Female", "Other", "unknown"],
    ordered=True,
)

# Ordered categorical dtype for the 'species' column of the character table.
# Every entry must be followed by a comma: adjacent string literals are
# concatenated by Python, which would merge two categories into one.
# NOTE: the spelling 'Ferangi' is kept as-is because the character table in
# this file uses the same spelling; changing it here alone would turn those
# rows into NaN.
species_type = pd.api.types.CategoricalDtype(
    categories=[
        'Bajoran',
        'Betazoid',
        'Cardassian',
        'Cat',
        'Changeling',
        'Ferangi',
        'Hologram',
        'Human',
        "Jem'Hadar",
        'Klingon',
        'Q',
        'Romulan',
        'Symbiont',
        'Trill',
        'Vorta',
        'Vulcan',
        'unknown',
    ],
    ordered=True)

# Per-character lookup table: the index is the character name and the two
# columns are 'species' and 'sex'.
#
# Both columns are converted to the ordered categorical dtypes species_type
# and sex_type below. A value that is not one of the dtype's categories is
# not rejected by that conversion; it becomes NaN. Every species/sex string
# here must therefore match a category exactly, including the 'Ferangi'
# spelling used by species_type.
character_attributes = pd.DataFrame.from_dict(
    {
        "Darhe'el": ['Cardassian', 'Male'],
        "Dukat's Children": ['Cardassian', 'unknown'],
        "Dukat's Mother": ['Cardassian', 'Female'],
        "K'Ehleyr": ['Klingon', 'Female'],
        # NOTE(review): this key uses a typographic apostrophe (’) while the
        # other O'Brien keys use a plain one (') — confirm it matches the data.
        "Keiko O’Brien": ['Human', 'Female'],
        "Kirayoshi O'Brien": ['Human', 'Male'],
        "Kotan Pa'Dar": ['Cardassian', 'Male'],
        "Miles O'Brien": ['Human', 'Male'],
        "Molly O'Brien": ['Human', 'Female'],
        "Omet'iklan": ["Jem'Hadar", 'Male'],
        "Remata'Klan": ["Jem'Hadar", 'Male'],
        "Rugal Pa'Dar": ['Cardassian', 'Male'],
        'Aamin Marritza': ['Cardassian', 'Male'],
        'Akellen Macet': ['Cardassian', 'Male'],
        'Akorem Laan': ['Bajoran', 'Male'],
        'Alexander Rozhenko': ['Human', 'Male'],
        'Alynna Nechayev': ['Human', 'Female'],
        'Amsha Bashir': ['Human', 'Female'],
        'Arjin': ['Trill', 'Male'],
        'Athra Dukat': ['Cardassian', 'Female'],
        'Audrid Dax': ['Trill', 'Female'],
        'Baby Changeling': ['Changeling', 'unknown'],
        'Bareil Antos': ['Bajoran', 'Male'],
        'Barin Troi': ['Betazoid', 'Male'],
        'Benjamin Sisko': ['Human', 'Male'],
        'Broca': ['Cardassian', 'Male'],
        'Brunt': ['Ferangi', 'Male'],
        'Cal Hudson': ['Human', 'Male'],
        'Captain Solok': ['Vulcan', 'Male'],
        'Chalan Aroya': ['Bajoran', 'Female'],
        'Chester': ['Cat', 'Male'],
        'Corat Damar': ['Cardassian', 'Male'],
        'Corbin Entek': ['Cardassian', 'Male'],
        'Curzon Dax': ['Trill', 'Male'],
        # The symbiont itself, as opposed to its Trill hosts listed separately.
        'Dax': ['Symbiont', 'Other'],
        'Dulmur': ['Human', 'Male'],
        'Elim Garak': ['Cardassian', 'Male'],
        'Emergency Medical Hologram Mark II': ['Hologram', 'Male'],
        'Emony Dax': ['Trill', 'Female'],
        'Enabran Tain': ['Cardassian', 'Male'],
        'Entek': ['Cardassian', 'Male'],
        'Eris': ['Vorta', 'Female'],
        'Ezri Dax': ['Trill', 'Female'],
        'Fala Trentin': ['Bajoran', 'Female'],
        'Female Changeling': ['Changeling', 'Female'],
        'Furel': ['Bajoran', 'Male'],
        'Gaila': ['Ferangi', 'Male'],
        'Gariff Lucsly': ['Human', 'Male'],
        'Gilora Rejal': ['Cardassian', 'Female'],
        'Gowron': ['Klingon', 'Male'],
        'Grilka': ['Klingon', 'Female'],
        'Gul Jasad': ['Cardassian', 'Male'],
        'Gul Marratt': ['Cardassian', 'Male'],
        'Gul Revok': ['Cardassian', 'Male'],
        'Gul Rusot': ['Cardassian', 'Male'],
        'Gul Russol': ['Cardassian', 'Male'],
        'Gul Zarale': ['Cardassian', 'Male'],
        'Iliana Ghemor': ['Cardassian', 'Female'],
        'Iloja of Prim': ['Cardassian', 'Male'],
        'Ishka': ['Ferangi', 'Female'],
        'Jack': ['Human', 'Male'],
        'Jadzia Dax': ['Trill', 'Female'],
        'Jake Sisko': ['Human', 'Male'],
        'Jaro Essa': ['Bajoran', 'Male'],
        'Jennifer Sisko': ['Human', 'Female'],
        'Joran Dax': ['Trill', 'Male'],
        'Joseph Sisko': ['Human', 'Male'],
        'Judith Sisko': ['Human', 'Female'],
        'Julian Bashir': ['Human', 'Male'],
        'Kai Winn': ['Bajoran', 'Female'],
        'Kang': ['Klingon', 'Male'],
        'Kasidy Yates': ['Human', 'Female'],
        'Keevan': ['Vorta', 'Male'],
        'Kelas Parmak': ['Cardassian', 'Male'],
        'Keldar': ['Ferangi', 'Male'],
        'Kilana': ['Vorta', 'Female'],
        'Kimara Cretak': ['Romulan', 'Female'],
        'Kira Meru': ['Bajoran', 'Female'],
        'Kira Nerys': ['Bajoran', 'Female'],
        'Kira Pohl': ['Bajoran', 'Male'],
        'Kira Reon': ['Bajoran', 'Male'],
        'Kira Taban': ['Bajoran', 'Male'],
        'Koloth': ['Klingon', 'Male'],
        'Kor': ['Klingon', 'Male'],
        'Kurn': ['Klingon', 'Male'],
        'Laas': ['Changeling', 'Male'],
        'Lauren': ['Human', 'Female'],
        'Leeta': ['Bajoran', 'Female'],
        'Lela Dax': ['Trill', 'Female'],
        'Lenara Kahn': ['Trill', 'Female'],
        'Lewis Zimmerman': ['Human', 'Male'],
        'Leyton': ['Human', 'Male'],
        'Li Nalas': ['Bajoran', 'Male'],
        'Lisa Cusak': ['Human', 'Female'],
        'Lupaza': ['Bajoran', 'Female'],
        'Luther Sloan': ['Human', 'Male'],
        'Lwaxana Troi': ['Betazoid', 'Female'],
        'Martok': ['Klingon', 'Male'],
        'Michael Eddington': ['Human', 'Male'],
        'Mila Garak': ['Cardassian', 'Female'],
        'Mora Pol': ['Bajoran', 'Male'],
        'Morn': ['unknown', 'Male'],
        'Mullibok': ['Bajoran', 'Male'],
        'Natima Lang': ['Cardassian', 'Female'],
        'Niala Damar': ['Cardassian', 'Female'],
        'Nilani Kahn': ['Trill', 'Female'],
        'Nog': ['Ferangi', 'Male'],
        'Odo': ['Changeling', 'Male'],
        'Opaka': ['Bajoran', 'Female'],
        'Patrick': ['Human', 'Male'],
        'Pel': ['Ferangi', 'Female'],
        'Prinadora': ['Ferangi', 'Female'],
        'Q': ['Q', 'Male'],
        'Quark': ['Ferangi', 'Male'],
        'Rebecca Sisko': ['Human', 'Female'],
        'Rebecca Sullivan': ['Human', 'Female'],
        'Richard Bashir': ['Human', 'Male'],
        'Rom': ['Ferangi', 'Male'],
        'Sakal Damar': ['Cardassian', 'Male'],
        'Sarah Sisko': ['Human', 'Female'],
        'Sarina Douglas': ['Human', 'Female'],
        'Shakaar Edon': ['Bajoran', 'Male'],
        'Sirella': ['Klingon', 'Female'],
        'Skrain Dukat': ['Cardassian', 'Male'],
        'Tahna Los': ['Bajoran', 'Male'],
        'Tekeny Ghemor': ['Cardassian', 'Male'],
        'Thomas Riker': ['Human', 'Male'],
        'Thrax': ['Cardassian', 'Male'],
        'Tobin Dax': ['Trill', 'Male'],
        'Tora Naprem': ['Cardassian', 'Female'],
        # NOTE(review): 'Multiple' is not among the species_type categories
        # visible in this file, so this species ends up as NaN after the
        # astype below — confirm whether a category should be added.
        'Tora Ziyal': ['Multiple', 'Female'],
        'Torias Dax': ['Trill', 'Male'],
        'Vaatrik Pallra': ['Bajoran', 'Female'],
        'Vash': ['Human', 'Female'],
        'Verad': ['Trill', 'Male'],
        'Vic Fontaine': ['Human', 'Male'],
        'Vreenak': ['Romulan', 'Male'],
        'Weyoun': ['Vorta', 'Male'],
        'Winn Adami': ['Bajoran', 'Female'],
        'Worf': ['Klingon', 'Male'],
        'Yedrin Dax': ['Trill', 'Male'],
        'Yelgrun': ['Vorta', 'Male'],
        'Zek': ['Ferangi', 'Male'],
        'Ziranne Idaris': ['Trill', 'Female'],
    },
    orient='index',
    columns=['species', 'sex'],
    dtype='string')

# Species and Sex are categories
character_attributes['species'] = character_attributes['species'].astype(species_type)
character_attributes['sex'] = character_attributes['sex'].astype(sex_type)

# character_attributes is ready for use
