In [30]:
import requests
import getpass
import re
import unidecode
import csv
import os
import random
from collections import deque

**The function implementing the original idea of working with the whole Drama Corpus data**: network data for each play is mined via the API and recorded into a separate file for each sub-corpus.

In [None]:
def get_network_metrics():
    """Download the network metrics of every play in every DraCor corpus.

    Asks the DraCor API for the list of corpora and, for each corpus that
    does not yet have a ``<corpus name>.csv`` file in the working directory,
    fetches the metrics of each of its plays and stores them as one
    semicolon-delimited row per play.

    ``canon.csv`` is deliberately not touched here: it is produced by
    ``write_canon()``, which is skipped when the file already exists.

    Raises:
        requests.HTTPError: if any API request returns an error status.
        KeyError: if a metrics response lacks one of the expected fields.
    """
    url_base = 'http://dracor.org/api/corpora/'
    fieldnames = ['name', 'size', 'diameter',
                  'density', 'averageClustering',
                  'averagePathLength', 'maxDegree']

    r = requests.get(url_base)
    r.raise_for_status()
    corpora = list(r.json())

    for corpus in corpora:
        print(corpus['name'])
        fname = corpus['name'] + '.csv'

        # Already downloaded on a previous run: skip before hitting the API.
        if os.path.exists(fname):
            continue

        r = requests.get(url_base + corpus['name'])
        r.raise_for_status()
        dramas = [drama['name'] for drama in r.json()['dramas']]

        # Write to a temporary name first so that an interrupted download
        # never leaves a truncated file that later runs would mistake for a
        # complete one.
        partial_name = fname + '.part'
        with open(partial_name, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=';')
            writer.writeheader()

            for drama in dramas:
                print(drama)
                r = requests.get(url_base + corpus['name'] +
                                 '/play/' + drama + '/metrics')
                r.raise_for_status()

                metrics = r.json()
                writer.writerow({fieldname: metrics[fieldname] for fieldname in fieldnames})

        os.replace(partial_name, fname)

# Run the full download; this needs network access to the DraCor API and
# writes one CSV file per corpus into the working directory.
get_network_metrics()

Downloading the network data for the subset of drama corpus constituting the minimal canon of European drama as assembled by F.Fischer et al. for the poster contribution to DH2018 via the repository at https://github.com/lehkost/dramenquartett

In [26]:
# The password is requested interactively (getpass does not echo the input).
# NOTE(review): neither `username` nor `password` is referenced by any code
# visible in this notebook - confirm whether this cell is still needed.
username = 'wildlighted'
password = getpass.getpass()

········


In [None]:
def get_canon_plays():
    """Fetch the canon dataset and return its data rows as normalized text.

    Downloads the CSV published in the ``lehkost/dramenquartett`` repository,
    lower-cases it, transliterates it to ASCII with ``unidecode`` and splits
    it into lines.

    Returns:
        list[str]: every non-blank line after the header row.

    Raises:
        requests.HTTPError: if the download returns an error status, so that
            an error page is never parsed as if it were data.
    """
    r = requests.get('https://raw.githubusercontent.com/lehkost/dramenquartett/'
                     'master/dh2018-mexico/brecht_beats_shakespeare_data.csv')
    r.raise_for_status()

    text = unidecode.unidecode(r.text.lower())
    # Skip the header row and drop blank lines explicitly rather than blindly
    # discarding the last element: that would lose the final play whenever
    # the file does not end with a newline.
    rows = text.split('\n')[1:]
    return [row for row in rows if row.strip()]
         
# Module-level list of raw dataset rows; write_canon() reads it as a global.
plays = get_canon_plays()

The data is unified, cleaned and assembled into a dataset of a different format for further work.

In [31]:
def write_canon():
    """Write the cleaned canon dataset to ``canon.csv``.

    Reads the module-level ``plays`` list (raw comma-separated rows) and
    writes one semicolon-separated row per play: a running id starting at 1,
    a name built as ``<last word of column 0>-<dash-joined words of column 1>``
    and the metric columns with decimal commas turned into decimal points.
    """
    fieldnames = ['id', 'name', 'size', 'diameter',
                  'density', 'averageClustering',
                  'averagePathLength', 'maxDegree']

    with open('canon.csv', 'w', encoding='utf-8') as f:
        f.write(';'.join(fieldnames) + '\n')

        for play_id, line in enumerate(plays, start=1):
            # Split on commas, except those followed by digits and a closing
            # quote or by whitespace and word characters.
            # NOTE(review): presumably this protects decimal commas inside
            # quoted numbers and commas inside names/titles - confirm against
            # the source CSV.
            fields = re.split(r',(?!\d+"|\s\w+)', line)
            author = fields[0].split()[-1]
            # Strip punctuation from the title, then join its words with '-'.
            title = '-'.join(re.sub(r'[^\w\d\s]+', '', fields[1]).split())
            name = author + '-' + title
            # Columns 5 .. second-to-last are taken as the metrics.
            # NOTE(review): assumes these are exactly the six metrics named
            # in the header - verify against the source CSV layout.
            metrics = [field.strip('"').replace(',', '.') for field in fields[5:-1]]
            f.write(str(play_id) + ';' + name + ';' + ';'.join(metrics) + '\n')

# Generate canon.csv only when it is missing, so re-running this cell does not
# rebuild it; delete the file to force regeneration.
if not os.path.exists('canon.csv'):
    write_canon()

Average value for each of the six metrics calculated.

In [4]:
def count_average(filename):
    """Compute the mean of each network metric over all plays in a dataset.

    The file is expected to be semicolon-separated with one header row; in
    every data row the first two columns are skipped and the following
    columns are read, in order, as size, diameter, density,
    averageClustering, averagePathLength and maxDegree.

    Args:
        filename: path of the dataset file (read as UTF-8).

    Returns:
        dict: metric name -> arithmetic mean over all data rows.

    Raises:
        ValueError: if the file contains no data rows, or a metric value is
            not a number.
        IndexError: if a row has more metric columns than known metrics.
    """
    metric_keys = ['size', 'diameter', 'density',
                   'averageClustering', 'averagePathLength', 'maxDegree']

    with open(filename, 'r', encoding='utf-8') as f:
        # Skip the header; ignore blank lines so that e.g. a trailing empty
        # line is not counted as a play and does not skew the averages.
        rows = [line for line in f.readlines()[1:] if line.strip()]

    if not rows:
        raise ValueError('no data rows found in ' + repr(filename))

    totals = dict.fromkeys(metric_keys, 0)
    for row in rows:
        for i, metric in enumerate(row.split(';')[2:]):
            totals[metric_keys[i]] += float(metric)

    return {key: total / len(rows) for key, total in totals.items()}

# Per-metric averages over the canon dataset; one_turn() reads this
# module-level name when computing divergences.
averages = count_average('canon.csv')
#print(averages)

{'size': 37.21875, 'diameter': 3.125, 'density': 0.41625000000000006, 'averageClustering': 0.8390624999999998, 'averagePathLength': 1.7487500000000007, 'maxDegree': 25.53125}


Function calculating the divergence of each metric of a play from the corresponding average. The divergence of the average path length is taken with the opposite sign, because for that metric the lower value wins.

In [5]:
def count_divergence(metrics, avgs):
    """Return how far each metric of a play lies from the average.

    Args:
        metrics: metric values of one play, in the same order as the keys
            of ``avgs``.
        avgs: dict mapping metric name -> average value.

    Returns:
        dict: metric name -> ``value - average``; the entry for
        ``'averagePathLength'`` has its sign flipped.
    """
    names = list(avgs.keys())
    # Values are matched to metric names purely by position.
    deltas = {names[pos]: value - avgs[names[pos]]
              for pos, value in enumerate(metrics)}
    deltas['averagePathLength'] = -deltas['averagePathLength']
    return deltas

Function forming two random decks from a dataset.

In [14]:
def form_deck(csvfile, size):
    """Shuffle the cards of a dataset and deal them into two decks.

    Args:
        csvfile: path of the dataset file (UTF-8, one header row, one card
            per line).
        size: total number of cards to deal; the first deck receives
            ``size // 2`` cards and the second deck the rest, up to ``size``
            cards overall.

    Returns:
        tuple[deque, deque]: the two decks; each card is the raw text line
        from the file.
    """
    with open(csvfile, 'r', encoding='utf-8') as f:
        # Skip the header; a blank line is not a card.
        cards = [line for line in f.readlines()[1:] if line.strip()]

    random.shuffle(cards)
    half = size // 2
    # Cap the second deck at `size` so that a dataset with more than `size`
    # cards does not hand all the surplus cards to the second player.
    return deque(cards[:half]), deque(cards[half:size])

Function simulating one turn of a game.
Depending on the variable indicating which player is choosing, the metric with the highest divergence from average from the corresponding card is chosen, compared with the same metric on the other card. Cards from the table are added to the bottom of the winning player's deck.
In case of a draw, recursion is used until the draw is resolved.  
All the specifics of the turn including cards, choosing player, winner, metric are logged.

In [26]:
# Names of the metric columns of a card, in file order.
metricnames = ['size', 'diameter', 'density',
               'averageClustering', 'averagePathLength', 'maxDegree']

def one_turn(first_deck, second_deck):
    """Play one turn of the game and return its log record.

    The top (right-most) card of each deck is put on the shared table. The
    choosing player picks the metric in which their own card diverges most
    from the average, and the raw values of that metric on both cards are
    compared: the higher value wins, except for ``averagePathLength``, where
    the lower value wins. The winner receives every card on the table at the
    bottom (left end) of their deck and becomes the chooser of the next turn.

    On a draw the cards stay on the table and another turn is played
    recursively while both decks still hold cards; if a deck has run out, the
    table goes to the current chooser.

    Relies on the module-level names ``table`` (list of cards currently at
    stake), ``first_player`` (True when player 1 chooses), ``averages`` and
    ``count_divergence``.

    Args:
        first_deck: deque with the cards of player 1 (mutated).
        second_deck: deque with the cards of player 2 (mutated).

    Returns:
        str: ``card1,card2,chooser,metric,value1,value2,div1,div2,winner``
        without a turn number or trailing newline; ``winner`` is ``'1'``,
        ``'2'`` or ``'0'`` for a draw that was settled by a nested turn.
    """
    global table, first_player

    card_1 = first_deck.pop()
    card_2 = second_deck.pop()
    table.extend([card_1, card_2])

    metrics_card_1 = [float(metric) for metric in card_1.split(';')[2:]]
    metrics_card_2 = [float(metric) for metric in card_2.split(';')[2:]]
    metrics_1 = dict(zip(metricnames, metrics_card_1))
    metrics_2 = dict(zip(metricnames, metrics_card_2))

    divs_1 = count_divergence(metrics_card_1, averages)
    divs_2 = count_divergence(metrics_card_2, averages)

    # The chooser plays the metric with the largest divergence on their own
    # card. Inverting the dict means that, among metrics with an identical
    # divergence, the one listed last is picked.
    chooser_divs = divs_1 if first_player else divs_2
    inv_divs = {value: key for key, value in chooser_divs.items()}
    metricname = inv_divs[max(chooser_divs.values())]

    value_1 = metrics_1[metricname]
    value_2 = metrics_2[metricname]

    # rstrip instead of slicing off the last character, so a card without a
    # trailing newline does not lose a digit of its last metric.
    log = ','.join([card_1.rstrip('\n'), card_2.rstrip('\n'),
                    '1' if first_player else '2',
                    metricname, str(value_1), str(value_2),
                    str(divs_1[metricname]), str(divs_2[metricname])]) + ','

    # A shorter average path is better, so the comparison is reversed for it.
    if metricname == 'averagePathLength':
        first_wins = value_1 < value_2
    else:
        first_wins = value_1 > value_2

    if first_wins:
        first_deck.extendleft(table)
        first_player = True
        winner = '1'
    elif value_1 == value_2:
        if first_deck and second_deck:
            # Draw: the stake stays on the table and the nested turn's winner
            # takes everything.
            # NOTE(review): the log string returned by the nested turn is
            # discarded, so tie-break turns never reach the game log.
            one_turn(first_deck, second_deck)
            winner = '0'
        elif first_player:
            first_deck.extendleft(table)
            winner = '1'
        else:
            second_deck.extendleft(table)
            winner = '2'
    else:
        second_deck.extendleft(table)
        first_player = False
        winner = '2'

    table.clear()
    return log + winner

Function simulating one game.
One turn function is repeated until one of the decks is left empty.
Each game process is logged into a separate file.

In [27]:
def one_game(game_number, csvfile='canon.csv', deck_size=32):
    """Simulate one complete game and log it to CSV files.

    Two decks are dealt with ``form_deck`` and saved to
    ``game_<n>_deck_1.csv`` and ``game_<n>_deck_2.csv``; turns are then
    played with ``one_turn`` until one deck is empty, each turn being
    appended to ``game_<n>.csv``. Player 1 chooses the metric in the first
    turn.

    Resets the module-level ``table`` and ``first_player`` used by
    ``one_turn``.

    Args:
        game_number: number used in the names of the output files.
        csvfile: dataset the cards are drawn from.
        deck_size: total number of cards dealt to the two players.
    """
    global table, first_player

    deck_1, deck_2 = form_deck(csvfile, deck_size)
    prefix = 'game_' + str(game_number)

    # Keep the initial decks so that a game can be inspected afterwards.
    for suffix, deck in (('_deck_1.csv', deck_1), ('_deck_2.csv', deck_2)):
        with open(prefix + suffix, 'w', encoding='utf-8') as f:
            f.writelines(deck)

    table = []
    first_player = True
    turn = 1
    with open(prefix + '.csv', 'w', encoding='utf-8') as f:
        f.write('turn,card1,card2,chooser,metric,value1,value2,div1,div2,winner\n')
        while deck_1 and deck_2:
            f.write(str(turn) + ',' + one_turn(deck_1, deck_2) + '\n')
            turn += 1

The necessary number of games is simulated.

In [None]:
# Simulate games numbered 1 to 100; every call writes the game log and the two
# initial decks to files named after the game number.
for i in range(1, 101):
    one_game(i)