## Setup

In [381]:
import pandas as pd
import requests
from io import StringIO
from scipy.special import comb

import datetime

In [338]:
url_1 = "https://github.com/fivethirtyeight/data/raw/master/riddler-castles/castle-solutions.csv"
url_2 = "https://github.com/fivethirtyeight/data/raw/master/riddler-castles/castle-solutions-2.csv"
url_3 = "https://github.com/fivethirtyeight/data/raw/master/riddler-castles/castle-solutions-3.csv"

s_1 = requests.get(url_1).text
s_2 = requests.get(url_2).text
s_3 = requests.get(url_3).text

In [339]:
royale_1 = pd.read_csv(StringIO(s_1))
royale_2 = pd.read_csv(StringIO(s_2))
royale_3 = pd.read_csv(StringIO(s_3))

In [340]:
battles = pd.concat([royale_1, royale_2, royale_3])

## Cleaning

In [341]:
battles.dtypes

Castle 1                                      object
Castle 2                                      object
Castle 3                                      object
Castle 4                                      object
Castle 5                                      object
Castle 6                                     float64
Castle 7                                     float64
Castle 8                                     float64
Castle 9                                     float64
Castle 10                                    float64
Why did you choose your troop deployment?     object
dtype: object

- Got a bunch to work with, so just going to drop the ones that are not cooperating

In [342]:
battles[pd.to_numeric(battles['Castle 2'], errors='coerce').isnull()]

Unnamed: 0,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?
816,0,2p,0,0,20,20.0,20.0,20.0,0.0,0.0,Try to get to 28 in a way that average person ...
1028,2- z,4- z,4- z,7- 15,12- 20,15.0,20.0,22.0,7.0,7.0,


- Turns out it's just two rows that are wrong. Drop 'em

In [343]:
battles = battles.drop(battles.index[[816, 1028]])

In [344]:
cols = battles.columns.drop("Why did you choose your troop deployment?")

battles[cols] = battles[cols].apply(pd.to_numeric)

In [345]:
battles.dtypes

Castle 1                                     float64
Castle 2                                     float64
Castle 3                                     float64
Castle 4                                     float64
Castle 5                                     float64
Castle 6                                     float64
Castle 7                                     float64
Castle 8                                     float64
Castle 9                                     float64
Castle 10                                    float64
Why did you choose your troop deployment?     object
dtype: object

In [346]:
battles.head()

Unnamed: 0,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?
0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"because, I am number one!"
1,52.0,2.0,2.0,2.0,2.0,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...
2,26.0,26.0,26.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,The top 3 are necessary for a majority and the...
3,26.0,5.0,5.0,5.0,6.0,7.0,26.0,0.0,0.0,0.0,"Most people will focus on high number, but cas..."
4,25.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi..."


## Remove Duplicates

In [347]:
battles.shape

(3780, 11)

In [348]:
battles = battles.drop_duplicates(subset=battles.columns[0:10])

In [349]:
battles.shape

(3403, 11)

In [350]:
battles = battles.reset_index()

In [351]:
battles.head(10)

Unnamed: 0,index,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?
0,0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"because, I am number one!"
1,1,52.0,2.0,2.0,2.0,2.0,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...
2,2,26.0,26.0,26.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,The top 3 are necessary for a majority and the...
3,3,26.0,5.0,5.0,5.0,6.0,7.0,26.0,0.0,0.0,0.0,"Most people will focus on high number, but cas..."
4,4,25.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi..."
5,8,23.0,1.0,1.0,1.0,1.0,2.0,2.0,23.0,23.0,23.0,The ones and twos are mostly to pick up any un...
6,9,21.0,18.0,15.0,13.0,11.0,9.0,6.0,4.0,2.0,1.0,"On average 1.81 soldiers per point, with some ..."
7,10,21.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,27.0,"If you were to win castles 10, 9, 8, and 1 eac..."
8,11,20.0,12.0,13.0,13.0,14.0,14.0,14.0,0.0,0.0,0.0,Get to 28 by conquering the smallest towers
9,12,20.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,30.0,it put high power making it easy to win the ca...


## Battle Code

In [370]:
debug = False

'''
Should add a dict that records the results of battles to use to speed up later ones
- Key is the index value
- Value is a dict of the enemies played by index and the points gotten against that enemy

Can search against this dict before doing all the work over
'''

def get_battle_score(row):
    # track royale wins
    royale_total = 0

    # loop through all submissions
    for i in range(0, results.shape[0]):
        points_total_home = 0
        points_total_enemy = 0
        
        # don't let the row battle itself
        if i != row.name:
            enemy = results.iloc[i]
            if debug: print(f"Row {row.name} versus row {i}")
            
            # loop thru castles in battle
            for c in range(1, 11):
                # if home has more, they get the points
                if row[f'Castle {c}'] > enemy[f'Castle {c}']:
                    points_total_home += c
                # if there's a tie and it isn't zero, both get points
                elif row[f'Castle {c}'] == enemy[f'Castle {c}']:
                    if row[f'Castle {c}'] != 0:
                        points_total_home += c / 2
                        points_total_enemy += c / 2
                # otherwise enemy has more and gets points
                else:
                    points_total_enemy += c
            
            if debug: print(f"\t Home has {points_total_home}")
            if debug: print(f"\t Enemy has {points_total_enemy}")
            
            # count tie battles as a win
            if points_total_home >= points_total_enemy:
                royale_total += 1
                if debug: print("\t Home Wins")
            
    return royale_total


# Battle Royale

In [403]:
results = battles.copy()

results = results.iloc[0:500]

possible_combos = comb(results.shape[0], 2)
print(f"There are {possible_combos} combinations")

results.head(10)

There are 124750.0 combinations


Unnamed: 0,index,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?
0,0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"because, I am number one!"
1,1,52.0,2.0,2.0,2.0,2.0,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...
2,2,26.0,26.0,26.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,The top 3 are necessary for a majority and the...
3,3,26.0,5.0,5.0,5.0,6.0,7.0,26.0,0.0,0.0,0.0,"Most people will focus on high number, but cas..."
4,4,25.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi..."
5,8,23.0,1.0,1.0,1.0,1.0,2.0,2.0,23.0,23.0,23.0,The ones and twos are mostly to pick up any un...
6,9,21.0,18.0,15.0,13.0,11.0,9.0,6.0,4.0,2.0,1.0,"On average 1.81 soldiers per point, with some ..."
7,10,21.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,27.0,"If you were to win castles 10, 9, 8, and 1 eac..."
8,11,20.0,12.0,13.0,13.0,14.0,14.0,14.0,0.0,0.0,0.0,Get to 28 by conquering the smallest towers
9,12,20.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,30.0,it put high power making it easy to win the ca...


## Run it!

In [404]:
start = datetime.datetime.now()

possible_combos = comb(results.shape[0], 2)
print(f"There are {possible_combos} combinations")
print("Running...")

# this takes a while if you run it on everything
results['wins'] = results.apply(get_battle_score, axis=1)

print(f"DONE! {datetime.datetime.now() - start} to execute")

There are 124750.0 combinations
Running...
DONE! 0:01:34.465914 to execute


## Final Results

In [394]:
results.sort_values(by='wins', ascending=False).head(25)

Unnamed: 0,index,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?,wins
21,25,15.0,1.0,1.0,1.0,2.0,2.0,3.0,25.0,25.0,25.0,Need 28 pts to win a war. I assumed there are...,87
5,8,23.0,1.0,1.0,1.0,1.0,2.0,2.0,23.0,23.0,23.0,The ones and twos are mostly to pick up any un...,87
11,14,19.0,1.0,1.0,1.0,1.0,1.0,1.0,25.0,25.0,25.0,need 28 to win,85
1,1,52.0,2.0,2.0,2.0,2.0,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...,82
38,45,12.0,12.0,2.0,4.0,4.0,11.0,19.0,10.0,18.0,8.0,Used a random number formula which summed to 1...,82
7,10,21.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,27.0,"If you were to win castles 10, 9, 8, and 1 eac...",82
4,4,25.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi...",79
9,12,20.0,0.0,0.0,0.0,0.0,0.0,0.0,25.0,25.0,30.0,it put high power making it easy to win the ca...,78
18,22,15.0,7.0,8.0,5.0,5.0,11.0,10.0,11.0,10.0,18.0,People want 1 and 10 so I'm fortifying them,78
27,33,13.0,13.0,13.0,13.0,13.0,13.0,13.0,3.0,3.0,3.0,I chose this troop deployment because 28 victo...,78


In [405]:
print(f"There are {results.wins.sum()} total wins and {possible_combos} possible combos")

There are 128725 total wins and 124750.0 possible combos


# Testing

Taking it to be that in tie battles for a castle "splitting the points" means both teams get points for the castle if it ends in a tie (so the points aren't split in half, but rather given to both teams).

In [183]:
practice = battles.iloc[0:6]

In [184]:
practice.head(10)

Unnamed: 0,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?
0,100,0,0,0,0,0.0,0.0,0.0,0.0,0.0,"because, I am number one!"
1,52,2,2,2,2,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...
2,26,26,26,16,1,1.0,1.0,1.0,1.0,1.0,The top 3 are necessary for a majority and the...
3,26,5,5,5,6,7.0,26.0,0.0,0.0,0.0,"Most people will focus on high number, but cas..."
4,25,0,0,0,0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi..."
5,25,0,0,0,0,0.0,0.0,25.0,25.0,25.0,Submission #4. A variation of my third submiss...


In [185]:
possible_combos = comb(practice.shape[0], 2)
print(f"There are {possible_combos} combinations")

There are 15.0 combinations


In [186]:
results = practice.copy()

results['wins'] = practice.apply(get_battle_score, axis=1)

In [187]:
results.sort_values(by='wins', ascending=False).head(10)

Unnamed: 0,Castle 1,Castle 2,Castle 3,Castle 4,Castle 5,Castle 6,Castle 7,Castle 8,Castle 9,Castle 10,Why did you choose your troop deployment?,wins
1,52,2,2,2,2,2.0,2.0,12.0,12.0,12.0,I need to win at least 4 castles to win the ga...,5
2,26,26,26,16,1,1.0,1.0,1.0,1.0,1.0,The top 3 are necessary for a majority and the...,4
3,26,5,5,5,6,7.0,26.0,0.0,0.0,0.0,"Most people will focus on high number, but cas...",3
4,25,0,0,0,0,0.0,0.0,25.0,25.0,25.0,"The total points up for grabs is 55, and to wi...",2
5,25,0,0,0,0,0.0,0.0,25.0,25.0,25.0,Submission #4. A variation of my third submiss...,2
0,100,0,0,0,0,0.0,0.0,0.0,0.0,0.0,"because, I am number one!",0


In [188]:
print(f"There are {results.wins.sum()} total wins and {possible_combos} possible combos")

There are 16 total wins and 15.0 possible combos


- Here we have more wins than possible combos because we have a repeat distribution
- Should remove identical soldier allocations to fix this

### Removing Duplicates

In [189]:
remove_dups = battles.iloc[0:50]

In [190]:
remove_dups.shape

(50, 11)

In [195]:
remove_dups.drop_duplicates(subset=remove_dups.columns[0:10]).shape

(41, 11)

- This should work for whittling down the choices