In [1]:
import pandas as pd
import os
import seaborn
from datetime import datetime
import time
import math
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import altair as alt
import numpy as np
import gc

alt.data_transformers.disable_max_rows() # lift Altair's default 5,000-row limit (MaxRowsError) so larger datasets can be charted

DataTransformerRegistry.enable('default')

In [2]:
# font sizes: chart title, axis titles, tick/legend labels
fs_title, fs_axis, fs_tick = 28, 24, 20

# default chart dimensions (width, height)
c_width, c_height = 800, 800

# named color schemes used by the charts
seq_scheme = "plasma"                                   # sequential
a_div_scheme, m_div_scheme = "purpleorange", "PuOr"     # diverging (presumably Altair / matplotlib spellings)
sm_cat_scheme, lg_cat_scheme = "category10", "category20"  # categorical, small / large palettes
solid_color = "#0f0888"                                 # single fixed color

In [3]:
pb_only = True # SET TO TRUE TO USE ONLY PERSONAL BEST RECORDS

if pb_only:
    # personal-best records only; discard the stored index in favor of 0..n-1
    record_df = pd.read_csv("out/all_pbs.csv", index_col=[0]).reset_index(drop=True)
else:
    # full record dataset is stored in two parts; stitch them together with a fresh index
    record_df = pd.concat(
        [
            pd.read_csv(part_path, index_col=[0])
            for part_path in ("out/all_records_pt_1.csv", "out/all_records_pt_2.csv")
        ],
        ignore_index=True,
    )

# user info is always loaded in full, also with a fresh 0..n-1 index
user_df = pd.read_csv("out/all_user_info.csv", index_col=[0]).reset_index(drop=True)

In [4]:
# derived per-record metrics
# keys per second: inputs divided by the final time converted from ms to s
record_df["kps"] = record_df["inputs"].div(record_df["final_time"].div(1000))
# keys per piece
record_df["kpp"] = record_df["inputs"].div(record_df["pieces_placed"])
# fraction of pieces placed with perfect finesse
record_df["percent_perf"] = record_df["finesse_perf"].div(record_df["pieces_placed"])
# final time in whole seconds (floor of ms / 1000)
record_df['time'] = record_df['final_time'].floordiv(1000)

In [5]:
# keep only the records whose perfect-finesse fraction is not missing
record_df = record_df.loc[record_df["percent_perf"].notna()]

In [6]:
# drop the records with over 5k finesse faults (three of them, per the original note)
# Index labels of the offending rows; kept under the same name for any later inspection.
crazy_outliers = record_df.index[record_df["finesse_faults"] > 5000]

# Remove every flagged label in a single call: dropping one label per loop
# iteration copies the whole frame each time and would raise KeyError on a
# label that appears more than once in the index.
record_df = record_df.drop(index=crazy_outliers)

In [7]:
# columns carried forward into the slimmed-down frame used by the cells below
chart_columns = [
    'time', 'percent_perf', 'pps', 'kps', 'kpp',
    'quads', 'doubles', 'singles', 'triples', 'pieces_placed',
]
small_record_df = record_df[chart_columns]

In [8]:
# unbind the full record frame and run a garbage-collection pass;
# the cells below work from small_record_df instead
del record_df
gc.collect()

40

***Warning: run only 1-2 of the chart cells below at a time.***

In [9]:
# finesse % chart

# alt.Chart(small_record_df[['time', 'percent_perf']], title='Finesse Percent vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)').scale(reverse=True, domain=[8,358]),
#     alt.Y('percent_perf').bin(maxbins=100).axis(title='Percent of Pieces Placed with Perfect Finesse', format='%').scale(domain=[0,1]),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.02
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [10]:
# x*y=100
# x*y=145
# lines_df = pd.DataFrame({'x': np.arange(1,400)})
# lines_df['Minimum'] = lines_df['x'].apply(lambda x: 100/x)
# lines_df['Maximum'] = lines_df['x'].apply(lambda x: 145/x)
# # lines_df = lines_df.melt(id_vars=['x'])

# lines = alt.Chart(lines_df, title="Predicted Minimum and Maximum PPS by Final Time").mark_line(clip=True, color='red').encode(
#     alt.X('x', title="Final Time (seconds)").scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('Minimum', title="Predicted PPS").scale(domainMax=7.5)
# ) + alt.Chart(lines_df).mark_line(clip=True, color='red').encode(
#     alt.X('x', title="Final Time (seconds)").scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('Maximum', title="Predicted PPS").scale(domainMax=7.5)
# )

# lines.properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.25
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis
# )

In [11]:
# PPS chart

# pps = alt.Chart(small_record_df[['time', 'pps']], title='Pieces per Second vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)').scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('pps').bin(maxbins=360).axis(title='Pieces Placed per Second'),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# )

# (pps+lines).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.25
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [12]:
# KPS chart

# alt.Chart(small_record_df, title='Keys per Second vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)', tickMinStep=5).scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('kps').bin(maxbins=100).axis(title='Keys Pressed per Second').scale(domainMin=0),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.5
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [13]:
# KPP chart

# alt.Chart(small_record_df[['time', 'kpp']], title='Keys per Piece vs. Final Time').mark_rect(clip=True).encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)', tickMinStep=5).scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('kpp').bin(maxbins=100).axis(title='Keys Pressed per Piece').scale(domainMin=2, domainMax=15),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.5
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [14]:
# Quads chart

# alt.Chart(small_record_df, title='Quads vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)', tickMinStep=5).scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('quads').bin(maxbins=11).axis(title='Number of Quads'),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.25
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [15]:
# Doubles chart

# alt.Chart(small_record_df, title='Doubles vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)', tickMinStep=5).scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('doubles').bin(maxbins=21).axis(title='Number of Doubles'),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.5
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )

In [16]:
# lines removed per clear of each type (column name -> lines per clear)
lines_per_clear = {'singles': 1, 'doubles': 2, 'triples': 3, 'quads': 4}
# output column holding each clear type's share of all cleared lines
# (the numeric prefixes presumably control ordering in the chart - confirm)
share_names = {'singles': '4Singles', 'doubles': '3Doubles', 'triples': '2Triples', 'quads': '1Quads'}

# total lines cleared per record, summed across the four clear types
small_record_df['total_cleared'] = sum(
    small_record_df[clear_type] * lines for clear_type, lines in lines_per_clear.items()
)

# fraction of the record's cleared lines contributed by each clear type
for clear_type, lines in lines_per_clear.items():
    small_record_df[share_names[clear_type]] = (
        (small_record_df[clear_type] * lines) / small_record_df['total_cleared']
    )

In [17]:
# average each clear-type share per whole-second final time, then reshape to
# long form (one row per time/variable pair) for plotting
smaller_df = (
    small_record_df[['time', '4Singles', '3Doubles', '2Triples', '1Quads']]
    .groupby('time')
    .mean()
    .reset_index()
    .melt(id_vars=['time'])
)
smaller_df

Unnamed: 0,time,variable,value
0,13.0,4Singles,0.150000
1,14.0,4Singles,0.121429
2,15.0,4Singles,0.040625
3,16.0,4Singles,0.050000
4,17.0,4Singles,0.050229
...,...,...,...
1359,349.0,1Quads,0.125915
1360,350.0,1Quads,0.123201
1361,351.0,1Quads,0.118728
1362,352.0,1Quads,0.112760


In [18]:
# 
# alt.Chart(smaller_df, title='Clear Types vs. Final Time').mark_area(opacity=0.5, line=True).encode(
#     alt.X('time').axis(title='Final Time (seconds)').scale(reverse=True, domainMax=353, domainMin=13),
#     alt.Y('value').axis(title="Percent of Lines Cleared by Type", format="%").scale(domainMax=1),
#     alt.Color('variable', title="Type").scale(scheme=sm_cat_scheme)
# ).properties(
#     height=400,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=0.1
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis
# )

In [None]:
# alt.Chart(small_record_df[['time', 'pieces_placed']], title='Pieces Placed vs. Final Time').mark_rect().encode(
#     alt.X('time').bin(maxbins=341).axis(title='Final Time (seconds)', tickMinStep=5).scale(reverse=True, domainMax=358, domainMin=8),
#     alt.Y('pieces_placed').bin(maxbins=45).axis(title='Total Pieces Placed').scale(domainMin=100, domainMax=145),
#     alt.Color('count():Q', title="Count").scale(scheme=seq_scheme)
# ).properties(
#     height=800,
#     width=800
# ).configure_title(
#     fontSize=fs_title
# ).configure_axisX(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=5,
#     tickMinStep=5
# ).configure_axisY(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelSeparation=10,
#     tickMinStep=1
# ).configure_legend(
#     labelFontSize=fs_tick,
#     titleFontSize=fs_axis,
#     labelBaseline='bottom'
# )