# In [2]:
import numpy as np
import math
import sys

# ----------------------------------------------------------------------------------------------------------------

# Column name under which a record's id is stored; the helpers in this file
# spell the same '_id.$oid' literal out directly when they read gameplays.
id_column = '_id.$oid'
# ----------------------------------------------------------------------------------------------------------------
# secondlastentry & lastentry correspond to '_id.$oid' from gameplays db
# we expand A to "secondlastentry", B to "lastentry" and BOTH to ["secondlastentry", "lastentry"]
# in columns fun, challenging and frustrating
# Lookup table: short answer code -> name of the entry column it stands for.
d = dict(A="secondlastentry", B="lastentry")


def expand_AB(x):
    """Translate a short answer code into the entry column it refers to.

    "A" becomes "secondlastentry" and "B" becomes "lastentry"; any other
    value is handed back untouched.
    """
    return d.get(x, x)
def expand_BOTH(x):
    """Replace the answer "both" by the list of both entry column names.

    Returns a new ["secondlastentry", "lastentry"] list when x equals "both";
    every other value is returned as it came in.
    """
    return ["secondlastentry", "lastentry"] if x == "both" else x
    
def replace_column(db, column, function):
    """Overwrite db[column] with the result of applying `function` to each of its values.

    The frame is changed in place; nothing is returned.
    """
    transformed = db[column].apply(function)
    db[column] = transformed

# Questionnaire columns whose answers are expanded by the helpers in this file
# (iterated over whenever all three columns get the same treatment).
ffc_columns = ["fun", "frustrating", "challenging"]

# expand A to "secondlastentry", B to "lastentry" in columns fun, frustrating and challenging
def expand_AB_all(questionresponses):
    """Run expand_AB over every column listed in ffc_columns.

    The columns of `questionresponses` are rewritten in place; nothing is
    returned.
    """
    for name in ffc_columns:
        replace_column(questionresponses, name, expand_AB)

# expand BOTH to ["secondlastentry", "lastentry"] in columns fun, frustrating and challenging
def expand_BOTH_all(questionresponses):
    """Run expand_BOTH over every column listed in ffc_columns.

    The columns of `questionresponses` are rewritten in place; nothing is
    returned.
    """
    for name in ffc_columns:
        replace_column(questionresponses, name, expand_BOTH)
        
def expand_questionresponses(questionresponses):
    """Expand the A / B codes first, then the "both" answers, in the ffc columns.

    The frame is modified in place and the same object is returned.
    """
    for expand_step in (expand_AB_all, expand_BOTH_all):
        expand_step(questionresponses)
    return questionresponses
    
    
# ----------------------------------------------------------------------------------------------------------------  
# explode questionresponses on secondlastentry and lastentry, + indicate which_entry 
# turn every row about secondlastentry and lastentry into 2 rows
# secondlastentry_id | lastentry_id | fun | frustrating | challenging
# secondlastentry_id | "secondlastentry" | fun | frustrating | challenging
# lastentry_id       | "lastentry"       | fun | frustrating | challenging
# where fun | frustrating | challenging
#        are True if evaluated positively under "secondlastentry" or "lastentry" or "both"
def explode_questionresponses(questionresponses, gameplays):
    """Turn every questionnaire row into one row per evaluated game.

    Each input row refers to two games through its 'secondlastentry' and
    'lastentry' columns.  The result holds two rows per input row with

    * 'entry'       -- the game id taken from one of those two columns,
    * 'which_entry' -- 'secondlastentry' or 'lastentry', i.e. the column the
      id was taken from,
    * one boolean column per name in ffc_columns -- True when the answer in
      that column names this row's 'which_entry' (directly, or as a member of
      a list answer),

    followed by the columns of `gameplays`, left-joined on
    entry == gameplays['_id.$oid'].

    The answers are compared against the literal names 'secondlastentry' and
    'lastentry', so the A / B / both codes must already have been expanded.
    Neither input frame is modified.
    """
    # assign() returns a new frame, so the caller's questionresponses does not
    # silently gain an 'entry' column.
    pairs = questionresponses[['secondlastentry', 'lastentry']].values.tolist()
    q = questionresponses.assign(entry=pairs).explode('entry')

    # Every pair explodes into exactly two consecutive rows, in column order.
    # Labelling by position (rather than by comparing ids) stays correct when
    # both columns hold the same id or when an id is missing.
    q['which_entry'] = np.tile(['secondlastentry', 'lastentry'], len(pairs))

    # .copy() yields an independent frame, so the column writes below are not
    # chained assignments on a slice.
    q = q[['entry', 'which_entry', 'fun', 'challenging', 'frustrating']].copy()

    for column in ffc_columns:
        # An answer is either a single value or a list of values; wrap single
        # values so membership can be tested the same way for both.
        q[column] = [
            which in (answer if isinstance(answer, list) else [answer])
            for which, answer in zip(q['which_entry'], q[column])
        ]

    # append gameplay data
    return q.merge(gameplays, how='left', left_on='entry', right_on='_id.$oid')
    
# ----------------------------------------------------------------------------------------------------------------
def add_distance_per_jump(gameplays):
    """Add a 'distance_per_jump' column (actualDistance / nr_jumps) to gameplays.

    Rows whose nr_jumps is 0 receive sys.maxsize (stored as a float) instead
    of a division result.  The column is written into `gameplays` itself and
    that same frame is returned.
    """
    df = gameplays
    jumps = df['nr_jumps']
    has_jumps = jumps != 0
    # Mask zero jump counts to NaN before dividing so no division by zero is
    # evaluated, then put the sentinel into those rows.  Column arithmetic
    # also works on an empty frame, where a row-wise apply() does not produce
    # a single column.
    per_jump = df['actualDistance'] / jumps.where(has_jumps)
    df['distance_per_jump'] = per_jump.where(has_jumps, float(sys.maxsize))
    return df

# ----------------------------------------------------------------------------------------------------------------
# filter out scores that are too high (actualDistance of 5000 or more), and games with
# too few jumps for the distance covered (more than 100 distance per jump)
def preprocess_gameplays(gameplays):
    """Return the rows of gameplays that pass both plausibility filters.

    A row is kept when actualDistance is below 5000 and actualDistance divided
    by nr_jumps is at most 100.  The input frame is left as it is.
    """
    plausible_score = gameplays.loc[gameplays['actualDistance'] < 5000]
    per_jump = plausible_score['actualDistance'] / plausible_score['nr_jumps']
    return plausible_score.loc[per_jump <= 100]

# ----------------------------------------------------------------------------------------------------------------
# filter out questionresponses that took too long to fill in
def preprocess_questionresponses(questionresponses):
    """Return only the responses whose 'time' value is below 60000.

    The input frame is left as it is.
    """
    quick_enough = questionresponses['time'] < 60000  # max 1 minute
    return questionresponses.loc[quick_enough]

# ----------------------------------------------------------------------------------------------------------------
# relevant parameters from gameplays
# Column names only: the 'parameters.*' columns plus actualDistance, the
# collision obstacle type, nr_jumps and invertedGameOver.  No function visible
# in this part of the file reads this list.
parameters = ['parameters.SPEED', 
              'parameters.ACCELERATION',
              'parameters.MIN_GAP',
              'parameters.NIGHT_MODE_ENABLED', 
              'parameters.NIGHT_MODE_DISTANCE',
              'parameters.CLEAR_TIME', 
              'parameters.MAX_OBSTACLE_LENGTH',
              'parameters.MAX_SPEED',
              'parameters.MAX_GAP', 
              'actualDistance', 
              'collisionObstacle.typeConfig.type',
              'nr_jumps',
              'invertedGameOver'
             ] 

# ----------------------------------------------------------------------------------------------------------------
# Integer code used for each obstacle type name.
obstacle_nums = {'CACTUS_LARGE': 0, 'CACTUS_SMALL': 1, 'PTERODACTYL': 2}

# ----------------------------------------------------------------------------------------------------------------
# convert collisionObstacle to ints
def convert_collisionObstacle_numeric(gameplays):
    """Replace obstacle type names by their integer codes from obstacle_nums.

    The 'collisionObstacle.typeConfig.type' column of `gameplays` is rewritten
    in place and the same frame is returned.  A column that holds no strings
    (already converted, or empty) is left alone, so the call can be repeated.
    """
    a = gameplays
    column = 'collisionObstacle.typeConfig.type'
    # Look at the values themselves rather than at the row labelled 0: that
    # label may be gone after filtering, and the first value may be missing
    # while later ones still hold names.
    holds_names = a[column].map(lambda value: isinstance(value, str)).any()
    if holds_names:
        a[column] = a[column].replace(obstacle_nums)
    return a


# ----------------------------------------------------------------------------------------------------------------
def scale_data(data):
    """Fit a fresh StandardScaler on `data` and return the transformed data."""
    scaler = StandardScaler()
    return scaler.fit_transform(data)

# ----------------------------------------------------------------------------------------------------------------
from sklearn.preprocessing import StandardScaler
import umap 
def umap_fit(data, n_neighbors=15, min_dist=0.1):
    """Standardize `data`, then return its UMAP embedding.

    n_neighbors and min_dist are handed to umap.UMAP unchanged; every other
    UMAP setting keeps the library default.
    """
    standardized = StandardScaler().fit_transform(data)
    reducer = umap.UMAP(n_neighbors=n_neighbors, min_dist=min_dist)
    return reducer.fit_transform(standardized)



# ----------------------------------------------------------------------------------------------------------------
def nice_number(value, round_=False):
    '''nice_number(value, round_=False) -> float

    Return 1, 2, 5 or 10 times the power of ten at or below `value`.
    With round_=True the multiplier is picked with cut-offs 1.5 / 3 / 7;
    otherwise the smallest of 1, 2, 5, 10 that is not below the leading
    fraction is used.
    '''
    exponent = math.floor(math.log(value, 10))
    fraction = value / 10 ** exponent

    if round_:
        # strict upper limits: the first limit above the fraction wins
        limits = ((1.5, 1.), (3., 2.), (7., 5.))
        nice_fraction = next(
            (nice for limit, nice in limits if fraction < limit), 10.)
    else:
        # inclusive upper limits: the first limit not below the fraction wins
        limits = ((1, 1.), (2, 2.), (5, 5.))
        nice_fraction = next(
            (nice for limit, nice in limits if fraction <= limit), 10.)

    return nice_fraction * 10 ** exponent


def nice_bounds(axis_start, axis_end, num_ticks=10):
    '''
    nice_bounds(axis_start, axis_end, num_ticks=10) -> tuple
    @return: tuple as (nice_axis_start, nice_axis_end, nice_tick_width)

    A zero-width axis is returned unchanged with a tick width of 0.
    Otherwise the tick width comes from nice_number and the start / end are
    moved outwards to the nearest multiple of that width.
    '''
    span = axis_end - axis_start
    if span == 0:
        return axis_start, axis_end, 0

    tick = nice_number(nice_number(span) / (num_ticks - 1), round_=True)
    lower = math.floor(axis_start / tick) * tick
    upper = math.ceil(axis_end / tick) * tick
    return lower, upper, tick
# ----------------------------------------------------------------------------------------------------------------
# remove forms that have been filled in twice, keep the last one
def drop_duplicates_questionresponses(questionresponses):
    """Keep one response per (secondlastentry, lastentry) pair: the last one.

    A new frame is returned; the input is left as it is.
    """
    pair_columns = ['secondlastentry', 'lastentry']
    return questionresponses.drop_duplicates(subset=pair_columns, keep='last')

# ----------------------------------------------------------------------------------------------------------------
# get all the id's that occur in questionresponses as a list
def get_evaluated_entries(questionresponses):
    """Return every id found in 'secondlastentry' / 'lastentry', without repeats.

    Ids are listed in order of first appearance, reading row by row with
    'secondlastentry' before 'lastentry'.  `questionresponses` is only read:
    no helper column is added to it.
    """
    pairs = questionresponses[['secondlastentry', 'lastentry']].values.tolist()
    flattened = [entry for pair in pairs for entry in pair]
    # dict.fromkeys drops repeats while keeping first-seen order
    return list(dict.fromkeys(flattened))

# ----------------------------------------------------------------------------------------------------------------
# get all the id's that occur in gameplays as a list
def get_played_games(gameplays):
    """Return the distinct values of gameplays['_id.$oid'] in first-seen order."""
    # dict keys are unique and keep insertion order, which removes duplicates
    unique_ids = dict.fromkeys(gameplays['_id.$oid'].values.tolist())
    return list(unique_ids)

# ----------------------------------------------------------------------------------------------------------------
# restrict gameplays to the ones that were also evaluated in questionresponses
def get_useful_gameplays(gameplays, questionresponses):
    """Return the ids that occur both in questionresponses and in gameplays.

    The result is a list built from a set, so its order is not meaningful.
    """
    evaluated = set(get_evaluated_entries(questionresponses))
    played = get_played_games(gameplays)
    # get intersection
    return list(evaluated.intersection(played))

# ----------------------------------------------------------------------------------------------------------------
def filter_useful_gameplays(gameplays, questionresponses):
    """Return the gameplays rows whose '_id.$oid' is among the useful ids."""
    keep_ids = get_useful_gameplays(gameplays, questionresponses)
    is_useful = gameplays['_id.$oid'].isin(keep_ids)
    return gameplays[is_useful]
    
# ----------------------------------------------------------------------------------------------------------------
def filter_useful_questionresponses(questionresponses, gameplays):
    """Return the responses whose two entries are both among the useful ids."""
    keep_ids = get_useful_gameplays(gameplays, questionresponses)
    last_ok = questionresponses['lastentry'].isin(keep_ids)
    second_ok = questionresponses['secondlastentry'].isin(keep_ids)
    return questionresponses[last_ok & second_ok]
# ----------------------------------------------------------------------------------------------------------------
    
def filter_useful_gameplays_questionresponses(gameplays, questionresponses):
    """Filter gameplays and questionresponses against each other, twice.

    The first pass filters each frame against the other unfiltered frame.
    The second pass filters the first-pass gameplays against the first-pass
    responses, and the first-pass responses against the first-pass gameplays.

    Returns the tuple (useful_gameplays, useful_questionresponses).
    """
    first_gameplays = filter_useful_gameplays(gameplays, questionresponses)
    first_responses = filter_useful_questionresponses(questionresponses, gameplays)

    # second pass
    second_gameplays = filter_useful_gameplays(first_gameplays, first_responses)
    second_responses = filter_useful_questionresponses(first_responses, first_gameplays)

    return second_gameplays, second_responses


# ----------------------------------------------------------------------------------------------------------------
    