In [2]:
import math

In [3]:
# Submission file should have 1 predicted CTR for each test entry.
# This function returns a list of pCTR.
def ReadSubmissionFile(filename):
    """Read a submission file and return its predicted CTRs.

    Each line of the file is stripped of surrounding whitespace and parsed
    as a single float.

    Args:
        filename: Path of the submission file to read.

    Returns:
        A list of floats, one per line, in file order.

    Raises:
        ValueError: If a line cannot be parsed by ``float`` (this includes
            blank lines).
    """
    pctr = []
    # The context manager closes the handle even if parsing raises part-way
    # through the file.
    with open(filename) as f:
        for line in f:
            pctr.append(float(line.strip()))
    return pctr

In [4]:
# Solution file should have 3 comma separated columns [clicks,impressions,indicator]
# This function returns a list of clicks and a list of impressions.
def ReadSolutionFile(filename):
    """Read a solution file and return its clicks and impressions columns.

    Each line is stripped and split on commas. The first field is parsed as
    the click count and the second as the impression count; any further
    fields on the line are ignored.

    Args:
        filename: Path of the solution file to read.

    Returns:
        A tuple ``(clicks, impressions)`` of two equally long lists of
        floats, in file order.

    Raises:
        ValueError: If the first or second field is not parseable by
            ``float``.
        IndexError: If a line has fewer than two comma-separated fields.
    """
    clicks = []
    impressions = []
    # The context manager closes the handle even if parsing raises part-way
    # through the file.
    with open(filename) as f:
        for line in f:
            fields = line.strip().split(",")
            clicks.append(float(fields[0]))
            impressions.append(float(fields[1]))
    return clicks, impressions

In [5]:
# Calculate the score using an element-wise score function.
# The element-wise score function takes (click, impression, predicted_ctr)
# as arguments and returns the score for that entry.
def ScoreElementwiseMetric(clicks, impressions, pctrs, score_fn):
    """Return the impression-weighted mean of a per-entry score.

    Args:
        clicks: Sequence of click counts, one per entry.
        impressions: Sequence of impression counts, one per entry; each
            value is also used as that entry's weight.
        pctrs: Sequence of predicted CTRs, one per entry.
        score_fn: Callable invoked as ``score_fn(click, impression, pctr)``
            that returns the score of a single entry.

    Returns:
        The sum of ``score * impression`` over all entries divided by the
        sum of the impressions.

    Raises:
        ZeroDivisionError: If the impressions sum to zero (for example when
            the inputs are empty).
    """
    weighted_total = 0.0
    total_impressions = 0.0
    # zip() stops at the shortest input, so surplus entries are ignored.
    for entry in zip(clicks, impressions, pctrs):
        entry_weight = entry[1]
        weighted_total += score_fn(*entry) * entry_weight
        total_impressions += entry_weight
    return weighted_total / total_impressions

In [6]:
# Calculate the weighted root mean square error.
def WRMSE(clicks, impressions, pctrs):
    """Return the impression-weighted root mean square error.

    The per-entry error is the squared difference between the observed CTR
    (``click / impression``) and the predicted CTR. Entries are combined by
    ``ScoreElementwiseMetric`` and the square root of the result is returned.

    Args:
        clicks: Sequence of click counts, one per entry.
        impressions: Sequence of impression counts, one per entry.
        pctrs: Sequence of predicted CTRs, one per entry.

    Returns:
        The weighted RMSE as a float.
    """
    def squared_error(click, impression, pctr):
        # Squared gap between the observed CTR and the prediction.
        observed_ctr = click / impression
        return math.pow(observed_ctr - pctr, 2.0)

    mean_squared = ScoreElementwiseMetric(clicks, impressions, pctrs, squared_error)
    return math.sqrt(mean_squared)

In [7]:
# Calculate the normalized weighted mean absolute error.
def NWMAE(clicks, impressions, pctrs):
    """Return the impression-weighted mean absolute error.

    The per-entry error is the absolute difference between the observed CTR
    (``click / impression``) and the predicted CTR. Entries are combined by
    ``ScoreElementwiseMetric``.

    Args:
        clicks: Sequence of click counts, one per entry.
        impressions: Sequence of impression counts, one per entry.
        pctrs: Sequence of predicted CTRs, one per entry.

    Returns:
        The weighted mean absolute error as a float.
    """
    def absolute_error(click, impression, pctr):
        # Absolute gap between the observed CTR and the prediction.
        observed_ctr = click / impression
        return abs(observed_ctr - pctr)

    return ScoreElementwiseMetric(clicks, impressions, pctrs, absolute_error)

In [1]:
# Calculate the area under curve.
def AUC(clicks, impressions, pctrs):
    """Return the area under the curve for the predicted CTR ranking.

    Entries are visited in descending order of predicted CTR. Consecutive
    entries with the same predicted CTR are treated as one bucket, and each
    bucket contributes a trapezoid: the average of the cumulative clicks
    before and after the bucket, times the bucket's non-clicks
    (``impression - click``). The accumulated area is normalised by
    ``total clicks * total non-clicks``.

    Args:
        clicks: Sequence of click counts, one per entry.
        impressions: Sequence of impression counts, one per entry.
        pctrs: Sequence of predicted CTRs, one per entry.

    Returns:
        The AUC as a float.

    Raises:
        IndexError: If ``pctrs`` is empty.
        ZeroDivisionError: If the total clicks or total non-clicks is zero.
    """
    # Visit entries from the highest predicted CTR to the lowest.
    ranking = sorted(range(len(pctrs)), key=lambda pos: pctrs[pos], reverse=True)

    area = 0.0
    positives_seen = 0.0           # cumulative clicks so far
    positives_before_bucket = 0.0  # cumulative clicks when the current bucket began
    bucket_negatives = 0.0         # non-clicks inside the current bucket
    negatives_seen = 0.0           # cumulative non-clicks so far

    # Sentinel chosen to differ from the first pctr so that the first entry
    # opens a bucket; flushing the (empty) previous bucket adds zero area.
    bucket_pctr = pctrs[ranking[0]] + 1.0
    for idx in ranking:
        current_pctr = pctrs[idx]
        if bucket_pctr != current_pctr:
            # Close the previous bucket by adding its trapezoid.
            area += (positives_seen + positives_before_bucket) * bucket_negatives / 2.0
            positives_before_bucket = positives_seen
            bucket_negatives = 0.0
            bucket_pctr = current_pctr
        negatives = impressions[idx] - clicks[idx]
        bucket_negatives += negatives
        negatives_seen += negatives
        positives_seen += clicks[idx]

    # Close the final bucket, which the loop never flushes.
    area += (positives_seen + positives_before_bucket) * bucket_negatives / 2.0
    return area / (positives_seen * negatives_seen)