In [18]:
import numpy as np
import pandas as pd
from ideas import intersection_over_union
from ideas import al

def size_score(line, theta, true_column_suffix):
    """Score how closely the predicted box area matches the reference area.

    Args:
        line: Row-like mapping holding ``'area'`` and
            ``'area' + true_column_suffix``.
        theta: Ratio at which the score reaches 0; must differ from 1.
        true_column_suffix: Suffix that marks the reference columns.

    Returns:
        ``1 - (1 - r)/(1 - theta)`` where ``r`` is smaller area / larger area:
        1 for equal areas, 0 when ``r == theta``, negative below that.
        Returns 0 when the larger area is 0.
    """
    predicted = line['area']
    reference = line['area' + true_column_suffix]
    # Order the pair so the ratio below is always smaller / larger.
    if predicted < reference:
        larger, smaller = reference, predicted
    else:
        larger, smaller = predicted, reference
    # Nothing to compare against (and no valid denominator) for empty boxes.
    if larger == 0:
        return 0
    ratio = smaller/larger
    return 1 - (1 - ratio)/(1 - theta)

def shape_score(line, theta, true_column_suffix):
    """Score how closely the predicted box aspect ratio matches the reference.

    The aspect ratio of each box is ``L / max(1, W)``; the similarity is the
    smaller aspect ratio divided by the larger one, so identical shapes always
    give a similarity of 1 regardless of orientation.

    Args:
        line: Row-like mapping holding ``'L'``, ``'W'`` and the same keys with
            ``true_column_suffix`` appended.
        theta: Similarity at which the score reaches 0; must differ from 1.
        true_column_suffix: Suffix that marks the reference columns.

    Returns:
        ``1 - (1 - similarity)/(1 - theta)``: 1 for identical aspect ratios,
        0 when the similarity equals ``theta``, negative below that. When the
        larger aspect ratio is 0 the similarity is taken as 0.
    """
    # Clamp the divisors to at least 1 so a zero-height box cannot divide by zero.
    pred_ratio = line['L']/max(1, line['W'])
    true_ratio = line['L' + true_column_suffix]/max(1, line['W' + true_column_suffix])

    if pred_ratio > true_ratio:
        lo, hi = true_ratio, pred_ratio
    else:
        lo, hi = pred_ratio, true_ratio

    # Divide by the larger ratio itself. Clamping it to 1 (as was done before)
    # made two identical boxes with L/W < 1 look dissimilar, because the
    # "similarity" degenerated to the raw aspect ratio.
    if hi == 0:
        similarity = 0
    else:
        similarity = lo/hi
    return 1 - (1 - similarity)/(1 - theta)

def position_score(line, theta, true_column_suffix):
    """Score how close the predicted box centre is to the reference centre.

    The absolute centre offset along each axis is divided by the larger of the
    two box extents on that axis (clamped to at least 1) and by ``1 - theta``;
    the score is 1 minus the mean of the two normalised offsets.

    Args:
        line: Row-like mapping holding ``'L'``, ``'W'``, ``'X'``, ``'Y'`` and
            the same keys with ``true_column_suffix`` appended.
        theta: Rescaling threshold; must differ from 1.
        true_column_suffix: Suffix that marks the reference columns.

    Returns:
        1 when the centres coincide, decreasing as they move apart.
    """
    span_x = max(1, max(line['L'], line['L' + true_column_suffix]))
    span_y = max(1, max(line['W'], line['W' + true_column_suffix]))
    offset_x = abs(line['X'] - line['X' + true_column_suffix])
    offset_y = abs(line['Y'] - line['Y' + true_column_suffix])
    penalty_x = offset_x/span_x/(1 - theta)
    penalty_y = offset_y/span_y/(1 - theta)
    # The original called an `avg` helper that is not defined or imported in
    # this notebook; it is assumed to be the arithmetic mean of its arguments.
    return 1 - (penalty_x + penalty_y)/2

def prepare(users_rating, fn, fns, theta=.5, true_column_suffix='_gd'):
    """Build the scored table for one dataset and write it to ``fn + '_full.csv'``.

    Reads ``fn + '.csv'``, right-merges ``users_rating`` onto it by ``userId``,
    then right-merges each ``fn + suffix + '.csv'`` (for ``suffix`` in ``fns``)
    by ``itemId``. From the ``Xmin/Xmax/Ymin/Ymax`` columns (plain and with
    ``true_column_suffix``) it derives ``L``, ``W``, ``area``, ``X`` and ``Y``,
    computes the ``size``, ``shape`` and ``position`` scores per row, and
    stores their mean in ``score``.

    Args:
        users_rating: DataFrame that can be merged on ``userId``.
        fn: Base path of the dataset, without the ``.csv`` extension.
        fns: Suffixes of the additional CSV files to merge in.
        theta: Threshold passed to the three score functions.
        true_column_suffix: Suffix of the reference box columns.

    Returns:
        None. The result is written to disk only.
    """
    t_d = pd.read_csv(fn + '.csv')
    t_d = pd.merge(users_rating, t_d, on=['userId'], how='right')
    for extra_suffix in fns:
        t_d = pd.merge(pd.read_csv(fn + extra_suffix + '.csv'), t_d,
                       on=['itemId'], how='right')

    # Derive extents, area and centre for both the plain and the reference box.
    for i in ['', true_column_suffix]:
        t_d['L' + i] = t_d['Xmax' + i] - t_d['Xmin' + i]
        t_d['W' + i] = t_d['Ymax' + i] - t_d['Ymin' + i]
        t_d['area' + i] = t_d['L' + i] * t_d['W' + i]
        t_d['X' + i] = (t_d['Xmax' + i] + t_d['Xmin' + i])/2
        t_d['Y' + i] = (t_d['Ymax' + i] + t_d['Ymin' + i])/2

    score_kwargs = dict(theta=theta, true_column_suffix=true_column_suffix)
    t_d['size'] = t_d.apply(size_score, axis=1, **score_kwargs)
    t_d['shape'] = t_d.apply(shape_score, axis=1, **score_kwargs)
    # Was shape_score (copy-paste), which made 'position' a duplicate of 'shape'.
    t_d['position'] = t_d.apply(position_score, axis=1, **score_kwargs)
    t_d['score'] = t_d[['size', 'shape', 'position']].mean(axis=1)
    t_d.to_csv(fn + '_full.csv', index=False)

In [5]:
# Load the per-user ratings and index them by userId, the name prepare() merges on.
users_rating = pd.read_csv('users_rating.csv').set_index('userId')

In [19]:
# Build and save the scored table for the test split, then the train split.
for dataset_name in ('test_data', 'train_data'):
    prepare(users_rating, dataset_name, ['_gd'])