In [4]:
import numpy as np
import math
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.path import Path
import matplotlib.patches as patches
from ideas import intersection_over_union
from ideas import al
from ideas import aw

# Field names of one box record, followed by the same names tagged with the
# '_gd', '_fgd' and '_iou' suffixes.
box_columns = ['Xmin', 'Ymin', 'Xmax', 'Ymax', 'W', 'L']
box_columns_gd = [name + '_gd' for name in box_columns]
box_columns_fgd = [name + '_fgd' for name in box_columns]
box_columns_iou = [name + '_iou' for name in box_columns]

def weighted_averaging(iid, df, np_data, avg, columns, back_up = None):
    """Store the aggregated box of item ``iid`` in its row of ``avg``.

    The rows of ``np_data`` that belong to ``iid`` are combined column by
    column with weights ``1 / (al(rows) * aw(rows)) ** 2`` (``al`` and ``aw``
    come from the ``ideas`` module; presumably box length and width -- confirm
    there).  Two fallbacks exist:

    * if the sum of the weights is infinite (some row has a zero
      ``al * aw`` product) the plain column mean of the rows is used;
    * if the item has no rows at all, the values are copied from the matching
      row of ``back_up``.

    Parameters
    ----------
    iid
        Item identifier; must be a label of ``avg.index``.
    df : pandas.DataFrame
        Frame with an ``itemId`` column whose index values are the row
        positions of the same records inside ``np_data``.
    np_data : numpy.ndarray
        2-D array holding the box values of every record of ``df``.
    avg : pandas.DataFrame
        Result frame, modified in place.  The first ``len(columns)`` columns
        of row ``iid`` receive the box values and the following column
        receives ``iid``.
    columns : sequence
        Output column names; only the length is used.
    back_up : pandas.DataFrame, optional
        Frame with an ``itemId`` column plus the ``box_columns`` columns, used
        when ``iid`` has no rows in ``df``.

    Returns
    -------
    pandas.Series
        The updated row ``avg.loc[iid]`` (also on the ``back_up`` path).

    Raises
    ------
    ValueError
        If ``iid`` has no rows in ``df`` and ``back_up`` is missing or has no
        entry for ``iid``.
    """
    n_box = len(columns)
    row_pos = avg.index.get_loc(iid)
    items = df[df['itemId'] == iid]
    vi = np_data[items.index.values]

    if vi.shape[0] == 0:
        if back_up is None:
            raise ValueError('no rows for itemId %r and no back_up given' % (iid,))
        bp = back_up[back_up['itemId'] == iid][box_columns]
        if bp.empty:
            raise ValueError('no rows and no back_up entry for itemId %r' % (iid,))
        box = np.asarray(bp.iloc[0], dtype=float)[:n_box]
    else:
        sabx = (al(vi) * aw(vi)) ** 2
        # A zero product yields an infinite weight; that case is handled just
        # below, so the divide-by-zero warning carries no information.
        with np.errstate(divide='ignore'):
            sb = sum(1 / sabx)
        if math.isinf(sb):
            box = np.asarray(vi.mean(axis=0), dtype=float)[:n_box]
        else:
            box = np.array([sum(vi[:, y] / sabx) / sb for y in range(n_box)],
                           dtype=float)

    # Integer destination columns keep their dtype: values are truncated
    # toward zero, which is what writing a float into an integer cell does.
    if all(pd.api.types.is_integer_dtype(dt) for dt in avg.dtypes.iloc[:n_box]):
        box = box.astype(np.int64)

    # Write through the frame itself.  Chained ``avg.loc[iid][y] = ...`` only
    # reaches ``avg`` when pandas happens to return a view and is silently
    # lost under Copy-on-Write.
    avg.iloc[row_pos, :n_box] = box
    avg.iloc[row_pos, n_box] = iid
    return avg.loc[iid]

# Path command sequence: one MOVETO, three LINETO, then CLOSEPOLY.
codes = [Path.MOVETO] + [Path.LINETO] * 3 + [Path.CLOSEPOLY]

def weighted_averaging_file(fn, columns, fns, ratio, ratio_column, u_r = None):
    """Aggregate the boxes stored in ``fn + '.csv'`` into one row per item.

    The input CSV must provide ``itemId``, ``Xmin``, ``Ymin``, ``Xmax`` and
    ``Ymax`` (and ``userId`` when ``u_r`` is given).  The function derives
    ``L = Xmax - Xmin``, ``W = Ymax - Ymin``, ``area = L * W`` and
    ``area_ratio`` (a row's area divided by the largest area recorded for the
    same item), keeps the rows where ``ratio_column >= ratio`` and calls
    ``weighted_averaging`` once per item on the kept rows.  The per-item mean
    over all rows is handed to ``weighted_averaging`` as ``back_up``.  The
    result is written to ``fn + fns + '.csv'`` without the index.

    Parameters
    ----------
    fn : str
        Path of the input file without the ``.csv`` extension.
    columns : list of str
        Names of the box columns in the output file; ``itemId`` is appended.
    fns : str
        Suffix inserted before ``.csv`` in the output file name.
    ratio : float
        Minimum value of ``ratio_column`` a row needs in order to be kept.
    ratio_column : str
        Column used for filtering: ``'area_ratio'`` or any column available
        after the optional merge with ``u_r``.
    u_r : pandas.DataFrame, optional
        Per-user frame merged into the data on ``userId`` before filtering.
    """
    boxes = pd.read_csv(fn + '.csv')
    if u_r is not None:
        # Merge the frame that was passed in rather than a notebook-level
        # variable, so the function does not depend on hidden kernel state.
        boxes = boxes.merge(u_r, on=['userId'])
    ids = list(set(boxes['itemId']))

    boxes['L'] = boxes['Xmax'] - boxes['Xmin']
    boxes['W'] = boxes['Ymax'] - boxes['Ymin']
    boxes['area'] = boxes['L'] * boxes['W']
    # Both frames own an 'area' column, so the merge yields 'area_x' (row
    # area) and 'area_y' (largest area of the item).
    largest = boxes.groupby('itemId', as_index=False)['area'].max()
    boxes = boxes.merge(largest, on=['itemId'])
    boxes['area_ratio'] = boxes['area_x'] / boxes['area_y']

    kept = boxes.loc[boxes[ratio_column] >= ratio, box_columns + ['itemId']]
    # Positional index: weighted_averaging uses it to address kept_values.
    kept = kept.reset_index(drop=True)
    kept_values = kept[box_columns].values

    # Flat column index; wrapping the list in another list would build a
    # one-level MultiIndex.
    out_columns = list(columns) + ['itemId']
    result = pd.DataFrame(np.zeros((len(ids), len(out_columns)), dtype=int),
                          index=ids, columns=out_columns)

    back_up = boxes.groupby('itemId')[box_columns].mean().reset_index()
    for iid in ids:
        weighted_averaging(iid, kept, kept_values, result, columns, back_up)

    result.to_csv(fn + fns + '.csv', index=False)

In [7]:
# Per-user table indexed by userId; only the '_iou' runs receive it.
users_rating = pd.read_csv('users_rating.csv').set_index('userId')

# One entry per output file: (input stem, output columns, file suffix,
# threshold, filter column, optional per-user table).
for stem, out_columns, suffix, threshold, filter_column, ratings in [
    ('train_data', box_columns_fgd, '_fgd', 0, 'area_ratio', None),
    ('train_data', box_columns_gd, '_gd', .75, 'area_ratio', None),
    ('train_data', box_columns_iou, '_iou', .5, 'iou', users_rating),
    ('test_data', box_columns_gd, '_gd', .75, 'area_ratio', None),
    ('test_data', box_columns_fgd, '_fgd', 0, 'area_ratio', None),
    ('test_data', box_columns_iou, '_iou', .5, 'iou', users_rating),
]:
    weighted_averaging_file(stem, out_columns, suffix, threshold, filter_column, ratings)

