In [37]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import linear_model
from sklearn.isotonic import IsotonicRegression
from ideas import intersection_over_union
from ideas import bb_intersection_over_union
from sklearn.metrics import make_scorer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.model_selection import cross_val_score
from ideas import al
from ideas import aw
import math

In [84]:
# Corner coordinates of a bounding box; 'L' and 'W' are two extra per-box columns.
columns = ['Xmin', 'Ymin', 'Xmax', 'Ymax']
box_columns = columns + ['L', 'W']

# The same six names with each suffix found in the data files.
box_columns_gd = [name + '_gd' for name in box_columns]
box_columns_fgd = [name + '_fgd' for name in box_columns]
box_columns_iou = [name + '_iou' for name in box_columns]

# Optional feature groups, all currently disabled (empty).
# Candidate center features that were listed here, commented out:
#   'Xcenter_iou', 'Ycenter_iou', 'Xcenter_gd', 'Ycenter_gd',
#   'Xcenter', 'Ycenter', 'XcenterL', 'YcenterW',
#   'XcenterL_iou', 'YcenterW_iou', 'XcenterL_gd', 'YcenterW_gd'
center_columns = []
# Candidate ratio features: ['XcenterDelta_iou', 'YcenterDelta_iou'],
#   'Xratio', 'Yratio', 'box_iou', 'iou'
columns_ratio = []
# Candidate per-user features: ['XminError', 'XmaxError', 'YminError', 'YmaxError'],
#   ['XError', 'YError']
user_columns = []

# Feature columns fed to the scaler and the model.
box_columns_all = box_columns + box_columns_gd + center_columns + user_columns
# Regression targets.
box_columns_true = [name + '_true' for name in columns]
# Layout of the prediction / answer frames: four corners followed by the item id.
box_columns_id = columns + ['itemId']

In [35]:
def weighted_averaging(iid, df, np_data, avg, columns, back_up = None):
    """Collapse all predicted boxes of one item into row ``iid`` of ``avg``.

    Rows of ``df`` whose ``itemId`` equals ``iid`` are selected, and their index
    labels are used as row positions into ``np_data``. Each selected box gets
    the weight ``1 / (al(box) * aw(box)) ** 2`` and the weighted mean of its
    first ``len(columns)`` values is stored in ``avg``. ``al`` and ``aw`` come
    from the project's ``ideas`` module (presumably box length and width --
    TODO confirm).

    Parameters
    ----------
    iid : scalar
        Item id; must be a label of ``avg.index``.
    df : pandas.DataFrame
        Frame with an ``itemId`` column. Its index labels must be valid row
        positions of ``np_data``.
    np_data : numpy.ndarray
        2-D array of boxes, one row per prediction.
    avg : pandas.DataFrame
        Output frame, modified in place. As in the original positional writes,
        its first ``len(columns)`` columns receive the averaged values and the
        column right after them receives ``iid``.
    columns : sequence
        Only its length is used: the number of leading values to average.
    back_up : pandas.DataFrame, optional
        Used only when ``iid`` has no rows in ``df``. Must contain ``itemId``
        and the global ``box_columns``; the first matching row is copied.

    Returns
    -------
    pandas.Series
        ``avg.loc[iid]`` after the update.

    Raises
    ------
    ValueError
        If ``iid`` has no rows in ``df`` and ``back_up`` is missing or has no
        row for ``iid``.
    """
    n_coords = len(columns)
    # Resolve the positional targets to labels once, so every write below is a
    # single .loc assignment on `avg` itself instead of a chained assignment on
    # a temporary row (which is not guaranteed to reach `avg`).
    coord_names = list(avg.columns[:n_coords])
    id_name = avg.columns[n_coords]

    def _store(name, value):
        # Integer columns cannot hold fractional values; truncate the way a
        # numpy element assignment into an integer array does, so an
        # integer-typed `avg` keeps its dtype.
        dtype = avg[name].dtype
        if isinstance(dtype, np.dtype) and dtype.kind in 'iu':
            value = dtype.type(value)
        avg.loc[iid, name] = value

    items = df[df['itemId'] == iid]
    vi = np_data[items.index.values]
    _store(id_name, iid)

    if vi.shape[0] == 0:
        # No predictions for this item: copy the box from the back-up frame.
        if back_up is None:
            raise ValueError(
                'no rows for itemId %r and no back_up frame was given' % (iid,))
        bp = back_up[back_up['itemId'] == iid]
        if bp.empty:
            raise ValueError('no back_up row for itemId %r' % (iid,))
        for y in range(n_coords):
            _store(coord_names[y], bp[box_columns[y]].iloc[0])
        return avg.loc[iid]

    sabx = (np.asarray(al(vi), dtype=float) * np.asarray(aw(vi), dtype=float)) ** 2
    # A zero-area box yields an infinite weight; that case is handled below,
    # so the division warning is silenced here.
    with np.errstate(divide='ignore'):
        weights = 1.0 / sabx
    sb = weights.sum()

    if np.isinf(sb):
        # Weights are unusable: fall back to the plain mean.
        row = vi.mean(axis=0)
    else:
        row = (vi[:, :n_coords] * weights[:, None]).sum(axis=0) / sb

    for y in range(n_coords):
        _store(coord_names[y], row[y])

    return avg.loc[iid]

In [85]:
# Raw inputs: test features, train features and the train ground-truth boxes.
te_d = pd.read_csv('test_data_full.csv')
t_d = pd.read_csv('train_data_full.csv')
t_a = pd.read_csv('train_answers.csv')

# Attach the ground truth to every training row of the same item.
t_d_a = t_d.merge(t_a, on="itemId")
ids = list(set(te_d['itemId']))

# One MinMaxScaler is fitted on the train and test feature rows stacked
# together, so both frames are mapped with the same per-column min/max.
scaler_fit_d = np.concatenate(
    (t_d_a[box_columns_all].values, te_d[box_columns_all].values),
    axis=0,
)
scaler = MinMaxScaler()
scaler.fit(scaler_fit_d)

# Overwrite the feature columns of both frames with their scaled values.
t_d_a[box_columns_all] = scaler.transform(t_d_a[box_columns_all])
te_d[box_columns_all] = scaler.transform(te_d[box_columns_all])

In [4]:
def my_custom_loss_func(y_true, y_pred):
    """Mean ``bb_intersection_over_union`` between matching rows.

    Despite the name, the value is a score: the mean of the per-row results of
    ``bb_intersection_over_union(true_row, predicted_row)``.

    Parameters
    ----------
    y_true : array-like of shape (n, k)
        Reference boxes; a DataFrame, ndarray or nested list.
    y_pred : array-like of shape (n, k)
        Predicted boxes, row-aligned with ``y_true``.

    Returns
    -------
    float
        Mean of the per-row values. Rows of ``y_true`` beyond ``len(y_pred)``
        count as 0, as in the original implementation.
    """
    # Normalise both inputs to arrays so the scorer works whether it is handed
    # a DataFrame or a plain ndarray.
    true_boxes = np.asarray(y_true)
    pred_boxes = np.asarray(y_pred)

    iou = np.zeros(len(true_boxes))
    for i in range(len(pred_boxes)):
        iou[i] = bb_intersection_over_union(true_boxes[i], pred_boxes[i])
    return iou.mean()

In [88]:
# Keep only the training rows whose 'iou' value exceeds `rate`.
rate = .2
# Seed for the hold-out split, so the split and the score printed below are
# reproducible on Restart & Run All.
SEED = 42

keep = t_d_a['iou'] > rate
X = t_d_a.loc[keep, box_columns_all + columns_ratio]
y = t_d_a.loc[keep, box_columns_true]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=SEED)

# Alternative estimators that were left commented out in this cell:
#   linear_model.MultiTaskLassoCV(alphas=np.logspace(-4, 0, 5), eps=0.5, cv=4, max_iter=100000)
#   MultiOutputRegressor(linear_model.LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=1000))
#   linear_model.MultiTaskElasticNetCV(alphas=np.logspace(-4, 0, 5), cv=4, l1_ratio=0.8, max_iter=100000)
#   linear_model.LassoLarsCV()
# Ridge regression; alpha is chosen by 4-fold CV using my_custom_loss_func
# as the score.
clf = linear_model.RidgeCV(alphas=np.logspace(-4, 0, 10),
                           cv=4, scoring=make_scorer(my_custom_loss_func))
clf.fit(X_train, y_train)

# Hold-out check: mean bb_intersection_over_union of predicted vs. true boxes.
y_test_pred = clf.predict(X_test)
iou = np.zeros(len(y_test_pred))
for i in range(0, len(y_test_pred)):
    iou[i] = bb_intersection_over_union(y_test_pred[i], y_test.iloc[i].values)
print(iou.mean())

0.5408622886335104


In [89]:
# Predict a box for every test row and tag each prediction with its itemId.
X_submit = te_d[box_columns_all + columns_ratio]
y_submit = clf.predict(X_submit)
y_submit_pd = pd.DataFrame(
    data=np.c_[y_submit, te_d['itemId']],
    columns=box_columns_id,
)

# One (initially zero, integer) answer row per distinct item, indexed by item id.
ids = list(set(y_submit_pd['itemId']))
answers = pd.DataFrame(
    np.zeros((len(ids), len(box_columns_id)), dtype=int),
    index=ids,
    columns=box_columns_id,
)

# Merge each item's predictions into its answer row (fills `answers` in place).
for iid in ids:
    weighted_averaging(iid, y_submit_pd, y_submit, answers, columns)

# Submission file: itemId first, then the four corners; no header, no index.
answers[['itemId'] + columns].to_csv('submit2401.csv', index=False, header=False)

# Score the answers against submit-max.csv, which must provide the *_true
# columns for the same itemIds.
data = answers.merge(pd.read_csv("submit-max.csv"), on=['itemId'])
iou_inputs = ['Xmin','Ymin', 'Xmax', 'Ymax', 'Xmin_true',
              'Ymin_true', 'Xmax_true','Ymax_true']
data["iou"] = data[iou_inputs].apply(intersection_over_union, axis=1)
data["iou"].mean()

0.7280086606052493