In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import linear_model
from sklearn.isotonic import IsotonicRegression
from ideas import intersection_over_union
from ideas import bb_intersection_over_union
from sklearn.metrics import make_scorer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.model_selection import cross_val_score
from ideas import al
from ideas import aw
import math

In [2]:
columns = ['Xmin','Ymin','Xmax','Ymax']
box_columns = columns
box_columns_gd = list(map(lambda x: x + '_gd', box_columns))
box_columns_fgd = list(map(lambda x: x + '_fgd', box_columns))
box_columns_iou = list(map(lambda x: x + '_iou', box_columns))

center_columns = ['L', 'W', 'Xcenter', 'Ycenter']
# 'L', 'W', 'Xcenter', 'Ycenter'
l, w, xc, yc = 0,1,2,3 
center_columns_gd = list(map(lambda x: x + '_gd', center_columns))
center_columns_gd = list(map(lambda x: x + '_iou', center_columns))

# ['XcenterDelta_iou', 'YcenterDelta_iou']
# 'Xratio', 'Yratio'
# 'box_iou', 'iou'
columns_ratio = []
# user_columns = ['XminError', 'XmaxError', 'YminError', 'YmaxError']
# ['XError', 'YError']
user_columns = []

box_columns_all = box_columns + box_columns_gd + user_columns
center_columns_all = center_columns + box_columns_gd + user_columns
center_columns_true = list(map(lambda x: x + '_true', center_columns))
scaler_columns = list(set(box_columns_all + center_columns_all))
box_columns_true = list(map(lambda x: x + '_true', columns))
box_columns_id = columns + ['itemId']
box_test_columns = list(map(lambda x: x + '_test', columns))
box_test_columns_id = box_test_columns + ['itemId']

In [3]:
def weighted_averaging(iid, df, np_data, avg, columns, back_up = None):
    global box_columns
    items = df[df['itemId'] == iid]
    vi = np_data[items.index.values]
    bl = len(columns)
    avg.loc[iid][bl] = iid
    if vi.shape[0] == 0:
        bp = back_up[back_up['itemId'] == iid][box_columns]
        for y in range(0, bl):                
            avg.loc[iid][y] = bp[box_columns[y]]               
        return
    l = al(vi)
    w = aw(vi)
    sabx = (l * w) ** 2
    sb = sum(1 / sabx)    
    if math.isinf(sb):
        vi = vi.mean(axis=0)
        for y in range(0, bl):                
            avg.loc[iid][y] = vi[y]
    else:
        for y in range(0, bl):                
            avg.loc[iid][y] = sum(vi[:,y] / sabx) / sb
    
    return avg.loc[iid]

def add_center_columns(df):
    df['L_true'] = df['Xmax_true'] - df['Xmin_true']
    df['W_true'] = df['Ymax_true'] - df['Ymin_true'] 
    df['Xcenter_true'] = (df['Xmax_true'] + df['Xmin_true'])/2
    df['Ycenter_true'] = (df['Ymax_true'] + df['Ymin_true'])/2    
    return df

In [73]:
te_d = pd.read_csv('test_data_full.csv')
t_d = pd.read_csv('train_data_full.csv')
t_a = add_center_columns(pd.read_csv('train_answers.csv'))
removeIds = []
t_d_a = t_d.merge(t_a, on="itemId")
t_d_a = t_d_a.drop(t_d_a[t_d_a['itemId'].isin(removeIds)].index)
ids = list(set(te_d['itemId']))
scaler = MinMaxScaler()
scaler_fit_d = t_d_a[scaler_columns].values
scaler_fit_d = np.append(scaler_fit_d, te_d[scaler_columns].values, axis = 0)
scaler.fit(scaler_fit_d)
t_d_a[scaler_columns] = scaler.transform(t_d_a[scaler_columns])
te_d[scaler_columns] = scaler.transform(te_d[scaler_columns])

In [5]:
def my_custom_loss_func(y_true, y_pred):
    iou = np.zeros(len(y_true))
    for i in range(0, len(y_pred)):
        iou[i] = bb_intersection_over_union(y_true.iloc[i].values, y_pred[i])                          
    res = iou.mean()
    return res

In [87]:
box_rate = 10
kf = KFold(n_splits=5, random_state = 9)
clf = linear_model.RidgeCV(alphas=np.logspace(-4, 0, 10),
                           cv=kf, scoring=make_scorer(my_custom_loss_func))
X = t_d_a[box_columns_all + columns_ratio][t_d_a['area'] > box_rate]
y = t_d_a[t_d_a['area'] > box_rate]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
clf.fit(X_train, y_train[box_columns_true])
y_test_pred = clf.predict(X_test)
iou = np.zeros(len(y_test_pred))

y_test_pred_pd = pd.DataFrame(data=np.c_[y_test_pred, y_test['itemId']], columns=box_columns_id)
y_test_pred_pd['itemId'] = pd.to_numeric(y_test_pred_pd['itemId'], downcast='integer')
data = y_test[box_columns_true + ['itemId']].merge(y_test_pred_pd, on=['itemId'])
data["iou"] = data[box_columns + box_columns_true].apply(intersection_over_union, axis=1)
print(data["iou"].mean())

0.5713057315820744


In [78]:
ids = list(set(data['itemId']))
print(set(data[data['iou'] < .1]['itemId']))
# print(data[data['itemId']  == 25135])

{22483, 29012, 25142, 35738, 13855}


In [89]:
center_rate = 10
kf = KFold(n_splits=4, random_state = 9)
clf_center = linear_model.MultiTaskLassoCV(alphas=np.logspace(-4, 0, 10), cv=kf)
X = t_d_a[center_columns_all + columns_ratio][t_d_a['area'] > center_rate]
y = t_d_a[t_d_a['area'] > center_rate]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
clf_center.fit(X_train, y_train[center_columns_true])
y_test_pred = clf_center.predict(X_test)
y_test_pred = np.c_[y_test_pred, np.zeros(y_test_pred.shape)]
# Xmin, Ymin, Xmax, Ymax
y_test_pred[:, 4] = y_test_pred[:, xc] - y_test_pred[:, l]/2 
y_test_pred[:, 5] = y_test_pred[:, yc] - y_test_pred[:, w]/2
y_test_pred[:, 6] = y_test_pred[:, xc] + y_test_pred[:, l]/2
y_test_pred[:, 7] = y_test_pred[:, yc] + y_test_pred[:, w]/2
y_test_pred = y_test_pred[:, 4:]
y_test_pred_pd = pd.DataFrame(data=np.c_[y_test_pred, y_test['itemId']], columns=box_test_columns_id)
y_test_pred_pd['itemId'] = pd.to_numeric(y_test_pred_pd['itemId'], downcast='integer')
data = y_test[box_columns_true + ['itemId']].merge(y_test_pred_pd, on=['itemId'])
data["iou"] = data[box_test_columns + box_columns_true].apply(intersection_over_union, axis=1)
print(data["iou"].mean())

0.5716842630520145


In [82]:
ids = list(set(data['itemId']))
print(set(data[data['iou'] < .2]['itemId']))

{31749, 4038, 32551, 1095, 17191, 8398, 7761, 22483, 29012, 25142, 11258, 13855}


In [90]:
X_submit_box = te_d[box_columns_all + columns_ratio]
y_submit_box = clf.predict(X_submit_box)
X_submit_center = te_d[center_columns_all + columns_ratio]
y_submit_center = clf_center.predict(X_submit_center)
# Xmin, Ymin, Xmax, Ymax
y_submit_center = np.c_[y_submit_center, np.zeros(y_submit_center.shape)]
y_submit_center[:, 4] = y_submit_center[:, xc] - y_submit_center[:, l]/2 
y_submit_center[:, 5] = y_submit_center[:, yc] - y_submit_center[:, w]/2
y_submit_center[:, 6] = y_submit_center[:, xc] + y_submit_center[:, l]/2
y_submit_center[:, 7] = y_submit_center[:, yc] + y_submit_center[:, w]/2
y_submit_center = y_submit_center[:, 4:]

y_submit_pd_box = pd.DataFrame(data=np.c_[y_submit_box, te_d['itemId']], columns=box_columns_id)
y_submit_pd_box['itemId'] = pd.to_numeric(y_submit_pd_box['itemId'], downcast='integer')
y_submit_pd_center = pd.DataFrame(data=np.c_[y_submit_center, te_d['itemId']], columns=box_columns_id)
y_submit_pd_center['itemId'] = pd.to_numeric(y_submit_pd_center['itemId'], downcast='integer')

In [91]:
print(y_submit_pd_box[y_submit_pd_box['itemId'] == 4099])
print(y_submit_pd_center[y_submit_pd_center['itemId'] == 4099])

        Xmin        Ymin        Xmax         Ymax  itemId
0  79.905174  842.300529  386.082872  1109.564785    4099
1  79.732177  843.882775  382.166302  1109.610940    4099
2  80.604491  844.746830  383.652503  1109.999726    4099
        Xmin        Ymin        Xmax         Ymax  itemId
0  80.424011  843.462482  387.047622  1110.294818    4099
1  79.688756  844.498842  383.853074  1109.357176    4099
2  80.882680  845.227254  385.721165  1109.492565    4099


In [95]:
result = pd.concat([y_submit_pd_box, y_submit_pd_center])
print(result.shape)
# ids = list(set(y_submit_pd['itemId']))
# answers = pd.DataFrame(np.zeros((len(ids), len(box_columns_id)), dtype=int), index=ids, columns=box_columns_id)
# for iid in ids:
#     weighted_averaging(iid, y_submit_pd, y_submit, answers, columns)
answers = result.groupby('itemId').mean().reset_index()
answers[['itemId'] + columns].to_csv('submit2504.csv', index=False, header=False)
data = answers.merge(pd.read_csv("submit-max.csv"), on=['itemId'])
data["iou"] = data[['Xmin','Ymin', 'Xmax', 'Ymax', 'Xmin_true',\
      'Ymin_true', 'Xmax_true','Ymax_true']].apply(intersection_over_union, axis=1)
data["iou"].mean()

(7230, 5)


0.9535161478033032