In [533]:
%run cv.ipynb

In [2]:
# Load the three competition tables from input_path (comma-separated CSV files).
train, test, train_answers = [
    pd.read_csv(input_path + file_suffix, sep=',')
    for file_suffix in ('/train_data.csv', '/test_data.csv', '/train_answers.csv')
]

In [505]:
class GeneticSearchModel(MyModel):
    """Tune the post-processing coefficients of ``MinMaxModelEx2`` with a genetic search.

    ``params`` must contain:
        'size'   -- passed to ``Population`` as its ``size`` argument
        'epochs' -- number of ``Population.do_epoch()`` calls made by ``fit``

    Every candidate coefficient vector is scored by fitting the sub-model on the
    training data and evaluating ``get_score`` on that same training data.

    NOTE(review): ``Population``, ``MutableFloatFactory`` and ``get_score`` are not
    defined in this file; their exact semantics should be confirmed at their source.
    """

    # (sub-model parameter name, min_value, max_value), in the order in which the
    # population hands the values back. Names and ranges are kept in one table so
    # they cannot get out of sync.
    SEARCH_SPACE = (
        ('mult_dx', 0.2, 1.5),
        ('mult_dx2', 0, 0.1),
        ('mult_dd', 0.2, 1.5),
        ('mult_dd2', 0, 0.7),
        ('mult_dd3', 0, 0.7),
        ('thr1', 50, 150),
        ('thr2', 8, 25),
        ('thr3', 8, 25),
        ('c1', 0, 1),
        ('c2', 0, 1),
        ('p1', 5, 45),
        ('p2', 5, 45),
        ('p3', 5, 45),
        ('p4', 0, 15),
        ('p5', 0, 15),
    )

    def __init__(self, params=None):
        """Store the search settings.

        ``params`` defaults to a fresh dict per instance (a ``{}`` default in the
        signature would be shared between all instances).
        """
        self.model_class = MinMaxModelEx2
        self.params = {} if params is None else params
        # The same dict object is handed to every candidate sub-model.
        # MinMaxModelEx2.fit stores its '_fit_cache' inside the params it was given,
        # so -- presumably, if the base class keeps a reference rather than a copy --
        # the trained regressors are reused across candidates.
        self.sub_params = {
            'use_cache': True,
        }

    def fit(self, train, train_answers):
        """Run the genetic search for ``params['epochs']`` epochs on the given data."""
        self.train = train
        self.train_answers = train_answers
        self.population = Population(
            size=self.params['size'],
            items=[
                MutableFloatFactory(min_value=low, max_value=high)
                for _name, low, high in self.SEARCH_SPACE
            ],
            score_getter=self.score_getter,
        )
        for _ in range(self.params['epochs']):
            self.population.do_epoch()

    def set_sub_params(self, params):
        """Copy a candidate vector into ``self.sub_params`` following ``SEARCH_SPACE`` order.

        Raises:
            ValueError: if ``params`` does not hold exactly one value per coefficient
                (nothing is written to ``self.sub_params`` in that case).
        """
        values = list(params)
        if len(values) != len(self.SEARCH_SPACE):
            raise ValueError('expected %d values, got %d' % (len(self.SEARCH_SPACE), len(values)))
        for (name, _low, _high), value in zip(self.SEARCH_SPACE, values):
            self.sub_params[name] = value

    def _fit_sub_model(self, params):
        """Build the sub-model for the candidate ``params`` and fit it on the stored training data."""
        self.set_sub_params(params)
        model = self.model_class(self.sub_params)
        model.fit(self.train, self.train_answers)
        return model

    def score_getter(self, params):
        """Score one candidate vector: fit on the training data and evaluate on the same data."""
        model = self._fit_sub_model(params)
        return get_score(self.train_answers, model.predict(self.train))

    def predict(self, X):
        """Predict ``X`` with the sub-model configured from ``population.get_best_values()``."""
        best_values = self.population.get_best_values()
        return self._fit_sub_model(best_values).predict(X)

In [534]:
# genetic

class LgbModel(MyModel):
    """LightGBM regressor wrapper that always trains with the 'fair' objective.

    ``self.params`` carries the LightGBM training parameters plus two wrapper-only
    keys: 'feats' (feature column names) and 'num_boost_round'.
    """

    def get_X(self, data):
        """Select the configured feature columns from ``data``."""
        feature_names = self.params['feats']
        return data[feature_names]

    def fit(self, data):
        """Train a booster on ``data``; labels are read from its 'target' column."""
        dataset = lgb.Dataset(self.get_X(data), data['target'])
        # Work on a copy so the wrapper-only keys can be stripped without touching self.params.
        booster_params = self.params.copy()
        rounds = booster_params.pop('num_boost_round')
        booster_params.pop('feats')
        booster_params['objective'] = 'fair'
        self.model = lgb.train(booster_params, dataset, num_boost_round=rounds)

    def predict(self, data):
        """Return the booster's predictions for the feature columns of ``data``."""
        features = self.get_X(data)
        return self.model.predict(features)
            
class MinMaxModelEx2(MinMaxModel2):
    """Min/max bounding-box model with LightGBM corrections and tunable post-processing.

    ``fit`` trains two ``LgbModel`` regressors on per-item aggregates: one for
    ``Xmin_min - Xmin_true`` and one for ``Ymax_max - Ymax_true``.
    ``predict`` starts from the corrected min/max box and applies a chain of
    adjustments whose coefficients are read from ``self.params``
    ('mult_dx', 'mult_dx2', 'mult_dd', 'mult_dd2', 'mult_dd3', 'thr1', 'thr2',
    'thr3', 'c1', 'c2', 'p1'..'p5').

    NOTE(review): ``self.params`` and ``get_base_result_cached`` come from
    ``MinMaxModel2``, which is not visible in this file.
    """

    # LightGBM settings shared by both correction regressors.
    _LGB_PARAMS = {
        'boosting_type': 'gbdt',
        'min_data_in_leaf': 100,
        'lambda_l2': 0.5,
        'num_leaves': 5,
        'learning_rate': 0.007,
        'feature_fraction': 1,
        'bagging_fraction': 1,
        'bagging_freq': 1,
        'num_boost_round': 700,
        'verbose': 0,
    }
    _X1_FEATS = ['Xmin_min']
    _Y2_FEATS = ['Xmin_min', 'Xmin_min2', 'width', 'usersCount', 'Ymin_min', 'ratio_mean']

    def _train_models(self, train, train_answers):
        """Train the x1 and y2 correction regressors from scratch and return them as a pair."""
        # The row-level features do not depend on the target, so build them once.
        feats = create_features(train)
        feats_x1 = create_df(feats, train_answers, aggr=True, targeter=self.x1_targeter)
        feats_y2 = create_df(feats, train_answers, aggr=True, targeter=self.y2_targeter)

        model_x1 = LgbModel(dict(self._LGB_PARAMS, feats=list(self._X1_FEATS)))
        model_x1.fit(feats_x1)
        model_y2 = LgbModel(dict(self._LGB_PARAMS, feats=list(self._Y2_FEATS)))
        model_y2.fit(feats_y2)
        return model_x1, model_y2

    def fit(self, train, train_answers):
        """Train (or fetch from cache) the two correction regressors.

        With ``params['use_cache']`` truthy, the trained pair is stored in
        ``params['_fit_cache']`` keyed by ``get_df_hash(train)`` and reused on later
        calls with the same key. Without it, the models are trained on every call
        (this branch used to call an undefined placeholder and raise NameError).
        """
        self.x1_targeter = lambda df: df.Xmin_min - df.Xmin_true  # how much to subtract from Xmin
        self.y2_targeter = lambda df: df.Ymax_max - df.Ymax_true

        if self.params.get('use_cache', False):
            cache = self.params.setdefault('_fit_cache', {})
            key = get_df_hash(train)
            if key not in cache:
                cache[key] = self._train_models(train, train_answers)
            model_x1, model_y2 = cache[key]
        else:
            model_x1, model_y2 = self._train_models(train, train_answers)

        self.model_x1 = model_x1
        self.model_y2 = model_y2

    def get_base_result(self, X):
        """Return the regressor-corrected min/max box for every item of ``X``.

        Each row is ``[itemId, x1, y1, x2, y2, x1_init, Xmax_max2 - Xmax_max]`` where
        ``x1_init`` is the uncorrected ``Xmin_min``.
        """
        # create_df only applies the targeter when answers are given, so with
        # answers=None a single aggregated frame serves both regressors.
        feats = create_df(create_features(X), None, aggr=True)

        pred = pd.DataFrame({
            'itemId': feats.itemId,
            'target_x1': self.model_x1.predict(feats),
            'target_y2': self.model_y2.predict(feats),
        })
        assert pred.shape[0] == feats.shape[0]

        result = []
        for items in feats.join(pred.set_index('itemId'), on='itemId').itertuples():
            x1 = items.Xmin_min
            y1 = items.Ymin_min
            x2 = items.Xmax_max
            y2 = items.Ymax_max
            x1_init = x1
            # The predicted x1 correction is applied in full, but only for boxes starting at x >= 70.
            if x1 >= 70:
                x1 -= items.target_x1

            # Only a tenth of the predicted y2 correction is applied.
            y2 -= items.target_y2 * 0.1

            result.append([items.itemId, x1, y1, x2, y2, x1_init, items.Xmax_max2 - items.Xmax_max])
        return result

    def predict(self, X):
        """Post-process the base boxes with the coefficients from ``self.params``.

        Returns a DataFrame with columns ``itemId, Xmin, Ymin, Xmax, Ymax``.
        """
        minmax_result = self.get_base_result_cached(X)
        result = []

        mult_dx = self.params.get('mult_dx')
        mult_dx2 = self.params.get('mult_dx2')
        mult_dd = self.params.get('mult_dd')
        mult_dd2 = self.params.get('mult_dd2')
        mult_dd3 = self.params.get('mult_dd3')
        thr1 = self.params.get('thr1')
        thr2 = self.params.get('thr2')
        thr3 = self.params.get('thr3')
        c1 = self.params.get('c1')
        c2 = self.params.get('c2')
        p1 = self.params.get('p1')
        p2 = self.params.get('p2')
        p3 = self.params.get('p3')
        p4 = self.params.get('p4')
        p5 = self.params.get('p5')

        # The last element of each base row (Xmax_max2 - Xmax_max) is not used here.
        for itemId, x1, y1, x2, y2, x1_orig, _x1_delta in minmax_result:
            if x1_orig >= 70:
                x1 -= x2 * mult_dx2

            # Compare the height with the scaled width; dx and dy are NOT refreshed
            # after the adjustments below.
            dx = (x2 - x1)*mult_dx
            dy = y2 - y1

            if dy > dx:
                # Taller than the scaled width: lower the bottom edge and widen the box.
                dd = (dy - dx)
                y2 -= dd*mult_dd
                if x1 >= thr1:
                    x1 -= dd*c1
                else:
                    x2 += dd*c2
            else:
                # Otherwise: grow the box vertically in both directions.
                dd = (dx - dy)
                y2 += dd*mult_dd2
                y1 -= dd*mult_dd3

            if y2 < x1*thr3:
                y2 += ((x1*thr3 - y2)/(x1*thr3)) * p1
                # Uses the y2 that was just updated on the previous line (kept as in
                # the original, where it was flagged "wtf"); the coefficients were
                # tuned against this behaviour.
                y1 -= ((x1*thr3 - y2)/(x1*thr3)) * p3

            if y1 < x1*thr2:
                x1 -= ((x1*thr2 - y1)/(x1*thr2)) * p2

            if dy > 318:
                y2 -= (dy - 318) / 318 * p4

            # dx/mult_dx is the unscaled width computed before the adjustments above;
            # boxes wider than v are shrunk symmetrically.
            v = 450
            if dx/mult_dx > v:
                x2 -= (dx/mult_dx - v) / v * p5
                x1 += (dx/mult_dx - v) / v * p5

            result.append([itemId, x1, y1, x2, y2])

        return pd.DataFrame(result, columns=['itemId', 'Xmin', 'Ymin', 'Xmax', 'Ymax'])

In [None]:
# Cross-validate the genetic coefficient search (size=70, epochs=250) with a fixed seed.
seed_everything(345522)
cross_validation(GeneticSearchModel({'epochs': 250, 'size': 70}), train, train_answers)

In [204]:
def create_features(df):
    """Return a copy of ``df`` extended with per-row box geometry and per-item counts.

    Added columns:
        width      -- Xmax - Xmin
        height     -- Ymax - Ymin
        ratio      -- (width - height) / (width + height), NaN replaced by 1
        usersCount -- number of non-null userId values sharing the row's itemId

    The input frame is not modified.
    """
    box_width = df.Xmax - df.Xmin
    box_height = df.Ymax - df.Ymin
    # NaN ratios (e.g. 0/0 for a zero-size box) become 1.
    shape_ratio = ((box_width - box_height) / (box_width + box_height)).fillna(1)

    enriched = df.copy()
    enriched['width'] = box_width
    enriched['height'] = box_height
    enriched['ratio'] = shape_ratio

    users_per_item = (
        df.groupby('itemId')
        .agg({'userId': 'count'})
        .rename(columns={'userId': 'usersCount'})
    )
    return enriched.join(users_per_item, how='left', on='itemId')
def create_df(feats, answers, aggr=True, targeter=None):
    """Aggregate row-level box features into one row per item (or per item/user).

    Args:
        feats: frame with columns itemId, userId, Xmin, Ymin, Xmax, Ymax, width,
            height, ratio and usersCount (the columns read below).
        answers: frame with itemId and Xmin_true / Ymin_true / Xmax_true / Ymax_true,
            or None when no ground truth is available.
        aggr: group by 'itemId' when True, by ['itemId', 'userId'] otherwise.
        targeter: callable that receives the merged frame (aggregates plus the
            ``*_true`` columns) and returns the 'target' column. Defaults to
            ``Xmin_min - Xmin_true``. Only used when ``answers`` is given.

    Returns:
        The aggregated frame. When ``answers`` is given it also has a 'target'
        column and the ``*_true`` columns are dropped again.
    """
    if targeter is None:
        def targeter(res):
            # how much has to be subtracted from Xmin
            return res.Xmin_min - res.Xmin_true

    item_stat = (
        feats.groupby('itemId')
        .agg({'Xmin': 'min'})
        .rename(columns={'Xmin': 'Xmin_item'})
    )

    group_keys = 'itemId' if aggr else ['itemId', 'userId']
    res = feats.groupby(group_keys).apply(lambda a: pd.Series({
        'width': a.width.mean(),
        'height': a.height.mean(),
        'usersCount': a.usersCount.mean(),
        'X_range': a.Xmax.max() - a.Xmin.min(),
        'Xmin_min': a.Xmin.min(),
        'Xmin_min2': min2(a.Xmin),
        'Xmin_max': a.Xmin.max(),
        'Xmin_range': a.Xmin.max() - a.Xmin.min(),
        'Xmin_mean': a.Xmin.mean(),
        'Ymin_min': a.Ymin.min(),
        'Xmax_max': a.Xmax.max(),
        'Xmax_max2': max2(a.Xmax),
        'Ymax_max': a.Ymax.max(),
        'ratio_min': a.ratio.min(),
        'ratio_max': a.ratio.max(),
        'ratio_mean': a.ratio.mean(),
        'ratio_balance': (a.Xmax.max() - a.Xmin.min()) * 0.74 - (a.Ymax.max() - a.Ymin.min())
    }))
    res = res.reset_index()
    res = res.join(item_stat, on='itemId')

    if answers is not None:
        res = res.merge(answers, on='itemId')
        res['target'] = targeter(res)
        # Pass the labels via `columns=`: a positional axis argument
        # (`drop(labels, 1)`) is rejected by pandas 2.x.
        res = res.drop(columns=['Xmin_true', 'Ymin_true', 'Xmax_true', 'Ymax_true'])
    return res

# Preview the per-item training frame built with the default targeter (Xmin_min - Xmin_true).
create_df(create_features(train), train_answers, aggr=True).head(20)

Unnamed: 0,itemId,width,height,usersCount,X_range,Xmin_min,Xmin_min2,Xmin_max,Xmin_range,Xmin_mean,Ymin_min,Xmax_max,Xmax_max2,Ymax_max,ratio_min,ratio_max,ratio_mean,ratio_balance,Xmin_item,target
0,5,317.0,330.0,3.0,360.0,151.0,192.0,210.0,59.0,184.333333,914.0,511.0,510.0,1331.0,-0.134694,0.19403,-0.01173,-150.6,151,-47.0
1,10,328.666667,292.666667,3.0,355.0,72.0,95.0,96.0,24.0,87.666667,559.0,427.0,420.0,858.0,0.014925,0.09434,0.057222,-36.3,72,46.0
2,13,527.0,363.666667,3.0,575.0,46.0,69.0,94.0,48.0,69.666667,600.0,621.0,608.0,981.0,0.134948,0.21822,0.182376,44.5,46,-187.0
3,30,177.8,93.6,5.0,259.0,65.0,85.0,124.0,59.0,91.4,353.0,324.0,323.0,491.0,-0.333333,0.434903,0.180328,53.66,65,4.0
4,32,228.0,200.833333,6.0,379.0,9.0,17.0,42.0,33.0,28.166667,279.0,388.0,387.0,884.0,0.02381,0.08777,0.062097,-324.54,9,-21.0
5,48,102.666667,70.666667,3.0,113.0,50.0,52.0,55.0,5.0,52.333333,1029.0,163.0,158.0,1103.0,0.170455,0.208556,0.183662,9.62,50,-35.0
6,64,191.333333,135.666667,3.0,222.0,22.0,26.0,30.0,8.0,26.0,417.0,244.0,235.0,564.0,0.055351,0.218659,0.161273,17.28,22,1.0
7,78,506.5,488.0,4.0,568.0,49.0,75.0,120.0,71.0,85.75,194.0,617.0,590.0,707.0,-0.050104,0.107212,0.017273,-92.68,49,45.0
8,90,348.666667,288.666667,3.0,439.0,35.0,66.0,134.0,99.0,78.333333,269.0,474.0,428.0,608.0,0.046263,0.138781,0.08874,-14.14,35,-68.0
9,118,93.0,64.0,3.0,141.0,47.0,48.0,168.0,121.0,87.666667,1269.0,188.0,186.0,1372.0,-1.0,0.221239,-0.207674,1.34,47,1.0


In [623]:
class LgbModel(MyModel):
    """LightGBM regressor wrapper that always trains with the 'fair' objective.

    ``self.params`` carries the LightGBM training parameters plus two wrapper-only
    keys: 'feats' (feature column names) and 'num_boost_round'.
    """

    def get_X(self, data):
        """Select the configured feature columns from ``data``."""
        feature_names = self.params['feats']
        return data[feature_names]

    def fit(self, data):
        """Train a booster on ``data``; labels are read from its 'target' column."""
        dataset = lgb.Dataset(self.get_X(data), data['target'])
        # Work on a copy so the wrapper-only keys can be stripped without touching self.params.
        booster_params = self.params.copy()
        rounds = booster_params.pop('num_boost_round')
        booster_params.pop('feats')
        booster_params['objective'] = 'fair'
        self.model = lgb.train(booster_params, dataset, num_boost_round=rounds)

    def predict(self, data):
        """Return the booster's predictions for the feature columns of ``data``."""
        features = self.get_X(data)
        return self.model.predict(features)
    
class KerasModel(MyModel):
    """Small feed-forward Keras regressor trained on scaled data with an MSE loss.

    ``self.params`` keys read here: 'feats', 'n1', 'n2', optional 'n3', 'dropout',
    'epochs', 'verbose' and optional 'batch_size' (default 1024).

    NOTE(review): ``MyScaler`` is defined outside this file; the sigmoid output
    suggests it maps the target into [0, 1] -- confirm.
    """

    def get_X(self, data):
        """Return a copy of the feature columns listed in ``params['feats']``."""
        wanted = self.params['feats']
        return data[wanted].copy()

    def fit(self, data):
        """Fit the scaler on a copy of ``data`` and train the network on the scaled values."""
        cfg = self.params
        self.scaler = MyScaler()
        # The scaler is applied in place, so it gets its own copy of the frame.
        data = data.copy()
        self.scaler.fit_transform(data, inplace=True)
        inputs = self.get_X(data)
        targets = data['target'].values

        layers = keras.layers
        net = keras.models.Sequential()
        self.model = net

        # Input block: Dense(relu) -> BatchNorm -> LeakyReLU.
        net.add(layers.Dense(cfg['n1'], activation = "relu", input_shape=(inputs.shape[1], )))
        net.add(layers.BatchNormalization())
        net.add(layers.LeakyReLU())

        # One Dropout -> Dense(relu) -> BatchNorm block for 'n2' and, if configured, for 'n3'.
        hidden_units = [cfg['n2']]
        if 'n3' in cfg:
            hidden_units.append(cfg['n3'])
        for units in hidden_units:
            net.add(layers.Dropout(cfg['dropout'], noise_shape=None, seed=1))
            net.add(layers.Dense(units, activation = "relu"))
            net.add(layers.BatchNormalization())

        net.add(layers.Dense(1, activation = "sigmoid"))

        if cfg['verbose'] > 0:
            net.summary()

        optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
        net.compile(optimizer = optimizer, loss = "mse", metrics = ["mse"])
        net.fit(
            inputs, targets,
            epochs=cfg['epochs'],
            batch_size=cfg.get('batch_size', 1024),
            verbose=cfg['verbose']
        )

    def predict(self, X):
        """Scale the features of ``X``, run the network and map the output back through the scaler."""
        inputs = self.get_X(X)
        self.scaler.transform(inputs, inplace=True)
        network_output = self.model.predict(inputs)[:,0]
        scaled_pred = pd.DataFrame({'target': network_output})
        self.scaler.invert_transform(scaled_pred, inplace=True)
        return scaled_pred.target.values
            
        
class MinMaxModelEx(MyModel):
    """Min/max bounding-box model with LightGBM corrections and fixed post-processing.

    ``fit`` trains two ``LgbModel`` regressors on per-item aggregates: one for
    ``Xmin_min - Xmin_true`` and one for ``Ymax_max - Ymax_true``.
    ``predict`` corrects the per-item min/max box with them and then applies
    ``do_magic``, a chain of adjustments with hard-coded coefficients.
    """

    # LightGBM settings shared by both correction regressors.
    _LGB_PARAMS = {
        'boosting_type': 'gbdt',
        'min_data_in_leaf': 100,
        'lambda_l2': 0.5,
        'num_leaves': 5,
        'learning_rate': 0.007,
        'feature_fraction': 1,
        'bagging_fraction': 1,
        'bagging_freq': 1,
        'num_boost_round': 700,
        'verbose': 0,
    }

    def fit(self, train, train_answers):
        """Train the x1 and y2 correction regressors on the given data."""
        self.x1_targeter = lambda df: df.Xmin_min - df.Xmin_true  # how much to subtract from Xmin
        self.x2_targeter = lambda df: df.Xmax_max - df.Xmax_true
        self.y1_targeter = lambda df: df.Ymin_min - df.Ymin_true
        self.y2_targeter = lambda df: df.Ymax_max - df.Ymax_true

        # The row-level features do not depend on the target, so build them once.
        feats = create_features(train)
        feats_x1 = create_df(feats, train_answers, aggr=True, targeter=self.x1_targeter)
        feats_y2 = create_df(feats, train_answers, aggr=True, targeter=self.y2_targeter)

        self.model_x1 = LgbModel(dict(self._LGB_PARAMS, feats=['Xmin_min']))
        self.model_x1.fit(feats_x1)

        self.model_y2 = LgbModel(dict(self._LGB_PARAMS, feats=['Xmin_min', 'Xmin_min2', 'width', 'usersCount',
                                                               'Ymin_min', 'ratio_mean']))
        self.model_y2.fit(feats_y2)

    def do_magic(self, x1_orig, x1, y1, x2, y2, a):
        """Apply the fixed post-processing chain to one box.

        Args:
            x1_orig: the uncorrected left edge (``Xmin_min``).
            x1, y1, x2, y2: the box after the regressor corrections.
            a: passed through unchanged (kept for interface compatibility).

        Returns:
            ``(x1, y1, x2, y2, a)`` with the adjusted coordinates.
        """
        mult_dx = 0.7415657456058256
        mult_dd = 0.5640671461647451
        thr1 = 80.04833508987377
        thr2 = 17.020502776535906
        thr3 = 14.801316697593093
        c1 = 0.06141877165368582
        c2 = 0.36311996949549297
        # The original unpacked 20.650668482381576 into p1 and immediately
        # overwrote it with 25; only the effective value is kept.
        p1 = 25
        p2 = 28.753285758777665

        mult_dd2 = 0.3
        p3 = 12
        p4 = 5
        p5 = 10
        mult_dx2 = 1.2 / 60
        mult_dd3 = 0.1

        # Alternative full coefficient set kept from the original notebook (inactive), in the order
        # mult_dx, mult_dx2, mult_dd, mult_dd2, mult_dd3, thr1, thr2, thr3, c1, c2, p1, p2, p3, p4, p5:
        #   0.752466776455673, 0.012916260455715561, 0.5136272954795015, 0.23729916690030142,
        #   0.09856159832958664, 83.83112212146243, 16.878227996320813, 12.242354362722258,
        #   0.0979468628964354, 0.396297669909049, 44.48846959068099, 32.83262185010617,
        #   10.785694180447328, 9.662475163107493, 12.999344805779131

        if x1_orig >= 70:
            x1 -= x2 * mult_dx2

        # Compare the height with the scaled width; dx and dy are NOT refreshed
        # after the adjustments below.
        dx = (x2 - x1)*mult_dx
        dy = y2 - y1

        if dy > dx:
            # Taller than the scaled width: lower the bottom edge and widen the box.
            dd = (dy - dx)
            y2 -= dd*mult_dd
            if x1 >= thr1:
                x1 -= dd*c1
            else:
                x2 += dd*c2
        else:
            # Otherwise: grow the box vertically in both directions.
            dd = (dx - dy)
            y2 += dd*mult_dd2
            y1 -= dd*mult_dd3

        if y2 < x1*thr3:
            y2 += ((x1*thr3 - y2)/(x1*thr3)) * p1
            # Uses the y2 that was just updated on the previous line (kept as in the
            # original, where it was flagged "wtf"); the coefficients assume this behaviour.
            y1 -= ((x1*thr3 - y2)/(x1*thr3)) * p3

        if y1 < x1*thr2:
            x1 -= ((x1*thr2 - y1)/(x1*thr2)) * p2

        if dy > 318:
            y2 -= (dy - 318) / 318 * p4

        # dx/mult_dx is the unscaled width computed before the adjustments above;
        # boxes wider than v are shrunk symmetrically.
        v = 450
        if dx/mult_dx > v:
            x2 -= (dx/mult_dx - v) / v * p5
            x1 += (dx/mult_dx - v) / v * p5

        return x1, y1, x2, y2, a

    def predict(self, X):
        """Predict one box per item of ``X``.

        Returns a DataFrame with columns ``itemId, Xmin, Ymin, Xmax, Ymax``.
        """
        # create_df only applies the targeter when answers are given, so with
        # answers=None a single aggregated frame serves both regressors.
        feats = create_df(create_features(X), None, aggr=True)

        pred = pd.DataFrame({
            'itemId': feats.itemId,
            'target_x1': self.model_x1.predict(feats),
            'target_y2': self.model_y2.predict(feats),
        })
        assert pred.shape[0] == feats.shape[0]

        result = []
        a = 0
        for items in feats.join(pred.set_index('itemId'), on='itemId').itertuples():
            x1 = items.Xmin_min
            y1 = items.Ymin_min
            x2 = items.Xmax_max
            y2 = items.Ymax_max
            x1_orig = x1

            # The predicted x1 correction is applied in full, but only for boxes starting at x >= 70.
            if x1_orig >= 70:
                x1 -= items.target_x1

            # Only a tenth of the predicted y2 correction is applied.
            y2 -= items.target_y2 * 0.1

            x1, y1, x2, y2, a = self.do_magic(x1_orig, x1, y1, x2, y2, a)

            result.append([items.itemId, x1, y1, x2, y2])
        return pd.DataFrame(result, columns=['itemId', 'Xmin', 'Ymin', 'Xmax', 'Ymax'])

In [624]:
# Cross-validate the fixed-coefficient MinMaxModelEx with a fixed seed.
seed_everything(34332)
cross_validation(MinMaxModelEx(), train, train_answers)

KFold(n_splits=5, random_state=2707, shuffle=True)
 0 - 1 : 0.6081, mean=0.6081
 0 - 2 : 0.6074, mean=0.6078
 0 - 3 : 0.5853, mean=0.6003
 0 - 4 : 0.5910, mean=0.5980
 0 - 5 : 0.6016, mean=0.5987
KFold(n_splits=5, random_state=2708, shuffle=True)
 1 - 1 : 0.5896, mean=0.5972
 1 - 2 : 0.5979, mean=0.5973
 1 - 3 : 0.5952, mean=0.5970
 1 - 4 : 0.6098, mean=0.5985
 1 - 5 : 0.6002, mean=0.5986
KFold(n_splits=5, random_state=2709, shuffle=True)
 2 - 1 : 0.6057, mean=0.5993
 2 - 2 : 0.5996, mean=0.5993
 2 - 3 : 0.6011, mean=0.5994
 2 - 4 : 0.5983, mean=0.5993
 2 - 5 : 0.5877, mean=0.5986
KFold(n_splits=5, random_state=2710, shuffle=True)
 3 - 1 : 0.5857, mean=0.5978
 3 - 2 : 0.5946, mean=0.5976
 3 - 3 : 0.6029, mean=0.5979
 3 - 4 : 0.6179, mean=0.5989
 3 - 5 : 0.5904, mean=0.5985
KFold(n_splits=5, random_state=2711, shuffle=True)
 4 - 1 : 0.6030, mean=0.5987
 4 - 2 : 0.5963, mean=0.5986
 4 - 3 : 0.6083, mean=0.5990
 4 - 4 : 0.5738, mean=0.5980
 4 - 5 : 0.6107, mean=0.5985


0.598492346450105

In [625]:
# Train the final model on the full training set and predict boxes for the test set.
seed_everything(34332)
model = MinMaxModelEx()
# Alternative (disabled): tune the coefficients with the genetic search instead.
#model = GeneticSearchModel({'epochs': 600, 'size': 150})
model.fit(train, train_answers)
# Predictions are ordered by itemId before being previewed and written out.
res_test = model.predict(test).sort_values(by='itemId')
res_test.head(10)

Unnamed: 0,itemId,Xmin,Ymin,Xmax,Ymax
0,18,71.044994,632.456995,758.833933,1132.913198
1,19,27.0,559.0,155.458869,670.993657
2,33,21.0,377.9912,414.0,657.373737
3,62,38.422222,823.161669,635.577778,1264.126661
4,114,35.732554,570.0,243.953564,733.332823
5,146,13.125421,211.0,799.40418,858.441111
6,156,54.599154,801.659864,163.943405,880.701859
7,163,17.747588,317.0,214.63918,461.348492
8,164,4.0,146.0,485.72798,538.821533
9,179,58.69145,485.49169,330.0,709.306875


In [626]:
# Write the predictions as header-less rows "itemId,Xmin,Ymin,Xmax,Ymax", one item per line.
with open(output_path + '/res.txt', 'w') as out:
    lines = [
        '%d,%f,%f,%f,%f' % (row.itemId, row.Xmin, row.Ymin, row.Xmax, row.Ymax)
        for row in res_test.itertuples(index=False)
    ]
    out.write('\n'.join(lines) + '\n')