In [1]:
import pandas as pd
import numpy as np

import datetime

from tqdm._tqdm_notebook import tqdm_notebook

tqdm_notebook.pandas()

import xgboost as xgb 

from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit, train_test_split
from sklearn.feature_selection import f_regression, mutual_info_regression
from sklearn.metrics import mean_squared_error

import graphviz

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def fillnan(data, years_c):
    for name in years_c:
        #заменяем троеточия
        data[name] = data[name].apply(lambda x: np.nan if x==('..') else float(x))

    return data

def dropempt(data, years_c):
    data = fillnan(data, years_c)
    return data.dropna(thresh=10)

def Clearing(data):
    years_c = [item for ind, item in enumerate(np.array(data.columns)) if item not in ['Country Name', 'Country Code','Series Code','Series Name']]
    print ('Before clear:', len(data))

    data = dropempt(data, years_c)

    print ('After clear:', len(data))
    return data

In [3]:
def Make_region(code, reg='region', by = 'name'):
    #print code
    if code in data_cnt[by].values:
        #print code
        name = (data_cnt[by][data_cnt[by] == code].index.tolist())[0]
        return data_cnt[reg].at[name]
    else:
        return np.nan

def Sorting(data, reg='region'):
    #if data['Country Name'].get_value(len(data)-5) == np.nan:
    #    data=data.drop(data.index[[range(len(data)-5, len(data))]]).reset_index()
    if 'Region' not in data.columns:
        data['Region'] = data['Country Name'].progress_apply(lambda x: Make_region(x, reg))
        data['Region'].dropna()
        return data.reset_index()
    else:
        return data

In [4]:
df = pd.read_excel('data/Data_Extract_From_Gender_Statistics.xlsx', encoding = 'utf8').\
            append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics.xlsx', encoding='utf8')).\
            append(pd.read_excel('data/Data_Extract_From_Millennium_Development_Goals.xlsx', encoding='utf8')).\
            append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics_by_Wealth_Quintile.xlsx',encoding='utf8'))

data_cnt = pd.read_csv('all.csv', encoding='utf8')
df = Clearing(df)
defen = pd.read_excel('data/Data_Extract_From_Gender_Statistics.xlsx', sheet_name=1, encoding = 'utf8').\
               append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics.xlsx', sheet_name=1, encoding='utf8')).\
               append(pd.read_excel('data/Data_Extract_From_Millennium_Development_Goals.xlsx', sheet_name=1, encoding='utf8'))

df = Sorting(df, 'sub-region')
df.drop_duplicates(inplace = True)
df.drop(columns = ['index','2016 [YR2016]'], inplace=True)

Before clear: 398541
After clear: 104445


HBox(children=(IntProgress(value=0, max=104445), HTML(value='')))




In [7]:
def Testing_grid_xgb(X_train, Y_train):
    cv = TimeSeriesSplit(n_splits=8)
    clf = xgb.XGBRegressor()        
    parameters = {
        'scale_pos_weight' : [1, 1.5, 1.2],
        'objective':['reg:linear'],
        'learning_rate': [0.05, 0.1, 0.2, 0.3],
        'max_depth': [3 ,4 ,6, 5],
        'min_child_weight': [2, 3 ,4, 5],
        'max_delta_step': [0, 0.2, 0.8],
        'reg_lambda': [0.5, 0.8, 0],
        'reg_alpha': [0.5, 0.8, 0],
        'silent': [1],
        'subsample': [0.2, 0.05, 0.3, 0.4],
        'colsample_bytree': [0.7, 0.8, 1],
        'n_estimators': [100 ,250, 450],
        'base_score' : [0.5, 1],
        'gamma': [0, 0.5, 1]
    }
    
    start_time = datetime.datetime.now()
    gs = RandomizedSearchCV(clf, scoring='neg_mean_squared_error', param_distributions=parameters, 
                      cv=cv, n_jobs = -1, verbose=True,  n_iter=10000)
    gs.fit(X_train,Y_train)
    print ('Time elapsed:', datetime.datetime.now() - start_time)
    
    means = gs.cv_results_['mean_test_score']
    
    print (max(means))
    return gs.best_params_

In [8]:
def makexy(df, country = 'RUS'):
    #Years
    years_c = [item for ind, item in enumerate(np.array(df.columns)) if item not in ['Region','Country Name', 'Country Code','Series Code','Series Name', 'index']]
    
    #split country
    df = df[(df['Country Code']== country)].copy()
    df.drop_duplicates(inplace = True)
    
    y = df[df['Series Name'] == 'Life expectancy at birth, total (years)'][years_c].interpolate(axis=1).T
    
    if y.shape[1] == 1:
        y.columns = ['Life expectancy at birth, total (years)']
    else:
        return pd.DataFrame(), pd.DataFrame()
    
    #Transponenting and then adding 
    mask = ((df['Series Name'] != 'Life expectancy at birth, male (years)') & 
            (df['Series Name'] != 'Life expectancy at birth, female (years)') &
            (df['Series Name'] != 'Life expectancy at birth, total (years)'))
    
    X = df[mask][years_c].T
    X.columns = df[(df['Country Code']== country) & mask]['Series Name']
    X.reset_index(inplace = True, drop = True)
    del df
    
    X['Years'] = [int(name.split()[0]) for name in years_c]
    X['Region_code'] = int(Make_region(country,'region-code', by = 'alpha-3'))
    X['Reion_sub_code'] = int(Make_region(country, 'sub-region-code', 'alpha-3'))
    X['Country_code'] = int(Make_region(country, 'country-code', 'alpha-3'))
    return X.reset_index(drop = True).fillna(method='pad'), y.reset_index(drop = True).fillna(method='pad')

def train_set(df, region = 'Eastern Europe'):
    x_train, y_train = pd.DataFrame(), pd.DataFrame()
    for country in df[df['Region'] == region]['Country Code'].unique():
        tmp = makexy(df, country)
        x_train = pd.concat([tmp[0], x_train], axis=0).fillna(-999)
        y_train = pd.concat([tmp[1], y_train], axis=0).fillna(-999)
    return x_train.as_matrix(), y_train.as_matrix(), x_train.columns, x_train
    

In [7]:
def Resmaking(df):
    for region in tqdm_notebook(df['Region'].dropna().unique()):
        tmp = train_set(df, region)
        X_train, X_test, y_train, y_test = train_test_split(tmp[0], tmp[1], test_size=0.25,
                                                            random_state=42)
        p = Testing_grid_xgb(X_train, y_train)
        
        model = xgb.XGBRegressor(max_depth=p['max_depth'], learning_rate = p['learning_rate'], 
                                 n_estimators = p['n_estimators'], objective = p['objective'], 
                                 gamma = p['gamma'], min_child_weight = p['min_child_weight'], 
                                 max_delta_step = p['max_delta_step'], subsample = p['subsample'], 
                                 colsample_bytree = p['colsample_bytree'], reg_alpha = p['reg_alpha'], 
                                 reg_lambda = p['reg_lambda'], scale_pos_weight = p['scale_pos_weight'], 
                                 base_score = p['base_score'])
        model.fit(X_train, y_train)
        
        print ('MSE : {0:f}%, \nIn {1:s}'.format(100-mean_squared_error(model.predict(X_test), y_test), region))
        
        model.fit(tmp[0], tmp[1])
        
        rez = pd.DataFrame(model.feature_importances_, index = tmp[2]).sort_values(by = [0], ascending=False)
        rez[rez[0]!=0].to_excel('Importance/Varible_reg_in_'+str(region)+'.xlsx')
        
        rez = pd.concat([pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[0], columns=['F-value'], index=tmp[2]),
                         pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[1], columns=['p-value'], index=tmp[2])], axis=1)
        rez['p-value'] = rez['p-value'].apply(lambda x: round(x, 5))
        
        rez[rez['p-value']<0.01].sort_values(by = ['F-value'], ascending=False).to_excel('Importance/Varible_f_reg_in_'+str(region)+'.xlsx')
        
        rez = pd.DataFrame(mutual_info_regression(tmp[0], tmp[1].ravel()),
                           index = tmp[2]).sort_values(by = [0], ascending=False)
        rez[rez[0]!=0].to_excel('Importance/Varible_muatal_reg_in_'+str(region)+'.xlsx')

In [8]:
Resmaking(df)

HBox(children=(IntProgress(value=0, max=22), HTML(value='')))

Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 691 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 1191 tasks      | elapsed:   18.2s
[Parallel(n_jobs=-1)]: Done 1891 tasks      | elapsed:   28.3s
[Parallel(n_jobs=-1)]: Done 2791 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done 3891 tasks      | elapsed:   56.0s
[Parallel(n_jobs=-1)]: Done 5191 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 6691 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 8391 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 10291 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 12391 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 14691 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 17191 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 19891 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 22791 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 25891 tasks      | elap

Time elapsed: 0:17:33.847840
-1.827964772277412
MSE : 99.643631%, 
In Southern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 936 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 2436 tasks      | elapsed:   18.2s
[Parallel(n_jobs=-1)]: Done 4536 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 7236 tasks      | elapsed:   56.3s
[Parallel(n_jobs=-1)]: Done 10536 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 14436 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 18936 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 24036 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 29736 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 36036 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 42936 tasks      | elapsed:  5.7min
[Parallel(n_jobs=-1)]: Done 50436 tasks      | elapsed:  6.7min
[Parallel(n_jobs=-1)]: Done 58536 tasks      | elapsed:  7.8min
[Parallel(n_jobs=-1)]: Done 67236 tasks      | elapsed:  9.0min
[Parallel(n_jobs=-1)]: Done 76536 tasks      | 

Time elapsed: 0:10:40.901152
-0.9859103297383882
MSE : 99.729860%, 
In Northern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 588 tasks      | elapsed:    5.6s
[Parallel(n_jobs=-1)]: Done 1588 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 2988 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 4788 tasks      | elapsed:   44.3s
[Parallel(n_jobs=-1)]: Done 6988 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 9588 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 12588 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 15988 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 19788 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 23988 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 28588 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 33588 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 38988 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 44788 tasks      | elapsed:  6.8min
[Parallel(n_jobs=-1)]: Done 50988 tasks      | el

Time elapsed: 0:12:03.226434
-3.938847076268569
MSE : 99.255567%, 
In Middle Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 582 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 1332 tasks      | elapsed:   24.2s
[Parallel(n_jobs=-1)]: Done 2382 tasks      | elapsed:   42.4s
[Parallel(n_jobs=-1)]: Done 2923 tasks      | elapsed:   53.9s
[Parallel(n_jobs=-1)]: Done 3742 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 4476 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 5976 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 7676 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 9576 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 11676 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 13976 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 16476 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 19176 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 22076 tasks      | elapsed:  6.8min
[Parallel(n_jobs=-1)]: Done 24775 tasks      | elaps

Time elapsed: 0:24:47.836603
-0.4382147285494618
MSE : 99.878580%, 
In South America


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 328 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done 828 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done 1528 tasks      | elapsed:   28.7s
[Parallel(n_jobs=-1)]: Done 2428 tasks      | elapsed:   48.7s
[Parallel(n_jobs=-1)]: Done 3223 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 4267 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 5509 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 6359 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 7309 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 8359 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 9509 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 11105 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 13674 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 15124 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 16872 tasks      | elapsed:

Time elapsed: 0:27:33.479854
-0.7421142978511441
MSE : 99.957490%, 
In Western Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   18.5s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:   55.9s
[Parallel(n_jobs=-1)]: Done 14040 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 19240 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 25240 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 32040 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 39640 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 48040 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 57240 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 67240 tasks      | elapsed:  6.5min
[Parallel(n_jobs=-1)]: Done 78040 tasks      | elapsed:  7.5min
[Parallel(n_jobs=-1)]: Done 80000 out of 80000 | elapsed:  7.7min finished


Time elapsed: 0:07:42.064056
-0.1326339085449199
MSE : 99.836872%, 
In Australia and New Zealand


  corr /= X_norms
  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 730 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 1233 tasks      | elapsed:   16.8s
[Parallel(n_jobs=-1)]: Done 1933 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done 2833 tasks      | elapsed:   39.2s
[Parallel(n_jobs=-1)]: Done 3933 tasks      | elapsed:   57.1s
[Parallel(n_jobs=-1)]: Done 5233 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 6733 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 8433 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 10333 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 12433 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 14733 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 17233 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 19933 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 22833 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 25933 tasks      | elap

Time elapsed: 0:19:32.529148
-0.5152916047610381
MSE : 99.768292%, 
In Caribbean


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed:   11.2s
[Parallel(n_jobs=-1)]: Done 1968 tasks      | elapsed:   22.9s
[Parallel(n_jobs=-1)]: Done 2668 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 3568 tasks      | elapsed:   42.4s
[Parallel(n_jobs=-1)]: Done 4668 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 5968 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 7468 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 9168 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 11068 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 13168 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 15468 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 17968 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 20668 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 23568 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 26668 tasks      | ela

Time elapsed: 0:16:23.253373
-0.954385163073052
MSE : 99.664282%, 
In Eastern Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 639 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 1389 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 2439 tasks      | elapsed:   30.4s
[Parallel(n_jobs=-1)]: Done 3789 tasks      | elapsed:   45.8s
[Parallel(n_jobs=-1)]: Done 5439 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 7389 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 9639 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 12189 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 15039 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 18189 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 21639 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 25389 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 29439 tasks      | elapsed:  6.0min
[Parallel(n_jobs=-1)]: Done 33789 tasks      | elapsed:  6.9min
[Parallel(n_jobs=-1)]: Done 38439 tasks      | ela

Time elapsed: 0:16:25.249902
-0.5630603531239453
MSE : 99.837292%, 
In Central America


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 624 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 1374 tasks      | elapsed:   20.9s
[Parallel(n_jobs=-1)]: Done 2424 tasks      | elapsed:   36.8s
[Parallel(n_jobs=-1)]: Done 3289 tasks      | elapsed:   49.5s
[Parallel(n_jobs=-1)]: Done 4389 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 5689 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 7189 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 8889 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 10789 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 12889 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 15189 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 17689 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 20389 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 23289 tasks      | elapsed:  6.0min
[Parallel(n_jobs=-1)]: Done 26389 tasks      | elap

Time elapsed: 0:20:55.785196
-1.7368031975876426
MSE : 99.893882%, 
In Western Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1119 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 2369 tasks      | elapsed:   20.2s
[Parallel(n_jobs=-1)]: Done 4119 tasks      | elapsed:   36.9s
[Parallel(n_jobs=-1)]: Done 6369 tasks      | elapsed:   56.1s
[Parallel(n_jobs=-1)]: Done 9119 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 12369 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 16119 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 20369 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 25119 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 30369 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 36119 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 42369 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 49119 tasks      | elapsed:  7.1min
[Parallel(n_jobs=-1)]: Done 56369 tasks      | elapsed:  8.1min
[Parallel(n_jobs=-1)]: Done 64119 tasks      | 

Time elapsed: 0:11:28.091827
-1.2423323924017728
MSE : 99.472480%, 
In Southern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 470 tasks      | elapsed:    9.2s
[Parallel(n_jobs=-1)]: Done 972 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 1672 tasks      | elapsed:   32.6s
[Parallel(n_jobs=-1)]: Done 2572 tasks      | elapsed:   49.9s
[Parallel(n_jobs=-1)]: Done 3672 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 4972 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 6472 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 8172 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 10072 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 12172 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 14472 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 16972 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 19672 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 22572 tasks      | elapsed:  6.8min
[Parallel(n_jobs=-1)]: Done 25175 tasks      | elaps

Time elapsed: 0:23:55.863242
-0.3993129187267825
MSE : 99.883338%, 
In Eastern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 883 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done 1386 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done 2086 tasks      | elapsed:   27.3s
[Parallel(n_jobs=-1)]: Done 2986 tasks      | elapsed:   38.2s
[Parallel(n_jobs=-1)]: Done 4086 tasks      | elapsed:   51.9s
[Parallel(n_jobs=-1)]: Done 5386 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 6886 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 8586 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 10486 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 12586 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 14886 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 17386 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 20086 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 22986 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 26086 tasks      | elap

Time elapsed: 0:17:29.241243
-0.40027402512360866
MSE : 99.863699%, 
In South-Eastern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  31 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 328 tasks      | elapsed:    6.4s
[Parallel(n_jobs=-1)]: Done 828 tasks      | elapsed:   13.6s
[Parallel(n_jobs=-1)]: Done 1528 tasks      | elapsed:   25.5s
[Parallel(n_jobs=-1)]: Done 2428 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 3528 tasks      | elapsed:   58.0s
[Parallel(n_jobs=-1)]: Done 4828 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 6328 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 8028 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 9928 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 12028 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 14328 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 16828 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 19528 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 22428 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 25528 tasks      | elapse

Time elapsed: 0:21:28.524326
-0.5522124921715652
MSE : 99.867731%, 
In Southern Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   23.4s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   42.2s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 14040 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 19240 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 25240 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 32040 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 36140 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 40340 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 44940 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 49940 tasks      | elapsed:  5.7min
[Parallel(n_jobs=-1)]: Done 55340 tasks      | elapsed:  6.3min
[Parallel(n_jobs=-1)]: Done 61140 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 67340 tasks      |

Time elapsed: 0:09:07.838081
-0.46933539633535276
MSE : 99.618226%, 
In Melanesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    6.5s
[Parallel(n_jobs=-1)]: Done 1632 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 3032 tasks      | elapsed:   34.5s
[Parallel(n_jobs=-1)]: Done 4832 tasks      | elapsed:   55.1s
[Parallel(n_jobs=-1)]: Done 7032 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 9632 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 11914 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 13614 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 15514 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 17614 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 19914 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 22414 tasks      | elapsed:  4.4min
[Parallel(n_jobs=-1)]: Done 25114 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 28014 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 31114 tasks      | el

Time elapsed: 0:15:54.061134
-0.18243350037426986
MSE : 99.864998%, 
In Western Europe


  corr /= X_norms
  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done 1192 tasks      | elapsed:   16.3s
[Parallel(n_jobs=-1)]: Done 1892 tasks      | elapsed:   25.8s
[Parallel(n_jobs=-1)]: Done 2792 tasks      | elapsed:   37.9s
[Parallel(n_jobs=-1)]: Done 3892 tasks      | elapsed:   51.4s
[Parallel(n_jobs=-1)]: Done 5192 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 6692 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 8392 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 10292 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 12392 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 14692 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 17192 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 19892 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 22792 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 25892 tasks      | elap

Time elapsed: 0:17:32.033669
-0.6972016929269541
MSE : 99.687662%, 
In Northern Europe


  corr /= X_norms
  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1260 tasks      | elapsed:    8.8s
[Parallel(n_jobs=-1)]: Done 2760 tasks      | elapsed:   19.2s
[Parallel(n_jobs=-1)]: Done 4860 tasks      | elapsed:   32.7s
[Parallel(n_jobs=-1)]: Done 7560 tasks      | elapsed:   51.2s
[Parallel(n_jobs=-1)]: Done 10860 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 14760 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 19260 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 24360 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 30060 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 36360 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 43260 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 50760 tasks      | elapsed:  6.0min
[Parallel(n_jobs=-1)]: Done 58860 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 67560 tasks      | elapsed:  8.0min
[Parallel(n_jobs=-1)]: Done 76860 tasks      |

Time elapsed: 0:09:29.016002
-0.4799289787954153
MSE : 99.877344%, 
In Central Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 936 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done 2436 tasks      | elapsed:   17.0s
[Parallel(n_jobs=-1)]: Done 4536 tasks      | elapsed:   31.5s
[Parallel(n_jobs=-1)]: Done 7236 tasks      | elapsed:   50.2s
[Parallel(n_jobs=-1)]: Done 10536 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 14436 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 18936 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 24036 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 29736 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 36036 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 42936 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 50436 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 58536 tasks      | elapsed:  6.7min
[Parallel(n_jobs=-1)]: Done 67236 tasks      | elapsed:  7.7min
[Parallel(n_jobs=-1)]: Done 76536 tasks      | 

Time elapsed: 0:09:06.859234
-4.393879519438223
MSE : 99.718224%, 
In Eastern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   33.6s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:   54.0s
[Parallel(n_jobs=-1)]: Done 14040 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 19240 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 25240 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 32040 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 39640 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 48040 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 57240 tasks      | elapsed:  5.4min
[Parallel(n_jobs=-1)]: Done 67240 tasks      | elapsed:  6.3min
[Parallel(n_jobs=-1)]: Done 78040 tasks      | elapsed:  7.3min
[Parallel(n_jobs=-1)]: Done 80000 out of 80000 | elapsed:  7.5min finished


Time elapsed: 0:07:32.004999
-1.2390538104261644
MSE : 99.472950%, 
In Northern America


  corr /= X_norms
  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    6.8s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   31.8s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:   50.2s
[Parallel(n_jobs=-1)]: Done 14040 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 19240 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 25240 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 32040 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 39640 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 48040 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 57240 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 67240 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 78040 tasks      | elapsed:  6.7min
[Parallel(n_jobs=-1)]: Done 80000 out of 80000 | elapsed:  6.9min finished


Time elapsed: 0:06:55.126268
-0.7690055595359245
MSE : 99.111031%, 
In Polynesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 1632 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 3032 tasks      | elapsed:   14.1s
[Parallel(n_jobs=-1)]: Done 4832 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done 7032 tasks      | elapsed:   32.9s
[Parallel(n_jobs=-1)]: Done 9632 tasks      | elapsed:   45.3s
[Parallel(n_jobs=-1)]: Done 12632 tasks      | elapsed:   59.4s
[Parallel(n_jobs=-1)]: Done 16032 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 19832 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 24032 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 28632 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 33632 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 39032 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 44832 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 51032 tasks      | el

Time elapsed: 0:06:07.378326
-0.1609113678059338
MSE : 99.935141%, 
In Micronesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)





In [9]:
def Resmaking_for_contr(df, country):
    tmp = list(makexy(df, country))
    tmp.append(tmp[0].columns)
        
    X_train, X_test, y_train, y_test = train_test_split(tmp[0].as_matrix(), tmp[1].as_matrix(), test_size=0.1, random_state=42)
    p = Testing_grid_xgb(X_train, y_train)
        
    model = xgb.XGBRegressor(max_depth=p['max_depth'], learning_rate = p['learning_rate'], 
                                 n_estimators = p['n_estimators'], objective = p['objective'], 
                                 gamma = p['gamma'], min_child_weight = p['min_child_weight'], 
                                 max_delta_step = p['max_delta_step'], subsample = p['subsample'], 
                                 colsample_bytree = p['colsample_bytree'], reg_alpha = p['reg_alpha'], 
                                 reg_lambda = p['reg_lambda'], scale_pos_weight = p['scale_pos_weight'], 
                                 base_score = p['base_score'])
    model.fit(X_train, y_train)
        
    print ('MSE : {0:f}%, \nIn {1:s}'.format(100-mean_squared_error(model.predict(X_test), y_test), country))
        
    model.fit(tmp[0], tmp[1])
    tmp[0] = np.nan_to_num(tmp[0].as_matrix())
    tmp[1] = np.nan_to_num(tmp[1].as_matrix())
    
    rez = pd.DataFrame(model.feature_importances_, index = tmp[2]).sort_values(by = [0], ascending=False)
    rez[rez[0]!=0].to_excel('Importance/Varible_reg_in_'+str(country)+'.xlsx')
        
    rez = pd.concat([pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[0], columns=['F-value'], index=tmp[2]),
                     pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[1], columns=['p-value'], index=tmp[2])], axis=1)
    rez['p-value'] = rez['p-value'].apply(lambda x: round(x, 5))
        
    rez[rez['p-value']<0.01].sort_values(by = ['F-value'], ascending=False).to_excel('Importance/Varible_f_reg_in_'+str(country)+'.xlsx')
        
    rez = pd.DataFrame(mutual_info_regression(tmp[0], tmp[1].ravel()),
                           index = tmp[2]).sort_values(by = [0], ascending=False)
    rez[rez[0]!=0].to_excel('Importance/Varible_muatal_reg_in_'+str(country)+'.xlsx')

In [10]:
Resmaking_for_contr(df, 'RUS')

Fitting 8 folds for each of 10000 candidates, totalling 80000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    5.9s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   15.9s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:   46.4s
[Parallel(n_jobs=-1)]: Done 14040 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 19240 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 25240 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 32040 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 39640 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 48040 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 57240 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 67240 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 78040 tasks      | elapsed:  6.2min
[Parallel(n_jobs=-1)]: Done 80000 out of 80000 | elapsed:  6.3min finished


Time elapsed: 0:06:21.570331
-0.36721313107861697
MSE : 97.812259%, 
In RUS


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
