In [1]:
import pandas as pd
import numpy as np

import datetime

from tqdm._tqdm_notebook import tqdm_notebook

tqdm_notebook.pandas()

import xgboost as xgb 

from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit, train_test_split
from sklearn.feature_selection import f_regression, mutual_info_regression
from sklearn.metrics import mean_squared_error

import graphviz

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def fillnan(data, years_c):
    for name in years_c:
        #заменяем троеточия
        data[name] = data[name].apply(lambda x: np.nan if x==('..') else float(x))

    return data

def dropempt(data, years_c):
    data = fillnan(data, years_c)
    return data.dropna(thresh=10)

def Clearing(data):
    years_c = [item for ind, item in enumerate(np.array(data.columns)) if item not in ['Country Name', 'Country Code','Series Code','Series Name']]
    print ('Before clear:', len(data))

    data = dropempt(data, years_c)

    print ('After clear:', len(data))
    return data

In [3]:
def Make_region(code, reg='region', by = 'name'):
    #print code
    if code in data_cnt[by].values:
        #print code
        name = (data_cnt[by][data_cnt[by] == code].index.tolist())[0]
        return data_cnt[reg].at[name]
    else:
        return np.nan

def Sorting(data, reg='region'):
    #if data['Country Name'].get_value(len(data)-5) == np.nan:
    #    data=data.drop(data.index[[range(len(data)-5, len(data))]]).reset_index()
    if 'Region' not in data.columns:
        data['Region'] = data['Country Name'].progress_apply(lambda x: Make_region(x, reg))
        data['Region'].dropna()
        return data.reset_index()
    else:
        return data

In [4]:
df = pd.read_excel('data/Data_Extract_From_Gender_Statistics.xlsx', encoding = 'utf8').\
            append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics.xlsx', encoding='utf8')).\
            append(pd.read_excel('data/Data_Extract_From_Millennium_Development_Goals.xlsx', encoding='utf8')).\
            append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics_by_Wealth_Quintile.xlsx',encoding='utf8'))

data_cnt = pd.read_csv('all.csv', encoding='utf8')
df = Clearing(df)
defen = pd.read_excel('data/Data_Extract_From_Gender_Statistics.xlsx', sheet_name=1, encoding = 'utf8').\
               append(pd.read_excel('data/Data_Extract_From_Health_Nutrition_and_Population_Statistics.xlsx', sheet_name=1, encoding='utf8')).\
               append(pd.read_excel('data/Data_Extract_From_Millennium_Development_Goals.xlsx', sheet_name=1, encoding='utf8'))

df = Sorting(df, 'sub-region')
df.drop_duplicates(inplace = True)
df.drop(columns = 'index', inplace=True)

Before clear: 398541
After clear: 104445





In [5]:
def makexy(df, country = 'RUS'):
    #Years
    years_c = [item for ind, item in enumerate(np.array(df.columns)) if item not in ['Region','Country Name', 'Country Code','Series Code','Series Name', 'index']]
    
    #split country
    df = df[(df['Country Code']== country)].copy()
    df.drop_duplicates(inplace = True)
    
    y = df[df['Series Name'] == 'Life expectancy at birth, total (years)'][years_c].interpolate(axis=1).T
    
    if y.shape[1] == 1:
        y.columns = ['Life expectancy at birth, total (years)']
    else:
        return pd.DataFrame(), pd.DataFrame()
    
    #Transponenting and then adding 
    mask = ((df['Series Name'] != 'Life expectancy at birth, male (years)') & 
            (df['Series Name'] != 'Life expectancy at birth, female (years)') &
            (df['Series Name'] != 'Life expectancy at birth, total (years)'))
    
    X = df[mask][years_c].T
    X.columns = df[(df['Country Code']== country) & mask]['Series Name']
    X.reset_index(inplace = True, drop = True)
    del df
    
    X['Years'] = [int(name.split()[0]) for name in years_c]
    X['Region_code'] = int(Make_region(country,'region-code', by = 'alpha-3'))
    X['Reion_sub_code'] = int(Make_region(country, 'sub-region-code', 'alpha-3'))
    X['Country_code'] = int(Make_region(country, 'country-code', 'alpha-3'))
    return X.reset_index(drop = True), y.reset_index(drop = True)

def train_set(df, region = 'Eastern Europe'):
    x_train, y_train = pd.DataFrame(), pd.DataFrame()
    for country in df[df['Region'] == region]['Country Code'].unique():
        tmp = makexy(df, country)
        x_train = pd.concat([tmp[0], x_train], axis=0).fillna(-999)
        y_train = pd.concat([tmp[1], y_train], axis=0).fillna(-999)
    return x_train.as_matrix(), y_train.as_matrix(), x_train.columns
    

In [6]:
def Testing_grid_xgb(X_train, Y_train):
    cv = TimeSeriesSplit(n_splits=4)
    clf = xgb.XGBRegressor()        
    parameters = {
        'scale_pos_weight' : [1, 1.5, 1.2],
        'objective':['reg:linear'],
        'learning_rate': [0.05, 0.1, 0.2, 0.3],
        'max_depth': [3 ,4 ,6, 5],
        'min_child_weight': [2, 3 ,4, 5],
        'max_delta_step': [0, 0.2, 0.8],
        'reg_lambda': [0.5, 0.8, 0],
        'reg_alpha': [0.5, 0.8, 0],
        'silent': [1],
        'subsample': [0.2, 0.05, 0.3, 0.4],
        'colsample_bytree': [0.7, 0.8, 1],
        'n_estimators': [100 ,250, 450],
        'base_score' : [0.5, 1],
        'gamma': [0, 0.5, 1]
    }
    
    start_time = datetime.datetime.now()
    gs = RandomizedSearchCV(clf, scoring='neg_mean_squared_error', param_distributions=parameters, 
                      cv=cv, n_jobs = -1, verbose=True,  n_iter=3000)
    gs.fit(X_train,Y_train)
    print ('Time elapsed:', datetime.datetime.now() - start_time)
    
    means = gs.cv_results_['mean_test_score']
    
    print (max(means))
    return gs.best_params_

In [7]:
def Resmaking(df):
    for region in tqdm_notebook(df['Region'].dropna().unique()):
        tmp = train_set(df, region)
        X_train, X_test, y_train, y_test = train_test_split(tmp[0], tmp[1], test_size=0.25,
                                                            shuffle = True, random_state=42)
        p = Testing_grid_xgb(X_train, y_train)
        
        model = xgb.XGBRegressor(max_depth=p['max_depth'], learning_rate = p['learning_rate'], 
                                 n_estimators = p['n_estimators'], objective = p['objective'], 
                                 gamma = p['gamma'], min_child_weight = p['min_child_weight'], 
                                 max_delta_step = p['max_delta_step'], subsample = p['subsample'], 
                                 colsample_bytree = p['colsample_bytree'], reg_alpha = p['reg_alpha'], 
                                 reg_lambda = p['reg_lambda'], scale_pos_weight = p['scale_pos_weight'], 
                                 base_score = p['base_score'])
        model.fit(X_train, y_train)
        
        print ('MSE : {0:f}%, \nIn {1:s}'.format(100-mean_squared_error(model.predict(X_test), y_test), region))
        
        model.fit(tmp[0], tmp[1])
        
        rez = pd.DataFrame(model.feature_importances_, index = tmp[2]).sort_values(by = [0], ascending=False)
        rez[rez[0]!=0].to_csv('Importance/Varible_in_'+str(region)+'.csv')
        
        rez = pd.concat([pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[0], columns=['F-value'], index=tmp[2]),
                         pd.DataFrame(f_regression(tmp[0], tmp[1].ravel())[1], columns=['p-value'], index=tmp[2])], axis=1)
        rez['p-value'] = rez['p-value'].apply(lambda x: round(x, 5))
        
        rez[rez['p-value']<0.01].to_csv('Importance/Varible_f_reg_in_'+str(region)+'.csv')
        
        rez = pd.DataFrame(mutual_info_regression(tmp[0], tmp[1].ravel()),
                           index = tmp[2]).sort_values(by = [0], ascending=False)
        rez[rez[0]!=0].to_csv('Importance/Varible_muatal_in_'+str(region)+'.csv')

In [8]:
Resmaking(df)

Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 328 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done 828 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 1528 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 2428 tasks      | elapsed:   31.2s
[Parallel(n_jobs=-1)]: Done 3528 tasks      | elapsed:   44.2s
[Parallel(n_jobs=-1)]: Done 4828 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 6328 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 8028 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 9928 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.6min finished


Time elapsed: 0:02:37.576438
-2.4965683162371435
MSE : 99.398320%, 
In Southern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1339 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 2589 tasks      | elapsed:   20.4s
[Parallel(n_jobs=-1)]: Done 4339 tasks      | elapsed:   34.6s
[Parallel(n_jobs=-1)]: Done 6589 tasks      | elapsed:   54.1s
[Parallel(n_jobs=-1)]: Done 9339 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.7min finished


Time elapsed: 0:01:39.636134
-0.6631576281249901
MSE : 99.862632%, 
In Northern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 936 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 2436 tasks      | elapsed:   23.3s
[Parallel(n_jobs=-1)]: Done 4536 tasks      | elapsed:   43.4s
[Parallel(n_jobs=-1)]: Done 7236 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10536 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.9min finished


Time elapsed: 0:01:56.517480
-2.3001440684278167
MSE : 99.670746%, 
In Middle Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 328 tasks      | elapsed:    6.6s
[Parallel(n_jobs=-1)]: Done 828 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done 1528 tasks      | elapsed:   27.7s
[Parallel(n_jobs=-1)]: Done 2428 tasks      | elapsed:   44.8s
[Parallel(n_jobs=-1)]: Done 3528 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 4828 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 6328 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 8028 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 9928 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  3.9min finished


Time elapsed: 0:03:54.168465
-0.31673370451108246
MSE : 99.222551%, 
In South America


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 428 tasks      | elapsed:   10.7s
[Parallel(n_jobs=-1)]: Done 678 tasks      | elapsed:   15.9s
[Parallel(n_jobs=-1)]: Done 1028 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 1478 tasks      | elapsed:   37.0s
[Parallel(n_jobs=-1)]: Done 2028 tasks      | elapsed:   49.6s
[Parallel(n_jobs=-1)]: Done 2678 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 3428 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 4278 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 5228 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 6278 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 7428 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 8678 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 10028 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 11478 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed

Time elapsed: 0:04:29.609567
-0.719829674634333
MSE : 99.683198%, 
In Western Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 582 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 1582 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 2982 tasks      | elapsed:   17.7s
[Parallel(n_jobs=-1)]: Done 4782 tasks      | elapsed:   29.1s
[Parallel(n_jobs=-1)]: Done 6982 tasks      | elapsed:   43.1s
[Parallel(n_jobs=-1)]: Done 9582 tasks      | elapsed:   59.1s
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.2min finished


Time elapsed: 0:01:13.534583
-0.12378553586740494
MSE : 99.735411%, 
In Australia and New Zealand


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done 1292 tasks      | elapsed:   18.4s
[Parallel(n_jobs=-1)]: Done 1992 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done 2892 tasks      | elapsed:   43.0s
[Parallel(n_jobs=-1)]: Done 3992 tasks      | elapsed:   58.8s
[Parallel(n_jobs=-1)]: Done 5292 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 6792 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 8492 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 10392 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  3.1min finished


Time elapsed: 0:03:09.064875
-3.573577729982969
MSE : 99.617627%, 
In Caribbean


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 808 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done 1808 tasks      | elapsed:   25.1s
[Parallel(n_jobs=-1)]: Done 3208 tasks      | elapsed:   44.9s
[Parallel(n_jobs=-1)]: Done 5008 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 6158 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 7458 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 8958 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 10658 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.7min finished


Time elapsed: 0:02:44.166914
-0.9943356968514577
MSE : 99.445081%, 
In Eastern Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 1792 tasks      | elapsed:   21.8s
[Parallel(n_jobs=-1)]: Done 3192 tasks      | elapsed:   39.5s
[Parallel(n_jobs=-1)]: Done 4632 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 5732 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 7032 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 8532 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 10232 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.6min finished


Time elapsed: 0:02:39.203679
-1.0423107332601427
MSE : 99.497081%, 
In Central America


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  36 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 591 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done 1341 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 2071 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 2521 tasks      | elapsed:   44.3s
[Parallel(n_jobs=-1)]: Done 3075 tasks      | elapsed:   54.7s
[Parallel(n_jobs=-1)]: Done 4372 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 5872 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 7572 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 9472 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 11572 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  3.5min finished


Time elapsed: 0:03:29.431455
-1.392178411638842
MSE : 98.366339%, 
In Western Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 2112 tasks      | elapsed:   19.5s
[Parallel(n_jobs=-1)]: Done 3512 tasks      | elapsed:   32.3s
[Parallel(n_jobs=-1)]: Done 5312 tasks      | elapsed:   48.3s
[Parallel(n_jobs=-1)]: Done 7512 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10112 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.8min finished


Time elapsed: 0:01:49.649392
-4.338456778434453
MSE : 98.243487%, 
In Southern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 538 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 850 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 1550 tasks      | elapsed:   30.4s
[Parallel(n_jobs=-1)]: Done 2450 tasks      | elapsed:   48.9s
[Parallel(n_jobs=-1)]: Done 3550 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 4850 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 6350 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 8050 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 9950 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  3.9min finished


Time elapsed: 0:03:57.024736
-1.1359711939845714
MSE : 99.787084%, 
In Eastern Africa


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 804 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 1380 tasks      | elapsed:   19.3s
[Parallel(n_jobs=-1)]: Done 2080 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 2980 tasks      | elapsed:   42.8s
[Parallel(n_jobs=-1)]: Done 4080 tasks      | elapsed:   58.7s
[Parallel(n_jobs=-1)]: Done 5380 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 6880 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 8580 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 10480 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.8min finished


Time elapsed: 0:02:50.364077
-1.162605535989575
MSE : 99.449110%, 
In South-Eastern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 780 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 1364 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done 2064 tasks      | elapsed:   33.5s
[Parallel(n_jobs=-1)]: Done 2964 tasks      | elapsed:   49.9s
[Parallel(n_jobs=-1)]: Done 4064 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 5364 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 6864 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 8564 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 10464 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  3.4min finished


Time elapsed: 0:03:24.022449
-0.694905509800148
MSE : 99.751964%, 
In Southern Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 1278 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 2778 tasks      | elapsed:   18.9s
[Parallel(n_jobs=-1)]: Done 4878 tasks      | elapsed:   34.0s
[Parallel(n_jobs=-1)]: Done 7578 tasks      | elapsed:   52.5s
[Parallel(n_jobs=-1)]: Done 10878 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.4min finished


Time elapsed: 0:01:23.935668
-0.5390501320173476
MSE : 99.842377%, 
In Melanesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 820 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 1820 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done 3220 tasks      | elapsed:   38.0s
[Parallel(n_jobs=-1)]: Done 5020 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 7220 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 9820 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.4min finished


Time elapsed: 0:02:24.861966
-0.1982230547060188
MSE : 99.849216%, 
In Western Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done 812 tasks      | elapsed:   11.1s
[Parallel(n_jobs=-1)]: Done 1812 tasks      | elapsed:   23.8s
[Parallel(n_jobs=-1)]: Done 3212 tasks      | elapsed:   44.1s
[Parallel(n_jobs=-1)]: Done 4160 tasks      | elapsed:   57.3s
[Parallel(n_jobs=-1)]: Done 5260 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 6560 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 8060 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 9760 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 11660 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  2.8min finished


Time elapsed: 0:02:50.436262
-0.6913374618045836
MSE : 99.788240%, 
In Northern Europe


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  32 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1144 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 3144 tasks      | elapsed:   23.6s
[Parallel(n_jobs=-1)]: Done 5944 tasks      | elapsed:   45.5s
[Parallel(n_jobs=-1)]: Done 9544 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.5min finished


Time elapsed: 0:01:31.863909
-0.32309207260518097
MSE : 99.548115%, 
In Central Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  44 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 1544 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 4044 tasks      | elapsed:   27.9s
[Parallel(n_jobs=-1)]: Done 7544 tasks      | elapsed:   52.1s
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.4min finished


Time elapsed: 0:01:22.002705
-0.8687858870469481
MSE : 96.291909%, 
In Eastern Asia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 1600 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 3600 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 6400 tasks      | elapsed:   36.6s
[Parallel(n_jobs=-1)]: Done 10000 tasks      | elapsed:   57.4s
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.1min finished


Time elapsed: 0:01:08.510370
-1.5720284220978231
MSE : 97.085735%, 
In Northern America


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-1)]: Done 2152 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done 5652 tasks      | elapsed:   28.4s
[Parallel(n_jobs=-1)]: Done 10552 tasks      | elapsed:   53.4s
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:  1.0min finished


Time elapsed: 0:01:00.921593
-0.6144293615965793
MSE : 99.927275%, 
In Polynesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


Fitting 4 folds for each of 3000 candidates, totalling 12000 fits


[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1240 tasks      | elapsed:    5.8s
[Parallel(n_jobs=-1)]: Done 3240 tasks      | elapsed:   15.4s
[Parallel(n_jobs=-1)]: Done 6040 tasks      | elapsed:   28.5s
[Parallel(n_jobs=-1)]: Done 9640 tasks      | elapsed:   45.1s
[Parallel(n_jobs=-1)]: Done 12000 out of 12000 | elapsed:   55.8s finished


Time elapsed: 0:00:56.094617
-3.129318610345384
MSE : 99.855260%, 
In Micronesia


  corr /= X_norms
  F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)



