# Pipeline--Model Building 

In [2]:
import datetime
import json
import os
import pprint
import sys
import time

import pandas as pd
import webhoseio

pd.set_option('display.max_columns', None)

import pymongo
from pymongo import MongoClient

from hatesonar import Sonar ##Using hate speech detection library
sonar = Sonar()

import nltk
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer() 
from nltk.tokenize import word_tokenize
import csv
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english")

nltk.download('stopwords')
stopwords_vocab = nltk.corpus.stopwords.words('english')

from sklearn.model_selection import train_test_split

import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
import gensim
from gensim import models
from scipy import sparse
import re



[nltk_data] Downloading package stopwords to C:\Users\May
[nltk_data]     Xiao\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!




### This section only shows how the data was originally queried from Webhose. Webhose returns at most the last 30 days of data, so this query, which starts on October 25, 2018, no longer works. Please skip the cells below and pull the data directly from MongoDB instead.

## Getting Train and Test Data from Webhose (Don't Run Cells Below Please)

In [None]:
# Webhose API token. It is read from the WEBHOSE_API_KEY environment variable so that the secret
# is not stored in the notebook; a missing variable fails here with a KeyError.
# NOTE(review): the token that used to be hard-coded here has been published with the notebook
# and should be revoked.
YOUR_API_KEY = os.environ['WEBHOSE_API_KEY']

In [None]:
# Register the API token with the webhoseio client before any query is issued.
webhoseio.config(token=YOUR_API_KEY)

In [None]:
# Start of the query window: local midnight on 2018-10-25.
d = datetime.date(2018,10,25)
# time.mktime() returns seconds since the epoch, while the webhose `ts` parameter is expressed in
# milliseconds, so convert before handing the value to the query.
unixtime = int(time.mktime(d.timetuple())) * 1000

In [None]:
# Search parameters for the first page of results, newest publications first, starting at `unixtime`.
# NOTE(review): "site", "language" and "accuracy_confidence" are sent as separate request
# parameters here rather than as filters inside "q" — confirm that the API honours them.
query_params = {
    "q": "Anti-semitism OR Antifa OR jews",
    "site": "8ch.net",
    "language": "english",
    "sort": "published",
    "accuracy_confidence": "high",
    "ts": unixtime,
    "format": "json",
}
output = webhoseio.query("filterWebContent", query_params)

In [None]:
# Start the accumulated post list with a copy of the posts from the first result page.
output_next_list = list(output['posts'])

In [None]:
# Upper bound on the number of follow-up requests (the /100 assumes 100 posts per result page).
page_num = 1+int(output['totalResults']/100)

n=0  # follow-up pages fetched so far
a=0  # posts collected from the follow-up pages

while n<=page_num:
    output_next = webhoseio.get_next()
    page_posts = output_next['posts']
    if not page_posts:
        # The result set is exhausted: stop instead of spending further API requests on
        # empty pages (the first page was already fetched by the query cell).
        break
    output_next_list.extend(page_posts)
    a += len(page_posts)
    n+=1

## Import Data into Cloud MongoDB (Don't Run Cells Below Please)

In [None]:
# Connect to the MongoDB cluster. The connection string embeds the user name and password, so it
# is read from the MONGODB_URI environment variable instead of being stored in the notebook.
# NOTE(review): the credentials that used to be hard-coded here are public and should be rotated.
client = pymongo.MongoClient(os.environ['MONGODB_URI'])
# All documents of this project live in the `test` database.
db = client.test

In [None]:
# Print the database handle to check which database `db` is bound to.
print (db)

In [None]:
# Bulk-insert every post collected from webhose into the `pipeline` collection of `db`.
record_id = db.pipeline.insert_many(output_next_list)

In [None]:
# Show the result of the insert and the collection names now present in the database.
print (record_id)
print (db.list_collection_names())

## Query Data from MongoDB and Filter Them by Hate Speech Library (Start Running Cells Below Please)

In [6]:
# Connect to the MongoDB cluster that holds the collected posts. The connection string embeds the
# user name and password, so it is read from the MONGODB_URI environment variable instead of
# being stored in the notebook.
# NOTE(review): the credentials that used to be hard-coded here are public and should be rotated.
client = MongoClient(os.environ['MONGODB_URI'])

In [7]:
# Select the `test` database on the cluster.
db = client.test

In [8]:
# Handle to the `pipeline` collection.
# NOTE(review): `collection` is not referenced again in the cells shown; `posts` below is used instead.
collection = db.pipeline

In [9]:
# List the non-system collections of the database. Database.collection_names() is deprecated in
# PyMongo, so use list_collection_names() (as the import section above already does) and drop
# the "system." collections by name.
[name for name in db.list_collection_names() if not name.startswith('system.')]

  """Entry point for launching an IPython kernel.


['pipeline']

In [10]:
# The `pipeline` collection again, accessed by key; `posts` is the handle the cells below query.
posts = db['pipeline']

In [11]:
# Count all documents in the collection (the empty filter matches every document).
posts.count_documents({})

39741

In [12]:
# Keep only the posts whose top class is hate speech with a confidence of at least 0.5 and store
# them in the hate_speech_posts list. Every other post is dropped (it is not stored anywhere).
hate_speech_posts = []
for post in posts.find():
    a = post.get('text')
    if not a:
        # No text to classify (missing or empty field): skip instead of passing None to the classifier.
        continue
    ab = sonar.ping(text=a)
    # assumes the first entry of 'classes' is the hate_speech class — TODO confirm against hatesonar's output
    if ab.get('top_class') == 'hate_speech' and ab.get('classes')[0].get('confidence') >= 0.5:
        hate_speech_posts.append(post)

In [13]:
# Define a function to tokenize and lemmatize a post, and then check for the presence of action words
def _has_first_person_intent(lemmas):
    """Return True when the lower-cased ``lemmas`` contain "I" with "will"/"would"/"'ll" or with "be" + "go" + "to"."""
    if 'i' not in lemmas:
        return False
    if 'will' in lemmas or 'would' in lemmas or "'ll" in lemmas:
        return True
    return all(word in lemmas for word in ('be', 'go', 'to'))


def threats_detections(text, terms, require_intent=False):
    """
    Report whether any sentence of ``text`` contains one of the action ``terms``.

    Each sentence is word-tokenized and POS-tagged; tokens tagged as verbs are lemmatized as
    verbs, all other tokens with the lemmatizer's default part of speech. The lemmas are then
    compared with ``terms``.

    @text(string): the post to inspect
    @terms(iterable of string): action words; compared as whole tokens, so a multi-word term
        such as 'blow up' can never match
    @require_intent(bool): False (default) reproduces the historical behaviour, in which a
        sentence matches as soon as it contains an action term (case-sensitive comparison).
        True additionally requires a first-person statement of intent in the same sentence
        ("I will/would/'ll ..." or "I am going to ...") and compares lower-cased lemmas, so
        ``terms`` are expected to be lower-case.
    @return: 'yes' if a sentence matches, otherwise None

    Why the default is the permissive check: the earlier implementation wrote the intent test as
    ``"I" or "I'll" in lemmas`` / ``"will" or "would" in lemmas``, which Python evaluates as always
    true, so only the action-term test ever took effect (the ``elif`` branch and the lower-casing
    loop had no effect either). The manually labelled row indices in ``the_list`` further down
    refer to the output of that behaviour, so switching the default would invalidate those labels;
    re-label the data before calling this with ``require_intent=True``.
    """
    term_set = set(terms)
    for sentence in sent_tokenize(text):
        tagged = nltk.pos_tag(word_tokenize(sentence))
        lemmas = [
            wordnet_lemmatizer.lemmatize(token, pos='v') if tag.startswith('V')
            else wordnet_lemmatizer.lemmatize(token)
            for token, tag in tagged
        ]

        if not require_intent:
            if term_set.intersection(lemmas):
                return 'yes'
            continue

        lowered = set(lemma.lower() for lemma in lemmas)
        if term_set & lowered and _has_first_person_intent(lowered):
            return 'yes'
    return None

In [14]:
# import action words and store in action_words_list list
# newline='' is the mode the csv module expects for files handed to csv.reader.
with open("action_words.csv", newline='') as action_file:
    reader = csv.reader(action_file, delimiter="\t")
    action_words = list(reader)

# Keep the first field of every row. Blank lines are parsed as empty rows and are skipped,
# because indexing them would raise IndexError.
action_words_list = [row[0] for row in action_words if row]
print (action_words_list)

['abuse', 'assault', 'alarm', 'alert', 'ambush', 'ammunition', 'anguish', 'annihilate', 'arrest', 'assassinate', 'assault', 'attack', 'barrage', 'barricade', 'battle', 'beat', 'blackmail', 'blast', 'blindside', 'blow up', 'bomb', 'bombard', 'brawl', 'breach', 'bullet', 'bunker', 'burn', 'bury', 'caution', 'char', 'catch', 'chase', 'cheat', 'chop', 'combat', 'commit', 'conduct', 'conflagrate', 'conspire', 'conquer', 'counterattack', 'damage', 'dash', 'defend', 'demolish', 'destroy', 'deter', 'detonate', 'devastate', 'direct', 'disarray', 'dominate', 'dscape', 'exonerate', 'execute', 'explode', 'expunge', 'extort', 'ferment', 'feud', 'fight', 'firebomb', 'force', 'fright', 'garrison', 'grenade', 'guard', 'gun', 'hammer', 'harm', 'hijack', 'hit', 'ignite', 'incite', 'inflame', 'interdict', 'intervene', 'intimidate', 'invade', 'ire', 'jeer', 'kidnap', 'kill', 'knife', 'lynch', 'maim', 'manacle', 'maraud', 'massacre', 'menace', 'murder', 'neutralize', 'nitrate', 'overthrow', 'penalize', 'pe

In [15]:
#tag every hate-speech post with a 'threaten_statement' attribute ('True' or 'False', stored as strings)
threaten_statement_T = {'threaten_statement':'True'}
threaten_statement_F = {'threaten_statement':'False'}

for post in hate_speech_posts:
    has_action_word = threats_detections(post['text'], action_words_list) == 'yes'
    post.update(threaten_statement_T if has_action_word else threaten_statement_F)

In [16]:
#collect the text of every post flagged as containing an action word into mixed_intention
mixed_intention = [
    post['text']
    for post in hate_speech_posts
    if post['threaten_statement'] == 'True'
]

print(len(mixed_intention))

705


In [17]:
# One row per flagged post; the text ends up in a single column labelled 0.
aaa = pd.DataFrame(mixed_intention)

In [18]:
#we manually identified the posts that contain a threatening statement; their row positions in aaa
#are listed here and those rows are labelled "1" (every other row is labelled "0")
the_list = [16,55,74,77,78,86,104,119,131,170,\
        178,197,205,217,236,254,256,258,309,312,373,385,393,\
        604, 611, 632,\
        691, 704]
# Select the hand-picked rows. This must index with the_list: passing the builtin `list` makes
# pandas treat it as a callable indexer and silently selects the wrong rows.
selected_intention = aaa.iloc[the_list,:]

# Label all rows in one step. The result is cast to float (0.0 / 1.0), which should match the
# dtype obtained when the new column was filled cell by cell.
aaa['y'] = aaa.index.isin(the_list).astype(float)

In [19]:
#Since the number of posts labeled as "1" is so limited, we found more text containing threatening statements
#and store it in dataframes to increase the number of threatening statements in the train dataset

# Extra positives, part 1: the entry with index label 44 of the tab-separated file, labelled 1.
# NOTE(review): assumes label 44 is unique, so new.loc[44] is a single row (a Series) — confirm.
new = pd.read_csv('new_posts2.txt', sep='\t', index_col=0)
new1 = pd.DataFrame(new.loc[44])
new1['y']= 1
new1.columns = [0, 'y']  # same column labels as aaa: text in column 0, label in 'y'

# Extra positives, part 2: every row of intention_ww.txt, labelled 1.
# NOTE(review): the rename below requires exactly one data column besides the index — confirm.
new2 = pd.read_csv('intention_ww.txt',  index_col=0)
new2['y']= 1
new2.columns = [0, 'y']

In [20]:
#stack the hand-labelled posts and the two sets of extra positives into a new dataframe called aaa1
#(ignore_index=True renumbers the rows 0..n-1)
aaa1 = pd.concat([aaa,new1,new2],ignore_index=True)

In [21]:
#split aaa1 into train and test in a ratio of 8:2; aaa1 holds the hand-labelled posts, most of
#which are labelled "0", plus the extra "1" posts — it does not contain only "1" labels
# NOTE(review): the split is not stratified, so the few "1" rows may be divided unevenly between
# the two parts — consider stratify=aaa1['y'].
train, test = train_test_split(aaa1, test_size=0.2, random_state=42)

In [22]:
#collect the text of every post that was not flagged as containing an action word into no_intention
no_intention = [
    post['text']
    for post in hate_speech_posts
    if post['threaten_statement'] == 'False'
]

print(len(no_intention))

3975


In [23]:
#wrap the unflagged posts in a dataframe (text in column 0) and label every row "0"
df_no_intent = pd.DataFrame(no_intention).assign(y=0)

In [24]:
#we split the "0" labeled dataframe into train and test: the first 70% of the rows are used for
#training and the remainder for testing (rows are taken in order, not shuffled)
# The cut-off is derived from the data instead of being a fixed row number, so it stays correct
# when the number of collected posts changes; for the 3975 rows reported above it is 2782.
NO_INTENT_TRAIN_FRACTION = 0.7
n_no_intent_train = int(len(df_no_intent) * NO_INTENT_TRAIN_FRACTION)
train_1 = df_no_intent.iloc[:n_no_intent_train,:]
test_1 = df_no_intent.iloc[n_no_intent_train:,:]

In [25]:
#we concatenate the train parts and the test parts of both datasets (hand-labelled + extra positives, and unflagged "0" posts)
#ignore_index=True renumbers the rows of each result 0..n-1
train_total = pd.concat([train,train_1],ignore_index=True)
test_total = pd.concat([test,test_1],ignore_index=True)

In [26]:
# Report how many labelled (non-null y) observations each split contains.
train_count = train_total['y'].count()
test_count = test_total['y'].count()
print("Train dataset has " + str(train_count) + ' observations.')
print("Test dataset has " + str(test_count) + ' observations.')

Train dataset has 3355 observations.
Test dataset has 1337 observations.


In [27]:
# Training texts (column 0) and their labels (column 'y').
X_train, y_train = train_total[0], train_total['y']

In [28]:
# Test texts (column 0) and their labels (column 'y').
X_test, y_test = test_total[0], test_total['y']

In [29]:
#helper that turns one dataframe column into a plain list of strings for text preprocessing
def pd_to_list(col_name, df):
    '''
    Extract a single column of a data frame as a list of strings.
    @col_name(string): label of the column to extract
    @df(DataFrame): frame holding the column; its values must be strings
    @return (list): the column's values, one string per row
    '''
    single_column = df.loc[:, [col_name]]
    # every row is a one-element list; joining it yields the bare string
    return [''.join(row) for row in single_column.values.tolist()]


In [30]:
def tokenize_only(item):
    '''
    Lower-case and word-tokenize a text, dropping stop words and tokens without letters.
    @item(string): text
    @return filtered_tokens(list of string): tokens that are not in stopwords_vocab and
        contain at least one ASCII letter (numeric tokens and raw punctuation are removed)
    '''
    has_letter = re.compile('[a-zA-Z]')
    filtered_tokens = [
        str(token)
        for token in word_tokenize(item.lower())
        if token not in stopwords_vocab and has_letter.search(str(token))
    ]
    return filtered_tokens

In [31]:
def tokenize_and_stem(item):
    '''
    Tokenize a text with tokenize_only and reduce every token to its stem.
    @item(string): text
    @return (list of string): stemmed tokens, in their original order
    '''
    return list(map(stemmer.stem, tokenize_only(item)))

In [32]:
#this function transforms the text column of the train and test datasets into TF-IDF matrices
#we use a character vectorizer and a word vectorizer at the same time to better extract info from a set of words
def tfidf_vectorizer (train, test):
    '''
    Build combined word-level and character-level TF-IDF features.
    Both vectorizers are fitted on the training texts only and then applied to the test texts.
    @train(iterable of string): training texts
    @test(iterable of string): test texts
    @return (train_tfidf_matrix, test_tfidf_matrix): sparse matrices whose columns are the word
        features followed by the character n-gram features
    '''
    shared_options = dict(max_df=0.9, max_features=100000, min_df=0.01, use_idf=True)

    # Character 1- to 4-grams. No tokenizer or stop-word list is given here: scikit-learn
    # applies those options only when analyzer == 'word', so they had no effect on this vectorizer.
    char_vector = TfidfVectorizer(analyzer='char', ngram_range=(1,4), **shared_options)

    # Stemmed unigrams.
    # NOTE(review): tokenize_and_stem already removes stop words, and the built-in 'english' list
    # is not stemmed, which is what the stop_words consistency warning in the output refers to.
    # The option is kept so that the feature set does not change.
    word_vector = TfidfVectorizer(analyzer='word', ngram_range=(1,1), stop_words='english',
                                  tokenizer=tokenize_and_stem, **shared_options)

    train_char_matrix = char_vector.fit_transform(train)
    test_char_matrix = char_vector.transform(test)

    train_word_matrix = word_vector.fit_transform(train)
    test_word_matrix = word_vector.transform(test)

    train_tfidf_matrix = sparse.hstack([train_word_matrix, train_char_matrix])
    test_tfidf_matrix = sparse.hstack([test_word_matrix, test_char_matrix])

    return train_tfidf_matrix, test_tfidf_matrix

In [33]:
# Fit the TF-IDF vocabularies on the training texts and transform both splits with them.
X_train_vec, X_test_vec = tfidf_vectorizer(X_train, X_test)

  'stop_words.' % sorted(inconsistent))


In [34]:
# Dimensions of the training feature matrix (rows, features).
X_train_vec.shape

(3355, 6040)

In [35]:
# Dimensions of the test feature matrix; the feature count must equal that of the training matrix.
X_test_vec.shape

(1337, 6040)

In [36]:
#convert the sparse matrices to dense arrays and wrap them in dataframes so that they can be imported into h2o
# NOTE(review): toarray() materialises the full dense matrix; memory grows with rows x features.
train_x = pd.DataFrame(X_train_vec.toarray())
test_x = pd.DataFrame(X_test_vec.toarray())

In [37]:
# Append the labels as the last column. concat aligns on the index; both sides carry a default
# 0..n-1 index here (train_total was built with ignore_index=True).
train_pre = pd.concat([train_x,y_train],axis = 1)

In [38]:
# Append the labels as the last column. concat aligns on the index; both sides carry a default
# 0..n-1 index here (test_total was built with ignore_index=True).
test_pre = pd.concat([test_x,y_test],axis = 1)

In [39]:
# The column labels are the integer feature positions plus 'y'; cast them all to strings
# (presumably so that H2O receives uniform column names — confirm).
train_pre.columns = train_pre.columns.astype(str)

In [40]:
# The column labels are the integer feature positions plus 'y'; cast them all to strings
# (presumably so that H2O receives uniform column names — confirm).
test_pre.columns = test_pre.columns.astype(str)

## Building the Model with the H2O Library

In [41]:
import h2o
from h2o.automl import H2OAutoML

In [42]:
# Connect to a local H2O instance; as the output below shows, one is started when none is running.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
  Starting server from C:\Users\May Xiao\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\MAYXIA~1\AppData\Local\Temp\tmp374kq47p
  JVM stdout: C:\Users\MAYXIA~1\AppData\Local\Temp\tmp374kq47p\h2o_May_Xiao_started_from_python.out
  JVM stderr: C:\Users\MAYXIA~1\AppData\Local\Temp\tmp374kq47p\h2o_May_Xiao_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,02 secs
H2O cluster timezone:,America/New_York
H2O data parsing timezone:,UTC
H2O cluster version:,3.22.0.2
H2O cluster version age:,"21 days, 5 hours and 46 minutes"
H2O cluster name:,H2O_from_python_May_Xiao_j1sy68
H2O cluster total nodes:,1
H2O cluster free memory:,3.524 Gb
H2O cluster total cores:,4
H2O cluster allowed cores:,4


In [43]:
# Upload the training table to H2O.
# NOTE(review): this rebinds `train`, which until now held the pandas split produced by
# train_test_split; re-running the earlier concat cell after this one would pick up the H2OFrame.
train = h2o.H2OFrame(train_pre)

Parse progress: |█████████████████████████████████████████████████████████| 100%


In [44]:
# Upload the test table to H2O.
# NOTE(review): this rebinds `test`, which until now held the pandas split produced by
# train_test_split; re-running the earlier concat cell after this one would pick up the H2OFrame.
test = h2o.H2OFrame(test_pre)

Parse progress: |█████████████████████████████████████████████████████████| 100%


In [45]:
# Response column name, and the predictor columns (every other column of the training frame).
y = 'y'
X = [name for name in train.columns if name != y]

In [46]:
# Convert the label column to a factor (categorical) in both frames; the models below report
# binomial metrics accordingly.
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

### Train H2OAutoML model 

In [55]:
# AutoML run: 15-fold cross-validation, at most 100 models or 700 seconds, models ranked by logloss.
# NOTE(review): the run is bounded by wall-clock time, so the set of models that gets built may
# differ between machines even though the seed is fixed.
automl_settings = dict(
    nfolds=15,
    max_models=100,
    max_runtime_secs=700,
    seed=1,
    stopping_metric='logloss',
    balance_classes=True,
    sort_metric='logloss',
)
aml4 = H2OAutoML(**automl_settings)
aml4.train(x=X, y=y, training_frame=train)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [56]:
# Display the AutoML leaderboard.
aml4.leaderboard

model_id,auc,logloss,mean_per_class_error,rmse,mse
DRF_1_AutoML_20181213_160621,0.831054,0.107072,0.341943,0.0958155,0.00918061




In [57]:
# Evaluate the leading AutoML model on the held-out H2O test frame.
aml4.leader.model_performance(test)


ModelMetricsBinomial: drf
** Reported on test data. **

MSE: 0.008208410044109389
RMSE: 0.09060027618119819
LogLoss: 0.058013046858954066
Mean Per-Class Error: 0.12861648155765804
AUC: 0.9327094474153298
pr_auc: 0.09564683265254312
Gini: 0.8654188948306596
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.0009595777328863209: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1290.0,36.0,0.0271,(36.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1295.0,42.0,0.0307,(41.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0009596,0.2264151,31.0
max f2,0.0009596,0.3488372,31.0
max f0point5,0.0041453,0.2173913,2.0
max accuracy,0.0048391,0.9910247,0.0
max precision,0.0041453,0.3333333,2.0
max recall,0.0002989,1.0,83.0
max specificity,0.0048391,0.9992459,0.0
max absolute_mcc,0.0009596,0.2684072,31.0
max min_per_class_accuracy,0.0006192,0.8181818,54.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.02 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0017110,8.6818182,8.6818182,0.0714286,0.0026190,0.0714286,0.0026190,0.0909091,0.0909091,768.1818182,768.1818182
,2,0.0201945,0.0012994,18.6993007,13.5050505,0.1538462,0.0014645,0.1111111,0.0020631,0.1818182,0.2727273,1769.9300699,1250.5050505
,3,0.0306657,0.0009598,17.3636364,14.8226164,0.1428571,0.0010307,0.1219512,0.0017106,0.1818182,0.4545455,1636.3636364,1382.2616408
,4,0.0403889,0.0009534,9.3496503,13.5050505,0.0769231,0.0009582,0.1111111,0.0015295,0.0909091,0.5454545,834.9650350,1250.5050505
,5,0.0501122,0.0009273,9.3496503,12.6987788,0.0769231,0.0009367,0.1044776,0.0014144,0.0909091,0.6363636,834.9650350,1169.8778833
,6,0.1017203,0.0006167,3.5230567,8.0434492,0.0289855,0.0006274,0.0661765,0.0010151,0.1818182,0.8181818,252.3056653,704.3449198
,7,0.2707554,0.0002997,0.5378117,3.3576092,0.0044248,0.0003524,0.0276243,0.0006014,0.0909091,0.9090909,-46.2188254,235.7609242
,8,0.3006731,0.0002974,3.0386364,3.3258706,0.025,0.0002985,0.0273632,0.0005712,0.0909091,1.0,203.8636364,232.5870647
,9,1.0,0.0,0.0,1.0,0.0,0.0000222,0.0082274,0.0001872,0.0,1.0,-100.0,0.0







In [576]:
# Save the leading AutoML model under the auto_h2o_best directory and keep the returned path.
# (force=True presumably overwrites an existing file of the same name — confirm.)
model_path = h2o.save_model(model=aml4.leader, path="auto_h2o_best", force=True)

In [577]:
# Show where the model was written.
# NOTE(review): the recorded output names a model dated 20181204, while the leaderboard above
# lists one dated 20181213, so this output appears to come from an earlier session.
print(model_path)

C:\Users\May Xiao\demo\auto_h2o_best\DRF_1_AutoML_20181204_235019


### Generalized Linear Model with Grid Search

In [47]:
from h2o.grid.grid_search import H2OGridSearch
from h2o.estimators.glm import H2OGeneralizedLinearEstimator

In [48]:
# Candidate values for the GLM elastic-net parameters alpha and lambda.
hyper_parameters = { 'alpha': [0.01,0.1,0.3,0.5,0.7,0.9], 
                    'lambda': [1e-4,1e-3,1e-2,1e-1,1,10] }
      

# Random search over the candidates with AUC-based early stopping of the search.
search_criteria = { 'strategy': "RandomDiscrete", 'seed': 1,
                    'stopping_metric': "AUC", 
                    'stopping_tolerance': 0.001,
                    'stopping_rounds': 2 }
            
# Binomial GLM evaluated with 15-fold cross-validation.
# NOTE(review): the cross-validation summary in the recorded output shows folds with NaN AUC and
# 0.0 accuracy; with so few "1" rows some folds probably contain no positive example — consider
# fewer folds or a stratified fold assignment.
random_plus_manual = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial', nfolds=15,seed=1,balance_classes=True),
                                   hyper_params=hyper_parameters, 
                                   grid_id="random_plus_manual", 
                                   search_criteria=search_criteria)
    
random_plus_manual.train(x=X,y=y, training_frame=train)

glm Grid Build progress: |████████████████████████████████████████████████| 100%




In [49]:
# Display the grid's models with their hyper-parameters and logloss.
random_plus_manual.show()

       alpha    lambda                    model_ids               logloss
0      [0.3]    [0.01]   random_plus_manual_model_9  0.035902316333919826
1      [0.5]    [0.01]  random_plus_manual_model_12  0.036597366264307235
2     [0.01]     [0.1]  random_plus_manual_model_24   0.03733352185179091
3      [0.1]    [0.01]  random_plus_manual_model_16   0.03761673364769872
4      [0.7]    [0.01]  random_plus_manual_model_31  0.039856316816505144
5      [0.9]   [0.001]  random_plus_manual_model_34  0.041118949264909846
6      [0.7]   [0.001]  random_plus_manual_model_30   0.04193091786744568
7      [0.9]    [0.01]  random_plus_manual_model_23   0.04248499005949807
8      [0.5]   [0.001]  random_plus_manual_model_29  0.043020133300378856
9     [0.01]    [0.01]  random_plus_manual_model_35  0.043976464703850686
10     [0.3]   [0.001]  random_plus_manual_model_21    0.0442436423658609
11     [0.1]   [0.001]   random_plus_manual_model_5   0.04604385494833087
12     [0.1]     [0.1]   random_plus_m

In [50]:
# Show the details of the first model of the grid (random_plus_manual_model_9 in the recorded output).
random_plus_manual[0]

Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  random_plus_manual_model_9


ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.0032817956088473936
RMSE: 0.05728695845345076
LogLoss: 0.013693330516312921
Null degrees of freedom: 3354
Residual degrees of freedom: 3254
Null deviance: 333.3014546974723
Residual deviance: 91.88224776445973
AIC: 293.88224776445975
AUC: 0.9989010305430568
pr_auc: 0.909058123350612
Gini: 0.9978020610861136
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.172829030034661: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,3322.0,4.0,0.0012,(4.0/3326.0)
1,2.0,27.0,0.069,(2.0/29.0)
Total,3324.0,31.0,0.0018,(6.0/3355.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1728290,0.9,29.0
max f2,0.1728290,0.9183673,29.0
max f0point5,0.2698770,0.9504132,22.0
max accuracy,0.2698770,0.9982116,22.0
max precision,0.7040327,1.0,0.0
max recall,0.0374256,1.0,78.0
max specificity,0.7040327,1.0,0.0
max absolute_mcc,0.1728290,0.8996077,29.0
max min_per_class_accuracy,0.0374256,0.9828623,78.0


Gains/Lift Table: Avg response rate:  0.86 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104322,0.1543095,89.2463054,89.2463054,0.7714286,0.3994653,0.7714286,0.3994653,0.9310345,0.9310345,8824.6305419,8824.6305419
,2,0.0202683,0.0490881,3.5057471,47.6369168,0.0303030,0.0755683,0.4117647,0.2422800,0.0344828,0.9655172,250.5747126,4663.6916836
,3,0.0301043,0.0312018,3.5057471,33.2178218,0.0303030,0.0385947,0.2871287,0.1757294,0.0344828,1.0,250.5747126,3221.7821782
,4,0.0402385,0.0232638,0.0,24.8518519,0.0,0.0261479,0.2148148,0.1380570,0.0,1.0,-100.0,2385.1851852
,5,0.0500745,0.0190027,0.0,19.9702381,0.0,0.0209980,0.1726190,0.1150633,0.0,1.0,-100.0,1897.0238095
,6,0.1001490,0.0092722,0.0,9.9851190,0.0,0.0131500,0.0863095,0.0641066,0.0,1.0,-100.0,898.5119048
,7,0.1502235,0.0065600,0.0,6.6567460,0.0,0.0076662,0.0575397,0.0452931,0.0,1.0,-100.0,565.6746032
,8,0.2,0.0050433,0.0,5.0,0.0,0.0057363,0.0432191,0.0354482,0.0,1.0,-100.0,400.0
,9,0.3001490,0.0032236,0.0,3.3316783,0.0,0.0039676,0.0287984,0.0249442,0.0,1.0,-100.0,233.1678252




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.008155369684399744
RMSE: 0.0903070854606644
LogLoss: 0.035902316333919826
Null degrees of freedom: 3354
Residual degrees of freedom: 3278
Null deviance: 336.79627762115575
Residual deviance: 240.904542600602
AIC: 394.904542600602
AUC: 0.9399713853235739
pr_auc: 0.14539292295401007
Gini: 0.8799427706471479
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.03896269929330961: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,3260.0,66.0,0.0198,(66.0/3326.0)
1,14.0,15.0,0.4828,(14.0/29.0)
Total,3274.0,81.0,0.0238,(80.0/3355.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0389627,0.2727273,75.0
max f2,0.0339970,0.4009434,87.0
max f0point5,0.1263260,0.2479339,22.0
max accuracy,0.7754252,0.9910581,0.0
max precision,0.2988839,0.375,7.0
max recall,0.0026143,1.0,342.0
max specificity,0.7754252,0.9996993,0.0
max absolute_mcc,0.0339970,0.3122985,87.0
max min_per_class_accuracy,0.0079943,0.8770295,233.0


Gains/Lift Table: Avg response rate:  0.86 %, avg score:  0.69 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0101341,0.0880276,20.4158215,20.4158215,0.1764706,0.2203926,0.1764706,0.2203926,0.2068966,0.2068966,1941.5821501,1941.5821501
,2,0.0202683,0.0450799,20.4158215,20.4158215,0.1764706,0.0635569,0.1764706,0.1419747,0.2068966,0.4137931,1941.5821501,1941.5821501
,3,0.0301043,0.0322436,17.5287356,19.4725162,0.1515152,0.0377336,0.1683168,0.1079157,0.1724138,0.5862069,1652.8735632,1847.2516217
,4,0.0402385,0.0244100,3.4026369,15.4252874,0.0294118,0.0277883,0.1333333,0.0877355,0.0344828,0.6206897,240.2636917,1442.5287356
,5,0.0500745,0.0197942,3.5057471,13.0839491,0.0303030,0.0220713,0.1130952,0.0748372,0.0344828,0.6551724,250.5747126,1208.3949097
,6,0.1001490,0.0101912,3.4431445,8.2635468,0.0297619,0.0140877,0.0714286,0.0444624,0.1724138,0.8275862,244.3144499,726.3546798
,7,0.1502235,0.0069245,1.3772578,5.9681171,0.0119048,0.0082381,0.0515873,0.0323876,0.0689655,0.8965517,37.7257800,496.8117132
,8,0.2,0.0051857,0.0,4.4827586,0.0,0.0060629,0.0387481,0.0258359,0.0,0.8965517,-100.0,348.2758621
,9,0.3001490,0.0034894,0.6886289,3.2167928,0.0059524,0.0042394,0.0278054,0.0186299,0.0689655,0.9655172,-31.1371100,221.6792795



Cross-Validation Metrics Summary: 


0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid,cv_6_valid,cv_7_valid,cv_8_valid,cv_9_valid,cv_10_valid,cv_11_valid,cv_12_valid,cv_13_valid,cv_14_valid,cv_15_valid
accuracy,0.7650279,0.2733306,0.9502075,0.9834025,0.9709544,0.9126638,0.0,0.9956332,1.0,1.0,0.0,0.9633027,0.9951691,0.9909502,0.9449542,0.7681818,0.0
auc,0.9346334,0.0697358,0.9364407,0.9607843,0.8974895,0.9122807,,0.9956141,1.0,1.0,,0.9751938,0.998366,0.9852713,0.787037,0.7671233,
err,0.0368355,0.0405751,0.0497925,0.0165975,0.0290456,0.0873362,0.0063627,0.0043668,0.0,0.0,0.0096546,0.0366972,0.0048309,0.0090498,0.0550459,0.2318182,0.0119340
err_count,9.833333,12.284589,12.0,4.0,7.0,20.0,,1.0,0.0,0.0,,8.0,1.0,2.0,12.0,51.0,
f0point5,0.3750104,0.2614594,0.2631579,0.4347826,0.1666667,0.0588235,0.0003693,0.5555556,1.0,1.0,0.0010364,0.3191489,0.7894737,0.9090909,0.1,0.0239234,0.0031266
---,---,---,---,---,---,---,---,---,---,---,---,---,---,---,---,---,---
r2,0.0912779,0.1262193,-0.0422169,0.0687497,-0.0387629,0.0045587,,0.2373157,0.3850815,0.590018,,0.0797268,0.1210181,0.1054006,-0.0716693,-0.0700510,
recall,0.6622222,0.2689991,0.6,0.6666667,0.5,1.0,,1.0,1.0,1.0,,1.0,1.0,0.6666667,0.5,1.0,
residual_deviance,15.264286,9.293151,42.259052,21.855213,21.07358,11.210784,,6.2722945,5.8786283,4.1969395,,20.170294,17.732721,41.40201,23.167734,13.745042,



See the whole table with table.as_data_frame()
Scoring History: 


0,1,2,3,4,5
,timestamp,duration,iterations,negative_log_likelihood,objective
,2018-12-13 13:01:18,0.000 sec,0,166.6507273,0.0496723
,2018-12-13 13:01:18,0.024 sec,1,83.1475625,0.0390455
,2018-12-13 13:01:18,0.038 sec,2,66.5475144,0.0313504
,2018-12-13 13:01:18,0.052 sec,3,55.5739135,0.0302832
,2018-12-13 13:01:18,0.076 sec,4,54.2777245,0.0302304
,2018-12-13 13:01:18,0.093 sec,5,54.1553051,0.0302301
,2018-12-13 13:01:18,0.202 sec,6,44.7603173,0.0298362
,2018-12-13 13:01:18,0.225 sec,7,45.6236056,0.0297559
,2018-12-13 13:01:18,0.246 sec,8,45.8975549,0.0297476




In [51]:
# Train one more model with the alpha/lambda pair that ranked first in the table above.
# The grid_id is the same as before, and the recorded output shows the new model (model_37)
# listed alongside the earlier models with the same logloss as model_9.
manual_hyper_parameters = {'alpha': [0.3], 'lambda': [0.01]}
random_plus_manual = H2OGridSearch(H2OGeneralizedLinearEstimator(family='binomial', nfolds=15,seed=1,balance_classes=True),
                                   manual_hyper_parameters, 
                                   grid_id="random_plus_manual")

random_plus_manual.train(x=X,y=y, training_frame=train)

random_plus_manual.show()
# Also print the grid ordered by F1.
print(random_plus_manual.sort_by('F1', False))

glm Grid Build progress: |████████████████████████████████████████████████| 100%
       alpha    lambda                    model_ids               logloss
0      [0.3]    [0.01]   random_plus_manual_model_9  0.035902316333919826
1      [0.3]    [0.01]  random_plus_manual_model_37  0.035902316333919826
2      [0.5]    [0.01]  random_plus_manual_model_12  0.036597366264307235
3     [0.01]     [0.1]  random_plus_manual_model_24   0.03733352185179091
4      [0.1]    [0.01]  random_plus_manual_model_16   0.03761673364769872
5      [0.7]    [0.01]  random_plus_manual_model_31  0.039856316816505144
6      [0.9]   [0.001]  random_plus_manual_model_34  0.041118949264909846
7      [0.7]   [0.001]  random_plus_manual_model_30   0.04193091786744568
8      [0.9]    [0.01]  random_plus_manual_model_23   0.04248499005949807
9      [0.5]   [0.001]  random_plus_manual_model_29  0.043020133300378856
10    [0.01]    [0.01]  random_plus_manual_model_35  0.043976464703850686
11     [0.3]   [0.001]  random_

0,1,2
Model Id,"Hyperparameters: [alpha, lambda]",F1
random_plus_manual_model_17,"[0.01, 0.0001]","[[0.9940655968188258, 1.0]]"
random_plus_manual_model_15,"[0.5, 0.0001]","[[0.9791742235697483, 1.0]]"
random_plus_manual_model_28,"[0.1, 0.0001]","[[0.9775935609710088, 1.0]]"
random_plus_manual_model_22,"[0.7, 0.0001]","[[0.9744094332702191, 1.0]]"
random_plus_manual_model_7,"[0.3, 0.0001]","[[0.9743038277543911, 1.0]]"
---,---,---
random_plus_manual_model_13,"[0.1, 1.0]","[[0.00864381520119213, 0.017139479905437353]]"
random_plus_manual_model_33,"[0.5, 0.1]","[[0.00864381520119213, 0.017139479905437353]]"
random_plus_manual_model_19,"[0.3, 0.1]","[[0.00864381520119213, 0.017139479905437353]]"



See the whole table with table.as_data_frame()



In [52]:
def plot_perf(grid, test, max_models=None):
    """Print the performance of the models in ``grid`` evaluated on ``test``.

    Despite the name, nothing is plotted: for each model the function prints
    its position in ``grid.models``, its ``model_id`` and the object returned
    by ``model.model_performance(test)``.

    Models are reported in the order in which ``grid.models`` yields them.
    NOTE(review): the leading 'best' header presumably assumes the grid is
    already ordered best-first -- confirm against the grid's sort order.

    Parameters
    ----------
    grid : object exposing a ``models`` sequence
        Each element must provide ``model_id`` and ``model_performance(frame)``.
    test : frame passed unchanged to ``model_performance``.
    max_models : int or None, optional
        When given, only the first ``max_models`` models are reported. This
        keeps the notebook output readable for large grids. The default,
        ``None``, reports every model (the original behaviour).

    Raises
    ------
    ValueError
        If ``max_models`` is negative.
    """
    if max_models is not None and max_models < 0:
        raise ValueError("max_models must be None or a non-negative integer")

    # Slice a list copy so any iterable of models is supported.
    models = grid.models if max_models is None else list(grid.models)[:max_models]

    print('best')
    for index, model in enumerate(models):
        print('model', index)
        print('id', model.model_id)
        print(model.model_performance(test))

In [53]:
# Report every grid model's metrics on the `test` frame; this prints one full
# metrics report per model, so the output below is long.
plot_perf(random_plus_manual, test)

best
model 0
id random_plus_manual_model_9

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007015396495615427
RMSE: 0.0837579637742909
LogLoss: 0.03241286433915599
Null degrees of freedom: 1336
Residual degrees of freedom: 1236
Null deviance: 127.54309218673963
Residual deviance: 86.67199924290308
AIC: 288.6719992429031
AUC: 0.9220828191416427
pr_auc: 0.17380488096095625
Gini: 0.8441656382832854
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.10693209171037364: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1317.0,9.0,0.0068,(9.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1322.0,15.0,0.0105,(14.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1069321,0.4615385,14.0
max f2,0.1069321,0.5084746,14.0
max f0point5,0.1069321,0.4225352,14.0
max accuracy,0.6420616,0.9925206,0.0
max precision,0.6420616,1.0,0.0
max recall,0.0030679,1.0,274.0
max specificity,0.6420616,1.0,0.0
max absolute_mcc,0.1069321,0.4619844,14.0
max min_per_class_accuracy,0.0053390,0.8181818,183.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.65 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.1076384,43.4090909,43.4090909,0.3571429,0.2108979,0.3571429,0.2108979,0.4545455,0.4545455,4240.9090909,4240.9090909
,2,0.0201945,0.0520570,9.3496503,27.0101010,0.0769231,0.0709875,0.2222222,0.1435336,0.0909091,0.5454545,834.9650350,2601.0101010
,3,0.0306657,0.0311936,8.6818182,20.7516630,0.0714286,0.0394266,0.1707317,0.1079849,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0214426,0.0,15.7558923,0.0,0.0264060,0.1296296,0.0883455,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0171402,0.0,12.6987788,0.0,0.0186514,0.1044776,0.0748228,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0085071,0.0,6.3493894,0.0,0.0125176,0.0522388,0.0436702,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0058965,1.8141113,4.8376300,0.0149254,0.0071220,0.0398010,0.0314875,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0045040,1.8141113,4.0817503,0.0149254,0.0051754,0.0335821,0.0249094,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0031198,0.9138756,3.0310587,0.0075188,0.0037048,0.0249377,0.0178765,0.0909091,0.9090909,-8.6124402,203.1058717




model 1
id random_plus_manual_model_37

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007015396495615427
RMSE: 0.0837579637742909
LogLoss: 0.03241286433915599
Null degrees of freedom: 1336
Residual degrees of freedom: 1236
Null deviance: 127.54309218673963
Residual deviance: 86.67199924290308
AIC: 288.6719992429031
AUC: 0.9220828191416427
pr_auc: 0.17380488096095625
Gini: 0.8441656382832854
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.10693209171037364: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1317.0,9.0,0.0068,(9.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1322.0,15.0,0.0105,(14.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1069321,0.4615385,14.0
max f2,0.1069321,0.5084746,14.0
max f0point5,0.1069321,0.4225352,14.0
max accuracy,0.6420616,0.9925206,0.0
max precision,0.6420616,1.0,0.0
max recall,0.0030679,1.0,274.0
max specificity,0.6420616,1.0,0.0
max absolute_mcc,0.1069321,0.4619844,14.0
max min_per_class_accuracy,0.0053390,0.8181818,183.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.65 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.1076384,43.4090909,43.4090909,0.3571429,0.2108979,0.3571429,0.2108979,0.4545455,0.4545455,4240.9090909,4240.9090909
,2,0.0201945,0.0520570,9.3496503,27.0101010,0.0769231,0.0709875,0.2222222,0.1435336,0.0909091,0.5454545,834.9650350,2601.0101010
,3,0.0306657,0.0311936,8.6818182,20.7516630,0.0714286,0.0394266,0.1707317,0.1079849,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0214426,0.0,15.7558923,0.0,0.0264060,0.1296296,0.0883455,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0171402,0.0,12.6987788,0.0,0.0186514,0.1044776,0.0748228,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0085071,0.0,6.3493894,0.0,0.0125176,0.0522388,0.0436702,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0058965,1.8141113,4.8376300,0.0149254,0.0071220,0.0398010,0.0314875,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0045040,1.8141113,4.0817503,0.0149254,0.0051754,0.0335821,0.0249094,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0031198,0.9138756,3.0310587,0.0075188,0.0037048,0.0249377,0.0178765,0.0909091,0.9090909,-8.6124402,203.1058717




model 2
id random_plus_manual_model_12

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007182742442499503
RMSE: 0.08475106160101774
LogLoss: 0.03344284259613828
Null degrees of freedom: 1336
Residual degrees of freedom: 1296
Null deviance: 127.54309218673963
Residual deviance: 89.42616110207382
AIC: 171.42616110207382
AUC: 0.9264020293432059
pr_auc: 0.1769506764985306
Gini: 0.8528040586864118
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.08120202246185766: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1315.0,11.0,0.0083,(11.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1320.0,17.0,0.012,(16.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0812020,0.4285714,16.0
max f2,0.0812020,0.4918033,16.0
max f0point5,0.1581486,0.3846154,6.0
max accuracy,0.5039117,0.9925206,0.0
max precision,0.5039117,1.0,0.0
max recall,0.0048743,1.0,264.0
max specificity,0.5039117,1.0,0.0
max absolute_mcc,0.0812020,0.4330714,16.0
max min_per_class_accuracy,0.0063432,0.8181818,204.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.78 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0941159,34.7272727,34.7272727,0.2857143,0.2137217,0.2857143,0.2137217,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0584045,18.6993007,27.0101010,0.1538462,0.0729000,0.2222222,0.1459186,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0331409,0.0,17.7871397,0.0,0.0448488,0.1463415,0.1114070,0.0,0.5454545,-100.0,1678.7139690
,4,0.0403889,0.0243892,9.3496503,15.7558923,0.0769231,0.0272585,0.1296296,0.0911490,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0201468,0.0,12.6987788,0.0,0.0217768,0.1044776,0.0776887,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0098075,0.0,6.3493894,0.0,0.0140357,0.0522388,0.0458622,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0071156,1.8141113,4.8376300,0.0149254,0.0082356,0.0398010,0.0333200,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0056324,1.8141113,4.0817503,0.0149254,0.0063536,0.0335821,0.0265784,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0042466,1.8277512,3.3341646,0.0150376,0.0048962,0.0274314,0.0193870,0.1818182,1.0,82.7751196,233.4164589




model 3
id random_plus_manual_model_24

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007249904389125243
RMSE: 0.08514637038139232
LogLoss: 0.03371750264619653
Null degrees of freedom: 1336
Residual degrees of freedom: 310
Null deviance: 127.54309218673963
Residual deviance: 90.16060207592953
AIC: 2144.1606020759295
AUC: 0.9353832442067735
pr_auc: 0.1972772783704423
Gini: 0.8707664884135471
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.03623072469760192: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1316.0,10.0,0.0075,(10.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1321.0,16.0,0.0112,(15.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0362307,0.4444444,14.0
max f2,0.0362307,0.5,14.0
max f0point5,0.0520718,0.4651163,7.0
max accuracy,0.5511684,0.9925206,0.0
max precision,0.5511684,1.0,0.0
max recall,0.0047890,1.0,224.0
max specificity,0.5511684,1.0,0.0
max absolute_mcc,0.0362307,0.4468572,14.0
max min_per_class_accuracy,0.0101148,0.8181818,121.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.51 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0424160,34.7272727,34.7272727,0.2857143,0.0912363,0.2857143,0.0912363,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0247135,18.6993007,27.0101010,0.1538462,0.0299752,0.2222222,0.0617402,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0208595,8.6818182,20.7516630,0.0714286,0.0226224,0.1707317,0.0483829,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0179334,0.0,15.7558923,0.0,0.0191493,0.1296296,0.0413452,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0160238,0.0,12.6987788,0.0,0.0169461,0.1044776,0.0366110,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0110671,0.0,6.3493894,0.0,0.0131435,0.0522388,0.0248773,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0077418,3.6282225,5.4423338,0.0298507,0.0090607,0.0447761,0.0196051,0.1818182,0.8181818,262.8222524,444.2333786
,8,0.2004488,0.0060021,0.0,4.0817503,0.0,0.0068451,0.0335821,0.0164151,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0043198,1.8277512,3.3341646,0.0150376,0.0050930,0.0274314,0.0126599,0.1818182,1.0,82.7751196,233.4164589




model 4
id random_plus_manual_model_16

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.0071041649500102454
RMSE: 0.08428620853977384
LogLoss: 0.03352842883536345
Null degrees of freedom: 1336
Residual degrees of freedom: 1022
Null deviance: 127.54309218673963
Residual deviance: 89.65501870576188
AIC: 719.6550187057619
AUC: 0.9256821609762786
pr_auc: 0.18465884660932716
Gini: 0.8513643219525573
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.03423814374546152: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1312.0,14.0,0.0106,(14.0/1326.0)
1,4.0,7.0,0.3636,(4.0/11.0)
Total,1316.0,21.0,0.0135,(18.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0342381,0.4375000,20.0
max f2,0.0342381,0.5384615,20.0
max f0point5,0.0700162,0.3921569,9.0
max accuracy,0.7883189,0.9925206,0.0
max precision,0.7883189,1.0,0.0
max recall,0.0012002,1.0,283.0
max specificity,0.7883189,1.0,0.0
max absolute_mcc,0.0342381,0.4546424,20.0
max min_per_class_accuracy,0.0046712,0.8181818,134.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.38 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0546161,34.7272727,34.7272727,0.2857143,0.1568074,0.2857143,0.1568074,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0277954,28.0489510,31.5117845,0.2307692,0.0374077,0.2592593,0.0993187,0.2727273,0.6363636,2704.8951049,3051.1784512
,3,0.0306657,0.0176183,0.0,20.7516630,0.0,0.0226444,0.1707317,0.0731372,0.0,0.6363636,-100.0,1975.1662971
,4,0.0403889,0.0141917,0.0,15.7558923,0.0,0.0161865,0.1296296,0.0594269,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0113366,0.0,12.6987788,0.0,0.0126138,0.1044776,0.0503437,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0055542,1.8141113,7.2564450,0.0149254,0.0076563,0.0597015,0.0290000,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0034013,1.8141113,5.4423338,0.0149254,0.0042739,0.0447761,0.0207580,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0024242,0.0,4.0817503,0.0,0.0028134,0.0335821,0.0162718,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0013531,0.9138756,3.0310587,0.0075188,0.0018206,0.0249377,0.0114788,0.0909091,0.9090909,-8.6124402,203.1058717




model 5
id random_plus_manual_model_31

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007340494194087787
RMSE: 0.0856766840750025
LogLoss: 0.03476446896922084
Null degrees of freedom: 1336
Residual degrees of freedom: 1314
Null deviance: 127.54309218673963
Residual deviance: 92.96019002369653
AIC: 138.96019002369653
AUC: 0.9166666666666666
pr_auc: 0.21650556171391794
Gini: 0.8333333333333333
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.08337913359587781: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1318.0,8.0,0.006,(8.0/1326.0)
1,5.0,6.0,0.4545,(5.0/11.0)
Total,1323.0,14.0,0.0097,(13.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0833791,0.4800000,13.0
max f2,0.0833791,0.5172414,13.0
max f0point5,0.1115796,0.4651163,7.0
max accuracy,0.2976804,0.9917726,1.0
max precision,0.2976804,0.5,1.0
max recall,0.0042792,1.0,369.0
max specificity,0.3737257,0.9992459,0.0
max absolute_mcc,0.0833791,0.4786877,13.0
max min_per_class_accuracy,0.0076578,0.8181818,202.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.80 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0767598,52.0909091,52.0909091,0.4285714,0.1530144,0.4285714,0.1530144,0.5454545,0.5454545,5109.0909091,5109.0909091
,2,0.0201945,0.0511156,0.0,27.0101010,0.0,0.0576011,0.2222222,0.1070747,0.0,0.5454545,-100.0,2601.0101010
,3,0.0306657,0.0345138,0.0,17.7871397,0.0,0.0405829,0.1463415,0.0843701,0.0,0.5454545,-100.0,1678.7139690
,4,0.0403889,0.0235110,0.0,13.5050505,0.0,0.0284225,0.1111111,0.0709013,0.0,0.5454545,-100.0,1250.5050505
,5,0.0501122,0.0188456,9.3496503,12.6987788,0.0769231,0.0207707,0.1044776,0.0611744,0.0909091,0.6363636,834.9650350,1169.8778833
,6,0.1002244,0.0113058,0.0,6.3493894,0.0,0.0144968,0.0522388,0.0378356,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0081122,1.8141113,4.8376300,0.0149254,0.0094132,0.0398010,0.0283615,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0065919,1.8141113,4.0817503,0.0149254,0.0073327,0.0335821,0.0231043,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0048440,0.9138756,3.0310587,0.0075188,0.0055909,0.0249377,0.0172956,0.0909091,0.9090909,-8.6124402,203.1058717




model 6
id random_plus_manual_model_34

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007013047279360251
RMSE: 0.08374393876192027
LogLoss: 0.036378326285873466
Null degrees of freedom: 1336
Residual degrees of freedom: 1238
Null deviance: 127.54309218673963
Residual deviance: 97.27564448842574
AIC: 295.27564448842577
AUC: 0.8978129713423831
pr_auc: 0.20770999213195426
Gini: 0.7956259426847663
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.16300815275042105: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1322.0,4.0,0.003,(4.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1329.0,8.0,0.0082,(11.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1630082,0.4210526,7.0
max f2,0.0615417,0.4615385,20.0
max f0point5,0.1677774,0.4838710,4.0
max accuracy,0.8871542,0.9925206,0.0
max precision,0.8871542,1.0,0.0
max recall,0.0002152,1.0,346.0
max specificity,0.8871542,1.0,0.0
max absolute_mcc,0.1630082,0.4223866,7.0
max min_per_class_accuracy,0.0006979,0.7684766,248.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.42 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.1062826,34.7272727,34.7272727,0.2857143,0.2381705,0.2857143,0.2381705,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0403414,18.6993007,27.0101010,0.1538462,0.0644882,0.2222222,0.1545457,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0188054,0.0,17.7871397,0.0,0.0300293,0.1463415,0.1120279,0.0,0.5454545,-100.0,1678.7139690
,4,0.0403889,0.0112833,9.3496503,15.7558923,0.0769231,0.0146104,0.1296296,0.0885755,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0075811,0.0,12.6987788,0.0,0.0093021,0.1044776,0.0731941,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0024696,0.0,6.3493894,0.0,0.0043787,0.0522388,0.0387864,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0013961,1.8141113,4.8376300,0.0149254,0.0018960,0.0398010,0.0264896,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0009024,0.0,3.6282225,0.0,0.0010890,0.0298507,0.0201394,0.0,0.7272727,-100.0,262.8222524
,9,0.2999252,0.0004387,0.9138756,2.7279528,0.0075188,0.0006513,0.0224439,0.0136758,0.0909091,0.8181818,-8.6124402,172.7952845




model 7
id random_plus_manual_model_30

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007144738394376583
RMSE: 0.08452655437421179
LogLoss: 0.03704531462459164
Null degrees of freedom: 1336
Residual degrees of freedom: 1210
Null deviance: 127.54309218673963
Residual deviance: 99.05917130615758
AIC: 353.05917130615757
AUC: 0.9044631838749485
pr_auc: 0.17200371838851
Gini: 0.808926367749897
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.10631738785358237: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1320.0,6.0,0.0045,(6.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1327.0,10.0,0.0097,(13.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1063174,0.3809524,9.0
max f2,0.0516650,0.4545455,21.0
max f0point5,0.1063174,0.3921569,9.0
max accuracy,0.9000966,0.9925206,0.0
max precision,0.9000966,1.0,0.0
max recall,0.0001568,1.0,345.0
max specificity,0.9000966,1.0,0.0
max absolute_mcc,0.0516650,0.3787366,21.0
max min_per_class_accuracy,0.0006362,0.7828054,238.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.37 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0773610,34.7272727,34.7272727,0.2857143,0.2163401,0.2857143,0.2163401,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0317100,18.6993007,27.0101010,0.1538462,0.0544943,0.2222222,0.1384143,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0188609,8.6818182,20.7516630,0.0714286,0.0255052,0.1707317,0.0998600,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0100715,0.0,15.7558923,0.0,0.0133103,0.1296296,0.0790239,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0064403,0.0,12.6987788,0.0,0.0080741,0.1044776,0.0652576,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0022002,0.0,6.3493894,0.0,0.0037086,0.0522388,0.0344831,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0011742,1.8141113,4.8376300,0.0149254,0.0016054,0.0398010,0.0235239,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0007313,0.0,3.6282225,0.0,0.0009160,0.0298507,0.0178719,0.0,0.7272727,-100.0,262.8222524
,9,0.2999252,0.0003469,1.8277512,3.0310587,0.0150376,0.0005220,0.0249377,0.0121175,0.1818182,0.9090909,82.7751196,203.1058717




model 8
id random_plus_manual_model_23

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007566034757405741
RMSE: 0.0869829567065051
LogLoss: 0.0364726711215278
Null degrees of freedom: 1336
Residual degrees of freedom: 1322
Null deviance: 127.54309218673963
Residual deviance: 97.52792257896517
AIC: 127.52792257896517
AUC: 0.8893802276155217
pr_auc: 0.1988804717510593
Gini: 0.7787604552310434
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.08653086027061417: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1322.0,4.0,0.003,(4.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1329.0,8.0,0.0082,(11.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0865309,0.4210526,7.0
max f2,0.0274887,0.4545455,32.0
max f0point5,0.0865309,0.4651163,7.0
max accuracy,0.0865309,0.9917726,7.0
max precision,0.0865309,0.5,7.0
max recall,0.0052216,1.0,381.0
max specificity,0.2660262,0.9992459,0.0
max absolute_mcc,0.0865309,0.4223866,7.0
max min_per_class_accuracy,0.0071825,0.8181818,243.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.81 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0538443,43.4090909,43.4090909,0.3571429,0.1114201,0.3571429,0.1114201,0.4545455,0.4545455,4240.9090909,4240.9090909
,2,0.0201945,0.0361550,9.3496503,27.0101010,0.0769231,0.0474056,0.2222222,0.0805983,0.0909091,0.5454545,834.9650350,2601.0101010
,3,0.0306657,0.0252250,8.6818182,20.7516630,0.0714286,0.0281622,0.1707317,0.0626933,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0208679,0.0,15.7558923,0.0,0.0231243,0.1296296,0.0531674,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0174027,0.0,12.6987788,0.0,0.0192409,0.1044776,0.0465847,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0104973,0.0,6.3493894,0.0,0.0131102,0.0522388,0.0298474,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0080788,1.8141113,4.8376300,0.0149254,0.0091869,0.0398010,0.0229606,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0067125,1.8141113,4.0817503,0.0149254,0.0073621,0.0335821,0.0190610,0.0909091,0.8181818,81.4111262,308.1750339
,9,1.0,0.0052216,0.2274003,1.0,0.0018709,0.0052998,0.0082274,0.0080582,0.1818182,1.0,-77.2599711,0.0




model 9
id random_plus_manual_model_29

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007250494311334698
RMSE: 0.0851498344762613
LogLoss: 0.038359610408333085
Null degrees of freedom: 1336
Residual degrees of freedom: 1182
Null deviance: 127.54309218673963
Residual deviance: 102.57359823188222
AIC: 412.5735982318822
AUC: 0.9053544494720965
pr_auc: 0.1815672549022817
Gini: 0.8107088989441931
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.06310103722289909: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1317.0,9.0,0.0068,(9.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1323.0,14.0,0.0112,(15.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0631010,0.4,13.0
max f2,0.0216709,0.4545455,32.0
max f0point5,0.0631010,0.3731343,13.0
max accuracy,0.9170103,0.9925206,0.0
max precision,0.9170103,1.0,0.0
max recall,0.0000895,1.0,357.0
max specificity,0.9170103,1.0,0.0
max absolute_mcc,0.0631010,0.3973449,13.0
max min_per_class_accuracy,0.0006526,0.8181818,218.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.32 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0628860,43.4090909,43.4090909,0.3571429,0.2035341,0.3571429,0.2035341,0.4545455,0.4545455,4240.9090909,4240.9090909
,2,0.0201945,0.0266077,9.3496503,27.0101010,0.0769231,0.0422026,0.2222222,0.1258560,0.0909091,0.5454545,834.9650350,2601.0101010
,3,0.0306657,0.0159963,8.6818182,20.7516630,0.0714286,0.0202651,0.1707317,0.0898006,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0076297,0.0,15.7558923,0.0,0.0102871,0.1296296,0.0706584,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0051385,0.0,12.6987788,0.0,0.0062904,0.1044776,0.0581691,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0016911,0.0,6.3493894,0.0,0.0029244,0.0522388,0.0305468,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0009339,1.8141113,4.8376300,0.0149254,0.0012353,0.0398010,0.0207763,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0005236,1.8141113,4.0817503,0.0149254,0.0007024,0.0335821,0.0157578,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0002527,0.9138756,3.0310587,0.0075188,0.0003680,0.0249377,0.0106535,0.0909091,0.9090909,-8.6124402,203.1058717




model 10
id random_plus_manual_model_35

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007328299979756485
RMSE: 0.08560549035988571
LogLoss: 0.04039460271600919
Null degrees of freedom: 1336
Residual degrees of freedom: -1145
Null deviance: 127.54309218673963
Residual deviance: 108.01516766260872
AIC: 5072.015167662608
AUC: 0.9279446044151927
pr_auc: 0.2046206926211877
Gini: 0.8558892088303853
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.025656021373149082: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1323.0,3.0,0.0023,(3.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1330.0,7.0,0.0075,(10.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0256560,0.4444444,6.0
max f2,0.0109462,0.4545455,21.0
max f0point5,0.0256560,0.5128205,6.0
max accuracy,0.8863768,0.9925206,0.0
max precision,0.8863768,1.0,0.0
max recall,0.0003894,1.0,273.0
max specificity,0.8863768,1.0,0.0
max absolute_mcc,0.0256560,0.4523245,6.0
max min_per_class_accuracy,0.0012204,0.8181818,162.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.16 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0134254,43.4090909,43.4090909,0.3571429,0.0858677,0.3571429,0.0858677,0.4545455,0.4545455,4240.9090909,4240.9090909
,2,0.0201945,0.0087832,9.3496503,27.0101010,0.0769231,0.0111573,0.2222222,0.0498960,0.0909091,0.5454545,834.9650350,2601.0101010
,3,0.0306657,0.0053021,8.6818182,20.7516630,0.0714286,0.0071865,0.1707317,0.0353123,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0041564,0.0,15.7558923,0.0,0.0045899,0.1296296,0.0279161,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0035347,0.0,12.6987788,0.0,0.0038629,0.1044776,0.0232491,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0019299,1.8141113,7.2564450,0.0149254,0.0025487,0.0597015,0.0128989,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0011798,1.8141113,5.4423338,0.0149254,0.0014880,0.0447761,0.0090953,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0007875,0.0,4.0817503,0.0,0.0009717,0.0335821,0.0070644,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0004015,0.9138756,3.0310587,0.0075188,0.0005764,0.0249377,0.0049125,0.0909091,0.9090909,-8.6124402,203.1058717




model 11
id random_plus_manual_model_21

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.0074586016603852626
RMSE: 0.086363196214506
LogLoss: 0.0405414419150284
Null degrees of freedom: 1336
Residual degrees of freedom: 1112
Null deviance: 127.54309218673963
Residual deviance: 108.40781568078444
AIC: 558.4078156807844
AUC: 0.910427807486631
pr_auc: 0.1723562306961594
Gini: 0.820855614973262
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.04087757819199777: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1316.0,10.0,0.0075,(10.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1322.0,15.0,0.012,(16.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0408776,0.3846154,14.0
max f2,0.0144302,0.4545455,32.0
max f0point5,0.1333413,0.3703704,3.0
max accuracy,0.9393224,0.9925206,0.0
max precision,0.9393224,1.0,0.0
max recall,0.0000612,1.0,350.0
max specificity,0.9393224,1.0,0.0
max absolute_mcc,0.0408776,0.3833700,14.0
max min_per_class_accuracy,0.0008461,0.8181818,162.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.27 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0410255,34.7272727,34.7272727,0.2857143,0.1877757,0.2857143,0.1877757,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0153721,18.6993007,27.0101010,0.1538462,0.0266127,0.2222222,0.1101787,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0101969,8.6818182,20.7516630,0.0714286,0.0128087,0.1707317,0.0769304,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0055027,0.0,15.7558923,0.0,0.0069822,0.1296296,0.0600910,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0037987,0.0,12.6987788,0.0,0.0045244,0.1044776,0.0493094,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0012202,1.8141113,7.2564450,0.0149254,0.0021269,0.0597015,0.0257182,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0006421,1.8141113,5.4423338,0.0149254,0.0008901,0.0447761,0.0174421,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0003776,0.0,4.0817503,0.0,0.0005039,0.0335821,0.0132076,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0001602,0.9138756,3.0310587,0.0075188,0.0002508,0.0249377,0.0089102,0.0909091,0.9090909,-8.6124402,203.1058717




model 12
id random_plus_manual_model_5

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007302567268300604
RMSE: 0.08545505993386585
LogLoss: 0.04237303641667682
Null degrees of freedom: 1336
Residual degrees of freedom: 487
Null deviance: 127.54309218673963
Residual deviance: 113.30549937819528
AIC: 1813.3054993781952
AUC: 0.9249622926093515
pr_auc: 0.15197927456023563
Gini: 0.849924585218703
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.021414360935569116: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1315.0,11.0,0.0083,(11.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1321.0,16.0,0.0127,(17.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0214144,0.3703704,15.0
max f2,0.0060286,0.4320988,36.0
max f0point5,0.9747128,0.3333333,0.0
max accuracy,0.9747128,0.9925206,0.0
max precision,0.9747128,1.0,0.0
max recall,0.0001066,1.0,291.0
max specificity,0.9747128,1.0,0.0
max absolute_mcc,0.0214144,0.3707104,15.0
max min_per_class_accuracy,0.0005452,0.8181818,175.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.18 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0266080,34.7272727,34.7272727,0.2857143,0.1319188,0.2857143,0.1319188,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0089843,9.3496503,22.5084175,0.0769231,0.0143847,0.1851852,0.0753283,0.0909091,0.4545455,834.9650350,2150.8417508
,3,0.0306657,0.0048578,17.3636364,20.7516630,0.1428571,0.0063977,0.1707317,0.0517910,0.1818182,0.6363636,1636.3636364,1975.1662971
,4,0.0403889,0.0032690,0.0,15.7558923,0.0,0.0040467,0.1296296,0.0402970,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0027677,0.0,12.6987788,0.0,0.0029571,0.1044776,0.0330519,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0008947,1.8141113,7.2564450,0.0149254,0.0016300,0.0597015,0.0173410,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0005469,0.0,4.8376300,0.0,0.0006903,0.0398010,0.0117908,0.0,0.7272727,-100.0,383.7630032
,8,0.2004488,0.0002874,1.8141113,4.0817503,0.0149254,0.0003974,0.0335821,0.0089424,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0001164,0.9138756,3.0310587,0.0075188,0.0001873,0.0249377,0.0060386,0.0909091,0.9090909,-8.6124402,203.1058717




model 13
id random_plus_manual_model_2

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007995817551964674
RMSE: 0.08941933544801524
LogLoss: 0.041845755650583925
Null degrees of freedom: 1336
Residual degrees of freedom: 1313
Null deviance: 127.54309218673963
Residual deviance: 111.8955506096614
AIC: 159.8955506096614
AUC: 0.933429315782257
pr_auc: 0.13531281307696955
Gini: 0.8668586315645139
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.024634817163452845: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1313.0,13.0,0.0098,(13.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1319.0,18.0,0.0142,(19.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0246348,0.3448276,17.0
max f2,0.0204352,0.4109589,28.0
max f0point5,0.0246348,0.3012048,17.0
max accuracy,0.0466611,0.9910247,0.0
max precision,0.0246348,0.2777778,17.0
max recall,0.0075765,1.0,345.0
max specificity,0.0466611,0.9992459,0.0
max absolute_mcc,0.0246348,0.3485918,17.0
max min_per_class_accuracy,0.0093884,0.8831071,154.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.85 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0259919,26.0454545,26.0454545,0.2142857,0.0318168,0.2142857,0.0318168,0.2727273,0.2727273,2504.5454545,2504.5454545
,2,0.0201945,0.0206551,18.6993007,22.5084175,0.1538462,0.0236712,0.1851852,0.0278949,0.1818182,0.4545455,1769.9300699,2150.8417508
,3,0.0306657,0.0164006,8.6818182,17.7871397,0.0714286,0.0181088,0.1463415,0.0245533,0.0909091,0.5454545,768.1818182,1678.7139690
,4,0.0403889,0.0141115,9.3496503,15.7558923,0.0769231,0.0150832,0.1296296,0.0222734,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0123945,0.0,12.6987788,0.0,0.0132319,0.1044776,0.0205191,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0100658,1.8141113,7.2564450,0.0149254,0.0110358,0.0597015,0.0157775,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0090056,3.6282225,6.0470375,0.0298507,0.0094138,0.0497512,0.0136562,0.1818182,0.9090909,262.8222524,504.7037540
,8,0.2004488,0.0085758,0.0,4.5352782,0.0,0.0087850,0.0373134,0.0124384,0.0,0.9090909,-100.0,353.5278155
,9,0.2999252,0.0079398,0.0,3.0310587,0.0,0.0082454,0.0249377,0.0110477,0.0,0.9090909,-100.0,203.1058717




model 14
id random_plus_manual_model_26

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007546541396379847
RMSE: 0.08687083167772625
LogLoss: 0.04275674430157778
Null degrees of freedom: 1336
Residual degrees of freedom: 719
Null deviance: 127.54309218673963
Residual deviance: 114.33153426241972
AIC: 1350.3315342624196
AUC: 0.9224256136020843
pr_auc: 0.1356939264352401
Gini: 0.8448512272041686
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.023733977630843024: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1313.0,13.0,0.0098,(13.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1319.0,18.0,0.0142,(19.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0237340,0.3448276,17.0
max f2,0.0070512,0.4268293,37.0
max f0point5,0.9895437,0.3333333,0.0
max accuracy,0.9895437,0.9925206,0.0
max precision,0.9895437,1.0,0.0
max recall,0.0000693,1.0,314.0
max specificity,0.9895437,1.0,0.0
max absolute_mcc,0.0237340,0.3485918,17.0
max min_per_class_accuracy,0.0005994,0.8181818,155.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.26 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0392001,34.7272727,34.7272727,0.2857143,0.2004360,0.2857143,0.2004360,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0124707,18.6993007,27.0101010,0.1538462,0.0199546,0.2222222,0.1135376,0.1818182,0.5454545,1769.9300699,2601.0101010
,3,0.0306657,0.0061859,8.6818182,20.7516630,0.0714286,0.0081751,0.1707317,0.0775601,0.0909091,0.6363636,768.1818182,1975.1662971
,4,0.0403889,0.0039119,0.0,15.7558923,0.0,0.0050150,0.1296296,0.0600956,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0028571,0.0,12.6987788,0.0,0.0034826,0.1044776,0.0491110,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0008788,1.8141113,7.2564450,0.0149254,0.0016749,0.0597015,0.0253929,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0004101,1.8141113,5.4423338,0.0149254,0.0005950,0.0447761,0.0171269,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0002473,0.0,4.0817503,0.0,0.0003254,0.0335821,0.0129266,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0000861,0.9138756,3.0310587,0.0075188,0.0001509,0.0249377,0.0086892,0.0909091,0.9090909,-8.6124402,203.1058717




model 15
id random_plus_manual_model_22

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007469496797264417
RMSE: 0.08642625062597831
LogLoss: 0.043968175590752764
Null degrees of freedom: 1336
Residual degrees of freedom: 571
Null deviance: 127.54309218673963
Residual deviance: 117.5709015296745
AIC: 1649.5709015296745
AUC: 0.921911421911422
pr_auc: 0.1353905793628141
Gini: 0.8438228438228439
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.02078511834528239: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1315.0,11.0,0.0083,(11.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1321.0,16.0,0.0127,(17.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0207851,0.3703704,15.0
max f2,0.0207851,0.4166667,15.0
max f0point5,0.9917095,0.3333333,0.0
max accuracy,0.9917095,0.9925206,0.0
max precision,0.9917095,1.0,0.0
max recall,0.0000594,1.0,311.0
max specificity,0.9917095,1.0,0.0
max absolute_mcc,0.0207851,0.3707104,15.0
max min_per_class_accuracy,0.0004279,0.8181818,169.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.23 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0318053,34.7272727,34.7272727,0.2857143,0.1795551,0.2857143,0.1795551,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0095266,9.3496503,22.5084175,0.0769231,0.0167629,0.1851852,0.1011737,0.0909091,0.4545455,834.9650350,2150.8417508
,3,0.0306657,0.0051993,8.6818182,17.7871397,0.0714286,0.0066255,0.1463415,0.0688890,0.0909091,0.5454545,768.1818182,1678.7139690
,4,0.0403889,0.0033295,9.3496503,15.7558923,0.0769231,0.0041819,0.1296296,0.0533113,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0025460,0.0,12.6987788,0.0,0.0028595,0.1044776,0.0435222,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0007035,1.8141113,7.2564450,0.0149254,0.0013883,0.0597015,0.0224553,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0003786,1.8141113,5.4423338,0.0149254,0.0005056,0.0447761,0.0151387,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0002132,0.0,4.0817503,0.0,0.0002801,0.0335821,0.0114241,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0000717,0.9138756,3.0310587,0.0075188,0.0001251,0.0249377,0.0076765,0.0909091,0.9090909,-8.6124402,203.1058717




model 16
id random_plus_manual_model_15

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007430764303093029
RMSE: 0.08620188108790335
LogLoss: 0.045053653389467416
Null degrees of freedom: 1336
Residual degrees of freedom: 327
Null deviance: 127.54309218673963
Residual deviance: 120.47346916343699
AIC: 2140.473469163437
AUC: 0.9140957082133552
pr_auc: 0.12801257844597164
Gini: 0.8281914164267103
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.01676713196077798: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1315.0,11.0,0.0083,(11.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1321.0,16.0,0.0127,(17.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0167671,0.3703704,15.0
max f2,0.0167671,0.4166667,15.0
max f0point5,0.9900319,0.3333333,0.0
max accuracy,0.9900319,0.9925206,0.0
max precision,0.9900319,1.0,0.0
max recall,0.0000429,1.0,319.0
max specificity,0.9900319,1.0,0.0
max absolute_mcc,0.0167671,0.3707104,15.0
max min_per_class_accuracy,0.0003906,0.8181818,167.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.20 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0248330,34.7272727,34.7272727,0.2857143,0.1560023,0.2857143,0.1560023,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0078066,9.3496503,22.5084175,0.0769231,0.0131204,0.1851852,0.0872073,0.0909091,0.4545455,834.9650350,2150.8417508
,3,0.0306657,0.0053284,8.6818182,17.7871397,0.0714286,0.0064154,0.1463415,0.0596199,0.0909091,0.5454545,768.1818182,1678.7139690
,4,0.0403889,0.0030414,9.3496503,15.7558923,0.0769231,0.0041675,0.1296296,0.0462702,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0021638,0.0,12.6987788,0.0,0.0025664,0.1044776,0.0377904,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0006793,1.8141113,7.2564450,0.0149254,0.0012871,0.0597015,0.0195387,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0003673,1.8141113,5.4423338,0.0149254,0.0004709,0.0447761,0.0131828,0.0909091,0.8181818,81.4111262,444.2333786
,8,0.2004488,0.0002300,0.0,4.0817503,0.0,0.0002905,0.0335821,0.0099597,0.0,0.8181818,-100.0,308.1750339
,9,0.2999252,0.0000761,0.9138756,3.0310587,0.0075188,0.0001312,0.0249377,0.0066999,0.0909091,0.9090909,-8.6124402,203.1058717




model 17
id random_plus_manual_model_6

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.00813962099949077
RMSE: 0.09021984814601923
LogLoss: 0.04661304652320896
Null degrees of freedom: 1336
Residual degrees of freedom: 1300
Null deviance: 127.54309218673963
Residual deviance: 124.64328640306073
AIC: 198.64328640306073
AUC: 0.9304127245303716
pr_auc: 0.11706912414432052
Gini: 0.8608254490607432
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.01033450053917526: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1307.0,19.0,0.0143,(19.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1313.0,24.0,0.0187,(25.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0103345,0.2857143,23.0
max f2,0.0100112,0.3896104,31.0
max f0point5,0.0108042,0.2380952,12.0
max accuracy,0.0118809,0.9910247,0.0
max precision,0.0108042,0.2307692,12.0
max recall,0.0085037,1.0,338.0
max specificity,0.0118809,0.9992459,0.0
max absolute_mcc,0.0100112,0.3057090,31.0
max min_per_class_accuracy,0.0088210,0.8763198,149.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0107437,26.0454545,26.0454545,0.2142857,0.0112023,0.2142857,0.0112023,0.2727273,0.2727273,2504.5454545,2504.5454545
,2,0.0201945,0.0101658,18.6993007,22.5084175,0.1538462,0.0104515,0.1851852,0.0108408,0.1818182,0.4545455,1769.9300699,2150.8417508
,3,0.0306657,0.0098396,8.6818182,17.7871397,0.0714286,0.0099963,0.1463415,0.0105524,0.0909091,0.5454545,768.1818182,1678.7139690
,4,0.0403889,0.0095457,9.3496503,15.7558923,0.0769231,0.0096661,0.1296296,0.0103391,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0093633,9.3496503,14.5128901,0.0769231,0.0094690,0.1194030,0.0101702,0.0909091,0.7272727,834.9650350,1351.2890095
,6,0.1002244,0.0089267,1.8141113,8.1635007,0.0149254,0.0091000,0.0671642,0.0096351,0.0909091,0.8181818,81.4111262,716.3500678
,7,0.1503366,0.0087824,1.8141113,6.0470375,0.0149254,0.0088465,0.0497512,0.0093723,0.0909091,0.9090909,81.4111262,504.7037540
,8,0.2004488,0.0087105,0.0,4.5352782,0.0,0.0087446,0.0373134,0.0092154,0.0,0.9090909,-100.0,353.5278155
,9,0.2999252,0.0085938,0.0,3.0310587,0.0,0.0086515,0.0249377,0.0090284,0.0,0.9090909,-100.0,203.1058717




model 18
id random_plus_manual_model_14

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 19
id random_plus_manual_model_25

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 20
id random_plus_manual_model_19

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 21
id random_plus_manual_model_33

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 22
id random_plus_manual_model_13

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 23
id random_plus_manual_model_32

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 24
id random_plus_manual_model_8

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 25
id random_plus_manual_model_1

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 26
id random_plus_manual_model_4

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 27
id random_plus_manual_model_10

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 28
id random_plus_manual_model_36

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 29
id random_plus_manual_model_27

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 30
id random_plus_manual_model_3

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 31
id random_plus_manual_model_11

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 32
id random_plus_manual_model_18

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.008159858447420681
RMSE: 0.09033193481499598
LogLoss: 0.04769749146848899
Null degrees of freedom: 1336
Residual degrees of freedom: 1336
Null deviance: 127.54309218673963
Residual deviance: 127.54309218673937
AIC: 129.5430921867394
AUC: 0.5
pr_auc: 0.0
Gini: 0.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.00864381520119213: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,0.0,1326.0,1.0,(1326.0/1326.0)
1,0.0,11.0,0.0,(0.0/11.0)
Total,0.0,1337.0,0.9918,(1326.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0086438,0.0163205,0.0
max f2,0.0086438,0.0398262,0.0
max f0point5,0.0086438,0.0102631,0.0
max accuracy,0.0086438,0.0082274,0.0
max precision,0.0086438,0.0082274,0.0
max recall,0.0086438,1.0,0.0
max specificity,0.0086438,0.0,0.0
max absolute_mcc,0.0086438,0.0,0.0
max min_per_class_accuracy,0.0086438,0.0,0.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.86 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,1.0,0.0086438,1.0,1.0,0.0082274,0.0086438,0.0082274,0.0086438,1.0,1.0,0.0,0.0




model 33
id random_plus_manual_model_7

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007378727043940846
RMSE: 0.08589951713450342
LogLoss: 0.04687321940534431
Null degrees of freedom: 1336
Residual degrees of freedom: -694
Null deviance: 127.54309218673963
Residual deviance: 125.33898868988791
AIC: 4187.338988689888
AUC: 0.9098793363499246
pr_auc: 0.12259985184810611
Gini: 0.8197586726998491
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.0095701886031649: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1313.0,13.0,0.0098,(13.0/1326.0)
1,6.0,5.0,0.5455,(6.0/11.0)
Total,1319.0,18.0,0.0142,(19.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0095702,0.3448276,17.0
max f2,0.0058592,0.4054054,29.0
max f0point5,0.9847531,0.3333333,0.0
max accuracy,0.9847531,0.9925206,0.0
max precision,0.9847531,1.0,0.0
max recall,0.0000478,1.0,322.0
max specificity,0.9847531,1.0,0.0
max absolute_mcc,0.0095702,0.3485918,17.0
max min_per_class_accuracy,0.0002551,0.8181818,212.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.15 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0138952,34.7272727,34.7272727,0.2857143,0.1101376,0.2857143,0.1101376,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0059562,9.3496503,22.5084175,0.0769231,0.0086883,0.1851852,0.0612917,0.0909091,0.4545455,834.9650350,2150.8417508
,3,0.0306657,0.0036700,8.6818182,17.7871397,0.0714286,0.0049255,0.1463415,0.0420447,0.0909091,0.5454545,768.1818182,1678.7139690
,4,0.0403889,0.0023125,0.0,13.5050505,0.0,0.0029695,0.1111111,0.0326377,0.0,0.5454545,-100.0,1250.5050505
,5,0.0501122,0.0017933,9.3496503,12.6987788,0.0769231,0.0020392,0.1044776,0.0267007,0.0909091,0.6363636,834.9650350,1169.8778833
,6,0.1002244,0.0006381,1.8141113,7.2564450,0.0149254,0.0010723,0.0597015,0.0138865,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0003510,0.0,4.8376300,0.0,0.0004652,0.0398010,0.0094127,0.0,0.7272727,-100.0,383.7630032
,8,0.2004488,0.0002210,1.8141113,4.0817503,0.0149254,0.0002856,0.0335821,0.0071309,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0000813,0.9138756,3.0310587,0.0075188,0.0001434,0.0249377,0.0048133,0.0909091,0.9090909,-8.6124402,203.1058717




model 34
id random_plus_manual_model_20

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007396700773024162
RMSE: 0.08600407416526361
LogLoss: 0.04861544121658362
Null degrees of freedom: 1336
Residual degrees of freedom: -3698
Null deviance: 127.54309218673963
Residual deviance: 129.997689813145
AIC: 10199.997689813144
AUC: 0.9056286850404497
pr_auc: 0.1997962734661427
Gini: 0.8112573700808994
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.011571815100471814: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1323.0,3.0,0.0023,(3.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1330.0,7.0,0.0075,(10.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0115718,0.4444444,6.0
max f2,0.0032438,0.4375000,35.0
max f0point5,0.0115718,0.5128205,6.0
max accuracy,0.9856428,0.9925206,0.0
max precision,0.9856428,1.0,0.0
max recall,0.0000419,1.0,323.0
max specificity,0.9856428,1.0,0.0
max absolute_mcc,0.0115718,0.4523245,6.0
max min_per_class_accuracy,0.0002082,0.8122172,214.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.11 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0069305,34.7272727,34.7272727,0.2857143,0.0836176,0.2857143,0.0836176,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0040870,9.3496503,22.5084175,0.0769231,0.0052417,0.1851852,0.0458811,0.0909091,0.4545455,834.9650350,2150.8417508
,3,0.0306657,0.0025710,17.3636364,20.7516630,0.1428571,0.0033089,0.1707317,0.0313442,0.1818182,0.6363636,1636.3636364,1975.1662971
,4,0.0403889,0.0016504,0.0,15.7558923,0.0,0.0019758,0.1296296,0.0242741,0.0,0.6363636,-100.0,1475.5892256
,5,0.0501122,0.0012156,0.0,12.6987788,0.0,0.0014379,0.1044776,0.0198432,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0005191,0.0,6.3493894,0.0,0.0007959,0.0522388,0.0103195,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0003094,1.8141113,4.8376300,0.0149254,0.0004057,0.0398010,0.0070149,0.0909091,0.7272727,81.4111262,383.7630032
,8,0.2004488,0.0001894,1.8141113,4.0817503,0.0149254,0.0002459,0.0335821,0.0053227,0.0909091,0.8181818,81.4111262,308.1750339
,9,0.2999252,0.0000945,0.9138756,3.0310587,0.0075188,0.0001350,0.0249377,0.0036021,0.0909091,0.9090909,-8.6124402,203.1058717




model 35
id random_plus_manual_model_28

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007410910953407385
RMSE: 0.08608664793919778
LogLoss: 0.050607650657927584
Null degrees of freedom: 1336
Residual degrees of freedom: -2831
Null deviance: 127.54309218673963
Residual deviance: 135.32485785930106
AIC: 8471.324857859301
AUC: 0.9002125325654738
pr_auc: 0.12431237755877624
Gini: 0.8004250651309477
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.008372398772181634: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1316.0,10.0,0.0075,(10.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1323.0,14.0,0.0127,(17.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0083724,0.32,13.0
max f2,0.0035678,0.4054054,29.0
max f0point5,0.9824187,0.3333333,0.0
max accuracy,0.9824187,0.9925206,0.0
max precision,0.9824187,1.0,0.0
max recall,0.0000287,1.0,328.0
max specificity,0.9824187,1.0,0.0
max absolute_mcc,0.0035678,0.3216420,29.0
max min_per_class_accuracy,0.0001390,0.7941176,226.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.11 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0081442,34.7272727,34.7272727,0.2857143,0.0895037,0.2857143,0.0895037,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0037832,0.0,18.0067340,0.0,0.0057099,0.1481481,0.0491585,0.0,0.3636364,-100.0,1700.6734007
,3,0.0306657,0.0022939,17.3636364,17.7871397,0.1428571,0.0030645,0.1463415,0.0334191,0.1818182,0.5454545,1636.3636364,1678.7139690
,4,0.0403889,0.0015052,9.3496503,15.7558923,0.0769231,0.0018456,0.1296296,0.0258181,0.0909091,0.6363636,834.9650350,1475.5892256
,5,0.0501122,0.0011748,0.0,12.6987788,0.0,0.0012999,0.1044776,0.0210608,0.0,0.6363636,-100.0,1169.8778833
,6,0.1002244,0.0004629,1.8141113,7.2564450,0.0149254,0.0007103,0.0597015,0.0108856,0.0909091,0.7272727,81.4111262,625.6445047
,7,0.1503366,0.0002470,0.0,4.8376300,0.0,0.0003360,0.0398010,0.0073690,0.0,0.7272727,-100.0,383.7630032
,8,0.2004488,0.0001567,0.0,3.6282225,0.0,0.0001994,0.0298507,0.0055766,0.0,0.7272727,-100.0,262.8222524
,9,0.2999252,0.0000692,0.9138756,2.7279528,0.0075188,0.0001015,0.0224439,0.0037607,0.0909091,0.8181818,-8.6124402,172.7952845




model 36
id random_plus_manual_model_17

ModelMetricsBinomialGLM: glm
** Reported on test data. **

MSE: 0.007408993367611253
RMSE: 0.0860755096854573
LogLoss: 0.05340201778982284
Null degrees of freedom: 1336
Residual degrees of freedom: -4264
Null deviance: 127.54309218673963
Residual deviance: 142.79699556998273
AIC: 11344.796995569983
AUC: 0.8626422597010831
pr_auc: 0.10951837806270537
Gini: 0.7252845194021662
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.011061699678360131: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1317.0,9.0,0.0068,(9.0/1326.0)
1,7.0,4.0,0.6364,(7.0/11.0)
Total,1324.0,13.0,0.012,(16.0/1337.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0110617,0.3333333,12.0
max f2,0.0110617,0.3508772,12.0
max f0point5,0.9974050,0.3333333,0.0
max accuracy,0.9974050,0.9925206,0.0
max precision,0.9974050,1.0,0.0
max recall,0.0000160,1.0,350.0
max specificity,0.9974050,1.0,0.0
max absolute_mcc,0.0110617,0.3285012,12.0
max min_per_class_accuracy,0.0001121,0.7963801,230.0


Gains/Lift Table: Avg response rate:  0.82 %, avg score:  0.12 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0104712,0.0103209,34.7272727,34.7272727,0.2857143,0.0906549,0.2857143,0.0906549,0.3636364,0.3636364,3372.7272727,3372.7272727
,2,0.0201945,0.0055983,0.0,18.0067340,0.0,0.0073361,0.1481481,0.0505384,0.0,0.3636364,-100.0,1700.6734007
,3,0.0306657,0.0030171,8.6818182,14.8226164,0.0714286,0.0037633,0.1219512,0.0345664,0.0909091,0.4545455,768.1818182,1382.2616408
,4,0.0403889,0.0013820,9.3496503,13.5050505,0.0769231,0.0020851,0.1111111,0.0267468,0.0909091,0.5454545,834.9650350,1250.5050505
,5,0.0501122,0.0010191,9.3496503,12.6987788,0.0769231,0.0011541,0.1044776,0.0217811,0.0909091,0.6363636,834.9650350,1169.8778833
,6,0.1002244,0.0004149,0.0,6.3493894,0.0,0.0006426,0.0522388,0.0112118,0.0,0.6363636,-100.0,534.9389417
,7,0.1503366,0.0001982,0.0,4.2329263,0.0,0.0002865,0.0348259,0.0075700,0.0,0.6363636,-100.0,323.2926278
,8,0.2004488,0.0001177,1.8141113,3.6282225,0.0149254,0.0001538,0.0298507,0.0057160,0.0909091,0.7272727,81.4111262,262.8222524
,9,0.2999252,0.0000551,0.9138756,2.7279528,0.0075188,0.0000805,0.0224439,0.0038469,0.0909091,0.8181818,-8.6124402,172.7952845






In [60]:
# Shut down the H2O cluster now that model evaluation is finished.
# prompt=False skips the interactive confirmation so the cell can run unattended.
# NOTE(review): any later cell that uses H2O would presumably need to start it again first — confirm.
h2o.cluster().shutdown(prompt=False)

H2O session _sid_83a0 closed.


### We chose H2OAutoML rather than GLM for two reasons: first, the AUC of the AutoML model is slightly higher; second, after running both multiple times, the performance of H2OAutoML is consistently stable.