In [26]:
import numpy as np
from sklearn.feature_extraction import FeatureHasher
import pandas as pd
import os

# Prepare Training data

In [2]:
# Location of the training CSV, relative to the notebook's working directory.
# Alternative location used previously: '../data/ordered_ex1_train.csv'
local_raw_data = "ordered_ex1_train.csv"

In [3]:
# Read the raw training CSV into a DataFrame.
raw_data_df = pd.read_csv(filepath_or_buffer=local_raw_data)

In [4]:
# target = number_of_sae_subjects / enrollment
# (described in this notebook as the percentage of subjects having adverse events)
sae_subjects = raw_data_df['number_of_sae_subjects']
enrolled = raw_data_df['enrollment']
raw_data_df['target'] = sae_subjects / enrolled

In [5]:
# Drop number_of_sae_subjects and enrollment now that `target` has been derived
# from them. The columns are selected by name rather than by position (7 and 8),
# so the cell stays correct if the CSV column order ever changes, and the result
# is rebound instead of using `inplace=True`.
raw_data_df = raw_data_df.drop(['number_of_sae_subjects', 'enrollment'], axis=1)

In [6]:
# Convert the DataFrame to an ndarray.
# `DataFrame.as_matrix()` was deprecated and later removed from pandas;
# `.values` yields the same array and is available across pandas versions.
raw_data = raw_data_df.values

In [7]:
# Recode column 4 (has_us_facilities) from the flags 't' / 'f' to 1 / 0.
# Both masks are computed up front; the two sets of rows are disjoint, so the
# order of the assignments does not matter.
us_flag_true = raw_data[:, 4] == 't'
us_flag_false = raw_data[:, 4] == 'f'
raw_data[us_flag_true, 4] = 1
raw_data[us_flag_false, 4] = 0

## Feature hashing

In [8]:
def merge_category(id_cat):
    """Group consecutive (nct_id, category) pairs into one dict per nct_id.

    Parameters
    ----------
    id_cat : iterable of (nct_id, category) pairs
        Rows that belong to the same nct_id must be adjacent: a new group is
        started every time the id differs from the id of the previous row.

    Returns
    -------
    list of dict
        One ``{category: 1}`` dict per run of identical ids, in input order
        (the shape expected by ``FeatureHasher(input_type='dict')``).
        An empty input yields an empty list.

    Side effects
    ------------
    Prints the number of groups produced.
    """
    output = []
    # Private sentinel: unlike None, it can never collide with a real id value,
    # so a leading None id is not silently merged into the following group.
    no_previous = object()
    prev_id = no_previous
    for nct_id, cat in id_cat:
        if prev_id is no_previous or not (prev_id == nct_id):
            # First row of a new id: open a fresh category dict.
            output.append({})
            prev_id = nct_id
        output[-1][cat] = 1
    print('output number of merged unique ids: {}'.format(len(output)))
    return output

### Convert conditions, interventions and countries to hashed features

In [9]:
# Count the distinct trial identifiers found in column 0.
unique_ids = np.unique(raw_data[:, 0])
number_of_uniqueID = len(unique_ids)
print('number of unique nct_ids: {}'.format(number_of_uniqueID))

number of unique nct_ids: 16677


In [10]:
# Pair the nct_id column (0) with each categorical column:
# 1 = condition, 2 = intervention, 5 = country.
conditions, interventions, countries = (
    raw_data[:, [0, category_col]] for category_col in (1, 2, 5)
)

> TODO: draw frequency distribution of the three features

In [11]:
# Peek at the intervention names (second column of the id/intervention pairs).
interventions[:, 1]

array(['ganciclovir', 'ganciclovir', 'ganciclovir', ..., 'oxycodone',
       'fentanyl', 'adenosine'], dtype=object)

In [12]:
# Preprocess the high-dimensional features before feeding them to the feature
# hasher: collapse each column into one {category: 1} dict per trial.
merged_conditions, merged_interventions, merged_countries = [
    merge_category(id_category_pairs)
    for id_category_pairs in (conditions, interventions, countries)
]

output number of merged unique ids: 16677
output number of merged unique ids: 16677
output number of merged unique ids: 16677


In [13]:
# Number of distinct values in each categorical column; these drive the
# hashed-feature widths chosen below.
number_of_conditions = len(np.unique(conditions[:, 1]))
print('number of unique conditions: {}'.format(number_of_conditions))

number_of_interventions = len(np.unique(interventions[:, 1]))
print('number of unique interventions: {}'.format(number_of_interventions))

number_of_countries = len(np.unique(countries[:, 1]))
print('number of unique countries: {}'.format(number_of_countries))

number of unique conditions: 1909
number of unique interventions: 1846
number of unique countries: 142


In [14]:
# Feature hashers for the three categorical columns.
# Conditions and interventions are squeezed into HASH_SIZE_RATIO (20%) of their
# distinct-value count; countries get as many buckets as there are distinct values.
#
# `alternate_sign=False` replaces the former `non_negative=True` argument, which
# was deprecated and then removed from FeatureHasher. With the all-ones dicts
# built above it keeps every hashed value non-negative.
HASH_SIZE_RATIO = 0.2

conditions_hasher = FeatureHasher(n_features=int(number_of_conditions * HASH_SIZE_RATIO),
                                  alternate_sign=False, input_type='dict')
interventions_hasher = FeatureHasher(n_features=int(number_of_interventions * HASH_SIZE_RATIO),
                                     alternate_sign=False, input_type='dict')
countries_hasher = FeatureHasher(n_features=int(number_of_countries),
                                 alternate_sign=False, input_type='dict')



In [15]:
# Hash each list of per-trial category dicts, then densify the sparse result so
# rows can be indexed and concatenated later on.
conditions_sparse = conditions_hasher.fit_transform(merged_conditions)
conditions_feature = conditions_sparse.toarray()
print('conditions_feature shape: {}'.format(conditions_feature.shape))

interventions_sparse = interventions_hasher.fit_transform(merged_interventions)
interventions_feature = interventions_sparse.toarray()
print('interventions_feature shape: {}'.format(interventions_feature.shape))

countries_sparse = countries_hasher.fit_transform(merged_countries)
countries_feature = countries_sparse.toarray()
print('countries_feature shape: {}'.format(countries_feature.shape))

conditions_feature shape: (16677, 381)
interventions_feature shape: (16677, 369)
countries_feature shape: (16677, 142)


### Appending the hashed feature to training data

In [17]:
# Show the first raw row as a reminder of the column layout used below.
raw_data[0]

array(['NCT00000143', 'hiv infections', 'ganciclovir', 19, 1,
       'United States', 1, 0.0], dtype=object)

In [18]:
# Build one record per trial (nct_id): keep the scalar columns, replace the old
# conditions / interventions / countries strings with their hashed vectors, and
# put the target last.
new_data = []
prev_id = None
idx = 0  # row in the hashed-feature matrices that belongs to the current trial
for data in raw_data:
    cur_id = data[0]
    starts_new_trial = prev_id is None or cur_id != prev_id
    if not starts_new_trial:
        # Later rows of an already-emitted trial contribute nothing new here.
        continue

    record = [cur_id, data[3], data[4], data[6]]
    record.extend(conditions_feature[idx].tolist())
    record.extend(interventions_feature[idx].tolist())
    record.extend(countries_feature[idx].tolist())
    record.append(data[7])
    new_data.append(record)

    prev_id = cur_id
    idx += 1

# Column layout of new_data: nct_id, number_of_facilities, has_us_facility,
# number_of_sponsors, conditions_features, interventions_features,
# countries_features, percentage_of_adverse_event (target).
# NOTE: the records mix strings and numbers, so np.array stores every value as a
# string (the preview further down shows dtype '<U22').
new_data = np.array(new_data)
print('reconstructed training data shape: {}'.format(new_data.shape))

reconstructed training data shape: (16677, 897)


> note: remove the first nct_id column before send to model

In [19]:
# Randomly shuffle the rows (in place) before splitting them into
# train / validation / test sets.
# Seeding NumPy's global generator first makes the shuffle, and therefore the
# split that follows, reproducible when the notebook is re-run from the top.
np.random.seed(42)
np.random.shuffle(new_data)

In [20]:
# Preview the training feature rows: the first 70% of the data with the label
# (last) column removed.
train_size = int(0.7 * new_data.shape[0])
train_features = new_data[:train_size, :-1]
train_features

array([['NCT01349959', '7', '1', ..., '0.0', '0.0', '0.0'],
       ['NCT00703937', '1', '1', ..., '0.0', '0.0', '0.0'],
       ['NCT01575899', '1', '0', ..., '0.0', '0.0', '0.0'],
       ...,
       ['NCT00805142', '22', '0', ..., '0.0', '0.0', '0.0'],
       ['NCT01733316', '13', '1', ..., '0.0', '1.0', '0.0'],
       ['NCT01027351', '1', '0', ..., '0.0', '0.0', '0.0']], dtype='<U22')

In [21]:
# 70% / 20% / remainder split of the rows into train / validation / test.
# In every split the last column is the label and everything before it is a
# feature column (the nct_id is still present as the first feature column).
n_rows = new_data.shape[0]

train_size = int(n_rows * 0.7)
train_rows = new_data[:train_size]
train_features, train_labels = train_rows[:, :-1], train_rows[:, -1]
print('train_features shape: {}'.format(train_features.shape))

validation_size = int(n_rows * 0.2)
validation_end = train_size + validation_size
validation_rows = new_data[train_size:validation_end]
validation_features, validation_labels = validation_rows[:, :-1], validation_rows[:, -1]
print('validation_features shape: {}'.format(validation_features.shape))

test_rows = new_data[validation_end:]
test_features, test_labels = test_rows[:, :-1], test_rows[:, -1]
print('test_features shape: {}'.format(test_features.shape))

train_features shape: (11673, 896)
validation_features shape: (3335, 896)
test_features shape: (1669, 896)


In [33]:
# Width of the model input: every feature column except the leading nct_id.
feature_dim = train_features[:, 1:].shape[-1]
print('feature dimention: {}'.format(feature_dim))

feature dimention: 895


### Data conversion

Convert the NumPy array into the recordIO-protobuf or CSV format accepted by the SageMaker Linear Learner algorithm.
Model specs: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html

Since algorithms have particular input and output requirements, converting the dataset is also part of the process that a data scientist goes through prior to initiating training. In this particular case, the Amazon SageMaker implementation of Linear Learner takes recordIO-wrapped protobuf, whereas the data we have at this point is an in-memory NumPy array.

Most of the conversion effort is handled by the Amazon SageMaker Python SDK, imported as `sagemaker` below.

In [35]:
import boto3
import re
import sagemaker.amazon.common as smac
import io
from sagemaker import get_execution_role
import sagemaker

# S3 destination for the converted training data.
bucket = 'sagemaker-tiber-solution'
prefix = 'logistic-regression/yangz5'
s3_train_key = '{}/train/recordio-pb-data'.format(prefix)
# Build the URI with plain string formatting: os.path.join is meant for
# filesystem paths and would insert the OS-specific separator into the URI.
s3_train_path = 's3://{}/{}'.format(bucket, s3_train_key)

# Define IAM role
role = get_execution_role()

In [30]:
# Training data excludes the first (nct_id) column; features and labels are
# both cast to float32 before being serialised.
vectors = train_features[:, 1:].astype('float32')
labels = train_labels.astype('float32')

# Serialise into an in-memory buffer, rewind it, and upload it to S3.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
buf.seek(0)
train_object = boto3.resource('s3').Bucket(bucket).Object(s3_train_key)
train_object.upload_fileobj(buf)

We wrap the model training setup in a convenience function that takes in the S3 location of the training data, the model hyperparameters that define our training job, and the S3 output path for model artifacts.  Inside the function, we'll hardcode the algorithm container, the number and type of EC2 instances to train on, and the input and output data formats.

In [32]:
def predictor_from_hyperparams(s3_train_data, hyperparams, output_path):
    """
    Create a Linear Learner Estimator from the given hyperparams and fit it to the training data.

    Despite the function's name, no endpoint is deployed here: the trained
    Estimator is returned and the caller is expected to deploy it separately.

    Parameters
    ----------
    s3_train_data : str
        S3 location of the training data, passed to ``fit`` as the 'train' channel.
    hyperparams : dict
        Hyperparameters forwarded to ``Estimator.set_hyperparameters``.
    output_path : str
        S3 location handed to the Estimator as its ``output_path``.

    Returns
    -------
    sagemaker.estimator.Estimator
        The estimator on which ``fit`` has been called.

    Raises
    ------
    KeyError
        If the region of the current boto3 session has no container image
        listed below.

    Notes
    -----
    Uses the notebook-level ``role`` variable as the IAM execution role.
    """
    # specify algorithm containers and instantiate an Estimator with given hyperparams
    containers = {
        'us-west-2': '174872318107.dkr.ecr.us-west-2.amazonaws.com/linear-learner:latest',
        'us-east-1': '382416733822.dkr.ecr.us-east-1.amazonaws.com/linear-learner:latest',
        'us-east-2': '404615174143.dkr.ecr.us-east-2.amazonaws.com/linear-learner:latest',
        'eu-west-1': '438346466558.dkr.ecr.eu-west-1.amazonaws.com/linear-learner:latest'}
    region = boto3.Session().region_name
    if region not in containers:
        # Still a KeyError (as before), but one that says what went wrong.
        raise KeyError(
            'no linear-learner container listed for region {!r}; known regions: {}'.format(
                region, sorted(containers)))
    linear = sagemaker.estimator.Estimator(containers[region],
        role,
        train_instance_count=1,
        train_instance_type='ml.m4.xlarge',
        output_path=output_path,
        sagemaker_session=sagemaker.Session())
    linear.set_hyperparameters(**hyperparams)
    # train model
    linear.fit({'train': s3_train_data})

    # return the trained model; deployment is done by the caller
    return linear

In [37]:
# Train a Linear Learner model as a regressor (predictor_type='regressor') for
# 10 epochs, leaving all other hyperparameters unset.
# Note: despite its name, `defaults_predictor` receives the trained estimator
# returned by predictor_from_hyperparams, not a deployed predictor.
defaults_hyperparams = dict(
    feature_dim=feature_dim,
    predictor_type='regressor',
    epochs=10,
)
defaults_output_path = 's3://{}/{}/defaults/output'.format(bucket, prefix)
defaults_predictor = predictor_from_hyperparams(
    s3_train_data=s3_train_path,
    hyperparams=defaults_hyperparams,
    output_path=defaults_output_path,
)

INFO:sagemaker:Creating training-job with name: linear-learner-2018-04-11-16-39-20-286


..................................................................
[31mDocker entrypoint called with argument(s): train[0m
[31m[04/11/2018 16:44:44 INFO 139860162471744] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'10', u'init_bias': u'0.0', u'lr_scheduler_factor': u'0.99', u'num_calibration_samples': u'10000000', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'10', u'lr_scheduler_step': u'100', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_lr': u'0.00001', u'target_recall': u'0.8', u'num_models': u'32', u'early_stopping_patience': u'3', u'momentum': u'0.0', u'unbias_label': u'auto', u'wd': u'0.0', u'optimizer': u'adam', u'early_stopping_tolerance': u'0.001', u'learning_rate': u'auto', u'_kvstore': u'auto', u'normalize_data': u'true', u'binary_classi

[31m#metrics {"Metrics": {"training_mse": {"count": 1, "max": 0.6891122963645242, "sum": 0.6891122963645242, "min": 0.6891122963645242}}, "EndTime": 1523465087.327766, "Dimensions": {"model": 0, "Host": "algo-1", "Operation": "training", "Algorithm": "Linear Learner", "epoch": 6}, "StartTime": 1523465087.327691}
[0m
[31m#metrics {"Metrics": {"training_mse": {"count": 1, "max": 0.6891054271351207, "sum": 0.6891054271351207, "min": 0.6891054271351207}}, "EndTime": 1523465087.327848, "Dimensions": {"model": 1, "Host": "algo-1", "Operation": "training", "Algorithm": "Linear Learner", "epoch": 6}, "StartTime": 1523465087.327834}
[0m
[31m#metrics {"Metrics": {"training_mse": {"count": 1, "max": 0.6906183471679688, "sum": 0.6906183471679688, "min": 0.6906183471679688}}, "EndTime": 1523465087.327899, "Dimensions": {"model": 2, "Host": "algo-1", "Operation": "training", "Algorithm": "Linear Learner", "epoch": 6}, "StartTime": 1523465087.327886}
[0m
[31m#metrics {"Metrics": {"training_mse

===== Job Complete =====
Billable seconds: 172


In [None]:
# Deploy the trained estimator on a single ml.t2.medium instance.
logistic_predictor = defaults_predictor.deploy(
    instance_type='ml.t2.medium',
    initial_instance_count=1,
)

In [40]:
from sagemaker.predictor import csv_serializer, json_deserializer
# Requests are sent as CSV and responses are decoded from JSON.
logistic_predictor.serializer = csv_serializer
logistic_predictor.content_type = 'text/csv'
logistic_predictor.deserializer = json_deserializer

In [90]:
# Score every test row with one request per row, collecting
# (nct_id, predicted score, true label) tuples.
result = []

for idx, test_row in enumerate(test_features):
    # Column 0 is the nct_id; the remaining columns are the model inputs.
    nct_id, feature_values = test_row[0], test_row[1:]
    prediction = logistic_predictor.predict(feature_values.reshape(1, -1))
    score = prediction['predictions'][0]['score']
    result.append((nct_id, score, float(test_labels[idx])))

{'predictions': [{'score': 1.4546172618865967}]}
idx: 0, nct_id: NCT01349959, prediction: 1.4546172618865967, label: 0.425
{'predictions': [{'score': 0.5182403326034546}]}
idx: 1, nct_id: NCT00703937, prediction: 0.5182403326034546, label: 0.08192090395480225
{'predictions': [{'score': 0.7158364057540894}]}
idx: 2, nct_id: NCT01575899, prediction: 0.7158364057540894, label: 0.009615384615384616
{'predictions': [{'score': 0.03394302725791931}]}
idx: 3, nct_id: NCT00621517, prediction: 0.03394302725791931, label: 0.03333333333333333
{'predictions': [{'score': 0.6007420420646667}]}
idx: 4, nct_id: NCT01100853, prediction: 0.6007420420646667, label: 0.32
{'predictions': [{'score': 0.29297369718551636}]}
idx: 5, nct_id: NCT01444651, prediction: 0.29297369718551636, label: 0.0
{'predictions': [{'score': 0.23890921473503113}]}
idx: 6, nct_id: NCT02114931, prediction: 0.23890921473503113, label: 0.22269807280513917
{'predictions': [{'score': -0.05315151810646057}]}
idx: 7, nct_id: NCT00885079,

{'predictions': [{'score': 0.03668370842933655}]}
idx: 66, nct_id: NCT00985010, prediction: 0.03668370842933655, label: 0.0
{'predictions': [{'score': 1.4654052257537842}]}
idx: 67, nct_id: NCT01317615, prediction: 1.4654052257537842, label: 1.9387755102040816
{'predictions': [{'score': -0.047054439783096313}]}
idx: 68, nct_id: NCT00427700, prediction: -0.047054439783096313, label: 0.0
{'predictions': [{'score': -0.0583270788192749}]}
idx: 69, nct_id: NCT01017263, prediction: -0.0583270788192749, label: 0.0
{'predictions': [{'score': -0.10395672917366028}]}
idx: 70, nct_id: NCT00468481, prediction: -0.10395672917366028, label: 0.01818181818181818
{'predictions': [{'score': 0.47391563653945923}]}
idx: 71, nct_id: NCT00986154, prediction: 0.47391563653945923, label: 0.5062711046792089
{'predictions': [{'score': 0.2617266774177551}]}
idx: 72, nct_id: NCT00632970, prediction: 0.2617266774177551, label: 0.0
{'predictions': [{'score': 0.3707788586616516}]}
idx: 73, nct_id: NCT00141726, predi

{'predictions': [{'score': 0.5711166858673096}]}
idx: 128, nct_id: NCT01071070, prediction: 0.5711166858673096, label: 0.027777777777777776
{'predictions': [{'score': 0.6696575880050659}]}
idx: 129, nct_id: NCT01738646, prediction: 0.6696575880050659, label: 0.8541666666666666
{'predictions': [{'score': 1.191772699356079}]}
idx: 130, nct_id: NCT00581919, prediction: 1.191772699356079, label: 0.84375
{'predictions': [{'score': 0.879747748374939}]}
idx: 131, nct_id: NCT01042509, prediction: 0.879747748374939, label: 1.3333333333333333
{'predictions': [{'score': 0.8589233756065369}]}
idx: 132, nct_id: NCT00672243, prediction: 0.8589233756065369, label: 0.5
{'predictions': [{'score': -0.6865968108177185}]}
idx: 133, nct_id: NCT00676130, prediction: -0.6865968108177185, label: 0.013071895424836602
{'predictions': [{'score': 0.9139930605888367}]}
idx: 134, nct_id: NCT00108069, prediction: 0.9139930605888367, label: 0.9302325581395349
{'predictions': [{'score': 0.21574559807777405}]}
idx: 135

{'predictions': [{'score': 0.056925803422927856}]}
idx: 208, nct_id: NCT00106106, prediction: 0.056925803422927856, label: 0.03571428571428571
{'predictions': [{'score': 0.238401859998703}]}
idx: 209, nct_id: NCT01188564, prediction: 0.238401859998703, label: 0.02666666666666667
{'predictions': [{'score': 0.050325408577919006}]}
idx: 210, nct_id: NCT01411774, prediction: 0.050325408577919006, label: 0.0
{'predictions': [{'score': 0.522557258605957}]}
idx: 211, nct_id: NCT00788593, prediction: 0.522557258605957, label: 0.18292682926829268
{'predictions': [{'score': 0.30909085273742676}]}
idx: 212, nct_id: NCT00535782, prediction: 0.30909085273742676, label: 0.6439393939393939
{'predictions': [{'score': 0.06693802773952484}]}
idx: 213, nct_id: NCT00343915, prediction: 0.06693802773952484, label: 0.03745318352059925
{'predictions': [{'score': 0.37124693393707275}]}
idx: 214, nct_id: NCT00844714, prediction: 0.37124693393707275, label: 0.0
{'predictions': [{'score': 0.16901680827140808}]}


{'predictions': [{'score': 0.044431865215301514}]}
idx: 269, nct_id: NCT01337297, prediction: 0.044431865215301514, label: 0.0
{'predictions': [{'score': 0.7967870831489563}]}
idx: 270, nct_id: NCT01085331, prediction: 0.7967870831489563, label: 1.5625
{'predictions': [{'score': 0.10628631711006165}]}
idx: 271, nct_id: NCT00826943, prediction: 0.10628631711006165, label: 0.0
{'predictions': [{'score': 0.47470948100090027}]}
idx: 272, nct_id: NCT00661427, prediction: 0.47470948100090027, label: 1.540983606557377
{'predictions': [{'score': 0.1277557909488678}]}
idx: 273, nct_id: NCT00959192, prediction: 0.1277557909488678, label: 0.0
{'predictions': [{'score': 0.027279138565063477}]}
idx: 274, nct_id: NCT01155999, prediction: 0.027279138565063477, label: 0.0
{'predictions': [{'score': 0.5507966876029968}]}
idx: 275, nct_id: NCT01280981, prediction: 0.5507966876029968, label: 0.041666666666666664
{'predictions': [{'score': 0.6560589671134949}]}
idx: 276, nct_id: NCT01459653, prediction: 0

{'predictions': [{'score': 0.16477534174919128}]}
idx: 336, nct_id: NCT00590161, prediction: 0.16477534174919128, label: 0.0
{'predictions': [{'score': 0.05879366397857666}]}
idx: 337, nct_id: NCT01529515, prediction: 0.05879366397857666, label: 0.2200392927308448
{'predictions': [{'score': 0.3229176104068756}]}
idx: 338, nct_id: NCT00586261, prediction: 0.3229176104068756, label: 0.05555555555555555
{'predictions': [{'score': 0.4749770760536194}]}
idx: 339, nct_id: NCT00182637, prediction: 0.4749770760536194, label: 0.0
{'predictions': [{'score': 2.5997378826141357}]}
idx: 340, nct_id: NCT00505635, prediction: 2.5997378826141357, label: 3.8
{'predictions': [{'score': -0.41076213121414185}]}
idx: 341, nct_id: NCT02120300, prediction: -0.41076213121414185, label: 0.11475409836065574
{'predictions': [{'score': 2.2810282707214355}]}
idx: 342, nct_id: NCT00045305, prediction: 2.2810282707214355, label: 9.176470588235293
{'predictions': [{'score': 0.8829565048217773}]}
idx: 343, nct_id: NCT

{'predictions': [{'score': 0.7287136316299438}]}
idx: 416, nct_id: NCT00499252, prediction: 0.7287136316299438, label: 0.47058823529411764
{'predictions': [{'score': -0.09604662656784058}]}
idx: 417, nct_id: NCT00920439, prediction: -0.09604662656784058, label: 0.0
{'predictions': [{'score': 0.025048047304153442}]}
idx: 418, nct_id: NCT00669110, prediction: 0.025048047304153442, label: 0.05
{'predictions': [{'score': 1.1046960353851318}]}
idx: 419, nct_id: NCT01203930, prediction: 1.1046960353851318, label: 1.6476190476190475
{'predictions': [{'score': 0.18449681997299194}]}
idx: 420, nct_id: NCT01830790, prediction: 0.18449681997299194, label: 0.0
{'predictions': [{'score': 0.3869425654411316}]}
idx: 421, nct_id: NCT02547714, prediction: 0.3869425654411316, label: 0.0
{'predictions': [{'score': 0.6990094780921936}]}
idx: 422, nct_id: NCT00286156, prediction: 0.6990094780921936, label: 0.26666666666666666
{'predictions': [{'score': 0.3917088806629181}]}
idx: 423, nct_id: NCT00452530, p

{'predictions': [{'score': 0.08316203951835632}]}
idx: 495, nct_id: NCT01649297, prediction: 0.08316203951835632, label: 0.04679552390640895
{'predictions': [{'score': 1.1676609516143799}]}
idx: 496, nct_id: NCT00436956, prediction: 1.1676609516143799, label: 9.322033898305085
{'predictions': [{'score': 0.3672439455986023}]}
idx: 497, nct_id: NCT00664430, prediction: 0.3672439455986023, label: 0.0
{'predictions': [{'score': -0.0763099193572998}]}
idx: 498, nct_id: NCT00570089, prediction: -0.0763099193572998, label: 0.0
{'predictions': [{'score': 0.3042437434196472}]}
idx: 499, nct_id: NCT00559364, prediction: 0.3042437434196472, label: 0.14
{'predictions': [{'score': 0.2851126790046692}]}
idx: 500, nct_id: NCT00411411, prediction: 0.2851126790046692, label: 0.0
{'predictions': [{'score': 0.051415786147117615}]}
idx: 501, nct_id: NCT01520727, prediction: 0.051415786147117615, label: 0.03125
{'predictions': [{'score': 0.20622693002223969}]}
idx: 502, nct_id: NCT00710970, prediction: 0.2

{'predictions': [{'score': 0.3979906141757965}]}
idx: 567, nct_id: NCT00983801, prediction: 0.3979906141757965, label: 1.3448275862068966
{'predictions': [{'score': 1.312972068786621}]}
idx: 568, nct_id: NCT01081951, prediction: 1.312972068786621, label: 0.49382716049382713
{'predictions': [{'score': 0.06797200441360474}]}
idx: 569, nct_id: NCT00858468, prediction: 0.06797200441360474, label: 0.03553299492385787
{'predictions': [{'score': 0.6733284592628479}]}
idx: 570, nct_id: NCT01465802, prediction: 0.6733284592628479, label: 1.0889830508474576
{'predictions': [{'score': 0.3902820348739624}]}
idx: 571, nct_id: NCT00426231, prediction: 0.3902820348739624, label: 0.0
{'predictions': [{'score': 0.07961167395114899}]}
idx: 572, nct_id: NCT02653495, prediction: 0.07961167395114899, label: 0.0
{'predictions': [{'score': 0.447179913520813}]}
idx: 573, nct_id: NCT01171690, prediction: 0.447179913520813, label: 0.0
{'predictions': [{'score': 0.4683153033256531}]}
idx: 574, nct_id: NCT0144968

{'predictions': [{'score': 0.9172056913375854}]}
idx: 646, nct_id: NCT01024387, prediction: 0.9172056913375854, label: 0.8666666666666667
{'predictions': [{'score': 0.1508714109659195}]}
idx: 647, nct_id: NCT02246062, prediction: 0.1508714109659195, label: 0.0
{'predictions': [{'score': 0.5752447843551636}]}
idx: 648, nct_id: NCT00732992, prediction: 0.5752447843551636, label: 0.8333333333333334
{'predictions': [{'score': 0.364010214805603}]}
idx: 649, nct_id: NCT00506064, prediction: 0.364010214805603, label: 0.0
{'predictions': [{'score': 0.3501519560813904}]}
idx: 650, nct_id: NCT00759941, prediction: 0.3501519560813904, label: 0.0
{'predictions': [{'score': 0.6806118488311768}]}
idx: 651, nct_id: NCT00325403, prediction: 0.6806118488311768, label: 0.3037249283667622
{'predictions': [{'score': 1.203331470489502}]}
idx: 652, nct_id: NCT00101686, prediction: 1.203331470489502, label: 1.3692870201096892
{'predictions': [{'score': 0.2961477041244507}]}
idx: 653, nct_id: NCT02462720, pre

{'predictions': [{'score': 0.258107453584671}]}
idx: 722, nct_id: NCT01664624, prediction: 0.258107453584671, label: 0.05
{'predictions': [{'score': 0.6709847450256348}]}
idx: 723, nct_id: NCT00615927, prediction: 0.6709847450256348, label: 0.78125
{'predictions': [{'score': 0.07020381093025208}]}
idx: 724, nct_id: NCT00796822, prediction: 0.07020381093025208, label: 0.0
{'predictions': [{'score': -0.37771469354629517}]}
idx: 725, nct_id: NCT01915914, prediction: -0.37771469354629517, label: 0.056074766355140186
{'predictions': [{'score': 0.5051495432853699}]}
idx: 726, nct_id: NCT01401166, prediction: 0.5051495432853699, label: 0.17008196721311475
{'predictions': [{'score': 1.095563292503357}]}
idx: 727, nct_id: NCT01471353, prediction: 1.095563292503357, label: 0.8837209302325582
{'predictions': [{'score': 0.3258143663406372}]}
idx: 728, nct_id: NCT00449150, prediction: 0.3258143663406372, label: 0.17091454272863568
{'predictions': [{'score': 0.06484818458557129}]}
idx: 729, nct_id: 

{'predictions': [{'score': 0.2927403450012207}]}
idx: 793, nct_id: NCT00849017, prediction: 0.2927403450012207, label: 0.3592233009708738
{'predictions': [{'score': 0.34446293115615845}]}
idx: 794, nct_id: NCT02006732, prediction: 0.34446293115615845, label: 0.069221260815822
{'predictions': [{'score': 1.135772705078125}]}
idx: 795, nct_id: NCT01281501, prediction: 1.135772705078125, label: 0.0
{'predictions': [{'score': 1.1877495050430298}]}
idx: 796, nct_id: NCT00310076, prediction: 1.1877495050430298, label: 2.103448275862069
{'predictions': [{'score': 1.0053138732910156}]}
idx: 797, nct_id: NCT01025284, prediction: 1.0053138732910156, label: 1.046875
{'predictions': [{'score': 0.6095116138458252}]}
idx: 798, nct_id: NCT01767467, prediction: 0.6095116138458252, label: 0.8556338028169014
{'predictions': [{'score': 1.0351508855819702}]}
idx: 799, nct_id: NCT00999830, prediction: 1.0351508855819702, label: 0.14814814814814814
{'predictions': [{'score': 0.22959581017494202}]}
idx: 800, 

{'predictions': [{'score': -0.0016182661056518555}]}
idx: 864, nct_id: NCT00348374, prediction: -0.0016182661056518555, label: 0.06806282722513089
{'predictions': [{'score': 0.06970228254795074}]}
idx: 865, nct_id: NCT01217606, prediction: 0.06970228254795074, label: 0.021621621621621623
{'predictions': [{'score': 0.10658887028694153}]}
idx: 866, nct_id: NCT01617005, prediction: 0.10658887028694153, label: 0.0
{'predictions': [{'score': 0.21468251943588257}]}
idx: 867, nct_id: NCT00209027, prediction: 0.21468251943588257, label: 0.0
{'predictions': [{'score': 0.14939625561237335}]}
idx: 868, nct_id: NCT01429051, prediction: 0.14939625561237335, label: 0.5434782608695652
{'predictions': [{'score': 0.22673138976097107}]}
idx: 869, nct_id: NCT01849770, prediction: 0.22673138976097107, label: 0.09333333333333334
{'predictions': [{'score': 0.17724883556365967}]}
idx: 870, nct_id: NCT00263666, prediction: 0.17724883556365967, label: 0.85
{'predictions': [{'score': 0.38078317046165466}]}
idx:

{'predictions': [{'score': 0.051940083503723145}]}
idx: 938, nct_id: NCT01651806, prediction: 0.051940083503723145, label: 0.06153846153846154
{'predictions': [{'score': 0.21003113687038422}]}
idx: 939, nct_id: NCT01047839, prediction: 0.21003113687038422, label: 0.06
{'predictions': [{'score': 0.2879931926727295}]}
idx: 940, nct_id: NCT00915525, prediction: 0.2879931926727295, label: 0.7322074788902292
{'predictions': [{'score': 0.3102015256881714}]}
idx: 941, nct_id: NCT01241539, prediction: 0.3102015256881714, label: 0.0
{'predictions': [{'score': 0.4443473815917969}]}
idx: 942, nct_id: NCT00659295, prediction: 0.4443473815917969, label: 0.008852843462966583
{'predictions': [{'score': 0.2970251441001892}]}
idx: 943, nct_id: NCT00442962, prediction: 0.2970251441001892, label: 0.037037037037037035
{'predictions': [{'score': 1.131123661994934}]}
idx: 944, nct_id: NCT00365274, prediction: 1.131123661994934, label: 0.0
{'predictions': [{'score': 0.3274155855178833}]}
idx: 945, nct_id: NC

{'predictions': [{'score': 0.34894365072250366}]}
idx: 1008, nct_id: NCT00465088, prediction: 0.34894365072250366, label: 0.03015075376884422
{'predictions': [{'score': 0.2890653610229492}]}
idx: 1009, nct_id: NCT00586820, prediction: 0.2890653610229492, label: 0.0
{'predictions': [{'score': 0.7510496973991394}]}
idx: 1010, nct_id: NCT00362414, prediction: 0.7510496973991394, label: 0.9333333333333333
{'predictions': [{'score': 0.03760905563831329}]}
idx: 1011, nct_id: NCT01729559, prediction: 0.03760905563831329, label: 0.01616161616161616
{'predictions': [{'score': 0.3258793354034424}]}
idx: 1012, nct_id: NCT01120704, prediction: 0.3258793354034424, label: 0.058823529411764705
{'predictions': [{'score': 0.18378326296806335}]}
idx: 1013, nct_id: NCT00423878, prediction: 0.18378326296806335, label: 0.25116279069767444
{'predictions': [{'score': 0.35115477442741394}]}
idx: 1014, nct_id: NCT01364727, prediction: 0.35115477442741394, label: 1.0
{'predictions': [{'score': 0.043468222022056

{'predictions': [{'score': 0.3024524450302124}]}
idx: 1069, nct_id: NCT01541358, prediction: 0.3024524450302124, label: 0.0
{'predictions': [{'score': -0.04811197519302368}]}
idx: 1070, nct_id: NCT00705523, prediction: -0.04811197519302368, label: 0.0
{'predictions': [{'score': 0.2642759680747986}]}
idx: 1071, nct_id: NCT00976495, prediction: 0.2642759680747986, label: 0.03896103896103896
{'predictions': [{'score': -0.21579423546791077}]}
idx: 1072, nct_id: NCT01032018, prediction: -0.21579423546791077, label: 0.20666666666666667
{'predictions': [{'score': 0.8317981958389282}]}
idx: 1073, nct_id: NCT00061373, prediction: 0.8317981958389282, label: 0.9444444444444444
{'predictions': [{'score': 0.3097149431705475}]}
idx: 1074, nct_id: NCT00594425, prediction: 0.3097149431705475, label: 0.03333333333333333
{'predictions': [{'score': 0.354033887386322}]}
idx: 1075, nct_id: NCT01147068, prediction: 0.354033887386322, label: 0.058673469387755105
{'predictions': [{'score': 0.11091171205043793

{'predictions': [{'score': -0.06876438856124878}]}
idx: 1148, nct_id: NCT00475501, prediction: -0.06876438856124878, label: 0.2
{'predictions': [{'score': 0.8696052432060242}]}
idx: 1149, nct_id: NCT01039207, prediction: 0.8696052432060242, label: 1.4838709677419355
{'predictions': [{'score': -0.14585816860198975}]}
idx: 1150, nct_id: NCT01489670, prediction: -0.14585816860198975, label: 0.0
{'predictions': [{'score': 0.5988616943359375}]}
idx: 1151, nct_id: NCT01517295, prediction: 0.5988616943359375, label: 0.0
{'predictions': [{'score': 0.6112058162689209}]}
idx: 1152, nct_id: NCT00834899, prediction: 0.6112058162689209, label: 1.6153846153846154
{'predictions': [{'score': 0.5514388084411621}]}
idx: 1153, nct_id: NCT00075400, prediction: 0.5514388084411621, label: 0.5384615384615384
{'predictions': [{'score': 0.6746212244033813}]}
idx: 1154, nct_id: NCT00391872, prediction: 0.6746212244033813, label: 0.5657216494845361
{'predictions': [{'score': 0.27796968817710876}]}
idx: 1155, nct

{'predictions': [{'score': -0.08385133743286133}]}
idx: 1221, nct_id: NCT00602355, prediction: -0.08385133743286133, label: 0.2962962962962963
{'predictions': [{'score': 0.23278501629829407}]}
idx: 1222, nct_id: NCT00696488, prediction: 0.23278501629829407, label: 0.0
{'predictions': [{'score': 1.077821969985962}]}
idx: 1223, nct_id: NCT00377637, prediction: 1.077821969985962, label: 1.1297297297297297
{'predictions': [{'score': 0.16567137837409973}]}
idx: 1224, nct_id: NCT01167257, prediction: 0.16567137837409973, label: 0.0967741935483871
{'predictions': [{'score': 1.3670995235443115}]}
idx: 1225, nct_id: NCT00482911, prediction: 1.3670995235443115, label: 0.5
{'predictions': [{'score': 1.4907422065734863}]}
idx: 1226, nct_id: NCT01049945, prediction: 1.4907422065734863, label: 1.2714285714285714
{'predictions': [{'score': 0.9243993759155273}]}
idx: 1227, nct_id: NCT00990652, prediction: 0.9243993759155273, label: 0.7
{'predictions': [{'score': 0.4737935960292816}]}
idx: 1228, nct_id

{'predictions': [{'score': 0.47216904163360596}]}
idx: 1296, nct_id: NCT00611806, prediction: 0.47216904163360596, label: 0.11428571428571428
{'predictions': [{'score': 1.0758240222930908}]}
idx: 1297, nct_id: NCT01016015, prediction: 1.0758240222930908, label: 1.5337078651685394
{'predictions': [{'score': 0.12174242734909058}]}
idx: 1298, nct_id: NCT00436826, prediction: 0.12174242734909058, label: 0.21962616822429906
{'predictions': [{'score': -0.03238862752914429}]}
idx: 1299, nct_id: NCT01552057, prediction: -0.03238862752914429, label: 0.01272264631043257
{'predictions': [{'score': 0.019596487283706665}]}
idx: 1300, nct_id: NCT00980980, prediction: 0.019596487283706665, label: 0.0
{'predictions': [{'score': 0.40242788195610046}]}
idx: 1301, nct_id: NCT02197247, prediction: 0.40242788195610046, label: 1.2682926829268293
{'predictions': [{'score': 0.13325627148151398}]}
idx: 1302, nct_id: NCT01683266, prediction: 0.13325627148151398, label: 0.24408014571949
{'predictions': [{'score'

{'predictions': [{'score': 0.7367956638336182}]}
idx: 1374, nct_id: NCT00423657, prediction: 0.7367956638336182, label: 0.09705882352941177
{'predictions': [{'score': 0.4430394470691681}]}
idx: 1375, nct_id: NCT01618266, prediction: 0.4430394470691681, label: 0.5226666666666666
{'predictions': [{'score': 0.08418908715248108}]}
idx: 1376, nct_id: NCT01612858, prediction: 0.08418908715248108, label: 0.7
{'predictions': [{'score': 0.5149733424186707}]}
idx: 1377, nct_id: NCT00538850, prediction: 0.5149733424186707, label: 0.24615384615384617
{'predictions': [{'score': 0.5159800052642822}]}
idx: 1378, nct_id: NCT00778830, prediction: 0.5159800052642822, label: 1.0173010380622838
{'predictions': [{'score': 0.9089498519897461}]}
idx: 1379, nct_id: NCT00112723, prediction: 0.9089498519897461, label: 0.0
{'predictions': [{'score': 0.32447564601898193}]}
idx: 1380, nct_id: NCT00785798, prediction: 0.32447564601898193, label: 0.0
{'predictions': [{'score': 0.10582707822322845}]}
idx: 1381, nct_i

{'predictions': [{'score': -0.04162335395812988}]}
idx: 1450, nct_id: NCT00086684, prediction: -0.04162335395812988, label: 0.05420054200542006
{'predictions': [{'score': 0.46975794434547424}]}
idx: 1451, nct_id: NCT00360126, prediction: 0.46975794434547424, label: 0.0
{'predictions': [{'score': 0.11689753830432892}]}
idx: 1452, nct_id: NCT00141271, prediction: 0.11689753830432892, label: 0.07276119402985075
{'predictions': [{'score': 0.3637228012084961}]}
idx: 1453, nct_id: NCT01113710, prediction: 0.3637228012084961, label: 0.034934497816593885
{'predictions': [{'score': 0.09678108990192413}]}
idx: 1454, nct_id: NCT01181895, prediction: 0.09678108990192413, label: 0.011494252873563218
{'predictions': [{'score': 0.6761760711669922}]}
idx: 1455, nct_id: NCT00412464, prediction: 0.6761760711669922, label: 0.0
{'predictions': [{'score': 0.3926265239715576}]}
idx: 1456, nct_id: NCT00279708, prediction: 0.3926265239715576, label: 1.018181818181818
{'predictions': [{'score': 0.1311095356941

{'predictions': [{'score': -0.08132842183113098}]}
idx: 1517, nct_id: NCT01374269, prediction: -0.08132842183113098, label: 0.0
{'predictions': [{'score': 0.6886093616485596}]}
idx: 1518, nct_id: NCT00979654, prediction: 0.6886093616485596, label: 0.6440677966101694
{'predictions': [{'score': 0.7128810882568359}]}
idx: 1519, nct_id: NCT01545700, prediction: 0.7128810882568359, label: 0.0
{'predictions': [{'score': -0.0452694296836853}]}
idx: 1520, nct_id: NCT00131664, prediction: -0.0452694296836853, label: 0.0
{'predictions': [{'score': 0.1773330569267273}]}
idx: 1521, nct_id: NCT00385801, prediction: 0.1773330569267273, label: 0.0
{'predictions': [{'score': 0.15377284586429596}]}
idx: 1522, nct_id: NCT01254721, prediction: 0.15377284586429596, label: 0.061068702290076333
{'predictions': [{'score': 0.43551790714263916}]}
idx: 1523, nct_id: NCT00695019, prediction: 0.43551790714263916, label: 0.16568047337278108
{'predictions': [{'score': 0.09013909101486206}]}
idx: 1524, nct_id: NCT00

{'predictions': [{'score': 0.16846120357513428}]}
idx: 1597, nct_id: NCT01316055, prediction: 0.16846120357513428, label: 0.0
{'predictions': [{'score': 0.7782439589500427}]}
idx: 1598, nct_id: NCT00080223, prediction: 0.7782439589500427, label: 1.891566265060241
{'predictions': [{'score': -0.19395706057548523}]}
idx: 1599, nct_id: NCT00741013, prediction: -0.19395706057548523, label: 0.09090909090909091
{'predictions': [{'score': 0.29734620451927185}]}
idx: 1600, nct_id: NCT01675050, prediction: 0.29734620451927185, label: 0.0
{'predictions': [{'score': -0.013708233833312988}]}
idx: 1601, nct_id: NCT00751933, prediction: -0.013708233833312988, label: 0.0
{'predictions': [{'score': 0.2961443364620209}]}
idx: 1602, nct_id: NCT01097785, prediction: 0.2961443364620209, label: 0.0
{'predictions': [{'score': 0.11042539775371552}]}
idx: 1603, nct_id: NCT02225860, prediction: 0.11042539775371552, label: 0.0
{'predictions': [{'score': 0.21867096424102783}]}
idx: 1604, nct_id: NCT00183274, pred

{'predictions': [{'score': 0.7371551990509033}]}
idx: 1674, nct_id: NCT00691210, prediction: 0.7371551990509033, label: 2.763157894736842
{'predictions': [{'score': 0.22088098526000977}]}
idx: 1675, nct_id: NCT00320671, prediction: 0.22088098526000977, label: 0.1919191919191919
{'predictions': [{'score': -0.02733558416366577}]}
idx: 1676, nct_id: NCT01453348, prediction: -0.02733558416366577, label: 0.015873015873015872
{'predictions': [{'score': 0.31015878915786743}]}
idx: 1677, nct_id: NCT00733304, prediction: 0.31015878915786743, label: 0.14285714285714285
{'predictions': [{'score': 1.807772159576416}]}
idx: 1678, nct_id: NCT01798004, prediction: 1.807772159576416, label: 0.22
{'predictions': [{'score': 1.3885934352874756}]}
idx: 1679, nct_id: NCT00379574, prediction: 1.3885934352874756, label: 2.2448979591836733
{'predictions': [{'score': -0.13665896654129028}]}
idx: 1680, nct_id: NCT01091974, prediction: -0.13665896654129028, label: 0.0
{'predictions': [{'score': 0.442531824111938

{'predictions': [{'score': 0.06445476412773132}]}
idx: 1751, nct_id: NCT00332709, prediction: 0.06445476412773132, label: 0.43373493975903615
{'predictions': [{'score': -0.07503920793533325}]}
idx: 1752, nct_id: NCT00627016, prediction: -0.07503920793533325, label: 0.006557377049180328
{'predictions': [{'score': 0.1825583279132843}]}
idx: 1753, nct_id: NCT00898560, prediction: 0.1825583279132843, label: 0.0
{'predictions': [{'score': 0.011304885149002075}]}
idx: 1754, nct_id: NCT02322788, prediction: 0.011304885149002075, label: 0.0
{'predictions': [{'score': -0.24671131372451782}]}
idx: 1755, nct_id: NCT00616018, prediction: -0.24671131372451782, label: 0.0
{'predictions': [{'score': 1.577042579650879}]}
idx: 1756, nct_id: NCT00254163, prediction: 1.577042579650879, label: 0.6630434782608695
{'predictions': [{'score': -0.05762827396392822}]}
idx: 1757, nct_id: NCT00459537, prediction: -0.05762827396392822, label: 0.06025267249757046
{'predictions': [{'score': 1.4693667888641357}]}
idx

{'predictions': [{'score': 0.15635935962200165}]}
idx: 1828, nct_id: NCT01621178, prediction: 0.15635935962200165, label: 0.36568457538994803
{'predictions': [{'score': 0.9495487809181213}]}
idx: 1829, nct_id: NCT00193180, prediction: 0.9495487809181213, label: 1.4594594594594594
{'predictions': [{'score': 0.3070336580276489}]}
idx: 1830, nct_id: NCT01159054, prediction: 0.3070336580276489, label: 0.09090909090909091
{'predictions': [{'score': 1.0248491764068604}]}
idx: 1831, nct_id: NCT01059630, prediction: 1.0248491764068604, label: 0.9494949494949495
{'predictions': [{'score': 0.5057131052017212}]}
idx: 1832, nct_id: NCT00023764, prediction: 0.5057131052017212, label: 0.6407766990291263
{'predictions': [{'score': 0.06358516216278076}]}
idx: 1833, nct_id: NCT00836953, prediction: 0.06358516216278076, label: 0.0
{'predictions': [{'score': 0.7719640135765076}]}
idx: 1834, nct_id: NCT00639002, prediction: 0.7719640135765076, label: 2.5384615384615383
{'predictions': [{'score': 0.6869941

{'predictions': [{'score': -0.03008812665939331}]}
idx: 1905, nct_id: NCT00957996, prediction: -0.03008812665939331, label: 0.5470085470085471
{'predictions': [{'score': 1.4683263301849365}]}
idx: 1906, nct_id: NCT00856830, prediction: 1.4683263301849365, label: 2.3666666666666667
{'predictions': [{'score': 0.06959143280982971}]}
idx: 1907, nct_id: NCT01873989, prediction: 0.06959143280982971, label: 0.0
{'predictions': [{'score': 0.24441643059253693}]}
idx: 1908, nct_id: NCT00882908, prediction: 0.24441643059253693, label: 0.17098445595854922
{'predictions': [{'score': -0.012610971927642822}]}
idx: 1909, nct_id: NCT00485693, prediction: -0.012610971927642822, label: 0.13043478260869565
{'predictions': [{'score': 0.08576838672161102}]}
idx: 1910, nct_id: NCT01432561, prediction: 0.08576838672161102, label: 0.0
{'predictions': [{'score': 1.0664646625518799}]}
idx: 1911, nct_id: NCT00281840, prediction: 1.0664646625518799, label: 1.1666666666666667
{'predictions': [{'score': 0.2274393886

{'predictions': [{'score': 0.6082243919372559}]}
idx: 1987, nct_id: NCT00529568, prediction: 0.6082243919372559, label: 0.6258234519104084
{'predictions': [{'score': 0.813810408115387}]}
idx: 1988, nct_id: NCT00586105, prediction: 0.813810408115387, label: 0.9743589743589743
{'predictions': [{'score': 0.2911914885044098}]}
idx: 1989, nct_id: NCT01470118, prediction: 0.2911914885044098, label: 0.0
{'predictions': [{'score': 0.21352742612361908}]}
idx: 1990, nct_id: NCT01149434, prediction: 0.21352742612361908, label: 0.5789473684210527
{'predictions': [{'score': 0.08118988573551178}]}
idx: 1991, nct_id: NCT01078363, prediction: 0.08118988573551178, label: 0.08333333333333333
{'predictions': [{'score': 0.06547890603542328}]}
idx: 1992, nct_id: NCT00775684, prediction: 0.06547890603542328, label: 0.0
{'predictions': [{'score': 0.7545897960662842}]}
idx: 1993, nct_id: NCT01372618, prediction: 0.7545897960662842, label: 0.0
{'predictions': [{'score': -0.20019716024398804}]}
idx: 1994, nct_i

{'predictions': [{'score': 0.16801303625106812}]}
idx: 2062, nct_id: NCT01976312, prediction: 0.16801303625106812, label: 0.14285714285714285
{'predictions': [{'score': 0.12128996849060059}]}
idx: 2063, nct_id: NCT00857792, prediction: 0.12128996849060059, label: 0.0
{'predictions': [{'score': 0.014480412006378174}]}
idx: 2064, nct_id: NCT00575380, prediction: 0.014480412006378174, label: 0.0
{'predictions': [{'score': 0.17881059646606445}]}
idx: 2065, nct_id: NCT00405275, prediction: 0.17881059646606445, label: 0.6062322946175638
{'predictions': [{'score': -0.3102126121520996}]}
idx: 2066, nct_id: NCT00628355, prediction: -0.3102126121520996, label: 0.0
{'predictions': [{'score': 0.43228137493133545}]}
idx: 2067, nct_id: NCT01519661, prediction: 0.43228137493133545, label: 0.7197452229299363
{'predictions': [{'score': 0.7414284348487854}]}
idx: 2068, nct_id: NCT02074982, prediction: 0.7414284348487854, label: 0.2603550295857988
{'predictions': [{'score': 0.49185311794281006}]}
idx: 20

{'predictions': [{'score': 0.229192852973938}]}
idx: 2138, nct_id: NCT01652690, prediction: 0.229192852973938, label: 0.0033333333333333335
{'predictions': [{'score': 0.29984474182128906}]}
idx: 2139, nct_id: NCT00339833, prediction: 0.29984474182128906, label: 0.0
{'predictions': [{'score': 0.11916138231754303}]}
idx: 2140, nct_id: NCT00385138, prediction: 0.11916138231754303, label: 0.04008202833706189
{'predictions': [{'score': 0.17030328512191772}]}
idx: 2141, nct_id: NCT00437489, prediction: 0.17030328512191772, label: 0.10204081632653061
{'predictions': [{'score': 0.9540189504623413}]}
idx: 2142, nct_id: NCT00516165, prediction: 0.9540189504623413, label: 0.6785714285714286
{'predictions': [{'score': -0.080881267786026}]}
idx: 2143, nct_id: NCT00506285, prediction: -0.080881267786026, label: 0.0
{'predictions': [{'score': -0.011330246925354004}]}
idx: 2144, nct_id: NCT01228747, prediction: -0.011330246925354004, label: 0.027700831024930747
{'predictions': [{'score': 0.20918835699

{'predictions': [{'score': 0.5096172094345093}]}
idx: 2217, nct_id: NCT00182793, prediction: 0.5096172094345093, label: 1.5
{'predictions': [{'score': 0.7437575459480286}]}
idx: 2218, nct_id: NCT00390806, prediction: 0.7437575459480286, label: 0.8707627118644068
{'predictions': [{'score': 1.2632238864898682}]}
idx: 2219, nct_id: NCT01998919, prediction: 1.2632238864898682, label: 0.525974025974026
{'predictions': [{'score': 0.068916916847229}]}
idx: 2220, nct_id: NCT01947855, prediction: 0.068916916847229, label: 0.0
{'predictions': [{'score': 0.23343302309513092}]}
idx: 2221, nct_id: NCT00826618, prediction: 0.23343302309513092, label: 0.0
{'predictions': [{'score': 0.6707368493080139}]}
idx: 2222, nct_id: NCT00412243, prediction: 0.6707368493080139, label: 0.6470588235294118
{'predictions': [{'score': 0.09782835841178894}]}
idx: 2223, nct_id: NCT00270231, prediction: 0.09782835841178894, label: 0.0
{'predictions': [{'score': 0.557004451751709}]}
idx: 2224, nct_id: NCT00434148, predic

{'predictions': [{'score': -0.011091232299804688}]}
idx: 2296, nct_id: NCT00179517, prediction: -0.011091232299804688, label: 0.0
{'predictions': [{'score': 0.24841995537281036}]}
idx: 2297, nct_id: NCT00420212, prediction: 0.24841995537281036, label: 0.6701782820097245
{'predictions': [{'score': -0.05527925491333008}]}
idx: 2298, nct_id: NCT01063855, prediction: -0.05527925491333008, label: 0.030303030303030304
{'predictions': [{'score': 0.681551992893219}]}
idx: 2299, nct_id: NCT00505687, prediction: 0.681551992893219, label: 0.6397849462365591
{'predictions': [{'score': -0.00765499472618103}]}
idx: 2300, nct_id: NCT01122381, prediction: -0.00765499472618103, label: 0.0
{'predictions': [{'score': 0.2571282684803009}]}
idx: 2301, nct_id: NCT02007096, prediction: 0.2571282684803009, label: 0.0
{'predictions': [{'score': -0.13008809089660645}]}
idx: 2302, nct_id: NCT01916629, prediction: -0.13008809089660645, label: 0.0
{'predictions': [{'score': 0.4012206792831421}]}
idx: 2303, nct_id:

{'predictions': [{'score': -0.06733614206314087}]}
idx: 2366, nct_id: NCT00285584, prediction: -0.06733614206314087, label: 0.07317073170731707
{'predictions': [{'score': -0.01834005117416382}]}
idx: 2367, nct_id: NCT00616642, prediction: -0.01834005117416382, label: 0.0
{'predictions': [{'score': 0.03281569480895996}]}
idx: 2368, nct_id: NCT01659996, prediction: 0.03281569480895996, label: 0.035868005738880916
{'predictions': [{'score': 0.31479915976524353}]}
idx: 2369, nct_id: NCT01371786, prediction: 0.31479915976524353, label: 0.0
{'predictions': [{'score': 0.37576428055763245}]}
idx: 2370, nct_id: NCT00490802, prediction: 0.37576428055763245, label: 0.0
{'predictions': [{'score': -0.5119583606719971}]}
idx: 2371, nct_id: NCT00789880, prediction: -0.5119583606719971, label: 0.0
{'predictions': [{'score': 0.3663763403892517}]}
idx: 2372, nct_id: NCT00128492, prediction: 0.3663763403892517, label: 2.427007299270073
{'predictions': [{'score': 0.15714767575263977}]}
idx: 2373, nct_id: 

{'predictions': [{'score': 1.281890630722046}]}
idx: 2427, nct_id: NCT00410904, prediction: 1.281890630722046, label: 1.2
{'predictions': [{'score': 0.47487765550613403}]}
idx: 2428, nct_id: NCT00187135, prediction: 0.47487765550613403, label: 0.0
{'predictions': [{'score': 0.2992916703224182}]}
idx: 2429, nct_id: NCT00727220, prediction: 0.2992916703224182, label: 0.0
{'predictions': [{'score': 0.33597609400749207}]}
idx: 2430, nct_id: NCT00003895, prediction: 0.33597609400749207, label: 0.05555555555555555
{'predictions': [{'score': 0.37797999382019043}]}
idx: 2431, nct_id: NCT01467661, prediction: 0.37797999382019043, label: 1.4878048780487805
{'predictions': [{'score': 0.1297808736562729}]}
idx: 2432, nct_id: NCT01498822, prediction: 0.1297808736562729, label: 0.18413597733711048
{'predictions': [{'score': 0.12652595341205597}]}
idx: 2433, nct_id: NCT00266864, prediction: 0.12652595341205597, label: 0.06451612903225806
{'predictions': [{'score': 0.011871516704559326}]}
idx: 2434, n

{'predictions': [{'score': 0.5045048594474792}]}
idx: 2499, nct_id: NCT00361257, prediction: 0.5045048594474792, label: 0.18691588785046728
{'predictions': [{'score': 0.39079731702804565}]}
idx: 2500, nct_id: NCT01228071, prediction: 0.39079731702804565, label: 0.0
{'predictions': [{'score': 1.758340835571289}]}
idx: 2501, nct_id: NCT00890552, prediction: 1.758340835571289, label: 2.2
{'predictions': [{'score': -0.1759030520915985}]}
idx: 2502, nct_id: NCT02139228, prediction: -0.1759030520915985, label: 0.0
{'predictions': [{'score': 0.5035610795021057}]}
idx: 2503, nct_id: NCT00348309, prediction: 0.5035610795021057, label: 0.25467914438502676
{'predictions': [{'score': 1.2896676063537598}]}
idx: 2504, nct_id: NCT01933932, prediction: 1.2896676063537598, label: 0.9705882352941176
{'predictions': [{'score': 0.6032673120498657}]}
idx: 2505, nct_id: NCT00856180, prediction: 0.6032673120498657, label: 1.5
{'predictions': [{'score': 0.3626681864261627}]}
idx: 2506, nct_id: NCT00995930, pr

{'predictions': [{'score': 0.7892321944236755}]}
idx: 2575, nct_id: NCT00561912, prediction: 0.7892321944236755, label: 0.0
{'predictions': [{'score': 0.09896683692932129}]}
idx: 2576, nct_id: NCT01137890, prediction: 0.09896683692932129, label: 0.0
{'predictions': [{'score': 0.2621575891971588}]}
idx: 2577, nct_id: NCT00958074, prediction: 0.2621575891971588, label: 0.18181818181818182
{'predictions': [{'score': 0.7085848450660706}]}
idx: 2578, nct_id: NCT00705250, prediction: 0.7085848450660706, label: 0.2777777777777778
{'predictions': [{'score': 0.035338595509529114}]}
idx: 2579, nct_id: NCT02097849, prediction: 0.035338595509529114, label: 0.0
{'predictions': [{'score': -0.080881267786026}]}
idx: 2580, nct_id: NCT01063153, prediction: -0.080881267786026, label: 0.0
{'predictions': [{'score': 0.9055885076522827}]}
idx: 2581, nct_id: NCT00476047, prediction: 0.9055885076522827, label: 0.0
{'predictions': [{'score': 1.0474568605422974}]}
idx: 2582, nct_id: NCT01153672, prediction: 1.

{'predictions': [{'score': 0.5693077445030212}]}
idx: 2646, nct_id: NCT00770562, prediction: 0.5693077445030212, label: 0.1553398058252427
{'predictions': [{'score': 0.11103314161300659}]}
idx: 2647, nct_id: NCT01475838, prediction: 0.11103314161300659, label: 0.3150684931506849
{'predictions': [{'score': 0.4739445149898529}]}
idx: 2648, nct_id: NCT00267085, prediction: 0.4739445149898529, label: 0.36363636363636365
{'predictions': [{'score': 0.34518879652023315}]}
idx: 2649, nct_id: NCT02019550, prediction: 0.34518879652023315, label: 0.020618556701030927
{'predictions': [{'score': 0.43241173028945923}]}
idx: 2650, nct_id: NCT00853242, prediction: 0.43241173028945923, label: 0.27793696275071633
{'predictions': [{'score': -0.2160847783088684}]}
idx: 2651, nct_id: NCT00537940, prediction: -0.2160847783088684, label: 0.15082644628099173
{'predictions': [{'score': 0.8213600516319275}]}
idx: 2652, nct_id: NCT00878709, prediction: 0.8213600516319275, label: 0.1630281690140845
{'predictions'

{'predictions': [{'score': 0.4625098705291748}]}
idx: 2719, nct_id: NCT00364793, prediction: 0.4625098705291748, label: 1.0
{'predictions': [{'score': 0.11257365345954895}]}
idx: 2720, nct_id: NCT00527826, prediction: 0.11257365345954895, label: 0.49065420560747663
{'predictions': [{'score': 2.3306355476379395}]}
idx: 2721, nct_id: NCT00313586, prediction: 2.3306355476379395, label: 5.3908629441624365
{'predictions': [{'score': 0.897990345954895}]}
idx: 2722, nct_id: NCT00589667, prediction: 0.897990345954895, label: 2.0
{'predictions': [{'score': -0.277965784072876}]}
idx: 2723, nct_id: NCT00659945, prediction: -0.277965784072876, label: 0.0
{'predictions': [{'score': -0.0867689847946167}]}
idx: 2724, nct_id: NCT01050660, prediction: -0.0867689847946167, label: 0.16176470588235295
{'predictions': [{'score': 0.2532286047935486}]}
idx: 2725, nct_id: NCT00278525, prediction: 0.2532286047935486, label: 0.0
{'predictions': [{'score': 1.4184551239013672}]}
idx: 2726, nct_id: NCT00863512, pr

{'predictions': [{'score': 0.01516154408454895}]}
idx: 2788, nct_id: NCT01772758, prediction: 0.01516154408454895, label: 0.0
{'predictions': [{'score': 0.020960569381713867}]}
idx: 2789, nct_id: NCT01257230, prediction: 0.020960569381713867, label: 0.05778894472361809
{'predictions': [{'score': -0.46900373697280884}]}
idx: 2790, nct_id: NCT00802360, prediction: -0.46900373697280884, label: 0.023121387283236993
{'predictions': [{'score': 0.21028026938438416}]}
idx: 2791, nct_id: NCT01875731, prediction: 0.21028026938438416, label: 0.0
{'predictions': [{'score': 0.01732511818408966}]}
idx: 2792, nct_id: NCT00544713, prediction: 0.01732511818408966, label: 0.0
{'predictions': [{'score': 0.2713344991207123}]}
idx: 2793, nct_id: NCT00404352, prediction: 0.2713344991207123, label: 0.11218568665377177
{'predictions': [{'score': 0.9340965151786804}]}
idx: 2794, nct_id: NCT00686855, prediction: 0.9340965151786804, label: 0.043478260869565216
{'predictions': [{'score': 0.4504493474960327}]}
idx

{'predictions': [{'score': 0.06483852863311768}]}
idx: 2860, nct_id: NCT00677820, prediction: 0.06483852863311768, label: 0.02666666666666667
{'predictions': [{'score': -0.23995709419250488}]}
idx: 2861, nct_id: NCT00475904, prediction: -0.23995709419250488, label: 0.0
{'predictions': [{'score': 0.19307953119277954}]}
idx: 2862, nct_id: NCT01120405, prediction: 0.19307953119277954, label: 0.13833333333333334
{'predictions': [{'score': 0.6579636931419373}]}
idx: 2863, nct_id: NCT00532129, prediction: 0.6579636931419373, label: 0.94
{'predictions': [{'score': -0.04067021608352661}]}
idx: 2864, nct_id: NCT01204671, prediction: -0.04067021608352661, label: 0.04764971023824855
{'predictions': [{'score': 0.6818802356719971}]}
idx: 2865, nct_id: NCT00050986, prediction: 0.6818802356719971, label: 1.018181818181818
{'predictions': [{'score': 0.6815996170043945}]}
idx: 2866, nct_id: NCT00349752, prediction: 0.6815996170043945, label: 0.2988505747126437
{'predictions': [{'score': -0.007443785667

{'predictions': [{'score': 0.3711455464363098}]}
idx: 2939, nct_id: NCT00710684, prediction: 0.3711455464363098, label: 0.250733137829912
{'predictions': [{'score': 0.16666430234909058}]}
idx: 2940, nct_id: NCT01578785, prediction: 0.16666430234909058, label: 0.0
{'predictions': [{'score': 0.07292443513870239}]}
idx: 2941, nct_id: NCT02121041, prediction: 0.07292443513870239, label: 0.0
{'predictions': [{'score': 0.5385250449180603}]}
idx: 2942, nct_id: NCT00055237, prediction: 0.5385250449180603, label: 1.1578947368421053
{'predictions': [{'score': 1.3888170719146729}]}
idx: 2943, nct_id: NCT00448357, prediction: 1.3888170719146729, label: 1.3148148148148149
{'predictions': [{'score': 0.08476626873016357}]}
idx: 2944, nct_id: NCT00257933, prediction: 0.08476626873016357, label: 0.0
{'predictions': [{'score': 0.8210828304290771}]}
idx: 2945, nct_id: NCT00296504, prediction: 0.8210828304290771, label: 0.5883134130146083
{'predictions': [{'score': 0.41033753752708435}]}
idx: 2946, nct_id

{'predictions': [{'score': 0.653271496295929}]}
idx: 3016, nct_id: NCT01323790, prediction: 0.653271496295929, label: 0.11142857142857143
{'predictions': [{'score': 0.7313491702079773}]}
idx: 3017, nct_id: NCT00254540, prediction: 0.7313491702079773, label: 2.0980392156862746
{'predictions': [{'score': 0.37659257650375366}]}
idx: 3018, nct_id: NCT02639247, prediction: 0.37659257650375366, label: 0.04804804804804805
{'predictions': [{'score': -0.009715616703033447}]}
idx: 3019, nct_id: NCT00770146, prediction: -0.009715616703033447, label: 0.1518987341772152
{'predictions': [{'score': 0.1523602455854416}]}
idx: 3020, nct_id: NCT01986946, prediction: 0.1523602455854416, label: 0.11764705882352941
{'predictions': [{'score': 0.09827253222465515}]}
idx: 3021, nct_id: NCT01093976, prediction: 0.09827253222465515, label: 0.0
{'predictions': [{'score': 0.12425008416175842}]}
idx: 3022, nct_id: NCT00248625, prediction: 0.12425008416175842, label: 0.10869565217391304
{'predictions': [{'score': 0

{'predictions': [{'score': -0.04155886173248291}]}
idx: 3094, nct_id: NCT01722266, prediction: -0.04155886173248291, label: 0.0
{'predictions': [{'score': -0.1823623776435852}]}
idx: 3095, nct_id: NCT02226549, prediction: -0.1823623776435852, label: 0.0
{'predictions': [{'score': 0.7701080441474915}]}
idx: 3096, nct_id: NCT00721630, prediction: 0.7701080441474915, label: 1.125
{'predictions': [{'score': 1.0261743068695068}]}
idx: 3097, nct_id: NCT01063907, prediction: 1.0261743068695068, label: 3.221052631578947
{'predictions': [{'score': 0.4455451965332031}]}
idx: 3098, nct_id: NCT01582945, prediction: 0.4455451965332031, label: 0.0
{'predictions': [{'score': 1.5268640518188477}]}
idx: 3099, nct_id: NCT01223027, prediction: 1.5268640518188477, label: 1.3404255319148937
{'predictions': [{'score': 0.10866817831993103}]}
idx: 3100, nct_id: NCT00842985, prediction: 0.10866817831993103, label: 0.0
{'predictions': [{'score': 0.2649381160736084}]}
idx: 3101, nct_id: NCT01043146, prediction: 

{'predictions': [{'score': 0.6697953939437866}]}
idx: 3155, nct_id: NCT00746551, prediction: 0.6697953939437866, label: 0.0
{'predictions': [{'score': 1.3606560230255127}]}
idx: 3156, nct_id: NCT00528398, prediction: 1.3606560230255127, label: 1.045045045045045
{'predictions': [{'score': 0.9704583287239075}]}
idx: 3157, nct_id: NCT01628913, prediction: 0.9704583287239075, label: 1.0
{'predictions': [{'score': -0.028919994831085205}]}
idx: 3158, nct_id: NCT00867529, prediction: -0.028919994831085205, label: 0.1746031746031746
{'predictions': [{'score': 0.20332828164100647}]}
idx: 3159, nct_id: NCT01546623, prediction: 0.20332828164100647, label: 0.24375
{'predictions': [{'score': 0.35424837470054626}]}
idx: 3160, nct_id: NCT00578136, prediction: 0.35424837470054626, label: 0.0
{'predictions': [{'score': 0.6491160988807678}]}
idx: 3161, nct_id: NCT00203502, prediction: 0.6491160988807678, label: 1.95
{'predictions': [{'score': 0.02754509449005127}]}
idx: 3162, nct_id: NCT01974700, predic

{'predictions': [{'score': -0.015947431325912476}]}
idx: 3216, nct_id: NCT00965081, prediction: -0.015947431325912476, label: 0.006493506493506494
{'predictions': [{'score': 0.0938134491443634}]}
idx: 3217, nct_id: NCT00710385, prediction: 0.0938134491443634, label: 0.0
{'predictions': [{'score': -0.022026628255844116}]}
idx: 3218, nct_id: NCT00489086, prediction: -0.022026628255844116, label: 0.1111111111111111
{'predictions': [{'score': 0.33556556701660156}]}
idx: 3219, nct_id: NCT00313170, prediction: 0.33556556701660156, label: 0.3006993006993007
{'predictions': [{'score': 0.7030251026153564}]}
idx: 3220, nct_id: NCT01369732, prediction: 0.7030251026153564, label: 0.0
{'predictions': [{'score': 0.6315816640853882}]}
idx: 3221, nct_id: NCT02203032, prediction: 0.6315816640853882, label: 0.10894495412844037
{'predictions': [{'score': 1.874809980392456}]}
idx: 3222, nct_id: NCT00689936, prediction: 1.874809980392456, label: 2.097350585335798
{'predictions': [{'score': 0.41133365035057

{'predictions': [{'score': 0.08732502162456512}]}
idx: 3292, nct_id: NCT01047358, prediction: 0.08732502162456512, label: 0.024271844660194174
{'predictions': [{'score': -0.03458404541015625}]}
idx: 3293, nct_id: NCT00816907, prediction: -0.03458404541015625, label: 0.136986301369863
{'predictions': [{'score': 0.3459576964378357}]}
idx: 3294, nct_id: NCT00442572, prediction: 0.3459576964378357, label: 0.0
{'predictions': [{'score': 0.12276425957679749}]}
idx: 3295, nct_id: NCT01055834, prediction: 0.12276425957679749, label: 0.0
{'predictions': [{'score': 0.8196880221366882}]}
idx: 3296, nct_id: NCT01020305, prediction: 0.8196880221366882, label: 0.4
{'predictions': [{'score': 0.022533118724822998}]}
idx: 3297, nct_id: NCT01500772, prediction: 0.022533118724822998, label: 0.0
{'predictions': [{'score': 0.4792381227016449}]}
idx: 3298, nct_id: NCT01106586, prediction: 0.4792381227016449, label: 0.4392655367231638
{'predictions': [{'score': 0.9372298717498779}]}
idx: 3299, nct_id: NCT004

{'predictions': [{'score': 0.9071757197380066}]}
idx: 3368, nct_id: NCT00889187, prediction: 0.9071757197380066, label: 1.0
{'predictions': [{'score': 0.014931142330169678}]}
idx: 3369, nct_id: NCT00832572, prediction: 0.014931142330169678, label: 0.0
{'predictions': [{'score': 0.022673994302749634}]}
idx: 3370, nct_id: NCT01237340, prediction: 0.022673994302749634, label: 0.11864406779661017
{'predictions': [{'score': 0.27826374769210815}]}
idx: 3371, nct_id: NCT00468208, prediction: 0.27826374769210815, label: 0.7
{'predictions': [{'score': 1.1961336135864258}]}
idx: 3372, nct_id: NCT00074165, prediction: 1.1961336135864258, label: 1.8823529411764706
{'predictions': [{'score': 0.462844580411911}]}
idx: 3373, nct_id: NCT00151411, prediction: 0.462844580411911, label: 0.0
{'predictions': [{'score': 0.13988502323627472}]}
idx: 3374, nct_id: NCT00660907, prediction: 0.13988502323627472, label: 0.14543960558751026
{'predictions': [{'score': 1.1540861129760742}]}
idx: 3375, nct_id: NCT0078

{'predictions': [{'score': -0.23792213201522827}]}
idx: 3434, nct_id: NCT00304265, prediction: -0.23792213201522827, label: 0.0
{'predictions': [{'score': 0.3708491921424866}]}
idx: 3435, nct_id: NCT01652573, prediction: 0.3708491921424866, label: 0.0
{'predictions': [{'score': 0.5599980354309082}]}
idx: 3436, nct_id: NCT00772668, prediction: 0.5599980354309082, label: 0.6666666666666666
{'predictions': [{'score': 0.14026106894016266}]}
idx: 3437, nct_id: NCT01290679, prediction: 0.14026106894016266, label: 0.15776081424936386
{'predictions': [{'score': 0.17615842819213867}]}
idx: 3438, nct_id: NCT01166958, prediction: 0.17615842819213867, label: 0.0
{'predictions': [{'score': 0.061321407556533813}]}
idx: 3439, nct_id: NCT00303823, prediction: 0.061321407556533813, label: 0.02040816326530612
{'predictions': [{'score': 0.007159769535064697}]}
idx: 3440, nct_id: NCT00422162, prediction: 0.007159769535064697, label: 0.0855457227138643
{'predictions': [{'score': 0.19456058740615845}]}
idx:

{'predictions': [{'score': 1.4166903495788574}]}
idx: 3512, nct_id: NCT01984892, prediction: 1.4166903495788574, label: 0.0
{'predictions': [{'score': -0.014292150735855103}]}
idx: 3513, nct_id: NCT00940108, prediction: -0.014292150735855103, label: 0.03783783783783784
{'predictions': [{'score': 0.05092443525791168}]}
idx: 3514, nct_id: NCT00402194, prediction: 0.05092443525791168, label: 0.0
{'predictions': [{'score': 0.6343109607696533}]}
idx: 3515, nct_id: NCT00506415, prediction: 0.6343109607696533, label: 0.6502525252525253
{'predictions': [{'score': 0.06546521186828613}]}
idx: 3516, nct_id: NCT02258334, prediction: 0.06546521186828613, label: 0.009615384615384616
{'predictions': [{'score': -0.05659067630767822}]}
idx: 3517, nct_id: NCT02128490, prediction: -0.05659067630767822, label: 0.07407407407407407
{'predictions': [{'score': 0.46146154403686523}]}
idx: 3518, nct_id: NCT02604173, prediction: 0.46146154403686523, label: 0.0
{'predictions': [{'score': 0.118227019906044}]}
idx:

{'predictions': [{'score': 0.39075836539268494}]}
idx: 3590, nct_id: NCT00195702, prediction: 0.39075836539268494, label: 1.789983844911147
{'predictions': [{'score': 0.14279088377952576}]}
idx: 3591, nct_id: NCT01596231, prediction: 0.14279088377952576, label: 0.0
{'predictions': [{'score': -0.12152588367462158}]}
idx: 3592, nct_id: NCT00609674, prediction: -0.12152588367462158, label: 0.0
{'predictions': [{'score': 0.029803171753883362}]}
idx: 3593, nct_id: NCT00831428, prediction: 0.029803171753883362, label: 0.0
{'predictions': [{'score': 0.10019263625144958}]}
idx: 3594, nct_id: NCT01573767, prediction: 0.10019263625144958, label: 0.004319654427645789
{'predictions': [{'score': 0.480633944272995}]}
idx: 3595, nct_id: NCT00318474, prediction: 0.480633944272995, label: 0.021739130434782608
{'predictions': [{'score': 0.2062617540359497}]}
idx: 3596, nct_id: NCT00273910, prediction: 0.2062617540359497, label: 0.0
{'predictions': [{'score': 0.3223632872104645}]}
idx: 3597, nct_id: NCT0

{'predictions': [{'score': 0.6827464699745178}]}
idx: 3665, nct_id: NCT01036802, prediction: 0.6827464699745178, label: 0.6666666666666666
{'predictions': [{'score': 0.42850157618522644}]}
idx: 3666, nct_id: NCT02004990, prediction: 0.42850157618522644, label: 0.0
{'predictions': [{'score': 0.8334275484085083}]}
idx: 3667, nct_id: NCT00319735, prediction: 0.8334275484085083, label: 1.1219512195121952
{'predictions': [{'score': 0.5125675201416016}]}
idx: 3668, nct_id: NCT01125813, prediction: 0.5125675201416016, label: 0.125
{'predictions': [{'score': 0.11801734566688538}]}
idx: 3669, nct_id: NCT00785577, prediction: 0.11801734566688538, label: 0.03663003663003663
{'predictions': [{'score': 0.09017579257488251}]}
idx: 3670, nct_id: NCT00668525, prediction: 0.09017579257488251, label: 0.03420752565564424
{'predictions': [{'score': 0.14179369807243347}]}
idx: 3671, nct_id: NCT01359449, prediction: 0.14179369807243347, label: 0.04878048780487805
{'predictions': [{'score': 1.457583189010620

KeyboardInterrupt: 

In [None]:
def evaluate(linear_predictor, test_features, test_labels, model_name, verbose=True):
    """
    Evaluate a model on a test set given the prediction endpoint.  Return binary classification metrics.

    Parameters
    ----------
    linear_predictor : object
        Anything exposing ``predict(batch)`` that returns a dict with a
        ``'predictions'`` list whose items carry a ``'predicted_label'`` key.
        NOTE(review): the endpoint responses printed earlier in this notebook
        only contain ``'score'``; this function needs a binary-classifier
        endpoint that also emits ``'predicted_label'`` -- confirm before use.
    test_features : array-like
        Feature rows; split along the first axis into at most 100 requests.
    test_labels : array-like
        Ground-truth binary labels (0/1), one per feature row.
    model_name : str
        Label stored under ``'Model'`` in the returned dict.
    verbose : bool, default True
        When true, print the confusion matrix and the metrics.

    Returns
    -------
    dict
        Keys ``'TP'``, ``'FP'``, ``'FN'``, ``'TN'``, ``'Precision'``,
        ``'Recall'``, ``'Accuracy'``, ``'F1'`` and ``'Model'``.

    Raises
    ------
    ValueError
        If the number of predictions does not match the number of labels.
    """
    # split the test data set into 100 batches and evaluate using prediction endpoint;
    # np.array_split yields empty batches when there are fewer than 100 rows, so skip
    # those instead of sending empty requests to the endpoint
    prediction_batches = [
        linear_predictor.predict(batch)['predictions']
        for batch in np.array_split(test_features, 100)
        if len(batch) > 0
    ]
    # parse raw predictions json to extract predicted label
    if prediction_batches:
        test_preds = np.concatenate(
            [np.array([x['predicted_label'] for x in batch]) for batch in prediction_batches]
        )
    else:
        test_preds = np.array([])

    # boolean views: any non-zero value counts as the positive class
    actual_pos = np.asarray(test_labels) != 0
    pred_pos = test_preds != 0
    if actual_pos.shape != pred_pos.shape:
        # fail loudly rather than letting numpy broadcast mismatched shapes
        raise ValueError(
            'test_labels has shape {} but predictions have shape {}'.format(
                actual_pos.shape, pred_pos.shape))

    # calculate true positives, false positives, true negatives, false negatives
    tp = int(np.logical_and(actual_pos, pred_pos).sum())
    fp = int(np.logical_and(~actual_pos, pred_pos).sum())
    tn = int(np.logical_and(~actual_pos, ~pred_pos).sum())
    fn = int(np.logical_and(actual_pos, ~pred_pos).sum())

    def _ratio(numerator, denominator):
        # a metric with an empty denominator is reported as 0.0 instead of NaN
        return numerator / denominator if denominator else 0.0

    # calculate binary classification metrics
    recall = _ratio(tp, tp + fn)
    precision = _ratio(tp, tp + fp)
    accuracy = _ratio(tp + tn, tp + fp + tn + fn)
    f1 = _ratio(2 * precision * recall, precision + recall)

    if verbose:
        print(pd.crosstab(test_labels, test_preds, rownames=['actuals'], colnames=['predictions']))
        print("\n{:<11} {:.3f}".format('Recall:', recall))
        print("{:<11} {:.3f}".format('Precision:', precision))
        print("{:<11} {:.3f}".format('Accuracy:', accuracy))
        print("{:<11} {:.3f}".format('F1:', f1))

    return {'TP': tp, 'FP': fp, 'FN': fn, 'TN': tn, 'Precision': precision, 'Recall': recall, 'Accuracy': accuracy,
             'F1': f1, 'Model': model_name}

The `predictor.predict` method takes one parameter: the input data for which you want the SageMaker endpoint to provide inference. `predict` serializes the input data and sends it as a request to the SageMaker endpoint through the InvokeEndpoint operation. InvokeEndpoint requests can be made with `predictor.predict`, with the boto3 SageMaker Runtime client, or with the AWS CLI.