#### Load the data

In [1]:
import pandas as pd
import pickle
import math
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm



In [2]:
data_dir = "../../data/"
def save_obj(obj, name ):
    with open( name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(data_dir + name + '.pkl', 'rb') as f:
        return pickle.load(f)
# load data
Old_Labeled_data = load_obj("SBdata")
BasicFeatures = load_obj("pre_features-v2")
pre_features = BasicFeatures

def transform_data(Old_Labeled_data):
    labeled_edges = []
    for Old_Labeled_edge in Old_Labeled_data:
        label = int(Old_Labeled_edge[1])
        source = int(Old_Labeled_edge[0][0])
        sink = int(Old_Labeled_edge[0][1])
        labeled_edges.append((source, sink, label))
    return labeled_edges
labeled_edges = transform_data(Old_Labeled_data)

In [137]:
test_edges = labeled_edges[0:20]

In [3]:
# Function
#Salton Similarity
def salton_similarity(node1, node2):
    n1 = pre_features[node1]
    n2 = pre_features[node2]
    common_neighors = list(set(n1[2]).intersection(n2[2]))
    inter = len(common_neighors)
    degree_out_flow = n1[6]
    degree_in_flow = n2[4]
    
    if inter == 0:
        return 0
    else:
        try:
            sqrt_of_degree = math.sqrt(degree_out_flow * degree_in_flow)
            salton = inter / sqrt_of_degree
            probability = 1 /(1 - math.log(salton)*0.2)
            return probability
        except:
            return 0

#Cosine
def Cosine(Node1, Node2):
    n1 = pre_features[Node1]
    n2 = pre_features[Node2]
    common_neighors = list(set(n1[2]).intersection(n2[2]))
    lm = len(common_neighors)
    if lm == 0:
        return 0
    else:
        return (0.0+lm)/(len(n1[2])*len(n2[2]))

def get_jaccard_coefficient(source, sink):
    """
    in: source::Node object
    in: sink::Node object
    return: jaccard's cofficient::numeric
    """
    # transform
    neighbours_of_source_list = BasicFeatures[source][2]
    neighbours_of_sink_list = BasicFeatures[sink][2]
    
    neigbours_set_of_source = set(neighbours_of_source_list)
    neigbours_set_of_sink = set(neighbours_of_sink_list)
    union_neighbours = neigbours_set_of_source | neigbours_set_of_sink
    common_neighbours = neigbours_set_of_source & neigbours_set_of_sink
    if len(union_neighbours)==0:
        return 0.0
    return(len(common_neighbours)/len(union_neighbours))

def get_preferential_attachment(source, sink):
    # transform
    neighbours_of_source_list = BasicFeatures[source][2]
    neighbours_of_sink_list = BasicFeatures[sink][2]
    
    neigbours_set_of_source = set(neighbours_of_source_list)
    neigbours_set_of_sink = set(neighbours_of_sink_list)
    
    return len(neigbours_set_of_source)*len(neigbours_set_of_sink)

def get_adamic_adar(source, sink):
    # transform
    neighbours_of_source_list = BasicFeatures[source][2]
    neighbours_of_sink_list = BasicFeatures[sink][2]

    neigbours_set_of_source = set(neighbours_of_source_list)
    neigbours_set_of_sink = set(neighbours_of_sink_list)
    common_neighbours = neigbours_set_of_source & neigbours_set_of_sink
    # get the summation
    score = 0
    for common_node in common_neighbours:
        if math.log(len(BasicFeatures[common_node][2])) == 0:
            return 0.0
        score = score + 1/math.log(len(BasicFeatures[common_node][2]))
    return score

def get_resource_allocation(source, sink):
    neighbours_of_source_list = BasicFeatures[source][2]
    neighbours_of_sink_list = BasicFeatures[sink][2]
#     print(neighbours_of_source_list)
#     print(neighbours_of_sink_list)
    neigbours_set_of_source = set(neighbours_of_source_list)
    neigbours_set_of_sink = set(neighbours_of_sink_list)
    
    common_neighbours = neigbours_set_of_source & neigbours_set_of_sink
#     print(common_neighbours)
    score=0
    for common_node in common_neighbours:
        # number of the neighbours of the common_node
        try:
            single_common_node_score = 1/BasicFeatures[common_node][0]
        except:
            single_common_node_score=0
        score = score + single_common_node_score
    return score
    

# def get_resource_allocation_v2(source, sink):
#     in_neighbours_of_source_list = set(BasicFeatures[source][3])
#     in_neighbours_of_sink_list = set(BasicFeatures[sink][3])

#     out_neighbours_of_source_list = set(BasicFeatures[source][5])
#     out_neighbours_of_sink_list = set(BasicFeatures[sink][5])

#     common_neighbours_1 = out_neighbours_of_source_list & in_neighbours_of_sink_list
#     common_neighbours_2 = in_neighbours_of_source_list & out_neighbours_of_sink_list

#     score_x=0
#     score_y=0
#     for common_node in common_neighbours_1:
#         # number of the neighbours of the common_node
#         try:
#             # only count the out neighobour
#             single_common_node_score = 1/BasicFeatures[common_node][5]
#         except:
#             single_common_node_score=0
#         score_x = score_x + single_common_node_score
    
#     for common_node in common_neighbours_2:
#         # number of the neighbours of the common_node
#         try:
#             # only count the out neighobour
#             single_common_node_score = 1/BasicFeatures[common_node][5]
#         except:
#             single_common_node_score=0
#         score_y = score_y + single_common_node_score
#     score = score_x+score_y
#     return score
    
    
    
    
# how similar are the outbound neighbors of source to sink
# either JA, PA, AA
def get_outbound_similarity_score(source, sink, metric):
    # get the outbound_node of source
    outbound_node_for_source_set = set(BasicFeatures[source][5])
    summation = 0
    for outbound_node_for_source in outbound_node_for_source_set:
        summation =summation + metric(sink,outbound_node_for_source)
    if len(outbound_node_for_source_set) == 0:
        return 0
    score = 1/len(outbound_node_for_source_set)*summation
    return score

# either JA, PA, AA
def get_inbound_similarity_score(source, sink, metric):
    # get the inbound_node of sink
    inbound_node_for_sink_set = set(BasicFeatures[sink][3])
    summation = 0
    for inbound_node_for_sink in inbound_node_for_sink_set:
        summation =summation + metric(source,inbound_node_for_sink)
    if len(inbound_node_for_sink_set) == 0:
        return 0
    score = 1/len(inbound_node_for_sink_set)*summation
    return score

def get_common_neighbours(node1, node2):
    try:
        n1 = pre_features[node1]
        n2 = pre_features[node2]
        common_neighors = list(set(n1[2]).intersection(n2[2]))
        return common_neighors
    except:
        return 0

def get_training_df(final_edges):
    training_df = pd.DataFrame()
    for edge in tqdm(final_edges, mininterval=600):
        source = edge[0]
        sink = edge[1]
        label = edge[2]
        common_neighbours = get_common_neighbours(source,sink)
        num_of_neighbours_source=BasicFeatures[source][0]
        num_of_in_neighbours_source=BasicFeatures[source][4]
        num_of_out_neighbours_source=BasicFeatures[source][6]

        num_of_neighbours_sink=BasicFeatures[sink][0]
        num_of_in_neighbours_sink=BasicFeatures[sink][4]
        num_of_out_neighbours_sink=BasicFeatures[sink][6]
        
        num_of_neighbours_sum=BasicFeatures[source][0] + BasicFeatures[sink][0]
        num_of_in_neighbours_sum=BasicFeatures[source][4] + BasicFeatures[sink][4]
        num_of_out_neighbours_sum=BasicFeatures[source][6] + BasicFeatures[sink][6]
        
        salton_similarity_score = salton_similarity(source, sink)
        cosine = Cosine(source, sink)
        jaccard_coefficient = get_jaccard_coefficient(source, sink)
        preferential_attachment = get_preferential_attachment(source, sink)
        adamic_adar = get_adamic_adar(source, sink)
        resource_allocation = get_resource_allocation(source, sink)

#         salton_similarity_score_out = get_outbound_similarity_score(source, sink, salton_similarity)
#         cosine_out = get_outbound_similarity_score(source, sink, Cosine)
#         jaccard_coefficient_out = get_outbound_similarity_score(source, sink, get_jaccard_coefficient)
#         preferential_attachment_out = get_outbound_similarity_score(source, sink, get_preferential_attachment)
#         adamic_adar_out = get_outbound_similarity_score(source, sink, get_adamic_adar)
#         resource_allocation_out = get_outbound_similarity_score(source, sink, get_resource_allocation)

#         salton_similarity_score_in = get_inbound_similarity_score(source, sink, salton_similarity)
#         cosine_in = get_inbound_similarity_score(source, sink, Cosine)
#         jaccard_coefficient_in = get_inbound_similarity_score(source, sink, get_jaccard_coefficient)
#         preferential_attachment_in = get_inbound_similarity_score(source, sink, get_preferential_attachment)
#         adamic_adar_in = get_inbound_similarity_score(source, sink, get_adamic_adar)
#         resource_allocation_in = get_inbound_similarity_score(source, sink, get_resource_allocation)

# add the basic features
        df_row = pd.DataFrame([
                               source, 
                               sink, 
                               label,
                               num_of_neighbours_source,
                               num_of_in_neighbours_source,
                               num_of_out_neighbours_source,
                               num_of_neighbours_sink,
                               num_of_in_neighbours_sink,
                               num_of_out_neighbours_sink,
                               num_of_neighbours_sum,
                               num_of_in_neighbours_sum,
                               num_of_out_neighbours_sum,      
                               salton_similarity_score, 
                               cosine, 
                               jaccard_coefficient,
                               preferential_attachment, 
                               adamic_adar, 
                               resource_allocation
#                                salton_similarity_score_out,
#                                cosine_out,
#                                jaccard_coefficient_out,
#                                preferential_attachment_out,
#                                adamic_adar_out,
#                                resource_allocation_out,
#                                salton_similarity_score_in,
#                                cosine_in,
#                                jaccard_coefficient_in,
#                                preferential_attachment_in,
#                                adamic_adar_in,
#                                resource_allocation_in
                              ]).T
        training_df = training_df.append(df_row)
    training_df.rename(columns={
        0:'source', 
        1:'sink', 
        2:'label',
        3:'num_of_neighbours_source',
        4:'num_of_in_neighbours_source',
        5:'num_of_out_neighbours_source',
        6:'num_of_neighbours_sink',
        7:'num_of_in_neighbours_sink',
        8:'num_of_out_neighbours_sink',
        9:'num_of_neighbours_sum',
        10:'num_of_in_neighbours_sum',
        11:'num_of_out_neighbours_sum',      
        12:'salton_similarity_score', 
        13:'cosine', 
        14:'jaccard_coefficient',
        15:'preferential_attachment', 
        16:'adamic_adar', 
        17:'resource_allocation'
#         19:'salton_similarity_score_out',
#         20:'cosine_out',
#         21:'jaccard_coefficient_out',
#         22:'preferential_attachment_out',
#         23:'adamic_adar_out',
#         24:'resource_allocation_out',
#         25:'salton_similarity_score_in',
#         26:'cosine_in',
#         27:'jaccard_coefficient_in',
#         28:'preferential_attachment_in',
#         29:'adamic_adar_in',
#         30:'resource_allocation_in'           
    },inplace=True)
    training_df[['source', 'sink', 'label']] = training_df[['source', 'sink', 'label']].astype(int)
    return training_df

# data 需要为array
def rescale_min_max(data): 
    """
    min-max normalisation
    """
    scaler = MinMaxScaler()
    scaler.fit(data)
    result = scaler.transform(data)
    return pd.DataFrame(result)

def standardise(data):
    """remove the mean and transform to unit variance"""
    scaler = StandardScaler()
    scaler.fit(data)
    result = scaler.transform(data)
    return pd.DataFrame(result)


In [6]:
# normalise the training data
# training_df = get_training_df(labeled_edges)
training_data_v1 = load_obj("un_normalised_final_training_data_df_rf")

# final_training_data_df = training_data_v1.iloc[:,3:30]
# final_labels_df = training_data_v1.iloc[:,2]
training_df= training_data_v1
final_labels_df = training_df.iloc[:,2]
measurement_to_normal = training_df.iloc[:,3:30]
# 使用标准化
final_training_data_df = rescale_min_max(measurement_to_normal)
final_training_data_df.rename(columns={
        0:'num_of_neighbours_source',
        1:'num_of_in_neighbours_source',
        2:'num_of_out_neighbours_source',
        3:'num_of_neighbours_sink',
        4:'num_of_in_neighbours_sink',
        5:'num_of_out_neighbours_sink',
        6:'num_of_neighbours_sum',
        7:'num_of_in_neighbours_sum',
        8:'num_of_out_neighbours_sum',      
        9:'salton_similarity_score', 
        10:'cosine', 
        11:'jaccard_coefficient',
        12:'preferential_attachment', 
        13:'adamic_adar', 
        14:'resource_allocation'
#         15:'salton_similarity_score_out',
#         16:'cosine_out',
#         17:'jaccard_coefficient_out',
#         18:'preferential_attachment_out',
#         19:'adamic_adar_out',
#         20:'resource_allocation_out'
#         21:'salton_similarity_score_in',
#         22:'cosine_in',
#         23:'jaccard_coefficient_in',
#         24:'preferential_attachment_in',
#         25:'adamic_adar_in',
#         26:'resource_allocation_in'           
    },inplace=True)


In [18]:
save_obj(training_df,'un_normalised_final_training_data_df_rf')

In [8]:
final_training_data_df

Unnamed: 0,num_of_neighbours_source,num_of_in_neighbours_source,num_of_out_neighbours_source,num_of_neighbours_sink,num_of_in_neighbours_sink,num_of_out_neighbours_sink,num_of_neighbours_sum,num_of_in_neighbours_sum,num_of_out_neighbours_sum,salton_similarity_score,cosine,jaccard_coefficient,preferential_attachment,adamic_adar,resource_allocation
0,0.000370,0.029132,0.000188,0.000827,0.055372,0.000480,0.001195,0.062092,0.000667,0.390960,0.000547,0.058775,4.844166e-05,0.003545,0.001057
1,0.000370,0.029132,0.000188,0.000428,0.021901,0.000291,0.000796,0.037498,0.000478,0.413237,0.000960,0.080810,2.505577e-05,0.002965,0.000388
2,0.000370,0.029132,0.000188,0.001205,0.117149,0.000469,0.001572,0.107484,0.000655,0.400112,0.000637,0.073335,7.550246e-05,0.005721,0.000956
3,0.000370,0.029132,0.000188,0.015113,0.254132,0.013594,0.015448,0.208137,0.013753,0.400520,0.000075,0.008528,1.165507e-03,0.008352,0.001550
4,0.000370,0.029132,0.000188,0.000467,0.045455,0.000181,0.000836,0.054805,0.000368,0.425668,0.001525,0.138358,2.806253e-05,0.006982,0.004367
5,0.000370,0.029132,0.000188,0.000103,0.006612,0.000062,0.000472,0.026264,0.000249,0.418278,0.002368,0.080943,5.678896e-06,0.002318,0.001164
6,0.000370,0.029132,0.000188,0.000920,0.098967,0.000297,0.001287,0.094125,0.000483,0.386826,0.000611,0.065230,5.746193e-05,0.004233,0.000803
7,0.000370,0.029132,0.000188,0.008642,0.160331,0.007678,0.008992,0.139214,0.007849,0.369738,0.000060,0.006854,6.527997e-04,0.003516,0.000336
8,0.000370,0.029132,0.000188,0.000754,0.013223,0.000675,0.001122,0.031122,0.000861,0.406431,0.000382,0.031784,5.913235e-05,0.001870,0.000141
9,0.000370,0.029132,0.000188,0.000296,0.026860,0.000127,0.000665,0.041142,0.000314,0.310756,0.000185,0.012473,1.603550e-05,0.000363,0.000023


In [9]:
final_labels_df = training_df.iloc[:,2]

In [10]:
final_labels_df

0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
    ..
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
Name: label, Length: 389478, dtype: int64

In [12]:
final_labels_df

0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
    ..
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
Name: label, Length: 389478, dtype: int64

In [20]:
final_labels_df

0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
    ..
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    1
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
0    0
Name: label, Length: 389478, dtype: int64

In [None]:
X=final_training_data_df
# count=0
# get the data and label
y=final_labels_df

# training model
from sklearn.model_selection import train_test_split
X_t, X_test, y_t, y_test = train_test_split(X,y)
X_train, X_validation, y_train, y_validation  = train_test_split(X_t,y_t)

# Gridsearch settings
rf = RandomForestClassifier()
X_train = X_t
y_train = y_t

pipeline = Pipeline([
       ('clf', RandomForestClassifier(criterion='entropy'))
   ])
parameters = {
       'clf__n_estimators': (50, 100),
       'clf__max_depth': (10, 100),
       'clf__min_samples_split': (2, 10, 100),
       'clf__min_samples_leaf': (2, 10, 100)
}
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1,
   verbose=1, scoring='roc_auc', cv=3)

grid_search.fit(X_train, y_train)
print('Best score: %0.3f' % grid_search.best_score_)
print('Best parameters set:')
best_parameters = grid_search.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print('\t%s: %r' % (param_name, best_parameters[param_name]))
predictions = grid_search.predict(X_test)
print('Accuracy:', accuracy_score(y_test, predictions))
print('Precision:', precision_score(y_test, predictions))
print('Recall:', recall_score(y_test, predictions))




# make the prediction

with open(data_dir + "test-public.txt", "r") as f:
     test_data = f.readlines()
test_data = [i.split() for i in test_data[1:]]
def predict():
    """
    make the prediction using the jaccard's coefficient
    """
    result = []
    for line in tqdm(test_data, mininterval=50):
        # converse to integer
        source = int(line[1].strip())
        sink = int(line[2].strip())
        common_neighbours = get_common_neighbours(source,sink)
        num_of_neighbours_source=BasicFeatures[source][0]
        num_of_in_neighbours_source=BasicFeatures[source][4]
        num_of_out_neighbours_source=BasicFeatures[source][6]

        num_of_neighbours_sink=BasicFeatures[sink][0]
        num_of_in_neighbours_sink=BasicFeatures[sink][4]
        num_of_out_neighbours_sink=BasicFeatures[sink][6]
        
        num_of_neighbours_sum=BasicFeatures[source][0] + BasicFeatures[sink][0]
        num_of_in_neighbours_sum=BasicFeatures[source][4] + BasicFeatures[sink][4]
        num_of_out_neighbours_sum=BasicFeatures[source][6] + BasicFeatures[sink][6]
        
        salton_similarity_score = salton_similarity(source, sink)
        cosine = Cosine(source, sink)
        jaccard_coefficient = get_jaccard_coefficient(source, sink)
        preferential_attachment = get_preferential_attachment(source, sink)
        adamic_adar = get_adamic_adar(source, sink)
        resource_allocation = get_resource_allocation(source, sink)

#         salton_similarity_score_out = get_outbound_similarity_score(source, sink, salton_similarity)
#         cosine_out = get_outbound_similarity_score(source, sink, Cosine)
#         jaccard_coefficient_out = get_outbound_similarity_score(source, sink, get_jaccard_coefficient)
#         preferential_attachment_out = get_outbound_similarity_score(source, sink, get_preferential_attachment)
#         adamic_adar_out = get_outbound_similarity_score(source, sink, get_adamic_adar)
#         resource_allocation_out = get_outbound_similarity_score(source, sink, get_resource_allocation)

#         salton_similarity_score_in = get_inbound_similarity_score(source, sink, salton_similarity)
#         cosine_in = get_inbound_similarity_score(source, sink, Cosine)
#         jaccard_coefficient_in = get_inbound_similarity_score(source, sink, get_jaccard_coefficient)
#         preferential_attachment_in = get_inbound_similarity_score(source, sink, get_preferential_attachment)
#         adamic_adar_in = get_inbound_similarity_score(source, sink, get_adamic_adar)
#         resource_allocation_in = get_inbound_similarity_score(source, sink, get_resource_allocation)

#         df_row = pd.DataFrame([cosine, jaccard_coefficient, preferential_attachment, adamic_adar, resource_allocation]).T
        X_test = pd.DataFrame([
                               num_of_neighbours_source,
                               num_of_in_neighbours_source,
                               num_of_out_neighbours_source,
                               num_of_neighbours_sink,
                               num_of_in_neighbours_sink,
                               num_of_out_neighbours_sink,
                               num_of_neighbours_sum,
                               num_of_in_neighbours_sum,
                               num_of_out_neighbours_sum,      
                               salton_similarity_score, 
                               cosine, 
                               jaccard_coefficient,
                               preferential_attachment, 
                               adamic_adar, 
                               resource_allocation
#                                salton_similarity_score_out,
#                                cosine_out,
#                                jaccard_coefficient_out,
#                                preferential_attachment_out,
#                                adamic_adar_out,
#                                resource_allocation_out,
#                                salton_similarity_score_in,
#                                cosine_in,
#                                jaccard_coefficient_in,
#                                preferential_attachment_in,
#                                adamic_adar_in,
#                                resource_allocation_in
                              ]).T
        single_result = grid_search.predict(X_test)[0]
        print(single_result)
        result.append((line[0], single_result))
    return result
result = predict()




# save the result

import csv
import time
'''
Description: get time
Input: 
Output: time
''' 
def nowtime():
    return time.strftime("%Y%m%d-%H%M", time.localtime())


"""
Description: Save prediction result to files
Input: (1) result
       (2) filename
Output: 
"""
def save_prediction_to_csv(result,filename):
    headers = ['id','Prediction']

    with open(filename + str(nowtime()) + ".csv", 'w', encoding = 'utf8') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(result)
save_prediction_to_csv(result, "shawn_rf_min_max")

Fitting 3 folds for each of 36 candidates, totalling 108 fits


In [14]:
result

[('1', 1),
 ('2', 1),
 ('3', 1),
 ('4', 1),
 ('5', 1),
 ('6', 1),
 ('7', 1),
 ('8', 1),
 ('9', 1),
 ('10', 1),
 ('11', 1),
 ('12', 1),
 ('13', 1),
 ('14', 1),
 ('15', 1),
 ('16', 1),
 ('17', 1),
 ('18', 1),
 ('19', 1),
 ('20', 1),
 ('21', 1),
 ('22', 1),
 ('23', 1),
 ('24', 1),
 ('25', 1),
 ('26', 1),
 ('27', 1),
 ('28', 1),
 ('29', 1),
 ('30', 1),
 ('31', 1),
 ('32', 1),
 ('33', 1),
 ('34', 1),
 ('35', 1),
 ('36', 1),
 ('37', 1),
 ('38', 1),
 ('39', 1),
 ('40', 1),
 ('41', 1),
 ('42', 1),
 ('43', 1),
 ('44', 1),
 ('45', 1),
 ('46', 1),
 ('47', 1),
 ('48', 1),
 ('49', 1),
 ('50', 1),
 ('51', 1),
 ('52', 1),
 ('53', 1),
 ('54', 1),
 ('55', 1),
 ('56', 1),
 ('57', 1),
 ('58', 1),
 ('59', 1),
 ('60', 1),
 ('61', 1),
 ('62', 1),
 ('63', 1),
 ('64', 1),
 ('65', 1),
 ('66', 1),
 ('67', 1),
 ('68', 1),
 ('69', 1),
 ('70', 1),
 ('71', 1),
 ('72', 1),
 ('73', 1),
 ('74', 1),
 ('75', 1),
 ('76', 1),
 ('77', 1),
 ('78', 1),
 ('79', 1),
 ('80', 1),
 ('81', 1),
 ('82', 1),
 ('83', 1),
 ('84', 1),
 

# Save the result of the prediction

# Modelling

# Please save the training set as the csv file.

# Prediction