In [10]:
import os
os.environ["OMP_NUM_THREADS"] = "4"
import random
import gc
import re
import math
import hashlib
import datetime
import warnings
import copy

from collections import defaultdict
from multiprocessing import Pool, cpu_count

import tensorflow
import pandas as pd
import lightgbm as lgb
import numpy as np
import keras
from gensim import matutils
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
import scipy.spatial.distance
import nltk
from nltk.cluster import KMeansClusterer
from pandas.util import hash_pandas_object
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

# Where the downloaded data are
input_path = '/Users/tyamgin/Projects/mlbootcamp/championship21/data'
# Where to store results
output_path = '/Users/tyamgin/Projects/mlbootcamp/championship21/res'

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.
Using TensorFlow backend.


In [14]:
np.array([2, -2]) ** 1.2

  """Entry point for launching an IPython kernel.


array([2.29739671,        nan])

In [34]:
def ppow(a, p):
    return np.where(a >= 0, np.clip(a, 0, a) ** p, -(np.clip(-a, 0, -a) ** p))

In [38]:
def min2(a):
    if hasattr(a, 'values'):
        a = a.values
    if len(a) <= 1:
        return a[0]
    return sorted(a)[1]

def max2(a):
    if hasattr(a, 'values'):
        a = a.values
    if len(a) <= 1:
        return a[0]
    return sorted(a)[-2]

In [71]:
class MutableSettings:
    def __init__(self, scale=1, min_value=None, max_value=None):
        self.scale = scale
        self.min_value = min_value
        self.max_value = max_value
        
class Mutable:
    def __init__(self, settings):
        self.settings = settings

class MutableNumber(Mutable):
    def __init__(self, value, settings):
        self.value = value
        self.settings = settings
    def ensure_ranges(self):
        if self.settings.min_value is not None and self.value < self.settings.min_value:
            self.value = self.settings.min_value
        if self.settings.max_value is not None and self.value > self.settings.max_value:
            self.value = self.settings.max_value
        
class MutableFloat(MutableNumber):
    def mutate(self):
        result = MutableFloat(np.random.normal(self.value, self.settings.scale), self.settings)
        result.ensure_ranges()
        return result
    def crossover_with(self, other):
        result = MutableFloat((self.value + other.value) / 2, self.settings)
        result.ensure_ranges()
        return result
    
class MutableFloatFactory:
    def __init__(self, min_value, max_value, start_value=None):
        self.min_value = min_value
        self.max_value = max_value
        self.start_value = start_value
        self.scale = (max_value - min_value) / 7
    def get(self):
        result = self.start_value
        if result is None:
            result = np.random.uniform(self.min_value, self.max_value)
        return MutableFloat(result, MutableSettings(scale=1, min_value=self.min_value, max_value=self.max_value))

class Chromosome:
    def __init__(self):
        self.items = []
    def mutate(self):
        result = Chromosome()
        for x in self.items:
            result.items.append(x.mutate())
        return result
    def crossover_with(self, other):
        result = Chromosome()
        for i in range(len(self.items)):
            result.items.append(self.items[i].crossover_with(other.items[i]))
        return result
    def get_values(self):
        return list(map(lambda x: x.value, self.items))
        
class Population:
    def __init__(self, size, items, score_getter):
        self.size = size
        self.chromosomes = []
        self.score_getter = score_getter
        self.epochs_count = 0
        for i in range(size):
            chromosome = Chromosome()
            for item in items:
                chromosome.items.append(item.get())
            self.chromosomes.append(chromosome)
        self.print_info()
        
    def get_best_values(self):
        return self.chromosomes[0].get_values()
    
    def get_score(self, chromosome):
        return self.score_getter(chromosome.get_values())
        
    def print_info(self):
        scores = map(lambda c: self.get_score(c), self.chromosomes)
        print('Top scores at {} epoch: {}'.format(self.epochs_count, ', '.join(map(str, scores))))
        print('Best values: {}'.format(', '.join(map(str, self.get_best_values()))))
        
    def do_epoch(self):
        self.epochs_count += 1
        pairs = self.chromosomes
        for i in range(1, len(self.chromosomes)):
            for j in range(i):
                pairs.append(self.chromosomes[i].crossover_with(self.chromosomes[j]).mutate())

        pairs.sort(key=lambda c: self.get_score(c), reverse=True)
        self.chromosomes = pairs[0:self.size]
        self.print_info()
        

In [76]:
def score_getter(x):
    s = 0
    min_diff = 1e10
    for i in range(1, len(x)):
        s += x[i] - x[i - 1]
        min_diff = min(min_diff, x[i] - x[i - 1])
    return s + min_diff
"""
population = Population(size=10, items=[
    MutableFloatFactory(min_value=0, max_value=5),
    MutableFloatFactory(min_value=0, max_value=5),
    MutableFloatFactory(min_value=0, max_value=5),
    MutableFloatFactory(min_value=0, max_value=5),
    MutableFloatFactory(min_value=0, max_value=5),
], score_getter=score_getter)
for i in range(1000):
    population.do_epoch()
"""

Top scores at 0 epoch: -6.458426121819672, -2.1089771263846595, 0.599622141850384, 0.5312028884758293, 3.2879293756310863, 3.6570331557787292, -5.113071666375585, -6.326377092105146, -1.9773510082347694, -3.9729121668203438
Best values: 3.6517259911337225, 1.3883202378422728, 4.434600859055781, 2.9230907232862413, 0.058195296300146415
Top scores at 1 epoch: 4.3430298858964305, 3.2645656268624528, 3.1143245622351596, 2.8939387698412817, 2.614608544685871, 1.5544145658332784, 1.2091506130631808, 1.0913347627620107, 0.9855469896334808, 0.30489626615405463
Best values: 0, 0.9271618248268778, 1.567253124017348, 3.0519555991615803, 3.7029385867059603
Top scores at 2 epoch: 4.579909579930063, 4.536780986146072, 4.199176579952184, 3.90660992274248, 3.5553434219165583, 3.311129178881516, 3.2770429031516812, 3.161145308658569, 3.018429411860052, 2.956389449803353
Best values: 0, 3.394444536930818, 2.974354116860881, 3.348624236735279, 5
Top scores at 3 epoch: 5.310195092329065, 5.211283731995618

Top scores at 70 epoch: 5.824257904804446, 5.569144659838646, 5.5300768056188705, 5.527812315051171, 5.366199050752388, 5.366162605660532, 5.288923667075313, 5.223254709595804, 5.218013922780823, 5.168009310758246
Best values: 0, 0.8550397868301847, 1.679297691634631, 3.519670292815578, 5
Top scores at 71 epoch: 5.798971629571843, 5.613575403391233, 5.577189753895775, 5.502201012947466, 5.309838411652205, 5.248988503642412, 5.148721903109571, 5.0, 5.0, 5.0
Best values: 0, 0.896725491608499, 2.111998302049246, 2.9109699316210893, 5
Top scores at 72 epoch: 5.6602732978004475, 5.510687721911535, 5.403490919029384, 5.309459650226845, 5.26457453335142, 5.256433117087404, 5.248768467832997, 5.074620191396692, 5.0, 5.0
Best values: 0, 1.1307902136645758, 1.9689665936589906, 3.2192629602309744, 4.822096917806033
Top scores at 73 epoch: 5.594847084824412, 5.417210371338681, 5.395017614927494, 5.375977853226161, 5.331045979168668, 5.30164881116171, 5.301378776701657, 5.21281099887849, 5.11643919

Top scores at 147 epoch: 5.457800219616396, 5.321211837172816, 5.108075399943951, 5.0880981544645465, 5.057711428234332, 5.0, 5.0, 5.0, 5.0, 5.0
Best values: 0, 0.5653200433181818, 2.3373144709486677, 4.542199780383604, 5
Top scores at 148 epoch: 5.3731160023279525, 5.271071096165357, 5.084319771002286, 5.0, 4.867774448375048, 4.7461403497089965, 4.626602160925096, 4.59667549656357, 4.534309008894562, 4.51758000234437
Best values: 0.05769203596103902, 0.6317604812059986, 3.1132502512132008, 3.5440582895021917, 5
Top scores at 149 epoch: 5.807411146899091, 5.4526174995461805, 5.447222103631631, 5.302211610249044, 5.1454762766673845, 5.013102019527163, 5.0, 5.0, 5.0, 5.0
Best values: 0, 0.807411146899091, 1.995255970430523, 4.096145696743762, 5
Top scores at 150 epoch: 5.889538714139361, 5.620940618237993, 5.5725142197956465, 5.485695189303264, 5.191385039482201, 5.164881910192795, 5.065420827664228, 5.0, 5.0, 4.929516479634941
Best values: 0, 1.7557969636423172, 2.975153208483143, 4.110

Top scores at 225 epoch: 5.5919497751546725, 5.51178131566709, 5.485168543581893, 5.394663832746389, 5.12416488751267, 5.094199522078454, 5.0679745275501435, 5.007851502450199, 5.0, 4.8366550397995605
Best values: 0, 0.7074554343813175, 1.8357634816922017, 4.4080502248453275, 5
Top scores at 226 epoch: 5.506534244027309, 5.489721704963946, 5.393340488074171, 5.375473399230328, 5.312367840500419, 5.152543228193649, 5.115942049440782, 5.0831071265392, 5.0, 5.0
Best values: 0, 0.5065342440273093, 2.132503445069175, 3.0653421503230587, 5
Top scores at 227 epoch: 5.733579128075717, 5.540390432716924, 5.253222836785825, 5.1955026894760525, 5.174772523318691, 5.0616600541217505, 5.0, 5.0, 5.0, 5.0
Best values: 0, 1.1844193749195597, 1.9179985029952766, 3.218057647831512, 5
Top scores at 228 epoch: 5.545450550450233, 5.500234785412161, 5.38585354484064, 5.0, 5.0, 5.0, 5.0, 5.0, 4.99389069456328, 4.991610788740526
Best values: 0, 0.5454505504502329, 1.1924491755165452, 3.754958050447901, 5
Top 

Top scores at 304 epoch: 5.921326541120834, 5.3880958700665715, 5.21915748003886, 5.061345603572237, 5.050710821150991, 5.0, 5.0, 4.864122724000254, 4.778438158456807, 4.722423936541997
Best values: 0, 0.9984621739065753, 1.9197887150274098, 3.99645196595368, 5
Top scores at 305 epoch: 5.559380425753387, 5.443021066361508, 5.4230317910727734, 5.276827593945185, 5.255588232296424, 5.1686045173160515, 5.129577416778848, 5.0, 5.0, 4.794825481219412
Best values: 0, 2.244232632624522, 3.029894664814901, 4.440619574246613, 5
Top scores at 306 epoch: 5.751331794215527, 5.43665436259456, 5.412674090655832, 5.107713459892509, 5.009367957001439, 5.0, 5.0, 5.0, 5.0, 5.0
Best values: 0, 0.7513317942155265, 2.191815616358581, 3.3767128575452934, 5
Top scores at 307 epoch: 5.966477146556956, 5.933397870523798, 5.4884805927961455, 5.438107877843694, 5.435635664129354, 5.380678441530985, 5.34368332360059, 5.271638760169293, 5.1995619731793665, 5.1387824073501225
Best values: 0, 1.5990249000360754, 2.8

Top scores at 386 epoch: 5.492548190099287, 5.4452526016519185, 5.432408454683941, 5.072980045022699, 5.002796787624881, 5.0, 5.0, 5.0, 4.96664842614558, 4.948436017405248
Best values: 0, 0.49254819009928663, 2.526209660858716, 3.8730102991898363, 5
Top scores at 387 epoch: 5.281846388073975, 5.2407406880339975, 5.152091936341595, 5.012236957007209, 5.0, 5.0, 5.0, 5.0, 4.816902894904711, 4.661333738984208
Best values: 0, 0.28184638807397566, 3.7281581037086085, 4.325798480506581, 5
Top scores at 388 epoch: 5.58096507939401, 5.2816470420147645, 5.046388796607536, 5.024502809713788, 5.019101806459638, 5.0, 5.0, 5.0, 5.0, 5.0
Best values: 0, 1.0678664997847642, 3.529723787939184, 4.41903492060599, 5
Top scores at 389 epoch: 5.671006000066983, 5.665586823476488, 5.625944508572983, 5.590267167044157, 5.547164570956607, 5.498194949234886, 5.438389649888116, 5.397347067647356, 5.254785549598204, 5.221453137186523
Best values: 0, 0.8894452129472752, 3.651846437375891, 4.322852437442874, 5
Top 

Top scores at 465 epoch: 5.959017439357152, 5.141736397583511, 5.120979151215403, 5.015566394549355, 5.0, 5.0, 5.0, 5.0, 4.878340032814289, 4.82682592644505
Best values: 0.016696407538520166, 0.9924102544341918, 2.1875009696207526, 3.3345829863017826, 5
Top scores at 466 epoch: 5.7665712281638895, 5.743915279353224, 5.0, 5.0, 5.0, 5.0, 4.990135098226449, 4.832293796426484, 4.722633968469977, 4.690573511719068
Best values: 0, 0.7665712281638897, 2.527941514166877, 4.130539095491255, 5
Top scores at 467 epoch: 6.067090026404553, 5.737195638063753, 5.370169820631317, 5.310413420019677, 5.101775545974267, 5.098770084279337, 5.032302984565505, 5.0, 5.0, 5.0
Best values: 0, 1.0670900264045529, 2.3509312916067358, 3.916182880764664, 5
Top scores at 468 epoch: 5.381259941777307, 5.224219466157237, 5.196997594543856, 5.142892697200884, 5.129824528615583, 5.078128029217098, 5.0, 5.0, 5.0, 5.0
Best values: 0, 0.3812599417773066, 2.336997905028775, 4.189263569516095, 5
Top scores at 469 epoch: 5.4

Top scores at 541 epoch: 6.027392859447163, 5.6466454518843285, 5.548057968803584, 5.461059549045412, 5.4210913464943875, 5.394499297812074, 5.194485112871516, 5.170229382954295, 5.108915080025859, 5.058020784764967
Best values: 0, 1.151567544983809, 2.9073106286596873, 3.9726071405528365, 5
Top scores at 542 epoch: 5.837108284135299, 5.579071617931337, 5.488060854242884, 5.308116572689112, 5.290263229507114, 5.271326656109987, 5.2316620002671534, 5.193079383576327, 5.000757566062495, 5.0
Best values: 0.17128391387871372, 1.5232269784061168, 2.5810139838387265, 3.5894061818527394, 5
Top scores at 543 epoch: 5.861287408151845, 5.344969181299942, 5.295631286154667, 5.213935142971902, 5.190907776673523, 5.139327034274297, 5.0, 5.0, 4.9764991925896025, 4.849137862238965
Best values: 0, 0.8991087509009215, 1.7603961590527661, 3.985413483782274, 5
Top scores at 544 epoch: 5.496450077012958, 5.4949860627997165, 5.42557998176947, 5.413570037367846, 5.233808099601123, 5.224124732946099, 5.14332

Top scores at 620 epoch: 5.857524629785548, 5.548558760420767, 5.442257227439487, 5.412710986979003, 5.370805863744531, 5.346829355875534, 5.270072113976145, 5.193178124213941, 5.0, 4.8973141685456305
Best values: 0, 1.0561116121717111, 3.205934330069455, 4.063458959855003, 5
Top scores at 621 epoch: 5.883605747616671, 5.621471927647366, 5.377369303080789, 5.294236367239076, 5.215272218797962, 5.013503588095816, 5.0, 4.997655587849083, 4.942209746870812, 4.910469556235652
Best values: 0, 0.8836057476166708, 2.8972206737167365, 3.907214669973173, 5
Top scores at 622 epoch: 5.759738127842621, 5.436346438454484, 5.21543544857433, 5.200487198243869, 5.175247012979043, 5.162872099579378, 5.093400994644379, 5.042763964165637, 5.0, 5.0
Best values: 0, 0.8750871883715161, 2.8975089078190637, 3.657247035661685, 5
Top scores at 623 epoch: 5.878527666258455, 5.737695695701764, 5.422554828830048, 5.2605050205611565, 5.234255265100772, 5.163950330314263, 5.131772947577648, 5.0, 4.951387207918756, 4

Top scores at 698 epoch: 5.728815544286153, 5.543216643200147, 5.3108744515967, 5.096156162551553, 5.074930558542642, 5.0, 5.0, 4.998034569462231, 4.950193015689692, 4.916822090606637
Best values: 0, 1.0142835327879518, 1.8590378497024074, 3.8887582053079157, 4.8840612273716975
Top scores at 699 epoch: 5.613012235657723, 5.506723666351654, 5.448727154956307, 5.3854685597854175, 5.236463312312669, 5.0, 4.765096178493231, 4.750017993423732, 4.721720943172978, 4.702622226806945
Best values: 0, 1.0022483155230555, 2.244726652001008, 2.857738887658731, 5
Top scores at 700 epoch: 5.485377223803175, 5.36601435041872, 5.239951692881834, 5.129742937897124, 5.0614625416587575, 5.012979279681478, 5.0, 5.0, 4.939618733895237, 4.933734443495097
Best values: 0, 1.162641982214581, 3.0931939802058777, 4.514622776196825, 5
Top scores at 701 epoch: 6.026532516108421, 5.404456378181536, 5.352155129437765, 5.345212103601334, 5.252070856370368, 5.216404911985576, 5.133016234882054, 5.002852381180194, 4.923

Top scores at 782 epoch: 5.629881700063945, 5.3784241433645, 5.362263148851793, 5.1242507679456075, 5.011009658864919, 5.007221370531635, 5.0, 5, 4.999721310932244, 4.9383808277186825
Best values: 0, 0.629881700063945, 2.50567980931077, 3.6473192049060454, 5
Top scores at 783 epoch: 5.5243320550424295, 5.37757854105461, 5.25677353182833, 5.011437795513326, 5.0, 5, 5.0, 5.0, 5.0, 4.731979281099352
Best values: 0, 1.1230465830615137, 3.932193388892234, 4.4756679449575705, 5
Top scores at 784 epoch: 6.01514755812514, 5.905483212239827, 5.470707642010668, 5.395440525829529, 5.149838165306439, 5.020877444306541, 5.0, 5.0, 5.0, 5.0
Best values: 0, 1.4467957655634236, 2.461943323688563, 3.9219425567331445, 5
Top scores at 785 epoch: 6.133016150796166, 5.4985733200719915, 5.35130809203705, 5.220414570621883, 5.106041431664483, 5.102549137780523, 5.0325622204615765, 5.0, 5.0, 5.0
Best values: 0.001998338288572361, 1.137012827373311, 2.6263950997404386, 3.852202246820258, 5
Top scores at 786 epo

Top scores at 862 epoch: 5.548050331287774, 5.276790673022786, 5.1562396290258565, 5.06484203208384, 5.033043239713997, 5.0, 5.0, 5.0, 4.724068360295813, 4.67440734256324
Best values: 0, 0.5480503312877743, 2.5219962850294606, 4.24469206282171, 5
Top scores at 863 epoch: 5.670295630772667, 5.6673714590501465, 5.488668716372409, 5.203273749891555, 5.167489594301349, 5.165420839416188, 5.074799050459419, 5.0, 5.0, 5.0
Best values: 0, 0.9073820069968515, 3.009091217535256, 4.329704369227333, 5
Top scores at 864 epoch: 5.893379489265918, 5.395446639090564, 5.347489790728649, 5.326710163978919, 5.291413289470178, 5.248896037861901, 5.238103179026858, 5.104066504340366, 5.0579176606450815, 5.0
Best values: 0, 1.5623637944374669, 2.676656930933085, 3.5700364201990027, 5
Top scores at 865 epoch: 5.428293388085004, 5.155548345807585, 5.095779217201516, 5.055143575437103, 5.007574402639304, 5.0, 5.0, 4.966520475457481, 4.964126779844696, 4.95249987556822
Best values: 0, 2.620878127675052, 3.2669

Top scores at 943 epoch: 5.7050386583264014, 5.3819998829233695, 5.318081595537663, 5.178611259427919, 5.14998657445052, 5.003965593480941, 5.0, 5.0, 5.0, 5.0
Best values: 0, 0.705038658326401, 2.647844660379115, 4.135806020827232, 5
Top scores at 944 epoch: 5.459613714550391, 5.395997857140616, 5.051880927179731, 5.041703062649995, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
Best values: 0, 2.2345400836812797, 3.820341751718883, 4.540386285449609, 5
Top scores at 945 epoch: 5.662735573670372, 5.430215110805078, 5.095976100139311, 5.00863973859501, 5.0, 5.0, 5.0, 5.0, 4.843479111023067, 4.823251490015565
Best values: 0, 1.2367525293368222, 3.577198172805627, 4.239933746475999, 5
Top scores at 946 epoch: 5.1384144012960755, 5.059427832211713, 5.0, 4.9039445301622, 4.893662448131737, 4.829312964780854, 4.7721631721375015, 4.770474897266833, 4.741566120610068, 4.727449097117373
Best values: 0, 2.6714419086994132, 2.8098563099954887, 3.057601252151585, 5
Top scores at 947 epoch: 5.5871120256026146, 5.480

In [81]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    #torch.manual_seed(seed)
    #torch.cuda.manual_seed(seed)
    #torch.backends.cudnn.deterministic = True
    tensorflow.set_random_seed(seed)