In [1]:
import os
import numpy as np 
import pandas as pd

In [2]:
X_train = pd.read_csv("train.genotype.txt", sep = " ", header=None).values
y_train = pd.read_csv("train.phenotype.txt", sep = " ", header=None).values

X_test = pd.read_csv("test.genotype.txt", sep = " ", header=None).values

In [3]:
y_train = y_train.ravel()

In [4]:
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer, make_column_transformer

from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error,make_scorer,r2_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

scorer = make_scorer(r2_score)

In [5]:
from sklearn.neural_network import MLPRegressor

from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import f_regression, mutual_info_regression
from sklearn.feature_selection import SelectKBest

In [6]:
select_k_best = SelectKBest(score_func = f_regression,k = 20)

#you can have different classifier for your final classifier
clf = MLPRegressor() 

params = {
          "alpha":[1e-1,0.5,1,5,1e1,1e2],
          "hidden_layer_sizes":[(5,),(10,),(20,),(25,),(50,),(100,)],
          #"model__learning_rate":['adaptive'],
          "max_iter":[1000],
          "activation":['relu']
     }

g_s = GridSearchCV(clf, 
                      param_grid=params,
                      cv= KFold(n_splits=10,shuffle = True, random_state = 42), scoring = scorer,verbose = 10,n_jobs = -1)

pipeline  = Pipeline([('f_s',select_k_best),
                      ('g_s',g_s)])

pipeline.fit(X_train, y_train)
y_test = pipeline.predict(X_test)

df_g = pd.DataFrame(g_s.cv_results_)
sorted_g = df_g.sort_values(by='rank_test_score', ascending=True)

Fitting 10 folds for each of 36 candidates, totalling 360 fits


In [7]:
sorted_g[:10]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_activation,param_alpha,param_hidden_layer_sizes,param_max_iter,params,split0_test_score,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
21,0.982635,0.231488,0.000403,0.000438,relu,5.0,"(25,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.072974,...,0.157954,0.085099,0.10697,0.044303,0.137419,0.106548,0.099917,0.101601,0.031818,1
22,1.104596,0.208491,0.0007,0.000458,relu,5.0,"(50,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.070045,...,0.158073,0.089087,0.104872,0.043669,0.133885,0.102547,0.102864,0.101439,0.031324,2
20,0.954039,0.144055,0.00059,0.000483,relu,5.0,"(20,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.070384,...,0.15438,0.08917,0.101923,0.045958,0.137424,0.108242,0.104259,0.101341,0.031091,3
23,1.31932,0.235962,0.00081,0.000569,relu,5.0,"(100,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.069824,...,0.158936,0.089854,0.103127,0.042393,0.137031,0.108766,0.107983,0.100379,0.034109,4
19,1.010759,0.351534,0.001002,0.000775,relu,5.0,"(10,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.059231,...,0.150224,0.089646,0.10651,0.043733,0.133506,0.112561,0.109444,0.099688,0.032763,5
18,0.940261,0.40367,0.000902,0.000301,relu,5.0,"(5,)",1000,"{'activation': 'relu', 'alpha': 5, 'hidden_lay...",0.067008,...,0.159014,0.086518,0.103735,0.04391,0.134137,0.099731,0.106,0.09923,0.032087,6
24,0.958918,0.337207,0.0005,0.0005,relu,10.0,"(5,)",1000,"{'activation': 'relu', 'alpha': 10.0, 'hidden_...",0.068067,...,0.151365,0.092881,0.09424,0.046437,0.11556,0.104831,0.093102,0.095319,0.027942,7
27,0.806983,0.339745,0.000702,0.00046,relu,10.0,"(25,)",1000,"{'activation': 'relu', 'alpha': 10.0, 'hidden_...",0.064699,...,0.14494,0.087502,0.101272,0.04471,0.12061,0.100883,0.096385,0.095216,0.02783,8
29,0.948247,0.235959,0.001077,0.001108,relu,10.0,"(100,)",1000,"{'activation': 'relu', 'alpha': 10.0, 'hidden_...",0.069023,...,0.143286,0.084475,0.09665,0.047019,0.123001,0.092951,0.101749,0.09478,0.026903,9
28,0.909107,0.215358,0.000837,0.000413,relu,10.0,"(50,)",1000,"{'activation': 'relu', 'alpha': 10.0, 'hidden_...",0.072999,...,0.139608,0.089054,0.098802,0.045917,0.11741,0.09731,0.089112,0.094641,0.025016,10


# save and zip the file

In [8]:
pd.DataFrame(y_test).to_csv(f"predictions.csv", sep = " ", header = None, index = None)
os.system("zip -r predictions.zip predictions.csv")

0