In [1]:
from shutil import copyfile
# Stage the shared model library under the name `ModelLib.py`; the import cell
# below pulls `Create_model` and `stratified_group_k_fold` from a module of that name.
# NOTE(review): presumably ../working is the notebook's working directory — confirm.
copyfile("../usr/lib/modellib/modellib.py", "../working/ModelLib.py")

'../working/ModelLib.py'

In [2]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import KFold,StratifiedKFold
from tqdm.auto import tqdm
from ModelLib import Create_model,stratified_group_k_fold
import random
import os
from copy import deepcopy
import math
from glob import glob

In [3]:
# Seed every random number generator the notebook uses so that runs are repeatable.
random.seed(831)
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment
# cannot change string hashing in the already-running kernel; it is only
# inherited by processes spawned afterwards.
os.environ['PYTHONHASHSEED'] = str(721)
np.random.seed(1111)
torch.manual_seed(1117)
# Keep this after torch.manual_seed: it re-seeds the current CUDA device with its
# own value. The call is silently ignored when CUDA is not available.
torch.cuda.manual_seed(1001)
# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Use the GPU when there is one, otherwise fall back to the CPU so the notebook
# still runs (slowly) on machines without CUDA instead of failing at model creation.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
# Pre-computed .npy arrays come from two input datasets.
_FE_DIR = '../input/covid19fe/'
_EXTRA_DIR = '../input/covid19extrafeatures/'

# Per-position input features. The "aug" files are loaded here; the plain
# train_x.npy / test_x.npy files in the same directory are the alternative.
train_x = np.load(_FE_DIR + 'train_aug_x.npy')
test_x = np.load(_FE_DIR + 'test_aug_x.npy')

# Pairwise matrices, one family per file; they are combined in the next cell.
train_bpps = np.load(_FE_DIR + 'train_bpps.npy')
test_bpps = np.load(_FE_DIR + 'test_bpps.npy')
train_viennarna_bpps = np.load(_EXTRA_DIR + 'train_viennarna_bpps.npy')
test_viennarna_bpps = np.load(_EXTRA_DIR + 'test_viennarna_bpps.npy')
train_mat = np.load(_EXTRA_DIR + 'train_mat.npy')
test_mat = np.load(_EXTRA_DIR + 'test_mat.npy')
train_aug_mat = np.load(_FE_DIR + 'train_aug_mat.npy')
test_aug_mat = np.load(_FE_DIR + 'test_aug_mat.npy')

# Training targets and their per-sample quality information.
label = np.load(_FE_DIR + 'label.npy')
label_error = np.load(_FE_DIR + 'label_error.npy')
signal_to_noise = np.load(_FE_DIR + 'signal_to_noise.npy')

In [5]:
# Combine the four pairwise matrices of every sample along a new axis 1.
# The order matters: the training loop swaps the last channel into the
# second-to-last slot as an augmentation and then drops the last channel.
# Note that this cell rebinds train_bpps / test_bpps, so it cannot be re-run
# without first re-running the loading cell.
train_bpps = np.stack([train_bpps, train_viennarna_bpps, train_mat, train_aug_mat], axis=1)
test_bpps = np.stack([test_bpps, test_viennarna_bpps, test_mat, test_aug_mat], axis=1)

In [6]:
# Raw competition tables (JSON lines); the stored 'index' column is discarded.
train = pd.read_json('../input/stanford-covid-vaccine/train.json', lines=True).drop(columns='index')
test = pd.read_json('../input/stanford-covid-vaccine/test.json', lines=True).drop(columns='index')

# Per-sample lengths, later handed to the dataset as `seq_length` / `scored_length`.
train_length = train['seq_length'].values
test_length = test['seq_length'].values
train_scored = train['seq_scored'].values
test_scored = test['seq_scored'].values

# Boolean flag per training sample (SN_filter == 1) and the matching row indices.
SN_filter_mask = (train['SN_filter'] == 1).values
SN_filter = np.where(SN_filter_mask)[0]
# Row indices of samples whose signal-to-noise value exceeds 1.
signal_filter = np.where(signal_to_noise > 1)[0]

In [7]:
# from sklearn.preprocessing import OneHotEncoder,scale
# from sklearn.cluster import KMeans
# cluster_features = OneHotEncoder().fit_transform(train_x[:,:107,0].reshape(-1,1)).toarray().reshape([len(train),-1])
# cluster_features = scale(cluster_features,axis=0)
# kmeans_model = KMeans(n_clusters=200, random_state=721).fit(cluster_features)
# cluster = kmeans_model.labels_

In [8]:
class Covid19Dataset(Dataset):
    """Map-style dataset that serves one sample cropped to its own sequence length.

    Every sample is stored padded to 130 positions; ``__getitem__`` cuts the
    per-position arrays (and both axes of the pairwise matrices) down to
    ``seq_length[idx]``.

    Parameters
    ----------
    X : array, indexed as ``X[sample, position, ...]``; cast to 64-bit integers.
    bpps : array, indexed as ``bpps[sample, channel, position, position]``; cast to float32.
    mat : accepted for call compatibility but not used by this class.
    seq_length : per-sample length used for cropping and for the padding mask.
    scored_length : per-sample count of leading positions flagged in ``scored_mask``.
    label, label_error, signal_to_noise, SN_filter_mask : training-only arrays.
        When ``label`` is None the dataset is in inference mode and the other
        three are ignored; when ``label`` is given all four must be provided.

    NOTE(review): items are cropped to their own length, so default batching
    presumably relies on every sample in a batch sharing one length — confirm.
    """

    def __init__(self,X,bpps,mat,seq_length,scored_length,label=None,label_error=None,signal_to_noise=None,SN_filter_mask=None):
        # `np.int` was deprecated in NumPy 1.20 and removed in 1.24; use an
        # explicit 64-bit integer type instead (the training loop calls .long()
        # on the batch anyway).
        self.X = X.astype(np.int64)
        self.bpps = bpps.astype(np.float32)

        if label is not None:
            self.label = label.astype(np.float32)
            self.signal_to_noise = signal_to_noise.astype(np.float32)
            self.label_error = label_error.astype(np.float32)
            self.SN_filter_mask = SN_filter_mask
        else:
            self.label = None

        # Padding mask: True on positions at or beyond the sample's sequence length.
        self.mask = np.zeros([len(X),130],dtype=bool)
        for i in range(len(seq_length)):
            if seq_length[i] < 130:
                self.mask[i,seq_length[i]:] = True

        # Scored mask: True on the first `scored_length` positions only.
        self.scored_mask = np.ones([len(X),130],dtype=bool)
        for i in range(len(scored_length)):
            if scored_length[i] < 130:
                self.scored_mask[i,scored_length[i]:] = False

        self.seq_length = seq_length

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return sample ``idx`` cropped to its sequence length.

        Inference mode yields ``(X, bpps, mask, scored_mask)``; training mode
        appends ``(label, label_error, signal_to_noise, SN_filter_mask)``.
        """
        N = self.seq_length[idx]
        X = self.X[idx,:N]
        bpps = self.bpps[idx,:,:N,:N]
        mask = self.mask[idx,:N]
        scored_mask = self.scored_mask[idx,:N]
        if self.label is None:
            return X,bpps,mask,scored_mask

        label = self.label[idx,:N]
        label_error = self.label_error[idx,:N]
        signal_to_noise = self.signal_to_noise[idx]
        SN_filter_mask = self.SN_filter_mask[idx]
        return X,bpps,mask,scored_mask,label,label_error,signal_to_noise,SN_filter_mask

In [9]:
# ---------------------------------------------------------------------------
# K-fold training loop.
# For every fold: build train/val datasets, train a fresh model for `nepochs`
# epochs, and, separately for each of the three reported targets, checkpoint
# the model and store the out-of-fold (OOF) predictions at the epoch where that
# target's validation RMSE is lowest.
# ---------------------------------------------------------------------------
nepochs = 300
n_fold = 5
kf = StratifiedKFold(n_fold,shuffle=True,random_state=831)

# NOTE(review): `dataset` and `loss_weights` are created here but never read in this cell.
dataset = Covid19Dataset(train_x,train_bpps,train_mat,train_length,train_scored,label,label_error,signal_to_noise,SN_filter_mask)
# One entry per fold: mean of the three best per-target validation RMSEs.
cv_score = []
# cv_score = [0.20199335118134817, 0.19580934941768646, 0.19966551661491394]
loss_weights = torch.Tensor([1.2,1.2,1.2,0.7,0.7]).reshape(1,5).to(device)
# OOF predictions: first 68 positions, three targets per training sample.
oof = np.zeros([len(train_x),68,3])
# oof = np.load('temp_oof.npy')
# Folds are stratified on the SN_filter flag.
for fold,(trn_group, test_group) in tqdm(enumerate(kf.split(train_x,SN_filter_mask)),total=n_fold):
#     trn_group = np.intersect1d(trn_group,signal_filter)
    # Validate only on samples with SN_filter == 1; training uses the whole train split.
    test_group = np.intersect1d(test_group,SN_filter)
    traindataset = Covid19Dataset(train_x[trn_group],
                             train_bpps[trn_group],
                             train_mat[trn_group],
                             train_length[trn_group],
                             train_scored[trn_group],
                             label[trn_group],
                             label_error[trn_group],
                             signal_to_noise[trn_group],
                             SN_filter_mask[trn_group])
    valdataset = Covid19Dataset(train_x[test_group],
                             train_bpps[test_group],
                             train_mat[test_group],
                             train_length[test_group],
                             train_scored[test_group],
                             label[test_group],
                             label_error[test_group],
                             signal_to_noise[test_group],
                             SN_filter_mask[test_group])
    
    # Training batches are shuffled and the last incomplete batch is dropped;
    # validation keeps the dataset order (required for the OOF assignment below).
    args_loader = {'batch_size': 128, 'shuffle': True, 'num_workers': 0, 'pin_memory': True, 'drop_last': True}
    train_loader = DataLoader(traindataset, **args_loader)
    args_loader = {'batch_size': 128, 'shuffle': False, 'num_workers': 0, 'pin_memory': True, 'drop_last': False}
    val_loader = DataLoader(valdataset, **args_loader)
    
    dataloaders = {'train' : train_loader, 'val' : val_loader}
    
    # Fresh model / optimizer / scheduler per fold (factory comes from ModelLib).
    model,optimizer,scheduler = Create_model(device)
    # NOTE(review): `best_model` is never updated; checkpoints are written to disk instead.
    best_model = {'reactivity':None,'deg_Mg_pH10':None,"deg_Mg_50C":None}
    best_loss = {'reactivity': np.inf,'deg_Mg_pH10': np.inf,'deg_Mg_50C': np.inf}
    for epoch in tqdm(range(nepochs)):
        epoch_loss = {'train': 0.0, 'val': 0.0, 'val_clean': 0.0, 'val_aug': 0.0,
                      'reactivity': 0.0,'deg_Mg_pH10': 0.0,'deg_Mg_50C': 0.0
                     }
        MA_loss = []
        # Validation accumulators: plain predictions, TTA-averaged predictions, targets.
        test_pred = []
        test_pred_aug = []
        test_y = []
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            for x_b,bpps_b,mask_b,scored_mask_b,label_b,label_error_b,signal_to_noise_b,SN_filter_mask_b in dataloaders[phase]:
                x_b = x_b.long().to(device)
                bpps_b = bpps_b.to(device)
                mask_b = mask_b.to(device)
                label_b = label_b.to(device)
                label_error_b = label_error_b.to(device)
                # Per-sample loss weight: signal_to_noise / 4.5 clamped to [0, 10],
                # shaped (batch, 1, 1) so it broadcasts over positions and targets.
                signal_to_noise_b = signal_to_noise_b.to(device).unsqueeze(1).unsqueeze(1)
                signal_to_noise_b = torch.clamp(signal_to_noise_b/4.5,0,10)
#                 signal_to_noise_b = torch.clamp(signal_to_noise_b/5,0,10)
#                 signal_to_noise_b = torch.clamp(torch.log(1 + signal_to_noise_b)/1.5,0,10)
#                 signal_to_noise_b = torch.sqrt(torch.clamp(signal_to_noise_b,0,999))/2

                # Transformed label error; only the commented-out loss variant below uses it.
                # NOTE(review): the origin of the constant 2.2496... is not visible here.
                label_error_b = torch.log(1+1.0/label_error_b[:,:68]) / 2.2496114573105803
    
                SN_filter_mask_b = SN_filter_mask_b.to(device)
                if phase=='train':
                    # Augmentation: for a random subset of samples (independent 0/1 draw
                    # per sample) overwrite feature channels -4/-3 with channels -2/-1 and
                    # the second-to-last bpps channel with the last one. The trailing
                    # channels are then dropped, so the model always sees the same width.
                    aug_mask = torch.randint(low=0,high=2,size=[len(bpps_b)],dtype=bool).to(device)
                    x_b[aug_mask,:,-4] = x_b[aug_mask,:,-2]
                    x_b[aug_mask,:,-3] = x_b[aug_mask,:,-1]
                    x_b = x_b[:,:,:-2]
                    bpps_b[aug_mask,-2] = bpps_b[aug_mask,-1]
                    bpps_b = bpps_b[:,:-1]
                else:
                    # Validation: build the fully-swapped ("aug") view of every sample next
                    # to the plain view so both can be predicted and averaged.
                    x_b_aug = x_b.clone()
                    x_b_aug[:,:,-4] = x_b_aug[:,:,-2]
                    x_b_aug[:,:,-3] = x_b_aug[:,:,-1]
                    x_b_aug  = x_b_aug[:,:,:-2]
                    bpps_b_aug = bpps_b.clone()
                    bpps_b_aug[:,-2] = bpps_b_aug[:,-1]
                    bpps_b_aug = bpps_b_aug[:,:-1]
                    
                    x_b = x_b[:,:,:-2]
                    bpps_b = bpps_b[:,:-1]
                
#                 if phase=='train':
#                     label_b += torch.normal(torch.zeros_like(label_b),1) * 0.001*label_error_b
                
                # Also executed in the 'val' phase, where no gradients are produced.
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase=='train'):
                    preds = model(x_b,bpps_b)
                    if phase=='val':
                        preds_aug = model(x_b_aug,bpps_b_aug)
                        # Average of plain and swapped-view predictions.
                        preds2 = 0.5*(preds + preds_aug)
                        # Keep the first 68 positions and first 3 targets for evaluation.
                        test_pred.append(preds[:,:68,:3].detach().cpu().numpy())
                        test_pred_aug.append(preds2[:,:68,:3].detach().cpu().numpy())
                        test_y.append(label_b[:,:68,:3].detach().cpu().numpy())

        
                    # Weighted column-wise RMSE over the first 68 positions: squared
                    # error times the per-sample weight, averaged per column after
                    # flattening to 5 columns, square-rooted, then averaged over columns.
                    # The same (plain-view) loss is reported for the 'val' phase.
                    loss = (preds[:,:68] - label_b[:,:68])**2
                    loss = torch.sqrt((loss * signal_to_noise_b).reshape(-1,5).mean(0)).mean()
#                     loss = torch.sqrt((loss * signal_to_noise_b).reshape(-1,5)[:,:3].mean(0)).mean()
                
#                     loss = (preds[:,:68] - label_b[:,:68])**2
#                     loss = (label_error_b * loss).mean()

                    if phase=='train':
                        loss.backward()
                        # Clip the global gradient norm to 0.5 before the update.
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                        optimizer.step()
#                         scheduler.step()
                # Both branches are identical: running_loss is the mean of the batch losses.
                if phase == 'train':
                    running_loss += loss.item() / len(dataloaders[phase])
                else:
                    running_loss += loss.item() / len(dataloaders[phase])
            if phase == 'train':
                epoch_loss['train'] = running_loss
            else:
                epoch_loss['val'] = running_loss
                test_pred = np.concatenate(test_pred,axis=0)
                test_pred_aug = np.concatenate(test_pred_aug,axis=0)
                test_y = np.concatenate(test_y,axis=0)
                # Unweighted column-wise RMSE over the three evaluated targets:
                # 'val_clean' uses plain predictions, 'val_aug' the averaged ones.
                epoch_loss['val_clean'] = np.sqrt(((test_pred-test_y)**2).reshape(-1,3).mean(0)).mean()
                res = ((test_pred_aug-test_y)**2).reshape(-1,3)
                epoch_loss['val_aug'] = np.sqrt(res.mean(0)).mean()
                epoch_loss['reactivity'] = np.sqrt(res[:,0].mean())
                epoch_loss['deg_Mg_pH10'] = np.sqrt(res[:,1].mean())
                epoch_loss['deg_Mg_50C'] = np.sqrt(res[:,2].mean())
        # The learning-rate schedule advances once per epoch (not per batch).
        scheduler.step()
        print("Epoch {}/{}   -   loss: {:5.5f} - val_loss: {:5.5f} - val_clean_loss: {:5.5f} - val_aug_loss: {:5.5f} - reactivity: {:5.5f} - deg_Mg_pH10: {:5.5f} - deg_Mg_50C: {:5.5f}".format(epoch+1, nepochs, epoch_loss['train'], epoch_loss['val'], epoch_loss['val_clean'], epoch_loss['val_aug'], epoch_loss['reactivity'], epoch_loss['deg_Mg_pH10'], epoch_loss['deg_Mg_50C']))
        # Per-target model selection: each target keeps its own best checkpoint and
        # its own OOF column, taken from the epoch with that target's lowest RMSE.
        for i,cat in enumerate(['reactivity','deg_Mg_pH10','deg_Mg_50C']):
            if epoch_loss[cat] < best_loss[cat]:
                best_loss[cat] = epoch_loss[cat]
                torch.save(model.state_dict(), f'fold{fold+1}_{cat}_model.pt')
                oof[test_group,:,i] = test_pred_aug[:,:,i]
#     cv_score += best_score / 5
    # Fold score = mean of the three best per-target RMSEs (possibly from different epochs).
    cv_score.append(sum(best_loss.values())/3)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

Ranger optimizer loaded. 
Gradient Centralization usage = False


HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value)


Epoch 1/300   -   loss: 0.53991 - val_loss: 0.52912 - val_clean_loss: 0.47813 - val_aug_loss: 0.47828 - reactivity: 0.44463 - deg_Mg_pH10: 0.51555 - deg_Mg_50C: 0.47466
Epoch 2/300   -   loss: 0.46525 - val_loss: 0.45463 - val_clean_loss: 0.41650 - val_aug_loss: 0.41722 - reactivity: 0.37165 - deg_Mg_pH10: 0.47330 - deg_Mg_50C: 0.40669
Epoch 3/300   -   loss: 0.41719 - val_loss: 0.38921 - val_clean_loss: 0.36791 - val_aug_loss: 0.36854 - reactivity: 0.31898 - deg_Mg_pH10: 0.42639 - deg_Mg_50C: 0.36025
Epoch 4/300   -   loss: 0.38816 - val_loss: 0.37502 - val_clean_loss: 0.35627 - val_aug_loss: 0.35653 - reactivity: 0.31227 - deg_Mg_pH10: 0.41051 - deg_Mg_50C: 0.34683
Epoch 5/300   -   loss: 0.37246 - val_loss: 0.35624 - val_clean_loss: 0.34113 - val_aug_loss: 0.34045 - reactivity: 0.30241 - deg_Mg_pH10: 0.38812 - deg_Mg_50C: 0.33081
Epoch 6/300   -   loss: 0.35808 - val_loss: 0.34260 - val_clean_loss: 0.32762 - val_aug_loss: 0.32726 - reactivity: 0.29097 - deg_Mg_pH10: 0.37377 - deg_Mg

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

Epoch 1/300   -   loss: 0.53521 - val_loss: 0.52897 - val_clean_loss: 0.49202 - val_aug_loss: 0.49220 - reactivity: 0.45638 - deg_Mg_pH10: 0.53264 - deg_Mg_50C: 0.48759
Epoch 2/300   -   loss: 0.46155 - val_loss: 0.45565 - val_clean_loss: 0.43368 - val_aug_loss: 0.43414 - reactivity: 0.38953 - deg_Mg_pH10: 0.48848 - deg_Mg_50C: 0.42441
Epoch 3/300   -   loss: 0.41682 - val_loss: 0.39495 - val_clean_loss: 0.38172 - val_aug_loss: 0.38156 - reactivity: 0.33013 - deg_Mg_pH10: 0.43945 - deg_Mg_50C: 0.37510
Epoch 4/300   -   loss: 0.38687 - val_loss: 0.38344 - val_clean_loss: 0.36884 - val_aug_loss: 0.36853 - reactivity: 0.32427 - deg_Mg_pH10: 0.42221 - deg_Mg_50C: 0.35912
Epoch 5/300   -   loss: 0.36939 - val_loss: 0.36383 - val_clean_loss: 0.35078 - val_aug_loss: 0.35022 - reactivity: 0.31591 - deg_Mg_pH10: 0.39904 - deg_Mg_50C: 0.33572
Epoch 6/300   -   loss: 0.35755 - val_loss: 0.35117 - val_clean_loss: 0.33962 - val_aug_loss: 0.33916 - reactivity: 0.30667 - deg_Mg_pH10: 0.38598 - deg_Mg

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

Epoch 1/300   -   loss: 0.55004 - val_loss: 0.51856 - val_clean_loss: 0.47142 - val_aug_loss: 0.47135 - reactivity: 0.43585 - deg_Mg_pH10: 0.50687 - deg_Mg_50C: 0.47133
Epoch 2/300   -   loss: 0.46183 - val_loss: 0.43607 - val_clean_loss: 0.40398 - val_aug_loss: 0.40407 - reactivity: 0.35449 - deg_Mg_pH10: 0.46012 - deg_Mg_50C: 0.39760
Epoch 3/300   -   loss: 0.41496 - val_loss: 0.38620 - val_clean_loss: 0.36611 - val_aug_loss: 0.36635 - reactivity: 0.31752 - deg_Mg_pH10: 0.41784 - deg_Mg_50C: 0.36370
Epoch 4/300   -   loss: 0.38874 - val_loss: 0.37415 - val_clean_loss: 0.35433 - val_aug_loss: 0.35409 - reactivity: 0.30833 - deg_Mg_pH10: 0.40349 - deg_Mg_50C: 0.35046
Epoch 5/300   -   loss: 0.37066 - val_loss: 0.37322 - val_clean_loss: 0.34859 - val_aug_loss: 0.34837 - reactivity: 0.31138 - deg_Mg_pH10: 0.39103 - deg_Mg_50C: 0.34270
Epoch 6/300   -   loss: 0.35771 - val_loss: 0.34399 - val_clean_loss: 0.32478 - val_aug_loss: 0.32462 - reactivity: 0.29012 - deg_Mg_pH10: 0.36714 - deg_Mg

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

Epoch 1/300   -   loss: 0.53515 - val_loss: 0.55033 - val_clean_loss: 0.49208 - val_aug_loss: 0.49219 - reactivity: 0.45420 - deg_Mg_pH10: 0.52836 - deg_Mg_50C: 0.49403
Epoch 2/300   -   loss: 0.45288 - val_loss: 0.46365 - val_clean_loss: 0.42411 - val_aug_loss: 0.42462 - reactivity: 0.37462 - deg_Mg_pH10: 0.47648 - deg_Mg_50C: 0.42276
Epoch 3/300   -   loss: 0.41063 - val_loss: 0.41397 - val_clean_loss: 0.37911 - val_aug_loss: 0.37936 - reactivity: 0.32882 - deg_Mg_pH10: 0.43492 - deg_Mg_50C: 0.37434
Epoch 4/300   -   loss: 0.38561 - val_loss: 0.39335 - val_clean_loss: 0.36476 - val_aug_loss: 0.36483 - reactivity: 0.31936 - deg_Mg_pH10: 0.41782 - deg_Mg_50C: 0.35731
Epoch 5/300   -   loss: 0.36906 - val_loss: 0.38204 - val_clean_loss: 0.35219 - val_aug_loss: 0.35140 - reactivity: 0.31331 - deg_Mg_pH10: 0.39879 - deg_Mg_50C: 0.34210
Epoch 6/300   -   loss: 0.35713 - val_loss: 0.36439 - val_clean_loss: 0.33696 - val_aug_loss: 0.33605 - reactivity: 0.30010 - deg_Mg_pH10: 0.38222 - deg_Mg

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

Epoch 1/300   -   loss: 0.54916 - val_loss: 0.50650 - val_clean_loss: 0.47460 - val_aug_loss: 0.47471 - reactivity: 0.44428 - deg_Mg_pH10: 0.50511 - deg_Mg_50C: 0.47475
Epoch 2/300   -   loss: 0.46921 - val_loss: 0.43299 - val_clean_loss: 0.41591 - val_aug_loss: 0.41640 - reactivity: 0.37841 - deg_Mg_pH10: 0.46247 - deg_Mg_50C: 0.40833
Epoch 3/300   -   loss: 0.42340 - val_loss: 0.38236 - val_clean_loss: 0.37123 - val_aug_loss: 0.37101 - reactivity: 0.32829 - deg_Mg_pH10: 0.41464 - deg_Mg_50C: 0.37010
Epoch 4/300   -   loss: 0.39450 - val_loss: 0.36543 - val_clean_loss: 0.35446 - val_aug_loss: 0.35419 - reactivity: 0.31649 - deg_Mg_pH10: 0.39636 - deg_Mg_50C: 0.34973
Epoch 5/300   -   loss: 0.37736 - val_loss: 0.34867 - val_clean_loss: 0.33842 - val_aug_loss: 0.33799 - reactivity: 0.30860 - deg_Mg_pH10: 0.37571 - deg_Mg_50C: 0.32966
Epoch 6/300   -   loss: 0.36334 - val_loss: 0.34007 - val_clean_loss: 0.32845 - val_aug_loss: 0.32798 - reactivity: 0.30007 - deg_Mg_pH10: 0.36301 - deg_Mg

In [10]:
# Report each fold's validation score, then the mean across folds.
for fold_idx in range(n_fold):
    print(f"fold {fold_idx+1} score:",cv_score[fold_idx])
print()
mean_cv_score = np.mean(cv_score)
print("CV score:",mean_cv_score)
# Persist the out-of-fold predictions; the file name carries the CV score.
np.save('oof_{:5.5f}'.format(mean_cv_score),oof)

fold 1 score: 0.20348056654135385
fold 2 score: 0.2161213755607605
fold 3 score: 0.2033812403678894
fold 4 score: 0.21425066888332367
fold 5 score: 0.2046167403459549

CV score: 0.20837011833985644


In [11]:
# Test-time inference.
# For each reported target, load every fold checkpoint that was saved for that
# target, predict the test set with plain + swapped-channel inputs (averaged),
# and accumulate the mean over checkpoints into column j of `test_predictions`.
dataset = Covid19Dataset(test_x,test_bpps,test_mat,test_length,test_scored)
# batch_size=1: dataset items are cropped to their own sequence length.
# NOTE(review): presumably because test sequences differ in length — confirm.
args_loader = {'batch_size': 1, 'shuffle': False, 'num_workers': 0, 'pin_memory': True, 'drop_last': False}
test_loader = DataLoader(dataset, **args_loader)
# Padded to 130 positions; only the first three of the five columns are filled here.
test_predictions = np.zeros([len(test_x),130,5])
for j,col in enumerate(['reactivity', 'deg_Mg_pH10', 'deg_Mg_50C']):
    paths = glob(f'fold*_{col}_model.pt')
    if not paths:
        # Without this guard a missing checkpoint set silently yields all-zero predictions.
        raise FileNotFoundError(f'no checkpoints match fold*_{col}_model.pt')
    with torch.no_grad():
        for path in tqdm(paths):
            # map_location lets checkpoints saved on GPU load when `device` is the CPU.
            # `model` is the network instance left over from the training cell.
            model.load_state_dict(torch.load(path, map_location=device))
            model.eval().to(device)
            predictions = []
            for x_b,bpps_b,mask_b,scored_mask_b in test_loader:
                x_b = x_b.long().to(device)
                bpps_b = bpps_b.to(device)

                # Swapped-channel view: same transformation as in validation.
                x_b_aug = x_b.clone()
                x_b_aug[:,:,-4] = x_b_aug[:,:,-2]
                x_b_aug[:,:,-3] = x_b_aug[:,:,-1]
                x_b_aug  = x_b_aug[:,:,:-2]
                bpps_b_aug = bpps_b.clone()
                bpps_b_aug[:,-2] = bpps_b_aug[:,-1]
                bpps_b_aug = bpps_b_aug[:,:-1]

                # Plain view: just drop the trailing channels.
                x_b = x_b[:,:,:-2]
                bpps_b = bpps_b[:,:-1]

                preds = model(x_b,bpps_b)
                preds_aug = model(x_b_aug,bpps_b_aug)
                preds = 0.5*(preds + preds_aug)

                # Zero-pad along the position axis to 130 so all samples can be concatenated.
                p = torch.zeros([preds.shape[0],130,preds.shape[2]])
                p[:,:preds.shape[1]] = preds.cpu()
                predictions.append(p)
            predictions = torch.cat(predictions,dim=0).numpy()
            # Only the column this checkpoint was selected for is kept.
            test_predictions[:,:,j] += predictions[:,:,j] / len(paths)

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [12]:
# Submission template; its first column becomes the index, which the fill
# cell below addresses with labels of the form "<id>_<position>".
ss = pd.read_csv(
    "../input/stanford-covid-vaccine/sample_submission.csv",
    index_col=0,
)

In [13]:
# Copy the predictions into the submission template.
# One block assignment per sequence replaces a separate scalar `.loc` write for
# every (position, target) pair; the same cells receive the same values.
# Every "<id>_<position>" label is expected to exist in the template index
# already: a missing label now raises KeyError instead of appending a new row.
target_cols = ['reactivity', 'deg_Mg_pH10', 'deg_Mg_50C']
for n,row in tqdm(test.iterrows(),total=len(test)):
    test_id = row['id']
    seq_len = row['seq_length']
    row_ids = [test_id+'_'+str(i) for i in range(seq_len)]
    # `n` is the row label from iterrows and is used as the positional index into
    # test_predictions, exactly as before.
    ss.loc[row_ids, target_cols] = test_predictions[n,:seq_len,:len(target_cols)]

HBox(children=(FloatProgress(value=0.0, max=3634.0), HTML(value='')))




In [14]:
# Write the submission; the mean CV score is embedded in the file name.
submission_path = "submission_cnn_{:5.5f}.csv".format(np.mean(cv_score))
ss.to_csv(submission_path,index=True)