In [1]:
import time

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
# from sklearn.preprocessing import LabelEncoder 

In [2]:
# pd.set_option('mode.chained_assignment', None)

### Defining Methods

In [3]:
def get_key(my_dict, val):
    for key, value in my_dict.items():
         if val == value:
             return key
 
    return "key doesn't exist"

### Reading Data

In [4]:
NOTEBOOKS_PATH = '../data/NL2ML_ structure - levin.csv'
DATASETS_PATH = '../data/NL2ML_ structure - data_structure.csv'

In [5]:
notebooks = pd.read_csv(NOTEBOOKS_PATH, skiprows=1, nrows=96)
datasets = pd.read_csv(DATASETS_PATH)

In [6]:
notebooks.head(2)

Unnamed: 0,chunk_id,dataset_id,notebook_id,splitting_id,code_block,data_format,graph_vertex,errors,graph_vertex_m1,graph_vertex_m2,...,python_methods_m3,python_methods_p1,python_methods_p2,python_methods_p3,kaggle_link,kaggle_comments,kaggle_upvotes,kaggle_section,kaggle_section_overview,kaggle_score
0,1,jane_street,1,1,import os\nimport numpy as np\nimport matplotl...,,Environment.import_modules,False,,,...,,,,,,,,,,
1,2,jane_street,1,1,"print('Reading datasets...', end='')\n\ntrain_...",Table,Data_Extraction.load_from_csv,False,,,...,,,,,,,,,,


In [7]:
datasets.head(2)

Unnamed: 0,dataset_id,url,Name,TL;DR (In plain English),ProblemType,number of columns (for tabular),number of entries,image resolution,number of images,Data Format,LabelType,Number of classes,Loss Function/Metrics,Target Column(s) Name,Columns DTypes
0,,https://www.kaggle.com/c/titanic/data,Titanic - Machine Learning from Disaster\n,,classification,25,-,-,-,csv,,2,categorizationaccuracy,Survived,"String, Integer, Decimal, Other"
1,,https://www.kaggle.com/c/competitive-data-scie...,Predict Future Sales,,regression,18,-,-,-,csv,,-,rmse,item_cnt_day,"String, Decimal, Id, Other"


In [8]:
nl2ml = notebooks.merge(datasets, on=['dataset_id'], how='left')

In [9]:
nl2ml.head(2)

Unnamed: 0,chunk_id,dataset_id,notebook_id,splitting_id,code_block,data_format,graph_vertex,errors,graph_vertex_m1,graph_vertex_m2,...,number of columns (for tabular),number of entries,image resolution,number of images,Data Format,LabelType,Number of classes,Loss Function/Metrics,Target Column(s) Name,Columns DTypes
0,1,jane_street,1,1,import os\nimport numpy as np\nimport matplotl...,,Environment.import_modules,False,,,...,303,-,-,-,csv,,2,custom metrics,action,"Decimal, Boolean, Integer, Other"
1,2,jane_street,1,1,"print('Reading datasets...', end='')\n\ntrain_...",Table,Data_Extraction.load_from_csv,False,,,...,303,-,-,-,csv,,2,custom metrics,action,"Decimal, Boolean, Integer, Other"


### Vertices Preprocessing

In [10]:
nl2ml['graph_vertex'].apply(lambda x: x.split(';')[0].split('.')[0]).value_counts()

Data_Transform       32
EDA                  20
Model_Train          14
Visualization        11
Environment           7
Hyperparam_Tuning     4
Data_Extraction       4
Exporatory_DA         3
Data_Export           1
Name: graph_vertex, dtype: int64

In [11]:
nl2ml.columns

Index(['chunk_id', 'dataset_id', 'notebook_id', 'splitting_id', 'code_block',
       'data_format', 'graph_vertex', 'errors', 'graph_vertex_m1',
       'graph_vertex_m2', 'graph_vertex_m3', 'graph_vertex_p1',
       'graph_vertex_p2', 'graph_vertex_p3', 'comments', 'libraries', 'ast',
       'graph_vertex_regex', 'python_methods', 'docstrings',
       'python_methods_m1', 'python_methods_m2', 'python_methods_m3',
       'python_methods_p1', 'python_methods_p2', 'python_methods_p3',
       'kaggle_link', 'kaggle_comments', 'kaggle_upvotes', 'kaggle_section',
       'kaggle_section_overview', 'kaggle_score', 'url', 'Name',
       'TL;DR (In plain English)', 'ProblemType',
       'number of columns (for tabular)', 'number of entries',
       'image resolution', 'number of images', 'Data Format', 'LabelType',
       'Number of classes', 'Loss Function/Metrics', 'Target Column(s) Name',
       'Columns DTypes'],
      dtype='object')

In [12]:
nl2ml['vertex_l1'], nl2ml['vertex_l2'] = nl2ml['graph_vertex'].apply(lambda x: x.split(';')[0].split('.')[0]), nl2ml['graph_vertex'].apply(lambda x: x.split(';')[0].split('.')[1])

### Missing Values

In [13]:
nl2ml.replace('-', -1, inplace=True)
nl2ml.fillna(-1, inplace=True)

In [14]:
TASK_FEATURES = ['ProblemType',
                'number of columns (for tabular)', 'number of entries',
                'LabelType', 'Number of classes', 'Loss Function/Metrics',
                'Target Column(s) Name']
TARGET_COLUMN = 'vertex_l1'

### Grouping chunks by notebooks

In [15]:
def group_by_notebooks(data:pd.DataFrame, vertex_col:str='vertex_l1') -> pd.DataFrame:
    notebook_cols = [['notebook_id', vertex_col] + TASK_FEATURES]
    df = pd.DataFrame(columns=notebook_cols)
    for i, notebook_id in enumerate(data['notebook_id'].unique()):
        notebook = data[data['notebook_id'] == notebook_id].reset_index(drop=True)
        vertices_seq = " ".join(notebook[vertex_col])
        task_features = notebook[TASK_FEATURES].loc[0]
        row = [notebook_id, vertices_seq] + task_features.tolist()
        df.loc[i] = row
        print('notebook #{} done'.format(notebook_id))
    return df

### Taking Train Features

In [16]:
train = group_by_notebooks(nl2ml, TARGET_COLUMN)
X, y = train[TASK_FEATURES], train[TARGET_COLUMN]

notebook #1 done
notebook #7 done
notebook #9 done


In [17]:
train

Unnamed: 0,notebook_id,vertex_l1,ProblemType,number of columns (for tabular),number of entries,LabelType,Number of classes,Loss Function/Metrics,Target Column(s) Name
0,1,Environment Data_Extraction Data_Transform Mod...,classification,303,-1,-1.0,2,custom metrics,action
1,7,Environment Data_Extraction EDA EDA Data_Extra...,regression,163,-1,-1.0,-1,rmsle,SalePrice
2,9,Environment Environment Data_Extraction Expora...,regression,163,-1,-1.0,-1,rmsle,SalePrice


### Converting Dtypes

In [18]:
cat_encodings = {}
for i, col in enumerate(X.columns):
    try:
        X[col] =  X[col].astype('float32')
    except:
        X[col] = pd.Categorical(X[col])
        cat_encodings.update({i:dict(enumerate(X[col].cat.categories))})
        X[col] = X[col].cat.codes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = pd.Categorical(X[col])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].cat.codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] =  X[col].astype('float32')


### Target Preprocessing: Encoding Vertices

In [19]:
lang = {vertice:i+2 for i, vertice in enumerate(nl2ml[TARGET_COLUMN].unique())} #TODO: save the dict as a local file
lang.update({'<start>':1, '<end>':max(lang.values())+1})
def encode_vertices(vertices_seq, lang:dict=lang):
    encoded = np.flip(np.array([lang[vertex] for vertex in vertices_seq[0].split(' ')]))
    return encoded

### Target Preprocessing: Padding Sequences

In [20]:
Y = tf.keras.preprocessing.sequence.pad_sequences(y.apply(encode_vertices, axis=1))

### Defining Constants

In [21]:
# BUFFER_SIZE = len(input_tensor_train)
BATCH_SIZE = 1
steps_per_epoch = len(X)//BATCH_SIZE
embedding_dim = 128
units = 512
# vocab_inp_size = len(inp_lang.word_index) + 1
# vocab_tar_size = len(targ_lang.word_index) + 1

### Creating tf.Dataset

In [22]:
dataset = tf.data.Dataset.from_tensor_slices((X.values, Y))
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [23]:
max_length_targ, max_length_feat = Y.shape[1], X.values.shape[1]

### Building the Model

In [24]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
    # self.attention = BahdanauAttention(self.dec_units)

  def call(self, x): #, hidden):#, enc_output):
    # enc_output shape == (batch_size, max_length, hidden_size)
    # context_vector, attention_weights = self.attention(hidden, enc_output)
    attention_weights = tf.ones(x.shape)
    context_vector = tf.ones(x.shape)
    # print("X Vector has {} type and {} shape".format(type(x), x.shape))
    # print("Context Vector has {} type and {} shape".format(type(context_vector), context_vector.shape))
    # print("Attention Vector has {} type and {} shape".format(type(context_vector), context_vector.shape))
    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)
 
    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # passing the concatenated vector to the GRU
    output, state = self.gru(x)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state, attention_weights

In [25]:
# sample_hidden = tf.zeros((BATCH_SIZE, units))

In [26]:
decoder = Decoder(len(lang)+2, embedding_dim, units, BATCH_SIZE)

sample_decoder_output, _, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)))
                                      #, sample_hidden)
                                      #, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (1, 13)


In [27]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)

In [423]:
@tf.function
def train_step(inp, targ):#, enc_hidden):
  loss = 0
  batch_perplexity_object = PerplexityMetric()
  batch_perplexity = 0

  with tf.GradientTape() as tape:
    # enc_output, enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = tf.zeros((BATCH_SIZE, units)) #enc_hidden

    dec_input = tf.expand_dims([1] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]): # for each vertex (token) from solution (sequence)
      # passing enc_output to the decoder
      predictions, dec_hidden, _ = decoder(dec_input)#, dec_hidden, enc_output)
      print('targ shape: {}, pred shape: {}'.format(tf.shape(targ[:, t]), tf.shape(predictions)))
      loss += loss_function(targ[:, t], predictions)

      ##TODO:
      perplexity += perplexity_metric(targ[:, t], predictions)
      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  ##TODO:
  # batch_perplexity = perplexity_metric(targ, predictions) 
  
  variables = decoder.trainable_variables # + encoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))
  return batch_loss

In [152]:
# def compute_bleu(reference_corpus, translation_corpus, max_order=4,
#                  smooth=False):
#   """Computes BLEU score of translated segments against one or more references.
#   Args:
#     reference_corpus: list of lists of references for each translation. Each
#         reference should be tokenized into a list of tokens.
#     translation_corpus: list of translations to score. Each translation
#         should be tokenized into a list of tokens.
#     max_order: Maximum n-gram order to use when computing BLEU score.
#     smooth: Whether or not to apply Lin et al. 2004 smoothing.
#   Returns:
#     3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram
#     precisions and brevity penalty.
#   """
#   matches_by_order = [0] * max_order
#   possible_matches_by_order = [0] * max_order
#   reference_length = 0
#   translation_length = 0
#   for (references, translation) in zip(reference_corpus,
#                                        translation_corpus):
#     reference_length += min(len(r) for r in references)
#     translation_length += len(translation)

#     merged_ref_ngram_counts = collections.Counter()
#     for reference in references:
#       merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
#     translation_ngram_counts = _get_ngrams(translation, max_order)
#     overlap = translation_ngram_counts & merged_ref_ngram_counts
#     for ngram in overlap:
#       matches_by_order[len(ngram)-1] += overlap[ngram]
#     for order in range(1, max_order+1):
#       possible_matches = len(translation) - order + 1
#       if possible_matches > 0:
#         possible_matches_by_order[order-1] += possible_matches

#   precisions = [0] * max_order
#   for i in range(0, max_order):
#     if smooth:
#       precisions[i] = ((matches_by_order[i] + 1.) /
#                        (possible_matches_by_order[i] + 1.))
#     else:
#       if possible_matches_by_order[i] > 0:
#         precisions[i] = (float(matches_by_order[i]) /
#                          possible_matches_by_order[i])
#       else:
#         precisions[i] = 0.0

#   if min(precisions) > 0:
#     p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
#     geo_mean = math.exp(p_log_sum)
#   else:
#     geo_mean = 0

#   ratio = float(translation_length) / reference_length

#   if ratio > 1.0:
#     bp = 1.
#   else:
#     bp = math.exp(1 - 1. / ratio)

#   bleu = geo_mean * bp

#   return (bleu, precisions, bp, ratio, translation_length, reference_length)

In [430]:
class PerplexityMetric(tf.keras.metrics.Metric):
    """
    USAGE NOTICE: this metric accepts only logits for now (i.e. expect the same behaviour as from tf.keras.losses.SparseCategoricalCrossentropy with the a provided argument "from_logits=True", 
		here the same loss is used with "from_logits=True" enforced so you need to provide it in such a format)
    METRIC DESCRIPTION:
    Popular metric for evaluating language modelling architectures.
    More info: http://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf.
    DISCLAIMER: Original function created by Kirill Mavreshko in https://github.com/kpot/keras-transformer/blob/b9d4e76c535c0c62cadc73e37416e4dc18b635ca/example/run_gpt.py#L106. 
    My "contribution": I converted Kirill method's logic (and added a padding masking to to it) into this new Tensorflow 2.0 way of doing things via a stateful "Metric" object. This required making the metric a fully-fledged object by subclassing the Metric class. 
    """
    def __init__(self, name='perplexity', **kwargs):
      super(PerplexityMetric, self).__init__(name=name, **kwargs)
      self.cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False, reduction='none')
      # self.cross_entropy = tf.keras.losses.CategoricalCrossentropy(from_logits=False, reduction='none')
      self.perplexity = self.add_weight(name='tp', initializer='zeros')

		# Consider uncommenting the decorator for a performance boost (?)  		
    # @tf.function
    def _calculate_perplexity(self, real, pred):
			# The next 4 lines zero-out the padding from loss calculations, 
			# this follows the logic from: https://www.tensorflow.org/beta/tutorials/text/transformer#loss_and_metrics 			
      mask = tf.math.logical_not(tf.math.equal(real, 0))
      loss_ = self.cross_entropy(real, pred)
      print(loss_)
      mask = tf.cast(mask, dtype=loss_.dtype)
      loss_ *= mask
			# Calculating the perplexity steps: 			
      step1 = K.mean(loss_, axis=-1)
      step2 = K.exp(step1)
      perplexity = K.mean(step2)
      return perplexity

    def update_state(self, y_true, y_pred, sample_weight=None):
      # TODO:FIXME: handle sample_weight !
      if sample_weight is not None:
          print("WARNING! Provided 'sample_weight' argument to the perplexity metric. Currently this is not handled and won't do anything differently..")
      perplexity = self._calculate_perplexity(y_true, y_pred)
			# Remember self.perplexity is a tensor (tf.Variable), so using simply "self.perplexity = perplexity" will result in error because of mixing EagerTensor and Graph operations 
      self.perplexity.assign_add(perplexity)
      # self.perplexity.assign() ##TODO
      print('current perplexity is: {}'.format(self.perplexity))

    def result(self):
      return self.perplexity

    def reset_states(self):
      # The state of the metric will be reset at the start of each epoch.
      self.perplexity.assign(0.)

In [431]:
perplexity_metric = PerplexityMetric()

In [432]:
perplexity_metric.result()

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

In [441]:
loss_function([0.1], [1.0, 0.0, 0.0, 0.0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.7436683>

In [439]:
loss_function([[0.0], [1.0]],
                    [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]])

<tf.Tensor: shape=(), dtype=float32, numpy=0.37183416>

In [433]:
perplexity_metric([[0.0], [1.0]],
                    [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]])

tf.Tensor([3.576278e-07 3.576278e-07], shape=(2,), dtype=float32)
current perplexity is: <tf.Variable 'tp:0' shape=() dtype=float32, numpy=1.0000002>


<tf.Tensor: shape=(), dtype=float32, numpy=1.0000002>

In [None]:
# perplexity_metric.result()

In [182]:
checkpoint_dir = './decoder_training_checkpoints/'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer
                                , metrics=perplexity_metric
                                #  , encoder=encoder
                                 , decoder=decoder)

### Model Training

In [155]:
EPOCHS = 2

print('Initial Perplexity: {} '.format(perplexity_metric.result()))
for epoch in range(EPOCHS):
  start = time.time()

  # enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0
  # total_batch_perplexity = 0
  for (batch, (feat, targ)) in enumerate(dataset.take(steps_per_epoch)):
    # print ('Features: {}, Target: {}'.format(feat, targ))
    batch_loss = train_step(feat, targ)#, enc_hidden) ##TODO: return batch_perplexity
    total_loss += batch_loss
    # total_batch_perplexity += batch_perplexity
    if batch % 100 == 0:
      print('Epoch {} Batch {} Loss {:.4f} Perplexity {:.4f}'.format(epoch + 1,
                                                    batch,
                                                    batch_loss.numpy(),
                                                    perplexity_metric.result()))
##TODO: avg perplexity over batches
# epoch_perplexity = total_batch_perplexity / n_batches
if (epoch + 1) % 2 == 0:
  print('saving')
  checkpoint.save(file_prefix = checkpoint_prefix)
  print('saved')

print('Epoch {} Loss {:.4f} Perplexity {:.4f}'.format(epoch + 1,
                                    total_loss / steps_per_epoch
                                    # , epoch_perplexity
                                    ))
print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

nsor("strided_slice_140:0", shape=(1,), dtype=int32)
calculating perplexity for Tensor("strided_slice_142:0", shape=(1,), dtype=int32) and Tensor("decoder/dense/BiasAdd_35:0", shape=(1, 13), dtype=float32)
current t is 37
current targ[:, t] is Tensor("strided_slice_144:0", shape=(1,), dtype=int32)
calculating perplexity for Tensor("strided_slice_146:0", shape=(1,), dtype=int32) and Tensor("decoder/dense/BiasAdd_36:0", shape=(1, 13), dtype=float32)
current t is 38
current targ[:, t] is Tensor("strided_slice_148:0", shape=(1,), dtype=int32)
calculating perplexity for Tensor("strided_slice_150:0", shape=(1,), dtype=int32) and Tensor("decoder/dense/BiasAdd_37:0", shape=(1, 13), dtype=float32)
current t is 39
current targ[:, t] is Tensor("strided_slice_152:0", shape=(1,), dtype=int32)
calculating perplexity for Tensor("strided_slice_154:0", shape=(1,), dtype=int32) and Tensor("decoder/dense/BiasAdd_38:0", shape=(1, 13), dtype=float32)
current t is 40
current targ[:, t] is Tensor("strided_sl

In [183]:
def preprocess_task(task_vector):
    # either convert to float32 or encode to categoricals
    for i, el in enumerate(task_vector):
        try:
            task_vector[i] = float(task_vector[i])
        except:
            task_vector[i] = get_key(cat_encodings[i], task_vector[i])
    return task_vector.astype('float32')

### Model Evaluation

In [184]:
example_task_vector = X.loc[0]

In [189]:
def evaluate(task_vector):
  attention_plot = np.zeros((max_length_targ, max_length_feat))

  task_vector = preprocess_task(task_vector)

  # inputs = [inp_lang.word_index[i] for i in task_vector.split(' ')]
  # inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs],
  #                                                        maxlen=max_length_feat,
  #                                                        padding='post')
  inputs = tf.convert_to_tensor(task_vector) #inputs)

  result = ''

  # hidden = [tf.zeros((1, units))]
  # enc_out, enc_hidden = encoder(inputs, hidden)

  dec_hidden = tf.zeros((BATCH_SIZE, units)) #enc_hidden
  dec_input = tf.expand_dims([1], 0)

  for t in range(max_length_targ):
    # print(t, max_length_targ)
    predictions, dec_hidden, attention_weights = decoder(dec_input
                                                        #  , dec_hidden,
                                                        #  , enc_out
                                                         )
    
    # storing the attention weights to plot later on
    attention_weights = tf.reshape(attention_weights, (-1, ))
    attention_plot[t] = attention_weights.numpy()

    predicted_id = tf.argmax(predictions[0]).numpy()

    result = get_key(lang, predicted_id) + ' ' + result #targ_lang.index_word[predicted_id] + ' '

    if get_key(lang, predicted_id) == '<end>':
      return result, task_vector, attention_plot

    # the predicted ID is fed back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

  return result, task_vector, attention_plot

In [190]:
# perplexity_metric(Y[0], result)

In [191]:
evaluate(example_task_vector)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  task_vector[i] = float(task_vector[i])
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred shape is [ 1 13]
pred 

('Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform Data_Transform ',
 ProblemType                          0.0
 number of columns (for tabular)    303.0
 number of entries        

In [39]:
result, task_vector, attention_plot = evaluate(example_task_vector)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  task_vector[i] = float(task_vector[i])


### Metrics

In [40]:
# get_scores()

## Текущие вопросы
1. Мы решаем seq2seq element-wise или отображение из задачи в последовательность вершин?

    1.1 А что если сначала обучить unsupervised-сеть на последовательностях вершин (предсказывать следующую вершину)? То есть инициализация весов

2. Как измерить "правильность" сгенерированных последовательностей вершин?

3. Что если обучать последовательность вершин от конца к началу?

--4. Какие фичи мы берём для первой версии модели?

5. Какие есть референс-архитектуры, на которые можно обратить внимание?

    5.1 Как должна выглядеть архитектура нашей нейросети

6. Что генерить: верхнеуровневые вершины/конкатенацию уровней вершин/верхнеуровневые + низкоуровневые вершины по отдельности?