In [None]:
# Libraries

import matplotlib
import os
import pandas
import platform
import seaborn
import shutil
import sys

In [None]:
# Ensure source path

PROJECT_NAME = 'upolanc-thesis'

# Fallback project locations, used when no parent of the working directory is the project folder
DEFAULT_ROOTS = {
	'Linux'   : '/d/hpc/projects/FRI/up4472/upolanc-thesis',
	'Windows' : 'C:\\Developer\\Workspace\\PyCharm\\Projects\\upolanc-thesis'
}

ROOT = os.getcwd()

# Walk up the directory tree until a path ending with the project name is found
while not ROOT.endswith(PROJECT_NAME) :
	parent = os.path.abspath(os.path.join(ROOT, os.pardir))

	# The parent of the filesystem root is the root itself, so there is nothing left to search
	if parent == ROOT :
		system = platform.system()

		if system not in DEFAULT_ROOTS :
			raise ValueError('Could not locate [{}] and no default root is defined for platform [{}]'.format(PROJECT_NAME, system))

		ROOT = DEFAULT_ROOTS[system]

		print(f'Warning : could not find correct directory, using default : {ROOT}')
		print()

		break

	ROOT = parent

# Make the project importable (source.python.*) and resolve relative paths from the project root
if ROOT not in sys.path :
	sys.path.append(ROOT)

os.chdir(ROOT)

In [None]:
# Code

from source.python        import runtime
from source.python.io     import loader
from source.python.io     import writer
from source.python.report import report_concat
from source.python.report import report_filter
from source.python.report import report_load
from source.python.report import report_plot

# Apply the numpy / pandas / plot settings provided by source.python.runtime
# NOTE(review): the concrete options are set inside the runtime module and are not visible from this notebook
runtime.set_numpy_format()
runtime.set_pandas_format()
runtime.set_plot_theme()

In [None]:
# List reports folder content

PRINT_FILES = False

ROOT_DIR = os.path.join(ROOT, 'reports')
DATA_DIR = os.path.join(ROOT, 'output', 'nbp13-report')

# Recreate the output folder from scratch so that files from a previous run are not kept
shutil.rmtree(DATA_DIR, ignore_errors = True)
os.makedirs(DATA_DIR, exist_ok = True)

if PRINT_FILES :
	# Only sub-folders can be listed; a plain file inside the reports folder would make os.listdir() raise
	folders = [os.path.join(ROOT_DIR, name) for name in os.listdir(ROOT_DIR)]
	folders = [path for path in folders if os.path.isdir(path)]

	for index, path_l0 in enumerate(folders, start = 0) :
		if index == 0 : print('{} :'.format(path_l0))
		else          : print('\n{} :'.format(path_l0))

		for filename in os.listdir(path_l0) :
			print('...\\{}'.format(filename))

		print()

In [None]:
# Load existing configurations

CONFIG_DIR = os.path.join(ROOT, 'resources', 'tuner')

ZCONFIG_PATH = os.path.join(CONFIG_DIR, 'zrimec.json')
WCONFIG_PATH = os.path.join(CONFIG_DIR, 'washburn.json')

ZCONFIG = list()
WCONFIG = list()

CONFIG_MAX       = 100   # maximum number of configurations kept per model
CONFIG_BEST      = 10    # number of top trials taken from every tune report
CONFIG_OVERWRITE = True  # write the resulting configurations back to disk
CONFIG_EXTEND    = False # start from the configurations already stored on disk

# Previously saved configurations are only loaded when they should be extended
if CONFIG_EXTEND :
	if os.path.exists(ZCONFIG_PATH) :
		ZCONFIG = loader.load_json(filename = ZCONFIG_PATH)

		print('Successfully loaded configuration [{:88s}] with [{:2d}] elements'.format(ZCONFIG_PATH, len(ZCONFIG)))
		print()

	if os.path.exists(WCONFIG_PATH) :
		WCONFIG = loader.load_json(filename = WCONFIG_PATH)

		print('Successfully loaded configuration [{:88s}] with [{:2d}] elements'.format(WCONFIG_PATH, len(WCONFIG)))
		print()

# 1. Tuner CNN

In [None]:
# Load the cnn tune reports from the reports folder
# NOTE(review): n = 5 here while CONFIG_BEST = 10 is used when exporting configurations - confirm what n limits

report_tune_model = report_load.load_cnn_tune_reports(root = ROOT_DIR, show = False, n = 5)

In [None]:
# Per report summary : number of trials, trials with a missing loss, trials above / below the R2 threshold

R2_THRESHOLD = 0.00

summary = '{:50s} : Size = {:4d} || NaN Loss = {:4d} || Pos R2 = {:4d} || Neg R2 = {:4d}'

for key, dataframe in report_tune_model['regression'].items() :
	valid_r2 = dataframe['valid_r2']

	# Comparisons with NaN are False, so trials with a missing R2 land in neither group
	nan_mse = int(dataframe['valid_loss'].isnull().sum())
	pos_r2  = int((valid_r2 > R2_THRESHOLD).sum())
	neg_r2  = int((valid_r2 < R2_THRESHOLD).sum())

	print(summary.format(key, len(dataframe), nan_mse, pos_r2, neg_r2))

print()

In [None]:
# Merge the regression tune reports into a single table (n = None is passed as the row limit argument)

report = report_concat.concat_cnn_tune_reports(reports = report_tune_model, mode = 'regression', n = None)

## 1.1 All

In [None]:
# All reports

if report is not None :
	# Merge the three target columns into one dash separated label (missing parts are skipped)
	report['Target'] = report[['Target0', 'Target1', 'Target2']].agg(lambda x : '-'.join(x.dropna()), axis = 1)

	# Target was appended as the last column; move it to the front
	old_cols = report.columns.tolist()
	new_cols = old_cols[-1:] + old_cols[:-1]
	report = report[new_cols]

	report = report.rename(columns = {
		'Valid_MSE' : 'MSE',
		'Valid_R2'  : 'R2'
	})

	# Human readable sequence names ('pu4096' was previously never mapped because 'po4096' was listed twice)
	# NOTE(review): the Sequence column is dropped a few lines below, so this only matters if it is kept in the future
	report['Sequence'] = report['Sequence'].replace({
		'tf2150' : 'transcript 2150 bp',
		'tf6150' : 'transcript 6150 bp',
		'po0512' : 'promoter 512 bp',
		'po4096' : 'promoter 4096 bp',
		'pu4096' : 'promoter + 5\'utr 4096 bp'
	})

	# Filter identifiers 'f1' .. 'f6' become the integers 1 .. 6
	report['Filter'] = report['Filter'].replace({
		'f1' : '1',
		'f2' : '2',
		'f3' : '3',
		'f4' : '4',
		'f5' : '5',
		'f6' : '6'
	})
	report['Filter'] = report['Filter'].astype(int)

	report = report.drop(columns = ['Target0', 'Target1', 'Target2', 'Sequence', 'ID', 'Train_MSE', 'Valid_MAE', 'Optimizer', 'Scheduler'])
	report = report[['Target', 'Model', 'Filter', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Batch', 'Epoch', 'MSE', 'R2']]

report.head(n = 10)

## 1.2 Zrimec

In [None]:
# Top 10 tune results of the zrimec model

x = (
	report
	.loc[report['Model'] == 'zrimec']
	.reset_index(drop = True)
	.drop(columns = ['Filter', 'Epoch'])
	.head(n = 10)
)

x

## 1.3 Washburn

In [None]:
# Top 10 tune results of the washburn model

x = (
	report
	.loc[report['Model'] == 'washburn']
	.reset_index(drop = True)
	.drop(columns = ['Filter', 'Epoch'])
	.head(n = 10)
)

x

In [None]:
# Save top model configurations (to same format as model params)

for key, df in report_tune_model['regression'].items() :
	df = df.sort_values('valid_r2', ascending = False)

	# layers = [last convx index, last maxpoolx index] that the shared 'x' parameters are expanded to
	if   key.startswith('zrimec')   : config, layers = ZCONFIG, [3, 3]
	elif key.startswith('washburn') : config, layers = WCONFIG, [6, 3]
	else : raise ValueError('Unknown model in report key : {}'.format(key))

	# A report may contain fewer than CONFIG_BEST trials, so never index past its last row
	for row in range(min(CONFIG_BEST, len(df))) :
		item = {
			'id' : 'id'
		}

		temp = df.iloc[row, :].to_dict()

		# Identifier : 'id' followed by the trial id and the process id, when present
		if 'trial_id' in temp.keys() :
			item['id'] = item['id'] + '_{:s}'.format(temp['trial_id'])

		if 'pid' in temp.keys() :
			item['id'] = item['id'] + '_{:d}'.format(temp['pid'])

		for k, v in temp.items() :
			# Keep metrics as they are, strip the 'config/' prefix from parameters, skip everything else
			if   k.startswith('valid')  : pass
			elif k.startswith('train')  : pass
			elif k.startswith('config') : k = k[7:]
			else : continue

			# Parameters tuned once for all 'x' layers are copied to every numbered layer
			if   k.startswith('model/convx/')    : numbers = range(2, layers[0] + 1)
			elif k.startswith('model/maxpoolx/') : numbers = range(1, layers[1] + 1)
			else                                 : numbers = None

			if numbers is None :
				item[k] = v
			else :
				for number in numbers :
					item[k.replace('x/', str(number) + '/')] = v

		config.append(item)

def _valid_r2(item) :
	"""Sort key : validation R2 of a configuration, or -inf when it is missing (NaN / None)."""
	score = item['valid_r2']

	return float('-inf') if pandas.isna(score) else score

# Best configurations first; a NaN key would otherwise leave the order produced by sorted() undefined
ZCONFIG = sorted(ZCONFIG, key = _valid_r2, reverse = True)
WCONFIG = sorted(WCONFIG, key = _valid_r2, reverse = True)

ZCONFIG = ZCONFIG[:CONFIG_MAX]
WCONFIG = WCONFIG[:CONFIG_MAX]

if CONFIG_OVERWRITE :
	# Empty lists are never written, so an existing file is not replaced by an empty one
	if len(ZCONFIG) > 0 :
		writer.write_json(filename = ZCONFIG_PATH, data = ZCONFIG)

		print('Successfully written configuration [{:88s}] with [{:2d}] elements'.format(ZCONFIG_PATH, len(ZCONFIG)))

	if len(WCONFIG) > 0 :
		writer.write_json(filename = WCONFIG_PATH, data = WCONFIG)

		print('Successfully written configuration [{:88s}] with [{:2d}] elements'.format(WCONFIG_PATH, len(WCONFIG)))

	print()

# 2. Tuner Data

In [None]:
# Load the data tune reports from the reports folder (n = 25, output display disabled)

report_tune_data = report_load.load_data_tune_reports(root = ROOT_DIR, show = False, n = 25)

## 2.1 All

In [None]:
# Merge the regression data tune reports and show the first rows

report = report_concat.concat_data_tune_reports(reports = report_tune_data, mode = 'regression', n = None)

if report is not None :
	# Single dash separated target label built from the non-missing target parts
	target_parts = report[['Target0', 'Target1', 'Target2']]
	report['Target'] = target_parts.agg(lambda parts : '-'.join(parts.dropna()), axis = 1)

	report = report.rename(columns = {'Valid_MSE' : 'MSE', 'Valid_R2' : 'R2'})

	# Target is the last column at this point; bring it to the front
	columns = report.columns.tolist()
	report  = report[[columns[-1]] + columns[:-1]]

	unused = ['Target0', 'Target1', 'Target2', 'ID', 'Sequence', 'Valid_MAE', 'Train_MSE']
	shown  = ['Target', 'Model', 'Filter', 'Epoch', 'Lambda', 'MSE', 'R2']

	report = report.drop(columns = unused)
	report = report[shown]

report.head(n = 10)

## 2.2 Checkpoints

In [None]:
# Display top reports and checkpoints (log2, boxcox)
# NOTE(review): Lambda is compared against strings ('0.00000', '-0.14') - confirm the column holds strings and not floats

x = pandas.concat((
	report.head(n = 5),
	report[report['Lambda'] ==  '0.00000'],
	report[report['Lambda'] == '-0.14']
)).reset_index(drop = True)

x

# 3. Model CNN

In [None]:
# Load the cnn model reports from the reports folder

report_cnn_model = report_load.load_cnn_reports(root = ROOT_DIR)

## 3.1 All

In [None]:
# Display model performance for regression

report = report_cnn_model['regression']

if len(report) > 0 :
	# Work on a copy so the frame stored in report_cnn_model is not modified when this cell is (re)run
	report = report.copy()

	# Target : dash separated non-missing target parts; Params : '<model>-<param>'
	report['Target'] = report[['Target0', 'Target1', 'Target2']].agg(lambda x : '-'.join(x.dropna()), axis = 1)
	report['Params'] = report[['Model', 'Param']].agg(lambda x : '-'.join([str(i) for i in x]), axis = 1)

	# Human readable sequence names
	report['Sequence'] = report['Sequence'].replace({
		'tf2150' : 'transcript 2150 bp',
		'tf6150' : 'transcript 6150 bp',
		'po0512' : 'promoter 512 bp',
		'po4096' : 'promoter 4096 bp',
		'po5000' : 'promoter 5000 bp',
		'pu4096' : 'promoter + 5\'utr 4096 bp',
		'pu5000' : 'promoter + 5\'utr 5000 bp'
	})

	# Filter identifiers 'f1' .. 'f6' become the integers 1 .. 6
	report['Filter'] = report['Filter'].replace({
		'f1' : '1',
		'f2' : '2',
		'f3' : '3',
		'f4' : '4',
		'f5' : '5',
		'f6' : '6'
	})
	report['Filter'] = report['Filter'].astype(int)

	# Target and Params were appended last; move both to the front
	old_cols = report.columns.tolist()
	new_cols = old_cols[-2:] + old_cols[:-2]
	report = report[new_cols]

	report = report.reset_index(drop = True)
	report = report.drop(columns = ['Target0', 'Target1', 'Target2', 'Model', 'Param', 'Epochs', 'Optimizer', 'Scheduler'])

report.head(n = 10)

In [None]:
# All cnn results without the hyperparameter columns and with the parameter suffix removed from the model name

x = (
	report
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
)

# Replacements are applied in this exact order
for old, new in (('washburn-1', 'washburn'), ('washburn-0', 'washburn'), ('zrimec-0', 'zrimec')) :
	x['Params'] = x['Params'].str.replace(old, new)

x.head(n = 10)

## 3.2 Baseline

In [None]:
# Baseline : zrimec-0 runs with Filter == 2 and Features == 0 (no features)

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Features'] == 0]
	.loc[lambda df : df['Params'].str.startswith('zrimec-0')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('zrimec-0', 'zrimec'))
)

x

In [None]:
# Baseline : washburn-1 runs with Filter == 2 and Features == 0 (no features); the Filter column is dropped as well

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Features'] == 0]
	.loc[lambda df : df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Filter', 'Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn'))
)

x

In [None]:
# Baseline : zrimec-0 and washburn-1 runs with Filter == 2 and Features == 0 (no features)

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Features'] == 0]
	.loc[lambda df : df['Params'].str.startswith('zrimec-0') | df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn').str.replace('zrimec-0', 'zrimec'))
)

x

## 3.3 Filter

In [None]:
# Filter comparison : zrimec-0 runs on 'transcript 2150 bp' with features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Sequence'] == 'transcript 2150 bp']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Params'].str.startswith('zrimec-0')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('zrimec-0', 'zrimec'))
)

x

In [None]:
# Filter comparison : washburn-1 runs on 'transcript 2150 bp' with features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Sequence'] == 'transcript 2150 bp']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn'))
)

x

In [None]:
# Filter comparison : zrimec-0 and washburn-1 runs on 'transcript 2150 bp' with features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Sequence'] == 'transcript 2150 bp']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Params'].str.startswith('zrimec-0') | df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn').str.replace('zrimec-0', 'zrimec'))
)

x

## 3.4 Sequence

In [None]:
# Sequence comparison : zrimec-0 runs with Filter == 2 and features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Params'].str.startswith('zrimec-0')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('zrimec-0', 'zrimec'))
)

x

In [None]:
# Sequence comparison : washburn-1 runs with Filter == 2 and features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn'))
)

x

In [None]:
# Sequence comparison : zrimec-0 and washburn-1 runs with Filter == 2 and features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Target']   == 'global-mean']
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Params'].str.startswith('zrimec-0') | df['Params'].str.startswith('washburn-1')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn').str.replace('zrimec-0', 'zrimec'))
)

x

## 3.5 Zrimec

In [None]:
# First 10 zrimec-0 runs that use features

x = (
	report
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Params'].str.startswith('zrimec-0')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
	.head(n = 10)
)

x

## 3.6 Washburn

In [None]:
# All washburn runs (any parameter set) that use features

x = (
	report
	.loc[lambda df : df['Features'] != 0]
	.loc[lambda df : df['Params'].str.startswith('washburn')]
	.drop(columns = ['Features', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma', 'Batch'])
	.reset_index(drop = True)
)

x

# 4. Model FC

In [None]:
# Load the fc model reports from the reports folder

report_fc_model = report_load.load_fc_reports(root = ROOT_DIR)

## 4.1 All

In [None]:
# Display the performance of the model for only features

report = report_fc_model['regression']

if len(report) > 0 :
	# Copy first, so the frame stored in report_fc_model is not modified when this cell is (re)run
	report = report.copy()

	# Target : dash separated non-missing target parts; Params : '<model>-<param>'
	report['Target'] = report[['Target0', 'Target1', 'Target2']].agg(lambda x : '-'.join(x.dropna()), axis = 1)
	report['Params'] = report[['Model', 'Param']].agg(lambda x : '-'.join([str(i) for i in x]), axis = 1)

	# Target and Params were appended last; move both to the front
	old_cols = report.columns.tolist()
	new_cols = old_cols[-2:] + old_cols[:-2]
	report = report[new_cols]

	# Insert a constant 'none' Sequence column at position 2 (into an independent copy of the column subset)
	report = report.copy()
	report.insert(2, 'Sequence', 'none')

	# Filter identifiers 'f1' .. 'f6' become the integers 1 .. 6
	report['Filter'] = report['Filter'].replace({
		'f1' : '1',
		'f2' : '2',
		'f3' : '3',
		'f4' : '4',
		'f5' : '5',
		'f6' : '6'
	})
	report['Filter'] = report['Filter'].astype(int)

	report = report.reset_index(drop = True)
	report = report.drop(columns = ['Target0', 'Target1', 'Target2', 'Arch', 'Model', 'Param', 'Epochs'])

report.head(n = 10)

In [None]:
# Display the performance of the fc model for the zrimec-0 and washburn-1 parameter sets

x = (
	report
	.loc[lambda df : df['Params'].str.startswith('zrimec-0') | df['Params'].str.startswith('washburn-1')]
	# 'Batch' was listed twice in the original drop list; every label now appears once
	.drop(columns = ['FC1', 'FC2', 'Batch', 'Optimizer', 'Scheduler', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn').str.replace('zrimec-0', 'zrimec'))
)

x

## 4.2 Zrimec

In [None]:
# Display the performance of the fc model for the zrimec-0 parameter set

x = (
	report
	.loc[lambda df : df['Params'].str.startswith('zrimec-0')]
	# 'Batch' was listed twice in the original drop list; every label now appears once
	.drop(columns = ['FC1', 'FC2', 'Batch', 'Optimizer', 'Scheduler', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('zrimec-0', 'zrimec'))
)

x

## 4.3 Washburn

In [None]:
# Display the performance of the fc model for the washburn-1 parameter set

x = (
	report
	.loc[lambda df : df['Params'].str.startswith('washburn-1')]
	# 'Batch' was listed twice in the original drop list; every label now appears once
	.drop(columns = ['FC1', 'FC2', 'Batch', 'Optimizer', 'Scheduler', 'LR', 'Beta1', 'Beta2', 'Decay', 'Dropout', 'Gamma'])
	.reset_index(drop = True)
	.assign(Params = lambda df : df['Params'].str.replace('washburn-1', 'washburn'))
)

x

# 5. BERT

In [None]:
# Approximate value used for non-explode reports (512 x 32 = 16384)
STEPS_PER_EPOCH = 512

# Line and font settings passed to / applied for the plots below
LINEWIDTH  = 2
ALPHA      = 0.8
FONT_SCALE = 3.0

# The matplotlib font size is set first, then the seaborn theme is applied with the same scale
matplotlib.rcParams['font.size'] = int(12 * FONT_SCALE)
seaborn.set_theme(font_scale = FONT_SCALE)

In [None]:
# Load the bert model reports from the reports folder (output display disabled)

report_bert_model = report_load.load_bert_reports(root = ROOT_DIR, show = False)

## 5.1 All

In [None]:
# Concat reports and display top few

report = report_concat.concat_bert_reports(
	data            = report_bert_model,
	mode            = 'regression',
	metric          = 'eval_r2',
	ascending       = False,
	steps_per_epoch = STEPS_PER_EPOCH
)

if report is not None :
	# Merge the three target columns into one dash separated label (missing parts are skipped)
	report['Target'] = report[['Target0', 'Target1', 'Target2']].agg(lambda x : '-'.join(x.dropna()), axis = 1)

	# Target was appended as the last column; move it to the front
	old_cols = report.columns.tolist()
	new_cols = old_cols[-1:] + old_cols[:-1]
	report = report[new_cols]

	# Pooler identifiers are renamed : 'def' -> 'first', 'dna' -> 'mean'
	report['Pooler'] = report['Pooler'].replace({
		'def' : 'first',
		'dna' : 'mean'
	})

	# Human readable sequence names
	report['Sequence'] = report['Sequence'].replace({
		'tf2150' : 'transcript 2150 bp',
		'tf6150' : 'transcript 6150 bp',
		'po0512' : 'promoter 512 bp',
		'po4096' : 'promoter 4096 bp',
		'po5000' : 'promoter 5000 bp',
		'pu4096' : 'promoter + 5\'utr 4096 bp',
		'pu5000' : 'promoter + 5\'utr 5000 bp'
	})

	# Filter identifiers 'f1' .. 'f6' become the integers 1 .. 6
	report['Filter'] = report['Filter'].replace({
		'f1' : 1,
		'f2' : 2,
		'f3' : 3,
		'f4' : 4,
		'f5' : 5,
		'f6' : 6
	})
	report['Filter'] = report['Filter'].astype(int)

	report = report.rename(columns = {
		'Eval_R2' : 'R2',
		'Kmer'    : 'KMer'
	})

	report = report.drop(columns = ['Target0', 'Target1', 'Target2', 'Mode', 'Arch', 'LR', 'Steps', 'Step', 'Eval_ME', 'Eval_MAPE', 'Eval_MAE'])
	report = report.reset_index(drop = True)
	report = report[['Target', 'Pooler', 'Type', 'Layer', 'KMer', 'Feature', 'Filter', 'Sequence', 'Epoch', 'Epochs', 'R2']]

report.head(n = 10)

## 5.2 Baseline

In [None]:
# Baseline : runs with KMer == 3, Filter == 2 and Feature == 0 (no features)

x = (
	report
	.loc[lambda df : df['KMer']    == 3]
	.loc[lambda df : df['Filter']  == 2]
	.loc[lambda df : df['Feature'] == 0]
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x

In [None]:
# Plot baseline : R2 per epoch styled by input sequence, saved as 'bert-baseline-sequence' in DATA_DIR

bert_subset = report_filter.filter_bert_reports(
	reports   = report_bert_model,
	keep_only = ['global-mean', 'rnn', '-3-', '00'],
	drop_only = []
)

report_plot.models_bert_r2(
	data            = bert_subset,
	mode            = 'regression',
	step            = 'epoch',
	steps_min       = int(  2 * STEPS_PER_EPOCH),
	steps_max       = int(150 * STEPS_PER_EPOCH),
	steps_per_epoch = STEPS_PER_EPOCH,
	alpha           = ALPHA,
	linewidth       = LINEWIDTH,
	groupby         = 'none',
	style           = 'sequence',
	filename        = os.path.join(DATA_DIR, 'bert-baseline-sequence')
)

## 5.3 K-Mer

In [None]:
# K-mer comparison : pooler 'first', type 'def', Filter == 2, with features, 'promoter 512 bp', target 'global-mean'

x = (
	report
	.loc[lambda df : df['Pooler']   == 'first']
	.loc[lambda df : df['Type']     == 'def']
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Feature']  != 0]
	.loc[lambda df : df['Sequence'] == 'promoter 512 bp']
	.loc[lambda df : df['Target']   == 'global-mean']
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x

In [None]:
# Plot kmer : R2 per epoch styled by kmer, saved as 'bert-kmer' in DATA_DIR

bert_subset = report_filter.filter_bert_reports(
	reports   = report_bert_model,
	keep_only = ['global-mean', 'f2', 'v1', 'def'],
	drop_only = ['00']
)

report_plot.models_bert_r2(
	data            = bert_subset,
	mode            = 'regression',
	step            = 'epoch',
	steps_min       = int(  5 * STEPS_PER_EPOCH),
	steps_max       = int(250 * STEPS_PER_EPOCH),
	steps_per_epoch = STEPS_PER_EPOCH,
	alpha           = ALPHA,
	linewidth       = LINEWIDTH,
	groupby         = 'none',
	style           = 'kmer',
	filename        = os.path.join(DATA_DIR, 'bert-kmer')
)

## 5.4 Filter

In [None]:
# Filter comparison : pooler 'first', type 'def', KMer == 3, with features, 'promoter 512 bp', target 'global-mean'

x = (
	report
	.loc[lambda df : df['Pooler']   == 'first']
	.loc[lambda df : df['Type']     == 'def']
	.loc[lambda df : df['KMer']     == 3]
	.loc[lambda df : df['Feature']  != 0]
	.loc[lambda df : df['Sequence'] == 'promoter 512 bp']
	.loc[lambda df : df['Target']   == 'global-mean']
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x

In [None]:
# Plot filter : R2 per epoch styled by filter, saved as 'bert-filter' in DATA_DIR

bert_subset = report_filter.filter_bert_reports(
	reports   = report_bert_model,
	keep_only = ['global-mean', '-3-', 'v1', 'def'],
	drop_only = ['00']
)

report_plot.models_bert_r2(
	data            = bert_subset,
	mode            = 'regression',
	step            = 'epoch',
	steps_min       = int(  5 * STEPS_PER_EPOCH),
	steps_max       = int(250 * STEPS_PER_EPOCH),
	steps_per_epoch = STEPS_PER_EPOCH,
	alpha           = ALPHA,
	linewidth       = LINEWIDTH,
	groupby         = 'none',
	style           = 'filter',
	filename        = os.path.join(DATA_DIR, 'bert-filter')
)

## 5.5 Pooler

In [None]:
# Pooler comparison : KMer == 3, Filter == 2, with features, 'promoter 512 bp', target 'global-mean'

x = (
	report
	.loc[lambda df : df['KMer']     == 3]
	.loc[lambda df : df['Filter']   == 2]
	.loc[lambda df : df['Feature']  != 0]
	.loc[lambda df : df['Sequence'] == 'promoter 512 bp']
	.loc[lambda df : df['Target']   == 'global-mean']
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x

In [None]:
# Plot architecture : R2 per epoch styled by pooler and architecture, saved as 'bert-pooler' in DATA_DIR

bert_subset = report_filter.filter_bert_reports(
	reports   = report_bert_model,
	keep_only = ['global-mean', '-3-', 'f2', 'po0512'],
	drop_only = ['00']
)

report_plot.models_bert_r2(
	data            = bert_subset,
	mode            = 'regression',
	step            = 'epoch',
	steps_min       = int(  5 * STEPS_PER_EPOCH),
	steps_max       = int(250 * STEPS_PER_EPOCH),
	steps_per_epoch = STEPS_PER_EPOCH,
	alpha           = ALPHA,
	linewidth       = LINEWIDTH,
	groupby         = 'none',
	style           = 'pooler-architecture',
	filename        = os.path.join(DATA_DIR, 'bert-pooler')
)

## 5.6 Sequence

In [None]:
# Sequence comparison : pooler 'first', type 'rnn', KMer == 3, Filter == 2, with features, target 'global-mean'

x = (
	report
	.loc[lambda df : df['Pooler']  == 'first']
	.loc[lambda df : df['Type']    == 'rnn']
	.loc[lambda df : df['KMer']    == 3]
	.loc[lambda df : df['Filter']  == 2]
	.loc[lambda df : df['Feature'] != 0]
	.loc[lambda df : df['Target']  == 'global-mean']
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x

In [None]:
# Plot sequence : R2 per epoch styled by input sequence, saved as 'bert-sequence' in DATA_DIR

bert_subset = report_filter.filter_bert_reports(
	reports   = report_bert_model,
	keep_only = ['global-mean', '-3-', 'f2', 'v1', 'rnn'],
	drop_only = ['00']
)

report_plot.models_bert_r2(
	data            = bert_subset,
	mode            = 'regression',
	step            = 'epoch',
	steps_min       = int(  5 * STEPS_PER_EPOCH),
	steps_max       = int(250 * STEPS_PER_EPOCH),
	steps_per_epoch = STEPS_PER_EPOCH,
	alpha           = ALPHA,
	linewidth       = LINEWIDTH,
	groupby         = 'none',
	style           = 'sequence',
	filename        = os.path.join(DATA_DIR, 'bert-sequence')
)

## 5.7 Final

In [None]:
# Final : KMer == 3, with features, pooler 'first', type 'rnn', every target except 'tissue-mean-seedling'

x = (
	report
	.loc[lambda df : df['KMer']    == 3]
	.loc[lambda df : df['Feature'] != 0]
	.loc[lambda df : df['Pooler']  == 'first']
	.loc[lambda df : df['Type']    == 'rnn']
	.loc[lambda df : df['Target']  != 'tissue-mean-seedling']
	.drop(columns = ['Layer', 'Feature'])
	.reset_index(drop = True)
)

x.head(n = 10)