In [1]:
from Data_process import load_rawdata, datasets_split
from FunDNN.Model import run_model
from FunDNN.preprocessor import load_data, get_dataloader
from GenSAN.preprocessor import GenSAN_preprocessor, get_pre_GECs
from GenSAN.train_function import run_GenSAN_model
from config_parser import TranscriptionNet_Hyperparameters

Load TranscriptionNet model hyperparameters

In [2]:
# Build the hyperparameter object used by every later cell, which read its
# FunDNN_*, GenSAN_* and PMSELoss_beta attributes.
# NOTE(review): presumably the values are parsed from a config file by config_parser — confirm.
config = TranscriptionNet_Hyperparameters()

Load raw data and get training, validation and test sets

In [3]:
# Read the node features and the raw GECs of the three perturbation types
# from the example data directory.
node_feature, RNAi_GECs, OE_GECs, CRISPR_GECs = load_rawdata("example_data/raw_data/")

# Split each perturbation type in turn (RNAi, then OE, then CRISPR) and keep the
# object datasets_split returns for each one.
# NOTE(review): the *_MMScaler names suggest a fitted min-max scaler — confirm in Data_process.
RNAi_MMScaler, OE_MMScaler, CRISPR_MMScaler = (
    datasets_split(gecs, node_feature, out_dir)
    for gecs, out_dir in (
        (RNAi_GECs, "example_data/datasets/RNAi/"),
        (OE_GECs, "example_data/datasets/OE/"),
        (CRISPR_GECs, "example_data/datasets/CRISPR/"),
    )
)

RNAi FunDNN model to predict RNAi_pre-GECs

In [4]:
# Train/validation/test splits of the node features and of the RNAi GECs,
# both read from config.FunDNN_RNAi_path.
# NOTE(review): presumably these are the pickles produced by datasets_split() for
# "example_data/datasets/RNAi/" — confirm config.FunDNN_RNAi_path points there.
RNAi_feature_train, RNAi_feature_valid, RNAi_feature_test = load_data(
    config.FunDNN_RNAi_path, "feature_dict.pkl"
)

RNAi_GECs_train, RNAi_GECs_valid, RNAi_GECs_test = load_data(
    config.FunDNN_RNAi_path, "GECs_dict.pkl"
)

# Only the training and validation splits are wrapped in dataloaders;
# the test split is handed to run_model directly.
RNAi_train_dataloader, RNAi_valid_dataloader = get_dataloader(
    config.FunDNN_batch_size,
    RNAi_feature_train,
    RNAi_GECs_train,
    RNAi_feature_valid,
    RNAi_GECs_valid,
)

# Run the RNAi FunDNN with the shared FunDNN_* hyperparameters.
# The result, RNAi_pre_GECs, is an input of the GenSAN cell further down.
# NOTE(review): beta presumably weights the MSE and PCC terms shown in the epoch log — confirm.
RNAi_pre_GECs = run_model(
    num_layers=config.FunDNN_layers,
    hidden_nodes=config.FunDNN_hidden_nodes,
    activate_func=config.FunDNN_activation_func,
    dropout_rate=config.FunDNN_dropout_rate,
    learning_rate=config.FunDNN_learning_rate,
    epochs=config.FunDNN_epochs,
    train_dataloader=RNAi_train_dataloader,
    valid_dataloader=RNAi_valid_dataloader,
    beta=config.PMSELoss_beta,
    feature_test=RNAi_feature_test,
    gecs_test=RNAi_GECs_test,
    save_path=config.FunDNN_save_path,
    node_feature=node_feature,
    name="RNAi",
)

Node feature dimension:
train data:torch.Size([212, 512])
valid data:torch.Size([31, 512])
test data:torch.Size([60, 512])

GECS data dimension:
train data:torch.Size([212, 978])
valid data:torch.Size([31, 978])
test data:(60, 978)

end of epoch:  0 | time: 0.20s | train loss:0.22079 | valid loss:0.22789 | train MseLoss:0.13644 | train PccLoss:0.98001 | valid MseLoss:0.14536 | valid PccLoss:0.97066
end of epoch:  1 | time: 0.05s | train loss:0.22067 | valid loss:0.22785 | train MseLoss:0.13643 | train PccLoss:0.97880 | valid MseLoss:0.14535 | valid PccLoss:0.97027
end of epoch:  2 | time: 0.05s | train loss:0.21985 | valid loss:0.22780 | train MseLoss:0.13559 | train PccLoss:0.97820 | valid MseLoss:0.14535 | valid PccLoss:0.96988
end of epoch:  3 | time: 0.05s | train loss:0.22082 | valid loss:0.22775 | train MseLoss:0.13679 | train PccLoss:0.97710 | valid MseLoss:0.14534 | valid PccLoss:0.96948
end of epoch:  4 | time: 0.05s | train loss:0.22067 | valid loss:0.22770 | train MseLoss:0.

OE FunDNN model to predict OE_pre-GECs

In [5]:
# Train/validation/test splits of the node features and of the OE GECs,
# both read from config.FunDNN_OE_path.
# NOTE(review): presumably these are the pickles produced by datasets_split() for
# "example_data/datasets/OE/" — confirm config.FunDNN_OE_path points there.
OE_feature_train, OE_feature_valid, OE_feature_test = load_data(
    config.FunDNN_OE_path, "feature_dict.pkl"
)

OE_GECs_train, OE_GECs_valid, OE_GECs_test = load_data(
    config.FunDNN_OE_path, "GECs_dict.pkl"
)

# Only the training and validation splits are wrapped in dataloaders;
# the test split is handed to run_model directly.
OE_train_dataloader, OE_valid_dataloader = get_dataloader(
    config.FunDNN_batch_size,
    OE_feature_train,
    OE_GECs_train,
    OE_feature_valid,
    OE_GECs_valid,
)

# Run the OE FunDNN with the shared FunDNN_* hyperparameters.
# The result, OE_pre_GECs, is an input of the GenSAN cell further down.
OE_pre_GECs = run_model(
    num_layers=config.FunDNN_layers,
    hidden_nodes=config.FunDNN_hidden_nodes,
    activate_func=config.FunDNN_activation_func,
    dropout_rate=config.FunDNN_dropout_rate,
    learning_rate=config.FunDNN_learning_rate,
    epochs=config.FunDNN_epochs,
    train_dataloader=OE_train_dataloader,
    valid_dataloader=OE_valid_dataloader,
    beta=config.PMSELoss_beta,
    feature_test=OE_feature_test,
    gecs_test=OE_GECs_test,
    save_path=config.FunDNN_save_path,
    node_feature=node_feature,
    name="OE",
)

Node feature dimension:
train data:torch.Size([212, 512])
valid data:torch.Size([31, 512])
test data:torch.Size([60, 512])

GECS data dimension:
train data:torch.Size([212, 978])
valid data:torch.Size([31, 978])
test data:(60, 978)

end of epoch:  0 | time: 0.05s | train loss:0.22610 | valid loss:0.23280 | train MseLoss:0.13810 | train PccLoss:1.01811 | valid MseLoss:0.14628 | valid PccLoss:1.01150
end of epoch:  1 | time: 0.05s | train loss:0.22563 | valid loss:0.23275 | train MseLoss:0.13755 | train PccLoss:1.01835 | valid MseLoss:0.14627 | valid PccLoss:1.01109
end of epoch:  2 | time: 0.05s | train loss:0.22553 | valid loss:0.23270 | train MseLoss:0.13732 | train PccLoss:1.01946 | valid MseLoss:0.14626 | valid PccLoss:1.01068
end of epoch:  3 | time: 0.05s | train loss:0.22522 | valid loss:0.23265 | train MseLoss:0.13705 | train PccLoss:1.01877 | valid MseLoss:0.14625 | valid PccLoss:1.01026
end of epoch:  4 | time: 0.05s | train loss:0.22546 | valid loss:0.23260 | train MseLoss:0.

CRISPR FunDNN model to predict CRISPR_pre-GECs

In [6]:
# Train/validation/test splits of the node features and of the CRISPR GECs,
# both read from config.FunDNN_CRISPR_path.
# NOTE(review): presumably these are the pickles produced by datasets_split() for
# "example_data/datasets/CRISPR/" — confirm config.FunDNN_CRISPR_path points there.
CRISPR_feature_train, CRISPR_feature_valid, CRISPR_feature_test = load_data(
    config.FunDNN_CRISPR_path, "feature_dict.pkl"
)

CRISPR_GECs_train, CRISPR_GECs_valid, CRISPR_GECs_test = load_data(
    config.FunDNN_CRISPR_path, "GECs_dict.pkl"
)

# Only the training and validation splits are wrapped in dataloaders;
# the test split is handed to run_model directly.
CRISPR_train_dataloader, CRISPR_valid_dataloader = get_dataloader(
    config.FunDNN_batch_size,
    CRISPR_feature_train,
    CRISPR_GECs_train,
    CRISPR_feature_valid,
    CRISPR_GECs_valid,
)

# Run the CRISPR FunDNN with the shared FunDNN_* hyperparameters.
# The result, CRISPR_pre_GECs, is an input of the GenSAN cell further down.
CRISPR_pre_GECs = run_model(
    num_layers=config.FunDNN_layers,
    hidden_nodes=config.FunDNN_hidden_nodes,
    activate_func=config.FunDNN_activation_func,
    dropout_rate=config.FunDNN_dropout_rate,
    learning_rate=config.FunDNN_learning_rate,
    epochs=config.FunDNN_epochs,
    train_dataloader=CRISPR_train_dataloader,
    valid_dataloader=CRISPR_valid_dataloader,
    beta=config.PMSELoss_beta,
    feature_test=CRISPR_feature_test,
    gecs_test=CRISPR_GECs_test,
    save_path=config.FunDNN_save_path,
    node_feature=node_feature,
    name="CRISPR",
)

Node feature dimension:
train data:torch.Size([212, 512])
valid data:torch.Size([31, 512])
test data:torch.Size([60, 512])

GECS data dimension:
train data:torch.Size([212, 978])
valid data:torch.Size([31, 978])
test data:(60, 978)

end of epoch:  0 | time: 0.05s | train loss:0.22322 | valid loss:0.23214 | train MseLoss:0.13636 | train PccLoss:1.00489 | valid MseLoss:0.14618 | valid PccLoss:1.00578
end of epoch:  1 | time: 0.05s | train loss:0.22335 | valid loss:0.23209 | train MseLoss:0.13678 | train PccLoss:1.00251 | valid MseLoss:0.14617 | valid PccLoss:1.00535
end of epoch:  2 | time: 0.05s | train loss:0.22384 | valid loss:0.23204 | train MseLoss:0.13742 | train PccLoss:1.00165 | valid MseLoss:0.14616 | valid PccLoss:1.00492
end of epoch:  3 | time: 0.05s | train loss:0.22331 | valid loss:0.23198 | train MseLoss:0.13679 | train PccLoss:1.00191 | valid MseLoss:0.14615 | valid PccLoss:1.00449
end of epoch:  4 | time: 0.05s | train loss:0.22366 | valid loss:0.23193 | train MseLoss:0.

RNAi GenSAN model to predict RNAi_predict-GECs

In [7]:
# Build the GenSAN inputs for the RNAi task from the measured OE/CRISPR GECs and the
# FunDNN predictions of all three perturbation types. The split layout is taken from
# the RNAi "feature_dict.pkl" under config.FunDNN_RNAi_path.
# NOTE(review): the run log reports tensors of shape (N, 3, 978); presumably the middle
# axis stacks the RNAi/OE/CRISPR profiles — confirm in GenSAN.preprocessor.
GenSAN_train, GenSAN_valid, GenSAN_test, OE_combine, CRISPR_combine = GenSAN_preprocessor(
    true_GECs1=OE_GECs,
    true_GECs2=CRISPR_GECs,
    predict_GECs1=OE_pre_GECs,
    predict_GECs2=CRISPR_pre_GECs,
    pre_GECS=RNAi_pre_GECs,
    input_path=config.FunDNN_RNAi_path,
    file_name="feature_dict.pkl",
)

# Targets are the RNAi GECs splits loaded in the RNAi FunDNN cell.
GenSAN_train_dataloader, GenSAN_valid_dataloader = get_dataloader(
    batch_size=config.GenSAN_batch_size,
    node_train=GenSAN_train,
    gecs_train=RNAi_GECs_train,
    node_valid=GenSAN_valid,
    gecs_valid=RNAi_GECs_valid,
)

input_matrix = get_pre_GECs(RNAi_pre_GECs, OE_combine, CRISPR_combine)

# NOTE(review): hidden_nodes and save_path below are read from the FunDNN_* settings even
# though every other argument uses GenSAN_* values — confirm this is intentional.
# NOTE(review): length=64 is hard-coded and its meaning is not visible in this notebook;
# consider moving it into the config.
# NOTE(review): scaler is the object returned by datasets_split for RNAi; presumably it is
# used to map predictions back to the original scale — confirm in GenSAN.train_function.
RNAi_predict_GECs = run_GenSAN_model(
    blocks=config.GenSAN_blocks,
    GECs_dimension=config.GenSAN_GECs_dimension,
    hidden_nodes=config.FunDNN_hidden_nodes,
    heads=config.GenSAN_heads,
    dropout_rate=config.GenSAN_dropout_rate,
    recycles=config.GenSAN_recycles,
    learning_rate=config.GenSAN_learning_rate,
    weight_decay=config.GenSAN_weight_decay,
    epochs=config.GenSAN_epochs,
    train_dataloader=GenSAN_train_dataloader,
    valid_dataloader=GenSAN_valid_dataloader,
    beta=config.PMSELoss_beta,
    warmup_epoch=config.GenSAN_warmup_epochs,
    pre_gecs_test=GenSAN_test,
    gecs_test=RNAi_GECs_test,
    save_path=config.FunDNN_save_path,
    input_matrix=input_matrix,
    length=64,
    pre_GECs=RNAi_pre_GECs,
    scaler=RNAi_MMScaler,
    name="RNAi",
)

pre-GECS dimension:
train data:torch.Size([212, 3, 978])
valid data:torch.Size([31, 3, 978])
test data:torch.Size([60, 3, 978])

end of epoch:  0 | time: 0.85s | train loss:0.81737 | valid loss:0.77192 | train MseLoss:0.79777 | train PccLoss:0.99377 | valid MseLoss:0.74874 | valid PccLoss:0.98058
end of epoch:  1 | time: 0.79s | train loss:0.78880 | valid loss:0.77192 | train MseLoss:0.76762 | train PccLoss:0.97937 | valid MseLoss:0.74874 | valid PccLoss:0.98058
end of epoch:  2 | time: 0.74s | train loss:0.78698 | valid loss:0.76264 | train MseLoss:0.76565 | train PccLoss:0.97894 | valid MseLoss:0.73890 | valid PccLoss:0.97632
end of epoch:  3 | time: 0.74s | train loss:0.77711 | valid loss:0.74380 | train MseLoss:0.75519 | train PccLoss:0.97437 | valid MseLoss:0.71893 | valid PccLoss:0.96763
end of epoch:  4 | time: 0.75s | train loss:0.75281 | valid loss:0.71468 | train MseLoss:0.72932 | train PccLoss:0.96429 | valid MseLoss:0.68809 | valid PccLoss:0.95404
end of epoch:  5 | time: 0