This script predicts scores for ORF candidates in each experiment and saves the positive ones. This is the second and final step of the ORF score prediction.
Specifically,
(1) for each experiment, we load the feature set and predict the ORF scores with the model trained from script "run_modeling_main.ipynb"
(2) any ORF with a score >= 0.5 is added to a set named "postive_orf_key"
(3) We go through the table "merged_orfs_found_by_any_caller.csv" and add rows appearing in "postive_orf_key" to a new table "{local_folder}/merged_orfs_positive.csv". In this way, our program won't consume tons of memory and the resulting table won't contain duplicate ORFs.

In [1]:
import os
import torch
import pandas as pd

import utils
import modeling_utils

In [2]:
# Working directory that holds the inputs and outputs of this notebook.
local_folder = "./data"

# Load the model saved under the name "best_model" in the working directory.
# NOTE(review): torch.load unpickles the file; recent PyTorch releases are
# stricter about loading fully pickled modules by default -- confirm this
# still loads on the PyTorch version in use.
model_path = f"{local_folder}/best_model"
model = torch.load(model_path)

# Switch to evaluation mode. As the last expression of the cell, this also
# displays the model's layer structure.
model.eval()

MLP(
  (fc1): Linear(in_features=27, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [3]:
# Keys of every ORF that scored >= 0.5 in at least one experiment. A set is
# used so that an ORF found in several experiments is recorded only once.
postive_orf_key = set()

# Open the experiment list in a context manager so the file handle is closed
# even if processing one of the experiments raises.
with open(f"{local_folder}/experiments.txt") as experiments_file:
    for line in experiments_file:
        # Each useful line carries a "PRE" marker followed by "<experiment_name>/"
        # (presumably `aws s3 ls` output -- confirm against how experiments.txt
        # is produced). Blank or unrelated lines are skipped instead of failing
        # with an IndexError.
        if "PRE" not in line:
            continue

        # maxsplit=1 keeps experiment names that themselves contain "PRE" intact;
        # rstrip("/") removes the trailing slash without eating a real character
        # when the slash is absent.
        experiment_name = line.split("PRE", 1)[1].strip().rstrip("/")
        if not experiment_name:
            continue

        features = []
        # Derive the download folder from local_folder (with the trailing slash
        # the original call used) so the download location and the path checked
        # below cannot drift apart.
        utils.download_feature_set(
            experiment_name=experiment_name,
            local_folder=f"{local_folder}/")

        feature_path = f"{local_folder}/{experiment_name}_orf_features.csv"

        # skip experiments that failed riboseq QC (no feature file was downloaded)
        if not os.path.exists(feature_path):
            continue

        # `features` is passed in empty and iterated below, so this call is
        # expected to fill it in place with (orf_key, _, _) entries.
        utils.read_features_no_labels(
            data_path=feature_path,
            features=features)
        utils.remove_local_copy(experiment_name, local_folder)

        # Only the model inputs are needed here; the other returned values are unused.
        (data_x, _, _, _, _, _) = utils.get_dataset(features, validation_chroms={}, test_chroms={})
        y_pred_list = modeling_utils.predict(model, data_x)

        # NOTE(review): this assumes predictions come back in the same order as
        # `features` -- confirm in utils.get_dataset / modeling_utils.predict.
        for orf_idx, (orf_key, _, _) in enumerate(features):
            if y_pred_list[orf_idx] >= 0.5:
                postive_orf_key.add(orf_key)

download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_MB1_1A/output/VPR_orfcalling_20240307222145_MB1_1A_orf_features.csv to data/VPR_orfcalling_20240307222145_MB1_1A_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_MB1_1B/output/VPR_orfcalling_20240307222145_MB1_1B_orf_features.csv to data/VPR_orfcalling_20240307222145_MB1_1B_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_MB1_2A/output/VPR_orfcalling_20240307222145_MB1_2A_orf_features.csv to data/VPR_orfcalling_20240307222145_MB1_2A_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_MB1_2B/output/VPR_orfcalling_20240307222145_MB1_2B_orf_features.csv to data/VPR_orfcalling_20240307222145_MB1_2B_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_YL12_M0_a/output/VPR_orfcalling_20240307222145_YL12_M0_a_orf_features.csv to data/VPR_orfcalling_20240307222145_YL12_M0_a_orf_features.csv
download: s3://velia-

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222145_YL12_M1_a/output/VPR_orfcalling_20240307222145_YL12_M1_a_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_YL12_M2_a/output/VPR_orfcalling_20240307222145_YL12_M2_a_orf_features.csv to data/VPR_orfcalling_20240307222145_YL12_M2_a_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222145_YL12_M2_b/output/VPR_orfcalling_20240307222145_YL12_M2_b_orf_features.csv to data/VPR_orfcalling_20240307222145_YL12_M2_b_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222145_YL12_M2_c/output/VPR_orfcalling_20240307222145_YL12_M2_c_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222208_BL11_PBMCL1_0p5/output/VPR_orfcalling_20240307222208_BL11_PBMCL1_0p5_orf_features.csv to data/VPR_orfcalling_20240307222208_BL11_PBMCL1_0p5_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222208_BL11_PBMCL1_2/output/VPR_orfcalling_20240307222208_BL11_PBMCL1_2_orf_features.csv to data/VPR_orfcalling_20240307222208_BL11_PBMCL1_2_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222208_BL11_PBMCL2_0p5/output/VPR_orfcalling_20240307222208_BL11_PBMCL2_0p5_orf_features.csv to data/VPR_orfcalling_20240307222208_BL11_PBMCL2_0p5_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222208_BL11_PBMCL2_2/output/VPR_orfcalling_20240307222208_BL11_PBMCL2_2_orf_features.csv to data/VPR_orfcalling_20240307222208_BL11_PBMCL2_2_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222208_BL11_PBMCU1_0p5/output/VPR_orfcalling_20240307222208_BL11_PBMCU1

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_HCC1954_RPF_1/output/VPR_orfcalling_20240307222241_HCC1954_RPF_1_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HCC1954_rep2/output/VPR_orfcalling_20240307222241_HCC1954_rep2_orf_features.csv to data/VPR_orfcalling_20240307222241_HCC1954_rep2_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HCC1954_rep3/output/VPR_orfcalling_20240307222241_HCC1954_rep3_orf_features.csv to data/VPR_orfcalling_20240307222241_HCC1954_rep3_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep1_SRR8449569/output/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep1_SRR8449569_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep1_SRR8449569_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep2_SRR8449570/output/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep2_SRR8449570_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-DMSO-rep2_SRR8449570_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_2024030

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_HEK293T-LoRes_SRR8449566/output/VPR_orfcalling_20240307222241_HEK293T-LoRes_SRR8449566_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-MedRes_SRR8449567/output/VPR_orfcalling_20240307222241_HEK293T-MedRes_SRR8449567_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-MedRes_SRR8449567_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-TG-rep1_SRR8449573/output/VPR_orfcalling_20240307222241_HEK293T-TG-rep1_SRR8449573_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-TG-rep1_SRR8449573_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-TG-rep2_SRR8449574/output/VPR_orfcalling_20240307222241_HEK293T-TG-rep2_SRR8449574_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-TG-rep2_SRR8449574_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HEK293T-TM-rep1_SRR8449571/output/VPR_orfcalling_20240307222241_HEK293T-TM-rep1_SRR8449571_orf_features.csv to data/VPR_orfcalling_20240307222241_HEK293T-TM-rep1_SRR8449571_orf_

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_HEK293T-TM-rep2_SRR8449572/output/VPR_orfcalling_20240307222241_HEK293T-TM-rep2_SRR8449572_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep1_SRR8449577/output/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep1_SRR8449577_orf_features.csv to data/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep1_SRR8449577_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep2_SRR8449578/output/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep2_SRR8449578_orf_features.csv to data/VPR_orfcalling_20240307222241_HeLaS3-HiRes-rep2_SRR8449578_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_HeLaS3-LoRes-rep1_SRR8449575/output/VPR_orfcalling_20240307222241_HeLaS3-LoRes-rep1_SRR8449575_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HeLaS3-LoRes-rep2_SRR8449576/output/VPR_orfcalling_20240307222241_HeLaS3-LoRes-rep2_SRR8449576_orf_features.csv to data/VPR_orfcalling_20240307222241_HeLaS3-LoRes-rep2_SRR8449576_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep1/output/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep1_orf_features.csv to data/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep1_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep2/output/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep2_orf_features.csv to data/VPR_orfcalling_20240307222241_HepG2_chol-oleic_rep2_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_HepG2_control_rep1/output/VPR_orfcalling_20240307222241_HepG2_control_rep1_orf_features.csv to data/VPR_orfcalling_20240307222241_HepG2_control_rep1_orf_features.csv
download: s3://velia-piperuns-de

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_MCF-7_RPF_2/output/VPR_orfcalling_20240307222241_MCF-7_RPF_2_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MCF-7_RPF_3/output/VPR_orfcalling_20240307222241_MCF-7_RPF_3_orf_features.csv to data/VPR_orfcalling_20240307222241_MCF-7_RPF_3_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MCF-7_RPF_4/output/VPR_orfcalling_20240307222241_MCF-7_RPF_4_orf_features.csv to data/VPR_orfcalling_20240307222241_MCF-7_RPF_4_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MCF-7_RPF_5/output/VPR_orfcalling_20240307222241_MCF-7_RPF_5_orf_features.csv to data/VPR_orfcalling_20240307222241_MCF-7_RPF_5_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MCF-7_rep6/output/VPR_orfcalling_20240307222241_MCF-7_rep6_orf_features.csv to data/VPR_orfcalling_20240307222241_MCF-7_rep6_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MCF-7_rep7/output/VPR_orfcalling_20240307222241_MCF-7_rep7_orf_features.csv to data/VPR_orfcalling_2024

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240307222241_MDA-MB-231_RPF_1/output/VPR_orfcalling_20240307222241_MDA-MB-231_RPF_1_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MDA-MB-231_rep2/output/VPR_orfcalling_20240307222241_MDA-MB-231_rep2_orf_features.csv to data/VPR_orfcalling_20240307222241_MDA-MB-231_rep2_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_MDA-MB-231_rep3/output/VPR_orfcalling_20240307222241_MDA-MB-231_rep3_orf_features.csv to data/VPR_orfcalling_20240307222241_MDA-MB-231_rep3_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_cardiomyocyte-rep1_SRR9113067/output/VPR_orfcalling_20240307222241_cardiomyocyte-rep1_SRR9113067_orf_features.csv to data/VPR_orfcalling_20240307222241_cardiomyocyte-rep1_SRR9113067_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240307222241_cardiomyocyte-rep2_SRR9113068/output/VPR_orfcalling_20240307222241_cardiomyocyte-rep2_SRR9113068_orf_features.csv to data/VPR_orfcalling_20240307222241_cardiomyocyte-rep2_SRR9113068_orf_features.csv
download: s3://velia-piperuns-de

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX1059898/output/VPR_orfcalling_20240308012528_SRX1059898_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX1059899/output/VPR_orfcalling_20240308012528_SRX1059899_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX1059900/output/VPR_orfcalling_20240308012528_SRX1059900_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX1059901/output/VPR_orfcalling_20240308012528_SRX1059901_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX1059902/output/VPR_orfcalling_20240308012528_SRX1059902_orf_features.csv" 

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX10921846/output/VPR_orfcalling_20240308012528_SRX10921846_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX10921846_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX10921847/output/VPR_orfcalling_20240308012528_SRX10921847_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX10921847_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811942/output/VPR_orfcalling_20240308012528_SRX11811942_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811943/output/VPR_orfcalling_20240308012528_SRX11811943_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811944/output/VPR_orfcalling_20240308012528_SRX11811944_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811945/output/VPR_orfcalling_20240308012528_SRX11811945_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811946/output/VPR_orfcalling_20240308012528_SRX11811946_orf_feat

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811959/output/VPR_orfcalling_20240308012528_SRX11811959_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811959_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811960/output/VPR_orfcalling_20240308012528_SRX11811960_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811960_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811961/output/VPR_orfcalling_20240308012528_SRX11811961_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811961_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811962/output/VPR_orfcalling_20240308012528_SRX11811962_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811962_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811963/output/VPR_orfcalling_20240308012528_SRX11811963_orf_features.csv to data/VPR_orfcalling

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811964/output/VPR_orfcalling_20240308012528_SRX11811964_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811965/output/VPR_orfcalling_20240308012528_SRX11811965_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811966/output/VPR_orfcalling_20240308012528_SRX11811966_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811967/output/VPR_orfcalling_20240308012528_SRX11811967_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811968/output/VPR_orfcalling_20240308012528_SRX11811968_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811968_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811969/output/VPR_orfcalling_20240308012528_SRX11811969_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811969_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811970/output/VPR_orfcalling_20240308012528_SRX11811970_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811970_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811971/output/VPR_orfcalling_20240308012528_SRX11811971_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811971_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811972/output/VPR_orfcalling_20240308012528_SRX11811972_orf_features.csv to data/VPR_orfcalling

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811974/output/VPR_orfcalling_20240308012528_SRX11811974_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811975/output/VPR_orfcalling_20240308012528_SRX11811975_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811975_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811976/output/VPR_orfcalling_20240308012528_SRX11811976_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811977/output/VPR_orfcalling_20240308012528_SRX11811977_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811977_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811978/output/VPR_orfcalling_20240308012528_SRX11811978_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811978_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811979/output/VPR_orfcalling_20240308012528_SRX11811979_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811979_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811980/output/VPR_orfcalling_20240308012528_SRX11811980_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11811980_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11811981/output/VPR_orfcalling_20240308012528_SRX11811981_orf_features.csv to data/VPR_orfcalling

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811985/output/VPR_orfcalling_20240308012528_SRX11811985_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811986/output/VPR_orfcalling_20240308012528_SRX11811986_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811987/output/VPR_orfcalling_20240308012528_SRX11811987_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811988/output/VPR_orfcalling_20240308012528_SRX11811988_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11811989/output/VPR_orfcalling_20240308012528_SRX11811989_orf_feat

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX11812006/output/VPR_orfcalling_20240308012528_SRX11812006_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX11812006_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11812007/output/VPR_orfcalling_20240308012528_SRX11812007_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11812008/output/VPR_orfcalling_20240308012528_SRX11812008_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11812009/output/VPR_orfcalling_20240308012528_SRX11812009_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11812010/output/VPR_orfcalling_20240308012528_SRX11812010_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX11812011/output/VPR_orfcalling_20240308012528_SRX11812011_orf_feat

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884275/output/VPR_orfcalling_20240308012528_SRX3884275_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884275_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884277/output/VPR_orfcalling_20240308012528_SRX3884277_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884277_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884280/output/VPR_orfcalling_20240308012528_SRX3884280_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884282/output/VPR_orfcalling_20240308012528_SRX3884282_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884282_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884284/output/VPR_orfcalling_20240308012528_SRX3884284_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884284_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884286/output/VPR_orfcalling_20240308012528_SRX3884286_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884288/output/VPR_orfcalling_20240308012528_SRX3884288_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884288_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884290/output/VPR_orfcalling_20240308012528_SRX3884290_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884290_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884292/output/VPR_orfcalling_20240308012528_SRX3884292_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884292_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884294/output/VPR_orfcalling_20240308012528_SRX3884294_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884294_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884296/output/VPR_orfcalling_20240308012528_SRX3884296_orf_features.csv to data/VPR_orfcalling_2024030801252

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884300/output/VPR_orfcalling_20240308012528_SRX3884300_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884302/output/VPR_orfcalling_20240308012528_SRX3884302_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884304/output/VPR_orfcalling_20240308012528_SRX3884304_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884304_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884306/output/VPR_orfcalling_20240308012528_SRX3884306_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884308/output/VPR_orfcalling_20240308012528_SRX3884308_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884308_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX3884310/output/VPR_orfcalling_20240308012528_SRX3884310_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884312/output/VPR_orfcalling_20240308012528_SRX3884312_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884312_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX3884314/output/VPR_orfcalling_20240308012528_SRX3884314_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX3884314_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX5766767/output/VPR_orfcalling_20240308012528_SRX5766767_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX5766767_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX5766768/output/VPR_orfcalling_20240308012528_SRX5766768_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX5766768_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX5766769/output/VPR_orfcalling_20240308012528_SRX5766769_orf_features.csv to data/VPR_orfcalling_2024030801252

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX5766771/output/VPR_orfcalling_20240308012528_SRX5766771_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX663295/output/VPR_orfcalling_20240308012528_SRX663295_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX663296/output/VPR_orfcalling_20240308012528_SRX663296_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX663297/output/VPR_orfcalling_20240308012528_SRX663297_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX663298/output/VPR_orfcalling_20240308012528_SRX663298_orf_features.csv" does not

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190759/output/VPR_orfcalling_20240308012528_SRX8190759_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190759_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190760/output/VPR_orfcalling_20240308012528_SRX8190760_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190760_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190763/output/VPR_orfcalling_20240308012528_SRX8190763_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190763_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190764/output/VPR_orfcalling_20240308012528_SRX8190764_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190764_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012528_SRX8190767/output/VPR_orfcalling_20240308012528_SRX8190767_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190768/output/VPR_orfcalling_20240308012528_SRX8190768_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190768_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190771/output/VPR_orfcalling_20240308012528_SRX8190771_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190771_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012528_SRX8190772/output/VPR_orfcalling_20240308012528_SRX8190772_orf_features.csv to data/VPR_orfcalling_20240308012528_SRX8190772_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012804_SRX663267/output/VPR_orfcalling_20240308012804_SRX663267_orf_features.csv to data/VPR_orfcalling_20240308012804_SRX663267_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012804_SRX663268/output/VPR_orfcalling_20240308012804_SRX663268_orf_features.csv to data/VPR_orfcalling_20240308012804_SRX

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012804_SRX663273/output/VPR_orfcalling_20240308012804_SRX663273_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012804_SRX663274/output/VPR_orfcalling_20240308012804_SRX663274_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012804_SRX952421/output/VPR_orfcalling_20240308012804_SRX952421_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012804_SRX952422/output/VPR_orfcalling_20240308012804_SRX952422_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012804_SRX952425/output/VPR_orfcalling_20240308012804_SRX952425_orf_features.csv" does not e

download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_ERX3391949/output/VPR_orfcalling_20240308012940_ERX3391949_orf_features.csv to data/VPR_orfcalling_20240308012940_ERX3391949_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_ERX3391950/output/VPR_orfcalling_20240308012940_ERX3391950_orf_features.csv to data/VPR_orfcalling_20240308012940_ERX3391950_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX1254413/output/VPR_orfcalling_20240308012940_SRX1254413_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX1254413_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX1447296/output/VPR_orfcalling_20240308012940_SRX1447296_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX1447296_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX1447297/output/VPR_orfcalling_20240308012940_SRX1447297_orf_features.csv to data/VPR_orfcalling_2024030801294

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876050/output/VPR_orfcalling_20240308012940_SRX876050_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876051/output/VPR_orfcalling_20240308012940_SRX876051_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876052/output/VPR_orfcalling_20240308012940_SRX876052_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876053/output/VPR_orfcalling_20240308012940_SRX876053_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876054/output/VPR_orfcalling_20240308012940_SRX876054_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876054_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876055/output/VPR_orfcalling_20240308012940_SRX876055_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876055_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876056/output/VPR_orfcalling_20240308012940_SRX876056_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876056_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876057/output/VPR_orfcalling_20240308012940_SRX876057_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876057_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876058/output/VPR_orfcalling_20240308012940_SRX876058_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876058_or

fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876066/output/VPR_orfcalling_20240308012940_SRX876066_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876067/output/VPR_orfcalling_20240308012940_SRX876067_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876068/output/VPR_orfcalling_20240308012940_SRX876068_orf_features.csv" does not exist


download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876069/output/VPR_orfcalling_20240308012940_SRX876069_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876069_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876070/output/VPR_orfcalling_20240308012940_SRX876070_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876070_orf_features.csv
download: s3://velia-piperuns-dev/VPR_orfcalling_20240308012940_SRX876071/output/VPR_orfcalling_20240308012940_SRX876071_orf_features.csv to data/VPR_orfcalling_20240308012940_SRX876071_orf_features.csv


fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876072/output/VPR_orfcalling_20240308012940_SRX876072_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876073/output/VPR_orfcalling_20240308012940_SRX876073_orf_features.csv" does not exist
fatal error: An error occurred (404) when calling the HeadObject operation: Key "VPR_orfcalling_20240308012940_SRX876074/output/VPR_orfcalling_20240308012940_SRX876074_orf_features.csv" does not exist


In [4]:
# Number of distinct ORF keys that scored >= 0.5 in at least one experiment.
len(postive_orf_key)

9695

In [5]:
def save_orfs(local_folder, postive_orf_key):
    """Save the rows of the merged ORF table whose key is in ``postive_orf_key``.

    Reads ``{local_folder}/merged_orfs_found_by_any_caller.csv`` (tab-separated,
    first column used as the index) and writes the selected rows to
    ``{local_folder}/merged_orfs_positive.csv`` (tab-separated, index dropped).

    Args:
        local_folder: Directory containing the input table; the output table
            is written to the same directory.
        postive_orf_key: Collection of
            ``(chrom_id, orf_start, orf_end, strand, exon_blocks)`` tuples,
            with ``orf_start`` and ``orf_end`` given as strings.

    Rows are matched on the key built from those five columns. If several rows
    share a key, one row is written: it holds the values of the last such row
    and sits at the position of the first one.
    """
    merged_orfs = pd.read_csv(
        f"{local_folder}/merged_orfs_found_by_any_caller.csv",
        sep='\t',
        index_col=0)

    merged_orfs_info = {}
    for _, row in merged_orfs.iterrows():
        # Start/end are compared as strings, matching how the keys are stored.
        orf_key = (row["chrom_id"], str(row["orf_start"]), str(row["orf_end"]), row["strand"], row["exon_blocks"])

        if orf_key in postive_orf_key:
            merged_orfs_info[orf_key] = row

    # Pass the source columns explicitly so the header is still written when no
    # ORF is selected; otherwise the output would be a header-less file that
    # cannot be read back.
    df = pd.DataFrame(
        list(merged_orfs_info.values()),
        columns=merged_orfs.columns).reset_index(drop=True)
    df.to_csv(
        f"{local_folder}/merged_orfs_positive.csv",
        sep="\t",
        index=False)

In [6]:
# Write the positive ORFs collected above to the output table in local_folder.
save_orfs(
    local_folder=local_folder,
    postive_orf_key=postive_orf_key,
)