# Running Oktoberfest

## 1- Import necessary python packages

In [1]:
import os
from oktoberfest.runner import run_job
import json
import urllib.request
import shutil
from tqdm import tqdm
%load_ext autoreload
%autoreload 2

## 2- Download files from zenodo required to run different tasks

### A- Get the current directory and set the file name

In [2]:
download_dir = os.getcwd()
download_file = os.path.join(download_dir, 'Oktoberfest_input.zip')
url = 'https://zenodo.org/record/7613029/files/Oktoberfest_input.zip'

download = False  # set this to false if you already have the file and don't want to download again in the next step

### B- Download and extract files from zenodo to the same directory

In [3]:
if download:
    with tqdm(unit="B", total=2739196307, unit_scale=True, unit_divisor=1024, miniters=1, desc=url.split("/")[-1]) as t:
        urllib.request.urlretrieve(url=url, filename=download_file, reporthook=lambda blocks, block_size, _: t.update(blocks * block_size - t.n))
    shutil.unpack_archive(download_file, download_dir)

### C- Check downloaded files

In [4]:
input_dir = download_file[:-4]
print(f'Downloaded data is stored in {input_dir}\nContents:')
os.listdir(input_dir)

Downloaded data is stored in /home/karim/projects/oktoberfest/tutorials/Oktoberfest_input
Contents:


['config_files',
 'GN20170722_SK_HLA_G0103_R1_02.raw',
 'msms.txt',
 'GN20170722_SK_HLA_G0103_R2_01.raw',
 'GN20170722_SK_HLA_G0103_R2_02.raw',
 'GN20170722_SK_HLA_G0103_R1_01.raw',
 'peptides_spectral_library.csv']

## 3- Running Different Tasks

### A- Spectral Library Generation

#### Generate config file

In [5]:
task_config_spectral_lib = {
    "type": "SpectralLibraryGeneration",
    "tag": "",
    "inputs": {
            "search_results": input_dir + "/msms.txt",
            "search_results_type": "Sage",
            "library_input": input_dir + "/peptides_spectral_library.csv",
            "library_input_type": "peptides"
        },
    "output": "./out",
    "models":{
        "intensity": "Prosit_2020_intensity_HCD",
        "irt": "Prosit_2019_irt"
    },
    "outputFormat": "spectronaut",
    "prediction_server": "koina.proteomicsdb.org:443",
    "ssl": True,
}

#### Save config as json

In [6]:
with open('./spectral_library_config.json', 'w') as fp:
    json.dump(task_config_spectral_lib, fp)

#### Run spectral library generation job

In [7]:
run_job("./spectral_library_config.json")

2023-10-26 16:23:17,705 - INFO - oktoberfest.utils.config::read Reading configuration from ./spectral_library_config.json
2023-10-26 16:23:17,706 - INFO - oktoberfest.runner::run_job Oktoberfest version 0.5.0
Copyright 2023, Wilhelmlab at Technical University of Munich
2023-10-26 16:23:17,707 - INFO - oktoberfest.runner::run_job Job executed with the following config:
2023-10-26 16:23:17,707 - INFO - oktoberfest.runner::run_job {
    "type": "SpectralLibraryGeneration",
    "tag": "",
    "inputs": {
        "search_results": "/home/karim/projects/oktoberfest/tutorials/Oktoberfest_input/msms.txt",
        "search_results_type": "Sage",
        "library_input": "/home/karim/projects/oktoberfest/tutorials/Oktoberfest_input/peptides_spectral_library.csv",
        "library_input_type": "peptides"
    },
    "output": "./out",
    "models": {
        "intensity": "Prosit_2020_intensity_HCD",
        "irt": "Prosit_2019_irt"
    },
    "outputFormat": "spectronaut",
    "prediction_server": 

Inferring predictions for 5 spectra with batch site 1000:   0%|          | 0/1 [00:00<?, ?it/s]

Inferring predictions for 5 spectra with batch site 1000:   0%|          | 0/1 [00:00<?, ?it/s]

### B- CE Calibration

#### Generate config file

In [13]:
task_config_ce_calibration = {
    "type": "CollisionEnergyCalibration",
    "tag": "",
    "inputs":{
        "search_results": input_dir + "/msms.txt",
        "search_results_type": "Sage",
        "spectra": input_dir,
        "spectra_type": "raw"
    },
    "output": "./out",
    "models": {
        "intensity": "Prosit_2020_intensity_HCD",
        "irt": "Prosit_2019_irt"
    },
    "prediction_server": "koina.proteomicsdb.org:443",
    "ssl": True,
    "thermoExe": "/home/karim/projects/essentials/therm/ThermoRawFileParser.exe",
    "massTolerance": 20,
    "unitMassTolerance": "ppm",
    "numThreads": 1
}

#### Save config as json

In [14]:
with open('./ce_calibration_config.json', 'w') as fp:
    json.dump(task_config_ce_calibration, fp)

#### Run ce calibration job

In [16]:
run_job("./ce_calibration_config.json")

2023-10-26 16:49:15,954 - INFO - oktoberfest.utils.config::read Reading configuration from ./ce_calibration_config.json
2023-10-26 16:49:15,956 - INFO - oktoberfest.runner::run_job Oktoberfest version 0.5.0
Copyright 2023, Wilhelmlab at Technical University of Munich
2023-10-26 16:49:15,956 - INFO - oktoberfest.runner::run_job Job executed with the following config:
2023-10-26 16:49:15,957 - INFO - oktoberfest.runner::run_job {
    "type": "CollisionEnergyCalibration",
    "tag": "",
    "inputs": {
        "search_results": "/home/karim/projects/oktoberfest/tutorials/Oktoberfest_input/msms.txt",
        "search_results_type": "Sage",
        "spectra": "/home/karim/projects/oktoberfest/tutorials/Oktoberfest_input",
        "spectra_type": "raw"
    },
    "output": "./out",
    "models": {
        "intensity": "Prosit_2020_intensity_HCD",
        "irt": "Prosit_2019_irt"
    },
    "prediction_server": "koina.proteomicsdb.org:443",
    "ssl": true,
    "thermoExe": "/home/karim/projec

Waiting for tasks to complete:   0%|          | 0/4 [00:00<?, ?it/s]

2023-10-26 16:49:37,224 - INFO - oktoberfest.preprocessing.preprocessing::merge_spectra_and_peptides Merging rawfile and search result
2023-10-26 16:49:37,256 - INFO - oktoberfest.preprocessing.preprocessing::merge_spectra_and_peptides There are 26718 matched identifications
2023-10-26 16:49:37,262 - INFO - oktoberfest.preprocessing.preprocessing::annotate_spectral_library Annotating spectra...
2023-10-26 14:49:37,434 - INFO - spectrum_fundamentals.fragments::_get_modifications Error Modification [+57.0214] not found
2023-10-26 16:49:37,436 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool Caught Unknown exception, terminating workers


2023-10-26 16:49:37,436 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool Caught Unknown exception, terminating workers


2023-10-26 16:49:37,437 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool multiprocessing.pool.RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/karim/miniconda3/envs/oktoberfest/lib/python3.10/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/home/karim/projects/oktoberfest/oktoberfest/runner.py", line 211, in _ce_calib
    library = _annotate_and_get_library(spectra_file, config)
  File "/home/karim/projects/oktoberfest/oktoberfest/runner.py", line 82, in _annotate_and_get_library
    pp.annotate_spectral_library(library, mass_tol=config.mass_tolerance, unit_mass_tol=config.unit_mass_tolerance)
  File "/home/karim/projects/oktoberfest/oktoberfest/preprocessing/preprocessing.py", line 371, in annotate_spectral_library
    df_annotated_spectra = annotate_spectra(psms.spectra_data, mass_tol, unit_mass_tol)
  File "/home/karim/projects/spectrum_fundamentals/spectrum_fundamentals/annotation/annotation.py", line 

2023-10-26 16:49:37,437 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool multiprocessing.pool.RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/karim/miniconda3/envs/oktoberfest/lib/python3.10/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/home/karim/projects/oktoberfest/oktoberfest/runner.py", line 211, in _ce_calib
    library = _annotate_and_get_library(spectra_file, config)
  File "/home/karim/projects/oktoberfest/oktoberfest/runner.py", line 82, in _annotate_and_get_library
    pp.annotate_spectral_library(library, mass_tol=config.mass_tolerance, unit_mass_tol=config.unit_mass_tolerance)
  File "/home/karim/projects/oktoberfest/oktoberfest/preprocessing/preprocessing.py", line 371, in annotate_spectral_library
    df_annotated_spectra = annotate_spectra(psms.spectra_data, mass_tol, unit_mass_tol)
  File "/home/karim/projects/spectrum_fundamentals/spectrum_fundamentals/annotation/annotation.py", line 

2023-10-26 16:49:37,439 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool Modification not found.


2023-10-26 16:49:37,439 - ERROR - oktoberfest.utils.multiprocessing_pool::check_pool Modification not found.


AttributeError: 'tuple' object has no attribute 'tb_frame'

### C- Rescoring

#### Generate config file

In [None]:
task_config_rescoring = {
    "type": "Rescoring",
    "tag": "",
    "inputs":{
        "search_results": input_dir + "/msms.txt",
        "search_results_type": "Sage",
        "spectra": input_dir,
        "spectra_type": "raw"
    },
    "output": "./out",
    "models": {
        "intensity": "Prosit_2020_intensity_HCD",
        "irt": "Prosit_2019_irt"
    },
    "prediction_server": "koina.proteomicsdb.org:443",
    "ssl": True,
    "thermoExe": "ThermoRawFileParser.exe",
    "numThreads": 4,
    "fdr_estimation_method": "percolator",
    "regressionMethod": "spline",
    "allFeatures": False,
    "massTolerance": 20,
    "unitMassTolerance": "ppm"
}

#### Save config as json

In [None]:
with open('./rescoring_config.json', 'w') as fp:
    json.dump(task_config_rescoring, fp)

#### Run rescoring job

In [None]:
run_job("rescoring_config.json")