# Example Script for parsing an ISA JSON

## Import statements

In [1]:
import json
import os
from ena_upload.json_parsing.ena_submission import EnaSubmission
from dotenv import dotenv_values


## Reading a JSON file

In [2]:

# Read the ISA JSON test file. The context manager closes the file handle as
# soon as parsing finishes, even if json.load raises. The encoding is pinned to
# UTF-8 so decoding does not depend on the platform's default locale.
with open(
    "tests/test_data/multi_study_multi_assay_stream_investigation_v3.json",
    encoding="utf-8",
) as isa_json_file:
    isa_json = json.load(isa_json_file)

## Setting some extra parameters

In [3]:


# Export switch: set to True to also write every resulting DataFrame to an .xlsx file.
export_to_excel = False
# Directory that receives the .xlsx files when the export switch is on.
outputfolder = "./output_folder/"

# Selection criteria handed to EnaSubmission.from_isa_json in the parsing cell.
# NOTE(review): presumably each entry selects an assay stream or study by name;
# confirm against the EnaSubmission documentation.
required_assays = [
    dict(assay_stream="Ena stream 1"),
    dict(ena_study_title="Extra study"),
]

## Parsing

In [4]:


# Build the submission object from the loaded ISA JSON and the assay criteria.
submission = EnaSubmission.from_isa_json(
    isa_json,
    required_assays,
)
# Mapping of name -> DataFrame; the output cell iterates it with .items().
submission_dfs = submission.generate_dataframes()

## Output

In [5]:

# Create the export directory only when an export was requested.
# exist_ok=True makes the call a no-op if the directory is already there,
# which replaces the separate (and racy) os.path.exists check.
if export_to_excel:
    os.makedirs(outputfolder, exist_ok=True)

# Show every generated DataFrame and optionally write it to its own workbook.
for k, df in submission_dfs.items():
    print(f"Dataframe {k}:")
    display(df)
    if export_to_excel:
        # os.path.join keeps the target path valid whether or not
        # `outputfolder` ends with a path separator.
        df.to_excel(os.path.join(outputfolder, f"{k}.xlsx"))

print("Done!")


Dataframe study:


Unnamed: 0,alias,title,study_type,study_abstract,new_study_type,pubmed_id
0,https://datahub.elixir-belgium.org/studies/27_28,Ena Study 1,Whole Genome Sequencing,This is Ena Study 1.,,56.0
1,https://datahub.elixir-belgium.org/studies/50_51,Extra study,Whole Genome Sequencing,blablabla,,


Dataframe sample:


Unnamed: 0,alias,title,sample_description,collection date,accession,submission date,status,geographic location (country and/or sea),taxon_id,Parameter Value 1,Parameter Value 2
0,https://datahub.elixir-belgium.org/samples/142,Sample title 1,Sample description 1,2023,,,,Afghanistan,1234,,
1,https://datahub.elixir-belgium.org/samples/143,Sample title 2,Sample description 2,2022,,,,Afghanistan,1234,,
2,https://datahub.elixir-belgium.org/samples/144,Sample title 3,Sample description 3,2021,,,,Albania,2345,,
3,https://datahub.elixir-belgium.org/samples/145,Sample title 4,Sample description 4,2020,,,,Albania,2345,,
4,https://datahub.elixir-belgium.org/samples/237,Sample title 1,This is sample 1,2001,,,,Antarctica,254564,A,C
5,https://datahub.elixir-belgium.org/samples/238,Sample title 2,This is sample 2,2002,,,,Antarctica,254564,A,C
6,https://datahub.elixir-belgium.org/samples/239,Sample title 3,This is sample 3,2003,,,,Antarctica,254564,B,C
7,https://datahub.elixir-belgium.org/samples/240,Sample title 4,This is sample 4,2004,,,,Antarctica,254564,A,D


Dataframe experiment:


Unnamed: 0,alias,study_alias,sample_alias,library_name,title,accession,submission date,status,library_construction_protocol,design_description,library_source,library_strategy,library_selection,library_layout,insert_size,platform,instrument_model
0,https://datahub.elixir-belgium.org/samples/146,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/142,Library 1,Library title 1,,,,My special protocol 1,Library description 1,GENOMIC,WGS,RANDOM,SINGLE,123,LS454,454 GS
1,https://datahub.elixir-belgium.org/samples/147,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/143,Library 2,Library title 2,,,,My special protocol 2,Library description 2,GENOMIC SINGLE CELL,WGA,PCR,PAIRED,234,Illumina,Illumina Genome Analyzer
2,https://datahub.elixir-belgium.org/samples/148,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/144,Library 3,Library title 3,,,,My special protocol 3,Library description 3,TRANSCRIPTOMIC,WXS,RANDOM PCR,SINGLE,345,PacBio,PacBio RS
3,https://datahub.elixir-belgium.org/samples/149,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/145,Library 4,Library title 4,,,,My special protocol 4,Library description 4,TRANSCRIPTOMIC SINGLE CELL,RNA-Seq,RT-PCR,PAIRED,456,Themo Fisher Scientific,AB 3730xL Genetic Analyzer
4,https://datahub.elixir-belgium.org/samples/241,https://datahub.elixir-belgium.org/studies/50_51,https://datahub.elixir-belgium.org/samples/237,Library 1,Library title 1,,,,My library construction protocol,,GENOMIC,WGS,PCR,PAIRED,1,LS454,454 GS
5,https://datahub.elixir-belgium.org/samples/242,https://datahub.elixir-belgium.org/studies/50_51,https://datahub.elixir-belgium.org/samples/238,Library 2,Library title 2,,,,My library construction protocol,,GENOMIC,WGS,PCR,SINGLE,1,LS454,454 GS
6,https://datahub.elixir-belgium.org/samples/243,https://datahub.elixir-belgium.org/studies/50_51,https://datahub.elixir-belgium.org/samples/239,Library 3,Library title 3,,,,My library construction protocol,,GENOMIC,WGS,PCR,SINGLE,1,LS454,454 GS
7,https://datahub.elixir-belgium.org/samples/244,https://datahub.elixir-belgium.org/studies/50_51,https://datahub.elixir-belgium.org/samples/240,Library 4,Library title 4,,,,My library construction protocol,,METAGENOMIC,WGS,PCR,SINGLE,1,Illumina,Illumina Genome Analyzer II
8,https://datahub.elixir-belgium.org/samples/245,https://datahub.elixir-belgium.org/studies/50_51,https://datahub.elixir-belgium.org/samples/240,Library 5,Library title 5,,,,My library construction protocol,,METAGENOMIC,WGS,PCR,SINGLE,1,Illumina,Illumina Genome Analyzer II


Dataframe run:


Unnamed: 0,alias,experiment_alias,file_name,file_type,file checksum,accession,submission date,status
0,https://datahub.elixir-belgium.org/samples/150,https://datahub.elixir-belgium.org/samples/146,data_file_1.bam,bam,,,,
1,https://datahub.elixir-belgium.org/samples/151,https://datahub.elixir-belgium.org/samples/147,data_file_2.cram,cram,,,,
2,https://datahub.elixir-belgium.org/samples/152,https://datahub.elixir-belgium.org/samples/148,data_file_3.fastq,fastq,,,,
3,https://datahub.elixir-belgium.org/samples/153,https://datahub.elixir-belgium.org/samples/149,data_file_4.sff,sff,,,,
4,https://datahub.elixir-belgium.org/samples/246...,https://datahub.elixir-belgium.org/samples/241,data_file_1_A.fastq,fastq,,,,
5,https://datahub.elixir-belgium.org/samples/246...,https://datahub.elixir-belgium.org/samples/241,data_file_1_B.fastq,fastq,,,,
6,https://datahub.elixir-belgium.org/samples/248,https://datahub.elixir-belgium.org/samples/242,data_file_3.fastq,fastq,,,,
7,https://datahub.elixir-belgium.org/samples/249,https://datahub.elixir-belgium.org/samples/243,data_file_4.fastq,fastq,,,,
8,https://datahub.elixir-belgium.org/samples/250,https://datahub.elixir-belgium.org/samples/244,data_file_5.fastq,fastq,,,,
9,https://datahub.elixir-belgium.org/samples/251,https://datahub.elixir-belgium.org/samples/245,data_file_6.fastq,fastq,,,,


Done!


# Automated pipeline for DataHub

In [None]:
# Parse the local .env file into a mapping of settings.
config = dotenv_values(dotenv_path=".env")
# Raises KeyError if DATAHUB_API_TOKEN is not defined in the .env file.
datahub_token = config["DATAHUB_API_TOKEN"]

In [None]:
import requests

# Both known endpoints are kept under explicit names so the active target is
# chosen in one visible place instead of by silently reassigning `url`.
DATAHUB_DEV_EXPORT_URL = "https://datahub-dev.elixir-belgium.org/single_pages/16/export_isa"
LOCAL_EXPORT_URL = "http://localhost:3000/single_pages/2/export_isa"
url = LOCAL_EXPORT_URL  # the local instance is the endpoint actually used

# Upper bound (seconds) on connecting and on waiting for data; without it
# requests may block the kernel indefinitely on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 30

data = {"key": "value"}
headers = {"Content-Type": "application/json"}

# NOTE(review): `datahub_token` loaded in the previous cell is not sent with
# this request - confirm whether the endpoint requires authentication.
response = requests.post(
    url, json=data, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
)
