## Install the Gen3 SDK client and import modules

In [None]:
!pip install gen3

In [2]:
from gen3.file import Gen3File
from gen3.query import Gen3Query
from gen3.auth import Gen3Auth
from gen3.submission import Gen3Submission
from gen3.index import Gen3Index

## Setting up data commons access

In [3]:
# Base URL of the data commons; every client below is constructed against it.
endpoint = "https://caninedc.org/"
# Build the auth object from the credentials file at this path.
auth = Gen3Auth(
    endpoint,
    refresh_file="/home/jovyan/.gen3/credentials.json",
)
# Submission and file clients share the same endpoint and auth object.
sub = Gen3Submission(endpoint, auth)
file = Gen3File(endpoint, auth)

## Sample code to pull programs and projects

In [4]:
# Ask the submission client for its programs; the recorded output is a dict with a 'links' list.
sub.get_programs()

{'links': ['/v0/submission/Canine']}

If the above returns `{'links': []}` (an empty list of programs), skip the next cell.

In [5]:
# "Canine" is the program name from the '/v0/submission/Canine' link returned by get_programs().
sub.get_projects("Canine")

{'links': ['/v0/submission/Canine/Korean_DongGyeongi',
  '/v0/submission/Canine/Osteosarcoma',
  '/v0/submission/Canine/Cornell_GWAS',
  '/v0/submission/Canine/Mizzou_Comparative_Resequencing',
  '/v0/submission/Canine/Glioma',
  '/v0/submission/Canine/Bladder_cancer',
  '/v0/submission/Canine/melanoma',
  '/v0/submission/Canine/B_cell_lymphoma',
  '/v0/submission/Canine/Non-Hodgkin_lymphoma',
  '/v0/submission/Canine/NHGRI',
  '/v0/submission/Canine/PMed_trial']}

## Use the Gen3 SDK to download a sample FASTQ file (GUID provided)

In [None]:
# Pull the object identified by this GUID with the gen3 CLI; the next cell
# expects it to arrive as SRR7012463_1.fastq.gz in the working directory.
!gen3 drs-pull object dg.C78ne/4527012c-3a5f-481d-820c-da7b77a26b48

In [None]:
!gunzip SRR7012463_1.fastq.gz

## Use Bioinfokit to read the FASTQ file and get information such as sequence, base counts, etc.

In [None]:
!pip install bioinfokit

In [None]:
from itertools import islice
from bioinfokit.analys import fastq
# Number of records to preview. Printing every record of a large FASTQ file
# floods the notebook output; set to None to print all of them.
MAX_RECORDS = 5
# Each record from the reader unpacks into four fields; only the sequence and
# quality fields are used below, the other two are discarded.
records = fastq.fastq_reader(file='SRR7012463_1.fastq')
for _, sequence, _, quality in islice(records, MAX_RECORDS):
    # Occurrences of each of the four bases A, C, G, T in this sequence.
    base_count = {base: sequence.count(base) for base in 'ACGT'}
    print(sequence)
    print(quality)
    print(base_count)

The code snippet above prints basic information from the FASTQ file, such as the sequence, quality string, and base counts. For details on the FASTQ file format, see https://support.illumina.com/bulletins/2016/04/fastq-files-explained.html