Skip to content

Commit

Permalink
remove code for lesson interactivity
Browse files Browse the repository at this point in the history
  • Loading branch information
widdowquinn committed Mar 6, 2018
1 parent 95c5f99 commit 766a5cf
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 42 deletions.
20 changes: 8 additions & 12 deletions 01-introduction/01-introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -147,21 +147,17 @@
"# Text after a hash (#) is a comment in Python\n",
"\n",
"# Loads the sequence input/output code from Biopython\n",
"from Bio import SeqIO\n",
"\n",
"# This is a relative path, compared to this notebook the FASTA file\n",
"# is under the sub-directory data:\n",
"filename = \"data/glycoside_hydrolases_nt.fasta\"\n",
"# Define a relative path. Relative to this notebook, the FASTA file\n",
"# is in the sub-directory data (\"data/glycoside_hydrolases_nt.fasta\"):\n",
"\n",
"# Using Biopython's SeqIO.parse(...) function with two arguments,\n",
"# the input filename and the file format, here \"fasta\" \n",
"for record in SeqIO.parse(filename, \"fasta\"):\n",
" # Python for loops use indentation, traditionally four spaces\n",
" # These percentage signs are a common way for inserting values\n",
" # into strings, %s for another string, %i for an integer number:\n",
" print(\"%s length %i\" % (record.id, len(record.seq)))\n",
"\n",
"print(\"Done\")"
"# the input filename and the file format, here \"fasta\", loop over\n",
"# all the sequence records in the file\n",
"# HINT:\n",
"# Python for loops use indentation, traditionally four spaces\n",
"# Percentage signs are a common way for inserting values\n",
"# into strings, %s for another string, %i for an integer number:"
]
},
{
Expand Down
19 changes: 6 additions & 13 deletions 01-introduction/02-annotation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,24 +103,17 @@
"# a key point here is if you run this you'll all get the same file. #\n",
"########################################################################\n",
"\n",
"# Biopython's module to access the NCBI Entrez Programming Utilities\n",
"from Bio import Entrez\n",
"# Import Biopython's module to access the NCBI Entrez Programming Utilities\n",
"\n",
"# The NCBI likes to know who is using their services in case of problems,\n",
"Entrez.email = \"your.name.here@example.org\"\n",
"# so define the email address\n",
"\n",
"accession = \"NC_004547\"\n",
"# Create a variable with the accession number\n",
"\n",
"print(\"Fetching %s from NCBI...\" % accession)\n",
"# Fetch a handle using the return type \"gbwithparts\" to match\n",
"# \"GenBank (full)\" on the website\n",
"\n",
"# Return type \"gbwithparts\" matches \"GenBank (full)\" on the website\n",
"fetch_handle = Entrez.efetch(\"nuccore\", id=accession, rettype=\"gbwithparts\", retmode=\"text\")\n",
"\n",
"# Open an output file, and write all the data from the NCBI to it\n",
"with open(accession + \".gbk\", \"w\") as output_handle:\n",
" output_handle.write(fetch_handle.read())\n",
"\n",
"print(\"Saved %s.gbk\" % accession)"
"# Open an output file, and write all the data from the NCBI to it"
]
},
{
Expand Down
34 changes: 17 additions & 17 deletions 01-introduction/03-parsing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -64,24 +64,28 @@
"metadata": {},
"outputs": [],
"source": [
"# Biopython's SeqIO module handles sequence input/output\n",
"from Bio import SeqIO\n",
"# Import Biopython's SeqIO module to handle sequence input/output\n",
"\n",
"# We provide this function because it's a pain to type out\n",
"def get_cds_feature_with_qualifier_value(seq_record, name, value):\n",
" \"\"\"Function to look for CDS feature by annotation value in sequence record.\n",
" \n",
" e.g. You can use this for finding features by locus tag, gene ID, or protein ID.\n",
" e.g. You can use this for finding features by locus tag, gene ID,\n",
" or protein ID.\n",
" \"\"\"\n",
" # Loop over the features\n",
" for feature in genome_record.features:\n",
" if feature.type == \"CDS\" and value in feature.qualifiers.get(name, []):\n",
" if feature.type == \"CDS\" and value in \\\n",
" feature.qualifiers.get(name, []):\n",
" return feature\n",
" # Could not find it\n",
" return None\n",
"\n",
"# Read the downloaded file with SeqIO.read()\n",
"genome_record = SeqIO.read(\"NC_004547.gbk\", \"genbank\")\n",
"cds_feature = get_cds_feature_with_qualifier_value(genome_record, \"old_locus_tag\", \"ECA0662\")\n",
"print(cds_feature)"
"\n",
"# Get the feature with old_locus_tag ECA0662 and print the result\n",
"# (use variable name cds_feature)\n"
]
},
{
Expand Down Expand Up @@ -138,7 +142,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(cds_feature.location)"
"# Print the location string for cds_feature"
]
},
{
Expand All @@ -154,11 +158,9 @@
"metadata": {},
"outputs": [],
"source": [
"gene_sequence = cds_feature.extract(genome_record.seq)\n",
"print(\"CDS nucleotide sequence:\")\n",
"print(gene_sequence)\n",
"print(\"Start codon is %s\" % gene_sequence[:3]) # Python's way to get first three letters\n",
"print(\"Stop codon is %s\" % gene_sequence[-3:]) # Python trick for last three letters"
"# Extract the CDS feature sequence to the variable gene_sequence\n",
"\n",
"# Print information on the gene sequence start/stop codons"
]
},
{
Expand All @@ -184,9 +186,7 @@
"metadata": {},
"outputs": [],
"source": [
"protein_sequence = gene_sequence.translate(table=11, cds=True)\n",
"print(\"Translated into amino acids:\")\n",
"print(protein_sequence)"
"# Translate the gene_sequence to protein and print the sequence"
]
},
{
Expand All @@ -204,7 +204,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(protein_sequence == cds_feature.qualifiers[\"translation\"][0])"
"# Check that our translation is the same as the annotation"
]
},
{
Expand All @@ -225,7 +225,6 @@
"source": [
"# This assumes you've already loaded the GenBank file as genome_record,\n",
"# and have the function get_cds_feature_with_qualifier_value defined.\n",
"\n",
"old_tags = [\"ECA0662\", \"ECA1451\", \"ECA1871\", \"ECA2166\",\n",
" \"ECA3646\", \"ECA4387\", \"ECA4407\", \"ECA4432\"]\n",
"\n",
Expand Down Expand Up @@ -256,6 +255,7 @@
"# Biopython's SeqIO module handles sequence input/output\n",
"from Bio import SeqIO\n",
"\n",
"# We provide this function because it's a pain to type out\n",
"def get_cds_feature_with_qualifier_value(seq_record, name, value):\n",
" \"\"\"Function to look for CDS feature by annotation value in sequence record.\n",
" \n",
Expand Down

0 comments on commit 766a5cf

Please sign in to comment.