remove code for lesson interactivity

widdowquinn · Mar 6, 2018 · 766a5cf · 766a5cf
1 parent 95c5f99
commit 766a5cf
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 42 deletions.
diff --git a/01-introduction/01-introduction.ipynb b/01-introduction/01-introduction.ipynb
@@ -147,21 +147,17 @@
     "# Text after a hash (#) is a comment in Python\n",
     "\n",
     "# Loads the sequence input/output code from Biopython\n",
-    "from Bio import SeqIO\n",
     "\n",
-    "# This is a relative path, compared to this notebook the FASTA file\n",
-    "# is under the sub-directory data:\n",
-    "filename = \"data/glycoside_hydrolases_nt.fasta\"\n",
+    "# Define a relative path. Compared to this notebook the FASTA file\n",
+    "# is under the sub-directory data (\"data/glycoside_hydrolases_nt.fasta\"):\n",
     "\n",
     "# Using Biopython's SeqIO.parse(...) function with two arguments,\n",
-    "# the input filename and the file format, here \"fasta\" \n",
-    "for record in SeqIO.parse(filename, \"fasta\"):\n",
-    "    # Python for loops use indentation, traditionally four spaces\n",
-    "    # These percentage signs are a common way for inserting values\n",
-    "    # into strings, %s for another string, %i for an integer number:\n",
-    "    print(\"%s length %i\" % (record.id, len(record.seq)))\n",
-    "\n",
-    "print(\"Done\")"
+    "# the input filename and the file format, here \"fasta\", loop over\n",
+    "# all the sequence records in the file\n",
+    "# HINT:\n",
+    "# Python for loops use indentation, traditionally four spaces\n",
+    "# Percentage signs are a common way for inserting values\n",
+    "# into strings, %s for another string, %i for an integer number:"
    ]
   },
   {

diff --git a/01-introduction/02-annotation.ipynb b/01-introduction/02-annotation.ipynb
@@ -103,24 +103,17 @@
     "# a key point here is if you run this you'll all get the same file.    #\n",
     "########################################################################\n",
     "\n",
-    "# Biopython's module to access the NCBI Entrez Programming Utilities\n",
-    "from Bio import Entrez\n",
+    "# Import Biopython's module to access the NCBI Entrez Programming Utilities\n",
     "\n",
     "# The NCBI likes to know who is using their services in case of problems,\n",
-    "Entrez.email = \"your.name.here@example.org\"\n",
+    "# so define the email address\n",
     "\n",
-    "accession = \"NC_004547\"\n",
+    "# Create a variable with the accession number\n",
     "\n",
-    "print(\"Fetching %s from NCBI...\" % accession)\n",
+    "# Fetch a handle using the return type \"gbwithparts\" to match\n",
+    "# \"GenBank (full)\" on the website\n",
     "\n",
-    "# Return type \"gbwithparts\" matches \"GenBank (full)\" on the website\n",
-    "fetch_handle = Entrez.efetch(\"nuccore\", id=accession, rettype=\"gbwithparts\", retmode=\"text\")\n",
-    "\n",
-    "# Open an output file, and write all the data from the NCBI to it\n",
-    "with open(accession + \".gbk\", \"w\") as output_handle:\n",
-    "    output_handle.write(fetch_handle.read())\n",
-    "\n",
-    "print(\"Saved %s.gbk\" % accession)"
+    "# Open an output file, and write all the data from the NCBI to it"
    ]
   },
   {

diff --git a/01-introduction/03-parsing.ipynb b/01-introduction/03-parsing.ipynb
@@ -64,24 +64,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Biopython's SeqIO module handles sequence input/output\n",
-    "from Bio import SeqIO\n",
+    "# Import Biopython's SeqIO module to handle sequence input/output\n",
     "\n",
+    "# We provide this function because it's a pain to type out\n",
     "def get_cds_feature_with_qualifier_value(seq_record, name, value):\n",
     "    \"\"\"Function to look for CDS feature by annotation value in sequence record.\n",
     "    \n",
-    "    e.g. You can use this for finding features by locus tag, gene ID, or protein ID.\n",
+    "    e.g. You can use this for finding features by locus tag, gene ID,\n",
+    "    or protein ID.\n",
     "    \"\"\"\n",
     "    # Loop over the features\n",
     "    for feature in genome_record.features:\n",
-    "        if feature.type == \"CDS\" and value in feature.qualifiers.get(name, []):\n",
+    "        if feature.type == \"CDS\" and value in \\\n",
+    "                feature.qualifiers.get(name, []):\n",
     "            return feature\n",
     "    # Could not find it\n",
     "    return None\n",
     "\n",
+    "# Read the downloaded file with SeqIO.read()\n",
     "genome_record = SeqIO.read(\"NC_004547.gbk\", \"genbank\")\n",
-    "cds_feature = get_cds_feature_with_qualifier_value(genome_record, \"old_locus_tag\", \"ECA0662\")\n",
-    "print(cds_feature)"
+    "\n",
+    "# Get the feature with old_locus_tag ECA0662 and print the result\n",
+    "# (use variable name cds_feature)\n"
    ]
   },
   {
@@ -138,7 +142,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(cds_feature.location)"
+    "# Print the location string for cds_feature"
    ]
   },
   {
@@ -154,11 +158,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "gene_sequence = cds_feature.extract(genome_record.seq)\n",
-    "print(\"CDS nucleotide sequence:\")\n",
-    "print(gene_sequence)\n",
-    "print(\"Start codon is %s\" % gene_sequence[:3])  # Python's way to get first three letters\n",
-    "print(\"Stop codon is %s\" % gene_sequence[-3:])  # Python trick for last three letters"
+    "# Extract the CDS feature sequence to the variable gene_sequence\n",
+    "\n",
+    "# Print information on the gene sequence start/stop codons"
    ]
   },
   {
@@ -184,9 +186,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "protein_sequence = gene_sequence.translate(table=11, cds=True)\n",
-    "print(\"Translated into amino acids:\")\n",
-    "print(protein_sequence)"
+    "# Translate the gene_sequence to protein and print the sequence"
    ]
   },
   {
@@ -204,7 +204,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(protein_sequence == cds_feature.qualifiers[\"translation\"][0])"
+    "# Check that our translation is the same as the annotation"
    ]
   },
   {
@@ -225,7 +225,6 @@
    "source": [
     "# This assumes you've already loaded the GenBank file as genome_record,\n",
     "# and have the function get_cds_feature_with_qualifier_value defined.\n",
-    "\n",
     "old_tags = [\"ECA0662\", \"ECA1451\", \"ECA1871\", \"ECA2166\",\n",
     "            \"ECA3646\", \"ECA4387\", \"ECA4407\", \"ECA4432\"]\n",
     "\n",
@@ -256,6 +255,7 @@
     "# Biopython's SeqIO module handles sequence input/output\n",
     "from Bio import SeqIO\n",
     "\n",
+    "# \n",
     "def get_cds_feature_with_qualifier_value(seq_record, name, value):\n",
     "    \"\"\"Function to look for CDS feature by annotation value in sequence record.\n",
     "    \n",