In [1]:
import sys 
# Add the directory two levels up to the module search path so that `dnaquine`
# can be imported (presumably the repository root — TODO confirm layout).
sys.path.append("../../")
# Wildcard import: brings DNA, cutdna, cropdna, modifyends, etc. into the notebook namespace.
from dnaquine import *

**Example code 1: Create a blunt-end DNA object by specifying its sequence** 

In [2]:
# A plain nucleotide string (no "/" strand separator) describes a blunt-ended fragment.
blunt_sequence = "CCGGTATGCGTCGA"
fragment = DNA(seq=blunt_sequence)

**Example code 2: Create a sticky-end DNA object by specifying its structure**

In [3]:
# "top/bottom" notation: "-" marks positions where a strand has no nucleotide,
# which leaves single-stranded overhangs (sticky ends) on both sides.
sticky_structure = "CCGGTATGCG----/----ATACGCAGCT"
fragment = DNA(seq=sticky_structure)
# Kept as the last expression so the returned strand tuple is echoed by the notebook.
fragment.getdnaseq(display=True)

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


('CCGGTATGCG----', 'TCGACGCATA----')

**Example code 3: Create an annotated DNA object from GenBank format file**

In [4]:
# Load the annotated pGGA plasmid from its GenBank record; `plasmid` is reused
# by most of the later examples.
genbank_path = "input/pGGA.gb"
plasmid = DNA(record=genbank_path)
print(plasmid)

<dna.DNA object; project='pGGA', length='2174 bp', topology='circular'>


**Example code 4: Print a double-strand DNA sequence with sticky ends**

In [5]:
# `dnaquine` is already imported by the first cell, so the wildcard import is not repeated here.
# Build a fragment with 4-nt overhangs on both ends, then print both strands.
fragment = DNA(seq="CCGGTATGCG----/----ATACGCAGCT")
# display=True prints the double-stranded layout; the call is left as the cell's
# last expression so the returned (top, bottom) tuple is also shown.
fragment.getdnaseq(display=True)

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


('CCGGTATGCG----', 'TCGACGCATA----')

**Example code 5: Print DNAfeatures with formatted table**

In [6]:
# Print the plasmid's features as a formatted table (no feature list or
# attribute filter is passed).
plasmid.printfeature()

feature_id  feature_type  qualifier:label                  start  end   strand  
0           source        null                             0      2174  +       
1100        source        pGGA                             0      2174  +       
100         primer_bind   Forward (CW) Analysis Primer     233    260   +       
200         promoter      SP6 promoter                     253    272   +       
300         misc_feature  upstream MCS                     282    304   +       
400         misc_feature  BsaI insert                      314    359   +       
500         misc_feature  downstream MCS                   372    406   +       
600         primer_bind   Cloning Analysis Reverse Primer  410    436   -       
700         promoter      T7 promoter                      417    436   -       
800         CDS           CmR                              546    1206  -       
900         promoter      cat promoter                     1206   1309  -       
1000        rep_origin    or

**Example code 6: Search DNA sequences with a regular expression**

In [7]:
# Regular expression: any 20 bases, then any single base followed by "GG".
ngg_site_regex = "[ATGC]{20}[ATGC]GG"
feature_list = plasmid.finddna(key_attribute="sequence", query=ngg_site_regex)

# Show only the location columns plus the matched sequence.
location_columns = ["start", "end", "strand"]
plasmid.printfeature(feature_list, seq=True, attribute=location_columns)

start  end   strand  sequence                 
88     111   +       ACGACTCAACAGCTTAACGTTGG  
137    160   +       ACTCTCACTCTTACCGAACTTGG  
200    223   +       AAACGAATCGACCGATTGTTAGG  
237    260   +       AGGAAGGTTTAAACGCATTTAGG  
267    290   +       ATAGAAGTGTGTATCGCTCGAGG  
293    316   +       CCGAATTCGAAGACTTGGTACGG  
327    350   +       TTTCCAGATCTGATAACTTGTGG  
376    399   +       TTAAGACGTCAGAATTCTCGAGG  
422    445   +       AGTGAGTCGTATTAATTTCGCGG  
490    513   +       ATCCGCTCATGAGTAGCACCAGG  
587    610   +       CATTAAGCATTCTGCCGACATGG  
628    651   +       TGATGAACCTGAATCGCCAGCGG  
669    692   +       TTGCGTATAATATTTGCCCATGG  
702    725   +       GGCGAAGAAGTTGTCCATATTGG  
738    761   +       AAAACTGGTGAAACTCACCCAGG  
781    804   +       ATATTCTCAATAAACCCTTTAGG  
845    868   +       AATATATGTGTAGAAACTGCCGG  
899    922   +       AAAACGTTTCAGTTTGCTCATGG  
965    988   +       CACCGTCTTTCATTGCCATACGG  
989    1012  +       ATTCCGGATGAGCATTCATCAGG  
1015   1038  

In [8]:
# The query is written with degenerate nucleotide letters (S, W, D, B, H, R)
# rather than plain A/T/G/C — presumably IUPAC ambiguity codes; confirm with the library docs.
degenerate_query = "SWSWSWDSDSBHBRHH"
feature_list = plasmid.finddna(
    query=degenerate_query,
    key_attribute="sequence",
)
plasmid.printfeature(feature_list, seq=True)

feature_id  feature_type  qualifier:label  start  end   strand  sequence          
null        misc_feature  null             90     106   +       GACTCAACAGCTTAAC  
null        misc_feature  null             1932   1948  -       GTCAGAGGTGGCGAAA  



**Example code 7: Search CDS features**

In [9]:
# No key_attribute is given here; only the query string "CDS" is passed.
cds_query = "CDS"
feature_list = plasmid.finddna(query=cds_query)

# Print the full record of every hit, one after another.
for cds_feature in feature_list:
    print(cds_feature)

type: CDS
location: [546:1206](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: gene, Value: ['cat']
    Key: label, Value: ['CmR']
    Key: note, Value: ['confers resistance to chloramphenicol']
    Key: product, Value: ['chloramphenicol acetyltransferase']
    Key: translation, Value: ['MEKKITGYTTVDISQWHRKEHFEAFQSVAQCTYNQTVQLDITAFLKTVKKNKHKFYPAFIHILARLMNAHPEFRMAMKDGELVIWDSVHPCYTVFHEQTETFSSLWSEYHDDFRQFLHIYSQDVACYGENLAYFPKGFIENMFFVSANPWVSFTSFDLNVANMDNFFAPVFTMGKYYTQGDKVLMPLAIQVHHAVCDGFHVGRMLNELQQYCDEWQGGA']



**Example code 8: Cut pGGA plasmid at multiple positions**

In [10]:
# Cutting the circular plasmid at two positions yields two linear fragments.
plasmid_cut_positions = (1000, 2000)
fragment1, fragment2 = cutdna(plasmid, *plasmid_cut_positions)
for piece in (fragment1, fragment2):
    print(piece)

# A single cut in the linear fragment2 splits it into two further pieces.
fragment3, fragment4 = cutdna(fragment2, 500)
for piece in (fragment3, fragment4):
    print(piece)

<dna.DNA object; project='pGGA', length='1000 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='1174 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='500 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='674 bp', topology='linear'>


**Example code 9: Crop a fragmented dna object in a specific region**  
If you only need fragment2, use the ```cropdna``` function as follows.

In [11]:
# Start (2000) is larger than end (1000): on the circular 2174-bp plasmid the
# cropped region runs past the end of the sequence and wraps around (1174 bp in total).
crop_start, crop_end = 2000, 1000
fragment2 = cropdna(plasmid, crop_start, crop_end)
print(fragment2)

<dna.DNA object; project='pGGA', length='1174 bp', topology='linear'>


**Example code 10: Digest pGGA plasmid by EcoRI**  
The recognition sequence that EcoRI cuts is "5′-G^AATT_C-3′". There are three EcoRI sites in the pGGA plasmid. First, search for the EcoRI recognition sequence in the pGGA plasmid. Then, cut each EcoRI site according to its cutting format.  

In [12]:
# EcoRI site in cutting-format notation: "^" and "_" mark the cut positions
# (presumably top and bottom strand respectively — confirm with the library docs).
ecori_site = "G^AATT_C"
sites = plasmid.finddna(ecori_site, key_attribute="sequence")

# Cut at every site found, then show each fragment with its overhangs;
# hide_middle=10 abbreviates the interior of long sequences.
fragments = cutdna(plasmid, *sites)
for fragment in fragments:
    print(fragment)
    fragment.getdnaseq(display=True, hide_middle=10)

<dna.DNA object; project='pGGA', length='96 bp', topology='linear'>
5' AATTCGAAGA...CGTCAG---- 3'
3' ----GCTTCT...GCAGTCTTAA 5'

<dna.DNA object; project='pGGA', length='604 bp', topology='linear'>
5' AATTCTCGAG...ATACGG---- 3'
3' ----GAGCTC...TATGCCTTAA 5'

<dna.DNA object; project='pGGA', length='1486 bp', topology='linear'>
5' AATTCCGGAT...GATCCG---- 3'
3' ----GGCCTA...CTAGGCTTAA 5'



**Example code 11: Digest pGGA plasmid by BsaI**  
The recognition sequence that BsaI cuts is "5′-GGTCTC(1/5)-3′". There are two BsaI sites in the pGGA plasmid. First, search for the BsaI recognition sequence in the pGGA plasmid. Then, cut each BsaI site according to its cutting format.  
https://international.neb.com/tools-and-resources/selection-charts/enzymes-with-nonpalindromic-sequences

In [13]:
# Two notations for the same BsaI cutting format are run back to back:
# explicit cut markers ("^"/"_") first, then the "(1/5)" offset form.
# Both passes go through the same search -> print -> cut -> display sequence.
bsai_notations = ("GGTCTCN^NNNN_", "GGTCTC(1/5)")
for bsai_site in bsai_notations:
    sites = plasmid.finddna(bsai_site, key_attribute="sequence")
    plasmid.printfeature(sites, attribute=["start","end","strand","sequence"])
    fragments = cutdna(plasmid, *sites)
    for fragment in fragments:
        print(fragment)
        fragment.getdnaseq(display=True, hide_middle=10)

start  end  strand  sequence     
348    359  +       GGTCTCACCAT  
314    325  -       GGTCTCGCTCC  

<dna.DNA object; project='pGGA', length='2137 bp', topology='linear'>
5' CCATTCCTGT...TGGTAC---- 3'
3' ----AGGACA...ACCATGCCTC 5'

<dna.DNA object; project='pGGA', length='45 bp', sequence='GGAGCGAGACCGCTTTCCAGATCTGATAACTTGTGGTCTCACCAT', topology='linear'>
5' GGAGCGAGAC...GTCTCA---- 3'
3' ----GCTCTG...CAGAGTGGTA 5'

start  end  strand  sequence     
348    359  +       GGTCTCACCAT  
314    325  -       GGTCTCGCTCC  

<dna.DNA object; project='pGGA', length='2137 bp', topology='linear'>
5' CCATTCCTGT...TGGTAC---- 3'
3' ----AGGACA...ACCATGCCTC 5'

<dna.DNA object; project='pGGA', length='45 bp', sequence='GGAGCGAGACCGCTTTCCAGATCTGATAACTTGTGGTCTCACCAT', topology='linear'>
5' GGAGCGAGAC...GTCTCA---- 3'
3' ----GCTCTG...CAGAGTGGTA 5'



**Example code 12: Trim single-stranded DNA on both ends to generate sticky ends**  
Sticky ends can be generated by trimming single-stranded DNA sequences when the end structures are given as top- and bottom-strand strings composed of "\*" and "-", separated by "/". The letter "-" marks nucleotides to be trimmed and "\*" marks nucleotides to be retained. 

In [14]:
# Start from a blunt 20-bp fragment and show it before trimming.
fragment = cropdna(plasmid, 100, 120)
fragment.getdnaseq(display=True)

# End structures are "top/bottom": "-" trims a nucleotide, "*" keeps it.
left_end_structure = "----/****"   # remove 4 nt from the top strand on the left
right_end_structure = "**/--"      # remove 2 nt from the bottom strand on the right
fragment = modifyends(fragment, left_end_structure, right_end_structure)
fragment.getdnaseq(display=True)

5' CTTAACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGTGC 5'
5' ----ACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGT-- 5'


('----ACGTTGGCTTGCCACG', '--TGGCAAGCCAACGTTAAG')

The following code performs the same operation as the one above.

In [15]:
# Positions written as "a/b" give separate coordinates for the two strands
# (presumably top/bottom — confirm), so cropping yields sticky ends directly.
left_boundary = '104/100'
right_boundary = '120/118'
fragment = cropdna(plasmid, left_boundary, right_boundary)
fragment.getdnaseq(display=True)

5' ----ACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGT-- 5'


('----ACGTTGGCTTGCCACG', '--TGGCAAGCCAACGTTAAG')

A regex-like format can be used to specify the end structure.

In [16]:
fragment = cropdna(plasmid, 100, 120)

# "{5}" repeats the preceding symbol five times, so "-{5}/*{5}" trims 5 nt
# from the top strand while keeping the matching 5 nt on the bottom strand.
left_end_structure = "-{5}/*{5}"
right_end_structure = "*{5}/-{5}"
fragment = modifyends(fragment, left_end_structure, right_end_structure)
fragment.getdnaseq(display=True)

5' -----CGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAAC----- 5'


('-----CGTTGGCTTGCCACG', '-----CAAGCCAACGTTAAG')

**Example code 13: Add sticky ends to a blunt end DNA object**  
If end sequence structures are given with nucleotide letters, they are added to the DNA ends. The same operation can be achieved by the join function described later.

In [17]:
fragment = cropdna(plasmid, 100, 120)

# End structures that contain nucleotide letters are appended to the fragment
# instead of trimming it; "-" still marks positions with no nucleotide.
left_extension = "---ATGC/ATGTACG"
right_extension = "TACG---/ATGCTAC"
fragment = modifyends(fragment, left_extension, right_extension)
fragment.getdnaseq(display=True)

5' ---ATGCCTTAACGTTGGCTTGCCACGTACG--- 3'
3' ATGTACGGAATTGCAACCGAACGGTGCATGCTAC 5'


('---ATGCCTTAACGTTGGCTTGCCACGTACG---', 'CATCGTACGTGGCAAGCCAACGTTAAGGCATGTA')

**Example code 14: Flip chloramphenicol resistance gene in pGGA plasmid**

In [18]:
# Locate the CmR feature and excise it by cutting at both of its boundaries.
site = plasmid.finddna("CmR")[0]
cmr_start, cmr_end = site.start, site.end
fragment1, fragment2 = cutdna(plasmid, cmr_start, cmr_end)

# Flip the excised fragment and re-circularise it with the remaining backbone.
fragment1 = flipdna(fragment1)
new_plasmid = joindna(fragment1, fragment2, topology="circular")

# Compare the CmR annotation before and after the flip.
for construct in (plasmid, new_plasmid):
    construct.printfeature(construct.finddna("CmR"))

feature_id  feature_type  qualifier:label  start  end   strand  
800         CDS           CmR              546    1206  -       

feature_id  feature_type  qualifier:label  start  end  strand  
100         CDS           CmR              0      660  +       



**Example code 15: Break start codon of the CDS features**

In [19]:
# Show the first 20 nt of the (first) CDS before editing.
original_cds = plasmid.finddna("CDS")[0]
print("CmR:", plasmid.getdnaseq(original_cds)[0:20])

# Within every CDS feature, replace "ATG" by "GTG" in the range (0, 3).
new_plasmid = editsequence(
    plasmid,
    key_attribute="feature_type",
    query="CDS",
    target_range=(0,3),
    old_value="ATG",
    new_value="GTG",
)

# NOTE(review): the feature is looked up on the ORIGINAL plasmid and then used to
# slice the edited one; this relies on the edit leaving coordinates unchanged — confirm.
cmr_on_original = plasmid.finddna("CmR")[0]
print("CmR:", new_plasmid.getdnaseq(cmr_on_original)[0:20])

CmR: ATGGAGAAAAAAATCACTGG
CmR: GTGGAGAAAAAAATCACTGG


**Example code 16: Convert feature type from ‘CDS’ to ‘gene’**

In [20]:
# Before: the feature is annotated as a CDS.
plasmid.printfeature(plasmid.finddna("CDS"))

# Rewrite the feature type of every CDS feature to "gene".
# NOTE(review): this cell spells the attribute "feature type" while other cells
# use "feature_type"; the recorded output shows this spelling is accepted.
new_plasmid = editfeature(
    plasmid,
    key_attribute="feature type",
    query="CDS",
    target_attribute="feature type",
    operation=replaceattribute("gene"),
)

# After: the same feature is now found under the "gene" type.
new_plasmid.printfeature(new_plasmid.finddna("gene"))

feature_id  feature_type  qualifier:label  start  end   strand  
800         CDS           CmR              546    1206  -       

feature_id  feature_type  qualifier:label  start  end   strand  
800         gene          CmR              546    1206  -       



**Example code 17: Create new features where SpCas9 can bind.** 

In [21]:
# Step 1: create a feature for every sequence match of the NGG-site regex,
# with ids generated from the "spCas9_target*" template.
new_plasmid = editfeature(
    plasmid,
    key_attribute="sequence",
    query="[ATGC]{20}[ATGC]GG",
    target_attribute="feature id",
    operation=createattribute("spCas9_target*"),
)

# Step 2: select those features by id and set their feature type to "misc_bind".
new_plasmid = editfeature(
    new_plasmid,
    key_attribute="feature id",
    query="spCas9_target[0-9]+",
    target_attribute="feature type",
    operation=replaceattribute("misc_bind"),
)

In [22]:
# Collect the features of new_plasmid that match the query "misc_bind".
features = new_plasmid.finddna("misc_bind")

In [23]:
# Restrict the table to the id, type, location, and sequence columns.
target_columns = [
    "feature_id",
    "feature_type",
    "start",
    "end",
    "strand",
    "sequence",
]
new_plasmid.printfeature(features, attribute=target_columns)

feature_id        feature_type  start  end   strand  sequence                 
spCas9_target1    misc_bind     137    160   +       ACTCTCACTCTTACCGAACTTGG  
spCas9_target2    misc_bind     200    223   +       AAACGAATCGACCGATTGTTAGG  
spCas9_target3    misc_bind     237    260   +       AGGAAGGTTTAAACGCATTTAGG  
spCas9_target4    misc_bind     267    290   +       ATAGAAGTGTGTATCGCTCGAGG  
spCas9_target5    misc_bind     293    316   +       CCGAATTCGAAGACTTGGTACGG  
spCas9_target6    misc_bind     327    350   +       TTTCCAGATCTGATAACTTGTGG  
spCas9_target7    misc_bind     376    399   +       TTAAGACGTCAGAATTCTCGAGG  
spCas9_target8    misc_bind     422    445   +       AGTGAGTCGTATTAATTTCGCGG  
spCas9_target9    misc_bind     490    513   +       ATCCGCTCATGAGTAGCACCAGG  
spCas9_target10   misc_bind     587    610   +       CATTAAGCATTCTGCCGACATGG  
spCas9_target11   misc_bind     628    651   +       TGATGAACCTGAATCGCCAGCGG  
spCas9_target12   misc_bind     669    692   +      

In [24]:
# "sequence:|300..400|" addresses a coordinate range rather than a sequence
# pattern, so no query is needed; createattribute() is called without a
# template and the new feature gets an automatically assigned id.
region_selector = "sequence:|300..400|"
new_plasmid = editfeature(
    plasmid,
    key_attribute=region_selector,
    query=None,
    target_attribute="feature_id",
    operation=createattribute(),
)
new_plasmid.printfeature()

feature_id  feature_type  qualifier:label                  start  end   strand  
0           source        null                             0      2174  +       
1100        source        pGGA                             0      2174  +       
100         primer_bind   Forward (CW) Analysis Primer     233    260   +       
200         promoter      SP6 promoter                     253    272   +       
300         misc_feature  upstream MCS                     282    304   +       
101         misc_feature  null                             300    400   +       
400         misc_feature  BsaI insert                      314    359   +       
500         misc_feature  downstream MCS                   372    406   +       
600         primer_bind   Cloning Analysis Reverse Primer  410    436   -       
700         promoter      T7 promoter                      417    436   -       
800         CDS           CmR                              546    1206  -       
900         promoter      ca