In [1]:
import sys
# Put the directory two levels above the working directory on the module search
# path (presumably where the local `dna` module lives -- confirm against the repo layout).
sys.path.append("../../")
# Star import: the names used in the cells below (DNA, cutdna, cropdna, modifyends, ...)
# are expected to come from this module.
from dna import *

**Example code 1: Create a blunt-end DNA object by specifying its sequence** 

In [2]:
# Build a DNA object from a plain sequence string (the blunt-end case named in the heading above).
fragment = DNA(seq="CCGGTATGCGTCGA")

**Example code 2: Create a sticky-end DNA object by specifying its structure**

In [3]:
# "top/bottom" notation: the two strands are separated by "/", and "-" marks a
# position with no base on that strand, which yields the overhangs printed below.
fragment = DNA(seq="CCGGTATGCG----/----ATACGCAGCT")
# display=True prints both strands with their 5'/3' labels.
fragment.getdnaseq(display=True)

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


**Example code 3: Create an annotated DNA object from GenBank format file**

In [4]:
# Load the pGGA plasmid from a GenBank file; the path is relative to the notebook's working directory.
plasmid = DNA(record="input/pGGA.gb")
# The printed summary reports project name, length and topology.
print(plasmid)

<dna.DNA object; project='pGGA', length='2174 bp', topology='circular'>


**Example code 4: Print a double-strand DNA sequence with sticky ends**

In [5]:
# `fragment` is still the sticky-end object created in In [3].
fragment.getdnaseq(display=True)

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


**Example code 5: Search for a DNA sequence with a regular expression**

In [6]:
# Regex search over the plasmid sequence: 20 bases, then any base, then "GG".
feature_list = plasmid.finddna(query="[ATGC]{20}[ATGC]GG", key_attribute="sequence")
for feature in feature_list:
    # Pull the matched subsequence on the strand where the hit was found.
    matched_seq = plasmid.getdnaseq(feature.start, feature.end, feature.strand)
    print(feature.start, feature.end, matched_seq, sep="\t")

88	111	ACGACTCAACAGCTTAACGTTGG
137	160	ACTCTCACTCTTACCGAACTTGG
200	223	AAACGAATCGACCGATTGTTAGG
237	260	AGGAAGGTTTAAACGCATTTAGG
267	290	ATAGAAGTGTGTATCGCTCGAGG
293	316	CCGAATTCGAAGACTTGGTACGG
327	350	TTTCCAGATCTGATAACTTGTGG
376	399	TTAAGACGTCAGAATTCTCGAGG
422	445	AGTGAGTCGTATTAATTTCGCGG
490	513	ATCCGCTCATGAGTAGCACCAGG
587	610	CATTAAGCATTCTGCCGACATGG
628	651	TGATGAACCTGAATCGCCAGCGG
669	692	TTGCGTATAATATTTGCCCATGG
702	725	GGCGAAGAAGTTGTCCATATTGG
738	761	AAAACTGGTGAAACTCACCCAGG
781	804	ATATTCTCAATAAACCCTTTAGG
845	868	AATATATGTGTAGAAACTGCCGG
899	922	AAAACGTTTCAGTTTGCTCATGG
965	988	CACCGTCTTTCATTGCCATACGG
989	1012	ATTCCGGATGAGCATTCATCAGG
1015	1038	GCAAGAATGTGAATAAAGGCCGG
1044	1067	CTTGTGCTTATTTTTCTTTACGG
1077	1100	GGCCGTAATATCCAGCTGAACGG
1142	1165	AATGTTCTTTACGATGCCATTGG
1234	1257	GATAACTCAAAAAATACGCCCGG
1266	1289	TATTTCATTATGGTGAAAGTTGG
1380	1403	GACCCCGTAGAAAAGATCAAAGG
1451	1474	AAAAAAACCACCGCTACCAGCGG
1496	1519	GCTACCAACTCTTTTTCCGAAGG
1557	1580	CTTCTAGTGTAGCCGTAGTTAGG
1622	1645	TCTGCTAATC

In [7]:
# The query letters (S, W, D, B, H, R) appear to be IUPAC degenerate base codes;
# the hits printed below are consistent with that reading -- TODO confirm in the dna module.
feature_list = plasmid.finddna(query="SWSWSWDSDSBHBRHH", key_attribute="sequence")
# seq=True adds a "sequence" column to the printed feature table.
plasmid.printfeature(feature_list, seq=True)

feature_id  qualifier:label  feature_type  start  end   strand  sequence          
null        null             misc_feature  90     106   +       GACTCAACAGCTTAAC  
null        null             misc_feature  1932   1948  -       GTCAGAGGTGGCGAAA  



**Example code 6: Search CDS features**

In [8]:
# With no key_attribute given, the query "CDS" matches the feature of type CDS shown below.
feature_list = plasmid.finddna(query="CDS")
for feature in feature_list:
    # Printing a feature shows its type, location and qualifiers.
    print(feature)

type: CDS
location: [546:1206](-)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: gene, Value: ['cat']
    Key: label, Value: ['CmR']
    Key: note, Value: ['confers resistance to chloramphenicol']
    Key: product, Value: ['chloramphenicol acetyltransferase']
    Key: translation, Value: ['MEKKITGYTTVDISQWHRKEHFEAFQSVAQCTYNQTVQLDITAFLKTVKKNKHKFYPAFIHILARLMNAHPEFRMAMKDGELVIWDSVHPCYTVFHEQTETFSSLWSEYHDDFRQFLHIYSQDVACYGENLAYFPKGFIENMFFVSANPWVSFTSFDLNVANMDNFFAPVFTMGKYYTQGDKVLMPLAIQVHHAVCDGFHVGRMLNELQQYCDEWQGGA']



**Example code 7: Display all of the “primer_bind” features in a GenBank file**  

In [9]:
# The query is passed positionally here; it selects the features of type "primer_bind".
feature_list = plasmid.finddna("primer_bind")
# Tabular summary (id, label, type, start, end, strand) of the selected features.
plasmid.printfeature(feature_list)

feature_id  qualifier:label                  feature_type  start  end  strand  
100         Forward (CW) Analysis Primer     primer_bind   233    260  +       
600         Cloning Analysis Reverse Primer  primer_bind   410    436  -       



**Example code 8: Cut pGGA plasmid at multiple positions**  
Cut the circular 2,174-bp plasmid pGGA at two positions to produce two linear fragments, then cut one of them again to obtain three linear fragments in total.

In [10]:
# First cut: two positions on the circular plasmid give two linear fragments.
fragment1, fragment2 = cutdna(plasmid, 1000, 2000)
print(fragment1, fragment2, sep="\n")

# Second cut: split the second fragment once more at position 500.
fragment3, fragment4 = cutdna(fragment2, 500)
print(fragment3, fragment4, sep="\n")

<dna.DNA object; project='pGGA', length='1000 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='1174 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='500 bp', topology='linear'>
<dna.DNA object; project='pGGA', length='674 bp', topology='linear'>


**Example code 9: Crop a DNA object to a specific region**  
If you want to obtain only fragment2, use the ```cropdna``` function as follows.

In [11]:
# The start (2000) is larger than the end (1000): on the circular plasmid the
# cropped region wraps around the origin, giving the 1174-bp fragment printed below.
fragment2 = cropdna(plasmid ,2000, 1000)
print(fragment2)

<dna.DNA object; project='pGGA', length='1174 bp', topology='linear'>


**Example code 10: Simulate EcoRI digestion of pGGA plasmid**  
The recognition sequence that EcoRI cuts is "5′-G/AATTC-3′". There are three EcoRI sites in the pGGA plasmid. First, search for the EcoRI sites in the pGGA plasmid. Then, cut each EcoRI site according to its cutting format.  

In [12]:
# "^" and "_" inside the query presumably mark the cut positions on the top and
# bottom strand respectively (consistent with the 4-nt overhangs printed below) -- TODO confirm.
sites = plasmid.finddna("G^AATT_C", key_attribute="sequence")
# Every site found is unpacked and passed to cutdna as its own cut position.
fragments = cutdna(plasmid, *sites)
for fragment in fragments:
    print(fragment)
    # whole=False with end_length=10 prints only the first and last 10 positions of each fragment.
    fragment.getdnaseq(display=True, whole=False, end_length=10)

<dna.DNA object; project='pGGA', length='96 bp', topology='linear'>
5' AATTCGAAGA...CGTCAG---- 3'
3' ----GCTTCT...GCAGTCTTAA 5'

<dna.DNA object; project='pGGA', length='604 bp', topology='linear'>
5' AATTCTCGAG...ATACGG---- 3'
3' ----GAGCTC...TATGCCTTAA 5'

<dna.DNA object; project='pGGA', length='1486 bp', topology='linear'>
5' AATTCCGGAT...GATCCG---- 3'
3' ----GGCCTA...CTAGGCTTAA 5'



**Example code 11: Digest pGGA plasmid by BsaI**  
The recognition sequence that BsaI cuts is "5′-GGTCTC(N1)/(N5)-3′". There are two BsaI sites in the pGGA plasmid. First, search for the BsaI sites in the pGGA plasmid. Then, cut each BsaI site according to its cutting format.  
https://international.neb.com/tools-and-resources/selection-charts/enzymes-with-nonpalindromic-sequences

In [13]:
# Two notations for the BsaI cut site are exercised with the same steps; the
# recorded outputs for both are identical. Looping over the queries removes the
# copy-pasted block while running exactly the same calls in the same order.
for bsai_query in ("GGTCTCN^NNNN_", "GGTCTC(1/5)"):
    # Locate the recognition sites and show them as a feature table.
    sites = plasmid.finddna(bsai_query, key_attribute="sequence")
    plasmid.printfeature(sites)
    # Cut the plasmid at every site found.
    fragments = cutdna(plasmid, *sites)
    for fragment in fragments:
        print(fragment)
        # Show only the 10 positions at each end of the fragment.
        fragment.getdnaseq(display=True, whole=False, end_length=10)

feature_id  qualifier:label  feature_type  start  end  strand  
null        null             misc_feature  348    359  +       
null        null             misc_feature  314    325  -       

<dna.DNA object; project='pGGA', length='45 bp', sequence='GGAGCGAGACCGCTTTCCAGATCTGATAACTTGTGGTCTCACCAT', topology='linear'>
5' GGAGCGAGAC...GTCTCA---- 3'
3' ----GCTCTG...CAGAGTGGTA 5'

<dna.DNA object; project='pGGA', length='2137 bp', topology='linear'>
5' CCATTCCTGT...TGGTAC---- 3'
3' ----AGGACA...ACCATGCCTC 5'

feature_id  qualifier:label  feature_type  start  end  strand  
null        null             misc_feature  348    359  +       
null        null             misc_feature  314    325  -       

<dna.DNA object; project='pGGA', length='45 bp', sequence='GGAGCGAGACCGCTTTCCAGATCTGATAACTTGTGGTCTCACCAT', topology='linear'>
5' GGAGCGAGAC...GTCTCA---- 3'
3' ----GCTCTG...CAGAGTGGTA 5'

<dna.DNA object; project='pGGA', length='2137 bp', topology='linear'>
5' CCATTCCTGT...TGGTAC---- 3'
3' ----AG

**Example code 12: Trim single-stranded DNA on both ends to generate sticky ends**  
Sticky ends can be generated by trimming single-stranded DNA sequences when the end structures are given as top- and bottom-strand strings composed of "\*" and "-", separated by "/". The letter "-" marks a nucleotide to be trimmed and "\*" marks a nucleotide to be kept. 

In [14]:
# Start from a blunt 20-bp crop of the plasmid and print it for comparison.
fragment = cropdna(plasmid, 100, 120)
fragment.getdnaseq(display=True)
# Left end "----/****": trim 4 nt from the top strand and keep the bottom strand.
# Right end "**/--": keep the top strand and trim 2 nt from the bottom strand.
fragment = modifyends(fragment, "----/****", "**/--")
fragment.getdnaseq(display=True)

5' CTTAACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGTGC 5'
5' ----ACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGT-- 5'


The following code performs the same operation as the one above.

In [15]:
# Each position is given as a "top/bottom" pair; judging by the output, the top strand
# spans 104..120 and the bottom strand 100..118, reproducing the sticky ends of the previous cell.
fragment = cropdna(plasmid,'104/100', '120/118')
fragment.getdnaseq(display=True)

5' ----ACGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAACGGT-- 5'


A regex-like format can be used to specify the end structure.

In [16]:
fragment = cropdna(plasmid, 100, 120)
# "-{5}" / "*{5}" are repeat-count shorthands for "-----" / "*****" (see the 5-nt overhangs below).
fragment = modifyends(fragment, "-{5}/*{5}","*{5}/-{5}")
fragment.getdnaseq(display=True)

5' -----CGTTGGCTTGCCACG 3'
3' GAATTGCAACCGAAC----- 5'


If an invalid end sequence structure is given, an error is raised. 

In [17]:
# Deliberately pass an invalid end structure to show how modifyends reports it.
# Crop from `plasmid`, not from `fragment`: the fragment left over from the
# previous cell is only 20 bp long, so cropping it at 100..120 would most likely
# fail inside cropdna before modifyends is ever reached.
fragment = cropdna(plasmid, 100, 120)
try:
    fragment = modifyends(fragment, "******/****--", "----**/******")
except Exception as err:
    # The error itself is the demonstration: display it and keep the notebook
    # runnable top to bottom instead of aborting "Run All" here.
    print(type(err).__name__ + ":", err)

IndexError: list index out of range

**Example code 13: Add sticky ends to a blunt end DNA object**  
If end sequence structures are given with nucleotide letters, they are added to the DNA ends. The same operation can be achieved by the join function described later.

In [18]:
fragment = cropdna(plasmid, 100, 120)
# When the end structures contain nucleotide letters, those bases are appended to
# the corresponding end ("top/bottom" per end; "-" leaves that strand position empty).
fragment = modifyends(fragment,"---ATGC/ATGTACG","TACG---/ATGCTAC")
fragment.getdnaseq(display=True)

5' ---ATGCCTTAACGTTGGCTTGCCACGTACG--- 3'
3' ATGTACGGAATTGCAACCGAACGGTGCATGCTAC 5'


**Example code 14: Flip the chloramphenicol resistance gene in pGGA plasmid**

In [19]:
# Locate the CmR feature (the first hit for the label "CmR").
site = plasmid.finddna("CmR")[0]
# Convert the location boundaries with int() rather than the `.position`
# property: the feature prints like a Biopython SeqFeature, and on Biopython
# positions `.position` is an obsolete accessor that simply returns int(self).
cds_start = int(site.location.start)
cds_end = int(site.location.end)
# Cutting at both boundaries yields the 660-bp CmR segment and the 1514-bp remainder.
fragment1, fragment2 = cutdna(plasmid, cds_start, cds_end)
print(fragment1, fragment2)
# Reverse the orientation of the remainder relative to the CmR segment, then re-circularize.
fragment2 = flipdna(fragment2)
new_plasmid = joindna(fragment1, fragment2, topology="circular")
# Compare the CmR feature before and after the rearrangement.
plasmid.printfeature(plasmid.finddna("CmR"))
new_plasmid.printfeature(new_plasmid.finddna("CmR"))

<dna.DNA object; project='pGGA', length='660 bp', topology='linear'> <dna.DNA object; project='pGGA', length='1514 bp', topology='linear'>
feature_id  qualifier:label  feature_type  start  end   strand  
800         CmR              CDS           546    1206  -       

feature_id  qualifier:label  feature_type  start  end  strand  
100         CmR              CDS           0      660  -       



**Example code 15: Convert feature type from ‘CDS’ to ‘gene’**

In [20]:
# Before: the CDS feature(s) of the original plasmid.
plasmid.printfeature(plasmid.finddna("CDS"))

# Select features whose type is "CDS" and overwrite that type with "gene".
new_plasmid = editdna(
    plasmid,
    key_attribute="feature type",
    query="CDS",
    target_attribute="feature type",
    operation=replaceattribute("gene"),
)

# After: the same feature now shows up under the type "gene".
new_plasmid.printfeature(new_plasmid.finddna("gene"))

feature_id  qualifier:label  feature_type  start  end   strand  
800         CmR              CDS           546    1206  -       

feature_id  qualifier:label  feature_type  start  end   strand  
800         CmR              gene          546    1206  -       



**Example code 16: Break start codon of the CDS features**

In [21]:
# First 10 bases of the CmR feature before editing.
cmr_before = plasmid.getdnaseq(plasmid.finddna("CmR")[0])
print("CmR:", cmr_before[0:10])

# For every CDS feature, replace "ATG" with "GTG" within the feature's 0..3 sub-range.
new_plasmid = editdna(
    plasmid,
    key_attribute="feature type",
    query="CDS",
    target_attribute="sequence:!0..3!",
    operation=replaceattribute("ATG", "GTG"),
)

# The feature is still looked up on the original plasmid, and its sequence is read from the edited one.
cmr_after = new_plasmid.getdnaseq(plasmid.finddna("CmR")[0])
print("CmR:", cmr_after[0:10])

CmR: ATGGAGAAAA
CmR: GTGGAGAAAA


**Example code 17: Create new features where SpCas9 can bind.** 

In [22]:
# Step 1: create a feature for every match of 20 bases + any base + "GG",
# with feature ids generated from the "spCas9_target*" template.
new_plasmid = editdna(
    plasmid,
    key_attribute="sequence",
    query="[ATGC]{20}[ATGC]GG",
    target_attribute="feature id",
    operation=createattribute("spCas9_target*"),
)

# Step 2: set the type of those newly created features to "misc_bind".
new_plasmid = editdna(
    new_plasmid,
    key_attribute="feature id",
    query="spCas9_target[0-9]+",
    target_attribute="feature type",
    operation=replaceattribute("misc_bind"),
)

In [23]:
# Collect the features retyped to "misc_bind" in In [22].
features = new_plasmid.finddna("misc_bind")

In [24]:
# seq=True adds each feature's sequence as an extra column in the table.
new_plasmid.printfeature(features, seq=True)

feature_id        qualifier:label  feature_type  start  end   strand  sequence                 
spCas9_target1    null             misc_bind     137    160   +       ACTCTCACTCTTACCGAACTTGG  
spCas9_target2    null             misc_bind     200    223   +       AAACGAATCGACCGATTGTTAGG  
spCas9_target3    null             misc_bind     237    260   +       AGGAAGGTTTAAACGCATTTAGG  
spCas9_target4    null             misc_bind     267    290   +       ATAGAAGTGTGTATCGCTCGAGG  
spCas9_target5    null             misc_bind     293    316   +       CCGAATTCGAAGACTTGGTACGG  
spCas9_target6    null             misc_bind     327    350   +       TTTCCAGATCTGATAACTTGTGG  
spCas9_target7    null             misc_bind     376    399   +       TTAAGACGTCAGAATTCTCGAGG  
spCas9_target8    null             misc_bind     422    445   +       AGTGAGTCGTATTAATTTCGCGG  
spCas9_target9    null             misc_bind     490    513   +       ATCCGCTCATGAGTAGCACCAGG  
spCas9_target10   null             misc_

In [25]:
# Create a new feature selected by a sequence range instead of a query (query=None);
# createattribute() is called without an argument, and the new feature appears with id 1 below.
# NOTE(review): this cell spells the attribute "feature_id", earlier cells use "feature id" -- confirm both are accepted.
new_plasmid = editdna(plasmid, key_attribute="sequence:|250..500|", query=None, target_attribute="feature_id", operation=createattribute())
# With no arguments, printfeature lists every feature of the object.
new_plasmid.printfeature()
# Disabled examples: replacing the position and the strand of the feature with id 1.
#new_plasmid = editdna(new_plasmid, key_attribute="feature_ID", query="1", target_attribute="position", operation=replaceattribute((100,200)))
#new_plasmid.printfeature()
#new_plasmid = editdna(new_plasmid, key_attribute="feature_ID", query="1", target_attribute="strand", operation=replaceattribute(-1))
#new_plasmid.printfeature()

feature_id  qualifier:label                  feature_type  start  end   strand  
0           null                             source        0      2174  +       
1100        pGGA                             source        0      2174  +       
1           null                             misc_feature  0      250   +       
100         Forward (CW) Analysis Primer     primer_bind   233    260   +       
200         SP6 promoter                     promoter      253    272   +       
300         upstream MCS                     misc_feature  282    304   +       
400         BsaI insert                      misc_feature  314    359   +       
500         downstream MCS                   misc_feature  372    406   +       
600         Cloning Analysis Reverse Primer  primer_bind   410    436   -       
700         T7 promoter                      promoter      417    436   -       
800         CmR                              CDS           546    1206  -       
900         cat promoter    