In [1]:
import sys 
sys.path.append("../../")
from dna import *

**Example code 1: Create a blunt-end DNA object by specifying its sequence** 

In [2]:
# Build a DNA object from a plain sequence string; per the example heading this
# gives a blunt-end (fully double-stranded) fragment.
fragment = DNA(seq="CCGGTATGCGTCGA") 

**Example code 2: Create a sticky-end DNA object by specifying its structure**

In [3]:
# The seq string holds the top strand and the bottom strand separated by "/".
# "-" marks a position without a nucleotide on that strand, so the runs of "-"
# become single-stranded overhangs (see the output of getdnaseq(display=True) below).
fragment = DNA(seq="CCGGTATGCG----/----ATACGCAGCT") 

**Example code 3: Create an annotated DNA object from a GenBank format file**

In [4]:
# Load an annotated DNA object from a GenBank file. The path is relative, so the
# file must be in the directory the notebook is run from.
plasmid = DNA(record="pUC19.gbk")
# Printing the object shows a one-line summary (project, length, topology).
print(plasmid)

<dna.DNA object; project='pUC19', length='2686 bp', topology='circular'>


**Example code 4: Print a double-strand DNA sequence with sticky ends**

In [5]:
# `fragment` is the sticky-end object created in the previous example;
# display=True prints both strands with their 5'/3' ends labelled.
fragment.getdnaseq(display=True)

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


**Example code 5: Display all of the “primer_bind” features in a GenBank file**  

In [6]:
# Print a table of the features whose type is "primer_bind"; seq=True adds the
# "sequence" column to the table.
plasmid.printfeature(feature_type=["primer_bind"], seq=True)

feature ID  qualifier:label  feature type  start position  end position  strand  sequence                 
100         pBR322ori-F      primer_bind   117             137           +       GGGAAACGCCTGGTATCTTT     
200         L4440            primer_bind   370             388           +       AGCGAGTCAGTGAGCGAG       
600         M13/pUC Reverse  primer_bind   583             606           +       AGCGGATAACAATTTCACACAGG  
700         M13 rev          primer_bind   602             619           +       CAGGAAACAGCTATGAC        
800         M13 Reverse      primer_bind   602             619           +       CAGGAAACAGCTATGAC        
1100        M13 Forward      primer_bind   688             706           -       TGTAAAACGACGGCCAGT       
1200        M13 fwd          primer_bind   688             705           -       GTAAAACGACGGCCAGT        
1300        M13/pUC Forward  primer_bind   697             720           -       CCCAGTCACGACGTTGTAAAACG  
1400        pRS-marker       primer_b

**Example code 6: Search for a DNA sequence with a regular expression**

In [7]:
# Search the sequence with a regular expression: 20 nucleotides followed by any
# nucleotide and then "GG" (23 nt per hit).
feature_list = plasmid.finddna(attribute="sequence",query="[ATGC]{20}[ATGC]GG")
for feature in feature_list:
    # One tab-separated row per hit: start, end, strand, matched sequence.
    print(feature.location.start, feature.location.end, feature.location.strand, plasmid.getdnaseq(feature), sep="\t")

25	48	1	GAAAGCGCCACGCTTCCCGAAGG
55	78	1	GCGGACAGGTATCCGGTAAGCGG
83	106	1	TCGGAACAGGAGAGCGCACGAGG
107	130	1	AGCTTCCAGGGGGAAACGCCTGG
175	198	1	CGATTTTTGTGATGCTCGTCAGG
210	233	1	ATGGAAAAACGCCAGCAACGCGG
239	262	1	TACGGTTCCTGGCCTTTTGCTGG
285	308	1	TGCGTTATCCCCTGATTCTGTGG
366	389	1	GCGCAGCGAGTCAGTGAGCGAGG
413	436	1	AACCGCCTCTCCCCGCGCGTTGG
443	466	1	ATTAATGCAGCTGGCACGACAGG
507	530	1	TGTGAGTTAGCTCACTCATTAGG
537	560	1	GGCTTTACACTTTATGCTTCCGG
565	588	1	ATGTTGTGTGGAATTGTGAGCGG
627	650	1	CGCCAAGCTTGCATGCCTGCAGG
670	693	1	GGTACCGAGCTCGAATTCACTGG
696	719	1	TCGTTTTACAACGTCGTGACTGG
756	779	1	CACATCCCCCTTTCGCCAGCTGG
815	838	1	CAACAGTTGCGCAGCCTGAATGG
860	883	1	TTTCTCCTTACGCATCTGTGCGG
963	986	1	CACCCGCTGACGCGCCCTGACGG
1013	1036	1	AGACAAGCTGTGACCGTCTCCGG
1069	1092	1	ACCGAAACGCGCGAGACGAAAGG
1096	1119	1	CGTGATACGCCTATTTTTATAGG
1133	1156	1	ATAATGGTTTCTTAGACGTCAGG
1160	1183	1	ACTTTTCGGGGAAATGTGCGCGG
1254	1277	1	TGCTTCAATAATATTGAAAAAGG
1306	1329	1	CGCCCTTATTCCCTTTTTTGCGG
1345	1368	1	TTTTGCTCACCCAGAAACGCTGG
137

**Example code 7: Search CDS features**

In [8]:
# Search with the query "CDS" and no explicit attribute; in the recorded output
# this returns the features whose type is CDS.
feature_list = plasmid.finddna(query="CDS")
for feature in feature_list:
    # Printing a feature shows its type, location and qualifiers.
    print(feature)

type: CDS
location: [614:938](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: gene, Value: ['lacZ fragment']
    Key: label, Value: ['lacZ-alpha']
    Key: product, Value: ['LacZ-alpha fragment of beta-galactosidase']
    Key: translation, Value: ['MTMITPSLHACRSTLEDPRVPSSNSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQQLRSLNGEWRLMRYFLLTHLCGISHRIWCTLSTICSDAA']

type: CDS
location: [1283:2144](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: gene, Value: ['bla']
    Key: label, Value: ['AmpR']
    Key: note, Value: ['confers resistance to ampicillin, carbenicillin, and related antibiotics']
    Key: product, Value: ['beta-lactamase']
    Key: translation, Value: ['MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW']



**Example code 8: Cut pUC19 plasmid at multiple positions**  
Cut the circular 2,686-bp plasmid pUC19 at two positions to produce two linear fragments.

In [9]:
# Cut the circular plasmid at positions 1000 and 2000; two linear fragments are
# returned (1686 bp and 1000 bp in the recorded output).
fragment1, fragment2 = cutdna(plasmid ,1000, 2000)
print(fragment1)
print(fragment2)

<dna.DNA object; project='pUC19', length='1686 bp', topology='linear'>
<dna.DNA object; project='pUC19', length='1000 bp', topology='linear'>


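**Example code 9: Crop a fragmented dna object in a specific region**  
If you want to get only one of the fragments, use the ```cropdna``` function as follows. Cropping from position 2000 to position 1000 of the circular plasmid returns the 1,686-bp fragment, i.e. the one named `fragment1` in Example code 8.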

In [10]:
# Crop the region that runs from position 2000 to position 1000 of the circular
# plasmid (presumably wrapping around the origin).
# NOTE(review): the recorded output is 1686 bp, the same length as `fragment1`
# (not `fragment2`) returned by cutdna(plasmid, 1000, 2000) above -- confirm
# which piece is intended here.
fragment2 = cropdna(plasmid ,2000, 1000)
print(fragment2)

<dna.DNA object; project='pUC19', length='1686 bp', topology='linear'>


**Example code 10: Simulate EcoRI digestion of the pUC19 plasmid**  
EcoRI cuts within its recognition sequence "G↓AATTC", which is a unique site in the pUC19 plasmid. First, search for the EcoRI recognition sequence in the pUC19 DNA sequence. Second, cut the plasmid there so that its ends match the cut ends produced by EcoRI.

In [11]:
# Locate the EcoRI recognition sequence on the top strand (strand=1) and take
# the first hit.
site = plasmid.finddna("GAATTC", attribute="sequence", strand=1)[0] 
# A tuple is passed as the cut position: presumably (top-strand cut, bottom-strand
# cut), i.e. after the G on top and 4 nt further on the bottom -- TODO confirm.
# The recorded output shows the resulting 4-nt AATT overhangs.
linear_plasmid = cutdna(plasmid, (site.location.start+1,site.location.start+5))[0]
print(linear_plasmid)
# whole=False with end_length=10 prints only the 10 nt at each end.
linear_plasmid.getdnaseq(display=True, whole=False, end_length=10)

<dna.DNA object; project='pUC19', length='2690 bp', topology='linear'>
5' AATTCACTGG...AGCTCG---- 3'
3' ----GTGACC...TCGAGCTTAA 5'



**Example code 11: Trim single-stranded DNA on both ends to generate sticky ends**  
Sticky ends can be generated by trimming single-stranded DNA sequences when their end structures are given by top and bottom strand strings composed of "\*" and "-", separated by "/". The letter "-" marks a nucleotide to be trimmed and "\*" marks a nucleotide to be retained. 

In [12]:
# Start from the blunt 20-bp region 100..120 of the plasmid.
fragment = cropdna(plasmid, 100, 120)
fragment.getdnaseq(display=True)
print("\nTrimmed")
# End patterns are written "top/bottom": "-" trims a nucleotide, "*" keeps it.
# Left end: trim 4 nt from the top strand; right end: trim 2 nt from the bottom strand.
fragment = modifyends(fragment, "----/****", "**/--")
fragment.getdnaseq(display=True)

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'

Trimmed
5' ----GGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCC-- 5'


The following code performs the same process as the code above.

In [13]:
# Each boundary is given as a tuple, presumably (top-strand position,
# bottom-strand position) -- TODO confirm. The recorded output is identical to
# the cropdna + modifyends result of the previous cell.
fragment = cropdna(plasmid, (104,100), (120,118))
fragment.getdnaseq(display=True)

5' ----GGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCC-- 5'


A regex-like format can be used for the end structure specification.

In [14]:
fragment = cropdna(plasmid, 100, 120)
# "-{5}" / "*{5}" are shorthand for five repeated "-" / "*" characters: trim 5 nt
# from the top strand at the left end and 5 nt from the bottom strand at the right end.
fragment = modifyends(fragment, "-{5}/*{5}","*{5}/-{5}")
fragment.getdnaseq(display=True)

5' -----GGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGT----- 5'


If an invalid end sequence structure is given, an error message will be displayed. 

In [15]:
# Demonstrate the error reported for an invalid end-structure pattern.
# Crop from `plasmid`, as every other modifyends example in this notebook does:
# the `fragment` left by the previous cell is only 20 bp long, so positions
# 100..120 do not refer to it.
fragment = cropdna(plasmid, 100, 120)
try:
    fragment = modifyends(fragment, "******/****--", "----**/******")
except TypeError as err:
    # The exception is the point of this cell: show its message, but do not let
    # it abort a "Restart Kernel -> Run All" of the notebook.
    print(f"{type(err).__name__}: {err}")

TypeError: Please sepcify a proper sequence pattern for the 'left' argument

**Example code 12: Add sticky ends to a blunt end DNA object**  
If end sequence structures are given with nucleotide letters, they are added to the DNA ends. The same operation can be achieved by the join function described later.

In [16]:
fragment = cropdna(plasmid, 100, 120)
# When the end patterns contain nucleotide letters, those letters are added to
# the ends (written "top/bottom"); "-" leaves that strand position empty, which
# creates the overhangs seen in the recorded output.
fragment = modifyends(fragment,"---ATGC/ATGTACG","TACG---/ATGCTAC")
fragment.getdnaseq(display=True)

5' ---ATGCACGAGGGAGCTTCCAGGGGGTACG--- 3'
3' ATGTACGTGCTCCCTCGAAGGTCCCCCATGCTAC 5'


**Example code 13: Flip the ampicillin resistance gene in the pUC19 plasmid**

In [17]:
# Find the AmpR feature and cut the plasmid at its two boundaries.
site = plasmid.finddna("AmpR")[0]
# int(...) gives the plain integer coordinate; it replaces the obsolete
# `.position` attribute of the location endpoints, which returned the same value.
ampr_start = int(site.location.start)
ampr_end = int(site.location.end)
fragment1, fragment2 = cutdna(plasmid, ampr_start, ampr_end)
print(fragment1, fragment2)

# fragment2 is the 861-bp piece carrying AmpR (see the recorded lengths); flip it
# and re-ligate the two pieces into a circular plasmid.
fragment2 = flipdna(fragment2)
new_plasmid = joindna(fragment1, fragment2, topology="circular")

# Compare the CDS features before and after: AmpR ends up on the "-" strand.
plasmid.printfeature(feature_type=["CDS"])
new_plasmid.printfeature(feature_type=["CDS"])

<dna.DNA object; project='pUC19', length='1825 bp', topology='linear'> <dna.DNA object; project='pUC19', length='861 bp', topology='linear'>
DNA.dna_dict['pUC19_144'] = joindna(*[DNA.dna_dict['pUC19_142'],DNA.dna_dict['pUC19_143']], topology='circular', project='pUC19')
feature ID  qualifier:label  feature type  start position  end position  strand  
900         lacZ-alpha       CDS           614             938           +       
1800        AmpR             CDS           1283            2144          +       

feature ID  qualifier:label  feature type  start position  end position  strand  
1400        lacZ-alpha       CDS           1156            1480          +       
2400        AmpR             CDS           1825            2686          -       



**Example code 14: Convert feature type from ‘CDS’ to ‘gene’**

In [18]:
# Show the CDS features before editing.
plasmid.printfeature(feature_type=["CDS"])
# Select features whose "feature type" matches "CDS" and replace "CDS" with "gene"
# in that same attribute. destructive=False presumably leaves `plasmid` unchanged
# and returns the edited object -- TODO confirm.
new_plasmid = editdna(plasmid, key_attribute="feature type", query="CDS", target_attribute="feature type", operation=replacedna("CDS", "gene"), destructive=False)
# The same two features are now listed under the type "gene".
new_plasmid.printfeature(feature_type=["gene"])

feature ID  qualifier:label  feature type  start position  end position  strand  
900         lacZ-alpha       CDS           614             938           +       
1800        AmpR             CDS           1283            2144          +       

feature ID  qualifier:label  feature type  start position  end position  strand  
900         lacZ-alpha       gene          614             938           +       
1800        AmpR             gene          1283            2144          +       



**Example code 15: Break start codon of the CDS features**

In [19]:
# (label, feature ID) of the two CDS features whose start codon is edited.
cds_targets = (("lacZ-alpha", "900"), ("AmpR", "1800"))

# First 10 nt of each CDS before editing.
for cds_label, cds_id in cds_targets:
    cds_feature = plasmid.finddna(cds_id, "feature_ID")[0]
    print(cds_label + ":", plasmid.getdnaseq(cds_feature)[0:10])

# Replace "ATG" with "GTG" in the span "0..3" of every CDS feature (presumably the
# first three nucleotides of each feature -- see the editing_history qualifier in
# the recorded output).
new_plasmid = editdna(
    plasmid,
    key_attribute="feature type",
    query="CDS",
    target_attribute="sequence:!0..3!",
    operation=replacedna("ATG", "GTG"),
    destructive=False,
)

# First 10 nt after editing. The features are looked up on the original `plasmid`
# and used to read the sequence of `new_plasmid`.
for cds_label, cds_id in cds_targets:
    cds_feature = plasmid.finddna(cds_id, "feature_ID")[0]
    print(cds_label + ":", new_plasmid.getdnaseq(cds_feature)[0:10])

# Full feature records of the edited plasmid.
for cds_label, cds_id in cds_targets:
    print(new_plasmid.finddna(cds_id, attribute="feature_ID")[0])

lacZ-alpha: ATGACCATGA
AmpR: ATGAGTATTC
lacZ-alpha: GTGACCATGA
AmpR: GTGAGTATTC
type: CDS
location: [614:938](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: editing_history, Value: [pUC19:lacZ-alpha:614..938]:0..3:ATG2GTG:324
    Key: gene, Value: ['lacZ fragment']
    Key: label, Value: ['lacZ-alpha']
    Key: product, Value: ['LacZ-alpha fragment of beta-galactosidase']
    Key: translation, Value: ['MTMITPSLHACRSTLEDPRVPSSNSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQQLRSLNGEWRLMRYFLLTHLCGISHRIWCTLSTICSDAA']

type: CDS
location: [1283:2144](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: editing_history, Value: [pUC19:AmpR:1283..2144]:0..3:ATG2GTG:861
    Key: gene, Value: ['bla']
    Key: label, Value: ['AmpR']
    Key: note, Value: ['confers resistance to ampicillin, carbenicillin, and related antibiotics']
    Key: product, Value: ['beta-lactamase']
    Key: translation, Value: ['MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVL

**Example code 16: Create new features where SpCas9 can bind.** 

In [20]:
# Step 1: for every match of the pattern (20 nt + any nucleotide + "GG"), create a
# new feature; the recorded output shows IDs of the form "spCas9_target_<n>".
new_plasmid = editdna(plasmid, key_attribute="sequence", query="[ATGC]{20}[ATGC]GG", target_attribute="feature ID", operation=createdna("spCas9_target"), destructive=False)
# Step 2: select those new features by ID and set their feature type to
# "misc_bind" (replacedna with a single argument presumably replaces the whole
# value -- TODO confirm).
new_plasmid = editdna(new_plasmid, key_attribute="feature ID", query="spCas9_target_[0-9]+", target_attribute="feature type", operation=replacedna("misc_bind"), destructive=False)
new_plasmid.printfeature(feature_type=["misc_bind"])

feature ID         qualifier:label  feature type  start position  end position  strand  
spCas9_target_125  N.A.             misc_bind     6               29            -       
spCas9_target_124  N.A.             misc_bind     41              64            -       
spCas9_target_1    N.A.             misc_bind     55              78            +       
spCas9_target_123  N.A.             misc_bind     67              90            -       
spCas9_target_2    N.A.             misc_bind     83              106           +       
spCas9_target_3    N.A.             misc_bind     107             130           +       
spCas9_target_122  N.A.             misc_bind     125             148           -       
spCas9_target_121  N.A.             misc_bind     159             182           -       
spCas9_target_4    N.A.             misc_bind     175             198           +       
spCas9_target_120  N.A.             misc_bind     207             230           -       
spCas9_target_5    N.