In [1]:
import sys 
sys.path.append("../../")
from dna import *

In [2]:
#Example code1 : Create a blunt-end DNA object by specifying its sequence
brick = DNA(seq="CCGGTATGCGTCGA")
print(brick)

<dna.DNA object; project='dna', length='14 bp', sequence='CCGGTATGCGTCGA', topology='linear'>


In [3]:
#Example code 2: Create a sticky-end DNA object by specifying its structure 
brick = DNA(seq="CCGGTATGCG----/----ATACGCAGCT") 
print(brick)

<dna.DNA object; project='dna', length='14 bp', sequence='CCGGTATGCGTCGA', topology='linear'>


In [4]:
#Example code 3: Create an annotated DNA object from GenBank format file 
brick = DNA(record="pUC19.gbk")
print(brick)

<dna.DNA object; project='pUC19', length='2686 bp', topology='circular'>


In [5]:
#Example code 4-1: Print a double-strand DNA sequence with sticky ends
brick = DNA(seq="CCGGTATGCG----/----ATACGCAGCT") 
brick.getdnaseq(display=True)
brick.getdnaseq()

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'


('CCGGTATGCG----', 'TCGACGCATA----')

In [6]:
#Example code 4-1: Print a double-strand DNA sequence with sticky ends
brick = DNA(record="pUC19.gbk", project="pUC19")
#brick.printdnaseq(linebreak=70)

In [7]:
brick.printfeature()

feature ID  qualifier:label   feature type  start position  end position  strand  
0           N.A.              source        0               2686          +       
2100        pUC19             source        0               2686          +       
100         pBR322ori-F       primer_bind   117             137           +       
200         L4440             primer_bind   370             388           +       
300         CAP binding site  protein_bind  504             526           +       
400         lac promoter      promoter      540             571           +       
500         lac operator      protein_bind  578             595           +       
600         M13/pUC Reverse   primer_bind   583             606           +       
700         M13 rev           primer_bind   602             619           +       
800         M13 Reverse       primer_bind   602             619           +       
900         lacZ-alpha        CDS           614             938           +       
1000

In [8]:
brick.getdnaseq(brick.getdnafeature(100))

'GGGAAACGCCTGGTATCTTT'

In [9]:
brick.printfeature(feature_type=["primer_bind"], attribute=["feature ID","qualifier:note"], detail=False)

feature ID  qualifier:note                                              
100         pBR322 origin, forward primer                               
200         L4440 vector, forward primer                                
600         In lacZ gene                                                
700         common sequencing primer, one of multiple similar variants  
800         In lacZ gene. Also called M13-rev                           
1100        In lacZ gene. Also called M13-F20 or M13 (-21) Forward      
1200        common sequencing primer, one of multiple similar variants  
1300        In lacZ gene                                                
1400        pRS vectors, use to sequence yeast selectable marker        
1500        pGEX vectors, reverse primer                                
1600        pBR322 vectors, upsteam of EcoRI site, forward primer       
1900        Ampicillin resistance gene, reverse primer                  



In [10]:
brick.printfeature(feature_type=["primer_bind","CDS"], attribute=["$DEFAULT","qualifier:note"], detail=False)

feature ID  qualifier:label  feature type  start position  end position  strand  qualifier:note                                                            
100         pBR322ori-F      primer_bind   117             137           +       pBR322 origin, forward primer                                             
200         L4440            primer_bind   370             388           +       L4440 vector, forward primer                                              
600         M13/pUC Reverse  primer_bind   583             606           +       In lacZ gene                                                              
700         M13 rev          primer_bind   602             619           +       common sequencing primer, one of multiple similar variants                
800         M13 Reverse      primer_bind   602             619           +       In lacZ gene. Also called M13-rev                                         
900         lacZ-alpha       CDS           614             938  

In [11]:
brick.finddna("GGATCC")
print([b for b in brick.finddna("GGATCC")])

[]


In [12]:
print(brick.finddna("GGATCC", max_mismatch=1))

[]


In [13]:
feature_list = brick.finddna("[ATGC]{20}[ATGC]GG")
for feat in feature_list:
    print(b.location.start, b.location.end, b.location.strand, brick.getdnaseq(b), sep="\t") 

In [14]:
brick.finddna(query="CDS", attribute="feature type")
for feat in feature_list:
    print(b.location.start, b.location.end, b.location.strand, brick.getdnaseq(b), sep="\t")

In [15]:
for b in brick.finddna("[ATGC]CC[ATGC]{18}[ATGC]GG", attribute="sequence:|1000..1500|"):
    print(b) 

type: misc_feature
location: [1031:1055](+)
qualifiers:

type: misc_feature
location: [1069:1093](+)
qualifiers:

type: misc_feature
location: [1070:1094](-)
qualifiers:

type: misc_feature
location: [1032:1056](-)
qualifiers:



In [16]:
brick.printfeature(feature_type=["CDS"])
print("lacZ-alpha", cropdna(brick,614,938).seq[0:10])
print("AmpR", cropdna(brick,1283,2144).seq[0:10])
editdna(brick, key_attribute="feature type", query="CDS", target_attribute="sequence:!0..3!", operation=replacedna("ATG", "GTG"))
print("lacZ-alpha", cropdna(brick,614,938).seq[0:10])
print("AmpR", cropdna(brick,1283,2144).seq[0:10])

feature ID  qualifier:label  feature type  start position  end position  strand  
900         lacZ-alpha       CDS           614             938           +       
1800        AmpR             CDS           1283            2144          +       

lacZ-alpha ATGACCATGA
AmpR ATGAGTATTC
lacZ-alpha GTGACCATGA
AmpR GTGAGTATTC


In [17]:
brick.printfeature()
editdna(brick, key_attribute="feature type", query="primer_bind",
        target_attribute="feature type", operation=replacedna("primer_bind", "primer"))
brick.printfeature()

feature ID  qualifier:label   feature type  start position  end position  strand  
0           N.A.              source        0               2686          +       
2100        pUC19             source        0               2686          +       
100         pBR322ori-F       primer_bind   117             137           +       
200         L4440             primer_bind   370             388           +       
300         CAP binding site  protein_bind  504             526           +       
400         lac promoter      promoter      540             571           +       
500         lac operator      protein_bind  578             595           +       
600         M13/pUC Reverse   primer_bind   583             606           +       
700         M13 rev           primer_bind   602             619           +       
800         M13 Reverse       primer_bind   602             619           +       
900         lacZ-alpha        CDS           614             938           +       
1000

In [18]:
brick.printfeature()

feature ID  qualifier:label   feature type  start position  end position  strand  
0           N.A.              source        0               2686          +       
2100        pUC19             source        0               2686          +       
100         pBR322ori-F       primer        117             137           +       
200         L4440             primer        370             388           +       
300         CAP binding site  protein_bind  504             526           +       
400         lac promoter      promoter      540             571           +       
500         lac operator      protein_bind  578             595           +       
600         M13/pUC Reverse   primer        583             606           +       
700         M13 rev           primer        602             619           +       
800         M13 Reverse       primer        602             619           +       
900         lacZ-alpha        CDS           614             938           +       
1000

In [19]:
editdna(brick, key_attribute="feature ID", query="100", target_attribute="qualifier:label", operation=removedna()) 

In [20]:
brick.printfeature()

feature ID  qualifier:label   feature type  start position  end position  strand  
0           N.A.              source        0               2686          +       
2100        pUC19             source        0               2686          +       
100         N.A.              primer        117             137           +       
200         L4440             primer        370             388           +       
300         CAP binding site  protein_bind  504             526           +       
400         lac promoter      promoter      540             571           +       
500         lac operator      protein_bind  578             595           +       
600         M13/pUC Reverse   primer        583             606           +       
700         M13 rev           primer        602             619           +       
800         M13 Reverse       primer        602             619           +       
900         lacZ-alpha        CDS           614             938           +       
1000

In [21]:
editdna(brick, key_attribute="feature ID", query="100", target_attribute="feature ID", operation=removedna())

In [22]:
brick.printfeature()

feature ID  qualifier:label   feature type  start position  end position  strand  
0           N.A.              source        0               2686          +       
2100        pUC19             source        0               2686          +       
200         L4440             primer        370             388           +       
300         CAP binding site  protein_bind  504             526           +       
400         lac promoter      promoter      540             571           +       
500         lac operator      protein_bind  578             595           +       
600         M13/pUC Reverse   primer        583             606           +       
700         M13 rev           primer        602             619           +       
800         M13 Reverse       primer        602             619           +       
900         lacZ-alpha        CDS           614             938           +       
1000        MCS               misc_feature  631             688           +       
1100

In [23]:
#editdna(brick, key_attribute=None, query=None, target_attibute="feature ID", operation=createedna("New_feature"))

In [24]:
editdna(brick, key_attribute="sequence:|100..120|", query=None, target_attribute="feature ID", operation=createdna("New_feature"))
editdna(brick, key_attribute="sequence:|2600..100|", query=None, target_attribute="feature ID", operation=createdna("New_feature"))

In [25]:
brick.printfeature()

feature ID     qualifier:label   feature type  start position  end position  strand  
0              N.A.              source        0               2686          +       
2100           pUC19             source        0               2686          +       
New_feature    N.A.              misc_feature  100             120           +       
200            L4440             primer        370             388           +       
300            CAP binding site  protein_bind  504             526           +       
400            lac promoter      promoter      540             571           +       
500            lac operator      protein_bind  578             595           +       
600            M13/pUC Reverse   primer        583             606           +       
700            M13 rev           primer        602             619           +       
800            M13 Reverse       primer        602             619           +       
900            lacZ-alpha        CDS           614    

In [26]:
editdna(brick, key_attribute="sequence", query="GG[ATGC]{19}[ATGC]GG", target_attribute="feature ID", operation=createdna("spCas9_target"))
editdna(brick, key_attribute="feature ID", query="spCas9_target_[0-9]+", target_attribute="feature type", operation=replacedna("misc_bind"))
brick.printfeature(feature_type=["misc_bind"])

feature ID        qualifier:label  feature type  start position  end position  strand  
spCas9_target_12  N.A.             misc_bind     403             427           -       
spCas9_target_11  N.A.             misc_bind     534             558           -       
spCas9_target_1   N.A.             misc_bind     648             672           +       
spCas9_target_10  N.A.             misc_bind     956             980           -       
spCas9_target_2   N.A.             misc_bind     1513            1537          +       
spCas9_target_9   N.A.             misc_bind     1533            1557          -       
spCas9_target_3   N.A.             misc_bind     1887            1911          +       
spCas9_target_4   N.A.             misc_bind     2117            2141          +       
spCas9_target_8   N.A.             misc_bind     2488            2512          -       
spCas9_target_7   N.A.             misc_bind     2521            2545          -       
spCas9_target_5   N.A.          

In [26]:
editdna(brick, key_attribute="feature ID", query="spCas9_target_[0-9]+", target_attribute="feature type", operation=replacedna("misc_bind"))

In [27]:
brick.printfeature(feature_type=["misc_bind"])

feature ID        qualifier:label  feature type  start position  end position  strand  
spCas9_target_1   N.A.             misc_bind     47              71            +       
spCas9_target_13  N.A.             misc_bind     403             427           -       
spCas9_target_12  N.A.             misc_bind     534             558           -       
spCas9_target_2   N.A.             misc_bind     648             672           +       
spCas9_target_11  N.A.             misc_bind     956             980           -       
spCas9_target_3   N.A.             misc_bind     1513            1537          +       
spCas9_target_10  N.A.             misc_bind     1533            1557          -       
spCas9_target_4   N.A.             misc_bind     1887            1911          +       
spCas9_target_5   N.A.             misc_bind     2117            2141          +       
spCas9_target_9   N.A.             misc_bind     2488            2512          -       
spCas9_target_8   N.A.          

In [28]:
sub_brick1, sub_brick2 = cutdna(brick ,1000, 2000)
print(len(brick.seq), len(sub_brick1.seq), len(sub_brick2.seq))
sub_brick1.printdnafeature(feature_key=["source"])
sub_brick2.printdnafeature(feature_key=["source"])

2686 1686 1000


AttributeError: 'DNA' object has no attribute 'printdnafeature'

In [29]:
sub_brick = cropdna(brick, 1000, 2000,)
brick.printdnafeature() 
print()
sub_brick.printdnafeature() 

AttributeError: 'DNA' object has no attribute 'printdnafeature'

In [14]:
brick.printdnafeature(feature_key=["primer_bind"], with_seq=True)
print()
sub_brick = cropdna(brick,brick.finddna("800")[0].sstart,brick.finddna("1100")[0].send)
sub_brick.printdnaseq(whole=True)
sub_brick.printdnafeature() 

Feature_ID  Label            Type         Start  End   Strand  Seq                      
100         pBR322ori-F      primer_bind  117    137   +       GGGAAACGCCTGGTATCTTT     
200         L4440            primer_bind  370    388   +       AGCGAGTCAGTGAGCGAG       
600         M13/pUC Reverse  primer_bind  583    606   +       AGCGGATAACAATTTCACACAGG  
700         M13 rev          primer_bind  602    619   +       CAGGAAACAGCTATGAC        
800         M13 Reverse      primer_bind  602    619   +       CAGGAAACAGCTATGAC        
1100        M13 Forward      primer_bind  688    706   -       TGTAAAACGACGGCCAGT       
1200        M13 fwd          primer_bind  688    705   -       GTAAAACGACGGCCAGT        
1300        M13/pUC Forward  primer_bind  697    720   -       CCCAGTCACGACGTTGTAAAACG  
1400        pRS-marker       primer_bind  913    933   -       CGGCATCAGAGCAGATTGTA     
1500        pGEX 3'          primer_bind  1032   1055  +       CCGGGAGCTGCATGTGTCAGAGG  
1600        pBRforEco

In [15]:
sub_brick = cropdna(brick, 100, 120)
sub_brick.printdnaseq()
sub_brick_m = modifyends(sub_brick, "" ,"")
sub_brick_m.printdnaseq()
sub_brick_m = modifyends(sub_brick,"----**/******", "******/****--")
sub_brick_m.printdnaseq()
sub_brick_m = modifyends(sub_brick,"----GG/CCCCCC", "******/****--")
sub_brick_m.printdnaseq()

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'

5' ----GGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCC-- 5'

5' ----GGACGAGGGAGCTTCCAGGGGG 3'
3' CCCCCCTGCTCCCTCGAAGGTCCC-- 5'



In [16]:
sub_brick = cropdna(brick, 100, 120)
sub_brick.printdnaseq()  
sub_brick = modifyends(sub_brick, "******/****--", "----**/******")
sub_brick.printdnaseq()

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'



TypeError: Please sepcify a proper sequence pattern for the 'left' argument

In [17]:
#Source code (continued from previous one)#
sub_brick = cropdna(brick, 100, 120)
sub_brick = modifyends(sub_brick,"ATGTACG","ATGCTAC")
sub_brick.printdnaseq(whole=True) 

5' ATGTACGACGAGGGAGCTTCCAGGGGGATGCTAC 3'
3' TACATGCTGCTCCCTCGAAGGTCCCCCTACGATG 5'



In [18]:
sub_brick = cropdna(brick, 100, 120)
sub_brick = modifyends(sub_brick,"---ATGC/ATGTACG","TACG---/ATGCTAC")
sub_brick.printdnaseq() 

5' ---ATGCACGAGGGAGCTTCCAGGGGGTACG--- 3'
3' ATGTACGTGCTCCCTCGAAGGTCCCCCATGCTAC 5'



In [19]:
print(len(brick.seq))
brick_EcoRI = modifyends(cutdna(brick, brick.finddna("GAATTC")[0].sstart)[0],"-*****/-----*","G----/CTTAA")
brick_EcoRI.printdnaseq(whole=False)
print(len(sub_brick.seq))

2686
5' AATTCACTGG...AGCTCG---- 3'
3' ----GTGACC...TCGAGCTTAA 5'

34


In [20]:
SacI = "GAGCTC"
SalI = "GTCGAC"
sub_brick = cropdna(brick, brick.finddna(SacI)[0].sstart, brick.finddna(SalI)[0].send) 
sub_brick.printdnaseq() 
sub_brick = modifyends(sub_brick, "-----*/-*****", "*-----/*****-") 
sub_brick.printdnaseq()
print(len(sub_brick.seq))

5' GAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGACACCCGCCAACACCCGCTGACGCGCCCTGACGGGCTTGTCTGCTCCCGGCATCCGCTTACAGACAAGCTGTGACCGTCTCCGGGAGCTGCATGTGTCAGAGGTTTTCACCGTCATCACCGAAACGCGCGAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAAC

In [21]:
SacI = "GAGCTC"
SalI = "GTCGAC"
EGFP = DNA(record="EGFP.fasta") 
EGFP.adddnafeature(0,len(EGFP.seq),qualifiers={"label":"EGFP"})
EGFP = modifyends(EGFP, SacI, SalI) 
EGFP.printdnaseq() 
EGFP = modifyends(EGFP, "-----*/-*****", "*-----/*****-")
EGFP.printdnaseq() 
product = joindna(EGFP, sub_brick, topology="circular")
print(len(product.seq))

5' GAGCTCATGTCCACCAACTTATCAGTGATAAAGAATCCGCGCGTTCAATCGGACCAGCGGAGGCTGGTCCGGAGGCCAGACGTGAAACCCAACATACCCCTGATCGTAATTCTGAGGTCGACTCTAGAGGATCCCCGGGTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAATTCCAACTGAGCGCCGGTCGCTACCATTACCAACTTGTCTGGTGTCAAAAATAATAGGCCTACTAGTCGGCCGTACGGGCCCTTTCGTCTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGG

In [22]:
product.printdnafeature()

Feature_ID  Label                   Type          Start  End   Strand  
0           EGFP                    misc_feature  5      1085  +       
100         MCS:47..57:57           misc_feature  1090   1101  +       
200         lacZ-alpha:64..324:324  CDS           1090   1351  +       
300         pUC19:678..654:2686     source        1090   3753  +       
400         M13 fwd                 primer_bind   1101   1118  -       
500         M13 Forward             primer_bind   1101   1119  -       
600         M13/pUC Forward         primer_bind   1110   1133  -       
700         pRS-marker              primer_bind   1326   1346  -       
800         pGEX 3'                 primer_bind   1445   1468  +       
900         pBRforEco               primer_bind   1505   1524  -       
1000        AmpR promoter           promoter      1591   1696  +       
1100        AmpR                    CDS           1696   2557  +       
1200        Amp-R                   primer_bind   1914   1934  -

In [23]:
product_shifted = joindna(cutdna(product, 2727)[0])

In [24]:
product_shifted.printdnafeature()

Feature_ID  Label                   Type          Start  End   Strand  
0           ori                     rep_origin    0      589   +       
200         feat2                   misc_feature  336    382   +       
300         pBR322ori-F             primer_bind   489    509   +       
400         L4440                   primer_bind   742    760   +       
500         CAP binding site        protein_bind  876    898   +       
600         lac promoter            promoter      912    943   +       
700         lac operator            protein_bind  950    967   +       
800         M13/pUC Reverse         primer_bind   955    978   +       
1000        M13 Reverse             primer_bind   974    991   +       
900         M13 rev                 primer_bind   974    991   +       
1100        lacZ-alpha:1..40:324    CDS           986    1026  +       
1200        MCS:1..23:57            misc_feature  1003   1026  +       
1300        EGFP                    misc_feature  1031   2111  +