In [1]:
import sys 
sys.path.append("../../")
from dna import *

In [2]:
#Example code1 : Create a blunt-end DNA object by specifying its sequence
brick = DNA(seq="CCGGTATGCGTCGA")
print(brick)

<dna.DNA object; project='None', length='14 bp', sequence='CCGGTATGCGTCGA', topology='linear'>


In [3]:
#Example code 2: Create a sticky-end DNA object by specifying its structure 
brick = DNA(seq="CCGGTATGCG----/----ATACGCAGCT") 
print(brick)

<dna.DNA object; project='None', length='14 bp', sequence='CCGGTATGCGTCGA', topology='linear'>


In [4]:
#Example code 3: Create an annotated DNA object from GenBank format file 
brick = DNA(record="pUC19.gbk")
print(brick)

<dna.DNA object; project='None', length='2686 bp', topology='circular'>


In [5]:
#Example code 4-1: Print a double-strand DNA sequence with sticky ends
brick = DNA(seq="CCGGTATGCG----/----ATACGCAGCT") 
brick.printdnaseq()

5' CCGGTATGCG---- 3'
3' ----ATACGCAGCT 5'



In [6]:
#Example code 4-1: Print a double-strand DNA sequence with sticky ends
brick = DNA(record="pUC19.gbk")
brick.printdnaseq(linebreak=70)

5' GAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCG 3'
3' CTCTATGGATGTCGCACTCGATACTCTTTCGCGGTGCGAAGGGCTTCCCTCTTTCCGCCTGTCCATAGGC 5'

5' GTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATA 3'
3' CATTCGCCGTCCCAGCCTTGTCCTCTCGCGTGCTCCCTCGAAGGTCCCCCTTTGCGGACCATAGAAATAT 5'

5' GTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCT 3'
3' CAGGACAGCCCAAAGCGGTGGAGACTGAACTCGCAGCTAAAAACACTACGAGCAGTCCCCCCGCCTCGGA 5'

5' ATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTC 3'
3' TACCTTTTTGCGGTCGTTGCGCCGGAAAAATGCCAAGGACCGGAAAACGACCGGAAAACGAGTGTACAAG 5'

5' TTTCCTGCGTTATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCC 3'
3' AAAGGACGCAATAGGGGACTAAGACACCTATTGGCATAATGGCGGAAACTCACTCGACTATGGCGAGCGG 5'

5' GCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCC 3'
3' CGTCGGCTTGCTGGCTCGCGTCGCTCAGTCACTCGCTCCTTCGCCTTCTCGCGGGTTATGCGTTTGGCGG 5'

5' TCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGC

In [7]:
brick.printfeature(feature_type=["primer_bind"], attribute=["feature ID","qualifier:note"], detail=False)

feature ID  qualifier:note                                              
100         pBR322 origin, forward primer                               
200         L4440 vector, forward primer                                
600         In lacZ gene                                                
700         common sequencing primer, one of multiple similar variants  
800         In lacZ gene. Also called M13-rev                           
1100        In lacZ gene. Also called M13-F20 or M13 (-21) Forward      
1200        common sequencing primer, one of multiple similar variants  
1300        In lacZ gene                                                
1400        pRS vectors, use to sequence yeast selectable marker        
1500        pGEX vectors, reverse primer                                
1600        pBR322 vectors, upsteam of EcoRI site, forward primer       
1900        Ampicillin resistance gene, reverse primer                  



In [8]:
brick.printfeature(feature_type=["primer_bind"], attribute=["$DEFAULT","qualifier:note","sequence"], detail=False)

feature ID  qualifier:label  feature type  start position  end position  strand  qualifier:note                                              sequence                 
100         pBR322ori-F      primer_bind   117             137           +       pBR322 origin, forward primer                               GGGAAACGCCTGGTATCTTT     
200         L4440            primer_bind   370             388           +       L4440 vector, forward primer                                AGCGAGTCAGTGAGCGAG       
600         M13/pUC Reverse  primer_bind   583             606           +       In lacZ gene                                                AGCGGATAACAATTTCACACAGG  
700         M13 rev          primer_bind   602             619           +       common sequencing primer, one of multiple similar variants  CAGGAAACAGCTATGAC        
800         M13 Reverse      primer_bind   602             619           +       In lacZ gene. Also called M13-rev                           CAGGAAACAGCTATGAC       

In [9]:
brick.finddna("GGATCC")
print([(b.subject.start,b.subject.end,b.subject.strand) for b in brick.finddna("GGATCC")])

[(661, 667, 1), (661, 667, -1)]


In [10]:
print([(b.subject.start,b.subject.end) for b in brick.finddna("GGATCC", max_mismatch=1)])

[(661, 667), (661, 667), (62, 69), (1000, 1007), (1423, 1430), (2316, 2323), (203, 209), (242, 248), (1181, 1187), (1222, 1228), (1441, 1446), (1744, 1749), (1771, 1777), (1982, 1988), (2221, 2226), (2306, 2311), (2392, 2397), (2316, 2323), (1423, 1430), (1000, 1007), (62, 69), (2392, 2397), (2306, 2311), (2221, 2226), (2208, 2213), (1982, 1988), (1771, 1777), (1441, 1446), (1222, 1228), (1181, 1187), (242, 248), (203, 209), (1706, 1711), (1928, 1933), (2208, 2213), (1928, 1933), (1744, 1749), (1706, 1711)]


In [11]:
for b in brick.finddna("[ATGC]CC[ATGC]{18}[ATGC]GG"):
    print(b.seq, b.subject.start, b.subject.end, b.subject.strand, sep="\t") 

CCCGAAGGGAGAAAGGCGGACAGG	40	64	1
GCCAGCAACGCGGCCTTTTTACGG	220	244	1
TCCGGGAGCTGCATGTGTCAGAGG	1031	1055	1
ACCGAAACGCGCGAGACGAAAGGG	1069	1093	1
ACCGCTTTTTTGCACAACATGGGG	1721	1745	1
TCCCGGCAACAATTAATAGACTGG	1883	1907	1
ACCGGATAAGGCGCAGCGGTCGGG	2605	2629	1
GCCCGACCGCTGCGCCTTATCCGG	2606	2630	-1
TCCAGTCTATTAATTGTTGCCGGG	1884	1908	-1
CCCCCATGTTGTGCAAAAAAGCGG	1722	1746	-1
GCCCTTTCGTCTCGCGCGTTTCGG	1070	1094	-1
ACCTCTGACACATGCAGCTCCCGG	1032	1056	-1
ACCGTAAAAAGGCCGCGTTGCTGG	221	245	-1
ACCTGTCCGCCTTTCTCCCTTCGG	41	65	-1


In [12]:
cds_list = brick.finddna(query="CDS", attribute="feature type")

In [13]:
cds_list = brick.finddna(query="[CA]DS",  attribute="feature type")
print([(cds.subject.start,cds.subject.end,cds.subject.strand) for cds in cds_list])

[(614, 938, 1), (1283, 2144, 1)]


In [14]:
for b in brick.finddna("[ATGC]CC[ATGC]{18}[ATGC]GG", attribute="sequence:|1000..1500|"):
    print(b.subject.start, b.subject.end, b.subject.strand) 

1031 1055 1
1069 1093 1
1070 1094 -1
1032 1056 -1


In [9]:
brick.printdnafeature(feature_key=["misc_feature"], with_seq=True)

Feature_ID  Label  Type          Start  End  Strand  Seq                                                        
1000        MCS    misc_feature  631    688  +       AAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTC  



In [10]:
brick.adddnafeature(200, 220, qualifiers={"label":"feat1"}) 
brick.adddnafeature(2650, 10, qualifiers={"label":"feat2"}) #feat on origin
brick.printdnafeature(feature_key=["misc_feature"], with_seq=True)

Feature_ID  Label  Type          Start  End  Strand  Seq                                                        
101         feat1  misc_feature  200    220  +       GGCGGAGCCTATGGAAAAAC                                       
1000        MCS    misc_feature  631    688  +       AAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTC  
1901        feat2  misc_feature  2650   10   +       ACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTA             



In [11]:
brick.removednafeature("101")
brick.printdnafeature(feature_key=["misc_feature"], with_seq=True)

Feature_ID  Label  Type          Start  End  Strand  Seq                                                        
1000        MCS    misc_feature  631    688  +       AAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTC  
1901        feat2  misc_feature  2650   10   +       ACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTA             



In [12]:
sub_brick1, sub_brick2 = cutdna(brick ,1000, 2000)
print(len(brick.seq), len(sub_brick1.seq), len(sub_brick2.seq))
sub_brick1.printdnafeature(feature_key=["source"])
sub_brick2.printdnafeature(feature_key=["source"])

2686 1686 1000
Feature_ID  Label                  Type    Start  End   Strand  
100         pUC19:2001..1000:2686  source  0      1686  +       

Feature_ID  Label                  Type    Start  End   Strand  
0           pUC19:1001..2000:2686  source  0      1000  +       



In [13]:
sub_brick = cropdna(brick, 1000, 2000,)
brick.printdnafeature() 
print()
sub_brick.printdnafeature() 

Feature_ID  Label             Type          Start  End   Strand  
0           pUC19             source        0      2686  +       
100         pBR322ori-F       primer_bind   117    137   +       
200         L4440             primer_bind   370    388   +       
300         CAP binding site  protein_bind  504    526   +       
400         lac promoter      promoter      540    571   +       
500         lac operator      protein_bind  578    595   +       
600         M13/pUC Reverse   primer_bind   583    606   +       
700         M13 rev           primer_bind   602    619   +       
800         M13 Reverse       primer_bind   602    619   +       
900         lacZ-alpha        CDS           614    938   +       
1000        MCS               misc_feature  631    688   +       
1100        M13 Forward       primer_bind   688    706   -       
1200        M13 fwd           primer_bind   688    705   -       
1300        M13/pUC Forward   primer_bind   697    720   -       
1400      

In [14]:
brick.printdnafeature(feature_key=["primer_bind"], with_seq=True)
print()
sub_brick = cropdna(brick,brick.finddna("800")[0].sstart,brick.finddna("1100")[0].send)
sub_brick.printdnaseq(whole=True)
sub_brick.printdnafeature() 

Feature_ID  Label            Type         Start  End   Strand  Seq                      
100         pBR322ori-F      primer_bind  117    137   +       GGGAAACGCCTGGTATCTTT     
200         L4440            primer_bind  370    388   +       AGCGAGTCAGTGAGCGAG       
600         M13/pUC Reverse  primer_bind  583    606   +       AGCGGATAACAATTTCACACAGG  
700         M13 rev          primer_bind  602    619   +       CAGGAAACAGCTATGAC        
800         M13 Reverse      primer_bind  602    619   +       CAGGAAACAGCTATGAC        
1100        M13 Forward      primer_bind  688    706   -       TGTAAAACGACGGCCAGT       
1200        M13 fwd          primer_bind  688    705   -       GTAAAACGACGGCCAGT        
1300        M13/pUC Forward  primer_bind  697    720   -       CCCAGTCACGACGTTGTAAAACG  
1400        pRS-marker       primer_bind  913    933   -       CGGCATCAGAGCAGATTGTA     
1500        pGEX 3'          primer_bind  1032   1055  +       CCGGGAGCTGCATGTGTCAGAGG  
1600        pBRforEco

In [15]:
sub_brick = cropdna(brick, 100, 120)
sub_brick.printdnaseq()
sub_brick_m = modifyends(sub_brick, "" ,"")
sub_brick_m.printdnaseq()
sub_brick_m = modifyends(sub_brick,"----**/******", "******/****--")
sub_brick_m.printdnaseq()
sub_brick_m = modifyends(sub_brick,"----GG/CCCCCC", "******/****--")
sub_brick_m.printdnaseq()

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'

5' ----GGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCC-- 5'

5' ----GGACGAGGGAGCTTCCAGGGGG 3'
3' CCCCCCTGCTCCCTCGAAGGTCCC-- 5'



In [16]:
sub_brick = cropdna(brick, 100, 120)
sub_brick.printdnaseq()  
sub_brick = modifyends(sub_brick, "******/****--", "----**/******")
sub_brick.printdnaseq()

5' ACGAGGGAGCTTCCAGGGGG 3'
3' TGCTCCCTCGAAGGTCCCCC 5'



TypeError: Please sepcify a proper sequence pattern for the 'left' argument

In [17]:
#Source code (continued from previous one)#
sub_brick = cropdna(brick, 100, 120)
sub_brick = modifyends(sub_brick,"ATGTACG","ATGCTAC")
sub_brick.printdnaseq(whole=True) 

5' ATGTACGACGAGGGAGCTTCCAGGGGGATGCTAC 3'
3' TACATGCTGCTCCCTCGAAGGTCCCCCTACGATG 5'



In [18]:
sub_brick = cropdna(brick, 100, 120)
sub_brick = modifyends(sub_brick,"---ATGC/ATGTACG","TACG---/ATGCTAC")
sub_brick.printdnaseq() 

5' ---ATGCACGAGGGAGCTTCCAGGGGGTACG--- 3'
3' ATGTACGTGCTCCCTCGAAGGTCCCCCATGCTAC 5'



In [19]:
print(len(brick.seq))
brick_EcoRI = modifyends(cutdna(brick, brick.finddna("GAATTC")[0].sstart)[0],"-*****/-----*","G----/CTTAA")
brick_EcoRI.printdnaseq(whole=False)
print(len(sub_brick.seq))

2686
5' AATTCACTGG...AGCTCG---- 3'
3' ----GTGACC...TCGAGCTTAA 5'

34


In [20]:
SacI = "GAGCTC"
SalI = "GTCGAC"
sub_brick = cropdna(brick, brick.finddna(SacI)[0].sstart, brick.finddna(SalI)[0].send) 
sub_brick.printdnaseq() 
sub_brick = modifyends(sub_brick, "-----*/-*****", "*-----/*****-") 
sub_brick.printdnaseq()
print(len(sub_brick.seq))

5' GAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGTTAAGCCAGCCCCGACACCCGCCAACACCCGCTGACGCGCCCTGACGGGCTTGTCTGCTCCCGGCATCCGCTTACAGACAAGCTGTGACCGTCTCCGGGAGCTGCATGTGTCAGAGGTTTTCACCGTCATCACCGAAACGCGCGAGACGAAAGGGCCTCGTGATACGCCTATTTTTATAGGTTAATGTCATGATAATAATGGTTTCTTAGACGTCAGGTGGCACTTTTCGGGGAAATGTGCGCGGAACCCCTATTTGTTTATTTTTCTAAATACATTCAAATATGTATCCGCTCATGAGACAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCCGTATTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAAC

In [21]:
SacI = "GAGCTC"
SalI = "GTCGAC"
EGFP = DNA(record="EGFP.fasta") 
EGFP.adddnafeature(0,len(EGFP.seq),qualifiers={"label":"EGFP"})
EGFP = modifyends(EGFP, SacI, SalI) 
EGFP.printdnaseq() 
EGFP = modifyends(EGFP, "-----*/-*****", "*-----/*****-")
EGFP.printdnaseq() 
product = joindna(EGFP, sub_brick, topology="circular")
print(len(product.seq))

5' GAGCTCATGTCCACCAACTTATCAGTGATAAAGAATCCGCGCGTTCAATCGGACCAGCGGAGGCTGGTCCGGAGGCCAGACGTGAAACCCAACATACCCCTGATCGTAATTCTGAGGTCGACTCTAGAGGATCCCCGGGTACCGGTCGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGACCTACGGCGTGCAGTGCTTCAGCCGCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATCCGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCACCCAGTCCGCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAATTCCAACTGAGCGCCGGTCGCTACCATTACCAACTTGTCTGGTGTCAAAAATAATAGGCCTACTAGTCGGCCGTACGGGCCCTTTCGTCTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGG

In [22]:
product.printdnafeature()

Feature_ID  Label                   Type          Start  End   Strand  
0           EGFP                    misc_feature  5      1085  +       
100         MCS:47..57:57           misc_feature  1090   1101  +       
200         lacZ-alpha:64..324:324  CDS           1090   1351  +       
300         pUC19:678..654:2686     source        1090   3753  +       
400         M13 fwd                 primer_bind   1101   1118  -       
500         M13 Forward             primer_bind   1101   1119  -       
600         M13/pUC Forward         primer_bind   1110   1133  -       
700         pRS-marker              primer_bind   1326   1346  -       
800         pGEX 3'                 primer_bind   1445   1468  +       
900         pBRforEco               primer_bind   1505   1524  -       
1000        AmpR promoter           promoter      1591   1696  +       
1100        AmpR                    CDS           1696   2557  +       
1200        Amp-R                   primer_bind   1914   1934  -

In [23]:
product_shifted = joindna(cutdna(product, 2727)[0])

In [24]:
product_shifted.printdnafeature()

Feature_ID  Label                   Type          Start  End   Strand  
0           ori                     rep_origin    0      589   +       
200         feat2                   misc_feature  336    382   +       
300         pBR322ori-F             primer_bind   489    509   +       
400         L4440                   primer_bind   742    760   +       
500         CAP binding site        protein_bind  876    898   +       
600         lac promoter            promoter      912    943   +       
700         lac operator            protein_bind  950    967   +       
800         M13/pUC Reverse         primer_bind   955    978   +       
1000        M13 Reverse             primer_bind   974    991   +       
900         M13 rev                 primer_bind   974    991   +       
1100        lacZ-alpha:1..40:324    CDS           986    1026  +       
1200        MCS:1..23:57            misc_feature  1003   1026  +       
1300        EGFP                    misc_feature  1031   2111  +