# Quick demo for Plasmid: a Python-based tool for gene editing

In [2]:
import plasmid as pge
import importlib
importlib.reload(pge)

<module 'plasmid' from '/home/zchen/Public/python/lib/python3.11/site-packages/plasmid/__init__.py'>

## Manipulating and displaying genbank files
### Reading genbank or fasta files

Genbank, fasta, and text files can be read using the `plasmid` dataframe class. This function is analogous to `read_csv` in pandas. This data structure uses Biopython in the backend to store genbank information such as sequence and genomic features or tags. The underlying data in Biopython is redisplayed in a more user-friendly format for splicing, sorting, and concatenating, like a pandas dataframe.

In [3]:
df = pge.read_genbank('../data/xRFP.gb')
df

reading  ../data/xRFP.gb  as genbank file


<class 'plasmid.plasmid.Plasmid'> at 0x7f5b1a4804d0
molecule_type:DNA
topology:circular
data_file_division:   
date:05-DEC-2022
accessions:['<unknown', 'id>']
keywords:['']
source:
organism:. .
taxonomy:[]
comment:
ApEinfo:methylated:1
                              locus_tag          type        location  length   
0                         AmpR promoter      promoter       [0:29](+)      29  \
1                              AmpR RBS           RBS      [29:70](+)      41   
2                          SpectomycinR           CDS     [70:868](+)     798   
3                             BBa_B0053    terminator    [878:948](+)      70   
4   J23108, 0.51, Constitutive Promoter      promoter  [1032:1067](+)      35   
5          RBS 1.00 strength, BBa_B0034           RBS  [1086:1098](+)      12   
6                       LacI, BBa_C0012           CDS  [1104:2196](+)    1092   
7                             Esp3I_fix  misc_feature  [2072:2078](+)       6   
8         Forward Terminator, BBa_B

In [9]:
# build a Graphic view of the plasmid and show it as the cell output
gr = df.to_graphic()
gr
# NOTE(review): bokeh rendering is left disabled here; enable or remove once confirmed
#gr.bokeh_plot()

<class 'plasmid.graphic.Graphic'> at 0x7f5b19f87f50
Colorized Plasmid 
[38;2;15;127;254mTTCAAATATCTATCCGCTCATGAGACAAT[39m[38;2;0;255;255mAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAAT[39m[38;2;212;0;55mATGAGTGAAAAAGTGCCCGCCGAGATTTCGGTGCAACTATCACAAGCACTCAACGTCATCGGGCGCCACTTGGAGTCGACGTTGCTGGCCGTGCATTTGTACGGCTCCGCACTGGATGGCGGATTGAAACCGTACAGTGATATTGATTTGCTGGTGACTGTAGCTGCACCGCTCAATGATGCCGTGCGGCAAGCCCTGCTCGTCGATCTCTTGGAGGTTTCAGCTTCCCCTGGCCAAAACAAGGCACTCCGCGCCTTGGAAGTGACCATCGTCGTGCACAGTGACATCGTACCTTGGCGTTATCCGGCCAGGCGGGAACTGCAGTTCGGAGAGTGGCAGCGCAAAGACATCCTTGCGGGCATCTTCGAGCCCGCCACAACCGATTCTGACTTGGCGATTCTGCTAACAAAGGCAAAGCAACATAGCGTCGTCTTGGCAGGTTCAGCAGCGAAGGATCTCTTCAGCTCAGTCCCAGAAAGCGATCTATTCAAGGCACTGGCCGATACTCTGAAGCTATGGAACTCGCCGCCAGATTGGGCGGGCGATGAGCGGAATGTAGTGCTTACTTTGTCTCGTATCTGGTACACCGCAGCAACCGGCAAGATCGCGCCAAAGGATGTTGCTGCCACTTGGGCAATGGCACGCTTGCCAGCTCAACATCAGCCCATCCTGTTGAATGCCAAGCGGGCTTATCTTGGGCAAGAAGAAGATTATTTGCCCGCTCGTGCGGATCAGGTGGCGGCGCTCATTAAATTCGTGAAGTATGAAGCAGTTAAACTGCTTGGTGCCAGCCAATGA

The raw sequence with color annotations can be displayed by printing the output of `.get_colored()`. The dataframe can also be directly cast as a string to obtain the raw sequence.

In [3]:
print(df.get_colored())

[38;2;15;127;254mTTCAAATATCTATCCGCTCATGAGACAAT[39m[38;2;0;255;255mAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAAT[39m[38;2;212;0;55mATGAGTGAAAAAGTGCCCGCCGAGATTTCGGTGCAACTATCACAAGCACTCAACGTCATCGGGCGCCACTTGGAGTCGACGTTGCTGGCCGTGCATTTGTACGGCTCCGCACTGGATGGCGGATTGAAACCGTACAGTGATATTGATTTGCTGGTGACTGTAGCTGCACCGCTCAATGATGCCGTGCGGCAAGCCCTGCTCGTCGATCTCTTGGAGGTTTCAGCTTCCCCTGGCCAAAACAAGGCACTCCGCGCCTTGGAAGTGACCATCGTCGTGCACAGTGACATCGTACCTTGGCGTTATCCGGCCAGGCGGGAACTGCAGTTCGGAGAGTGGCAGCGCAAAGACATCCTTGCGGGCATCTTCGAGCCCGCCACAACCGATTCTGACTTGGCGATTCTGCTAACAAAGGCAAAGCAACATAGCGTCGTCTTGGCAGGTTCAGCAGCGAAGGATCTCTTCAGCTCAGTCCCAGAAAGCGATCTATTCAAGGCACTGGCCGATACTCTGAAGCTATGGAACTCGCCGCCAGATTGGGCGGGCGATGAGCGGAATGTAGTGCTTACTTTGTCTCGTATCTGGTACACCGCAGCAACCGGCAAGATCGCGCCAAAGGATGTTGCTGCCACTTGGGCAATGGCACGCTTGCCAGCTCAACATCAGCCCATCCTGTTGAATGCCAAGCGGGCTTATCTTGGGCAAGAAGAAGATTATTTGCCCGCTCGTGCGGATCAGGTGGCGGCGCTCATTAAATTCGTGAAGTATGAAGCAGTTAAACTGCTTGGTGCCAGCCAATGA[39mTAATACTAGC[38;2;157;27;28mTCCGGCAAAAAAACGGGCAAGGTGTCACCACCCTGCCCT

The underlying sequence can be sliced like a string.

In [4]:
# slice once and reuse the result for both the feature table and the colored sequence
subseq = df[0:2411]
print(repr(subseq))
print(subseq.get_colored())

<class 'plasmid.plasmid.Plasmid'> at 0x7f3f942e50d0
molecule_type:DNA
topology:circular
data_file_division:   
date:05-DEC-2022
accessions:['<unknown', 'id>']
keywords:['']
source:
organism:. .
taxonomy:[]
comment:
ApEinfo:methylated:1
                                          locus_tag          type   
0                         AmpR promoter subsequence      promoter  \
1                              AmpR RBS subsequence           RBS   
2                          SpectomycinR subsequence           CDS   
3                             BBa_B0053 subsequence    terminator   
4   J23108, 0.51, Constitutive Promoter subsequence      promoter   
5          RBS 1.00 strength, BBa_B0034 subsequence           RBS   
6                       LacI, BBa_C0012 subsequence           CDS   
7                             Esp3I_fix subsequence  misc_feature   
8         Forward Terminator, BBa_B1002 subsequence    terminator   
9   pLacO2, single operon pLac ZC082818 subsequence      promoter   
10   

Plasmids are circular, not linear. Therefore, when we view a plasmid, it is from a certain origin or reference position. We can choose a new reference position from which to view the plasmid via `.set_origin`.

In [5]:
# take the sub-sequence covering positions 1032 to 2411 (x is reused by later cells)
x = df[1032:2411]
print(x.get_colored())
# view the same sub-sequence with 1198 passed as the new reference position
y = x.set_origin(1198)
print(y.get_colored())

[38;2;64;128;0mCTGACAGCTAGCTCAGTCCTAGGTATAATGCTAGC[39mAAAAGAATTCAAAAGATCT[38;2;0;255;255mAAAGAGGAGAAA[39mGGATCT[38;2;252;102;101mATGGTGAATGTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGGTGCCAGCGTGGTGGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTTCTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCCATTGCTGTGGAAGCTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTTCTCCCATGAAGACGGTACGCGACTGGGCGTGGAGCATCTGGTCGCATTGGGTCACCAGCAAATCGCGCTGTTAGCGGGCCCATTAAGTTCTGTCTCGGCGCGTCTGCGTCTGGCTGGCTGGCATAAATATCTCACTCGCAATCAAATTCAGCCGATAGCGGAACGGGAAGGCGACTGGAGTGCCATGTCCGGTTTTCAACAAACCATGCAAATGCTGAATGAGGGCATCGTTCCCACTGCGATGCTGGTTGCCAACGATCAGATGGCGCTGGGCGCAATGCGCGCCATTACCGAGTCCGGGCTGCGCGTTGGTGCGGATATCTCGGTAGTGGGATACGACGATACCGAAGACAGCTCATGTTATATCCCGCCGTT

Genes can also be selected and sliced out of the dataframe via boolean selection, as in pandas.

In [6]:
# keep only the features whose type is 'terminator'
is_terminator = x['type'] == 'terminator'
y = x[is_terminator]
# feature table first, then the color-annotated sequence
print(repr(y))
print(y.get_colored())

<class 'plasmid.plasmid.Plasmid'> at 0x7f3fef2c1a10
molecule_type:DNA
topology:circular
data_file_division:   
date:05-DEC-2022
accessions:['<unknown', 'id>']
keywords:['']
source:
organism:. .
taxonomy:[]
comment:
ApEinfo:methylated:1
                                   locus_tag        type        location   
0  Forward Terminator, BBa_B1002 subsequence  terminator  [1164:1198](+)  \
1             S. pyog terminator subsequence  terminator  [1298:1339](+)   
2  Forward Terminator, BBa_B1010 subsequence  terminator  [1339:1379](+)   

   length    color  
0      34  [38;2;128;64;0m#804000[39m  
1      41  [38;2;128;64;0m#804000[39m  
2      40  [38;2;128;64;0m#804000[39m  
total length:1379

CTGACAGCTAGCTCAGTCCTAGGTATAATGCTAGCAAAAGAATTCAAAAGATCTAAAGAGGAGAAAGGATCTATGGTGAATGTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCT

Components can be spliced out via boolean selection, as shown below.

In [7]:
# select the LacI feature and cut it out as its own construct
lacI_rows = x['locus_tag'].str.contains('LacI')
y = x[lacI_rows].splice()
print(repr(y))
print(y.get_colored())

<class 'plasmid.plasmid.Plasmid'> at 0x7f3fef2c1a10
molecule_type:DNA
topology:circular
data_file_division:   
date:05-DEC-2022
accessions:['<unknown', 'id>']
keywords:['']
source:
organism:. .
taxonomy:[]
comment:
ApEinfo:methylated:1
                                 locus_tag type     location  length    color
0  LacI, BBa_C0012 subsequence subsequence  CDS  [0:1092](+)    1092  [38;2;252;102;101m#fc6665[39m
total length:1092

[38;2;252;102;101mATGGTGAATGTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGGTGCCAGCGTGGTGGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTTCTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCCATTGCTGTGGAAGCTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTTCTCCCATGAAGACGGTACGCGACTGGGCGTGGAGCATCTGGTCGCATTGGGTCAC

DNA sequences can be translated to amino acid sequences via `.translate()`

In [8]:
# splice out LacI, then show its DNA followed by its amino acid translation
lacI_mask = x['locus_tag'].str.contains('LacI')
y = x[lacI_mask]
y = y.splice()
print(y.get_colored())
print(y.translate())

[38;2;252;102;101mATGGTGAATGTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATCAGACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGCGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGGCGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGGTGCCAGCGTGGTGGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTTCTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCCATTGCTGTGGAAGCTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTTCTCCCATGAAGACGGTACGCGACTGGGCGTGGAGCATCTGGTCGCATTGGGTCACCAGCAAATCGCGCTGTTAGCGGGCCCATTAAGTTCTGTCTCGGCGCGTCTGCGTCTGGCTGGCTGGCATAAATATCTCACTCGCAATCAAATTCAGCCGATAGCGGAACGGGAAGGCGACTGGAGTGCCATGTCCGGTTTTCAACAAACCATGCAAATGCTGAATGAGGGCATCGTTCCCACTGCGATGCTGGTTGCCAACGATCAGATGGCGCTGGGCGCAATGCGCGCCATTACCGAGTCCGGGCTGCGCGTTGGTGCGGATATCTCGGTAGTGGGATACGACGATACCGAAGACAGCTCATGTTATATCCCGCCGTTAACCACCATCAAACAGGATTTTCGCCTGCTGGGGCAAACCAGCGTGGACCGCTTGCTGCAACTCTCTCAGGGCCAGGCGGTGAAGGGCAATCAGCTGTTGCCCGTTTCACTGGTG

### Editing genes and generating new constructs
New genes and constructs can be generated by combining genetic parts via concatenation.

In [9]:
def extract_feature(path, tag):
    """Read a genbank file and splice out the feature(s) whose locus_tag contains `tag`.

    path = genbank file to read
    tag = substring matched against the locus_tag column
    returns the spliced plasmid dataframe
    """
    plasmid = pge.read_genbank(path)
    return plasmid[plasmid['locus_tag'].str.contains(tag)].splice()

# slice out the RFP gene
RFP = extract_feature('../data/dcas9_RFP.gb', 'mRFP')
print(RFP.get_colored())
# slice out the ribosome binding site
RBS = extract_feature('../data/xRFP.gb', 'BBa_B0034')
print(RBS.get_colored())
# slice out the promoter
pLac = extract_feature('../data/xRFP.gb', 'pLac')
print(pLac.get_colored())

# assemble the promoter, rbs, and mRFP
# the parts are joined by 'ggtctc' and its reverse complement 'gagacc'
df = pLac + 'gagacc' + RBS + 'ggtctc' + RFP
print(repr(df))
print(df.get_colored())

reading  ../data/dcas9_RFP.gb  as genbank file
[38;2;128;0;64mATGGCGAGTAGCGAAGACGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGACGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGTCCGACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGGAAGACGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGTGGTCACTACGACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAAACCGACATCAAACTGGACATCACCTCCCACAACGAAGACTACACCATCGTTGAACAGTACGAACGTGCTGAAGGTCGTCACTCCACCGGTGCTTAA[39m
reading  ../data/xRFP.gb  as genbank file
[38;2;0;255;255mAAAGAGGAGAAA[39m
reading  ../data/xRFP.gb  as genbank file
[38;2;0;128;128mAATTGACAATGTGAGCGAGTAACAAGATACTGAGCACA[39m
<class 'plasmid.plasmid.Plasmid'> at 0x7f3f9575cd50
molecule_type:DNA
top

## Annotating features and writing genbank files
Annotation of new genes or features can be done with the `.annotate` function. The data can be written to genbank files via `.to_genbank`

In [10]:
help(df.annotate)

Help on method annotate in module plasmid.plasmid:

annotate(name, sequence, feature='unknown', color=None, circular=True, inplace=False) method of plasmid.plasmid.Plasmid instance
    Adds annotations to a plasmid using sequence information
    name = name of the gene
    sequence = DNA or amino acid sequence of the feature
    feature = type of genetic feature such as cds, mRNA, primer_bind
    color = [fwd_color, rev_color] to use
    circular = search as though sequence is a circular construct
    inplace = performs modifications inplace
    returns a modified plasmid dataframe



In [11]:
# show the feature table, then the color-annotated sequence
print(repr(df))
print(df.get_colored())

<class 'plasmid.plasmid.Plasmid'> at 0x7f12c0541d20
molecule_type:DNA
topology:circular
                             locus_tag      type     location  length    color
0  pLacO2, single operon pLac ZC082818  promoter    [0:38](+)      38  [38;2;0;128;128m#008080[39m
1         RBS 1.00 strength, BBa_B0034       RBS   [44:56](+)      12     [38;2;0;255;255mcyan[39m
2      mRFP, uniprot drFP583, pdb 2H5O       CDS  [62:740](+)     678  [38;2;128;0;64m#800040[39m
total length:740

[38;2;0;128;128mAATTGACAATGTGAGCGAGTAACAAGATACTGAGCACA[39mgagacc[38;2;0;255;255mAAAGAGGAGAAA[39mggtctc[38;2;128;0;64mATGGCGAGTAGCGAAGACGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGACGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGT

In [12]:
# annotate occurrences of GGTCTC as BsaI sites, then remove duplicate entries
df = df.annotate(
    name='BsaI',
    sequence='GGTCTC',
    feature='protein_bind',
    color=['red', 'orange'],
).drop_duplicates()
print(repr(df))
print(df.get_colored())
# write the annotated construct out as a genbank file
df.to_genbank('demo_RFP.gb')

<class 'plasmid.plasmid.Plasmid'> at 0x7f12c056bc10
molecule_type:DNA
topology:circular
                             locus_tag          type     location  length  \
0  pLacO2, single operon pLac ZC082818      promoter    [0:38](+)      38   
1                                 BsaI  protein_bind   [38:44](-)       6   
2         RBS 1.00 strength, BBa_B0034           RBS   [44:56](+)      12   
3                                 BsaI  protein_bind   [56:62](+)       6   
4      mRFP, uniprot drFP583, pdb 2H5O           CDS  [62:740](+)     678   

     color  
0  [38;2;0;128;128m#008080[39m  
1   [38;2;255;165;0morange[39m  
2     [38;2;0;255;255mcyan[39m  
3      [38;2;255;0;0mred[39m  
4  [38;2;128;0;64m#800040[39m  
total length:740

[38;2;0;128;128mAATTGACAATGTGAGCGAGTAACAAGATACTGAGCACA[39m[38;2;255;165;0mgagacc[39m[38;2;0;255;255mAAAGAGGAGAAA[39m[38;2;255;0;0mggtctc[39m[38;2;128;0;64mATGGCGAGTAGCGAAGACGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCG

In [13]:
# read the genbank file written by the previous cell and show its raw text
with open('demo_RFP.gb') as handle:
    text = handle.read()
print(text)

LOCUS       .                        740 bp    DNA     circular UNK 01-JAN-1980
DEFINITION  .
ACCESSION   <unknown id>
VERSION     <unknown id>
KEYWORDS    .
SOURCE      .
  ORGANISM  .
            .
FEATURES             Location/Qualifiers
     promoter        1..38
                     /locus_tag="pLacO2, single operon pLac ZC082818"
                     /ApEinfo_label="pLacO2, single operon pLac ZC082818"
                     /ApEinfo_fwdcolor="#008080"
                     /ApEinfo_revcolor="#008080"
     protein_bind    complement(39..44)
                     /locus_tag="BsaI"
                     /ApEinfo_fwdcolor="red"
                     /ApEinfo_revcolor="orange"
                     /ApEinfo_label="BsaI"
     RBS             45..56
                     /locus_tag="RBS 1.00 strength, BBa_B0034"
                     /ApEinfo_label="RBS 1.00 strength, BBa_B0034"
                     /ApEinfo_fwdcolor="cyan"
                     /ApEinfo_revcolor="cyan"
     protein_bind    57..

Annotations can also search for amino acid sequences.

In [14]:
# amino acid sequence encoded by the mRFP gene
print(RFP.translate())

# rebuild the construct, annotate the peptide by its amino acid sequence,
# and remove duplicate entries
df = (pLac + 'gagacc' + RBS + 'ggtctc' + RFP).annotate(
    name='peptide',
    sequence='DGALKGEIKMRLKLKDG',
    color='orange',
).drop_duplicates()
print(df.get_colored())

MASSEDVIKEFMRFKVRMEGSVNGHEFEIEGEGEGRPYEGTQTAKLKVTKGGPLPFAWDILSPQFQYGSKAYVKHPADIPDYLKLSFPEGFKWERVMNFEDGGVVTVTQDSSLQDGEFIYKVKLRGTNFPSDGPVMQKKTMGWEASTERMYPEDGALKGEIKMRLKLKDGGHYDAEVKTTYMAKKPVQLPGAYKTDIKLDITSHNEDYTIVEQYERAEGRHSTGA*
[38;2;0;128;128mAATTGACAATGTGAGCGAGTAACAAGATACTGAGCACA[39mgagacc[38;2;0;255;255mAAAGAGGAGAAA[39mggtctc[38;2;128;0;64mATGGCGAGTAGCGAAGACGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCCGTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACCCAGACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCCGCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAAACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGACGGTGGTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACCAACTTCCCGTCCGACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGGAA[39m[38;2;255;165;0mGACGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGT[39m[38;2;128;0;64mGGTCACTACGACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAAACCGACATCAAACTGGACATCACCTC



## Extension PCR
This library provides functionality for designing primers for cloning the DNA constructs using the `Designer` class.

In [11]:
pcr = pge.Designer()
help(pcr.xtPCR)

Help on method xtPCR in module plasmid.designer:

xtPCR(fL, seq, fR=None, padding=[2, 2], niter=3, w=[10, 100, 1, 1, 2], get_cost=False) method of plasmid.designer.Designer instance
    Find primers which can seed and extend a PCR fragment
    fL = flanking sequence on 5' end
    seq = sequence on 3' end which gets amplified
    fR = flanking sequence on 3' end
    padding = number of extra primers to try
    w = weights for cost function
    method = optimization method
    returns list of primers



The following shows how to obtain extension PCR primers that will add promoter and RBS sequences to the RFP gene.

In [12]:
# slice out the RFP gene
rfp_plasmid = pge.read_genbank('../data/dcas9_RFP.gb')
is_mrfp = rfp_plasmid['locus_tag'].str.contains('mRFP')
RFP = rfp_plasmid[is_mrfp].splice()
# slice out the ribosome binding site
rbs_plasmid = pge.read_genbank('../data/xRFP.gb')
is_rbs = rbs_plasmid['locus_tag'].str.contains('BBa_B0034')
RBS = rbs_plasmid[is_rbs].splice()
# slice out the promoter
plac_plasmid = pge.read_genbank('../data/xRFP.gb')
is_plac = plac_plasmid['locus_tag'].str.contains('pLac')
pLac = plac_plasmid[is_plac].splice()

# assemble the promoter, rbs, and mRFP
df = pLac + 'gagacc' + RBS + 'ggtctc' + RFP

reading  ../data/dcas9_RFP.gb  as genbank file
reading  ../data/xRFP.gb  as genbank file
reading  ../data/xRFP.gb  as genbank file


In [13]:
pcr = pge.Designer()
# xtPCR settings: target annealing temperature and [min, max] primer lengths
pcr.params['xtPCR']['Tm'] = 55
pcr.params['xtPCR']['len'] = [15, 60]
pcr.params['verbose'] = False

# promoter + RBS go on the 5' side of the RFP gene; the 3' flank is a single space
insert = pLac + 'gagacc' + RBS + 'ggtctc'
res = pcr.xtPCR(fL=insert, seq=RFP, fR=' ')
# primer table, followed by its underlying array of values
print(res)
print(res.values)

running fwd
running rev
  locus_tag         Tm                                           sequence   
0       0_F  55.381908  AGATACTGAGCACAgagaccAAAGAGGAGAAAggtctc ATGGCGA...  \
1     fin_F  55.348127      AATTGACAATGTGAGCGAGTAACA AGATACTGAGCACAgagacc   
0     fin_R  56.777386                                 TTAAGCACCGGTGGAGTG   

                 annealed  strand  
0  ATGGCGAGTAGCGAAGACGTTA       1  
1    AGATACTGAGCACAgagacc       1  
0      TTAAGCACCGGTGGAGTG      -1  
[['0_F' 55.38190849297598
  'AGATACTGAGCACAgagaccAAAGAGGAGAAAggtctc ATGGCGAGTAGCGAAGACGTTA'
  'ATGGCGAGTAGCGAAGACGTTA' 1]
 ['fin_F' 55.34812743102748
  'AATTGACAATGTGAGCGAGTAACA AGATACTGAGCACAgagacc' 'AGATACTGAGCACAgagacc'
  1]
 ['fin_R' 56.777386483231 '  TTAAGCACCGGTGGAGTG' 'TTAAGCACCGGTGGAGTG' -1]]


## Gibson assembly
The following shows how to design primers for Gibson assembly.

In [14]:
pcr = pge.Designer()
help(pcr.Gibson)

Help on method Gibson in module plasmid.designer:

Gibson(seqlist, w=[10, 1], method='differential_evolution', circular=True) method of plasmid.designer.Designer instance
    Design primers for gibson assembly
    seqlist = list of sequences to assemble via gibson in order 
    circular = assemble fragments into a circular construct
    returns list of primers



In [15]:
def get_parts(xrfp_path='../data/xRFP.gb', rfp_path='../data/dcas9_RFP.gb'):
    """Load the three parts used by the assembly demos.

    xrfp_path = genbank file containing the LacI gene and the pSC101 feature
    rfp_path = genbank file containing the mRFP gene
    returns (LacI, RFP, vec): the spliced LacI feature, the spliced mRFP
    feature, and the slice of the xRFP plasmid spanning the first feature
    whose locus_tag contains 'pSC101'
    """
    # slice out the LacI gene
    xrfp = pge.read_genbank(xrfp_path)
    LacI = xrfp[xrfp['locus_tag'].str.contains('LacI')].splice()

    # slice out the RFP gene
    rfp_src = pge.read_genbank(rfp_path)
    RFP = rfp_src[rfp_src['locus_tag'].str.contains('mRFP')].splice()

    # slice out the origin of replication
    # the file is read again here, as in the original, before slicing
    backbone = pge.read_genbank(xrfp_path)
    origin_hits = backbone[backbone['locus_tag'].str.contains('pSC101')]
    # use the coordinates of the first matching feature to slice the plasmid
    start = origin_hits['start'][0]
    stop = origin_hits['end'][0]
    vec = backbone[start:stop]
    return LacI, RFP, vec

In [16]:
LacI, RFP, vec = get_parts()
# one entry per fragment, in assembly order
# NOTE(review): entries look like [left flank, fragment, right flank] — confirm against Designer.Gibson
seq = [
    [' ', LacI, 'AAAActttt'],
    [' ', RFP, 'CGCCctttt'],
    [' ', vec, 'GGGGctttt'],
]

pcr = pge.Designer()
# settings for the Gibson overlaps
pcr.params['gibson']['Tm'] = 50     # target annealing temperature of gibson fragments
pcr.params['gibson']['window'] = 30 # +/- window in bp around frag edges to look for gibson overlap
pcr.params['gibson']['len'] = 20    # length of gibson overlap

# settings for the extension PCR primers
pcr.params['xtPCR']['Tm'] = 55         # target annealing temperature for xtPCR
pcr.params['xtPCR']['len'] = [15, 60]  # [min, max] primer lengths
pcr.params['xtPCR']['nM'] = [20, 500]  # [seed, finisher] primer conc in nM
pcr.params['verbose'] = False

res = pcr.Gibson(seq)
print(res)

reading  ../data/xRFP.gb  as genbank file
reading  ../data/dcas9_RFP.gb  as genbank file
reading  ../data/xRFP.gb  as genbank file
res.x [10.95407762 17.41633213 26.61257437]
res.fun -57.0
exclude: []
overlaps: ['GCGGGCAGTAAAAAActttt', 'TTAACGCCctttt CTGTCA', 'tttt ATGGTGAATGTGAAA']
Tm overlap: [49.290031925644485, 48.14694596334914, 41.2522012369904]
processing primers for frag 0
running fwd
running rev
processing primers for frag 1
running fwd
running rev
processing primers for frag 2
running fwd
running rev
     locus_tag         Tm                                           sequence   
0  frag0_fin_F  55.851352                      tttt  ATGGTGAATGTGAAACCAGTAAC  \
1  frag0_fin_R  56.106442                        aaaagTTTT TTACTGCCCGCTTTCCA   
2  frag1_fin_F  55.335316            GCGGGCAGTAAAAAActttt  ATGGCGAGTAGCGAAGA   
3  frag1_fin_R  56.777386                TGACAG aaaagGGCG TTAAGCACCGGTGGAGTG   
4  frag2_fin_F  55.363272               TTAACGCCctttt  CTGTCAGACCAAGTTTACGAG   
5  f

## Golden gate assembly
The following shows how to design primers for golden gate assembly

In [17]:
pcr = pge.Designer()
help(pcr.GoldenGate)

Help on method GoldenGate in module plasmid.designer:

GoldenGate(seqlist, exclude=[], w=[0, 1], circular=True) method of plasmid.designer.Designer instance
    Design primers for goldengate assembly
    seqlist = list of sequences to assemble
    exclude = sites to exclude
    circular = assemble fragments into a circular construct
    returns list of primers



In [18]:
LacI, RFP, vec = get_parts()
# one entry per fragment, in assembly order
# NOTE(review): entries look like [left flank, fragment, right flank] — confirm against Designer.GoldenGate
seq = [
    ['', LacI, 'AAAActttt'],
    ['', RFP, 'CGCCctttt'],
    ['', vec, 'GGGGctttt'],
]

pcr = pge.Designer()
# settings for the golden gate junctions
pcr.params['goldengate']['window'] = 20 # +/- window in bp around frag edges to look for overlap
pcr.params['goldengate']['ggN'] = 4     # length of golden gate overlap
pcr.params['goldengate']['ggsite'] = 'GGTCTCc'     # golden gate enzyme site
pcr.params['goldengate']['padding'] = 'atatatatgg' # padding around the golden gate site
# settings for the extension PCR primers
pcr.params['xtPCR']['len'] = [15, 60]  # [min, max] primer lengths
pcr.params['xtPCR']['nM'] = [20, 500]  # [seed, finisher] primer conc in nM
pcr.params['xtPCR']['Tm'] = 55         # target annealing temperature for xtPCR

res = pcr.GoldenGate(seq)
print(res)

reading  ../data/xRFP.gb  as genbank file
reading  ../data/dcas9_RFP.gb  as genbank file
reading  ../data/xRFP.gb  as genbank file
res.x [27.21765366 15.28962375 12.57292265]
res.fun -12.0
exclude: []
overlaps: ['GTAG', 'cttt', 'GGGc']
Tm overlap: [-63.72743868625798, -70.43593665137047, -41.01531509599255]
processing primers for frag 0
running fwd


  df = fun(x) - f0


running rev
processing primers for frag 1
running fwd


  df = fun(x) - f0


running rev
processing primers for frag 2
running fwd


  df = fun(x) - f0


running rev
     locus_tag         Tm                                           sequence   
0  frag0_fin_F  55.851352  atatatatggGGTCTCcGGGctttt ATGGTGAATGTGAAACCAGTAAC  \
1  frag0_fin_R  56.106442  atatatatggGGTCTCcCTACTCGCCATaaaagTTTT TTACTGCC...   
2  frag1_fin_F  55.129431           atatatatggGGTCTCc GTAGCGAAGACGTTATCAAAGA   
3  frag1_fin_R  56.777386       atatatatggGGTCTCcaaagGGCG TTAAGCACCGGTGGAGTG   
4  frag2_fin_F  55.363272       atatatatggGGTCTCcctttt CTGTCAGACCAAGTTTACGAG   
5  frag2_fin_R  54.627626     atatatatggGGTCTCcgCCCC GTTACATTGTCGATCTGTTCATG   
6         seq0        NaN  atatatatggGGTCTCcGGGcttttATGGTGAATGTGAAACCAGTA...   
7         seq1        NaN  atatatatggGGTCTCcGTAGCGAAGACGTTATCAAAGAGTTCATG...   
8         seq2        NaN  atatatatggGGTCTCccttttCTGTCAGACCAAGTTTACGAGCTC...   

                  annealed  strand  
0  ATGGTGAATGTGAAACCAGTAAC     1.0  
1        TTACTGCCCGCTTTCCA    -1.0  
2   GTAGCGAAGACGTTATCAAAGA     1.0  
3       TTAAGCACCGGTGGAGTG    -1.0  
4 

  df = fun(x) - f0
