Design MERFISH probes using the example inputs from Jeff Moffitt. The original MATLAB design pipeline can be found at https://github.com/ZhuangLab/MERFISH_analysis.

In [None]:
# Download the input data
# This is for the UNIX-like operating systems. If you are using Windows, just download the files accordingly.
!mkdir temporary_data
!wget http://zhuang.harvard.edu/merfish/MERFISHData/MERFISH_Examples2.zip -O temporary_data/MERFISH_Examples2.zip
!unzip temporary_data/MERFISH_Examples2.zip -d temporary_data

In [None]:
# Read the codebook that ships with the example data.
# The loader's result is unpacked into four values; judging by the names they
# are the codebook version, codebook name, bit names and the barcode table.
import MERFISH_probe_design.IO.file_io as fio

codebook_file = 'temporary_data/MERFISH_Examples2/codebook.csv'
codebook_contents = fio.load_merlin_codebook(codebook_file)
cb_version, cb_name, bit_names, barcode_table = codebook_contents

In [None]:
# Display the barcode table that was loaded from the codebook.
barcode_table

In [None]:
# Build the transcriptome from the example transcript sequences (FASTA) and
# the matching FPKM tracking file.
fpkm_tracking_file = 'temporary_data/MERFISH_Examples2/isoforms.fpkm_tracking'
transcripts_fasta_file = 'temporary_data/MERFISH_Examples2/transcripts.fasta'
transcriptome = fio.load_transcriptome(
    transcripts_fasta_file,
    fpkm_tracking_file,
)

In [None]:
# Display the transcriptome for a visual sanity check.
# Make sure that the transcriptome data frame has the standard column names:
# transcript_id, sequence, gene_id, gene_short_name, FPKM
transcriptome

In [None]:
# Create the probe dictionary for the genes listed in the codebook.
# NOTE: in this notebook `pd` is the probe_dict module, not pandas.
import MERFISH_probe_design.probe_design.probe_dict as pd

# Codebook rows whose 'id' is empty are blanks; keep the names of the others.
has_transcript_id = barcode_table['id'] != ''
gene_ids = list(barcode_table['name'][has_transcript_id])

# NOTE(review): 'gene_short_name' presumably names the transcriptome column
# that gene_ids are matched against, and 30 is presumably the target-region
# length -- confirm against init_probe_dict.
probe_dict = pd.init_probe_dict(gene_ids, transcriptome, 'gene_short_name', 30)
pd.print_probe_dict(probe_dict)

In [None]:
# Narrow the probe dictionary down to the transcripts named in the codebook.
non_blank_rows = barcode_table['id'] != ''
# A set keeps each non-blank transcript id once.
transcript_ids = set(barcode_table['id'][non_blank_rows])
probe_dict = pd.select_transcripts_by_ids(probe_dict, transcript_ids)
pd.print_probe_dict(probe_dict)

In [None]:
# Inspect one example entry of the probe dictionary
# (presumably keyed by gene name, then transcript id).
probe_dict['VPS13D']['ENST00000613099.4']

In [None]:
# Design/Filter target regions

In [None]:
# Load the readout sequences from the example FASTA file.
import MERFISH_probe_design.probe_design.readout_sequences as rs
readout_fasta_file = 'temporary_data/MERFISH_Examples2/readouts.fasta'
readout_seqs = fio.load_fasta_into_df(readout_fasta_file)
# The return value is not used, so this call presumably updates readout_seqs
# in place using the codebook's bit_names -- TODO confirm.
rs.append_on_bit_ids_to_readout_sequences(readout_seqs, bit_names)
# Display the resulting readout table.
readout_seqs

In [None]:
# Add the readout sequences to the probes.
# The return value is not used, so probe_dict is presumably modified in place
# -- TODO confirm.
# NOTE(review): the positional 3 is presumably the number of readout sequences
# per probe, and n_threads=24 is hard-coded; confirm both against
# add_readout_seqs_to_probes_random and adjust n_threads to the machine.
rs.add_readout_seqs_to_probes_random(probe_dict, readout_seqs, barcode_table, 3, n_threads=24)

In [None]:
# Inspect the same example entry again, now after the readout step has run.
probe_dict['VPS13D']['ENST00000613099.4']

In [None]:
# Add primers

In [None]:
# BLAST (placeholder: this cell has no code yet)
# Make a BLAST database from the human transcriptome
# Make a BLAST database from the designed oligos
# BLAST the designs and filter them

In [None]:
# Quality check