# Analyzing Random Access Guide Sequences

NOTE: This notebook is meant to be run in the Nucleaseq GitHub repository presented in Jones et al. 2021

https://www.nature.com/articles/s41587-020-0646-5

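The simplest way to run this code is to paste it into the `nucleaseq/notebooks/CRISPR Model Calculator Tutorial.ipynb` notebook. The cell below uses `np`, `plt`, and `log10_crispr_specificity` without importing or defining them, so it relies on that notebook to have already put them in scope.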

All scores are 0.0002 except for pairs 11-7 (0.000458), 13-10 (0.003492), and 24-22 (0.000632).

In [None]:
import seaborn as sns
import pandas as pd

# The list of random access guides given in the main text
guides = [
    'CTCGCAGAGGTGGCGTCGACCAAGG',
    'CTCGCAGAGGTGGCGCGATTGTTGG',
    'CTCGCAGAGGTGGCGATAGCATAGG',
    'CTCGCAGAGGTGGCGTCGCCTTTGG',
    'CTCGCAGAGGTGGCGTGCCAACAGG',
    'CTCGCAGAGGTGGCGATCGCTCTGG',
    'CTCGCAGAGGTGGCGAGAACCGAGG',
    'CTCGCAGAGGTGGCGGACGAACTGG',
    'CTCGCAGAGGTGGCGCACAGAGAGG',
    'CTCGCAGAGGTGGCGCATTAAATGG',
    'CTCGCAGAGGTGGCGAGAATCTAGG',
    'CTCGCAGAGGTGGCGCTAGCTGTGG',
    'CTCGCAGAGGTGGCGCTTTACAAGG',
    'CTCGCAGAGGTGGCGAAGTGCTTGG',
    'CTCGCAGAGGTGGCGGGTTCCTAGG',
    'CTCGCAGAGGTGGCGTTTCCAGTGG',
    'CTCGCAGAGGTGGCGTGGGATGAGG',
    'CTCGCAGAGGTGGCGTAAACGCTGG',
    'CTCGCAGAGGTGGCGGATGAGAAGG',
    'CTCGCAGAGGTGGCGTCCTCAGTGG',
    'CTCGCAGAGGTGGCGGCCACATAGG',
    'CTCGCAGAGGTGGCGCCCATCCTGG',
    'CTCGCAGAGGTGGCGCCTGGTTAGG',
    'CTCGCAGAGGTGGCGTCCATCGTGG',
    'CTCGCAGAGGTGGCGGACCTTAAGG',
]

def get_seq_pam(seq):
    """
    Given a sequence,
    Returns one string that is the 20nt sequence and 
    one string that is the last three nts of the given 
    seq, which is the PAM sequence
    """
    return seq[2:-3], seq[-3:]

def get_activation_score(seq1, seq2):
    """
    Given two sequences,
    Returns a score from 0 to 1 (1 being perfect match, 0 meaning least activation).
    """
    s1, pam1 = get_seq_pam(seq1)
    s2, pam2 = get_seq_pam(seq2)
    return 10**log10_crispr_specificity('HF1', pam1, s1, s2)

data = []
for s1 in guides:
    s1vs2 = []
    for s2 in guides:
        s1vs2.append(get_activation_score(s1, s2))
    data.append(s1vs2)
df = pd.DataFrame(data)

mask = np.zeros_like(df)
mask[np.triu_indices_from(mask)] = True
f, ax = plt.subplots(figsize=(7, 7))
ax = plt.axes() 
sns.heatmap(df, mask=mask, vmin = 0, vmax=0.002,
                 xticklabels=[i + 1 for i in range(len(guides))], # to start count at 1
                 yticklabels=[i + 1 for i in range(len(guides))],
                 annot=False, linewidths=.1, cmap='YlGnBu') #cmap='flare' also good

plt.title('Guide on Guide Activations with HiFi Cas9')
plt.xlabel('Guide')
plt.ylabel('Guide')