# Signal is proportional to fraction folded

**Note: first run of this cell takes a while**

In [3]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append('../../src')
from util import *
from config import * 
import generate_dataset
from study_gen import study
import plots
import ipynbname

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



## ∆T –  Does the Tm change with reaction Temp?

### Change in Temp (w/o prediction)
- For each A/C base in ROI, plot mutation fraction vs. T
    - should be linear for some portion, then level off when hydrolysis is faster than DMS reaction


In [1]:
# NOTE(review): `temperature_samples` is not assigned in this cell; it presumably
# comes from the star imports in the setup cell -- run that cell first (TODO confirm).
print('Temperature_samples: ', temperature_samples)

# ---- User-editable parameter: the construct to plot ----
construct = '3042-O-flank_1=hp1-DB'
# --------------------------------------------------------

# Build the mutation-fraction-vs-temperature plot for the chosen construct,
# save the figure under section [A], then display it.
plot = plots.change_in_temp_mut_frac_vs_temperature(
    study,
    temperature_samples,
    construct,
)
save_plotly_fig(
    ipynbname.path(),
    f'[A] Change in temperature (w o prediction)/{construct}',
    plot['fig'],
)
plot['fig'].show()


NameError: name 'temperature_samples' is not defined

### Mutation rate across the family vs pred ∆G (at 37 °C)
- Curve with predicted ∆G
    - For each base in ROI, plot mutation fraction vs. predicted ∆G (at 37 °C)
    - Fit each curve to ∆∆G offset


In [10]:
# Collect the unique sample names whose rows have temperature_k equal to 310.
samples = study.df[study.df['temperature_k'] == 310]['sample'].unique()
print('Samples with t=37c:', samples)

# ---- User-editable parameters: sample and family to plot ----
sample = samples[0]
family = 'hp1'
# --------------------------------------------------------------

# Build the plot for the chosen sample/family, save it under section [B],
# then display it.
plot = plots.mut_rate_across_family_vs_deltaG(study, sample, family)
save_plotly_fig(
    ipynbname.path(),
    f'[B] Change in temperature (w prediction at 37)/{sample}/{family}',
    plot['fig'],
)
plot['fig'].show()

Samples with t=37c: ['05_1_S24_reads' '2_1_s28new_reads' '1_1_S26new_reads'
 '10_1_s32new_reads' '01_02_S23_reads' 'lauren470_S1' '5_1_S30new_reads'
 'lauren473_S4' '2_2_S29new_reads' 'lauren471_S2' '5_2_S31_reads'
 '10_2_s33new_reads' '1_2_S27new_reads' 'lauren472_S3'
 'Lauren_603_10min_S9_L001' '01_1_S22_reads' '05_2_S25_reads']


### Change in Temp (w/ prediction at T) 
- For each base in ROI at each temp, plot mutation fraction vs. pred ∆G (at that temp) 
    - fit each curve to ∆∆Goffset
    - simple model—the offset should be the same if the algorithm is just off by some amount??


In [None]:
# Call savefig2 with this notebook's path and the section [C] label.
# NOTE(review): savefig2 is not defined in this notebook (presumably it comes from
# the star imports) and no plot is built in this cell -- confirm what gets saved.
savefig2(ipynbname.path(), '[C] Change in temperature (w prediction at T)')

##   ∆t – is the signal in the linear regime at the working reaction time? 
### Change in reaction time
- For each construct at each position, plot Mutation fraction vs. reaction time
    - scatter plot
    - fit linear region (for now that might just be to the first 3 points 1-5 min)--goodness of fit??




In [None]:
# Collect the unique sample names whose rows have a non-zero reaction_time.
reaction_time_samples = study.df[study.df['reaction_time'] != 0]['sample'].unique()
print('Samples with change in reaction time:', reaction_time_samples)

# ---- User-editable parameter: the construct to plot ----
construct = '3042-O-flank_1=hp1-DB'
# --------------------------------------------------------

# Build the reaction-time plot for the chosen construct, save it under
# section [D], then display it.
plot = plots.change_in_reaction_time(
    study,
    samples=reaction_time_samples,
    construct=construct,
)
save_plotly_fig(
    ipynbname.path(),
    f'[D] Change in reaction time/{construct}',
    plot['fig'],
)
plot['fig'].show()


## ∆[DMS] -  is the signal in the linear regime at the working concentration?
### Change in [DMS]
- For each construct at each position, plot Mutation fraction vs. [DMS]
    - fit linear region (for now that might just be to the first 3 points 0.5-1% DMS)--goodness of fit??


In [None]:
# Collect the unique sample names whose rows have a positive dms_concentration_mM.
dms_concentration_samples = study.df[study.df['dms_concentration_mM'] > 0]['sample'].unique()
print('Samples with DMS concentration:', dms_concentration_samples)

# ---- User-editable parameter: the construct to plot ----
construct = '3042-O-flank_1=hp1-DB'
# --------------------------------------------------------

# Build the DMS-concentration plot for the chosen construct, save it under
# section [E], then display it.
plot = plots.change_in_dms_conc(
    study,
    samples=dms_concentration_samples,
    construct=construct,
)
save_plotly_fig(
    ipynbname.path(),
    f'[E] Change in [DMS]/{construct}',
    plot['fig'],
)
plot['fig'].show()
        

In [None]:
from scipy.optimize import curve_fit
import plotly.graph_objects as go

def sigmoid(x, a, b, c):
    """Evaluate the logistic curve ``a / (1 + b * exp(-x / RT)) + c``.

    ``RT`` is hard-coded as ``1.987204258 * 310 / 1000``.
    NOTE(review): this looks like the gas constant in cal/(mol*K) at 310 K,
    scaled to kcal/mol -- confirm the intended units of ``x``.

    Args:
        x: Scalar or NumPy array at which to evaluate the curve.
        a: Numerator (amplitude) of the logistic term.
        b: Factor multiplying the exponential in the denominator.
        c: Constant offset added to the result.

    Returns:
        The curve value(s), broadcast over the inputs by NumPy.
    """
    thermal_energy = 1.987204258*310/1000
    exponential_term = b * np.exp(-x / thermal_energy)
    return a / (1 + exponential_term) + c


# Demo: fit `sigmoid` to synthetic noisy data, then visualise the fit uncertainty
# with a Monte Carlo band drawn from the fitted parameter covariance.
# NOTE(review): no RNG seed is set in this cell, so the data, the fit and the
# band differ from run to run.

# Generate fake data with noise
# a, b, c are the "true" parameters used to synthesise the data.
a = 0.04
c = 0
b = 1e-5
# Only 4 evenly spaced x values between -20 and 0 are used as data points.
xdata = np.linspace(-20, 0, 4)
# Additive Gaussian noise with standard deviation 0.005.
ydata = sigmoid(xdata, a, b, c ) + 0.005 * np.random.normal(size=len(xdata))

# Fit the data
# Initial guess p0 = (a, b, c); bounds constrain a to [0, 0.1], b to [0, inf)
# and c to [0, 0.05]. max_nfev is forwarded to the solver as the cap on
# function evaluations.
popt, pcov = curve_fit(sigmoid, xdata, ydata, p0=[0.04, 0.02, 0.00], bounds=([0, 0, 0], [0.1, np.inf, 0.05]), max_nfev=1000)

# Print optimal parameters and covariance matrix
print('a =', popt[0])
print('b =', popt[1])
print('c =', popt[2])
print('covariance matrix:')
print(pcov)

# Do a Monte Carlo simulation to estimate the uncertainty in the fit parameters using a multinormal distribution
# with the fitted parameters as the mean and the covariance matrix from the fit as the covariance
N = 1000
# Draw N parameter vectors, rearrange them to shape (3, N, 1) so each parameter
# is an (N, 1) column, and clip negative draws to 0 (only the lower bound of the
# fit is re-applied here; the upper bounds are not).
param_samples = np.clip(np.random.multivariate_normal(popt, pcov, N).T.reshape(3, N, 1), 0, np.inf)

# Compute the sigmoid for each set of parameters for each x value
xdata_MC = np.linspace(-20, 0, 1000)
# Best-fit curve on the dense x grid.
y_fit = sigmoid(xdata_MC, *popt)

# x is reshaped to (1, 1000) and broadcast against the (N, 1) parameter columns,
# giving one curve per parameter draw: y_MC has shape (N, 1000).
y_MC = sigmoid(xdata_MC.reshape(1, -1) , param_samples[0], param_samples[1], param_samples[2])

# Plot the raw data, the fit, and the confidence interval from the Monte Carlo simulation as an error plot
# The band is the 2.5th-97.5th percentile range across draws at each x value,
# drawn as one closed polygon (upper edge left-to-right, lower edge right-to-left).
# The figure is not assigned to a variable.
go.Figure(data=[
    go.Scatter(x=xdata, y=ydata, mode='markers', name='data'),
    go.Scatter(x=xdata_MC, y=y_fit, mode='lines', name='fit'),

    go.Scatter(
        x=np.concatenate((xdata_MC, xdata_MC[::-1])), # x, then x reversed
        y=np.concatenate((np.percentile(y_MC, 97.5, axis=0), np.percentile(y_MC, 2.5, axis=0)[::-1])), # upper, then lower reversed
        fill='toself',
        fillcolor='rgba(0,100,80,0.2)',
        line=dict(color='rgba(255,255,255,0)'),
        hoverinfo="skip",
        showlegend=False
    )
])