## **Stochastic Simulation Algorithm (SSA)**

**Stochastic Kinetics of mRNA Molecules in a General Transcription Model**

*Yuntao Lu and Yunxin Zhang*

School of Mathematical Sciences, Fudan University, Shanghai 200433, China

Email: `yuntaolu22@m.fudan.edu.cn` and `xyz@fudan.edu.cn`

This script is written to perform SSA of the transcription model described in our manuscript. 

This script is based on the package `gillespy2`. For a detailed description, see
`S. Matthew et al., GillesPy2: A Biochemical Modeling Framework for Simulation Driven Biological Discovery, Lett. Biomath. 10, 1 (2023).`

The script requires only the parameter matrices $D_0$ and $D_1$ (provided as `np.ndarray`), after which SSA runs automatically.


### **Visualization Methods**  
1. **Sample Paths**:  
   Plots complete trajectories generated by SSA to assess whether the system reaches steady state within the simulated time frame, while estimating the range of mRNA copy number variability.  

2. **Distribution Histogram**:  
   Generates a histogram of the mRNA copy number distribution by aggregating final-time-point values from multiple simulated trajectories.


In [1]:
import gillespy2
import numpy as np
import time

In [2]:
import Parameters_for_Figures

In [3]:
# ===================================================================
# Specify parameters in the model here.
# Input:
# D0 (np.ndarray): Transition rate matrix for non-transcription events
# D1 (np.ndarray): Transition rate matrix for transcription events
# ===================================================================

In [4]:
# Active parameter set: the `_3a` matrices from Parameters_for_Figures
# (presumably the parameters of Figure 3(a) -- confirm against the manuscript).
# D0 and D1 are read as module-level globals by the Transcription model and are
# passed to deter(); run exactly one of the alternative D0/D1 cells.
D0=Parameters_for_Figures.D0_3a
D1=Parameters_for_Figures.D1_3a

In [5]:
# D0=Parameters_for_Figures.D0_3b
# D1=Parameters_for_Figures.D1_3b

In [6]:
# D0=Parameters_for_Figures.D0_3c
# D1=Parameters_for_Figures.D1_3c

In [7]:
# D0=Parameters_for_Figures.D0_3d
# D1=Parameters_for_Figures.D1_3d

In [8]:
# d is the mRNA degradation rate used by the deterministic solution deter().
# Keep it consistent with the rate parameter 'd' of the "dec" reaction in the
# Transcription model.
d=1

In [9]:
class Transcription(gillespy2.Model):
    """GillesPy2 model of the transcription process defined by D0 and D1.

    The model is assembled at construction time from the module-level globals
    ``D0``, ``D1`` and ``d``, with ``N = D0.shape[0]`` gene states:

    * Species ``state1`` ... ``stateN`` mark the current gene state
      (``state1`` starts at 1, all others at 0); species ``mRNA`` starts at 0.
    * Reaction ``Pr{i}_{j}``: ``state{i} -> state{j} + mRNA`` with rate
      ``D1[i-1, j-1]`` (parameter ``p{i}_{j}``).
    * Reaction ``Tr{i}_{j}`` (``i != j``): ``state{i} -> state{j}`` with rate
      ``D0[i-1, j-1]`` (parameter ``t{i}_{j}``).
    * Reaction ``dec``: ``mRNA -> (nothing)`` with rate ``d``.

    Matrix entries equal to zero create neither a parameter nor a reaction;
    a zero-rate reaction can never fire, so the simulated process is unchanged
    while the model handed to the solver is smaller.
    """

    def __init__(self, parameter_values=None):
        """Build parameters, species, reactions and the output time grid.

        Args:
            parameter_values: Accepted for interface compatibility; not used.
        """
        # Initialize the model
        gillespy2.Model.__init__(self, name="Transcription")

        N = D0.shape[0]

        # Non-zero rates only, in row-major order.  The diagonal of D0 is
        # skipped because it does not correspond to a reaction.
        transcription_rates = [
            (i, j, float(D1[i, j]))
            for i in range(N)
            for j in range(N)
            if float(D1[i, j]) != 0
        ]
        switching_rates = [
            (i, j, float(D0[i, j]))
            for i in range(N)
            for j in range(N)
            if i != j and float(D0[i, j]) != 0
        ]

        # Parameters established
        params = [
            gillespy2.Parameter(name=f'p{i+1}_{j+1}', expression=str(rate))
            for i, j, rate in transcription_rates
        ]
        params.extend(
            gillespy2.Parameter(name=f't{i+1}_{j+1}', expression=str(rate))
            for i, j, rate in switching_rates
        )
        # Use the global degradation rate `d` (the same value deter() uses)
        # instead of a hard-coded constant, so both descriptions stay in sync.
        params.append(gillespy2.Parameter(name='d', expression=str(float(d))))
        self.add_parameter(params)

        # Species established (registration order: state2..stateN, state1, mRNA)
        mRNA = gillespy2.Species(name='mRNA', initial_value=0)
        state1 = gillespy2.Species(name='state1', initial_value=1)
        state = [
            gillespy2.Species(name=f"state{k}", initial_value=0)
            for k in range(2, N + 1)
        ]
        state.append(state1)
        state.append(mRNA)
        self.add_species(state)
        # Species names indexed by zero-based gene state
        state_only = [f"state{k}" for k in range(1, N + 1)]

        # Reaction Objects Built
        reactions = []
        # Transcription events: gene moves i -> j (possibly i == j) and one
        # mRNA molecule is produced.
        for i, j, _ in transcription_rates:
            reactions.append(gillespy2.Reaction(
                name=f"Pr{i+1}_{j+1}",
                reactants={state_only[i]: 1},
                products={state_only[j]: 1, mRNA: 1},
                rate=f'p{i+1}_{j+1}'
            ))
        # Pure gene-state switching events: i -> j without producing mRNA.
        for i, j, _ in switching_rates:
            reactions.append(gillespy2.Reaction(
                name=f"Tr{i+1}_{j+1}",
                reactants={state_only[i]: 1},
                products={state_only[j]: 1},
                rate=f't{i+1}_{j+1}'
            ))
        # First-order mRNA degradation
        reactions.append(gillespy2.Reaction(
            name="dec",
            reactants={mRNA: 1},
            products={},
            rate='d'
        ))
        self.add_reaction(reactions)

        # TimeSpan specifies the time points at which to keep data from a simulation.
        # np.linspace(0, 50, 2) stores only t=0 and t=50, which is all the
        # final-time histogram needs.
        # NOTE(review): use a denser grid when plotting sample paths.
        self.timespan(np.linspace(0, 50, 2))

In [10]:
# Original SSA using the pure-Python solver NumPySSASolver from gillespy2:
# model= gillespy2.NumPySSASolver(model=Transcription())
# Except for timing the running time in Figure 4, the C++ solver is recommended because it is much faster.
# NOTE: despite its name, `model` holds the solver object wrapping the Transcription model.
model= gillespy2.SSACSolver(model=Transcription())

# Tau-Leaping simulation algorithm, an accelerated version of the original SSA,
# using the Python solver TauLeapingSolver from gillespy2:
# model= gillespy2.TauLeapingSolver(model=Transcription())

# Number of independent trajectories to simulate
trajectories=100000

# Start timing the programme.  perf_counter() is a monotonic high-resolution
# clock, so the measurement is unaffected by system clock adjustments.
start_time = time.perf_counter()

results = model.run(number_of_trajectories=trajectories)

# Timing ends here
end_time = time.perf_counter()
timing = end_time - start_time

print(f"Program execution time for SSA: {timing:.4f} seconds.")

Program execution time for SSA: 23.7329 seconds.


In [None]:
# # Assume results is a list of dictionaries, each containing 'time' and 'mRNA' keys
# # Extract time data (assuming all results have identical time points, so we only need to extract once)
# time_data = results[0]['time']

# # Extract all mRNA data
# mRNA_data = []
# for i in range(10):
#     mRNA_data.append(results[i]['mRNA'])

# # Convert mRNA_data to numpy array
# mRNA_data = np.array(mRNA_data)

# # Store time_data and mRNA_data in a dictionary
# data_to_save = {
#     'time': time_data,
#     'mRNA': mRNA_data
# }

# # Save to .npy file
# np.save('sample_paths_SSA.npy', data_to_save)

In [11]:
# Collect the mRNA copy number at the last stored time point of every trajectory.
# mRNA_number is the sample used for the histogram of the mRNA copy number distribution.
mRNA_number = [int(trajectory['mRNA'][-1]) for trajectory in results]
# np.save('fig3_SSA_a.npy', mRNA_number)
# np.save('fig3_SSA_b.npy', mRNA_number)
# np.save('fig3_SSA_c.npy', mRNA_number)
# np.save('fig3_SSA_d.npy', mRNA_number)

In [7]:
# Sanity check: show the type of the object returned by the solver's run() call.
print(type(results))

<class 'gillespy2.core.results.Results'>


`deter(D0,D1,t)` returns the (continuous) number of mRNA molecules at time `t` obtained using the deterministic description: **Reaction Rate Equation**.

In this model, the solution to the Reaction Rate Equation is exactly the expectation of mRNA copy number in the stochastic model.

In [20]:
# N0 is the initial number of mRNA molecules in the system.
# It is read as a global by deter(). The SSA model sets its own initial mRNA
# count (initial_value of the `mRNA` species), so keep the two consistent.
N0=0

In [21]:
def deter(D0,D1,t):
    """Evaluate the deterministic (Reaction Rate Equation) mRNA level at time t.

    Reads the module-level globals ``d`` (used as the decay rate) and ``N0``
    (used as the initial mRNA level).

    Args:
        D0 (np.ndarray): Transition rate matrix for non-transcription events.
        D1 (np.ndarray): Transition rate matrix for transcription events.
        t: Time at which the solution is evaluated.

    Returns:
        ``B1*(1-exp(-d*t)) + N0*exp(-d*t)``, where ``B1 = pi @ D1 @ e / d`` has
        shape (1, 1).
    """
    n_states = D0.shape[0]

    # pi solves pi @ (D0 + D1) = 0 together with sum(pi) = 1: the first row of
    # the transposed system is overwritten by the normalisation condition.
    system = np.copy((D0 + D1).T)
    system[0, :] = 1
    rhs = np.zeros(n_states)
    rhs[0] = 1

    # pi is the stationary distribution of the underlying Markov chain (row vector)
    pi = np.linalg.solve(system, rhs).reshape(1, n_states)
    # Column vector of all ones
    ones = np.ones(n_states).reshape(n_states, 1)

    # First order binomial moment, i.e. the MEAN of the mRNA copy number
    B1 = pi @ D1 @ ones / d

    decay = np.exp(-d * t)
    return B1 * (1 - decay) + N0 * decay

In [22]:
# Deterministic mRNA level evaluated on the time grid stored by the simulation;
# deter() returns a (1, 1) array, so [0, 0] extracts the scalar.
ode = [deter(D0, D1, time_point)[0, 0] for time_point in results[0]['time']]

In [23]:
# Save the results in a `.npy` file
# np.save('sample_paths_ode.npy', ode)

## **Visualization 1: Complete Sample Paths**

In [14]:
import matplotlib.pyplot as plt

In [19]:
# Figure with several SSA sample paths and the deterministic curve on top.
plt.figure(figsize=(18, 11), dpi=100)

# Palette of 20 different colors, one per individually coloured sample path
colors = [
    '#A52A2A','#653700','#0343DF','#ADD8E6','#7BC8F6','#9ACD32',
    '#FFA500','#800080','#FFC0CB','#D2B48C','#006400','#8B4513','#00FFFF',
    '#FF6347','#BA55D3','#A9A9A9','#4682B4','#3CB371','#FFD700','#8B0000'
]

# Number of individually coloured sample paths to plot.
# Num should be a nonnegative integer no larger than the number of colors (20).
# Note that (Num+1) sample paths will be plotted in total: the last trajectory
# is added separately below and carries the legend entry.
Num=9

# All trajectories share the same time grid, so take it from the first one.
time_axis = results[0]['time']

# Shared text styling (alternative font: 'Times New Roman')
text_font = 'DejaVu Sans'
heading_style = dict(fontname=text_font, fontsize=35, fontweight='bold', color='green')
tick_style = dict(fontsize=40, color='blue', fontname=text_font, ha='right')

for path_index in range(Num):
    plt.plot(time_axis, results[path_index]['mRNA'], color=colors[path_index],
             linestyle="-", linewidth=.5)
plt.plot(time_axis, results[-1]['mRNA'], color="#C79FEF", linestyle="-",
         linewidth=.5, label='Sample Path of mRNA Molecule Number')
plt.plot(time_axis, ode, linestyle="--", label='Deterministic Description',
         linewidth=6, c='#13EAC9')

# Optional: gene-state trajectories can be overlaid in the same way, e.g.
# for i in range(10):
#     plt.plot(results[0]['time'], results[i]['state1'], c='blue', linewidth=.5)
#     plt.plot(results[0]['time'], results[i]['state2'], c='orange', linewidth=.5)
# plt.plot(results[0]['time'], results[-1]['state1'], c='blue', linewidth=.5,
#          label='Number of Gene in State1', alpha=0.5)

plt.title("Sample Paths of Stochastic Simulation", **heading_style)
plt.xlabel('Time', **heading_style)
plt.ylabel('Number of mRNA Molecules', **heading_style)
plt.xticks(ticks=list(range(0, 41, 10)), **tick_style)
plt.yticks(ticks=np.arange(0, 121, 20), rotation=90, **tick_style)

legend = plt.legend(loc='upper right', frameon=True, shadow=True, markerscale=1,
                    fancybox=True, prop={'family': 'DejaVu Sans', 'size': 30})
legend_frame = legend.get_frame()
legend_frame.set_color('#E6E6FA')
legend_frame.set_alpha(0.8)
plt.savefig('SSAtraj7223.pdf', bbox_inches='tight', format='pdf', dpi=800)

# The generated plot can be considerably large. Avoid displaying it directly in this Notebook if possible.
# plt.show()
plt.close()

## **Visualization 2: Histogram of the mRNA Copy Number Distribution**

In [21]:
# Histogram of the final-time mRNA copy numbers collected in mRNA_number,
# normalised to a probability distribution (density=True).
plt.figure(figsize=(18, 18),dpi=300)
# NOTE: rcParams is global state; this axes line width persists for later figures.
plt.rcParams['axes.linewidth'] = 3

# plt.xlim(0, 40)
# plt.ylim(0, 0.1)
plt.xticks(
    # ticks=list(range(0,45,5)),
           fontsize=20,
           color='blue',
           # fontname='Times New Roman',
           fontname='DejaVu Sans',
           ha='right')
plt.yticks(
    # ticks=np.arange(0.02, 0.12, 0.02),
           fontsize=20,
           color='blue',
           rotation=90,
           # fontname='Times New Roman',
           fontname='DejaVu Sans',
           ha='right')
plt.title('Distribution of mRNA Copy Number \nvia Stochastic Simulation Algorithm',
          # fontname='Times New Roman',
          fontname='DejaVu Sans',
          fontsize=35,fontweight='bold',color='green')
plt.xlabel('Number of mRNA Molecules',fontsize=45,fontweight='bold',
           labelpad=10,
           fontname='DejaVu Sans',
           # fontname='Times New Roman',
           color='green')
plt.ylabel('Probability',fontsize=45,fontweight='bold',
           labelpad=10,
           # fontname='Times New Roman',
           fontname='DejaVu Sans',
           color='green',rotation=90)
plt.tick_params(direction='out',width=3,length=10)
# plt.axhline(0, color='blue', linewidth=2)

max_number=max(mRNA_number)
min_number=min(mRNA_number)

# One unit-width bin centred on every integer from min_number to max_number.
# np.arange excludes its stop value, so the stop must be max_number+1.5 for the
# last edge to be max_number+0.5; otherwise the largest observed copy number
# falls outside every bin and is dropped from the histogram.
plt.hist(mRNA_number,
         bins=np.arange(min_number-0.5, max_number+1.5, 1),
         rwidth=0.8,
         # bins=np.arange(-0.5, 41.5, 1),
         density=True, color='#228B22', edgecolor='green', alpha=0.3,
             linewidth=2,align='mid',label='Stochastic Simulation')
plt.legend(loc='upper right', frameon=True, shadow=True, markerscale=2,
           fancybox=True,prop={'family':
                               # 'Times New Roman',
                               'DejaVu Sans',
                               'size': 20})
# print("Saving figure...")
# plt.savefig('Example2.pdf', format='pdf',edgecolor='black', dpi=600)
# plt.show()
plt.close()