In [3]:
%env PYTHONX=-Xfrozen_modules=of
! pip install -r requirements.txt

env: PYTHONX=-Xfrozen_modules=of


In [4]:
import pyslim
import tskit
import msprime
import pandas as pd
import numpy as np
import io
import random
import matplotlib.pyplot as plt
from IPython.display import display,SVG

<!-- parameters -->
- sample_size: 50
- sequence_length: 100000
- mutation_rate: 3e-08
- recombination_rate: 3e-09
- N: 50000
- bottleneck_intensity: 1

In [None]:
# Draw a seed for this run and append it to the run's text report, so the
# value handed to the seeded steps below is on record.
random_seed = random.randint(1, 1000000)
# Despite the variable name, this is the plain-text report for the current
# bottleneck intensity.
output_pdf_path = f"output/output_bottleneck/bottleneck_txt/bottleneck_{bottleneck_intensity}.txt"
seed_line = f" the random seed used for this simulation is {random_seed} \n"
with open(output_pdf_path, "a") as report:
    report.write(seed_line)

In [None]:
# Read the tree sequence stored for the current bottleneck intensity.
trees_path = f"output/output_bottleneck/bottleneck_trees/bottleneck_{bottleneck_intensity}.trees"
ts_load = tskit.load(trees_path)

In [None]:
# Recapitate the loaded tree sequence using the configured recombination rate,
# the ancestral effective size N and the seed drawn for this run.
rts = pyslim.recapitate(
    ts_load,
    recombination_rate=recombination_rate,
    ancestral_Ne=N,
    random_seed=random_seed,
)

# Largest root count over all trees, before and after recapitation.
orig_max_roots = max(tree.num_roots for tree in ts_load.trees())
recap_max_roots = max(tree.num_roots for tree in rts.trees())

roots_summary = (
    f"Maximum number of roots before recapitation: {orig_max_roots}\n"
    f"After recapitation: {recap_max_roots}"
)
print(roots_summary)
# The report receives the same text followed by a space and a newline.
with open(output_pdf_path, "a") as report:
    report.write(roots_summary + " \n")



In [None]:
# Keep a random subset of `sample_size` individuals from those returned by
# pyslim.individuals_alive_at(rts, 0) and simplify the tree sequence to their nodes.

# Generator seeded with the run's seed, so the chosen subset follows from it.
rng = np.random.default_rng(seed=random_seed)
alive_inds = pyslim.individuals_alive_at(rts, 0)
keep_indivs = rng.choice(alive_inds, sample_size, replace=False)
# Collect every node that belongs to one of the chosen individuals.
keep_nodes = [node for ind_id in keep_indivs for node in rts.individual(ind_id).nodes]

sts = rts.simplify(keep_nodes, keep_input_roots=True)

# Record sample-node and individual counts before and after simplification.
simplify_summary = (
    f"Before, there were {rts.num_samples} sample nodes (and {rts.num_individuals} individuals)\n"
    f"in the tree sequence, and now there are {sts.num_samples} sample nodes\n"
    f"(and {sts.num_individuals} individuals).\n"
)
with open(output_pdf_path, "a") as report:
    report.write(simplify_summary)

In [None]:
# Add mutations to the simplified tree sequence with a SLiM mutation model,
# starting from the id returned by pyslim.next_slim_mutation_id and keeping the
# mutations already present (keep=True).
# The run's seed is passed explicitly: without it the mutations would differ on
# every execution and could not be regenerated from the seed written to the report.
next_id = pyslim.next_slim_mutation_id(sts)
ts = msprime.sim_mutations(
    sts,
    rate=mutation_rate,
    model=msprime.SLiMMutationModel(type=0, next_id=next_id),
    keep=True,
    random_seed=random_seed,
)

In [None]:
# Turn the tree sequence into a VCF file and a FASTA file.
# Nucleotides are generated with the run's seed so that the exported alleles
# can be regenerated from the seed written to the report; without a seed they
# would change on every execution.
nts = pyslim.generate_nucleotides(ts, seed=random_seed)
nts = pyslim.convert_alleles(nts)

vcf_file_path = f"output/output_bottleneck/vcf/bottleneck_{bottleneck_intensity}.vcf"
with open(vcf_file_path, "w") as vcf_file:
    nts.write_vcf(vcf_file)

fasta_file_path = f"output/output_bottleneck/fasta/bottleneck_{bottleneck_intensity}.fasta"
with open(fasta_file_path, "w") as fasta_file:
    nts.write_fasta(fasta_file)


In [None]:
# Summary statistics: normalised allele frequency spectrum (plotted and
# reported) and Tajima's D (reported).

# Number of segregating sites, not divided by the sequence length.
ns = nts.segregating_sites(span_normalise=False)
# Polarised site-mode frequency spectrum, each entry divided by ns.
afs_over_ns = nts.allele_frequency_spectrum(polarised=True, span_normalise=False, mode="site") / ns
# The first two entries are dropped before plotting and reporting.
# NOTE(review): the x-axis label says "1 to 100" -- confirm that skipping two
# entries (rather than one) is intended.
k = afs_over_ns[2:]

plt.ioff()
afs_axes = plt.subplot(2, 2, 1)
afs_axes.stairs(k)
afs_axes.set_xlabel("The categories from 1 to 100")
afs_axes.set_ylabel("Count")
afs_axes.set_title("Allele frequency spectrum ")

stats_text = (
    f" These are the population statistics related to this data : - \n"
    f" The allele frequency numbers are :-  \n{k}\n"
    f"1. The Tajima'D for the given sequence is {nts.Tajimas_D()}\n "
)
with open(output_pdf_path, "a") as report:
    report.write(stats_text)
plt.savefig(f"output/output_bottleneck/plots/allele_frequency_spectrum_{bottleneck_intensity}.jpg")


In [None]:
# Append one tab-separated row for this run to the shared statistics table:
# FASTA path, bottleneck intensity, Tajima's D, diversity and `ns`.
meta_file_statistics = "output/output_bottleneck/meta_data/meta_Stats.tab"

tajimas_d = nts.Tajimas_D()
pi = nts.diversity()
stats_row = f"{fasta_file_path}\t{bottleneck_intensity}\t{tajimas_d}\t{pi}\t{ns}\n"
with open(meta_file_statistics, "a") as stats_table:
    stats_table.write(stats_row)


In [None]:
# Polarised, span-normalised site frequency spectrum in `num_windows`
# equal-width windows along the sequence, saved as a text array.
num_windows = 100
window_edges = np.linspace(0, ts.sequence_length, num_windows + 1)
k = nts.allele_frequency_spectrum(
    windows=window_edges,
    polarised=True,
    span_normalise=True,
    mode="site",
)

file_path = f"output/output_bottleneck/arrays/array_data_{bottleneck_intensity}.txt"

np.savetxt(file_path, k)