In [1]:
!pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.5-py2.py3-none-any.whl (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.1/88.1 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting appdirs>=1.4.0 (from pycuda)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  

In [2]:
!pip install  bio

Collecting bio
  Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting biopython>=1.80 (from bio)
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gprofiler-official (from bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl (9.3 kB)
Collecting mygene (from bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl (5.4 kB)
Collecting biothings-client>=0.2.6 (from mygene->bio)
  Downloading biothings_client-0.3.1-py2.py3-none-any.whl (29 kB)
Installing collected packages: biopython, gprofiler-official, biothings-client, mygene, bio
Successfully installed bio-1.7.1 biopython-1.83 biothings-client-0.3.1 gprofiler-official-1.0.0 mygene-3.2.2


In [3]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import gc
import psutil
import os
import argparse
import numpy as np
import matplotlib.pyplot as plt
import time

import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
from Bio import SeqIO
import matplotlib

# Cambiar el backend a Agg
matplotlib.use('Agg')

def read_fasta(file_path):
    """Lee una o más secuencias de un archivo FASTA y las concatena"""
    sequences = []
    with open(file_path, "r") as file:
        for seq_record in SeqIO.parse(file, "fasta"):
            sequences.append(str(seq_record.seq))
    return ''.join(sequences)

def estimate_remaining_time(start_time, current_step, total_steps):
    """Estima el tiempo restante para completar la ejecución"""
    elapsed_time = time.time() - start_time
    if elapsed_time < 1e-6:
        return float('inf')
    steps_per_second = current_step / elapsed_time
    remaining_steps = total_steps - current_step
    if steps_per_second <= 0:
        return float('inf')
    estimated_remaining_time = remaining_steps / steps_per_second
    return estimated_remaining_time

def monitor_memory(used_bytes, max_system_percentage):
    """Monitoriza el uso de memoria y devuelve True si se excede el umbral"""
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    return memory_info.rss > used_bytes or psutil.virtual_memory().percent > max_system_percentage

def create_dotplot_gpu(seq1, seq2, output_dir, window_size=500, step_size=100, memory_limit=5*1024**3, max_system_memory=85):
    """Crea y dibuja un dotplot por partes usando una ventana deslizante en la GPU"""
    len1, len2 = len(seq1), len(seq2)
    total_steps = ((len1 - window_size) // step_size + 1) * ((len2 - window_size) // step_size + 1)
    current_step = 0
    start_time = time.time()
    last_save_time = start_time
    save_interval = 10 * 60  # 60 minutos en segundos (1 hora)
    image_counter = 0

    os.makedirs(output_dir, exist_ok=True)
    elapsed_times = []

    # Kernel de CUDA para crear el dotplot
    kernel_code = """
    __global__ void dotplot_kernel(unsigned char* seq1, unsigned char* seq2, int window_size, int* result) {
        int idx = threadIdx.x + blockIdx.x * blockDim.x;
        int idy = threadIdx.y + blockIdx.y * blockDim.y;

        if (idx < window_size && idy < window_size) {
            result[idx * window_size + idy] = (seq1[idx] == seq2[idy]);
        }
    }
    """
    mod = SourceModule(kernel_code)
    dotplot_kernel = mod.get_function("dotplot_kernel")

    for i in range(0, len1 - window_size + 1, step_size):
        for j in range(0, len2 - window_size + 1, step_size):

            if monitor_memory(memory_limit, max_system_memory):
                print("Uso de memoria cerca del límite. Liberando memoria.")
                gc.collect()
                continue

            step_start_time = time.time()

            # Convertir las secuencias a arrays de bytes (más compacto que arrays de caracteres)
            window1 = np.frombuffer(seq1[i:i+window_size].encode('utf-8'), dtype=np.uint8)
            window2 = np.frombuffer(seq2[j:j+window_size].encode('utf-8'), dtype=np.uint8)

            # Asignar memoria en la GPU
            seq1_gpu = cuda.mem_alloc(window1.nbytes)
            seq2_gpu = cuda.mem_alloc(window2.nbytes)
            result_gpu = cuda.mem_alloc(window_size * window_size * np.int32(0).nbytes)

            # Copiar datos a la GPU
            cuda.memcpy_htod(seq1_gpu, window1)
            cuda.memcpy_htod(seq2_gpu, window2)

            # Definir el tamaño de los bloques y la grilla
            block_size = (32, 32, 1)
            grid_size = (window_size // 32 + 1, window_size // 32 + 1, 1)

            # Ejecutar el kernel
            dotplot_kernel(seq1_gpu, seq2_gpu, np.int32(window_size), result_gpu, block=block_size, grid=grid_size)

            # Copiar el resultado de vuelta a la CPU
            result = np.empty((window_size, window_size), dtype=np.int32)
            cuda.memcpy_dtoh(result, result_gpu)

            fig, ax = plt.subplots(figsize=(10, 10))
            ax.imshow(result, cmap='gray_r', extent=(j, j+window_size, i, i+window_size), origin='lower')

            step_elapsed_time = time.time() - step_start_time
            elapsed_times.append(step_elapsed_time)

            current_step += 1
            if current_step % 2000 == 0:
                elapsed_time = time.time() - start_time
                remaining_time = estimate_remaining_time(start_time, current_step, total_steps)
                print(f"Progreso: {current_step}/{total_steps} pasos completados. Tiempo transcurrido: {elapsed_time:.2f} segundos. Tiempo restante estimado: {remaining_time:.2f} segundos")

                current_time = time.time()
                if current_time - last_save_time >= save_interval:
                    image_counter += 1
                    image_path = os.path.join(output_dir, f'dotplot_{image_counter}.png')
                    plt.savefig(image_path)
                    print(f"Imagen intermedia guardada en {image_path}")
                    last_save_time = current_time

                gc.collect()

            plt.close(fig)  # Cierra la figura para liberar memoria

    final_image_path = os.path.join(output_dir, 'dotplot_final.png')
    plt.savefig(final_image_path)
    print(f"Dotplot final guardado en {final_image_path}")
    gc.collect()

def main(seq1_path, seq2_path, memory_limit, max_system_memory):
    overall_start_time = time.time()

    print("Leyendo secuencias...")
    read_start_time = time.time()
    seq1 = read_fasta(seq1_path)
    seq2 = read_fasta(seq2_path)
    read_elapsed_time = time.time() - read_start_time
    print(f"Tiempo de carga de datos: {read_elapsed_time:.2f} segundos")

    output_dir = os.path.join('.', 'Imagenes', 'Pycuda')

    print("Creando dotplot...")
    dotplot_start_time = time.time()
    create_dotplot_gpu(seq1, seq2, output_dir, memory_limit=memory_limit, max_system_memory=max_system_memory)
    dotplot_elapsed_time = time.time() - dotplot_start_time
    print(f"Tiempo de generación de imagen: {dotplot_elapsed_time:.2f} segundos")

    overall_elapsed_time = time.time() - overall_start_time
    print(f"Tiempo total de ejecución: {overall_elapsed_time:.2f} segundos")

# Configuración del entorno interactivo en Google Colab
import ipywidgets as widgets
from IPython.display import display
from google.colab import files

# Crear widgets de texto para las rutas de entrada
seq1_input = widgets.Text(description="Ruta Seq1:")
seq2_input = widgets.Text(description="Ruta Seq2:")
memory_limit_input = widgets.IntText(value=5, description='Límite de Memoria (GB):')
max_system_memory_input = widgets.IntSlider(value=85, min=0, max=100, description='Memoria Máxima del Sistema (%):')

# Crear un botón para activar el cálculo
run_button = widgets.Button(description='Ejecutar Análisis')

# Mostrar los widgets
display(seq1_input, seq2_input, memory_limit_input, max_system_memory_input, run_button)

# Definir la acción del clic del botón
def on_run_button_clicked(b):
    # Convierte el límite de memoria de GB a bytes para el script
    memory_limit_bytes = memory_limit_input.value * 1024**3
    main(seq1_input.value, seq2_input.value, memory_limit_bytes, max_system_memory_input.value)

# Vincula el botón con la función
run_button.on_click(on_run_button_clicked)

# Permitir la subida de archivos desde el sistema local
uploaded = files.upload()

Progreso: 24000/2676496 pasos completados. Tiempo transcurrido: 479.17 segundos. Tiempo restante estimado: 52958.43 segundos
Progreso: 26000/2676496 pasos completados. Tiempo transcurrido: 518.79 segundos. Tiempo restante estimado: 52886.59 segundos
Progreso: 28000/2676496 pasos completados. Tiempo transcurrido: 560.04 segundos. Tiempo restante estimado: 52973.40 segundos
Progreso: 30000/2676496 pasos completados. Tiempo transcurrido: 598.98 segundos. Tiempo restante estimado: 52840.21 segundos
Progreso: 32000/2676496 pasos completados. Tiempo transcurrido: 638.95 segundos. Tiempo restante estimado: 52803.24 segundos
Imagen intermedia guardada en ./Imagenes/Pycuda/dotplot_1.png
Progreso: 34000/2676496 pasos completados. Tiempo transcurrido: 678.32 segundos. Tiempo restante estimado: 52719.15 segundos
Progreso: 36000/2676496 pasos completados. Tiempo transcurrido: 718.13 segundos. Tiempo restante estimado: 52672.95 segundos
Progreso: 38000/2676496 pasos completados. Tiempo transcurrido: