In [None]:
# Prerequisite: Biopython must be installed in the kernel's environment, e.g. `%pip install biopython`

In [None]:
# RNA sequence alignment (global and local) with Biopython's pairwise2
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

def global_alignment(rna1, rna2):
    """Globally align two RNA sequences and print every alignment found.

    Args:
        rna1: First sequence, passed unchanged to ``pairwise2.align.globalxx``.
        rna2: Second sequence, passed unchanged to ``pairwise2.align.globalxx``.

    Returns:
        None. Each alignment is rendered with ``format_alignment`` and printed.

    NOTE(review): recent Biopython releases mark ``Bio.pairwise2`` as
    deprecated in favour of ``Bio.Align.PairwiseAligner`` -- confirm against
    the installed version before relying on this long term.
    """
    hits = pairwise2.align.globalxx(rna1, rna2)

    # Render lazily so formatting and printing stay interleaved per alignment.
    rendered = (format_alignment(*hit) for hit in hits)
    for text in rendered:
        print(text)

def local_alignment(rna1, rna2):
    """Locally align two RNA sequences and print every alignment found.

    Args:
        rna1: First sequence, passed unchanged to ``pairwise2.align.localxx``.
        rna2: Second sequence, passed unchanged to ``pairwise2.align.localxx``.

    Returns:
        None. Each alignment is rendered with ``format_alignment`` and printed.

    NOTE(review): recent Biopython releases mark ``Bio.pairwise2`` as
    deprecated in favour of ``Bio.Align.PairwiseAligner`` -- confirm against
    the installed version before relying on this long term.
    """
    for hit in pairwise2.align.localxx(rna1, rna2):
        rendered = format_alignment(*hit)
        print(rendered)

def main():
    """Run the alignment demo on two short, hard-coded RNA sequences.

    Prints a heading followed by the global alignments, then a heading
    followed by the local alignments.
    """
    # Example inputs for the demo.
    first_seq, second_seq = "AUGCUUCAG", "AUGCUUCC"

    # Global alignment section.
    print("Global Alignment:")
    global_alignment(first_seq, second_seq)

    # Local alignment section (the leading newline separates the two sections).
    print("\nLocal Alignment:")
    local_alignment(first_seq, second_seq)


if __name__ == "__main__":
    main()


In [None]:
# Gaussian Interaction Profile (GIP) kernel similarity
import numpy as np
import pandas as pd
import os

def compute_gip_kernel(association_matrix):
    """Compute a Gaussian kernel between the rows of an association matrix.

    Every row is treated as one entity's profile. For two different rows
    ``i`` and ``j`` the kernel value is ``exp(-||row_i - row_j||^2 / r)``,
    where ``r`` is the mean squared Euclidean norm of all rows. Diagonal
    entries are set to exactly 1.

    Args:
        association_matrix: 2-D array-like (NumPy array, nested list,
            DataFrame, ...) of numeric or boolean values. It is converted to
            a float array; the input object is not modified.

    Returns:
        A ``(n, n)`` float ``numpy.ndarray`` where ``n`` is the number of
        rows. If ``r`` is 0 (every row is all zeros, or there are no
        columns) the whole matrix -- diagonal included -- is zero. An input
        with no rows yields an empty ``(0, 0)`` array.

    Raises:
        ValueError: If the input is not two-dimensional.
    """
    profiles = np.asarray(association_matrix, dtype=float)
    if profiles.ndim != 2:
        raise ValueError(
            "association_matrix must be 2-D, got %d dimension(s)" % profiles.ndim
        )

    num_entities = profiles.shape[0]
    kernel_matrix = np.zeros((num_entities, num_entities))
    if num_entities == 0:
        # Nothing to compare; also avoids a 0/0 when averaging the norms.
        return kernel_matrix

    # Bandwidth normaliser: mean squared row norm. Summing the squares
    # directly avoids a needless sqrt-then-square round trip.
    r = np.sum(np.square(profiles)) / num_entities
    if r == 0:
        # Degenerate input: the kernel is left as all zeros. The check is
        # loop-invariant, so it is done once here rather than per pair.
        return kernel_matrix

    for i in range(num_entities):
        # Squared distance from row i to every row in one vectorised step.
        differences = profiles - profiles[i]
        squared_distances = np.sum(np.square(differences), axis=1)
        kernel_matrix[i] = np.exp(-squared_distances / r)
        # Force the self-similarity to 1 even when the arithmetic above
        # yields NaN (e.g. non-finite input values).
        kernel_matrix[i, i] = 1

    return kernel_matrix

def save_matrix(matrix, filename):
    """Write a matrix to a CSV file without header or index columns.

    Args:
        matrix: Anything ``pandas.DataFrame`` accepts (NumPy array, nested
            list, ...).
        filename: Destination path. Missing parent directories are created.
            A bare file name (no directory part) is written to the current
            working directory.

    Returns:
        None.
    """
    directory = os.path.dirname(filename)
    # dirname() is '' for a bare file name, and os.makedirs('') raises, so
    # only create a directory when there is one to create. exist_ok=True
    # removes the check-then-create race of a separate os.path.exists() test.
    if directory:
        os.makedirs(directory, exist_ok=True)
    pd.DataFrame(matrix).to_csv(filename, header=False, index=False)

def main(association_path='./dataset/ass_del.csv', output_dir='./dataset'):
    """Compute and save the two GIP kernel matrices for an association table.

    The association CSV is read without a header row. One kernel is computed
    over its rows and one over its columns (via the transpose), and each is
    written to its own CSV file in ``output_dir``.

    Args:
        association_path: Path of the headerless association CSV. Defaults
            to the location this notebook originally hard-coded.
        output_dir: Directory receiving ``disease_gip_kernel.csv`` and
            ``rna_gip_kernel.csv``. Use ``'.'`` for the current directory.

    Returns:
        None.
    """
    # Load association data from a CSV file into a NumPy array.
    # NOTE(review): variable names suggest rows are diseases and columns are
    # RNAs -- confirm against the dataset.
    disease_rna_association = np.array(pd.read_csv(association_path, header=None))

    # Row-wise kernel (diseases) and column-wise kernel (RNAs, via transpose).
    gip_disease_sim = compute_gip_kernel(disease_rna_association)
    gip_rna_sim = compute_gip_kernel(disease_rna_association.T)

    # Persist both kernels; save_matrix creates the directory if needed.
    save_matrix(gip_disease_sim, os.path.join(output_dir, 'disease_gip_kernel.csv'))
    save_matrix(gip_rna_sim, os.path.join(output_dir, 'rna_gip_kernel.csv'))


if __name__ == '__main__':
    main()


In [None]:
"""MISIM v2.0 Overview

MISIM v2.0 is a web tool, hosted at http://www.lirmed.com/misim/onevsall, for computing microRNA functional similarity. It uses the HMDD v3.0 dataset to infer similarities from microRNA-disease associations.

Usage Guide
- Input Data: Enter your microRNA data in the tool's input field.
- Analysis: MISIM v2.0 calculates similarity scores that aid in predicting new miRNA-disease associations.
- Visualization: The tool provides network visualization and functional analysis of the analyzed microRNAs.
- Access: The tool is available for academic use; commercial users need to contact the administrators.

This guide should help you get started with using MISIM v2.0 in your research."""

In [None]:
library(DOSE)

# Calculate pairwise semantic similarity between diseases.
#
# Arguments:
#   disease_ids: vector of disease identifiers. Entries that do not already
#     start with "DOID:" get that prefix added before the lookup.
#
# Returns:
#   The result of DOSE::doSim() for the ID vector compared against itself with
#   measure = "Wang" (presumably a square similarity matrix -- confirm against
#   the DOSE documentation).
calculate_disease_similarity <- function(disease_ids) {
  # Install DOSE only when it is genuinely missing. requireNamespace() probes
  # for a single package; installed.packages() has to scan every installed
  # package and is not meant for this kind of check.
  if (!requireNamespace("DOSE", quietly = TRUE)) {
    BiocManager::install("DOSE")
  }

  # Add "DOID:" prefix if not already present
  disease_ids <- as.character(disease_ids)
  disease_ids <- ifelse(grepl("^DOID:", disease_ids), disease_ids, paste0("DOID:", disease_ids))

  # Namespace-qualified call, so the function works whether or not DOSE has
  # been attached with library(DOSE) by the caller.
  similarity_matrix <- DOSE::doSim(disease_ids, disease_ids, measure = "Wang")

  return(similarity_matrix)
}

# Example usage: four disease IDs supplied without the "DOID:" prefix
# (calculate_disease_similarity() adds the prefix before calling doSim).
# The resulting similarity object is printed in full.
disease_ids <- c("0014667", "0050156", "0080315", "14330")
similarity_matrix <- calculate_disease_similarity(disease_ids)
print(similarity_matrix)


In [None]:
# Batch downloads from NCBI can be done with Batch Entrez: https://www.ncbi.nlm.nih.gov/sites/batchentrez