### Imports

#######################################
##############################################
# Laplacian Regularization & WNN for DTI
# Generic for all 4 families (Enzyme, GPCR, Ion Channel, Nuclear Receptor)
# Author: Vitor
# Date: 2025-05-03
##############################################

In [1]:
import pandas as pd
import numpy as np
import os
from scipy.sparse import csr_matrix
from numpy.linalg import solve

In [2]:
# --- Step 1: Load original matrices ---
# Every path is built from the current working directory, so the notebook must
# be started from the folder that contains `data/split/<family>/`.
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype_index = 0  # Change index as needed for GPCR, ion_channel, etc.
ltype = ligants_type[ltype_index]
data_dir = os.path.join(base_dir, 'data', 'split', ltype)

# File-name prefix per family. The enzyme names (`e_*`) used to be hard-coded,
# so changing `ltype_index` alone still pointed at enzyme-named files.
# NOTE(review): the non-enzyme prefixes follow the public Yamanishi benchmark
# naming -- confirm they match the files actually present under data/split/.
family_prefix = {
    'enzyme': 'e',
    'GPCR': 'gpcr',
    'ion_channel': 'ic',
    'nuclear_receptor': 'nr',
}
matrix_suffix = {
    'Y': 'admat_dgc.txt',   # interaction (adjacency) matrix
    'St': 'simmat_dc.txt',  # similarity matrix that right-multiplies Y later on
    'Sd': 'simmat_dg.txt',  # similarity matrix that left-multiplies Y later on
}

files_matrix_temp = {}
for key, suffix in matrix_suffix.items():
    family_name = f"{family_prefix[ltype]}_{suffix}"
    legacy_name = f"e_{suffix}"  # name that was previously used for every family
    # Backward compatibility: keep using the old `e_*` name when it is the only
    # one that exists in the selected family folder.
    use_legacy = (
        not os.path.isfile(os.path.join(data_dir, family_name))
        and os.path.isfile(os.path.join(data_dir, legacy_name))
    )
    files_matrix_temp[key] = legacy_name if use_legacy else family_name

df_temp = {}
for key, fname in files_matrix_temp.items():
    path = os.path.join(data_dir, fname)
    print(f"📥 Loading: {path}")
    if not os.path.isfile(path):
        # Same exception type pandas would raise, but with a hint about the cause.
        raise FileNotFoundError(
            f"Matrix file not found: {path}. Paths are resolved from the current "
            f"working directory ({base_dir}); start the notebook from the project "
            f"root or point base_dir at it."
        )
    df_temp[key] = pd.read_csv(path, delimiter='\t', index_col=0)

Y = df_temp['Y'].values.astype(float)
Sd = df_temp['Sd'].values.astype(float)
St = df_temp['St'].values.astype(float)

# Fail fast on inconsistent inputs: Sd is multiplied on the left of Y and St on
# the right, so they must be square and match Y's row / column counts.
n_rows, n_cols = Y.shape
if Sd.shape != (n_rows, n_rows) or St.shape != (n_cols, n_cols):
    raise ValueError(
        f"Incompatible shapes: Y={Y.shape}, Sd={Sd.shape} (expected {(n_rows, n_rows)}), "
        f"St={St.shape} (expected {(n_cols, n_cols)})"
    )


📥 Loading: c:\Users\riskf\OneDrive\DTI_2026\MatrixFactorization\temp\data\split\enzyme\e_admat_dgc.txt


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\riskf\\OneDrive\\DTI_2026\\MatrixFactorization\\temp\\data\\split\\enzyme\\e_admat_dgc.txt'

In [None]:
# --- Step 2: Symmetric Normalization ---
def normalize_laplacian(S):
    """Return the symmetrically normalized matrix D^(-1/2) * S * D^(-1/2).

    ``D`` is the diagonal matrix holding the row sums of ``S``. Row sums below
    ``1e-8`` are clamped to ``1e-8`` before the square root, so all-zero rows
    do not cause a division by zero.

    Despite the function name, the value returned is the normalized
    similarity matrix itself, not ``I - D^(-1/2) S D^(-1/2)``.

    Parameters
    ----------
    S : array-like of shape (n, n)
        Square similarity / adjacency matrix. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        ``S[i, j] / sqrt(d[i] * d[j])`` with ``d`` the clamped row sums.

    Raises
    ------
    ValueError
        If ``S`` is not a square 2-D matrix.
    """
    S = np.asarray(S)
    if S.ndim != 2 or S.shape[0] != S.shape[1]:
        raise ValueError(f"S must be a square 2-D matrix, got shape {S.shape}")

    inv_sqrt_degree = 1.0 / np.sqrt(np.maximum(S.sum(axis=1), 1e-8))
    # Scaling rows and columns by broadcasting is equivalent to D @ S @ D with a
    # diagonal D, without building the dense n x n diagonal matrix or paying
    # for two full matrix products.
    return inv_sqrt_degree[:, None] * S * inv_sqrt_degree[None, :]

# Run both similarity matrices through the same normalization routine
# (Sd first, then St).
Sd_norm, St_norm = (normalize_laplacian(similarity) for similarity in (Sd, St))

In [None]:
# --- Step 3: Apply propagation ---
# Weighted blend of two one-step propagations of Y: one through the normalized
# Sd (applied on the left of Y) and one through the normalized St (applied on
# the right of Y).
alpha = 0.5  # weight of the Sd term; the St term receives 1 - alpha
Y_smooth = np.add(
    alpha * np.matmul(Sd_norm, Y),
    (1 - alpha) * np.matmul(Y, St_norm),
)

In [None]:
# --- Step 5 (Optional): Binarize if needed ---
# Entries whose propagated score reaches the cut-off become 1, all others 0.
threshold = 0.5
Y_smooth_binary = (Y_smooth >= threshold).astype(int)

# The propagated scores are not rescaled to [0, 1] before this comparison, so a
# fixed cut-off can lie above every score and silently produce an all-zero
# matrix (the recorded run of this notebook reports 0 positives). Make that
# case visible instead of letting an empty prediction matrix go unnoticed.
if not Y_smooth_binary.any():
    max_score = Y_smooth.max() if Y_smooth.size else float('nan')
    print(
        f"WARNING: no score reaches threshold={threshold} "
        f"(max score = {max_score:.4g}); Y_smooth_binary is all zeros. "
        f"Lower the threshold or use a per-row top-k binarization instead."
    )

In [None]:
# --- Step 6: Save output ---
output_file = 'laplacian_Y_prime.csv'
output_path = os.path.join(base_dir, 'data', 'split', ltype, output_file)

# Re-attach the row/column labels of the loaded interaction table so the CSV
# keeps the same identifiers as the input file.
source_table = df_temp['Y']
binary_table = pd.DataFrame(
    Y_smooth_binary,
    index=source_table.index,
    columns=source_table.columns,
)
binary_table.to_csv(output_path)

n_positive = (Y_smooth_binary == 1).sum()
print(f" Smoothed interaction matrix saved to: {output_path}")
print(f" Positive interactions in Y': {n_positive}")

✅ Smoothed interaction matrix saved to: C:\Users\riskf\OneDrive\DTI - Data augmentation\data\split\enzyme\laplacian_Y_prime.csv
✅ Positive interactions in Y': 0


In [None]:
import pandas as pd
import numpy as np
import os

# Load as before
# Every path is built from the current working directory, so the notebook must
# be started from the folder that contains `data/split/<family>/`.
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype_index = 0
ltype = ligants_type[ltype_index]
data_dir = os.path.join(base_dir, 'data', 'split', ltype)

# File-name prefix per family. The enzyme names (`e_*`) used to be hard-coded,
# so changing `ltype_index` alone still pointed at enzyme-named files.
# NOTE(review): the non-enzyme prefixes follow the public Yamanishi benchmark
# naming -- confirm they match the files actually present under data/split/.
family_prefix = {
    'enzyme': 'e',
    'GPCR': 'gpcr',
    'ion_channel': 'ic',
    'nuclear_receptor': 'nr',
}
matrix_suffix = {
    'Y': 'admat_dgc.txt',   # interaction (adjacency) matrix
    'St': 'simmat_dc.txt',  # similarity matrix that right-multiplies Y below
    'Sd': 'simmat_dg.txt',  # similarity matrix that left-multiplies Y below
}

files_matrix_temp = {}
for key, suffix in matrix_suffix.items():
    family_name = f"{family_prefix[ltype]}_{suffix}"
    legacy_name = f"e_{suffix}"  # name that was previously used for every family
    # Backward compatibility: keep using the old `e_*` name when it is the only
    # one that exists in the selected family folder.
    use_legacy = (
        not os.path.isfile(os.path.join(data_dir, family_name))
        and os.path.isfile(os.path.join(data_dir, legacy_name))
    )
    files_matrix_temp[key] = legacy_name if use_legacy else family_name

df_temp = {}
for key, fname in files_matrix_temp.items():
    path = os.path.join(data_dir, fname)
    print(f" Loading: {path}")
    if not os.path.isfile(path):
        # Same exception type pandas would raise, but with a hint about the cause.
        raise FileNotFoundError(
            f"Matrix file not found: {path}. Paths are resolved from the current "
            f"working directory ({base_dir}); start the notebook from the project "
            f"root or point base_dir at it."
        )
    df_temp[key] = pd.read_csv(path, delimiter='\t', index_col=0)

Y = df_temp['Y'].values.astype(float)
Sd = df_temp['Sd'].values.astype(float)
St = df_temp['St'].values.astype(float)

# Fail fast on inconsistent inputs: Sd is multiplied on the left of Y and St on
# the right, so they must be square and match Y's row / column counts.
n_rows, n_cols = Y.shape
if Sd.shape != (n_rows, n_rows) or St.shape != (n_cols, n_cols):
    raise ValueError(
        f"Incompatible shapes: Y={Y.shape}, Sd={Sd.shape} (expected {(n_rows, n_rows)}), "
        f"St={St.shape} (expected {(n_cols, n_cols)})"
    )

# Symmetric normalization
def normalize_laplacian(S):
    """Return the symmetrically normalized matrix D^(-1/2) * S * D^(-1/2).

    ``D`` is the diagonal matrix holding the row sums of ``S``. Row sums below
    ``1e-8`` are clamped to ``1e-8`` before the square root, so all-zero rows
    do not cause a division by zero.

    Despite the function name, the value returned is the normalized
    similarity matrix itself, not ``I - D^(-1/2) S D^(-1/2)``.

    Parameters
    ----------
    S : array-like of shape (n, n)
        Square similarity / adjacency matrix. It is not modified.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        ``S[i, j] / sqrt(d[i] * d[j])`` with ``d`` the clamped row sums.

    Raises
    ------
    ValueError
        If ``S`` is not a square 2-D matrix.
    """
    S = np.asarray(S)
    if S.ndim != 2 or S.shape[0] != S.shape[1]:
        raise ValueError(f"S must be a square 2-D matrix, got shape {S.shape}")

    row_scale = 1.0 / np.sqrt(np.maximum(S.sum(axis=1), 1e-8))
    # Row/column scaling by broadcasting gives the same values as D @ S @ D for
    # a diagonal D, but skips the dense n x n diagonal matrix and the two full
    # matrix products.
    return row_scale[:, None] * S * row_scale[None, :]

# Run both similarity matrices through the same normalization routine
# (Sd first, then St).
Sd_norm, St_norm = (normalize_laplacian(similarity) for similarity in (Sd, St))

# Smoothed propagation
# Weighted blend of two one-step propagations of Y: one through the normalized
# Sd (applied on the left of Y) and one through the normalized St (applied on
# the right of Y).
alpha = 0.5
Y_smooth = np.add(
    alpha * np.matmul(Sd_norm, Y),
    (1 - alpha) * np.matmul(Y, St_norm),
)

# TOP-K binarization: for each protein (row), keep top k predicted values
k = 5  # choose number of predicted interactions to keep per row (e.g., 5)


def binarize_top_k(scores, k):
    """Mark the ``k`` highest-scoring columns of every row with 1, the rest with 0.

    Parameters
    ----------
    scores : array-like of shape (n_rows, n_cols)
        Score matrix; it is not modified.
    k : int
        Number of entries to keep per row. ``0`` keeps nothing; values larger
        than ``n_cols`` keep the whole row.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as ``scores`` containing only 0 and 1.
        Ties are resolved by the order ``np.argsort`` returns.

    Raises
    ------
    ValueError
        If ``scores`` is not 2-D or ``k`` is negative.
    """
    scores = np.asarray(scores)
    if scores.ndim != 2:
        raise ValueError(f"scores must be 2-D, got shape {scores.shape}")
    k = int(k)
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    binary = np.zeros_like(scores)
    k = min(k, scores.shape[1])
    if k == 0:
        # Guard: a `[-0:]` slice would select the *whole* row and set every
        # entry to 1 instead of none.
        return binary

    # argsort is ascending, so the last k positions of each row are its top-k.
    top_k_indices = np.argsort(scores, axis=1)[:, -k:]
    row_indices = np.arange(scores.shape[0])[:, None]
    binary[row_indices, top_k_indices] = 1
    return binary


Y_smooth_binary = binarize_top_k(Y_smooth, k)

# Save final binary matrix
output_file = 'laplacian_Y_prime_topk.csv'
output_path = os.path.join(base_dir, 'data', 'split', ltype, output_file)

# Re-attach the row/column labels of the loaded interaction table so the CSV
# keeps the same identifiers as the input file.
source_table = df_temp['Y']
binary_table = pd.DataFrame(
    Y_smooth_binary,
    index=source_table.index,
    columns=source_table.columns,
)
binary_table.to_csv(output_path)

n_positive = (Y_smooth_binary == 1).sum()
print(f" Final top-k smoothed Y matrix saved at: {output_path}")
print(f" Total positive interactions (Y'==1): {n_positive}")


📥 Loading: C:\Users\riskf\OneDrive\DTI - Data augmentation\data\split\enzyme\e_admat_dgc.txt
📥 Loading: C:\Users\riskf\OneDrive\DTI - Data augmentation\data\split\enzyme\e_simmat_dc.txt
📥 Loading: C:\Users\riskf\OneDrive\DTI - Data augmentation\data\split\enzyme\e_simmat_dg.txt
✅ Final top-k smoothed Y matrix saved at: C:\Users\riskf\OneDrive\DTI - Data augmentation\data\split\enzyme\laplacian_Y_prime_topk.csv
✅ Total positive interactions (Y'==1): 3320
