# Test_2016_290

In [None]:
import pandas as pd
from plapt import Plapt

In [None]:
# Load the Test2016_290 benchmark table from disk.
benchmark_data = pd.read_csv("data/Test2016_290.csv")

# Unpack the three columns used downstream into plain Python lists:
# protein sequences, ligand SMILES strings, and the measured affinities.
prot_seqs, mol_smiles, experimental_pKd = (
    benchmark_data[column].tolist()
    for column in ('seq', 'smiles_can', 'neg_log10_affinity_M')
)

In [None]:
# Create the Plapt predictor with caching switched on (caching=True).
plapt = Plapt(caching=True)

In [None]:
# Run the predictor on the benchmark sequences and SMILES. The result is consumed
# below as an iterable of dicts that each carry a 'neg_log10_affinity_M' entry.
# NOTE(review): presumably inputs are paired element-wise (one prediction per row) — confirm.
predictions = plapt.predict_affinity(prot_seqs,mol_smiles)

In [None]:
import numpy as np
from scipy.stats import spearmanr

# Pull the predicted value out of every prediction record.
predicted_pKd = [record['neg_log10_affinity_M'] for record in predictions]

# Predictions and measurements must pair up one-to-one before any metric is computed.
n_predicted, n_experimental = len(predicted_pKd), len(experimental_pKd)
if n_predicted != n_experimental:
    raise ValueError("The lengths of the predicted and experimental lists do not match.")

# Switch to numpy arrays so the arithmetic below is element-wise.
predicted_pKd, experimental_pKd = np.array(predicted_pKd), np.array(experimental_pKd)

# All error metrics derive from the same element-wise residuals.
residuals = predicted_pKd - experimental_pKd
mse = np.mean(residuals ** 2)        # mean squared error
mae = np.mean(np.abs(residuals))     # mean absolute error
rmse = np.sqrt(mse)                  # root mean squared error

# Rank correlation between predictions and measurements (p-value discarded).
spearman_corr, _ = spearmanr(predicted_pKd, experimental_pKd)

print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"Spearman's Correlation: {spearman_corr}")

In [None]:
# Single protein sequence handed to plapt.score_candidates together with the
# full SMILES list loaded earlier.
target_seq = "MSDNGPQNQRNAPRITFGGPSDSTGSNQNGERSGARSKQRRPQGLPNNTASWFTALTQHGKEDLKFPRGQGVPINTNSSPDDQIGYYRRATRRIRGGDGKMKDLSPRWYFYYLGTGPEAGLPYGANKDGIIWVATEGALNTPKDHIGTRNPANNAAIVLQLPQGTTLPKGFYAEGSRGGSQASSRSSSRSRNSSRNSTPGSSRGTSPARMAGNGGDAALALLLLDRLNQLESKMSGKGQQQQGQTVTKKSAAEASKKPRQKRTATKAYNVTQAFGRRGPEQTQGNFGDQELIRQGTDYKHWPQIAQFAPSASAFFGMSRIGMEVTPSGTWLTYTGAIKLDDKDPNFKDQVILLNKHIDAYKTFPPTEPKKDKKKKADETQALPQRQKKQQTVTLLPAADLDDFSKQLQQSMSSADSTQA"

# Tabulate whatever score_candidates returns so columns can be selected by name.
predictions = pd.DataFrame(plapt.score_candidates(target_seq, mol_smiles))

In [None]:
# Keep only the 'neg_log10_affinity_M' column of the scored-candidates DataFrame.
predicted_pKd = predictions["neg_log10_affinity_M"]

# CSAR HiQ 36

In [None]:
import pandas as pd
from plapt import Plapt

In [None]:
# Read the CSAR-HiQ_36 benchmark table.
benchmark_data = pd.read_csv("data/CSAR-HiQ_36.csv")

# Column -> plain Python list for each field used by the prediction and metric cells.
prot_seqs = benchmark_data['seq'].to_list()                            # protein sequences
mol_smiles = benchmark_data['smiles_can'].to_list()                    # ligand SMILES strings
experimental_pKd = benchmark_data['neg_log10_affinity_M'].to_list()    # measured affinities

In [None]:
# Create a Plapt predictor with caching enabled; the cached features are
# exported later in this section via plapt.get_cached_features().
plapt = Plapt(caching=True)

In [None]:
# Predict affinities for the CSAR-HiQ_36 sequences and SMILES; the metrics cell
# reads 'neg_log10_affinity_M' from each returned record.
predictions = plapt.predict_affinity(prot_seqs,mol_smiles)

In [None]:
import numpy as np
from scipy.stats import spearmanr, pearsonr

# Pull the predicted value out of every prediction record.
predicted_pKd = [d['neg_log10_affinity_M'] for d in predictions]

# Ensure the lengths of the lists are the same so values pair up one-to-one
if len(predicted_pKd) != len(experimental_pKd):
    raise ValueError("The lengths of the predicted and experimental lists do not match.")

# Convert lists to numpy arrays for element-wise calculations
predicted_pKd = np.array(predicted_pKd)
experimental_pKd = np.array(experimental_pKd)

# Calculate MSE (mean squared error)
mse = np.mean((predicted_pKd - experimental_pKd) ** 2)

# Calculate MAE (mean absolute error)
mae = np.mean(np.abs(predicted_pKd - experimental_pKd))

# Calculate RMSE (root mean squared error)
rmse = np.sqrt(mse)

# Calculate Spearman's (rank) and Pearson's (linear) correlations; p-values are discarded
spearman_corr, _ = spearmanr(predicted_pKd, experimental_pKd)
pearson_corr, _ = pearsonr(predicted_pKd, experimental_pKd)

print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"Spearman's Correlation: {spearman_corr}")
# Label previously read "Pearsons's" (typo).
print(f"Pearson's Correlation: {pearson_corr}")

In [None]:
import json

# Save the features held by the caching Plapt instance.
features = plapt.get_cached_features()
with open('data/CSAR-HiQ_36_features.json', 'w') as file:
    json.dump(features, file)

# By this point the metrics cell has rebound experimental_pKd to a NumPy array,
# which json cannot serialize (TypeError). Write plain Python floats instead;
# this also works if experimental_pKd is still a list.
with open('data/CSAR-HiQ_36_pKd.json', 'w') as file:
    json.dump([float(value) for value in experimental_pKd], file)

# Empty the predictor's cache once everything has been written.
plapt.clear_cache()

# Custom Benchmark

In [None]:
from datasets import load_dataset, Dataset
import random

# Fixed seed so the same 1000 rows are drawn on every run.
random.seed(2101)

# Fetch the training split first, then draw 1000 distinct row indices
# from 10001..20000 (the range's upper bound is exclusive) and keep those rows.
train_split = load_dataset("jglaser/binding_affinity")['train']
sampled_rows = random.sample(range(10001, 20001), 1000)
benchmark_data = train_split.select(sampled_rows)

In [None]:
# Write the sampled benchmark rows to CSV.
benchmark_data.to_csv("data/benchmark1k2101.csv")

In [None]:
# Pull the three columns used downstream: protein sequences, ligand SMILES,
# and the measured affinities.
prot_seqs, mol_smiles, experimental_pKd = (
    benchmark_data[column]
    for column in ('seq', 'smiles_can', 'neg_log10_affinity_M')
)

In [None]:
from plapt import Plapt
# Create a Plapt predictor with caching enabled (caching=True).
plapt = Plapt(caching=True)

In [None]:
# Predict affinities for the sampled benchmark rows; the metrics cell reads
# 'neg_log10_affinity_M' from each returned record.
predictions = plapt.predict_affinity(prot_seqs,mol_smiles)

In [None]:
import numpy as np
from scipy.stats import spearmanr

# One predicted value per prediction record.
predicted_pKd = [item['neg_log10_affinity_M'] for item in predictions]

# Fail fast when predictions and measurements cannot be paired one-to-one.
if not len(predicted_pKd) == len(experimental_pKd):
    raise ValueError("The lengths of the predicted and experimental lists do not match.")

# numpy arrays make the error arithmetic element-wise.
predicted_pKd = np.array(predicted_pKd)
experimental_pKd = np.array(experimental_pKd)

# Element-wise prediction error shared by all three error metrics.
prediction_error = predicted_pKd - experimental_pKd
mse = np.mean(prediction_error ** 2)       # mean squared error
mae = np.mean(np.abs(prediction_error))    # mean absolute error
rmse = np.sqrt(mse)                        # root mean squared error

# Rank correlation; the accompanying p-value is discarded.
spearman_corr, _ = spearmanr(predicted_pKd, experimental_pKd)

print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")
print(f"Spearman's Correlation: {spearman_corr}")

In [None]:
import json

# Save the features held by the caching Plapt instance.
features = plapt.get_cached_features()
with open('data/benchmark1k2101_features.json', 'w') as file:
    json.dump(features, file)

# By this point the metrics cell has rebound experimental_pKd to a NumPy array,
# which json cannot serialize (TypeError). Write plain Python floats instead;
# this also works if experimental_pKd is still a plain sequence.
with open('data/benchmark1k2101_pKd.json', 'w') as file:
    json.dump([float(value) for value in experimental_pKd], file)

# Empty the predictor's cache once everything has been written.
plapt.clear_cache()

# Visualization

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr

# Scatter of measured vs. predicted values, using whichever experimental_pKd /
# predicted_pKd are currently defined in the kernel.
plt.scatter(experimental_pKd, predicted_pKd, color='blue')

# Reference line y = x: a perfect predictor would put every point on it.
# The previous line joined (min exp, min pred) to (max exp, max pred), which is
# not the identity line unless both ranges happen to coincide.
axis_min = min(np.min(experimental_pKd), np.min(predicted_pKd))
axis_max = max(np.max(experimental_pKd), np.max(predicted_pKd))
plt.plot([axis_min, axis_max], [axis_min, axis_max], color='red') # Identity line
plt.xlabel('Experimental pKd')
plt.ylabel('Predicted pKd')
plt.title('Straight Line Test')
plt.show()

# Calculating Metrics (true values first, predictions second)
mse = mean_squared_error(experimental_pKd, predicted_pKd)
mae = mean_absolute_error(experimental_pKd, predicted_pKd)
rmse = np.sqrt(mse)
pearson_corr, _ = pearsonr(experimental_pKd, predicted_pKd)  # p-value discarded

print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)
print("Pearson's Correlation Coefficient:", pearson_corr)
