In [2]:
import pandas as pd
import os

# Locate the current user's home directory first, then point at the
# "Documents" folder directly beneath it.
_home_dir = os.path.expanduser('~')
documents_path = os.path.join(_home_dir, 'Documents')

# Function to load a CSV file into a pandas DataFrame
def load_csv(file_name, base_dir=None):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_name : str
        Name of the CSV file; it is joined onto ``base_dir`` with
        ``os.path.join``.
    base_dir : str or os.PathLike, optional
        Directory that contains the file. When omitted, the module-level
        ``documents_path`` is used, so existing one-argument calls behave
        exactly as before.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.read_csv`` on the resolved path, with pandas'
        default parsing options.
    """
    # Resolve the default at call time so the global is only needed when the
    # caller does not provide a directory of their own.
    if base_dir is None:
        base_dir = documents_path
    file_path = os.path.join(base_dir, file_name)
    return pd.read_csv(file_path)

# Load each CSV file.
# The files are read in the order listed, and the resulting DataFrames are
# unpacked into the names on the left-hand side in that same order.
(
    df_predictedTest_experimentalTest,
    df_predictedTrain_experimentalTrain,
    df_predictedTest2016290_experimentalTest2016290,
    df_predictedCSARHiQ36_experimentalCSARHiQ36,
    df_predictedBenchmark1k2101_experimentalBenchmark1k2101,
) = [
    load_csv(csv_name)
    for csv_name in (
        "predictedTest_experimentalTest.csv",
        "predictedTrain_experimentalTrain.csv",
        "predictedTest2016290_experimentalTest2016290.csv",
        "predictedCSARHiQ36_experimentalCSARHiQ36.csv",
        "predictedBenchmark1k2101_experimentalBenchmark1k2101.csv",
    )
]

# Example to check the loaded data (uncomment the line below to see the data)
# print(df_predictedTest_experimentalTest.head())

In [16]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os

# Ensure the 'Graphs' folder exists
diagrams_folder = 'Graphs'
# exist_ok=True turns the call into a no-op when the directory is already
# present, so there is no window between "check" and "create" in which
# another process could create it and make makedirs fail.
os.makedirs(diagrams_folder, exist_ok=True)

# Function to plot scatter plot with histograms and save as PNG
def plot_scatter_with_histograms_and_save(df, color, file_name, label,
                                          axis_limits=(0, 12), bins=40):
    """Draw a scatter plot with marginal histograms and save it to disk.

    The first column of ``df`` is plotted on the x-axis (labelled
    'Predicted pKa') and the second column on the y-axis (labelled
    'Experimental pKa'). A dashed diagonal is drawn across the axis range,
    and a histogram of each column is placed above / to the right of the
    scatter plot, sharing the corresponding axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least two columns; only columns 0 and 1 are used.
        NOTE(review): the axis labels assume column 0 holds predictions and
        column 1 experimental values - confirm against the CSV layout.
    color : str
        Matplotlib colour used for the points and both histograms.
    file_name : str
        Name of the output file, written inside ``diagrams_folder``.
    label : str
        Legend entry for the scatter points.
    axis_limits : tuple of (low, high), optional
        Limits applied to both axes and to the diagonal line.
        Defaults to ``(0, 12)``.
    bins : int, optional
        Number of bins for both histograms. Defaults to ``40``.

    Raises
    ------
    IndexError
        If ``df`` has fewer than two columns.
    """
    # Fail before any figure is created so bad input cannot leave one open.
    if df.shape[1] < 2:
        raise IndexError(
            "df must have at least two columns (x values, y values); "
            f"got {df.shape[1]}"
        )

    x_values = df.iloc[:, 0]
    y_values = df.iloc[:, 1]
    low, high = axis_limits

    fig = plt.figure(figsize=(5, 5))
    try:
        # 4x4 grid: top row for the x histogram, right column for the y
        # histogram, and the remaining 3x3 area for the scatter plot.
        grid = plt.GridSpec(4, 4, hspace=0.2, wspace=0.2)
        main_ax = fig.add_subplot(grid[1:4, :-1])
        y_hist_ax = fig.add_subplot(grid[1:4, -1], sharey=main_ax)
        x_hist_ax = fig.add_subplot(grid[0, :-1], sharex=main_ax)

        main_ax.scatter(x_values, y_values, s=30, alpha=0.7, color=color,
                        edgecolors='black', linewidth=0.5, label=label)
        main_ax.set(xlim=(low, high), ylim=(low, high))
        main_ax.grid(True)
        main_ax.plot([low, high], [low, high], 'k--')
        main_ax.set_xlabel('Predicted pKa', fontsize=12)
        main_ax.set_ylabel('Experimental pKa', fontsize=12)
        main_ax.legend(loc='upper left')  # Adding the legend

        x_hist_ax.hist(x_values, bins=bins, color=color, alpha=0.7)
        y_hist_ax.hist(y_values, bins=bins, color=color, alpha=0.7,
                       orientation='horizontal')

        # The marginal histograms are decoration only: hide their ticks and
        # tick labels.
        for hist_ax in (x_hist_ax, y_hist_ax):
            hist_ax.tick_params(axis='both', which='both', left=False,
                                bottom=False, labelleft=False,
                                labelbottom=False)

        # Keep only the spine that touches the scatter plot on each histogram.
        for side in ('top', 'right', 'left'):
            x_hist_ax.spines[side].set_visible(False)
        for side in ('top', 'right', 'bottom'):
            y_hist_ax.spines[side].set_visible(False)

        # Save through the figure object rather than pyplot's notion of the
        # "current figure", so the right figure is always the one written.
        fig.savefig(os.path.join(diagrams_folder, file_name))
    finally:
        # Always release the figure, even if plotting or saving failed, so
        # it is neither displayed inline nor kept alive by pyplot.
        plt.close(fig)


# Colors for each plot
colors = ['blue', 'green', 'red', 'purple', 'orange']
file_names = ['Test.png', 'Train.png', 'Test2016290.png', 'CSARHiQ36.png', 'Benchmark1k2101.png']
labels = ['Test Set', 'Training Set', 'Test2016_290', 'CSAR-HiQ 36', 'Benchmark1k2101']

# DataFrames in the same order as the colors / file names / labels above.
plot_frames = (
    df_predictedTest_experimentalTest,
    df_predictedTrain_experimentalTrain,
    df_predictedTest2016290_experimentalTest2016290,
    df_predictedCSARHiQ36_experimentalCSARHiQ36,
    df_predictedBenchmark1k2101_experimentalBenchmark1k2101,
)

# zip() stops silently at the shortest input, so make a mismatch between the
# parallel sequences loud instead of quietly skipping a plot.
assert len(plot_frames) == len(colors) == len(file_names) == len(labels), \
    "plot_frames, colors, file_names and labels must have the same length"

# Generating and saving each plot
for plot_df, plot_color, plot_file_name, plot_label in zip(plot_frames, colors, file_names, labels):
    plot_scatter_with_histograms_and_save(plot_df, plot_color, plot_file_name, plot_label)
