In [4]:
#Import the libraries
import os
import subprocess
import csv
import pandas as pd

# Base input folder containing ligand and protein subfolders
base_input_folder = "C:\\Users\\Rac\\OneDrive\\Desktop\\PDBBIND2014"

# Path to the Vina executable
vina_executable = "vina"

# Get a list of subfolders in the base input folder
subfolders = [f for f in os.listdir(base_input_folder) if os.path.isdir(os.path.join(base_input_folder, f))]

# Define the output folder for docked results
output_base_folder = os.path.join(base_input_folder, "docked_results")
if not os.path.exists(output_base_folder):
    os.makedirs(output_base_folder)

# Loop through the first 100 subfolders
for folder_name in subfolders[:100]:
    folder_path = os.path.join(base_input_folder, folder_name)

    # Find ligand and protein files
    ligand_file = os.path.join(folder_path, f"{folder_name}_ligand.pdbqt")
    protein_file = os.path.join(folder_path, f"{folder_name}_protein.pdbqt")
    
    # Read docking parameters from config.txt
    config_file = os.path.join(folder_path, "config.txt")
    with open(config_file, "r") as f:
        config_lines = f.readlines()
        center_x = float(config_lines[0].split("=")[1].strip())
        center_y = float(config_lines[1].split("=")[1].strip())
        center_z = float(config_lines[2].split("=")[1].strip())
        size_x = float(config_lines[3].split("=")[1].strip())
        size_y = float(config_lines[4].split("=")[1].strip())
        size_z = float(config_lines[5].split("=")[1].strip())

    # Define the output folder for docked results
    output_folder = os.path.join(output_base_folder, folder_name)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Define the output PDBQT file path
    output_pdbqt_file = os.path.join(output_folder, "docked_result.pdbqt")

    # Perform docking using AutoDock Vina
    docking_command = f"{vina_executable} --receptor {protein_file} --ligand {ligand_file} " \
                       f"--center_x {center_x} --center_y {center_y} --center_z {center_z} " \
                       f"--size_x {size_x} --size_y {size_y} --size_z {size_z} --out {output_pdbqt_file}"
    subprocess.call(docking_command, shell=True)

    # Extract and save docking results to CSV file
    docking_info = []
    with open(output_pdbqt_file, "r") as f:
        for line in f:
            if line.startswith("REMARK VINA RESULT:"):
                docking_info.append(line.strip().split()[3:])
    
    csv_file = os.path.join(output_folder, "docking_results.csv")
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Folder", "Affinity (kcal/mol)", "Dist from best mode (rmsd l.b.)", "Dist from best mode (rmsd u.b.)"])
        for i, info in enumerate(docking_info, start=1):
            writer.writerow([folder_name] + info)

    print(f"Docking completed for {folder_name}, results saved in the 'docked_results' folder.")

print("Docking processes for the first 100 subfolders completed.")

# Part to combine CSV files from all subfolders
input_folder = os.path.join(output_base_folder, "docked_results")
subfolders = [f for f in os.listdir(input_folder) if os.path.isdir(os.path.join(input_folder, f))]

data_frames = []

for folder_name in subfolders:
    folder_path = os.path.join(input_folder, folder_name)
    csv_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

    for csv_file in csv_files:
        csv_path = os.path.join(folder_path, csv_file)
        csv_data = pd.read_csv(csv_path)
        data_frames.append(csv_data)

combined_data = pd.concat(data_frames, ignore_index=True)

combined_csv_file = os.path.join(input_folder, "combined_docking_results.csv")
combined_data.to_csv(combined_csv_file, index=False)

print("Combined CSV file saved:", combined_csv_file)


Docking completed for 10gs, results saved in the 'docked_results' folder.
Docking completed for 1a30, results saved in the 'docked_results' folder.
Docking completed for 1bcu, results saved in the 'docked_results' folder.
Docking completed for 1e66, results saved in the 'docked_results' folder.
Docking completed for 1f8b, results saved in the 'docked_results' folder.
Docking completed for 1f8c, results saved in the 'docked_results' folder.
Docking completed for 1f8d, results saved in the 'docked_results' folder.
Docking completed for 1gpk, results saved in the 'docked_results' folder.
Docking completed for 1h23, results saved in the 'docked_results' folder.
Docking completed for 1hfs, results saved in the 'docked_results' folder.
Docking completed for 1hnn, results saved in the 'docked_results' folder.
Docking completed for 1igj, results saved in the 'docked_results' folder.
Docking completed for 1jyq, results saved in the 'docked_results' folder.
Docking completed for 1kel, results sa

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\Rac\\OneDrive\\Desktop\\PDBBIND2014\\docked_results\\docked_results'

In [5]:
# Part to combine CSV files from all subfolders
input_folder = os.path.join(base_input_folder, "docked_results")  # Update this line
subfolders = [f for f in os.listdir(input_folder) if os.path.isdir(os.path.join(input_folder, f))]

data_frames = []

for folder_name in subfolders:
    folder_path = os.path.join(input_folder, folder_name)
    
    # Get a list of CSV files in the subfolder
    csv_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]
    
    # Read each CSV file and append to the list of DataFrames
    for csv_file in csv_files:
        csv_path = os.path.join(folder_path, csv_file)
        csv_data = pd.read_csv(csv_path)
        data_frames.append(csv_data)

# Concatenate all DataFrames in the list
combined_data = pd.concat(data_frames, ignore_index=True)

# Path to save the combined CSV file
combined_csv_file = os.path.join(input_folder, "combined_docking_results.csv")

# Save the combined DataFrame to a CSV file
combined_data.to_csv(combined_csv_file, index=False)

print("Combined CSV file saved:", combined_csv_file)


Combined CSV file saved: C:\Users\Rac\OneDrive\Desktop\PDBBIND2014\docked_results\combined_docking_results.csv
