This is a file about making the graph neural networks out of the smile annotation we are given for making the use of graph neural networks

# Importing Libraries

In [None]:
!pip install rdkit-pypi

In [None]:
!pip install torch
!pip install torch-geometric

In [14]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem

import torch
from torch_geometric.data import Data

import numpy as np
import matplotlib.pyplot as plt

# Smile to Image
In this section we are converting the input smile to a structure.png to show the structure of it

In [13]:
def smiles_to_mol(smiles):
    mol = Chem.MolFromSmiles(smiles)
    return mol

def add_hydrogens(mol):
    mol = Chem.AddHs(mol)
    return mol

def draw_molecule(mol, filename=None):
    if filename:
        Draw.MolToImageFile(mol, filename)
    else:
        Draw.MolToImage(mol)

def generate_3d_coordinates(mol):
    AllChem.EmbedMolecule(mol, randomSeed=42)  # You can choose any seed value

# Example usage:
smiles_string = "CCO"
mol = smiles_to_mol(smiles_string)

# Add explicit hydrogens
mol = add_hydrogens(mol)

# Generate 3D coordinates for visualization
generate_3d_coordinates(mol)

# Draw and display the molecule
draw_molecule(mol, filename="molecule.png")

# Smile to Graphs
In this section we are converting the smiles to the graph format

In [16]:
def smiles_to_graph(smiles):
    mol = Chem.MolFromSmiles(smiles)

    # Add explicit hydrogens
    mol = Chem.AddHs(mol)

    # Generate 3D coordinates for visualization
    AllChem.EmbedMolecule(mol, randomSeed=42)  # You can choose any seed value

    # Get atom features and adjacency matrix
    num_atoms = mol.GetNumAtoms()
    atom_features = np.zeros((num_atoms, 3))  # You may need to adjust the feature dimensions
    adjacency_matrix = np.zeros((num_atoms, num_atoms))

    for bond in mol.GetBonds():
        i = bond.GetBeginAtomIdx()
        j = bond.GetEndAtomIdx()
        adjacency_matrix[i, j] = adjacency_matrix[j, i] = 1  # Adjacency matrix is symmetric

    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        atom_features[idx, 0] = atom.GetAtomicNum()  # Atom type or atomic number
        atom_features[idx, 1] = atom.GetTotalNumHs()  # Number of hydrogen atoms
        atom_features[idx, 2] = atom.GetFormalCharge()  # Formal charge

    # Convert to PyTorch tensors
    atom_features = torch.tensor(atom_features, dtype=torch.float)
    adjacency_matrix = torch.tensor(adjacency_matrix, dtype=torch.float)

    # Create PyTorch Geometric data object
    edge_index = torch.nonzero(adjacency_matrix, as_tuple=True)
    data = Data(x=atom_features, edge_index=edge_index)

    return data

# Example usage:
smiles_string = "CCO"
graph_data = smiles_to_graph(smiles_string)

print("Node Features:")
print(graph_data.x)
print("\nEdge Index:")
print(graph_data.edge_index)


Node Features:
tensor([[6., 0., 0.],
        [6., 0., 0.],
        [8., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])

Edge Index:
(tensor([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6, 7, 8]), tensor([1, 3, 4, 5, 0, 2, 6, 7, 1, 8, 0, 0, 0, 1, 1, 2]))


The above are the input features that needs to be feeded into our graph neural network