# Generating a graph from a ".cif" file (MOF structure)

## Prerequisites

1. Python (tested on 3.9.1)
2. Numpy (tested on 1.20.2)
3. pycifrw [[conda]](https://anaconda.org/conda-forge/pycifrw) [[pip]](https://pypi.org/project/PyCifRW/) (tested on 4.4.1)
4. A table of the covalent radii of the atoms (included here as **covalent_radius.json**)

In [1]:
import json
from itertools import product
import numpy as np
from pyvis.network import Network
import CifFile  # This is the pycifrw module

In [2]:
# Read a CIF file and keep the object returned by first_block(); later cells
# index it with CIF data names (e.g. mof["_atom_site_label"]), i.e. it is used
# like a Python dictionary.
# NOTE(review): the path is relative, so it is resolved against the current
# working directory of the notebook.
cif_filename = "example_cifs/DB0-m24_o19_o19_sra_repeat.cif"
mof = CifFile.ReadCif(cif_filename).first_block()

In [3]:
# Pull the per-atom columns out of the CIF block.
# Element symbols and site labels are kept exactly as the block returns them.
elements = mof["_atom_site_type_symbol"]
labels = mof["_atom_site_label"]
# The number of atoms is taken from the length of the element column.
n_atoms = len(elements)
# Partial charges are converted to a float array (these are the node targets).
charges = np.array(mof["_atom_type_partial_charge"], dtype=float)

In [4]:
# Covalent radii: read the symbol -> radius table, look up the radius of every
# atom by its element symbol, then build the pairwise table of radius sums
# where entry [i, j] is cov_r[i] + cov_r[j].
with open("properties/covalent_radius.json", 'r') as rf:
    covalent_radius = json.load(rf)
cov_r = np.array([covalent_radius[symbol] for symbol in elements], dtype=float)
# Broadcasting a column against a row gives the full (n_atoms, n_atoms) table.
cov_r_mat = cov_r[:, np.newaxis] + cov_r[np.newaxis, :]

In [5]:
# Electronegativity: read the symbol -> value table and look up one value per
# atom, in the same order as `elements`.
with open("properties/electronegativity.json", 'r') as rf:
    electronegativity = json.load(rf)
eleneg = np.array(
    [electronegativity[symbol] for symbol in elements],
    dtype=float,
)

In [6]:
# Bundle everything known about the nodes (atoms) of the graph:
# identifiers, class, two input features and the regression target.
node_info = dict(
    node_label=labels,       # CIF site labels
    node_class=elements,     # element symbols
    node_feature_1=cov_r,    # covalent radius per atom
    node_feature_2=eleneg,   # electronegativity per atom
    node_target=charges,     # partial charge per atom
)

## Coordinate conversion

Coordinates in ".cif" files are fractional. The following code builds the matrix that converts fractional coordinates to Cartesian coordinates. Read more [here](https://en.wikipedia.org/wiki/Fractional_coordinates).

In [7]:
# Unit-cell angles (alpha, beta, gamma), converted from degrees to radians.
cell_ang = np.deg2rad(np.array(
    [mof[key] for key in ("_cell_angle_alpha", "_cell_angle_beta", "_cell_angle_gamma")],
    dtype=float,
))

# Unit-cell edge lengths (a, b, c).
cell_vec = np.array(
    [mof[key] for key in ("_cell_length_a", "_cell_length_b", "_cell_length_c")],
    dtype=float,
)

cell_cos = np.cos(cell_ang)
cell_sin = np.sin(cell_ang)

# Cell volume:
#   V = a*b*c * sqrt(1 - cos^2(alpha) - cos^2(beta) - cos^2(gamma)
#                    + 2*cos(alpha)*cos(beta)*cos(gamma))
cell_vol = np.prod(cell_vec) * np.sqrt(
    1 - np.sum(cell_cos**2) + 2*np.prod(cell_cos)
)

# Upper-triangular conversion matrix; multiplying it with a column of
# fractional coordinates yields the Cartesian coordinates.
frac2cart = np.array([
    [
        cell_vec[0],
        cell_vec[1] * cell_cos[2],
        cell_vec[2] * cell_cos[1],
    ],
    [
        0.0,
        cell_vec[1] * cell_sin[2],
        cell_vec[2] * (cell_cos[0]-cell_cos[1]*cell_cos[2]) / cell_sin[2],
    ],
    [
        0.0,
        0.0,
        cell_vol / cell_vec[0] / cell_vec[1] / cell_sin[2],
    ],
])

# Option 1: Make a graph as if the MOF is a standalone molecule

This is a simple approach, but no [periodic boundary conditions](https://en.wikipedia.org/wiki/Periodic_boundary_conditions) are considered.

In [8]:
# Fractional coordinates stacked with one row per axis (x, y, z).
frac_xyz = np.array(
    [
        mof[key]
        for key in ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")
    ],
    dtype=float,
)
# Convert to Cartesian coordinates, then transpose so that every row is one atom.
cart_xyz = np.matmul(frac2cart, frac_xyz).T

In [9]:
# Displacement for every ordered atom pair, flattened so that row
# i * n_atoms + j holds (atom i) - (atom j).
cart_diff = np.repeat(cart_xyz, n_atoms, axis=0) - np.tile(cart_xyz, (n_atoms, 1))
# Euclidean length of each displacement, folded back into a square
# (n_atoms, n_atoms) distance matrix.
cart_dist = np.reshape(np.linalg.norm(cart_diff, axis=1), (n_atoms, n_atoms))

In [10]:
# Two atoms are treated as bonded when their distance is below the sum of
# their covalent radii plus a tolerance; bond_tol is added to the radius sums,
# so it is in the same unit as the radii and distances.
bond_tol = 0.25
# Zero-distance pairs (e.g. an atom paired with itself) are never bonds.
lower_bound = cart_dist > 0
upper_bound = cart_dist < cov_r_mat + bond_tol
edges = lower_bound & upper_bound
# Edge weight = interatomic distance where a bond exists, 0 elsewhere.
edge_weights = np.where(edges, cart_dist, 0)
# Number of True entries in the mask; the matrix holds both (i, j) and (j, i).
np.count_nonzero(edges)

28

In [None]:
# Draw the standalone-molecule graph: one node per atom index, captioned with
# its CIF site label, and edges taken from the True entries of `edges`.
g = Network(notebook=True, height="720px", width="1280px")
g.add_nodes(list(range(n_atoms)), label=labels)
# Each True entry of the mask becomes an [i, j] pair of plain Python ints.
g.add_edges(np.argwhere(edges).tolist())
g.show("opt1.html")

# Option 2: Make a graph of a MOF with minimum image convention

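For every pair of atoms, this evaluates the separation across the 27 periodic images spanned by a 3x3x3 supercell (shifts of -1, 0 and +1 along each lattice vector), keeps the shortest distance, and then finds the bonded atom pairs from those distances.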

In [11]:
# Fractional coordinates arranged with one row per atom (columns x, y, z).
frac_xyz = np.transpose(np.array(
    [
        mof[key]
        for key in ("_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z")
    ],
    dtype=float,
))
# Fractional displacement for every ordered atom pair, flattened so that row
# i * n_atoms + j holds (atom i) - (atom j).
frac_diff = np.repeat(frac_xyz, n_atoms, axis=0) - np.tile(frac_xyz, (n_atoms, 1))

In [12]:
# Minimum-image distances: for every ordered atom pair, take the shortest
# Cartesian separation over the 27 periodic images reached by shifting the
# fractional displacement by -1, 0 or +1 along each lattice vector.
#
# The displacement is first wrapped into [-0.5, 0.5] per component. Shifting
# by whole lattice vectors does not change the periodic distance, and the
# wrap keeps the +/-1 scan centred on the nearest image even when the CIF
# stores atoms outside the [0, 1) range of the reference cell. `frac_diff`
# itself is left untouched.
frac_diff_wrapped = frac_diff - np.round(frac_diff)

# One row of pair distances per image shift.
cart_dist_mirrors = np.zeros((27, n_atoms ** 2))
for i, mirror in enumerate(product([-1, 0, 1], repeat=3)):
    # Shift every displacement by this image; transpose to (3, n_atoms**2)
    # so that the conversion matrix can be applied from the left.
    frac_diff_mirror = (frac_diff_wrapped + mirror).T
    cart_diff_mirror = frac2cart @ frac_diff_mirror
    cart_dist_mirrors[i] = np.linalg.norm(cart_diff_mirror, axis=0)
# Shortest distance over all images, folded back into a square matrix.
cart_dist = cart_dist_mirrors.min(0).reshape(n_atoms, n_atoms)

In [13]:
# Same bonding rule as for the standalone molecule, applied to the current
# `cart_dist` matrix: bonded when the distance is below the sum of the
# covalent radii plus the tolerance bond_tol.
bond_tol = 0.25
# Zero-distance pairs (e.g. an atom paired with itself) are never bonds.
lower_bound = cart_dist > 0
upper_bound = cart_dist < cov_r_mat + bond_tol
edges = lower_bound & upper_bound
# Edge weight = interatomic distance where a bond exists, 0 elsewhere.
edge_weights = np.where(edges, cart_dist, 0)
# Number of True entries in the mask; the matrix holds both (i, j) and (j, i).
np.count_nonzero(edges)

38

In [14]:
# Draw the periodic (minimum-image) graph: one node per atom index, captioned
# with its CIF site label, and edges taken from the True entries of `edges`.
g = Network(notebook=True, height="720px", width="1280px")
g.add_nodes(list(range(n_atoms)), label=labels)
# Each True entry of the mask becomes an [i, j] pair of plain Python ints.
g.add_edges(np.argwhere(edges).tolist())
g.show("opt2.html")