In [None]:
import dmol
import networkx as nx
import rdkit, rdkit.Chem, rdkit.Chem.rdDepictor, rdkit.Chem.Draw
import networkx as nx
import numpy as np
import random
import matplotlib.pyplot as plt
from networkx.drawing.nx_agraph import pygraphviz_layout
import requests
import time

In [None]:
def custom_layout(G, prog, ratio, args):
    """Compute node positions for ``G`` with a Graphviz layout program.

    The graph is converted to a pygraphviz ``AGraph``, laid out with
    ``prog``, and every node's ``pos`` attribute is parsed into a tuple of
    floats.

    Parameters
    ----------
    G : networkx graph
        Graph to lay out.
    prog : str
        Name of the layout program handed to ``AGraph.layout``.
    ratio : float
        Accepted so existing callers keep working; this function does not
        read it.
    args : str
        Extra arguments forwarded to the layout program.

    Returns
    -------
    dict
        Maps each node of ``G`` to a tuple of float coordinates. A node whose
        position cannot be read is reported on stdout and placed at
        ``(0.0, 0.0)``.
    """
    import pygraphviz

    A = nx.nx_agraph.to_agraph(G)
    A.layout(prog=prog, args=args)
    # Note: this attribute is set only after layout() has already been run.
    A.graph_attr.update(size=2.1)
    node_pos = {}
    for n in G:
        node = pygraphviz.Node(A, n)
        try:
            xs = node.attr["pos"].split(",")
            node_pos[n] = tuple(float(x) for x in xs)
        except Exception:
            # Best-effort fallback for a missing or unparsable "pos" value.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate so the cell can still be interrupted.
            print("no position for node", n)
            node_pos[n] = (0.0, 0.0)
    return node_pos

In [None]:
def random_compound(n=10, filter_length=100):
    """Fetch up to ``n`` random PubChem compounds as ``(SMILES, name)`` pairs.

    ``n`` compound IDs are drawn at random from ``range(10**8)`` and looked
    up with a single PUG REST property request.

    Parameters
    ----------
    n : int
        Number of random compound IDs to request. Values ``<= 0`` return an
        empty list without contacting PubChem.
    filter_length : int
        Only compounds whose IUPAC name is shorter than this many characters
        are kept.

    Returns
    -------
    list of (str, str)
        ``(CanonicalSMILES, IUPACName)`` for every returned record that has
        both fields, a short enough name, and no "." in its SMILES. The list
        is empty when the request times out or the reply has no property
        table.
    """
    if n <= 0:
        return []
    # Request exactly as many CIDs as the caller asked for.
    cids = np.random.choice(10**8, size=n).astype(str)
    s = ",".join(cids)
    url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{s}/property/IUPACName,CanonicalSMILES/JSON"
    try:
        reply = requests.get(
            url,
            # NOTE(review): these query parameters are sent unchanged from the
            # original code; whether this endpoint uses them is not visible
            # here -- confirm against the PUG REST documentation.
            params={"Threshold": 90, "MaxRecords": 1},
            headers={"accept": "text/json"},
            timeout=10,
        )
    except requests.exceptions.Timeout:
        print("Pubchem seems to be down right now ️☠️☠️")
        return []
    # Decode the body once instead of re-parsing it for every field access.
    payload = reply.json()
    try:
        properties = payload["PropertyTable"]["Properties"]
    except KeyError:
        # No property table in the reply: nothing usable came back.
        return []
    data = []
    # Walk the records that were actually returned; there may be fewer than
    # n, so indexing by range(n) would run off the end of the list.
    for record in properties[:n]:
        try:
            smi, name = record["CanonicalSMILES"], record["IUPACName"]
        except KeyError:
            # Skip records missing either requested property.
            continue
        if len(name) < filter_length and "." not in smi:
            data.append((smi, name))
    return data

In [None]:
# Atomic numbers seen so far; smiles2graph() adds to this set as it parses
# molecules. It starts out holding 6 (carbon).
elements = {6}


def smiles2graph(sml):
    """Turn a SMILES string into a networkx graph of its atoms and bonds.

    Each atom becomes a node keyed by its RDKit atom index and carrying an
    ``element`` attribute with the atomic number; each bond becomes an
    undirected edge between its two atom indices. ``rdkit.Chem.AddHs`` is not
    applied to the parsed molecule.

    Side effect: every atomic number encountered is added to the
    module-level ``elements`` set.

    Returns ``None`` when RDKit cannot parse ``sml``.
    """
    mol = rdkit.Chem.MolFromSmiles(sml)
    if mol is None:
        return None

    graph = nx.Graph()
    for atom in mol.GetAtoms():
        atomic_num = atom.GetAtomicNum()
        elements.add(atomic_num)
        graph.add_node(atom.GetIdx(), element=atomic_num)
    graph.add_edges_from(
        (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) for bond in mol.GetBonds()
    )
    return graph

In [None]:
# Accumulator for downloaded (SMILES, name) pairs. It lives in its own cell,
# separate from the download loop that appends to it.
examples = list()

In [None]:
# Number of (SMILES, name) pairs to collect before stopping.
N = 1000
# NOTE: random_compound() returns an empty list on a timeout, so this loop
# keeps going for as long as every batch comes back empty.
while True:
    if len(examples) >= N:
        break
    examples += random_compound()
    # Running total, printed on one line as a progress indicator.
    print(len(examples), end=",")
    # Short pause between requests (presumably to go easy on PubChem).
    time.sleep(0.2)

In [None]:
# Merge the graphs of the first 25 examples into one graph with a separate
# component per molecule. SMILES that fail to parse are skipped, so G stays
# None if none of them parse.
G = None
for smi, _ in examples[:25]:
    g = smiles2graph(smi)
    if g is not None:
        # The newest molecule goes first in the union, as before.
        G = g if G is None else nx.disjoint_union(g, G)

In [None]:
# Ascending list of the atomic numbers seen so far, with 6 (carbon) moved to
# the front so it gets index 0.
lelements = sorted(elements)
lelements.insert(0, lelements.pop(lelements.index(6)))
lelements

In [None]:
# Position of each atomic number in lelements; used to pick a "C<index>"
# matplotlib cycle color per element.
ed = {element: rank for rank, element in enumerate(lelements)}
c = ["C" + str(ed[element]) for _, element in G.nodes(data="element")]

background = "#f5f4e9"
fig = plt.figure(num=1, figsize=(15, 5))
# Graphviz "dot" layout; nx.layout.spring_layout(G, iterations=100) was the
# alternative tried here.
pos = custom_layout(G, prog="dot", ratio=1.5, args="-Gmaxiter=5000")
nx.draw(G, pos=pos, node_color=c, node_size=5)

# The background is applied after drawing, to both the axes and the figure.
ax = plt.gca()
ax.set_facecolor(background)
ax.axis("off")
fig.set_facecolor(background)
plt.tight_layout()
plt.savefig("mols.png")

In [None]:
# Largest first and second layout coordinate over nodes 0 .. len(pos) - 1.
node_ids = range(len(pos))
max(pos[i][0] for i in node_ids), max(pos[i][1] for i in node_ids)