# <font color = 545AA7> Internet Database Access </font>

The goal of this notebook is to access online chemical databases. This requires the databases be accessible using a Python API (application programming interface) and requires the use of the Python **requests** modeuls that is included with all installations of Python.

In [68]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import requests
from pprint import pprint
from IPython.display import SVG

from rdkit.Chem import AllChem, Descriptors

import time

from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.ipython_useSVG = True

## <font color = 545AA7> Chemical Abstract Services </font>




In [113]:
search_base_url = "https://commonchemistry.cas.org/api/search?q="
 

data = requests.get(search_base_url + 'morphine').json()
pprint(data)

{'count': 1,
 'results': [{'images': ['<svg width="205" viewBox="0 0 205 160" '
                         'style="fill-opacity:1; color-rendering:auto; '
                         'color-interpolation:auto; text-rendering:auto; '
                         'stroke:black; stroke-linecap:square; '
                         'stroke-miterlimit:10; shape-rendering:auto; '
                         'stroke-opacity:1; fill:black; stroke-dasharray:none; '
                         'font-weight:normal; stroke-width:1; '
                         "font-family:'Open Sans'; font-style:normal; "
                         'stroke-linejoin:miter; font-size:12; '
                         'stroke-dashoffset:0; image-rendering:auto;" '
                         'height="160" class="cas-substance-image" '
                         'xmlns:xlink="http://www.w3.org/1999/xlink" '
                         'xmlns="http://www.w3.org/2000/svg"><svg '
                         'class="cas-substance-single-component"><rect y=

In [56]:
base_url = "https://commonchemistry.cas.org/api/detail?"
casrn = "130-95-0" 

data = requests.get(base_url + "cas_rn=" + casrn).json()

In [57]:
pprint(data)

{'canonicalSmile': 'OC(C=1C=CN=C2C=CC(OC)=CC21)C3N4CCC(C3)C(C=C)C4',
 'experimentalProperties': [{'name': 'Melting Point',
                             'property': '177 °C (decomp)',
                             'sourceNumber': 1}],
 'hasMolfile': True,
 'images': ['<svg width="280" viewBox="0 0 280 172" style="fill-opacity:1; '
            'color-rendering:auto; color-interpolation:auto; '
            'text-rendering:auto; stroke:black; stroke-linecap:square; '
            'stroke-miterlimit:10; shape-rendering:auto; stroke-opacity:1; '
            'fill:black; stroke-dasharray:none; font-weight:normal; '
            "stroke-width:1; font-family:'Open Sans'; font-style:normal; "
            'stroke-linejoin:miter; font-size:12; stroke-dashoffset:0; '
            'image-rendering:auto;" height="172" class="cas-substance-image" '
            'xmlns:xlink="http://www.w3.org/1999/xlink" '
            'xmlns="http://www.w3.org/2000/svg"><svg '
            'class="cas-substance-single-compo

In [58]:
data['canonicalSmile']

'OC(C=1C=CN=C2C=CC(OC)=CC21)C3N4CCC(C3)C(C=C)C4'

In [59]:
from rdkit import Chem

In [61]:
mol = Chem.MolFromSmiles(data['canonicalSmile'])

In [67]:
Chem.Descriptors.MolWt(mol)

324.4240000000001

## <font color = 545AA7> PubChem </font>


In [97]:
prolog = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
data_input = "/compound/smiles/"
smiles = 'OC(C=1C=CN=C2C=CC(OC)=CC21)C3N4CCC(C3)C(C=C)C4'
operation = "/property/Volume3D"
output = "/txt"

url = prolog + data_input + smiles + operation + output


res = requests.get(url)
print(res.text[:-2])

252.


In [89]:
print(url)

https://pubchem.ncbi.nlm.nih.gov/rest/pugcompound/smiles/OC(C=1C=CN=C2C=CC(OC)=CC21)C3N4CCC(C3)C(C=C)C4/property/MolecularFormulatxt


In [81]:
data = pd.read_csv('smiles.csv')

In [84]:
alcohols = []
for compound in data['SMILES']:
    mol = Chem.MolFromSmiles(compound)
    if Chem.Descriptors.fr_Al_OH(mol) + Chem.Descriptors.fr_Ar_OH(mol) > 0:
        alcohols.append(compound)

In [107]:
prolog = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
data_input = "/compound/smiles/"
#smiles = 'OC(C=1C=CN=C2C=CC(OC)=CC21)C3N4CCC(C3)C(C=C)C4'
operation = "/property/Volume3D"
output = "/txt"

volume = []
for alcohol in alcohols[:100]:
    res = requests.get(prolog + data_input + alcohol + operation + output)
    volume.append(res.text[:-2])
    time.sleep(1)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [110]:
[vol for vol in volume if len(vol) > 0 and vol[0] != 'S']

['49.',
 '74.',
 '67.',
 '41.',
 '7',
 '28.',
 '119.',
 '62.',
 '54.',
 '223.',
 '54.',
 '13',
 '131.',
 '80.',
 '66.',
 '79.',
 '79.',
 '99.',
 '91.',
 '143.',
 '139.',
 '6',
 '6',
 '110.',
 '125.',
 '92.',
 '156.',
 '118.',
 '92.',
 '14',
 '141.',
 '11',
 '10',
 '52.',
 '99.',
 '75.',
 '75.',
 '91.',
 '130.',
 '88.',
 '63.',
 '12',
 '75.',
 '73.',
 '76.',
 '109.',
 '93.',
 '88.',
 '89.',
 '89.',
 '87.',
 '106.',
 '102.',
 '9',
 '128.',
 '11',
 '145.',
 '136.',
 '158.',
 '156.',
 '171.',
 '153.',
 '184.',
 '197.',
 '77.',
 '111.',
 '100.',
 '79.',
 '141.',
 '132.',
 '124.',
 '90.',
 '14',
 '90.',
 '73.',
 '62.',
 '269.',
 '65.',
 '139.',
 '105.',
 '9',
 '78.',
 '39.',
 '92.',
 '92.',
 '79.',
 '74.',
 '92.',
 '105.',
 '118.']