Skip to content

Commit

Permalink
Add contribution (#125)
Browse files Browse the repository at this point in the history
* add Frozen Featurizer descriptor
* add Mordred descriptor
  • Loading branch information
TsumiNa committed Jun 8, 2019
1 parent ba633c0 commit 6756a06
Show file tree
Hide file tree
Showing 22 changed files with 241 additions and 15 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -3,6 +3,7 @@ tqdm
seaborn
plotly
requests
mordred

###### Requirements with Version Specifiers ######
numpy == 1.16.*
Expand Down
60 changes: 60 additions & 0 deletions tests/extend_descriptors/descriptor/test_mordred.py
@@ -0,0 +1,60 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

import pandas as pd
import pytest
from mordred._base.pandas_module import MordredDataFrame
from rdkit import Chem

from xenonpy.contrib.extend_descriptors.descriptor import Mordred2DDescriptor


@pytest.fixture(scope='module')
def data():
    """Module-scoped test data for the Mordred descriptor tests.

    Yields a dict with three entries:

    * ``smis``     -- SMILES strings used as regular input,
    * ``mols``     -- the result of ``Chem.MolFromSmiles`` for each of them,
    * ``err_smis`` -- a SMILES list that includes malformed entries
      (for example one with an unclosed branch), used for the error path.
    """
    import warnings

    # suppress the NumPy "size changed" warnings for the whole test session
    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    valid_smiles = [
        'C(C(O)C1(O))C(CO)OC1O',
        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=CC=C3)CC(C4=CC=CC=C4)',
        ' CC(C)CC(C)CC(C)',
        'C(F)C(F)(F)',
    ]
    broken_smiles = [
        'C(C(O)C1(O))C(CO)OC1O',
        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=',
        'Ccccccc',
        'C(F)C(F)(F)',
    ]

    yield {
        'smis': valid_smiles,
        'mols': [Chem.MolFromSmiles(smiles) for smiles in valid_smiles],
        'err_smis': broken_smiles,
    }

    # code after the yield runs when the fixture is torn down
    print('test over')


def test_mordred_1(data):
    """SMILES input: default return type vs. ``return_type='df'``."""
    smiles = data['smis']

    # default return type keeps mordred's own DataFrame subclass
    default_result = Mordred2DDescriptor().transform(smiles)
    assert isinstance(default_result, MordredDataFrame)

    # return_type='df' must give something that is a pandas DataFrame
    df_result = Mordred2DDescriptor(return_type='df').transform(smiles)
    assert isinstance(df_result, pd.DataFrame)


def test_mordred_2(data):
    """RDKit Mol objects are accepted as input just like SMILES strings."""
    molecules = data['mols']
    result = Mordred2DDescriptor().transform(molecules)
    assert isinstance(result, MordredDataFrame)


def test_mordred_3(data):
    """A SMILES list with malformed entries must make ``transform`` raise ValueError."""
    featurizer = Mordred2DDescriptor()
    bad_smiles = data['err_smis']
    with pytest.raises(ValueError):
        featurizer.transform(bad_smiles)


# Allow running this test module directly as a script;
# pytest.main() is called without explicit arguments.
if __name__ == "__main__":
pytest.main()
8 changes: 7 additions & 1 deletion tests/foo/descriptor/test_foo.py
Expand Up @@ -2,8 +2,14 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from xenonpy.contrib.foo import hello_contrib
import pytest

from xenonpy.contrib.foo.descriptor import hello_contrib


def test_foo_1():
    """``hello_contrib`` returns the fixed greeting string."""
    greeting = hello_contrib()
    assert greeting == 'Hello contribution!'


# Allow running this test module directly as a script;
# pytest.main() is called without explicit arguments.
if __name__ == "__main__":
pytest.main()
1 change: 1 addition & 0 deletions travis/linux-win/py36.yml
Expand Up @@ -18,5 +18,6 @@ dependencies:
- pip
- pip:
- ruamel.yaml
- mordred
- pymatgen==2019.5.8
- tqdm
1 change: 1 addition & 0 deletions travis/linux-win/py37.yml
Expand Up @@ -18,5 +18,6 @@ dependencies:
- pip
- pip:
- ruamel.yaml
- mordred
- pymatgen==2019.5.8
- tqdm
1 change: 1 addition & 0 deletions travis/osx/py36.yml
Expand Up @@ -18,5 +18,6 @@ dependencies:
- pip
- pip:
- ruamel.yaml
- mordred
- pymatgen==2019.5.8
- tqdm
1 change: 1 addition & 0 deletions travis/osx/py37.yml
Expand Up @@ -18,5 +18,6 @@ dependencies:
- pip
- pip:
- ruamel.yaml
- mordred
- pymatgen==2019.5.8
- tqdm
Binary file added xenonpy/contrib/.DS_Store
Binary file not shown.
7 changes: 4 additions & 3 deletions xenonpy/contrib/README.md
Expand Up @@ -9,10 +9,11 @@ get merged into XenonPy, but whose interfaces may still change, or which
require some testing to see whether they can find broader acceptance.

When adding a project, please stick to the following directory structure:
Create a project directory in `contrib/`, and mirror the portions of the
TensorFlow tree that your project requires underneath `contrib/my_project/`.
1. Create a project directory in `contrib/`, and mirror the portions of the XenonPy tree that your project requires underneath `contrib/my_project/`.
2. Provide a `README.md` at the root of the project directory, e.g. `contrib/my_project/README.md`.

For example, let's say you create foo in `foo.py` and the testing codes

For example, let's say you create a project named `foo` with the source file `foo.py` and the test file
`foo_test.py`. If you were to merge those files directly into XenonPy,
they would live in `$ROOT/xenonpy/descriptor/foo.py` and
`$ROOT/tests/descriptor/foo_test.py`. In `contrib/`, they are part
Expand Down
20 changes: 20 additions & 0 deletions xenonpy/contrib/extend_descriptors/README.md
@@ -0,0 +1,20 @@
# Extend Descriptors

## FrozenFeaturizerDescriptor

This is sample code for creating artificial descriptors based on a trained neural network.
This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models.
The input is in the same format as the input of the descriptor used in the neural network.

By passing both the XenonPy descriptor object and the XenonPy frozen featurizer object into this class when creating the BaseFeaturizer, the output will be a dataframe like that of other typical XenonPy descriptors, with the number of columns equal to the number of neurons in the chosen hidden layers.


## Mordred2DDescriptor

This is sample code for calculating the 2D Mordred descriptors:
https://github.com/mordred-descriptor/mordred

This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models.

-----------
written by Stephen Wu, 2019.05.31
3 changes: 3 additions & 0 deletions xenonpy/contrib/extend_descriptors/__init__.py
@@ -0,0 +1,3 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
6 changes: 6 additions & 0 deletions xenonpy/contrib/extend_descriptors/descriptor/__init__.py
@@ -0,0 +1,6 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from .frozen_featurizer_descriptor import FrozenFeaturizerDescriptor
from .mordred_descriptor import Mordred2DDescriptor
@@ -0,0 +1,45 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from typing import Union

from xenonpy.descriptor import FrozenFeaturizer
from xenonpy.descriptor.base import BaseFeaturizer, BaseDescriptor


class FrozenFeaturizerDescriptor(BaseFeaturizer):
    """Featurizer that chains a descriptor calculator with a frozen featurizer.

    The input is first transformed by ``descriptor_calculator``; the resulting
    descriptors are then passed through ``frozen_featurizer`` and its output
    is returned as the final features.
    """

    def __init__(self, descriptor_calculator: Union[BaseDescriptor, BaseFeaturizer],
                 frozen_featurizer: FrozenFeaturizer, *,
                 on_errors='raise',
                 return_type='any'):
        """
        A featurizer for extracting artificial descriptors from neural networks.

        Parameters
        ----------
        descriptor_calculator : BaseFeaturizer or BaseDescriptor
            Converts the input data into descriptors, to keep consistency
            with the pre-trained model.
        frozen_featurizer : FrozenFeaturizer
            Extracts the artificial descriptors from the neural network.
        on_errors
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        return_type
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        """

        # n_jobs is pinned to 0 to skip the automatic wrapper in the
        # XenonPy BaseFeaturizer class
        super().__init__(return_type=return_type, on_errors=on_errors, n_jobs=0)
        self.__authors__ = ['Stephen Wu', 'TsumiNa']
        self.FP = descriptor_calculator
        self.ff = frozen_featurizer
        # result of the most recent featurize() call; None until then
        self.output = None

    def featurize(self, x, *, depth=1):
        """Run ``x`` through the descriptor calculator, then the frozen featurizer.

        Parameters
        ----------
        x
            Input accepted by ``descriptor_calculator.transform``.
        depth
            Passed as ``depth`` to ``frozen_featurizer.transform``.

        Returns
        -------
        The value returned by ``frozen_featurizer.transform`` (requested with
        ``return_type='df'``); it is also stored on ``self.output``.
        """
        # step 1: raw input -> descriptors
        descriptor_df = self.FP.transform(x)
        # step 2: descriptors -> frozen featurizer output
        hidden_df = self.ff.transform(descriptor_df, depth=depth, return_type='df')
        self.output = hidden_df
        return hidden_df

    @property
    def feature_labels(self):
        """Column labels of the most recent ``featurize`` result."""
        latest = self.output
        return latest.columns
@@ -0,0 +1,39 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from mordred import Calculator, descriptors
from rdkit import Chem

from xenonpy.descriptor.base import BaseFeaturizer


class Mordred2DDescriptor(BaseFeaturizer):
    """Featurizer that calculates Mordred descriptors with ``ignore_3D=True``.

    Input entries may be SMILES strings or RDKit ``Mol`` objects.
    """

    def __init__(self, *, on_errors='raise', return_type='any'):
        """
        Parameters
        ----------
        on_errors
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        return_type
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        """
        # fix n_jobs to be 0 to skip automatic wrapper in XenonPy BaseFeaturizer class
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
        # result of the most recent featurize() call; None until then
        self.output = None
        self.__authors__ = ['Stephen Wu', 'TsumiNa']

    def featurize(self, x):
        """Calculate the Mordred descriptors for ``x``.

        Parameters
        ----------
        x
            A single SMILES string, a single RDKit ``Mol``, or an iterable
            (list, tuple, pandas Series, ...) of them. SMILES strings and
            ``Mol`` objects may be mixed within one iterable.

        Returns
        -------
        The object returned by ``mordred.Calculator.pandas``; it is also
        stored on ``self.output``.

        Raises
        ------
        ValueError
            If RDKit can not build a molecule from one of the SMILES strings.
        """
        # A lone SMILES string or Mol is a batch of one. A str must be tested
        # explicitly because it is itself iterable.
        if isinstance(x, (str, Chem.rdchem.Mol)) or not hasattr(x, '__iter__'):
            x = [x]
        else:
            # materialise any other iterable so non-list containers are
            # processed entry by entry instead of being wrapped as one entry
            x = list(x)

        # Decide per entry rather than by looking at x[0] only: this supports
        # mixed input and does not index into an empty batch. An empty batch
        # is handed to mordred as an empty molecule list.
        x_mol = []
        for z in x:
            if isinstance(z, Chem.rdchem.Mol):
                x_mol.append(z)
                continue
            # anything that is not an RDKit Mol is assumed to be SMILES
            mol = Chem.MolFromSmiles(z)
            if mol is None:
                raise ValueError('can not convert Mol from SMILES %s' % z)
            x_mol.append(mol)

        calc = Calculator(descriptors, ignore_3D=True)
        self.output = calc.pandas(x_mol)
        return self.output

    @property
    def feature_labels(self):
        """Column labels of the most recent ``featurize`` result.

        Only meaningful after ``featurize`` has been called, because
        ``self.output`` is ``None`` before that.
        """
        return self.output.columns
2 changes: 0 additions & 2 deletions xenonpy/contrib/foo/__init__.py
@@ -1,5 +1,3 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from .foo import hello_contrib
5 changes: 5 additions & 0 deletions xenonpy/contrib/foo/descriptor/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) 2019. yoshida-lab. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from .foo import hello_contrib
@@ -1,4 +1,4 @@
# Copyright (c) 2019. TsumiNa. All rights reserved.
# Copyright (c) 2019. yoshida-lab. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

Expand Down
1 change: 1 addition & 0 deletions xenonpy/descriptor/compositions.py
Expand Up @@ -36,6 +36,7 @@ def __init__(self, *, one_hot_vec=False, n_jobs=-1, on_errors='raise', return_ty
super().__init__(n_jobs=n_jobs, on_errors=on_errors, return_type=return_type)
self.one_hot_vec = one_hot_vec
self._elems = self._elements.index.tolist()
self.__authors__ = ['TsumiNa']

def mix_function(self, elems, nums):
vec = np.zeros(len(self._elems), dtype=np.int)
Expand Down

0 comments on commit 6756a06

Please sign in to comment.