Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add Frozen Featurizer descriptor * add Mordred descriptor
- Loading branch information
Showing
22 changed files
with
241 additions
and
15 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
import pandas as pd | ||
import pytest | ||
from mordred._base.pandas_module import MordredDataFrame | ||
from rdkit import Chem | ||
|
||
from xenonpy.contrib.extend_descriptors.descriptor import Mordred2DDescriptor | ||
|
||
|
||
@pytest.fixture(scope='module')
def data():
    """Module-scoped fixture with SMILES strings, their RDKit molecules and a
    second SMILES list used by the error-handling test.

    Yields a dict with the keys ``smis``, ``mols`` and ``err_smis``.
    """
    # silence the NumPy "size changed" warnings before anything is computed
    import warnings
    print('ignore NumPy RuntimeWarning\n')
    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
        warnings.filterwarnings("ignore", message=pattern)

    smis = [
        'C(C(O)C1(O))C(CO)OC1O',
        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=CC=C3)CC(C4=CC=CC=C4)',
        ' CC(C)CC(C)CC(C)',
        'C(F)C(F)(F)',
    ]

    # RDKit molecule objects built from the SMILES above
    mols = list(map(Chem.MolFromSmiles, smis))

    # presumably contains entries RDKit cannot parse (the second one is truncated)
    err_smis = [
        'C(C(O)C1(O))C(CO)OC1O',
        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=',
        'Ccccccc',
        'C(F)C(F)(F)',
    ]

    yield {'smis': smis, 'mols': mols, 'err_smis': err_smis}

    # teardown: runs once after the last test of the module
    print('test over')
|
||
|
||
def test_mordred_1(data):
    """SMILES input yields a MordredDataFrame by default and a pandas
    DataFrame when ``return_type='df'`` is requested."""
    smiles = data['smis']

    default_result = Mordred2DDescriptor().transform(smiles)
    assert isinstance(default_result, MordredDataFrame)

    df_result = Mordred2DDescriptor(return_type='df').transform(smiles)
    assert isinstance(df_result, pd.DataFrame)
|
||
|
||
def test_mordred_2(data):
    """RDKit Mol input is accepted and yields a MordredDataFrame."""
    result = Mordred2DDescriptor().transform(data['mols'])
    assert isinstance(result, MordredDataFrame)
|
||
|
||
def test_mordred_3(data):
    """Transforming the ``err_smis`` list must raise ValueError."""
    featurizer = Mordred2DDescriptor()
    # only the transform call is expected to raise, so the constructor stays outside
    with pytest.raises(ValueError):
        featurizer.transform(data['err_smis'])
|
||
|
||
# allow running this test module directly as a script
if __name__ == '__main__':
    pytest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,6 @@ dependencies: | |
- pip | ||
- pip: | ||
- ruamel.yaml | ||
- mordred | ||
- pymatgen==2019.5.8 | ||
- tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,6 @@ dependencies: | |
- pip | ||
- pip: | ||
- ruamel.yaml | ||
- mordred | ||
- pymatgen==2019.5.8 | ||
- tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,6 @@ dependencies: | |
- pip | ||
- pip: | ||
- ruamel.yaml | ||
- mordred | ||
- pymatgen==2019.5.8 | ||
- tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,6 @@ dependencies: | |
- pip | ||
- pip: | ||
- ruamel.yaml | ||
- mordred | ||
- pymatgen==2019.5.8 | ||
- tqdm |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Extend Descriptors | ||
|
||
## FrozenFeaturizerDescriptor | ||
|
||
This is sample code for creating artificial descriptors based on a trained neural network. | ||
This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models. | ||
The input is in the same format as the input of the descriptor used in the neural network. | ||
|
||
When both a XenonPy descriptor object and a XenonPy frozen featurizer object are passed to this class on creation, the resulting BaseFeaturizer outputs a dataframe in the same format as other typical XenonPy descriptors, with the number of columns equal to the number of neurons in the chosen hidden layers. | ||
|
||
|
||
## Mordred2DDescriptor | ||
|
||
This is sample code for calculating 2D Mordred descriptors: | ||
https://github.com/mordred-descriptor/mordred | ||
|
||
This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models. | ||
|
||
----------- | ||
written by Stephen Wu, 2019.05.31 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
from .frozen_featurizer_descriptor import FrozenFeaturizerDescriptor | ||
from .mordred_descriptor import Mordred2DDescriptor |
45 changes: 45 additions & 0 deletions
45
xenonpy/contrib/extend_descriptors/descriptor/frozen_featurizer_descriptor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
from typing import Union | ||
|
||
from xenonpy.descriptor import FrozenFeaturizer | ||
from xenonpy.descriptor.base import BaseFeaturizer, BaseDescriptor | ||
|
||
|
||
class FrozenFeaturizerDescriptor(BaseFeaturizer):
    """Featurizer that chains a descriptor calculator with a frozen featurizer.

    The input is first converted by ``descriptor_calculator``; the result is
    then passed through ``frozen_featurizer`` to obtain the hidden-layer
    outputs of a neural network as a dataframe.
    """

    def __init__(self, descriptor_calculator: Union[BaseDescriptor, BaseFeaturizer],
                 frozen_featurizer: FrozenFeaturizer, *,
                 on_errors='raise',
                 return_type='any'):
        """
        A featurizer for extracting artificial descriptors from neural networks

        Parameters
        ----------
        descriptor_calculator : BaseFeaturizer or BaseDescriptor
            Convert input data into descriptors to keep consistency with the pre-trained model.
        frozen_featurizer : FrozenFeaturizer
            Extracting artificial descriptors from neural networks
        on_errors : str
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        return_type : str
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        """

        # n_jobs is pinned to 0 so that the automatic wrapper of the
        # XenonPy BaseFeaturizer class is skipped
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
        self.FP = descriptor_calculator
        self.ff = frozen_featurizer
        # result of the most recent featurize() call; None until then
        self.output = None
        self.__authors__ = ['Stephen Wu', 'TsumiNa']

    def featurize(self, x, *, depth=1):
        """Compute the hidden-layer dataframe for ``x``.

        ``x`` is transformed by the descriptor calculator, then by the frozen
        featurizer with the given ``depth``. The result is cached on
        ``self.output`` and returned.
        """
        # input -> descriptor dataframe -> hidden layer dataframe
        hidden = self.ff.transform(self.FP.transform(x), depth=depth, return_type='df')
        self.output = hidden
        return hidden

    @property
    def feature_labels(self):
        """Column labels of the most recent output.

        Reads ``self.output.columns``, so ``featurize`` must have been
        called first (``self.output`` is ``None`` before that).
        """
        # column names based on xenonpy frozen featurizer setting
        latest = self.output
        return latest.columns
39 changes: 39 additions & 0 deletions
39
xenonpy/contrib/extend_descriptors/descriptor/mordred_descriptor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
from mordred import Calculator, descriptors | ||
from rdkit import Chem | ||
|
||
from xenonpy.descriptor.base import BaseFeaturizer | ||
|
||
|
||
class Mordred2DDescriptor(BaseFeaturizer):
    """Featurizer that calculates 2D Mordred descriptors.

    Samples may be given as RDKit ``Mol`` objects or as SMILES strings;
    SMILES strings are converted with ``Chem.MolFromSmiles`` first.
    """

    def __init__(self, *, on_errors='raise', return_type='any'):
        """
        Parameters
        ----------
        on_errors : str
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        return_type : str
            Forwarded unchanged to ``BaseFeaturizer.__init__``.
        """
        # fix n_jobs to be 0 to skip automatic wrapper in XenonPy BaseFeaturizer class
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
        # result of the most recent featurize() call; None until then
        self.output = None
        self.__authors__ = ['Stephen Wu', 'TsumiNa']

    def featurize(self, x):
        """Calculate the descriptors for one sample or a collection of samples.

        Parameters
        ----------
        x
            A single SMILES string, a single RDKit ``Mol``, or an iterable of
            them (list, tuple, pandas Series, ...). SMILES strings and ``Mol``
            objects may be mixed within one collection.

        Returns
        -------
        The object returned by ``Calculator.pandas`` for the molecules; it is
        also stored on ``self.output``.

        Raises
        ------
        ValueError
            If a SMILES string can not be converted into a ``Mol``.
        """
        # A bare SMILES string or Mol is one sample. Strings are checked
        # explicitly because iterating them would split them into characters.
        if isinstance(x, (str, Chem.rdchem.Mol)):
            x = [x]
        elif not isinstance(x, list):
            try:
                # other iterables are a collection of samples, not one sample
                x = list(x)
            except TypeError:
                # non-iterable object: keep the original single-sample wrapping
                x = [x]

        # Decide per item instead of looking only at the first one, so empty
        # and mixed inputs are handled; anything that is not a Mol is assumed
        # to be a SMILES string.
        x_mol = []
        for z in x:
            if isinstance(z, Chem.rdchem.Mol):
                x_mol.append(z)
                continue
            mol = Chem.MolFromSmiles(z)
            if mol is None:
                raise ValueError('can not convert Mol from SMILES %s' % z)
            x_mol.append(mol)

        calc = Calculator(descriptors, ignore_3D=True)
        self.output = calc.pandas(x_mol)
        return self.output

    @property
    def feature_labels(self):
        """Column labels of the most recent output.

        Reads ``self.output.columns``, so ``featurize`` must have been
        called first (``self.output`` is ``None`` before that).
        """
        return self.output.columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,3 @@ | ||
# Copyright (c) 2019. TsumiNa. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
from .foo import hello_contrib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright (c) 2019. yoshida-lab. All rights reserved. | ||
# Use of this source code is governed by a BSD-style | ||
# license that can be found in the LICENSE file. | ||
|
||
from .foo import hello_contrib |
2 changes: 1 addition & 1 deletion
2
xenonpy/contrib/foo/foo.py → xenonpy/contrib/foo/descriptor/foo.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.