Add contribution (#125)

* add Frozen Featurizer descriptor * add Mordred descriptor
yoshida-lab · Jun 8, 2019 · 6756a06 · 6756a06
1 parent ba633c0
commit 6756a06
Show file tree

Hide file tree

Showing 22 changed files with 241 additions and 15 deletions.
diff --git a/licences/placeholder → licences/.gitkeep b/licences/placeholder → licences/.gitkeep
diff --git a/requirements.txt b/requirements.txt
@@ -3,6 +3,7 @@ tqdm
 seaborn
 plotly
 requests
+mordred
 
 ###### Requirements with Version Specifiers ######
 numpy == 1.16.*

diff --git a/tests/extend_descriptors/descriptor/test_mordred.py b/tests/extend_descriptors/descriptor/test_mordred.py
@@ -0,0 +1,60 @@
+#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
+
+import pandas as pd
+import pytest
+from mordred._base.pandas_module import MordredDataFrame
+from rdkit import Chem
+
+from xenonpy.contrib.extend_descriptors.descriptor import Mordred2DDescriptor
+
+
+@pytest.fixture(scope='module')
+def data():
+    """
+    Module-scoped fixture providing the inputs shared by the tests below.
+
+    Yields a dict with three entries: ``smis`` (SMILES strings), ``mols`` (the
+    result of ``Chem.MolFromSmiles`` for each of them) and ``err_smis`` (the
+    list that ``test_mordred_3`` expects to be rejected with ``ValueError``).
+    """
+    import warnings
+
+    # silence the NumPy "size changed" warnings
+    print('ignore NumPy RuntimeWarning\n')
+    for pattern in ("numpy.dtype size changed", "numpy.ndarray size changed"):
+        warnings.filterwarnings("ignore", message=pattern)
+
+    smis = [
+        'C(C(O)C1(O))C(CO)OC1O',
+        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=CC=C3)CC(C4=CC=CC=C4)',
+        ' CC(C)CC(C)CC(C)',
+        'C(F)C(F)(F)',
+    ]
+
+    mols = list(map(Chem.MolFromSmiles, smis))
+
+    err_smis = [
+        'C(C(O)C1(O))C(CO)OC1O',
+        'CC(C1=CC=CC=C1)CC(C2=CC=CC=C2)CC(C3=CC=',
+        'Ccccccc',
+        'C(F)C(F)(F)',
+    ]
+    yield {'smis': smis, 'mols': mols, 'err_smis': err_smis}
+
+    # runs once the module's tests are finished with the fixture
+    print('test over')
+
+
+def test_mordred_1(data):
+    """SMILES input: check the result type for the default and the 'df' return type."""
+    smiles = data['smis']
+
+    default_result = Mordred2DDescriptor().transform(smiles)
+    assert isinstance(default_result, MordredDataFrame)
+
+    df_result = Mordred2DDescriptor(return_type='df').transform(smiles)
+    assert isinstance(df_result, pd.DataFrame)
+
+
+def test_mordred_2(data):
+    """RDKit Mol input: the default return type is a MordredDataFrame."""
+    result = Mordred2DDescriptor().transform(data['mols'])
+    assert isinstance(result, MordredDataFrame)
+
+
+def test_mordred_3(data):
+    """The ``err_smis`` list must be rejected with ValueError."""
+    featurizer = Mordred2DDescriptor()
+    bad_smiles = data['err_smis']
+    with pytest.raises(ValueError):
+        featurizer.transform(bad_smiles)
+
+
+# Invoke pytest when this file is executed directly as a script.
+if __name__ == "__main__":
+    pytest.main()
diff --git a/tests/foo/descriptor/test_foo.py b/tests/foo/descriptor/test_foo.py
@@ -2,8 +2,14 @@
 #  Use of this source code is governed by a BSD-style
 #  license that can be found in the LICENSE file.
 
-from xenonpy.contrib.foo import hello_contrib
+import pytest
+
+from xenonpy.contrib.foo.descriptor import hello_contrib
 
 
 def test_foo_1():
     """The sample contribution returns its fixed greeting."""
     greeting = hello_contrib()
     assert greeting == 'Hello contribution!'
+
+
+# Invoke pytest when this file is executed directly as a script.
+if __name__ == "__main__":
+    pytest.main()
diff --git a/travis/linux-win/py36.yml b/travis/linux-win/py36.yml
@@ -18,5 +18,6 @@ dependencies:
   - pip
   - pip:
       - ruamel.yaml
+      - mordred
       - pymatgen==2019.5.8
       - tqdm
diff --git a/travis/linux-win/py37.yml b/travis/linux-win/py37.yml
@@ -18,5 +18,6 @@ dependencies:
   - pip
   - pip:
       - ruamel.yaml
+      - mordred
       - pymatgen==2019.5.8
       - tqdm
diff --git a/travis/osx/py36.yml b/travis/osx/py36.yml
@@ -18,5 +18,6 @@ dependencies:
   - pip
   - pip:
       - ruamel.yaml
+      - mordred
       - pymatgen==2019.5.8
       - tqdm
diff --git a/travis/osx/py37.yml b/travis/osx/py37.yml
@@ -18,5 +18,6 @@ dependencies:
   - pip
   - pip:
       - ruamel.yaml
+      - mordred
       - pymatgen==2019.5.8
       - tqdm
diff --git a/xenonpy/contrib/.DS_Store b/xenonpy/contrib/.DS_Store
diff --git a/xenonpy/contrib/README.md b/xenonpy/contrib/README.md
@@ -9,10 +9,11 @@ get merged into XenonPy, but whose interfaces may still change, or which
 require some testing to see whether they can find broader acceptance.
 
 When adding a project, please stick to the following directory structure:
-Create a project directory in `contrib/`, and mirror the portions of the
-TensorFlow tree that your project requires underneath `contrib/my_project/`.
+1. Create a project directory in `contrib/`, and mirror the portions of the XenonPy tree that your project requires underneath `contrib/my_project/`.
+2. Provide a `README.md` under the root of the project directory, e.g. `contrib/my_project/README.md`.
 
-For example, let's say you create foo in `foo.py` and the testing codes
+
+For example, let's say you create a project named `foo` with source file `foo.py` and the testing file
 `foo_test.py`. If you were to merge those files directly into XenonPy,
 they would live in `$ROOT/xenonpy/descriptor/foo.py` and
 `$ROOT/tests/descriptor/foo_test.py`. In `contrib/`, they are part

diff --git a/xenonpy/contrib/extend_descriptors/README.md b/xenonpy/contrib/extend_descriptors/README.md
@@ -0,0 +1,20 @@
+# Extend Descriptors
+
+## FrozenFeaturizerDescriptor
+
+This is sample code for creating artificial descriptors based on a trained neural network.
+This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models.
+The input is in the same format as the input of the descriptor used in the neural network.
+
+By passing both the XenonPy descriptor object and the XenonPy frozen featurizer object into this class when creating the BaseFeaturizer, the output will be a dataframe in the same format as other typical XenonPy descriptors, with the number of columns equal to the number of neurons in the chosen hidden layers.
+
+
+## Mordred2DDescriptor
+
+This is sample code for calculating the 2D Mordred descriptors:
+https://github.com/mordred-descriptor/mordred
+
+This code creates a BaseFeaturizer object in XenonPy that can be used as input for training models.
+
+-----------
+written by Stephen Wu, 2019.05.31
diff --git a/xenonpy/contrib/extend_descriptors/__init__.py b/xenonpy/contrib/extend_descriptors/__init__.py
@@ -0,0 +1,3 @@
+#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
diff --git a/xenonpy/contrib/extend_descriptors/descriptor/__init__.py b/xenonpy/contrib/extend_descriptors/descriptor/__init__.py
@@ -0,0 +1,6 @@
+#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
+
+from .frozen_featurizer_descriptor import FrozenFeaturizerDescriptor
+from .mordred_descriptor import Mordred2DDescriptor
diff --git a/xenonpy/contrib/extend_descriptors/descriptor/frozen_featurizer_descriptor.py b/xenonpy/contrib/extend_descriptors/descriptor/frozen_featurizer_descriptor.py
@@ -0,0 +1,45 @@
+#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
+
+from typing import Union
+
+from xenonpy.descriptor import FrozenFeaturizer
+from xenonpy.descriptor.base import BaseFeaturizer, BaseDescriptor
+
+
+class FrozenFeaturizerDescriptor(BaseFeaturizer):
+    """
+    A featurizer for extracting artificial descriptors from neural networks.
+
+    The input is first turned into descriptors by ``descriptor_calculator``;
+    those descriptors are then passed through ``frozen_featurizer``.
+    """
+
+    def __init__(self,
+                 descriptor_calculator: Union[BaseDescriptor, BaseFeaturizer],
+                 frozen_featurizer: FrozenFeaturizer,
+                 *,
+                 on_errors='raise',
+                 return_type='any'):
+        """
+        Parameters
+        ----------
+        descriptor_calculator : BaseFeaturizer or BaseDescriptor
+            Convert input data into descriptors to keep consistency with the pre-trained model.
+        frozen_featurizer : FrozenFeaturizer
+            Extracting artificial descriptors from neural networks
+        on_errors
+            Forwarded unchanged to ``BaseFeaturizer``.
+        return_type
+            Forwarded unchanged to ``BaseFeaturizer``.
+        """
+
+        # n_jobs is pinned to 0 to skip the automatic wrapper in the XenonPy BaseFeaturizer class
+        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
+        self.FP = descriptor_calculator
+        self.ff = frozen_featurizer
+        # holds the most recent featurize() result; read by feature_labels
+        self.output = None
+        self.__authors__ = ['Stephen Wu', 'TsumiNa']
+
+    def featurize(self, x, *, depth=1):
+        """
+        Run ``x`` through the descriptor calculator and then the frozen featurizer.
+
+        ``depth`` is forwarded to ``frozen_featurizer.transform``. The result
+        is stored in ``self.output`` and returned.
+        """
+        # step 1: input -> descriptor dataframe
+        descriptor_df = self.FP.transform(x)
+        # step 2: descriptor dataframe -> hidden layer dataframe
+        hidden_df = self.ff.transform(descriptor_df, depth=depth, return_type='df')
+        self.output = hidden_df
+        return hidden_df
+
+    @property
+    def feature_labels(self):
+        """
+        Column names of the most recent ``featurize`` result.
+
+        These come from the frozen featurizer output; ``self.output`` is
+        ``None`` until ``featurize`` has been called.
+        """
+        latest = self.output
+        return latest.columns
diff --git a/xenonpy/contrib/extend_descriptors/descriptor/mordred_descriptor.py b/xenonpy/contrib/extend_descriptors/descriptor/mordred_descriptor.py
@@ -0,0 +1,39 @@
+#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
+
+from mordred import Calculator, descriptors
+from rdkit import Chem
+
+from xenonpy.descriptor.base import BaseFeaturizer
+
+
+class Mordred2DDescriptor(BaseFeaturizer):
+    """
+    Featurizer that calculates the 2D Mordred descriptors of molecules.
+
+    Each input entry is either an RDKit ``Mol`` object or a SMILES string.
+    """
+
+    def __init__(self, *, on_errors='raise', return_type='any'):
+        """
+        Parameters
+        ----------
+        on_errors
+            Forwarded unchanged to ``BaseFeaturizer``.
+        return_type
+            Forwarded unchanged to ``BaseFeaturizer``.
+        """
+        # fix n_jobs to be 0 to skip automatic wrapper in XenonPy BaseFeaturizer class
+        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type)
+        # holds the most recent featurize() result; read by feature_labels
+        self.output = None
+        self.__authors__ = ['Stephen Wu', 'TsumiNa']
+
+    def featurize(self, x):
+        """
+        Calculate the 2D Mordred descriptors for every entry of ``x``.
+
+        Parameters
+        ----------
+        x : SMILES string, RDKit Mol, or list/tuple of them
+            Anything that is not a list or tuple is treated as a single entry.
+            Each entry is checked on its own, so ``Mol`` objects and SMILES
+            strings may be mixed; an entry that is not a ``Mol`` is assumed
+            to be SMILES.
+
+        Returns
+        -------
+        The object returned by ``Calculator.pandas``; it is also stored in
+        ``self.output``.
+
+        Raises
+        ------
+        ValueError
+            If ``Chem.MolFromSmiles`` returns ``None`` for an entry.
+        """
+        # a single entry becomes a one-element list
+        if not isinstance(x, (list, tuple)):
+            x = [x]
+
+        # Decide per entry instead of inspecting only x[0]: mixed inputs work
+        # and an empty input no longer fails on the x[0] lookup (it is passed
+        # on to the Mordred calculator as an empty list).
+        x_mol = []
+        for z in x:
+            if isinstance(z, Chem.rdchem.Mol):
+                x_mol.append(z)
+                continue
+            mol = Chem.MolFromSmiles(z)
+            if mol is None:
+                # (z,) keeps the formatting valid even if z itself is a tuple
+                raise ValueError('can not convert Mol from SMILES %s' % (z,))
+            x_mol.append(mol)
+
+        calc = Calculator(descriptors, ignore_3D=True)
+        self.output = calc.pandas(x_mol)
+        return self.output
+
+    @property
+    def feature_labels(self):
+        """
+        Column names of the most recent ``featurize`` result.
+
+        ``self.output`` is ``None`` until ``featurize`` has been called.
+        """
+        return self.output.columns
diff --git a/xenonpy/contrib/foo/__init__.py b/xenonpy/contrib/foo/__init__.py
@@ -1,5 +1,3 @@
 #  Copyright (c) 2019. TsumiNa. All rights reserved.
 #  Use of this source code is governed by a BSD-style
 #  license that can be found in the LICENSE file.
-
-from .foo import hello_contrib
diff --git a/xenonpy/contrib/foo/descriptor/__init__.py b/xenonpy/contrib/foo/descriptor/__init__.py
@@ -0,0 +1,5 @@
+#  Copyright (c) 2019. yoshida-lab. All rights reserved.
+#  Use of this source code is governed by a BSD-style
+#  license that can be found in the LICENSE file.
+
+from .foo import hello_contrib
diff --git a/xenonpy/contrib/foo/foo.py → xenonpy/contrib/foo/descriptor/foo.py b/xenonpy/contrib/foo/foo.py → xenonpy/contrib/foo/descriptor/foo.py
@@ -1,4 +1,4 @@
-#  Copyright (c) 2019. TsumiNa. All rights reserved.
+#  Copyright (c) 2019. yoshida-lab. All rights reserved.
 #  Use of this source code is governed by a BSD-style
 #  license that can be found in the LICENSE file.
 

diff --git a/xenonpy/descriptor/compositions.py b/xenonpy/descriptor/compositions.py
@@ -36,6 +36,7 @@ def __init__(self, *, one_hot_vec=False, n_jobs=-1, on_errors='raise', return_ty
         super().__init__(n_jobs=n_jobs, on_errors=on_errors, return_type=return_type)
         self.one_hot_vec = one_hot_vec
         self._elems = self._elements.index.tolist()
+        self.__authors__ = ['TsumiNa']
 
     def mix_function(self, elems, nums):
         vec = np.zeros(len(self._elems), dtype=np.int)