# Tools

In [34]:
import numpy as np
from matplotlib.ticker import FormatStrFormatter, PercentFormatter


# Convert proportions (x, y, z) [%] into XY coordinates
def transform_xy(x, y, z=None):
    """Map percentage proportions onto 2-D coordinates of a unit-side triangle.

    Parameters
    ----------
    x, y : scalar or array-like supporting arithmetic
        Proportions in percent. ``y`` is only used to derive ``z`` when
        ``z`` is not given.
    z : scalar or array-like, optional
        Third proportion in percent; defaults to ``100 - x - y``.

    Returns
    -------
    tuple
        ``(x_, y_)`` where ``x_ = z/100 + (x/100)/2`` and
        ``y_ = (x/100) * sqrt(3)/2``.
    """
    third = 100 - x - y if z is None else z
    height = np.sqrt(3) * 0.5

    horizontal = (third / 100) + (x / 100) / 2
    vertical = (x / 100) * height
    return horizontal, vertical

def transform_xy_halfsize(x, y, z=None):
    """Map percentage proportions onto the 2x-magnified triangle and drop
    points whose coordinates become negative.

    Parameters
    ----------
    x, y : array-like supporting arithmetic
        Proportions in percent. ``y`` is only used to derive ``z`` when
        ``z`` is not given.
    z : array-like, optional
        Third proportion in percent; defaults to ``100 - x - y``.

    Returns
    -------
    tuple
        ``(x_, y_, idx)``: the coordinates of the retained points and the
        boolean mask (over the input points) telling which were retained.
        A point is retained when both of its coordinates are ``>= 0``.
    """
    if z is None:
        z = 100 - x - y
    height = np.sqrt(3) * 0.5

    # same mapping as the full-size triangle, scaled by 2 and shifted
    horizontal = (z / 100) * 2 + (x / 100) * 2 / 2 - 0.5
    vertical = (x / 100) * 2 * height - height

    coords = np.stack([horizontal, vertical])
    keep = np.all(coords >= 0, axis=0)
    coords = coords[..., keep]

    return coords[0], coords[1], keep


#plot Triangle
def plot_tri(x,y,z,d, label_x,label_y,label_z, *additional_points, show_cbar=True, cbarlabel=None, percentage=False, reduce_ticks=False, ax=None, half_size=False, cbar_kw=None, **kwargs):
    """Draw a triangular (ternary) diagram and scatter coloured points on it.

    Parameters
    ----------
    x, y, z : array-like
        Proportions in percent, converted to plot coordinates with
        ``transform_xy`` (or ``transform_xy_halfsize`` when ``half_size``).
        ``z`` may be ``None``; the transform then uses ``100 - x - y``.
    d : array-like
        Values passed to ``ax.scatter`` as ``c`` (point colours).
    label_x, label_y, label_z : str
        Texts drawn beside the right edge, beside the left edge and below
        the bottom edge of the triangle, respectively.
    *additional_points : callable
        Each is called as ``fn(ax)`` after the scatter plot, e.g. to draw
        extra markers.
    show_cbar : bool, default=True
        Whether to add a colorbar for the scatter plot.
    cbarlabel : str, optional
        Label of the colorbar.
    percentage : bool, default=False
        Format colorbar ticks with ``PercentFormatter(1)``.
    reduce_ticks : bool, default=False
        Write only every second tick label.
    ax : matplotlib Axes, optional
        Axes to draw on; a new figure (dpi=150) is created when ``None``.
    half_size : bool, default=False
        Use the magnified transform; tick labels then advance in steps
        of 5 and the right-edge labels are offset by 50.
    cbar_kw : dict, optional
        Extra keyword arguments for ``Figure.colorbar``.
    **kwargs
        Forwarded to ``ax.scatter``.

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt

    # explicit None test: do not rely on the truthiness of an Axes object
    if ax is None:
        fig, ax = plt.subplots(dpi=150)
    else:
        fig = ax.get_figure()

    # avoid a shared mutable default argument
    if cbar_kw is None:
        cbar_kw = {}

    ax.set_aspect('equal', 'datalim')
    ax.tick_params(labelbottom=False, labelleft=False, labelright=False, labeltop=False)
    ax.tick_params(bottom=False, left=False, right=False, top=False)
    for side in ('bottom', 'left', 'right', 'top'):
        ax.spines[side].set_visible(False)
    h = np.sqrt(3.0)*0.5

    # outer triangle
    ax.plot([0.0, 1.0],[0.0, 0.0], 'k-', lw=2)
    ax.plot([0.0, 0.5],[0.0, h], 'k-', lw=2)
    ax.plot([1.0, 0.5],[0.0, h], 'k-', lw=2)

    # inner grid lines (gray) and short tick marks (black)
    for i in range(1,10):
        ax.plot([i/20.0, 1.0-i/20.0],[h*i/10.0, h*i/10.0], color='gray', lw=0.5,ls="-")
        ax.plot([1.0-i/20.0-0.025, 1.0-i/20.0],[h*i/10.0, h*i/10.0], color='black', lw=2,ls="-")

        ax.plot([i/20.0, i/10.0],[h*i/10.0, 0.0], color='gray', lw=0.5)
        ax.plot([i/20.0, i/20+0.05/4],[h*i/10.0, h*i/10-h/10*0.25], color='black', lw=2)

        ax.plot([0.5+i/20.0, i/10.0],[h*(1.0-i/10.0), 0.0], color='gray', lw=0.5)
        ax.plot([i/10.0+0.025/2, i/10.0],[h/40, 0.0], color='black', lw=2)

    # component labels
    ax.text(0.75+0.25/2+0.01, h/2, label_x, fontsize='xx-large', rotation=0)#x
    ax.text(0.25/2-0.13, h/2, label_y, fontsize='xx-large')#y
    ax.text(0.5-0.05, -0.17, label_z, fontsize='xx-large', rotation=0)#z

    # tick labels along the three edges
    step = 2 if reduce_ticks else 1
    interval = 5 if half_size else 10
    shift = 50 if half_size else 0
    for i in range(0,11, step):
        r = i*interval
        ax.text((10-i)/20.0-0.01, h*(10-i)/10.0, '%d' % r, fontsize='medium', rotation=0,horizontalalignment='right') #x
        ax.text(0.5+(10-i)/20.0+0.01, h*(1.0-(10-i)/10.0), '%d' % (r + shift), fontsize='medium',horizontalalignment='left')#y
        ax.text(i/10.0-0.03, -0.07, '%d' % r, fontsize='medium', rotation=0) #z

    if half_size:
        X, Y, idx = transform_xy_halfsize(x,y,z)
        # keep only the colour values of the retained points; asarray lets
        # plain Python sequences be masked with the boolean index as well
        d = np.asarray(d)[idx]
    else:
        X, Y = transform_xy(x, y, z)

    sca = ax.scatter(X, Y, c=d, s=5, alpha=0.8, **kwargs)

    for fn in additional_points:
        fn(ax)

    # Create colorbar
    if show_cbar:
        cbar = ax.figure.colorbar(sca, ax=ax, **cbar_kw)
        if percentage:
            cbar.ax.yaxis.set_major_formatter(PercentFormatter(1))
        if cbarlabel is not None:
            cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom", fontsize='x-large')

    fig.tight_layout()

In [1]:
from itertools import product

class ConfusionMatrixDisplay:
    """Confusion Matrix visualization.

    NOTE(review): this appears to be a local adaptation of scikit-learn's
    ``ConfusionMatrixDisplay`` with additional font-size options.

    Parameters
    ----------
    confusion_matrix : ndarray of shape (n_classes, n_classes)
        Confusion matrix.
    display_labels : ndarray of shape (n_classes,)
        Display labels for plot.

    Attributes
    ----------
    im_ : matplotlib AxesImage
        Image representing the confusion matrix.
    text_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, \
            or None
        Array of matplotlib Text objects. `None` if `include_values` is
        false.
    ax_ : matplotlib Axes
        Axes with confusion matrix.
    figure_ : matplotlib Figure
        Figure containing the confusion matrix.

    ``im_``, ``text_``, ``ax_`` and ``figure_`` are set by :meth:`plot`.
    """
    def __init__(self, confusion_matrix, display_labels):
        self.confusion_matrix = confusion_matrix
        self.display_labels = display_labels

    def plot(self, *, include_values=True, cmap='viridis',
             xticks_rotation='horizontal', values_format=None, ax=None, anno_fontsize=14, tick_fontsize=14, label_fontsize=20):
        """Plot visualization.

        Parameters
        ----------
        include_values : bool, default=True
            Includes values in confusion matrix.
        cmap : str or matplotlib Colormap, default='viridis'
            Colormap recognized by matplotlib.
        xticks_rotation : {'vertical', 'horizontal'} or float, \
                         default='horizontal'
            Rotation of xtick labels.
        values_format : str, default=None
            Format specification for values in confusion matrix. If `None`,
            the format specification is '.2g'.
        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.
        anno_fontsize : int, default=14
            Font size of the cell annotations and of the colorbar tick
            labels.
        tick_fontsize : int, default=14
            Font size of the axis tick labels.
        label_fontsize : int, default=20
            Font size of the x/y axis labels.

        Returns
        -------
        display : ConfusionMatrixDisplay
            This object.
        """
        import matplotlib.pyplot as plt

        if ax is None:
            fig, ax = plt.subplots()
        else:
            fig = ax.figure

        cm = self.confusion_matrix
        n_classes = cm.shape[0]
        self.im_ = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        self.text_ = None

        # colours at the two ends of the colormap (float lookup in [0, 1])
        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(1.0)

        if include_values:
            self.text_ = np.empty_like(cm, dtype=object)
            if values_format is None:
                values_format = '.2g'

            # print text with appropriate color depending on background:
            # the switch-over point is the midpoint of the value range, so
            # it stays correct when the smallest entry is not zero
            thresh = (cm.max() + cm.min()) / 2.
            for i, j in product(range(n_classes), range(n_classes)):
                color = cmap_max if cm[i, j] < thresh else cmap_min
                self.text_[i, j] = ax.text(j, i,
                                           format(cm[i, j], values_format),
                                           ha="center", va="center",
                                           color=color, fontsize=anno_fontsize)

        cbar = fig.colorbar(self.im_, ax=ax)
        cbar.ax.tick_params(labelsize=anno_fontsize)
        ax.set(xticks=np.arange(n_classes),
               yticks=np.arange(n_classes),
               xticklabels=self.display_labels,
               yticklabels=self.display_labels,
               ylabel="True label",
               xlabel="Predicted label")

        # explicit limits with the first class on top
        ax.set_ylim((n_classes - 0.5, -0.5))
        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

        ax.xaxis.label.set_size(label_fontsize)
        ax.yaxis.label.set_size(label_fontsize)
        ax.tick_params(labelsize=tick_fontsize)

        self.figure_ = fig
        self.ax_ = ax
        return self


In [181]:
from xenonpy.descriptor.base import BaseFeaturizer
from typing import Union, Sequence, Callable
from xenonpy.datatools import preset
from sklearn.preprocessing import MinMaxScaler
from pymatgen.core import Composition as PMGComp
from abc import ABCMeta

import numpy as np
import pandas as pd
import itertools
from joblib import Parallel, delayed

class RBFKernel():
    """Gaussian (RBF) kernel evaluated element-wise against a set of centers.

    Parameters
    ----------
    sigma : float
        Width of the Gaussian.
    """

    def __init__(self, sigma):
        self._sigma = sigma

    def __call__(self, x_i: np.ndarray, x_j: np.ndarray):
        """Return ``exp(-(x_i - x_j)^2 / (2 * sigma^2))``.

        ``x_i`` gets a trailing axis before ``x_j`` is subtracted by
        broadcasting; the result is flattened to 2-D with
        ``x_i.shape[0]`` rows.
        """
        n_rows = x_i.shape[0]
        differences = x_i[:, :, np.newaxis] - x_j
        flat = differences.reshape(n_rows, -1)
        scale = 2 * self._sigma ** 2
        return np.exp(-flat ** 2 / scale)


class KernelMean(BaseFeaturizer):
    """Featurizer that averages per-element kernel features over a composition.

    At construction the (min-max scaled) feature matrix is passed through
    ``kernel_func`` against a set of centers (``grid``), giving one row of
    kernel features per row of ``feature_matrix``. ``featurize`` then
    weights those rows by the proportion of each key in a composition.

    Parameters
    ----------
    kernel_func : callable
        Called as ``kernel_func(scaled_matrix, grid)``. It must return a
        2-D array with one row per row of ``feature_matrix`` and one column
        per (feature, center) pair, features varying slowest.
    feature_matrix : pandas.DataFrame, optional
        Rows are looked up by composition key, columns are features.
        Defaults to ``preset.elements_completed``.
    grid : None, int, sequence of int, or 2-D sequence of float, optional
        Kernel centers on the scaled [0, 1] axis.

        * ``None`` -- one center per feature: the mean of that feature.
        * ``int`` -- that many evenly spaced centers for every feature.
        * 1-D sequence -- number of evenly spaced centers for each feature
          (its length must equal the number of features).
        * 2-D -- centers given directly, one row per feature.
    on_errors, return_type, target_col
        Forwarded to ``BaseFeaturizer``.
    n_jobs : int, default=1
        Number of joblib workers used inside ``featurize``.

    Raises
    ------
    ValueError
        If ``grid`` has an unsupported dimensionality or does not match the
        number of features.
    """

    def __init__(self, 
                 kernel_func: Union[None, Callable[[np.ndarray, np.ndarray], np.ndarray]],
                 *,
                 feature_matrix: Union[None, pd.DataFrame] = None,
                 grid: Union[None, int, Sequence[int], Sequence[Sequence[float]]] = None,
                 on_errors='raise', return_type='any', target_col='composition', n_jobs: int = 1
                ):
        # NOTE(review): n_jobs=0 is passed to the parent on purpose; presumably
        # it makes the parent hand all entries to `featurize` at once -- confirm
        # against xenonpy's BaseFeaturizer.
        super().__init__(n_jobs=0, on_errors=on_errors, return_type=return_type, target_col=target_col)

        if feature_matrix is None:  # use elemental info
            feature_matrix = preset.elements_completed

        # re-scale to [0, 1]; asarray because the scaler returns a plain
        # ndarray by default (it has no `.values`)
        scaled_matrix = np.asarray(MinMaxScaler().fit_transform(feature_matrix))
        n_features = scaled_matrix.shape[1]

        # calculate centers for each feature
        if grid is None:
            grid = scaled_matrix.mean(axis=0).reshape(-1, 1)  # use mean of feature as center
        elif isinstance(grid, (int, np.integer)):
            grid = np.array([np.linspace(0, 1, grid)] * n_features)  # create bins
        elif isinstance(grid, (Sequence, np.ndarray)):
            grid = np.asarray(grid)
            if grid.ndim == 1:
                if grid.size != n_features:
                    raise ValueError(f'length of grid ({grid.size}) must be equal to feature size ({scaled_matrix.shape[1]})')
                # one set of evenly spaced centers per feature, sized by its own count
                centers = [np.linspace(0, 1, int(n)) for n in grid]
                if len({c.size for c in centers}) == 1:
                    grid = np.array(centers)
                else:
                    grid = centers  # ragged: kept as a list of 1-D arrays
            elif grid.ndim == 2:
                if grid.shape[0] != n_features:
                    raise ValueError(f'number of grid rows ({grid.shape[0]}) must be equal to feature size ({n_features})')
            else:
                raise ValueError('dim of grid must be 1 or 2')

        # calculate kernel matrix for features
        if isinstance(grid, np.ndarray):
            kernel_matrix = kernel_func(scaled_matrix, grid)
        else:
            # differing numbers of centers cannot be broadcast in one call,
            # so evaluate the kernel one feature at a time and concatenate.
            # NOTE(review): assumes kernel_func treats features independently.
            kernel_matrix = np.hstack([
                kernel_func(scaled_matrix[:, [j]], c[np.newaxis, :])
                for j, c in enumerate(grid)
            ])

        # generate column names of output: <feature>_k<center number>
        labels = [f'{n}_k{k+1}' for n, g in zip(feature_matrix.columns, grid) for k in range(g.size)]

        self.kernel_matrix_ = pd.DataFrame(kernel_matrix, index=feature_matrix.index, columns=labels)
        self.__n_jobs = n_jobs  # this param should not overwrite the property of parent class

    def featurize(self, comps):
        """Return the proportion-weighted kernel features of compositions.

        Parameters
        ----------
        comps : sequence, pandas.Series or ndarray
            Each entry is a mapping ``{key: amount}`` or a pymatgen
            ``Composition``; keys must exist in the index of
            ``kernel_matrix_``.

        Returns
        -------
        ndarray of shape (len(comps), n_kernel_features)
        """
        # Unified to python list
        if isinstance(comps, (pd.Series, np.ndarray)):
            comps = comps.tolist()

        kernel_matrix = self.kernel_matrix_

        # nothing to stack for empty input
        if len(comps) == 0:
            return np.empty((0, kernel_matrix.shape[1]))

        def inner(comp):
            # unified to python dict
            if isinstance(comp, PMGComp):
                comp = comp.as_dict()

            # calculate proportion vector for the given composition
            t = sum(comp.values())
            proportion_vec = np.zeros(kernel_matrix.shape[0])
            for (k, v) in comp.items():
                elem_i = kernel_matrix.index.get_loc(k)
                proportion_vec[elem_i] = v / t

            return proportion_vec

        proportion_matrix = Parallel(n_jobs=self.__n_jobs)(delayed(inner)(comp) for comp in comps)
        proportion_matrix = np.stack(proportion_matrix)

        # (n_comps, n_rows) @ (n_rows, n_kernel_features): the weighted sum
        # over rows, without materialising a 3-D intermediate
        return proportion_matrix @ kernel_matrix.values

    @property
    def feature_labels(self):
        """Column labels of the kernel feature matrix."""
        return self.kernel_matrix_.columns

In [2]:
def make_virtual_compounds(*elements, interval=0.5, pen_size=10, reorder_elements=True, max_proportion=100, **element_ratio):
    """Enumerate compositions (in percent) on a regular grid that sum to 100.

    Two mutually exclusive calling styles are supported:

    * positional element names -- every element is scanned over
      ``np.arange(0, 100, interval)``;
    * keyword ``name=ratio`` pairs -- every element is scanned over
      ``np.arange(max(ratio - pen_size, 0), min(ratio + pen_size, 100), interval)``.

    Parameters
    ----------
    *elements : str
        Element names.
    interval : float, default=0.5
        Grid step in percent.
    pen_size : float, default=10
        Half-width of the scanned window around each given ratio
        (keyword style only).
    reorder_elements : bool, default=True
        Sort the element names; in the positional style duplicated names
        are also removed.
    max_proportion : float, default=100
        When below 100, only rows whose FIRST element is ``>=`` this value
        are kept. NOTE(review): despite the name this acts as a lower
        bound -- confirm the intent with the callers.
    **element_ratio : float
        Central ratio in percent for each element.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``composition`` (dict ``element -> percent``) and
        ``elements`` (tuple of element names). ``None`` when neither
        elements nor ratios are given.

    Raises
    ------
    ValueError
        If both positional elements and keyword ratios are given.
    """
    # for list type
    if elements:
        # elements and element_ratio are mutually exclusive
        if element_ratio:
            raise ValueError('elements and element_ratio are mutually exclusive')

        # sort elements in the alphabetical order (this also drops duplicates)
        if reorder_elements:
            elements = tuple(sorted(set(elements)))

        # the dimension must be taken AFTER duplicates were removed, otherwise
        # the mesh has more columns than there are element names
        dim = len(elements)

        # generate mesh
        grid = np.arange(0, 100, interval)
        mesh = np.array(np.meshgrid(*([grid] * dim))).T.reshape(-1, dim)

    # for dict type
    else:
        if not element_ratio:
            return None

        dim = len(element_ratio)

        # sort elements in the alphabetical order
        if reorder_elements:
            elements = tuple(sorted(element_ratio))
        else:
            elements = tuple(element_ratio)
        ratios = tuple(element_ratio[name] for name in elements)

        # generate mesh
        s = [np.arange(max(r - pen_size, 0), min(r + pen_size, 100), interval) for r in ratios]
        mesh = np.array(np.meshgrid(*s)).T.reshape(-1, dim)

    # select only the ratios whose sum equals 100; compare with a tight
    # tolerance because steps such as 0.1 are not exact in floating point
    mesh = mesh[np.isclose(mesh.sum(axis=1), 100.0, rtol=0.0, atol=1e-8)]

    if max_proportion < 100:
        mesh = mesh[mesh[..., 0] >= max_proportion]

    # assign element
    comps = [dict(zip(elements, frac_composition)) for frac_composition in mesh]
    # save as pandas dataframe
    return pd.DataFrame({'composition': comps, 'elements': [elements] * mesh.shape[0]})