In [1]:
from ase import io
import random
import seaborn as sns
from ase.calculators.emt import EMT
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from amptorch.data_preprocess import AtomsDataset
from amptorch.gaussian import SNN_Gaussian
from scipy.spatial import distance
from sklearn.metrics import pairwise
import scipy


%matplotlib inline

In [2]:
def pair_distance(base_fps, compared_fps, distance_type="cosine", scale=55):
    """Overlay normalized histograms of within-set pairwise values for two fingerprint sets.

    For each of the two fingerprint arrays, every row is compared against
    every other row of the *same* array, and the resulting square matrix is
    flattened and drawn as a density-normalized histogram on the current
    matplotlib axes.

    Parameters
    ----------
    base_fps : 2-D array-like
        One fingerprint vector per row; plotted with the legend label 'train'.
    compared_fps : 2-D array-like
        One fingerprint vector per row; plotted with the legend label 'MLMD'.
    distance_type : {"cosine", "RBF"}
        "cosine" plots ``1 - cosine_similarity``.
        "RBF" plots ``exp(-squared_euclidean / scale**2)``. Note that this is
        the kernel value (1.0 for identical rows), not a distance.
    scale : float, optional
        Length scale of the RBF kernel; ignored for "cosine". Defaults to 55,
        the value that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``distance_type`` is not one of the supported names.
    """
    if distance_type == "cosine":
        def self_distances(fps):
            return 1 - pairwise.cosine_similarity(fps, fps)
    elif distance_type == "RBF":
        def self_distances(fps):
            # np.exp replaces the deprecated scipy.exp alias, which later
            # SciPy releases no longer provide.
            return np.exp(-distance.cdist(fps, fps, 'sqeuclidean') / scale**2)
    else:
        # Fail here with a clear message instead of an UnboundLocalError at
        # the first plotting call.
        raise ValueError(
            "distance_type must be 'cosine' or 'RBF', got {!r}".format(distance_type)
        )

    base_distances = self_distances(base_fps)
    compared_distances = self_distances(compared_fps)

    # The full square matrices are flattened, so self-pairs (the diagonal) and
    # both orderings of every pair are included in the histograms.
    # NOTE: sns.distplot is deprecated as of seaborn 0.11; it is kept so the
    # figure and the minimum seaborn version stay unchanged.
    sns.distplot(base_distances.flatten(), kde=False, norm_hist=True, label='train')
    sns.distplot(compared_distances.flatten(), kde=False, norm_hist=True, label='MLMD')

def fingerprint_array(dataset):
    """Flatten a dataset's per-atom fingerprints into one row per image.

    Parameters
    ----------
    dataset : object
        Must expose ``fp_length`` (length of one atom's fingerprint vector)
        and ``fingerprint_dataset`` (a sized, indexable sequence of images).
        Each image is a sized iterable of per-atom entries whose element
        ``[1]`` is the fingerprint vector -- presumably (symbol, fingerprint)
        pairs; confirm against AtomsDataset.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, n_atoms * fp_length)`` where row
        ``k`` is the concatenation, in atom order, of the fingerprint vectors
        of image ``k``. An empty dataset yields an array of shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If an image does not contain the same number of atoms as the first
        image. Rows of a rectangular array cannot represent it, and the
        buffer is uninitialised, so a short image would otherwise leave
        arbitrary values in the result.
    """
    fp_length = dataset.fp_length
    fingerprints = dataset.fingerprint_dataset
    num_images = len(fingerprints)
    if num_images == 0:
        return np.empty((0, 0))

    num_atoms = len(fingerprints[0])
    fp_array = np.empty((num_images, num_atoms * fp_length))
    for idx, image in enumerate(fingerprints):
        if len(image) != num_atoms:
            raise ValueError(
                "image {} has {} atoms but image 0 has {}; all images must "
                "have the same number of atoms".format(idx, len(image), num_atoms)
            )
        for atom_idx, atom in enumerate(image):
            start = atom_idx * fp_length
            fp_array[idx, start:start + fp_length] = atom[1]
    return fp_array

def distance_resampled(base_images, compare_images, Gs, distance_metric='cosine',
                       scale=55, cores=4):
    """Fingerprint two image sets and overlay histograms of their within-set pairwise values.

    Each image set is turned into an ``AtomsDataset`` (SNN_Gaussian
    descriptor, no force training), flattened with ``fingerprint_array`` to
    one row per image, and compared row-against-row within the same set. The
    flattened square matrices are drawn as density-normalized histograms on
    the current matplotlib axes, followed by axis labels and a legend.

    Parameters
    ----------
    base_images : sequence
        Images for the reference distribution (legend label 'base').
    compare_images : sequence
        Images for the distribution being compared (legend label 'compare').
    Gs : dict
        Descriptor settings, forwarded unchanged to ``AtomsDataset``.
    distance_metric : {'cosine', 'RBF'}
        'cosine' plots ``1 - cosine_similarity``.
        'RBF' plots ``exp(-squared_euclidean / scale**2)``. Note that this is
        the kernel value (1.0 for identical rows), not a distance.
    scale : float, optional
        Length scale of the RBF kernel; ignored for 'cosine'. Defaults to 55,
        the value that was previously hard-coded.
    cores : int, optional
        Forwarded to ``AtomsDataset``. Defaults to 4, the value that was
        previously hard-coded.

    Raises
    ------
    ValueError
        If ``distance_metric`` is not supported. This is checked before any
        fingerprints are computed.
    """
    xlabels = {
        'cosine': 'pairwise-cosine distances',
        'RBF': 'pairwise-RBF distances',
    }
    # Validate first: an unknown metric used to surface only after the
    # fingerprint calculation, as an UnboundLocalError.
    if distance_metric not in xlabels:
        raise ValueError(
            "distance_metric must be 'cosine' or 'RBF', got {!r}".format(distance_metric)
        )

    # Process the base set completely (including its plot) before the
    # comparison set, as before.
    for images, legend_label in ((base_images, 'base'), (compare_images, 'compare')):
        dataset = AtomsDataset(
            images,
            SNN_Gaussian,
            Gs,
            forcetraining=False,
            label='label',
            cores=cores,
            lj_data=None,
            scaling=None,
        )
        fps = fingerprint_array(dataset)

        if distance_metric == 'cosine':
            distances = 1 - pairwise.cosine_similarity(fps, fps)
        else:
            # np.exp replaces the deprecated scipy.exp alias, which later
            # SciPy releases no longer provide.
            distances = np.exp(-distance.cdist(fps, fps, 'sqeuclidean') / scale**2)

        # Both histograms carry a label so the legend identifies each one.
        # The full matrix is flattened, so self-pairs and both orderings of
        # every pair are counted.
        sns.distplot(distances.flatten(), kde=False, norm_hist=True, label=legend_label)

    plt.xlabel(xlabels[distance_metric])
    plt.ylabel('Normalized count')
    plt.legend()

In [5]:
# Trajectories to compare; the ":2000" index string limits each read to at
# most the first 2000 images.
base_images = io.read("../datasets/COCu_ber_100ps_300K.traj", ":2000")
compare_images = io.read("./new_runs/success/COCu_rep_200_5000_LJ.traj", ":2000")

# Descriptor settings handed to distance_resampled.
Gs = {
    # Four eta values, log-spaced from 0.05 to 5.0.
    "G2_etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
    "G2_rs_s": [0] * 4,
    "G4_etas": [0.005],
    "G4_zetas": [1.0, 4.0],
    "G4_gammas": [1.0, -1],
    "cutoff": 5.876798323827276,
}

In [None]:
# Compare the two trajectories using pairwise cosine distances.
distance_resampled(base_images, compare_images, Gs, distance_metric="cosine")

Calculating fingerprints...
