# Loading `kissim` results

This is a short notebook showing how to load the `kissim` output JSON files as Python objects.

- `fingerprints.json`: Fingerprints for all successfully encoded structures
- `fingerprints_clean.json`: Fingerprints dataset without outlier structures
- `feature_distances.json`: Feature distances between all fingerprint pairs
- `fingerprint_distances.json`: Fingerprint distances between all fingerprint pairs

**Note**: Loading these files can take up to 20 GB of memory.

In [None]:
from pathlib import Path

import pandas as pd
from kissim.encoding import FingerprintGenerator
from kissim.comparison import FeatureDistancesGenerator, FingerprintDistanceGenerator

In [None]:
# `_dh` is IPython's directory history; its most recent entry is used as the
# location of this notebook.
HERE = Path(_dh[-1])  # noqa: F821
# All kissim output files are read from the `results` folder next to it.
RESULTS = HERE.joinpath("../results/")

## Load fingerprints

### Without outlier filtering

In [None]:
%%time
fingerprints = FingerprintGenerator.from_json(RESULTS / "fingerprints.json")
len(fingerprints.data)

### With outlier filtering

In [None]:
%%time
fingerprints = FingerprintGenerator.from_json(RESULTS / "fingerprints_clean.json")
len(fingerprints.data)

## Load feature distances

In [None]:
%%time
feature_distances = FeatureDistancesGenerator.from_json(RESULTS / "feature_distances.json")

In [None]:
# Number of entries in the loaded feature distances collection.
len(feature_distances.data)

In [None]:
# Preview the table header: two structure ID columns followed by the feature
# names taken from the first feature distances entry.
columns = ["structure_id1", "structure_id2"]
columns.extend(feature_distances.data[0].data["feature_name"].to_list())
print(*columns)

In [None]:
%%time
fd_data = [fd.data["distance"] for fd in feature_distances.data]

In [None]:
# Turn the distance vectors collected in the previous cell into a table.
#
# The vectors only hold distances, while `columns` also names the two structure
# IDs. Handing the raw list of Series to `pd.DataFrame(..., columns=columns)`
# makes pandas align each Series' index against the column labels instead of
# filling rows positionally, which most likely yields an all-NaN frame without
# any structure IDs. Rows are therefore built explicitly: structure pair IDs
# first, then the distances as a plain list.
columns = ["structure_id1", "structure_id2"] + feature_distances.data[0].data[
    "feature_name"
].to_list()
fd_data = pd.DataFrame(
    [
        [fd.structure_pair_ids[0], fd.structure_pair_ids[1]] + distances.to_list()
        for fd, distances in zip(feature_distances.data, fd_data)
    ],
    columns=columns,
)

In [None]:
%%time
fd_data = [
    [fd.structure_pair_ids[0], fd.structure_pair_ids[1]] + fd.data["distance"].to_list()
    for fd in feature_distances.data
]
columns = ["structure_id1", "structure_id2"] + feature_distances.data[0].data[
    "feature_name"
].to_list()
fd_data = pd.DataFrame(fd_data, columns=columns)

In [None]:
# Write the table to disk. Calling `to_csv()` without a target returns the
# whole CSV as one string, which the notebook would then render as cell output.
fd_data.to_csv(RESULTS / "feature_distances.csv")

## Load fingerprint distances

In [None]:
%%time
fingerprint_distances = FingerprintDistanceGenerator.from_json(
    RESULTS / "fingerprint_distances.json"
)

In [None]:
# Number of entries in the fingerprint distances loaded in this section
# (the cell previously reported the feature distances again).
len(fingerprint_distances.data)