# The `fingerprint_generator` module

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os
from pathlib import Path

import numpy as np
import pandas as pd
from opencadd.databases.klifs import setup_remote, setup_local

from kissim.encoding import Fingerprint, FingerprintGenerator

INFO:opencadd.databases.klifs.api:If you want to see an non-truncated version of the DataFrames in this module, use `pd.set_option('display.max_columns', 50)` in your notebook.




In [3]:
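# Optional: uncomment the two lines below to hide the INFO-level messages of the opencadd loggers.
# logger = logging.getLogger("opencadd")
# logger.setLevel(logging.ERROR)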

## Set up KLIFS remote and local sessions

In [4]:
# Location of the local KLIFS download. Defaults to the path used when this notebook was
# written; set the KLIFS_LOCAL_PATH environment variable to point to your own copy instead
# of editing the notebook.
KLIFS_LOCAL_PATH = os.environ.get(
    "KLIFS_LOCAL_PATH",
    "/home/dominique/Documents/GitHub/kissim_app/data/external/20201223_KLIFS_ABL2_HUMAN/",
)

# One session per data source: the remote session and a session on the local download.
KLIFS_REMOTE = setup_remote()
KLIFS_LOCAL = setup_local(KLIFS_LOCAL_PATH)

INFO:opencadd.databases.klifs.api:Set up remote session...
INFO:opencadd.databases.klifs.api:Remote session is ready!
INFO:opencadd.databases.klifs.api:Set up local session...
INFO:opencadd.databases.klifs.local:Load overview.csv...
INFO:opencadd.databases.klifs.local:Load KLIFS_export.csv...
INFO:opencadd.databases.klifs.local:Merge both csv files...
INFO:opencadd.databases.klifs.local:Add paths to coordinate folders to structures...
INFO:opencadd.databases.klifs.local:Add KLIFS IDs to structures (uses remote since not available locally!)...
INFO:opencadd.databases.klifs.api:Local session is ready!


## Select structure KLIFS IDs

In [5]:
# List the KLIFS IDs of all structures known to the local session.
structures = KLIFS_LOCAL.structures.all_structures()
structure_klifs_ids = structures["structure.klifs_id"].to_list()
structure_klifs_ids

[109, 118, 110, 113, 111, 116, 112, 114, 115, 117]

## Generate multiple fingerprints

```python
FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids, klifs_session, n_cores
)
```

### Locally in sequence

Number of cores: 1

In [6]:
# Sequential run on a single core, using the local KLIFS session.
n_cores = 1
fingerprints1 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids, KLIFS_LOCAL, n_cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 1.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 19:16:11.070047
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 19:16:30.575705


In [7]:
fingerprints1.data

{109: <kissim.encoding.fingerprint.Fingerprint at 0x7f5935880a90>,
 118: <kissim.encoding.fingerprint.Fingerprint at 0x7f59366da940>,
 110: <kissim.encoding.fingerprint.Fingerprint at 0x7f593558bac0>,
 113: <kissim.encoding.fingerprint.Fingerprint at 0x7f59351f1df0>,
 111: <kissim.encoding.fingerprint.Fingerprint at 0x7f59366fa9a0>,
 116: <kissim.encoding.fingerprint.Fingerprint at 0x7f59354b20a0>,
 112: <kissim.encoding.fingerprint.Fingerprint at 0x7f59351ec130>,
 114: <kissim.encoding.fingerprint.Fingerprint at 0x7f5934b804c0>,
 115: <kissim.encoding.fingerprint.Fingerprint at 0x7f59352f4490>,
 117: <kissim.encoding.fingerprint.Fingerprint at 0x7f59352f4100>}

### Remotely in sequence

Number of cores: 1

In [8]:
# Sequential run on a single core, using the remote KLIFS session.
n_cores = 1
fingerprints2 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids, KLIFS_REMOTE, n_cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 1.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 19:16:30.601642
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 19:17:04.563103


**TODO:** Remote generation may need waiting times between requests so as not to overload the KLIFS server.

### Locally in parallel

Number of cores: 2

In [9]:
# Parallel run on two cores, using the local KLIFS session.
n_cores = 2
fingerprints3 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids, KLIFS_LOCAL, n_cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 2.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 19:17:04.576006
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 19:17:16.239090


### Remotely in parallel

Number of cores: 2

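**FIXME:** Not run in this notebook — remote fingerprint generation in parallel currently fails with `RecursionError: maximum recursion depth exceeded`.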

## Save/load fingerprints

In [10]:
# Temporary JSON file (relative to the current working directory) for the save/load round trip.
json_filepath = Path("fingerprints.json")

In [11]:
# Save the fingerprints generated from the local session to the JSON file.
fingerprints1.to_json(json_filepath)

In [12]:
# Build a new FingerprintGenerator from the JSON file; the bare name displays its repr.
fingerprints1_reloaded = FingerprintGenerator.from_json(json_filepath)
fingerprints1_reloaded

<kissim.encoding.fingerprint_generator.FingerprintGenerator at 0x7f5935901dc0>

In [13]:
# Check the save/load round trip for one structure (KLIFS ID 109).
# Compare the value arrays element-wise rather than via their NaN-ignoring sums: equal sums
# do not imply equal arrays. NaNs at matching positions are treated as equal.
v1 = fingerprints1_reloaded.data[109].values_array()
v2 = fingerprints1.data[109].values_array()
np.allclose(v1, v2, equal_nan=True)

True

In [14]:
# Clean up: delete the temporary JSON file.
json_filepath.unlink()