# The `fingerprint_generator` module

In [None]:
# Reload imported modules automatically before each cell runs, so that edits to
# the package source take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import os

import pandas as pd
from opencadd.databases.klifs import setup_remote, setup_local

from kissim.encoding import Fingerprint, FingerprintGenerator

INFO:opencadd.databases.klifs.api:If you want to see an non-truncated version of the DataFrames in this module, use `pd.set_option('display.max_columns', 50)` in your notebook.




In [3]:
# Optional: uncomment the two lines below to restrict opencadd log output to
# messages of level ERROR and above (hides the INFO messages shown in this notebook).
#logger = logging.getLogger("opencadd")
#logger.setLevel(logging.ERROR)

## Set up KLIFS remote and local sessions

In [4]:
# Remote KLIFS session.
KLIFS_REMOTE = setup_remote()

# Local KLIFS session, built from a KLIFS download folder on disk.
# The folder can be set with the KLIFS_DOWNLOAD_PATH environment variable; the
# fallback is the machine-specific path this notebook was originally run with,
# so set the variable when running the notebook on another machine.
KLIFS_DOWNLOAD_PATH = os.environ.get(
    "KLIFS_DOWNLOAD_PATH",
    "/home/dominique/Documents/GitHub/kissim_app/data/external/20201223_KLIFS_ABL2_HUMAN/",
)
KLIFS_LOCAL = setup_local(KLIFS_DOWNLOAD_PATH)

INFO:opencadd.databases.klifs.api:Set up remote session...
INFO:opencadd.databases.klifs.api:Remote session is ready!
INFO:opencadd.databases.klifs.api:Set up local session...
INFO:opencadd.databases.klifs.local:Load overview.csv...
INFO:opencadd.databases.klifs.local:Load KLIFS_export.csv...
INFO:opencadd.databases.klifs.local:Merge both csv files...
INFO:opencadd.databases.klifs.local:Add paths to coordinate folders to structures...
INFO:opencadd.databases.klifs.local:Add KLIFS IDs to structures (uses remote since not available locally!)...
INFO:opencadd.databases.klifs.api:Local session is ready!


## Select structure KLIFS IDs

In [5]:
# Take the KLIFS ID of every structure known to the local session.
local_structures = KLIFS_LOCAL.structures.all_structures()
structure_klifs_ids = local_structures["structure.klifs_id"].to_list()
structure_klifs_ids

[109, 118, 110, 113, 111, 116, 112, 114, 115, 117]

## Generate multiple fingerprints

### Locally in sequence

In [6]:
# Generate fingerprints from the local KLIFS session on a single core.
fingerprints2 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids,
    KLIFS_LOCAL,
    1,  # number of cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 1.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 11:49:23.599540
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 11:49:42.996142


### Remotely in sequence

In [7]:
# Generate fingerprints from the remote KLIFS session on a single core.
fingerprints1 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids,
    KLIFS_REMOTE,
    1,  # number of cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 1.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 11:49:43.010776
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 11:50:09.309013


**TODO**: Remote generation _may_ need waiting times between requests so that the KLIFS server is not overloaded.

### Locally in parallel

In [8]:
# Generate fingerprints from the local KLIFS session on two cores.
fingerprints3 = FingerprintGenerator.from_structure_klifs_ids(
    structure_klifs_ids,
    KLIFS_LOCAL,
    2,  # number of cores
)

INFO:kissim.encoding.fingerprint_generator:Number of cores used: 2.
INFO:kissim.encoding.fingerprint_generator:Number of input structures: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints: 10
INFO:kissim.encoding.fingerprint_generator:Number of fingerprints without None: 10
INFO:kissim.encoding.fingerprint_generator:Start of fingerprint generation: 2021-01-13 11:50:09.323643
INFO:kissim.encoding.fingerprint_generator:End of fingerprint generation: 2021-01-13 11:50:21.168236


### Remotely in parallel

**TODO**: This does not work yet. Possible cause (unconfirmed): the remote KLIFS session cannot be pickled.