# Computation of risk from abundance data


In [1]:
import numpy as np
import pandas as pd
from quasinet import qnet
import qbiome.quantizer

Load the pre-trained qnet models for the appropriate (`appr.joblib`) and suboptimal (`sub.joblib`) cohorts:

In [2]:
# Cohort models used as the defaults of `risk`:
# ahctg -> appropriate cohort, shctg -> suboptimal cohort.
ahctg, shctg = (
    qnet.load_qnet(model_path) for model_path in ("appr.joblib", "sub.joblib")
)

In [3]:
def risk(
    data,
    ahctg_model=ahctg,
    shctg_model=shctg,
):
    """
    Compute a risk ratio for each quantized abundance sample.

    For every row of `data`, the qdistance to an all-empty ("null")
    sequence is computed under the suboptimal-cohort model and under the
    appropriate-cohort model; the risk is the first divided by the second.

    data: 2-D numpy array of quantized abundance samples (one row per sample)
    ahctg_model: qnet model of appropriate cohort
    shctg_model: qnet model of suboptimal cohort

    returns: numpy array of risk ratios with one row per sample. An entry
        is non-finite (e.g. `inf`) when the appropriate-cohort distance
        for that sample is zero.
    """
    # Reference sequence: same length and dtype as one sample, every entry "".
    null_seq = np.array([np.full_like(data[0, :], fill_value="")])

    # Use the models passed by the caller. Previously the module-level
    # `ahctg` / `shctg` were used here, so the arguments were ignored.
    shctg_dist = qnet.qdistance_matrix(
        data,
        null_seq,
        shctg_model,
        shctg_model,
    )
    ahctg_dist = qnet.qdistance_matrix(
        data,
        null_seq,
        ahctg_model,
        ahctg_model,
    )

    risk_mat = shctg_dist / ahctg_dist

    return risk_mat

## Risk from relative abundance values

Load the saved quantizer states, which are used to quantize the abundance data below.

In [4]:
# Build a quantizer with 26 levels, then restore its previously saved
# states from disk so it can be applied to new data.
# NOTE(review): presumably num_levels must match the value used when
# 'quantizer.pkl' was written — confirm.
quantizer = qbiome.quantizer.Quantizer(num_levels=26)
quantizer.load_quantizer_states('quantizer.pkl')

Load data, quantize, then convert to numpy format for qnet

In [5]:
# Load the example subjects' abundance table.
example_abundance_data = pd.read_csv('example_subjects.csv')

# Quantize the table with the restored quantizer.
example_abundance_data_quantized = quantizer._quantize_df(example_abundance_data)

# Convert to the string array that `risk` expects: drop the identifier
# column if present, cast everything to str, and turn the "nan" produced
# by missing values into the empty string.
example_abundance_data_np = np.char.replace(
    example_abundance_data_quantized
    .drop(columns="subject_id", errors="ignore")
    .to_numpy(dtype="str"),
    "nan",
    "",
)

# Display the quantized table as the cell output.
example_abundance_data_quantized

Unnamed: 0,subject_id,Acidimicrobiia_25,Acidimicrobiia_26,Acidimicrobiia_27,Acidimicrobiia_28,Acidimicrobiia_29,Acidimicrobiia_30,Acidimicrobiia_31,Acidimicrobiia_32,Acidimicrobiia_33,...,unclassified_Verrucomicrobiota_27,unclassified_Verrucomicrobiota_28,unclassified_Verrucomicrobiota_29,unclassified_Verrucomicrobiota_30,unclassified_Verrucomicrobiota_31,unclassified_Verrucomicrobiota_32,unclassified_Verrucomicrobiota_33,unclassified_Verrucomicrobiota_34,unclassified_Verrucomicrobiota_35,unclassified_Verrucomicrobiota_36
0,11,,,M,M,A,M,,,,...,M,M,M,M,,,,,,
1,27,,M,M,,A,,M,M,,...,M,,M,,M,M,,,,


In [6]:
# Score the example subjects using the default cohort models;
# the output has one row per subject.
risk(example_abundance_data_np)

array([[0.80804781],
       [1.12779862]])

## Risk from existing quantized abundances

Load file of all pre-quantized abundances:

In [7]:
# Read the space-delimited file of already-quantized abundances as an
# array of single-character strings (dtype "<U1").
prequantized_data = np.loadtxt("abund_dat_quantized.txt", dtype="<U1", delimiter=" ")

In [8]:
# Score every pre-quantized sample using the default cohort models.
# NOTE(review): an `inf` in the output presumably means the distance under
# the appropriate-cohort model (the denominator) was 0 for that sample —
# confirm whether such samples should be filtered or handled separately.
risk(prequantized_data)

array([[0.65444916],
       [0.95872929],
       [1.0155565 ],
       [0.76591529],
       [1.06325994],
       [0.72912112],
       [0.98770607],
       [0.74518964],
       [1.34299046],
       [1.1600274 ],
       [1.59223262],
       [0.80804781],
       [0.93443369],
       [1.0700751 ],
       [0.63462867],
       [0.51973498],
       [0.66575817],
       [1.04084963],
       [0.76993127],
       [0.9633952 ],
       [1.40974047],
       [       inf],
       [1.43543213],
       [0.2664582 ],
       [1.40119216],
       [0.45092225],
       [2.59830513],
       [1.12779862],
       [1.83338529],
       [1.00368983],
       [1.21860718],
       [0.34894849],
       [0.93590533],
       [1.17171557],
       [1.16187922],
       [2.28898152],
       [0.61789316],
       [0.67838671],
       [1.29549634],
       [4.02580222],
       [0.95673041],
       [1.15500722],
       [0.93054767],
       [0.56503308],
       [1.10610798],
       [1.12984665],
       [0.90940088],
       [0.989