In [56]:
import numpy as np

In [57]:
mass = [482.01, 108.14, 279.92, 80.91]  # mw of core, linker, ext, removed unit when a new bond is formed (HBr in this case)

def renorm(goal: float, mass: list) -> tuple:
    """retur.... Only works for situation like tri-core + dual-linker + dual-ext

    Args:
        goal (float): peak of intex_goalt on the mass spectrum.
        mass (list): [mw of core, mw of linker, mw of extender, mw of removed unit for each bond formation]

    Returns:
        tuple: c_nc, c_ne, c_nlf, c_goal.
    """
    m_c, m_l, m_e, m_rm = mass

    # a1 is the coefficient of number of c -> c_nc
    # a2 is the coefficient of number of e -> c_ne
    # a3 is the coefficient of number of l with one free termini (nl_f) -> c_nlf
    # a4 is the constant modified goal -> c_goal

    c_nc = m_c + m_l - 2 * m_rm
    c_ne = m_e + m_l - 2 * m_rm
    c_nlf = m_l - m_rm
    c_goal = goal + m_l - 2 * m_rm
    
    # a = [mass[0] + mass[1] - 2*mass[3], mass[2] + mass[1] - 2*mass[3], mass[1] - mass[3], goal + mass[1] - 2*mass[3]]
    
    return c_nc, c_ne, c_nlf, c_goal

def get_comb(a: tuple) -> np.ndarray:
    """_summary_

    Args:
        a (tuple): _description_

    Returns:
        np.ndarray: _description_
    """

    c_nc, c_ne, c_nlf, c_goal = a

    # maximum possible number of nc, ne, nlf

    max_nc = c_goal//c_nc
    max_ne = c_goal//c_ne
    max_nlf = c_goal//c_nlf

    X = np.mgrid[ : max_nc + 1, : max_ne + 1, : max_nlf + 1] # a 3d space of [X_nc, X_ne, X_nlf], mgrid - start:end:step, default: step = 1, start = 0, all omited here; the last point is not included so a +1 is required.

    return X

def unit_calc(X: np.ndarray, a: tuple) -> np.ndarray:
    """_summary_

    Args:
        X (np.ndarray): _description_
        a (tuple): _description_

    Returns:
        np.ndarray: _description_
    """
    x_nc, x_ne, x_nlf = X # array of all the possible situations
    c_nc, c_ne, c_nlf = a[ : -1] # do not need the c_goal

    x_goal = c_nc * x_nc + c_ne * x_ne + c_nlf * x_nlf

    return x_goal

def get_mass(x_goal: np.ndarray, mass: tuple) -> np.ndarray:
    """_summary_

    Args:
        x_goal (np.ndarray): _description_
        mass (tuple): _description_

    Returns:
        np.ndarray: _description_
    """

    # reverse to solve the actual mass goal from the constant modified goal (c_goal)
    # x_goal can also be a number, depending on different scneriao 

    m_rm = mass[3]
    m_l = mass[1]

    real_goal = x_goal + 2*m_rm - m_l

    return real_goal

def get_mask(X: np.ndarray) -> np.ndarray:
    """_summary_

    Args:
        X (np.ndarray): _description_

    Returns:
        np.ndarray: _description_
    """
    # X is from get_comb
    # this function is a filter to remove unwanted results, check readme for discussion
    # return an array with bool

    x_nc, x_ne, x_nlf = X # array of all the possible situations

    return x_nlf <= x_nc+2

def find_closest(x_goal, a, k=5, mask=None):
    """return first k smallest err index and its x_goal and its err (id, res, err). Here mask=None means the filter is not executed. If in the future we don't need that constraint, simply set mask as None."""

    c_nc, c_ne, c_nlf, c_goal = a

    c_goal = a[-1]

    if mask is None:

        err_goal = np.abs(x_goal - c_goal) # difference between calculated goal and set goal

    else:

        err_goal = np.where(mask, np.abs(x_goal - c_goal), np.inf) # when the mask condition is met, workout the err_goal, otherwise set the error as inifinite because it is impoosible

    idx = tuple(np.array(np.unravel_index(np.argsort(err_goal, axis=None), err_goal.shape))[:,:k]) # extract the first k smallest elements; the first element of unravel_index is the matrix to be reshaped, it will be reshaped into the size of err_goal; 

    return idx, err_goal[idx]

def get_fraction(idx: tuple) -> tuple:
    """_summary_

    Args:
        idx (tuple): _description_

    Returns:
        tuple: _description_
    """

    n_c, n_e, n_lf = idx

    n_lb = n_c + n_e - 1 # number of bonded linker

    n_l = n_lf + n_lb # total linker

    n_rm = n_lf + 2*n_lb # removed units

    return n_c, n_l, n_e, n_rm
    

In [58]:
def find_solution(goal, mass=mass, k=1):

    a = renorm(goal, mass)
    X = get_comb(a)
    summation = unit_calc(X, a)
    mask = get_mask(X)
    idx, err = find_closest(summation, a, k=k, mask=mask)    
    total_mass = get_mass(summation, mass)

    attempted_mass = np.sort(np.ravel(total_mass)[np.ravel(mask)])

    print("all attempted valid mass:")
    print(attempted_mass)

    return idx, total_mass[idx], total_mass[idx] - goal

In [59]:
idx, calc_mass, err = find_solution(563.58, k=1)

print("index:", np.array(idx).T) # idx of n_c, n_e, n_lf
print("calculated mass:", calc_mass)
print("error to goal:", err)

all attempted valid mass:
[  53.68   80.91  108.14  279.92  307.15  334.38  482.01  506.16  509.24
  533.39  536.47  560.62  563.7   708.25  735.48  762.71  789.94  934.49
  961.72  988.95 1016.18]
index: [[1 0 3]]
calculated mass: [563.7]
error to goal: [0.12]


In [60]:
frac = np.array(get_fraction(idx)).T
print("[cores linkers exts rm_sites]:\n", frac)

[cores linkers exts rm_sites]:
 [[1 3 0 3]]
