In [1]:
import pandas as pd
import numpy as np
from itertools import product

In [2]:
def expand_ranges(ranges, decimals=10):
    """Expand ``(start, stop, step)`` specs into explicit lists of grid values.

    Parameters
    ----------
    ranges : dict
        Maps a feature name to a sequence whose first three items are
        ``start``, ``stop`` and ``step``. They are passed to ``np.arange``,
        so ``stop`` is exclusive.
    decimals : int or None, optional
        Number of decimals that floating-point grids are rounded to.
        ``np.arange`` with a fractional step yields values carrying binary
        noise (e.g. ``0.7999999999999999`` instead of ``0.8``); rounding
        keeps that noise out of downstream tables. Pass ``None`` to get the
        raw ``np.arange`` values.

    Returns
    -------
    dict
        Same keys, in the same order, each mapped to a plain Python list.
        Integer specs produce ``int`` items, float specs produce ``float``.
    """
    expanded = {}
    for name, spec in ranges.items():
        values = np.arange(spec[0], spec[1], spec[2])
        # Only float grids need cleaning; integer grids are already exact.
        if decimals is not None and values.dtype.kind == "f":
            values = np.round(values, decimals)
        expanded[name] = values.tolist()
    return expanded

def enumerate_conditions(ranges, n_samples):
    """Return one condition dict per sample, so list position i describes sample i.

    The ``(start, stop, step)`` specs in ``ranges`` are expanded with
    ``expand_ranges`` and combined with ``itertools.product``, so the last
    key varies fastest. Every combination is repeated ``n_samples`` times in
    a row before the next combination starts.

    Parameters
    ----------
    ranges : dict
        Feature name -> ``(start, stop, step)``.
    n_samples : int
        Number of consecutive entries emitted per combination.

    Returns
    -------
    list of dict
        ``len(combinations) * n_samples`` dicts mapping feature name to
        value. Each entry is its own dict object, so editing one row does
        not change the other rows of the same combination.
    """
    feature_ranges = expand_ranges(ranges)
    keys = list(feature_ranges)
    conds = []
    for combo in product(*feature_ranges.values()):
        # Build a fresh dict per row rather than appending one shared object.
        conds.extend(dict(zip(keys, combo)) for _ in range(n_samples))
    return conds

In [6]:
# Condition grid for the fiber set. Each value is (start, stop, step) and is
# expanded with np.arange, so `stop` is exclusive: (7, 11, 1) -> 7, 8, 9, 10
# and (1, 2, 1) -> the single value 1.
fiber_ranges = {
    'length': (7, 11, 1),
    'is_assembled': (1, 2, 1),
    'has_beta_sheet_content': (1, 2, 1),
    'net_charge': (0.4, 0.6, 0.05),
}

# One peptide per non-blank line; surrounding whitespace is stripped.
# NOTE(review): assumes the file lists peptides in the same order that
# enumerate_conditions emits conditions -- confirm against the generation step.
with open("gen_peptides/generated_fibers_init.txt") as f:
    peptides = [line.strip() for line in f if line.strip()]

conds = enumerate_conditions(fiber_ranges, n_samples=300)

# Peptides and conditions are paired purely by position, so the counts must
# agree. Fail with both numbers instead of pandas' generic length error.
if len(peptides) != len(conds):
    raise ValueError(
        f"Found {len(peptides)} non-blank peptide lines but the condition grid "
        f"has {len(conds)} entries; rows cannot be aligned by position "
        "(blank lines in the peptide file are dropped on load)."
    )

df_fib = pd.DataFrame({
    "peptide": peptides,
    "index": range(len(peptides)),
    # One column per condition key, in the key order of the first condition.
    **{k: [c[k] for c in conds] for k in conds[0].keys()}
})

df_fib.to_csv("fibers_with_conditions.csv", index=False)
print(df_fib.head(10))

    peptide  index  length  is_assembled  has_beta_sheet_content  net_charge
0   WEPGTPW      0       7             1                       1         0.4
1  AKTPEMWH      1       7             1                       1         0.4
2   KIETGFE      2       7             1                       1         0.4
3  ELHYTPYN      3       7             1                       1         0.4
4   FYCAEFT      4       7             1                       1         0.4
5   CHITDWV      5       7             1                       1         0.4
6   VFIYCFV      6       7             1                       1         0.4
7   PYFEYAI      7       7             1                       1         0.4
8   AYHYGPD      8       7             1                       1         0.4
9   FDANWHL      9       7             1                       1         0.4


In [9]:
# Condition grid for the sphere set. Each value is (start, stop, step) and is
# expanded with np.arange, so `stop` is exclusive: (4, 8, 1) -> 4, 5, 6, 7
# and (1, 2, 1) -> the single value 1.
sphere_ranges = {
    'length': (4, 8, 1),
    'is_assembled': (1, 2, 1),
    'hydrophobic_moment': (0.6, 1.05, 0.1),
    'net_charge': (0.4, 0.6, 0.05),
}

# One peptide per non-blank line; surrounding whitespace is stripped.
# NOTE(review): assumes the file lists peptides in the same order that
# enumerate_conditions emits conditions -- confirm against the generation step.
with open("gen_peptides/generated_spheres_init.txt") as f:
    peptides = [line.strip() for line in f if line.strip()]

conds = enumerate_conditions(sphere_ranges, n_samples=60)

# Peptides and conditions are paired purely by position, so the counts must
# agree. Fail with both numbers instead of pandas' generic length error.
if len(peptides) != len(conds):
    raise ValueError(
        f"Found {len(peptides)} non-blank peptide lines but the condition grid "
        f"has {len(conds)} entries; rows cannot be aligned by position "
        "(blank lines in the peptide file are dropped on load)."
    )

df_sp = pd.DataFrame({
    "peptide": peptides,
    "index": range(len(peptides)),
    # One column per condition key, in the key order of the first condition.
    **{k: [c[k] for c in conds] for k in conds[0].keys()}
})

df_sp.to_csv("spheres_with_conditions.csv", index=False)
print(df_sp.head(10))

  peptide  index  length  is_assembled  hydrophobic_moment  net_charge
0    PFQD      0       4             1                 0.6         0.4
1    WAIE      1       4             1                 0.6         0.4
2    CWEW      2       4             1                 0.6         0.4
3    VDWA      3       4             1                 0.6         0.4
4    LMYE      4       4             1                 0.6         0.4
5    VWEI      5       4             1                 0.6         0.4
6    WDYV      6       4             1                 0.6         0.4
7    YRED      7       4             1                 0.6         0.4
8    FKED      8       4             1                 0.6         0.4
9    PWEY      9       4             1                 0.6         0.4
