In [1]:
import pandas as pd
import itertools

def calc_edges(node1: float, node2: float, make_indicator: bool = False) -> float:
    """Compute the weight of the edge that joins two nodes.

    An edge only carries weight when both node values are nonzero: if
    either value equals zero, the weight is zero.

    Parameters
    ----------
    node1 : float
        value of the first node
    node2 : float
        value of the second node
    make_indicator : bool, default=False
        if True, the weight of an edge between two nonzero nodes is 1;
        otherwise it is the ratio ``node1 / node2``

    Returns
    -------
    float
        edge weight
    """
    # Guard clauses: a zero on either side means there is no edge.
    if node1 == 0:
        return 0
    if node2 == 0:
        return 0

    if make_indicator:
        return 1
    return node1 / node2


In [2]:
# Load the per-sample table: 'Sample Name' and 'Group' columns plus one column per node.
df = pd.read_csv(
    filepath_or_buffer='data/сystic_fibrosis_11_01_2023 - сystic_fibrosis_11_01_2023.csv.csv',
)
df

Unnamed: 0,Sample Name,Group,16_00437621316582,17_021539190698192,18_038469966740735,18_29915765289428,18_63043828897158,18_87945690818552,18_943778909567843,19_040493421214805,...,429_0919133669904,430_05858392387927,431_0338918422529,443_04198484160145,445_0972017708983,447_05657737924486,517_0325219174968,518_0392964367509,533_0672614483142,575_8644098061574
0,Id_1,0,0.0,328.333333,15256.666667,0.000000,0.000000,1862.666667,142.666667,199622.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Id_2,0,0.0,287.666667,10707.333333,0.000000,0.000000,1628.666667,0.000000,204483.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Id_3,0,0.0,242.000000,8218.000000,0.000000,0.000000,1581.000000,0.000000,205679.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Id_4,1,0.0,207.000000,4295.000000,0.000000,110.666667,1788.666667,286.666667,202050.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Id_5,1,0.0,225.666667,4950.666667,0.000000,0.000000,1638.333333,0.000000,204870.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,Id_101,1,0.0,242.666667,4887.666667,0.000000,0.000000,1619.000000,101.000000,204885.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101,Id_102,1,0.0,249.000000,4814.666667,0.000000,0.000000,1642.666667,100.666667,204434.333333,...,217.0,0.0,0.0,0.0,167.0,0.0,0.0,0.0,0.0,0.0
102,Id_103,0,0.0,227.666667,5039.666667,0.000000,0.000000,1627.000000,0.000000,205926.666667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103,Id_104,1,79.0,4752.000000,108119.000000,114.333333,1013.000000,2908.666667,425.000000,180078.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Map the original node (column) names to short readable labels, skipping the
# two metadata columns; the label number is the column position minus one.
map_nodes_dict = {
    column_name: f'n_{position - 1}'
    for position, column_name in enumerate(df.columns)
    if column_name not in ['Sample Name', 'Group']
}
# Reverse lookup: readable label -> original column name.
map_nodes_dict_inv = dict(zip(map_nodes_dict.values(), map_nodes_dict.keys()))

In [4]:
# DataFrame with every unordered pair of nodes (one row per pair)
result_df = pd.DataFrame(
    data=list(itertools.combinations(map_nodes_dict.values(), 2)),
    columns=['p1', 'p2'],
)

In [5]:
# Add one column per sample holding the 0/1 edge indicator for every node pair.
#
# The original column names behind each pair do not depend on the sample, so
# they are resolved once here instead of once per (pair, sample) combination.
p1_features = result_df['p1'].map(map_nodes_dict_inv)
p2_features = result_df['p2'].map(map_nodes_dict_inv)

for idx in df.index:
    # Fetch the sample's row a single time, by label (idx comes from df.index,
    # so .loc is the matching accessor), and turn it into a plain dict so the
    # per-pair lookups below are cheap.
    # Assumes df.index is unique, as produced by the default read_csv index.
    sample_values = df.loc[idx].to_dict()

    # An edge is present (1) only when both node values are nonzero.
    # Assigning by column name overwrites on re-run, so the cell stays idempotent.
    result_df[sample_values['Sample Name']] = [
        calc_edges(sample_values[feature1], sample_values[feature2], make_indicator=True)
        for feature1, feature2 in zip(p1_features, p2_features)
    ]

In [6]:
# Preview the first rows: one row per node pair, one indicator column per sample.
result_df.head(n=5)

Unnamed: 0,p1,p2,Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,...,Id_96,Id_97,Id_98,Id_99,Id_100,Id_101,Id_102,Id_103,Id_104,Id_105
0,n_1,n_2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,n_1,n_3,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,n_1,n_4,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,n_1,n_5,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,n_1,n_6,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [7]:
# Persist the pair-by-sample indicator table without the row index.
result_df.to_csv(
    path_or_buf='data/raw/сystic_fibrosis_network_symmetrical_indic_11_01_2023.csv',
    index=False,
)