In [10]:
#construct 4-bit landscape
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

# read input & collect all info into a dictionary
file = '../data/nk-landscape.tsv' # 10-bit landscape
df = pd.read_csv(file, delimiter="\t", dtype={'hap':str}) # hap column as strings (otherwise read as numbers)
print(df.head())
plt.figure()
#df['fit'].plot.hist(bins=10)
recs = df.to_dict("records")
dHap = {}
for rec in recs:
    dHap[rec['hap']] = {'id': rec['ranked_id'], 'fit': rec['fit']}
print(dHap)

  ranked_id         hap       fit model  N_max       r/s
0      H000  1001100011  1.000000    nk      4  1.171044
1      H001  0001100011  0.984447    nk      4  1.171044
2      H002  1001100111  0.961883    nk      4  1.171044
3      H003  0001100111  0.946330    nk      4  1.171044
4      H004  1001111011  0.929235    nk      4  1.171044
{'1001100011': {'id': 'H000', 'fit': 1.0}, '0001100011': {'id': 'H001', 'fit': 0.9844472003322132}, '1001100111': {'id': 'H002', 'fit': 0.9618830286879516}, '0001100111': {'id': 'H003', 'fit': 0.946330229020165}, '1001111011': {'id': 'H004', 'fit': 0.9292349291259612}, '0001111011': {'id': 'H005', 'fit': 0.9136821294581742}, '1101100011': {'id': 'H006', 'fit': 0.8992125144545016}, '1001101011': {'id': 'H007', 'fit': 0.8987230899143819}, '1001111111': {'id': 'H008', 'fit': 0.8911179578139131}, '1001100010': {'id': 'H009', 'fit': 0.8874674351420327}, '0101100011': {'id': 'H010', 'fit': 0.8836597147867145}, '0001101011': {'id': 'H011', 'fit': 0.88317029

<Figure size 432x288 with 0 Axes>

In [11]:
# build graph, add nodes & attach fitness as features
DG = nx.DiGraph() 
for hap in dHap: # hap as key
    DG.add_node(hap)
    for k in dHap[hap]: # k includes 'id' and 'fit'
        DG.nodes[hap][k] = dHap[hap][k]
    DG.nodes[hap]['alpha'] = dHap[hap]['fit'] # assign fitness as alpha (transparency)
print(DG.nodes)

['1001100011', '0001100011', '1001100111', '0001100111', '1001111011', '0001111011', '1101100011', '1001101011', '1001111111', '1001100010', '0101100011', '0001101011', '1001110011', '0001111111', '0001100010', '0001110011', '1101100111', '1001101111', '1001100110', '1101000011', '0101100111', '0001101111', '1001110111', '0001100110', '0001110111', '1101111011', '0101000011', '1001111010', '0101111011', '1001000011', '1101000111', '0001111010', '1101101011', '1110011000', '1101111111', '1010111000', '1101100010', '1001101010', '1011111011', '1110011001', '0101101011', '0001000011', '1101110011', '0101000111', '1010111001', '1001111110', '1110011100', '1101011011', '0101111111', '0010111000', '1010111100', '1110000000', '1001000111', '0101100010', '0001101010', '1101000000', '1001110010', '0011111011', '1110011101', '1010100000', '0101110011', '0110011000', '1001100000', '1011100011', '0010111001', '1010111101', '1110000001', '0001111110', '1101000001', '1101101111', '1010100001', '0010

In [7]:
# Test case: add edges recursively
# The total number of unique binary haps of length n is the sum of binomial coefficients
# which evaluates to 2^n (https://mathworld.wolfram.com/BinomialSums.html)
# The total number of 1-bit edges is: n2^2 - n*2^(n-1) = n* 2^(n-1)
def add_edge_test(G, hap, outList, k, seen):
    num_zeros = len([x for x in hap if x == '0'])
    if num_zeros == 0: # reached the end hap (all 1's)
        seen[hap] = 1
        return
    if hap in seen_nodes: # node already reached
        return
    else: # contains at least one zero & not seen
        seen[hap] = 1
        nabes = {'level':k, 'hap_list': []} # start a list
        for i in range(len(hap)): # mutate each 0 to 1
            if hap[i] == '0':
                mut = hap[0:i] + '1' + hap[(i+1):]
                nabes['hap_list'].append(mut)
        outList.append(nabes) # append hap list to output list
        k += 1 # next level
        for x in nabes['hap_list']: # recurse on each child node
            if x not in seen: # skip if seen
                add_edge_test(G, x, outList, k, seen)

out = []
seen_nodes = {}
add_edge_test(DG, '00000', out, 1, seen_nodes) # start recursion at '0000'
levels = np.unique(np.array([x['level'] for x in out]))
for lev in levels:
    flat = []
    hap = [x['hap_list'] for x in out if x['level'] == lev]
    for h in hap:
        flat.append(h)
    print(lev, ":", np.unique(np.array(flat)))
#print(out)
#for x in out:
#    print(x)   

1 : ['00001' '00010' '00100' '01000' '10000']
2 : ['00011' '00101' '00110' '01001' '01010' '01100' '10001' '10010' '10100'
 '11000']
3 : ['00111' '01011' '01101' '01110' '10011' '10101' '10110' '11001' '11010'
 '11100']
4 : ['01111' '10111' '11011' '11101' '11110']
5 : ['11111']


In [None]:
# now add edges & assign layers
# layout and draw Gallary:https://networkx.org/documentation/stable/auto_examples/index.html
def add_fit_edge(G, hap, seen, level):
    num_zeros = len([x for x in hap if x == '0'])
    if num_zeros == 0: # reached the end hap (all 1's)
        seen[hap] = 1
        return
    if hap in seen_nodes: # node already reached
        return
    else: # contains at least one zero & not seen
        seen[hap] = 1
        G.nodes[hap]['subset'] = level # networkx uses node feature 'subset' to store levels for multipartite layout
        nabes = [] # start a list of 1-hamming neighbors
        for i in range(len(hap)): # mutate each 0 to 1
            if hap[i] == '0':
                mut = hap[0:i] + '1' + hap[(i+1):]
                if G.nodes[hap]['fit'] > G.nodes[mut]['fit']:
                    G.add_edge(mut, hap)
                else:
                    G.add_edge(hap, mut)
                nabes.append(mut)
        level += 1
        for x in nabes: # recurse on each child node
            if x not in seen: # skip if seen
                G.nodes[x]['subset'] = level
                add_fit_edge(G, x, seen, level)
seen_nodes = {}
add_fit_edge(DG, '0000000000', seen_nodes, 0) # start point
#print(DG.nodes())
#print(len(DG.edges()))

# node coloring by fitness 
#node_colors = range(10)
node_alphas = [ DG.nodes[x]['fit'] for x in DG.nodes]
#print(node_alphas)

# multi-partite layout, subset by num of 0's
pos = nx.multipartite_layout(DG, align = 'horizontal')
plt.figure(figsize = (10,10), dpi=150)
nx.draw(DG, pos = pos,
        #with_labels = True, 
        #node_size = 1000,
        #alpha = node_alphas, #didn't work
       # edgecolors = "black",
#        node_color = ['orange'] * 5 + ['lightgray'] * 10,
#        edgecolors = ['red'] * 5 + ['black'] * 10,
        #font_size = 8
       )
#plt.show()
plt.savefig("graph.png")
#print(DG.adj[strains[0]])

In [23]:
# degree analysis: fitness peak has only in_degrees and no out_degrees
# Ref: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.bar.html
#print(DG.in_degree()) # in-degree
#print(DG.out_degree()) # in-degree
in_deg = [x[1] for x in DG.in_degree()]
out_deg = [x[1] for x in DG.out_degree()]
haps = [x[0] for x in DG.in_degree()]
fits = [DG.nodes[x]['fit'] for x in DG.nodes]
ids = [DG.nodes[x]['id'] for x in DG.nodes]
#print(in_deg)
#print(out_deg)
#print(haps)
dfDeg = pd.DataFrame({'hap': haps, 'in_deg': in_deg, 'out_deg':out_deg, 'fit':fits, 'id': ids} )
print(dfDeg.head())
print(dfDeg[dfDeg['out_deg'] ==0]) # local and global peaks
#dfDeg.plot.bar(x = 'hap', stacked = True)
#dfDeg.plot.bar(x = 'hap', subplots = True)

#print(dfDeg.head())

          hap  in_deg  out_deg       fit    id
0  1001100011      10        0  1.000000  H000
1  0001100011       9        1  0.984447  H001
2  1001100111       9        1  0.961883  H002
3  0001100111       8        2  0.946330  H003
4  1001111011      10        0  0.929235  H004
            hap  in_deg  out_deg       fit    id
0    1001100011      10        0  1.000000  H000
4    1001111011      10        0  0.929235  H004
33   1110011000      10        0  0.793799  H033
35   1010111000      10        0  0.788717  H035
51   1110000000      10        0  0.772744  H051
55   1101000000      10        0  0.770628  H055
59   1010100000      10        0  0.767662  H059
172  1101011000      10        0  0.699863  H172
