# Proteins

Standard name - systematic name

STE24 - YJR117W

RCE1  - YMR274C

RAM1  - YDL090C

These three proteins do not form complexes with each other.


Found three useful proteins:

- YDR388W (Cytoskeleton one): community 12; betweenness, information, PCA; general partition

- YLR025W (Endosomal one): community 8; information, subgraph, PCA; general partition

- YMR037C (DNA binding transcription): community 0; betweenness, information; SA partition

In [17]:
import networkx as nx

In [31]:
'''
STE24 - YJR117W
RCE1  - YMR274C
RAM1  - YDL090C
'''
# Proteins that we focus on
# Display labels, "STANDARD(systematic)", keyed by systematic name.
_PROTEIN_LABELS = {
    'YJR117W': 'STE24(YJR117W)',
    'YMR274C': 'RCE1(YMR274C)',
    'YDL090C': 'RAM1(YDL090C)',
}


def print_protein(str):
    """Return the display label for a systematic protein name.

    Despite its name, this function prints nothing; it returns a string.

    Args:
        str: Systematic protein name, e.g. ``'YJR117W'``. The parameter
            name shadows the builtin but is kept so that existing
            keyword callers continue to work.

    Returns:
        ``'STANDARD(systematic)'`` for the three focus proteins. Any other
        name is returned unchanged instead of ``None``, so callers that
        concatenate the result into a string do not raise ``TypeError``.
    """
    return _PROTEIN_LABELS.get(str, str)

In [18]:
# Build the graph G from the weighted edge list in cleanData.txt.
# Lines starting with "#" are skipped as comments; node labels stay strings.
G = nx.read_weighted_edgelist(
    "cleanData.txt",
    comments="#",
    nodetype=str,
)

In [25]:
# Systematic names of the target proteins, one per line.
all_proteins = [
    'YJR117W',  # STE24
    'YMR274C',  # RCE1
    'YDL090C',  # RAM1
]

# First set

In [19]:
# Representatives found in each partition.
# Number of chosen adjacent communities: n = 4 for general, n = 3 for SA;
# only one representative is chosen per community.
general = {
    'YER095W', 'YBR010W', 'YLR025W', 'YBL037W',
    'YGL045W', 'YDR477W', 'YLR113W', 'YDR388W',
}
SA = {
    'YGR152C', 'YFL026W', 'YLR113W', 'YMR037C',
    'YNL098C', 'YNL298W', 'YKL178C',
}

# First set: everything found in either partition
# ('YLR113W' appears in both and is counted once).
first_set = general | SA

print(first_set)

{'YBL037W', 'YGR152C', 'YBR010W', 'YDR477W', 'YFL026W', 'YDR388W', 'YLR025W', 'YNL098C', 'YKL178C', 'YGL045W', 'YMR037C', 'YER095W', 'YNL298W', 'YLR113W'}


In [20]:
# Proteins listed in the previous proposal.
previous = {
    'YDL100C', 'YJL154C', 'YDR072C', 'YLL013C', 'YNL064C', 'YBL037W', 'YOL147C',
    'YGL013C', 'YLR113W', 'YOR065W', 'YDR388W', 'YOL013C', 'YPL106C', 'YDR477W',
}

# Collect every protein from either partition that was already proposed.
overlap_proteins = set()
for candidates in (general, SA):
    overlap_proteins.update(node for node in candidates if node in previous)

n_overlap = len(overlap_proteins)

print(f'There are {n_overlap} as the previous proposal: {overlap_proteins}')

There are 4 as the previous proposal: {'YBL037W', 'YDR477W', 'YLR113W', 'YDR388W'}


# Second set

In [44]:
# Adjacent communities directly connected to the target proteins,
# one representative each (n = 4 in general, n = 3 in SA).
general_direct_adjacent = {
    'YOR065W', 'YPL106C', 'YDR477W', 'YDR072C', 'Q0130', 'YLL013C',
    'YGR281W', 'YBL037W', 'YNL064C', 'YLR113W', 'YAL058W', 'YDR388W',
}

# Ranked adjacent communities with two representatives each
# (n = 4 in general, n = 3 in SA); only the second representatives are
# stored here. These two sets are defined but currently left out of the
# union below.
general_second_representatives = {
    'YHL007C', 'YML032C', 'YOL062C', 'YLR113W', 'YCL008C', 'YLR262C',
    'YNL030W', 'YHL027W', 'YDL077C', 'YBL016W', 'YOL012C',
}
SA_second_representatives = {
    'YKL209C', 'YFL026W', 'YJR092W', 'YJL128C', 'YIL033C', 'YLR319C',
    'YMR037C', 'YNL098C', 'YNL298W', 'YGR040W', 'YKL178C',
}

# More ranked communities, one representative each: the general partition
# is widened to n = 7, while SA is left as is because expanding it makes
# no sense.
# NOTE(review): the original note also repeated "n = 4 in general,
# n = 3 in SA" here - confirm which n applies.
more_ranked = {
    'YMR250W', 'YER095W', 'YBR010W', 'YLR025W', 'YBL037W', 'YPL106C',
    'YGR281W', 'YGL026C', 'YGL045W', 'YNL064C', 'YDR477W', 'YDR072C',
    'YLR113W', 'YNR001C', 'YAL058W', 'YDR388W',
}

# Sources that feed the second set. Alternative including the
# second-representative sets:
# [general_direct_adjacent, general_second_representatives, SA_second_representatives, more_ranked]
second_found = [general_direct_adjacent, more_ranked]

# Second set: proteins that are new, i.e. in neither the first set nor
# the previous proposal.
second_set = set()
for candidates in second_found:
    second_set.update(
        name for name in candidates
        if not (name in first_set or name in previous)
    )

print(f'Second set of potein: {second_set}')

Second set of potein: {'YGR281W', 'YGL026C', 'YNR001C', 'YMR250W', 'Q0130', 'YAL058W'}


In [43]:
# For every target protein, report the shortest path to each protein in
# the second set. An unreachable or missing protein is reported on its own
# line instead of aborting the whole report with an exception.
for target_protein in all_proteins:
    print('- ' + print_protein(target_protein) + ':')
    for rep in second_set:
        try:
            # weight=None requests a hop-count (unweighted) path; networkx
            # ignores `method` in that case.
            path = nx.shortest_path(G, source=target_protein, target=rep, weight=None, method='dijkstra')
        except (nx.NetworkXNoPath, nx.NodeNotFound) as err:
            print(f"Shortest path to {rep}: not available ({err})")
        else:
            print(f"Shortest path to {rep}: {path}")
    print('')

- STE24(YJR117W):
Shortest path to YGR281W: ['YJR117W', 'YAL005C', 'YGR281W']
Shortest path to Q0130: ['YJR117W', 'YCL018W', 'Q0130']
Shortest path to YAL058W: ['YJR117W', 'YAL058W']

- RCE1(YMR274C):
Shortest path to YGR281W: ['YMR274C', 'YEL021W', 'YGR281W']
Shortest path to Q0130: ['YMR274C', 'YKR087C', 'Q0130']
Shortest path to YAL058W: ['YMR274C', 'YAL042W', 'YAL058W']

- RAM1(YDL090C):
Shortest path to YGR281W: ['YDL090C', 'YAL005C', 'YGR281W']
Shortest path to Q0130: ['YDL090C', 'YOL121C', 'Q0130']
Shortest path to YAL058W: ['YDL090C', 'YAL005C', 'YAL058W']

