# Generation of Molecular Networks and Mass2Motifs

Following the approach described in this publication (https://www.biorxiv.org/content/10.1101/2024.02.09.579616v1.full), we used MolNetEnhancer to merge the molecular networks with the Mass2Motifs obtained by MS2LDA.

In [1]:
# Download the Cytoscape data bundle of GNPS task 2b86dd35cc4a4219bad07c3519ad78bf
# (sent as a POST with an empty body via `-d ""`) and store it as GNPS_output_graphML.zip.
! curl -d "" 'https://gnps.ucsd.edu/ProteoSAFe/DownloadResult?task=2b86dd35cc4a4219bad07c3519ad78bf&view=download_cytoscape_data' -o GNPS_output_graphML.zip
# Unpack the archive into GNPS_output_graphML/; the edge table is read from
# GNPS_output_graphML/networkedges_selfloop/ further down in this notebook.
! unzip -d GNPS_output_graphML/ GNPS_output_graphML.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0

100 12.2M    0 12.2M    0     0  3011k      0 --:--:--  0:00:04 --:--:-- 3173k
Archive:  GNPS_output_graphML.zip
  inflating: GNPS_output_graphML/FEATURE-BASED-MOLECULAR-NETWORKING-2b86dd35-download_cytoscape_data-main.graphml  
  inflating: GNPS_output_graphML/params.xml  
  inflating: GNPS_output_graphML/networking_pairs_results_file_filtered/d520b2f6040a413482158dee0076d195.tsv  
  inflating: GNPS_output_graphML/networkedges_selfloop/d32baed970a644eaa6e48d87ab57fb8a..selfloop  
  inflating: GNPS_output_graphML/clusterinfo_summary/3ca6cd4227284336a39bb22195c8f58f.tsv  
  inflating: GNPS_output_graphML/gnps_molecular_network_graphml/70ff0e719d224f75bff78ae5d86aa68b.graphml  
  inflating: GNPS_output_graphML/gnps_molecular_network_iin_collapse_graphml/87f0c87254b54928b36da1dd4880c44d.graphml  
  inflating: GNPS_output_graphML/spectra/specs_ms.mgf  
  inflating: GNPS_output_graphML/quantification_table_reformatted/d753d2fcd2784a4292187d519c4d81d5.csv  
  inflating: GNPS_output_graphML/D

In [2]:
# Remove the currently installed pandas (`-y` skips the confirmation prompt);
# the following cell installs the pinned version 1.5.3.
!pip uninstall pandas -y

Found existing installation: pandas 1.5.3
Uninstalling pandas-1.5.3:
  Successfully uninstalled pandas-1.5.3


In [3]:
!pip install pandas==1.5.3

Collecting pandas==1.5.3
  Using cached pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (11 kB)
Using cached pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl (10.8 MB)
Installing collected packages: pandas
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tmap 1.2.1 requires networkx==2.2, but you have networkx 3.3 which is incompatible.
pyldavis 3.4.1 requires pandas>=2.0.0, but you have pandas 1.5.3 which is incompatible.
matchms 0.24.2 requires pandas<3.0.0,>=2.0.3, but you have pandas 1.5.3 which is incompatible.[0m[31m
[0mSuccessfully installed pandas-1.5.3


In [4]:
import pandas as pd
import os
import pyMolNetEnhancer
from networkx import *

In [5]:
from ms2lda.motif_parser import load_m2m_folder
import matchms
from matchms.importing import load_from_mgf
from ms2lda.Preprocessing.load_and_clean import clean_spectra
import tomotopy as tp

In [6]:
# Thresholds handed to clean_spectra() when the MGF spectra are cleaned.
preprocessing_parameters = dict(
    min_mz=0,
    max_mz=2000,
    max_frags=1000,
    min_frags=5,
    min_intensity=0.01,
    max_intensity=1,
)

In [7]:
# Read every spectrum of the MGF file into memory, then clean them with the
# thresholds stored in `preprocessing_parameters`.
mushroom_mgf = '/Users/rosinatorres/Documents/PhD/WP1/Project/Code/MS2LDA/MS2LDA/datasets/specs_ms.mgf'
mushroom_spectra = [spectrum for spectrum in load_from_mgf(mushroom_mgf)]
cleaned_spectra = clean_spectra(
    mushroom_spectra,
    preprocessing_parameters,
)


In [8]:
# Gather the metadata dictionary of every cleaned spectrum, in order,
# printing each record as it is collected.
spectra_dict = []

for spectrum in cleaned_spectra:
    metadata = spectrum.metadata
    print(metadata)
    spectra_dict.append(metadata)

{'scans': '32', 'charge': 0, 'collision_energy': '0.0', 'retention_time': 64.419, 'ms_level': '2', 'precursor_mz': 147.1129, 'ionmode': None, 'retention_index': None, 'id': 'spec_0'}
{'scans': '112', 'charge': 1, 'collision_energy': '0.0', 'retention_time': 63.864, 'ms_level': '2', 'precursor_mz': 236.0796, 'ionmode': None, 'retention_index': None, 'id': 'spec_1'}
{'scans': '129', 'charge': 0, 'collision_energy': '0.0', 'retention_time': 68.448, 'ms_level': '2', 'precursor_mz': 213.0752, 'ionmode': None, 'retention_index': None, 'id': 'spec_2'}
{'scans': '132', 'charge': 1, 'collision_energy': '0.0', 'retention_time': 64.418, 'ms_level': '2', 'precursor_mz': 156.0422, 'ionmode': None, 'retention_index': None, 'id': 'spec_3'}
{'scans': '138', 'charge': 1, 'collision_energy': '0.0', 'retention_time': 64.963, 'ms_level': '2', 'precursor_mz': 175.1192, 'ionmode': None, 'retention_index': None, 'id': 'spec_4'}
{'scans': '189', 'charge': 1, 'collision_energy': '0.0', 'retention_time': 65.565

In [9]:
# One row per cleaned spectrum, one column per metadata key.
spectra_dict_df = pd.DataFrame(data=spectra_dict)

In [10]:
# Expose the positional row index as an explicit "document" column; it is the
# key used later to join the spectra with the LDA documents.
spectra_dict_df = spectra_dict_df.reset_index().rename(columns={"index": "document"})

In [11]:
spectra_dict_df

Unnamed: 0,document,scans,charge,collision_energy,retention_time,ms_level,precursor_mz,ionmode,retention_index,id
0,0,32,0,0.0,64.419,2,147.1129,,,spec_0
1,1,112,1,0.0,63.864,2,236.0796,,,spec_1
2,2,129,0,0.0,68.448,2,213.0752,,,spec_2
3,3,132,1,0.0,64.418,2,156.0422,,,spec_3
4,4,138,1,0.0,64.963,2,175.1192,,,spec_4
...,...,...,...,...,...,...,...,...,...,...
2709,2709,18548,1,0.0,1920.142,2,824.5552,,,spec_2709
2710,2710,18551,1,0.0,1925.969,2,786.5998,,,spec_2710
2711,2711,18557,1,0.0,1936.681,2,579.4021,,,spec_2711
2712,2712,18561,1,0.0,1938.409,2,301.1798,,,spec_2712


In [12]:
# Discard the metadata columns that are not needed downstream, leaving
# document, scans, retention_time and precursor_mz.
unused_metadata_columns = ['charge', 'collision_energy', 'ms_level', 'ionmode', 'retention_index', 'id']
spectra_dict_df = spectra_dict_df.drop(columns=unused_metadata_columns)

In [13]:
# Rename both columns in a single pass.
# NOTE(review): 'precursormass' / 'parentrt' are presumably the names expected by
# Mass2Motif_2_Network — confirm against pyMolNetEnhancer.
spectra_dict_df = spectra_dict_df.rename(
    columns={
        'precursor_mz': 'precursormass',
        'retention_time': 'parentrt',
    }
)



In [14]:
spectra_dict_df

Unnamed: 0,document,scans,parentrt,precursormass
0,0,32,64.419,147.1129
1,1,112,63.864,236.0796
2,2,129,68.448,213.0752
3,3,132,64.418,156.0422
4,4,138,64.963,175.1192
...,...,...,...,...
2709,2709,18548,1920.142,824.5552
2710,2710,18551,1925.969,786.5998
2711,2711,18557,1936.681,579.4021
2712,2712,18561,1938.409,301.1798


In [15]:
# Column-reordered selection of the spectra table.
# NOTE(review): the merge further down uses `spectra_dict_df`, not this reordered frame.
column_order = ['scans', 'precursormass', 'parentrt', 'document']
spectra_dict_df_order = spectra_dict_df[column_order]


In [16]:
spectra_dict_df_order

Unnamed: 0,scans,precursormass,parentrt,document
0,32,147.1129,64.419,0
1,112,236.0796,63.864,1
2,129,213.0752,68.448,2
3,132,156.0422,64.418,3
4,138,175.1192,64.963,4
...,...,...,...,...
2709,18548,824.5552,1920.142,2709
2710,18551,786.5998,1925.969,2710
2711,18557,579.4021,1936.681,2711
2712,18561,301.1798,1938.409,2712


In [17]:
# Load the saved tomotopy LDA model from disk.
lda_model_path = '/Users/rosinatorres/Documents/PhD/WP1/Project/Code/MS2LDA/MS2LDA/notebooks/Paper_results/CaseStudy_Mushrooms_200_2/ms2lda.bin'
lda_model = tp.LDAModel.load(lda_model_path)

In [18]:
# Build a long-format table with one row per (document, motif) pair whose topic
# probability is strictly positive.
# `overlap` is the sum of the document's whole topic distribution, so it is the same
# on every row of a document (it prints as 1.0 in the output below).
data = []

for doc_index, doc in enumerate(lda_model.docs):
    topic_dist = doc.get_topic_dist()
    overlap = sum(topic_dist)
    data.extend(
        {
            "document": doc_index,
            "motif": f"motif_{topic_id}",
            "probability": prob,
            "overlap": overlap,
        }
        for topic_id, prob in enumerate(topic_dist)
        if prob > 0
    )

spectra_motif_df = pd.DataFrame(data)
print(spectra_motif_df)


        document      motif  probability  overlap
0              0    motif_0     0.000012      1.0
1              0    motif_1     0.000015      1.0
2              0    motif_2     0.000024      1.0
3              0    motif_3     0.000029      1.0
4              0    motif_4     0.000016      1.0
...          ...        ...          ...      ...
542795      2713  motif_195     0.000009      1.0
542796      2713  motif_196     0.000002      1.0
542797      2713  motif_197     0.000007      1.0
542798      2713  motif_198     0.000012      1.0
542799      2713  motif_199     0.000007      1.0

[542800 rows x 4 columns]


In [19]:
# Inner join of spectrum metadata and per-document motif probabilities on "document".
motifs = pd.merge(
    spectra_dict_df,
    spectra_motif_df,
    on="document",
    how="inner",
)

In [20]:
motifs

Unnamed: 0,document,scans,parentrt,precursormass,motif,probability,overlap
0,0,32,64.419,147.1129,motif_0,0.000012,1.0
1,0,32,64.419,147.1129,motif_1,0.000015,1.0
2,0,32,64.419,147.1129,motif_2,0.000024,1.0
3,0,32,64.419,147.1129,motif_3,0.000029,1.0
4,0,32,64.419,147.1129,motif_4,0.000016,1.0
...,...,...,...,...,...,...,...
542795,2713,18562,1937.780,557.4202,motif_195,0.000009,1.0
542796,2713,18562,1937.780,557.4202,motif_196,0.000002,1.0
542797,2713,18562,1937.780,557.4202,motif_197,0.000007,1.0
542798,2713,18562,1937.780,557.4202,motif_198,0.000012,1.0


In [21]:
# The spectrum metadata stores scan numbers as strings; convert them to numbers.
# NOTE(review): presumably so they can be matched against the integer CLUSTERID
# columns of the GNPS edge table — confirm.
motifs = motifs.assign(scans=pd.to_numeric(motifs["scans"]))

In [22]:
motifs['scans'].max()

18562

In [23]:
# Load the GNPS edge table from the unzipped download.
selfloop_dir = 'GNPS_output_graphML/networkedges_selfloop/'
# os.listdir() returns entries in arbitrary order and also lists hidden files
# (e.g. .DS_Store on macOS), so select the edge file by its extension instead of
# blindly taking entry [0]; sorting keeps the choice deterministic.
selfloop_files = sorted(
    name for name in os.listdir(selfloop_dir) if name.endswith('.selfloop')
)
if not selfloop_files:
    raise FileNotFoundError(f"No .selfloop edge file found in {selfloop_dir}")
edges = pd.read_csv(os.path.join(selfloop_dir, selfloop_files[0]), sep='\t')


In [24]:
edges

Unnamed: 0,CLUSTERID1,CLUSTERID2,DeltaMZ,MEH,Cosine,OtherScore,ComponentIndex,EdgeAnnotation
0,342,433,82.991,342.0,0.9614,342.0,1,
1,342,760,28.014,342.0,0.9596,342.0,1,CH2N
2,250,612,-45.063,250.0,0.9519,250.0,1,
3,250,276,-73.077,250.0,0.9788,250.0,1,C3H9N2
4,276,612,28.014,276.0,0.9406,276.0,1,CH2N
...,...,...,...,...,...,...,...,...
4724,17884,17884,0.000,1.0,1.0000,1.0,-1,
4725,12520,12520,0.000,1.0,1.0000,1.0,-1,
4726,12738,12738,0.000,1.0,1.0000,1.0,-1,
4727,17524,17524,0.000,1.0,1.0000,1.0,-1,


In [25]:
edges['CLUSTERID2'].max()

18562

In [26]:
from pyMolNetEnhancer import Mass2Motif_2_Network

In [27]:
# Combine the GNPS edges with the per-spectrum motif table.
# prob / overlap / top are forwarded unchanged to pyMolNetEnhancer; see its
# documentation for their exact semantics.
# NOTE(review): the `overlap` column of `motifs` was filled with sum(topic_dist), which
# prints as 1.0 for every row, so the overlap=0.3 cut-off presumably never excludes
# anything — confirm this is intended.
motif_network = Mass2Motif_2_Network(
    edges,
    motifs,
    prob=0.10,
    overlap=0.3,
    top=3,
)


  edges = edges.append(motifedges)


In [28]:
motif_network

{'nodes':            document              parentrt         precursormass  \
 scans                                                             
 32           [0, 0]      [64.419, 64.419]  [147.1129, 147.1129]   
 112          [1, 1]      [63.864, 63.864]  [236.0796, 236.0796]   
 129          [2, 2]      [68.448, 68.448]  [213.0752, 213.0752]   
 132          [3, 3]      [64.418, 64.418]  [156.0422, 156.0422]   
 138          [4, 4]      [64.963, 64.963]  [175.1192, 175.1192]   
 ...             ...                   ...                   ...   
 18548  [2709, 2709]  [1920.142, 1920.142]  [824.5552, 824.5552]   
 18551  [2710, 2710]  [1925.969, 1925.969]  [786.5998, 786.5998]   
 18557  [2711, 2711]  [1936.681, 1936.681]  [579.4021, 579.4021]   
 18561  [2712, 2712]  [1938.409, 1938.409]  [301.1798, 301.1798]   
 18562        [2713]             [1937.78]            [557.4202]   
 
                         motif                                probability  \
 scans                      

In [29]:
motif_network['nodes'].head()


Unnamed: 0_level_0,document,parentrt,precursormass,motif,probability,overlap,motif_104,motif_148,motif_108,motif_166,...,motif_155,motif_173,motif_110,motif_60,motif_19,motif_5,motif_193,motif_61,motif_32,motif_170
scans,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32,"[0, 0]","[64.419, 64.419]","[147.1129, 147.1129]","[motif_104, motif_148]","[0.36406275629997253, 0.6279398202896118]","[0.9999999772248884, 0.9999999772248884]",1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112,"[1, 1]","[63.864, 63.864]","[236.0796, 236.0796]","[motif_108, motif_166]","[0.7026256918907166, 0.29435989260673523]","[0.9999999957924501, 0.9999999957924501]",0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
129,"[2, 2]","[68.448, 68.448]","[213.0752, 213.0752]","[motif_23, motif_148]","[0.13132233917713165, 0.7854995131492615]","[0.9999999785250111, 0.9999999785250111]",0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
132,"[3, 3]","[64.418, 64.418]","[156.0422, 156.0422]","[motif_165, motif_192]","[0.27278199791908264, 0.6671414375305176]","[0.9999999684055183, 0.9999999684055183]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
138,"[4, 4]","[64.963, 64.963]","[175.1192, 175.1192]","[motif_22, motif_73]","[0.3911413848400116, 0.5795403122901917]","[0.9999999721569566, 0.9999999721569566]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# The node table stores document / parentrt / precursormass as lists that repeat the
# same value once per motif; keep only the first entry so each cell holds a scalar.
def _first_if_list(value):
    """Return the first element of a list, or the value itself when it is not a list."""
    if isinstance(value, list):
        return value[0]
    return value


df = motif_network['nodes']
flatten_cols = ["document", "parentrt", "precursormass"]

for col in flatten_cols:
    df[col] = df[col].map(_first_if_list)

motif_network['nodes'] = df

In [31]:
motif_network['nodes']

Unnamed: 0_level_0,document,parentrt,precursormass,motif,probability,overlap,motif_104,motif_148,motif_108,motif_166,...,motif_155,motif_173,motif_110,motif_60,motif_19,motif_5,motif_193,motif_61,motif_32,motif_170
scans,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32,0,64.419,147.1129,"[motif_104, motif_148]","[0.36406275629997253, 0.6279398202896118]","[0.9999999772248884, 0.9999999772248884]",1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112,1,63.864,236.0796,"[motif_108, motif_166]","[0.7026256918907166, 0.29435989260673523]","[0.9999999957924501, 0.9999999957924501]",0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
129,2,68.448,213.0752,"[motif_23, motif_148]","[0.13132233917713165, 0.7854995131492615]","[0.9999999785250111, 0.9999999785250111]",0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
132,3,64.418,156.0422,"[motif_165, motif_192]","[0.27278199791908264, 0.6671414375305176]","[0.9999999684055183, 0.9999999684055183]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
138,4,64.963,175.1192,"[motif_22, motif_73]","[0.3911413848400116, 0.5795403122901917]","[0.9999999721569566, 0.9999999721569566]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18548,2709,1920.142,824.5552,"[motif_152, motif_187]","[0.23110973834991455, 0.725723147392273]","[0.9999999622953055, 0.9999999622953055]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18551,2710,1925.969,786.5998,"[motif_80, motif_152]","[0.3851470649242401, 0.6115683913230896]","[1.0000000107618234, 1.0000000107618234]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18557,2711,1936.681,579.4021,"[motif_88, motif_186]","[0.8461236953735352, 0.10356413573026657]","[1.0000000156255737, 1.0000000156255737]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18561,2712,1938.409,301.1798,"[motif_98, motif_186]","[0.14202067255973816, 0.8521063923835754]","[0.9999999713438683, 0.9999999713438683]",0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
motif_network['edges'].head()

Unnamed: 0,CLUSTERID1,interaction,CLUSTERID2,DeltaMZ,MEH,Cosine,OtherScore,ComponentIndex,EdgeAnnotation,shared_motifs,TopSharedMotifs
0,342,cosine,433,82.991,342.0,0.9614,342.0,1,,"[motif_152, motif_73]","[motif_152, motif_73, motif_123]"
1,342,cosine,760,28.014,342.0,0.9596,342.0,1,CH2N,"[motif_152, motif_73]","[motif_152, motif_73, motif_123]"
2,250,cosine,612,-45.063,250.0,0.9519,250.0,1,,"[motif_152, motif_123]","[motif_152, motif_73, motif_123]"
3,250,cosine,276,-73.077,250.0,0.9788,250.0,1,C3H9N2,"[motif_152, motif_123]","[motif_152, motif_73, motif_123]"
4,276,cosine,612,28.014,276.0,0.9406,276.0,1,CH2N,"[motif_152, motif_123]","[motif_152, motif_73, motif_123]"


In [33]:
# Export both tables as tab-separated files; only the node table keeps its index (scans).
edges_tsv_path = "Mass2Motifs_Edges_Classical.tsv"
nodes_tsv_path = "Mass2Motifs_Nodes_Classical.tsv"
motif_network['edges'].to_csv(edges_tsv_path, sep='\t', index=False)
motif_network['nodes'].to_csv(nodes_tsv_path, sep='\t', index=True)

In [34]:
from pyMolNetEnhancer import make_motif_graphml
import networkx as nx

In [35]:
!pip show networkx

Name: networkx
Version: 3.3
Summary: Python package for creating and manipulating graphs and networks
Home-page: 
Author: 
Author-email: Aric Hagberg <hagberg@lanl.gov>
License: 
Location: /Users/rosinatorres/anaconda3/envs/MS2LDA_v2/lib/python3.11/site-packages
Requires: 
Required-by: matchms, pyvis, tmap


In [36]:
!conda install networkx 

Channels:
 - conda-forge
 - bioconda
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): / ^C


In [37]:
import networkx as nx

In [38]:
type(motif_network['nodes'])

pandas.core.frame.DataFrame

In [None]:
def make_motif_graphml(nodes, edges):
    """Create a network with Mass2Motifs mapped on nodes and shared Mass2Motifs mapped as multiple edges

    List-valued cells are serialised to comma-separated strings. The conversion is done
    element by element with ``Series.apply``: the previous ``Series.agg(lambda ...)`` form
    silently fell back to calling the lambda on the *whole* column whenever a cell was not
    iterable (e.g. after ``document``/``parentrt``/``precursormass`` were flattened to
    scalars), which wrote one joined string of the entire column onto every node.

    The input dataframes are not modified; the conversion works on copies.

    NOTE: this notebook-local definition shadows the function of the same name imported
    from pyMolNetEnhancer, and a later cell defines ``make_motif_graphml`` again; on a
    top-to-bottom run the later definition is the one in effect.

    :param nodes: A dataframe showing Mass2Motifs per node
    :type nodes: pandas.core.frame.DataFrame
    :param edges: A dataframe showing shared Mass2Motifs for each network pair
    :type edges: pandas.core.frame.DataFrame
    :return: A network with Mass2Motifs mapped on nodes and shared Mass2Motifs mapped as multiple edges
    :rtype: networkx.classes.multigraph.MultiGraph

    """
    def _edge_text(value):
        # the literal string 'None' marks "no shared motifs" and becomes an empty string
        if isinstance(value, str):
            return '' if value == 'None' else value
        if isinstance(value, (list, tuple)):
            return ','.join(map(str, value))
        # anything else (e.g. NaN) is passed through untouched
        return value

    def _node_text(value):
        if isinstance(value, (list, tuple)):
            return ','.join(map(str, value))
        # scalars are stringified so that every cell of the column has the same type
        return str(value)

    # work on copies so the caller's dataframes keep their original list-valued cells
    edge_table = edges.copy()
    node_table = nodes.copy()

    # convert lists to strings
    for column in ('shared_motifs', 'TopSharedMotifs'):
        edge_table[column] = edge_table[column].apply(_edge_text)

    # create motif network with multiple edges; every non-endpoint column becomes an edge attribute
    attribute_columns = [c for c in edge_table.columns if c not in ('CLUSTERID1', 'CLUSTERID2')]
    MG = nx.from_pandas_edgelist(edge_table, 'CLUSTERID1', 'CLUSTERID2',
                                 edge_attr=attribute_columns,
                                 create_using=nx.MultiGraph())

    # map node attributes to network
    for column in ('precursormass', 'parentrt', 'document', 'motif', 'probability', 'overlap'):
        node_table[column] = node_table[column].apply(_node_text)

    for column in node_table.columns:
        nx.set_node_attributes(MG, node_table[column].to_dict(), column)

    return MG

In [None]:
import networkx as nx
import pandas as pd

def make_motif_graphml(nodes, edges):
    """Create a network with Mass2Motifs mapped on nodes and shared Mass2Motifs mapped as multiple edges

    Cells holding a list are serialised to a comma-separated string. In the node columns
    ``precursormass``, ``parentrt``, ``document``, ``motif``, ``probability`` and ``overlap``
    every non-list value is converted with ``str``; in the edge columns ``shared_motifs`` and
    ``TopSharedMotifs`` the literal string ``'None'`` becomes ``''`` and other non-list
    values are kept as they are.

    The conversion is done on copies, so the dataframes passed in are left unchanged and
    the function can be called repeatedly on the same inputs.

    NOTE: this definition replaces both the function imported from pyMolNetEnhancer and the
    definition of the same name in the preceding cell.

    :param nodes: A dataframe showing Mass2Motifs per node
    :type nodes: pandas.core.frame.DataFrame
    :param edges: A dataframe showing shared Mass2Motifs for each network pair
    :type edges: pandas.core.frame.DataFrame
    :return: A network with Mass2Motifs mapped on nodes and shared Mass2Motifs mapped as multiple edges
    :rtype: networkx.classes.multigraph.MultiGraph
    """
    endpoint_columns = ('CLUSTERID1', 'CLUSTERID2')
    list_like_node_columns = ('precursormass', 'parentrt', 'document', 'motif', 'probability', 'overlap')

    def _serialise_edge_cell(cell):
        if isinstance(cell, list):
            return ','.join(map(str, cell))
        if isinstance(cell, str) and cell == 'None':
            return ''
        return cell

    def _serialise_node_cell(cell):
        if isinstance(cell, list):
            return ','.join(map(str, cell))
        return str(cell)

    # copies protect the caller's frames from being rewritten as a side effect
    edges_out = edges.copy()
    nodes_out = nodes.copy()

    for column in ('shared_motifs', 'TopSharedMotifs'):
        edges_out[column] = edges_out[column].apply(_serialise_edge_cell)

    # every column except the two endpoints is attached to the edge as an attribute
    MG = nx.from_pandas_edgelist(edges_out, 'CLUSTERID1', 'CLUSTERID2',
                                 edge_attr=[c for c in edges_out.columns if c not in endpoint_columns],
                                 create_using=nx.MultiGraph())

    # convert lists into strings for each node attribute
    for column in list_like_node_columns:
        nodes_out[column] = nodes_out[column].apply(_serialise_node_cell)

    # one node attribute per column, keyed by the dataframe index
    for column in nodes_out.columns:
        nx.set_node_attributes(MG, nodes_out[column].to_dict(), column)

    return MG


In [None]:
# Build the multigraph from the node and edge tables produced by Mass2Motif_2_Network.
MG = make_motif_graphml(
    motif_network['nodes'],
    motif_network['edges'],
)

In [None]:
# Write the motif network to GraphML, letting networkx infer numeric attribute types.
graphml_path = "Motif_Network_Classical_mushrooms_n200.graphml"
nx.write_graphml(MG, graphml_path, infer_numeric_types=True)
