Create a dictionary for the building block 

Key(Modification)-Value(Molecular weight) 

Key (e.g. 100) -> 1 block, 0 O-acetylation, 0 N-deacetylation 

For combination of *D*-*iso*-glutamine and *D*-aspartic acid

In [0]:
# Combination code -> label (bridge residue followed by peptide residue);
# the label is used later to name the output file
combi_code = {
  0: 'AspGln',
  1: 'AspGlu',
  2: 'AsnGln',
  3: 'AsnGlu',
}

# Building-block molecular weights, one dictionary per combination code.
# Key digits: number of blocks, O-acetylations, N-deacetylations.
blocks_set = [
  # Bridge: Asp, peptide: Gln, code = 0
  {'100': 1009.4452, '110': 1051.4557, '101': 967.4346, '111': 1009.4452},
  # Bridge: Asp, peptide: Glu, code = 1
  {'100': 1010.4292, '110': 1052.4397, '101': 968.4186, '111': 1010.4292},
  # Bridge: Asn, peptide: Gln, code = 2
  {'100': 1008.4611, '110': 1050.4717, '101': 966.4506, '111': 1008.4611},
  # Bridge: Asn, peptide: Glu, code = 3
  {'100': 1009.4452, '110': 1051.4557, '101': 967.4346, '111': 1009.4452},
]

# Choose the combination to work with
code = 0
blocks = blocks_set[code]

# Molecular weights of the species that are added or removed in later steps
water, ala, lactate = 18.0106, 89.0477, 90.0317
asp, asn = 133.0375, 132.0535

# Codes 0 and 1 use an Asp bridge; the remaining codes use an Asn bridge
bridge = asp if code in (0, 1) else asn

# Threshold for MS value comparison
thres = 0.05
# Number of decimal places kept when rounding calculated molecular weights
round_pos = 4
# Tolerance used when eliminating duplicate molecular weights
criteria = 0.0005

Create a dictionary for monomers

Eliminate duplicate values

In [42]:
# Keep one key per distinct rounded mass: a block is skipped when its
# rounded mass is already stored under an earlier key
monomers = {}
for block_key, block_mass in blocks.items():
  rounded_mass = round(block_mass, round_pos)
  if rounded_mass in monomers.values():
    continue
  monomers[block_key] = rounded_mass

print(monomers)
print(len(monomers))

{'100': 1009.4452, '110': 1051.4557, '101': 967.4346}
3


Create a dictionary for dimers

Eliminate duplicates when difference is less than criteria

In [43]:
# Pair every monomer with every building block; one water mass is
# subtracted for each added block
raw_dimers = {}
for mono_key, mono_mass in monomers.items():
  for block_key, block_mass in blocks.items():
    # Adding the keys as integers adds up their digits position by position
    # (as long as no digit carries). A key reached by several pairings keeps
    # the value written last.
    summed_key = str(int(mono_key) + int(block_key))
    raw_dimers[summed_key] = round(mono_mass + block_mass - water, round_pos)

print(raw_dimers)
print(len(raw_dimers))

# Keep a candidate only when no already-kept mass lies within `criteria` of it
dimers = {}
for cand_key, cand_mass in raw_dimers.items():
  is_duplicate = any(abs(cand_mass - kept_mass) <= criteria
                     for kept_mass in dimers.values())
  if not is_duplicate:
    dimers[cand_key] = cand_mass

print(dimers)
print(len(dimers))

{'200': 2000.8798, '210': 2042.8903, '201': 1958.8692, '211': 2000.8797, '220': 2084.9008, '221': 2042.8903, '202': 1916.8586, '212': 1958.8692}
8
{'200': 2000.8798, '210': 2042.8903, '201': 1958.8692, '220': 2084.9008, '202': 1916.8586}
5


Create a dictionary for trimers

Eliminate duplicates when difference is less than criteria

In [44]:
# Extend every retained dimer by one building block; one water mass is
# subtracted for each added block
raw_trimers = {}
for dimer_key, dimer_mass in dimers.items():
  for block_key, block_mass in blocks.items():
    # Keys are added as integers; a key reached by several pairings keeps
    # the value written last
    summed_key = str(int(dimer_key) + int(block_key))
    raw_trimers[summed_key] = round(dimer_mass + block_mass - water, round_pos)

print(raw_trimers)
print(len(raw_trimers))

# Keep a candidate only when no already-kept mass lies within `criteria` of it
trimers = {}
for cand_key, cand_mass in raw_trimers.items():
  is_duplicate = any(abs(cand_mass - kept_mass) <= criteria
                     for kept_mass in trimers.values())
  if not is_duplicate:
    trimers[cand_key] = cand_mass

print(trimers)
print(len(trimers))

{'300': 2992.3144, '310': 3034.3249, '301': 2950.3038, '311': 2992.3143, '320': 3076.3354, '321': 3034.3248, '302': 2908.2932, '312': 2950.3037, '330': 3118.3459, '331': 3076.3354, '303': 2866.2826, '313': 2908.2932}
12
{'300': 2992.3144, '310': 3034.3249, '301': 2950.3038, '320': 3076.3354, '302': 2908.2932, '330': 3118.3459, '303': 2866.2826}
7


Create a dictionary for tetramers

Eliminate duplicates when difference is less than criteria

In [45]:
# Extend every retained trimer by one building block; one water mass is
# subtracted for each added block
raw_tetramers = {}
for trimer_key, trimer_mass in trimers.items():
  for block_key, block_mass in blocks.items():
    # Keys are added as integers; a key reached by several pairings keeps
    # the value written last
    summed_key = str(int(trimer_key) + int(block_key))
    raw_tetramers[summed_key] = round(trimer_mass + block_mass - water, round_pos)

print(raw_tetramers)
print(len(raw_tetramers))

# Keep a candidate only when no already-kept mass lies within `criteria` of it
tetramers = {}
for cand_key, cand_mass in raw_tetramers.items():
  is_duplicate = any(abs(cand_mass - kept_mass) <= criteria
                     for kept_mass in tetramers.values())
  if not is_duplicate:
    tetramers[cand_key] = cand_mass

print(tetramers)
print(len(tetramers))

{'400': 3983.749, '410': 4025.7595, '401': 3941.7384, '411': 3983.7489, '420': 4067.77, '421': 4025.7594, '402': 3899.7278, '412': 3941.7383, '430': 4109.7805, '431': 4067.7699, '403': 3857.7172, '413': 3899.7277, '440': 4151.791, '441': 4109.7805, '404': 3815.7066, '414': 3857.7172}
16
{'400': 3983.749, '410': 4025.7595, '401': 3941.7384, '420': 4067.77, '402': 3899.7278, '430': 4109.7805, '403': 3857.7172, '440': 4151.791, '404': 3815.7066}
9


Create a dictionary for pentamers

Eliminate duplicates when difference is less than criteria

In [46]:
# Extend every retained tetramer by one building block; one water mass is
# subtracted for each added block
raw_pentamers = {}
for tetramer_key, tetramer_mass in tetramers.items():
  for block_key, block_mass in blocks.items():
    # Keys are added as integers; a key reached by several pairings keeps
    # the value written last
    summed_key = str(int(tetramer_key) + int(block_key))
    raw_pentamers[summed_key] = round(tetramer_mass + block_mass - water, round_pos)

print(raw_pentamers)
print(len(raw_pentamers))

# Keep a candidate only when no already-kept mass lies within `criteria` of it
pentamers = {}
for cand_key, cand_mass in raw_pentamers.items():
  is_duplicate = any(abs(cand_mass - kept_mass) <= criteria
                     for kept_mass in pentamers.values())
  if not is_duplicate:
    pentamers[cand_key] = cand_mass

print(pentamers)
print(len(pentamers))

{'500': 4975.1836, '510': 5017.1941, '501': 4933.173, '511': 4975.1835, '520': 5059.2046, '521': 5017.194, '502': 4891.1624, '512': 4933.1729, '530': 5101.2151, '531': 5059.2045, '503': 4849.1518, '513': 4891.1623, '540': 5143.2256, '541': 5101.215, '504': 4807.1412, '514': 4849.1517, '550': 5185.2361, '551': 5143.2256, '505': 4765.1306, '515': 4807.1412}
20
{'500': 4975.1836, '510': 5017.1941, '501': 4933.173, '520': 5059.2046, '502': 4891.1624, '530': 5101.2151, '503': 4849.1518, '540': 5143.2256, '504': 4807.1412, '550': 5185.2361, '505': 4765.1306}
11


Combine monomers, dimers, trimers, tetramers and pentamers into one dictionary, "Raw Final A"

In [47]:
# Merge the five oligomer dictionaries in order of increasing size; if a key
# appeared in more than one of them, the later dictionary's value would win
raw_final_A = {**monomers, **dimers, **trimers, **tetramers, **pentamers}

print(raw_final_A)
print(len(raw_final_A))

{'100': 1009.4452, '110': 1051.4557, '101': 967.4346, '200': 2000.8798, '210': 2042.8903, '201': 1958.8692, '220': 2084.9008, '202': 1916.8586, '300': 2992.3144, '310': 3034.3249, '301': 2950.3038, '320': 3076.3354, '302': 2908.2932, '330': 3118.3459, '303': 2866.2826, '400': 3983.749, '410': 4025.7595, '401': 3941.7384, '420': 4067.77, '402': 3899.7278, '430': 4109.7805, '403': 3857.7172, '440': 4151.791, '404': 3815.7066, '500': 4975.1836, '510': 5017.1941, '501': 4933.173, '520': 5059.2046, '502': 4891.1624, '530': 5101.2151, '503': 4849.1518, '540': 5143.2256, '504': 4807.1412, '550': 5185.2361, '505': 4765.1306}
35


Account for removing (R) asp bridge for the first unit

'R' = Removed, 'N' = Not removed

Store the result in the dictionary "Raw Final B"

In [48]:
# Each entry of raw_final_A yields two entries: the unchanged mass under
# suffix 'N', and the mass with the bridge removed under suffix 'R'
raw_final_B = {}
for base_key, base_mass in raw_final_A.items():
  # Bridge not removed: mass carried over as is
  raw_final_B[base_key + 'N'] = base_mass
  # Bridge removed: subtract the bridge mass and add one water mass
  raw_final_B[base_key + 'R'] = round(base_mass - bridge + water, round_pos)

print(raw_final_B)
print(len(raw_final_B))


{'100N': 1009.4452, '100R': 895.4023, '110N': 1051.4557, '110R': 937.4128, '101N': 967.4346, '101R': 853.3917, '200N': 2000.8798, '200R': 1886.8369, '210N': 2042.8903, '210R': 1928.8474, '201N': 1958.8692, '201R': 1844.8263, '220N': 2084.9008, '220R': 1970.8579, '202N': 1916.8586, '202R': 1802.8157, '300N': 2992.3144, '300R': 2878.2715, '310N': 3034.3249, '310R': 2920.282, '301N': 2950.3038, '301R': 2836.2609, '320N': 3076.3354, '320R': 2962.2925, '302N': 2908.2932, '302R': 2794.2503, '330N': 3118.3459, '330R': 3004.303, '303N': 2866.2826, '303R': 2752.2397, '400N': 3983.749, '400R': 3869.7061, '410N': 4025.7595, '410R': 3911.7166, '401N': 3941.7384, '401R': 3827.6955, '420N': 4067.77, '420R': 3953.7271, '402N': 3899.7278, '402R': 3785.6849, '430N': 4109.7805, '430R': 3995.7376, '403N': 3857.7172, '403R': 3743.6743, '440N': 4151.791, '440R': 4037.7481, '404N': 3815.7066, '404R': 3701.6637, '500N': 4975.1836, '500R': 4861.1407, '510N': 5017.1941, '510R': 4903.1512, '501N': 4933.173, '50

Account for the amino acid variation for the last unit

'3' = 3 alanines, '4' = 4 alanines, '5' = 5 alanines, 'L' = extra lactate

Store the result in the dictionary "Raw Final C"

In [49]:
# Each entry of raw_final_B yields four variants, inserted in the order
# '3', '4', '5', 'L'
raw_final_C = {}
for stem, mass in raw_final_B.items():
  raw_final_C.update({
    # One alanine removed
    stem + '3': round(mass - ala + water, round_pos),
    # No change
    stem + '4': mass,
    # One extra alanine
    stem + '5': round(mass + ala - water, round_pos),
    # One extra lactate
    stem + 'L': round(mass + lactate - water, round_pos),
  })

print(raw_final_C)
print(len(raw_final_C))


{'100N3': 938.4081, '100N4': 1009.4452, '100N5': 1080.4823, '100NL': 1081.4663, '100R3': 824.3652, '100R4': 895.4023, '100R5': 966.4394, '100RL': 967.4234, '110N3': 980.4186, '110N4': 1051.4557, '110N5': 1122.4928, '110NL': 1123.4768, '110R3': 866.3757, '110R4': 937.4128, '110R5': 1008.4499, '110RL': 1009.4339, '101N3': 896.3975, '101N4': 967.4346, '101N5': 1038.4717, '101NL': 1039.4557, '101R3': 782.3546, '101R4': 853.3917, '101R5': 924.4288, '101RL': 925.4128, '200N3': 1929.8427, '200N4': 2000.8798, '200N5': 2071.9169, '200NL': 2072.9009, '200R3': 1815.7998, '200R4': 1886.8369, '200R5': 1957.874, '200RL': 1958.858, '210N3': 1971.8532, '210N4': 2042.8903, '210N5': 2113.9274, '210NL': 2114.9114, '210R3': 1857.8103, '210R4': 1928.8474, '210R5': 1999.8845, '210RL': 2000.8685, '201N3': 1887.8321, '201N4': 1958.8692, '201N5': 2029.9063, '201NL': 2030.8903, '201R3': 1773.7892, '201R4': 1844.8263, '201R5': 1915.8634, '201RL': 1916.8474, '220N3': 2013.8637, '220N4': 2084.9008, '220N5': 2155.9

Account for the cyclization of aspartic acids

For each cyclization event, one molecule of water is lost

'C0' = No cyclization, 'C1' = 1 cyclization event, etc

Store the result in the dictionary "Raw Final D"

In [50]:
# Helper function to calculate the number of asp bridges
# based on the characteristic string
def asp_bridge(c_string):
  """Return the number of asp bridges encoded in a characteristic string.

  The first character of c_string is read as the number of units, which is
  the bridge count in the general case. When the character at index 3 is
  'R' (bridge of the first unit removed), the count is reduced by one.
  """
  unit_count = int(c_string[0])
  return unit_count - 1 if c_string[3] == 'R' else unit_count

# Cyclization variants are only generated for codes 0 and 1 (Asp bridge);
# for the other codes raw_final_D is the same object as raw_final_C
if code in (0, 1):
  raw_final_D = {}
  for stem, mass in raw_final_C.items():
    max_events = asp_bridge(stem)
    # Entry without any cyclization
    raw_final_D[stem + 'C0'] = mass
    # Entries with 1..max_events cyclizations, one water mass lost per event
    for n_events in range(1, max_events + 1):
      raw_final_D[stem + 'C' + str(n_events)] = round(mass - n_events*water, round_pos)
else:
  raw_final_D = raw_final_C

print(raw_final_D)
print(len(raw_final_D))
  

{'100N3': 938.4081, '100N4': 1009.4452, '100N5': 1080.4823, '100NL': 1081.4663, '100R3': 824.3652, '100R4': 895.4023, '100R5': 966.4394, '100RL': 967.4234, '110N3': 980.4186, '110N4': 1051.4557, '110N5': 1122.4928, '110NL': 1123.4768, '110R3': 866.3757, '110R4': 937.4128, '110R5': 1008.4499, '110RL': 1009.4339, '101N3': 896.3975, '101N4': 967.4346, '101N5': 1038.4717, '101NL': 1039.4557, '101R3': 782.3546, '101R4': 853.3917, '101R5': 924.4288, '101RL': 925.4128, '200N3': 1929.8427, '200N4': 2000.8798, '200N5': 2071.9169, '200NL': 2072.9009, '200R3': 1815.7998, '200R4': 1886.8369, '200R5': 1957.874, '200RL': 1958.858, '210N3': 1971.8532, '210N4': 2042.8903, '210N5': 2113.9274, '210NL': 2114.9114, '210R3': 1857.8103, '210R4': 1928.8474, '210R5': 1999.8845, '210RL': 2000.8685, '201N3': 1887.8321, '201N4': 1958.8692, '201N5': 2029.9063, '201NL': 2030.8903, '201R3': 1773.7892, '201R4': 1844.8263, '201R5': 1915.8634, '201RL': 1916.8474, '220N3': 2013.8637, '220N4': 2084.9008, '220N5': 2155.9

Output the final list as a CSV file

In [0]:
import csv

# The file name carries the label of the selected combination
output_file_name = 'RefList_'+combi_code[code]+'.csv'
final = raw_final_D
with open(output_file_name, 'w') as f:
  f.write("Index, Property, Molecular weight\n")
  # One row per entry, numbered from 1 in dictionary order
  f.writelines(
    "%d,%s,%s\n"%(row_index, prop, final[prop])
    for row_index, prop in enumerate(final.keys(), start=1)
  )

Download the output file from Google Colab workspace

In [52]:
# IPython shell escape: list the files in the current working directory
!ls
# google.colab is expected to be importable only inside a Colab runtime
from google.colab import files
# Download the CSV whose name was built in the export cell (output_file_name)
files.download(output_file_name)

RefList_AsnGln.csv  RefList_AspGln.csv	sample_data
RefList_AsnGlu.csv  RefList_AspGlu.csv
