Notes
Two SNPs are identical if
- they share the same position
AND
- they have the same substitution.

When you compute the difference X-Y, consider only SNPs whose variant frequency is at least 80% in X, and treat a SNP as present in Y only if its variant frequency in Y is at least 27%.

Given
- A = BdWA1
- B = FAT_R_P1
- C = FAT_R_P2
- D = FAT_R_C1
- E = FAT_R_C2
- F = FAT_R_C3
- G = Fos_FAT_R_P
- H = Fos_FAT_R_C1
- I = Fos_FAT_R_C2
- J = Fos_FAT_R_C3

produce:
- Unique SNPs for FAT_R_P1 = (B-A-C)
- Unique SNPs for FAT_R_P2 = (C-A-B)
- Unique SNPs for FAT_R_C1 = (D-A-B-C-E-F)
- Unique SNPs for FAT_R_C2 = (E-A-B-C-D-F)
- Unique SNPs for FAT_R_C3 = (F-A-B-C-D-E)
- Common SNPs in FAT_R_Clones = (D+E+F)
- Unique SNPs for Fos_FAT_R_P = (G-A-B-C-D-E-F)
- Unique SNPs for Fos_FAT_R_C1 = (H-A-B-C-D-E-F-G-I-J)
- Unique SNPs for Fos_FAT_R_C2 = (I-A-B-C-D-E-F-G-H-J)
- Unique SNPs for Fos_FAT_R_C3 = (J-A-B-C-D-E-F-G-H-I)
- Common SNPs in Fos_FAT_R_Clones = (H+I+J)
- SNPs in the intersection between (D+E+F) and (H+I+J)

In [None]:
###
### support functions
###
import io
import pandas as pd
from google.colab import files

def read_file(fname):
  """Prompt for a Colab upload and parse the SNP spreadsheet named `fname`.

  Opens the Colab upload widget, reads the uploaded Excel workbook whose name
  is `fname`, and indexes every row by (Chromosome, Minimum, Change). If two
  rows share the same key, the later row silently replaces the earlier one, so
  the printed record count is the number of distinct keys.

  Args:
    fname: name of the uploaded file to parse; it must be one of the names
      reported by the upload widget.

  Returns:
    dict mapping (chromosome, position, change) to the tuple
    (frequency as float, polymorphism type, amino acid change, CDS,
    CDS codon number, protein effect).

  Raises:
    KeyError: if none of the uploaded files is named `fname`.
    AssertionError: if a row has Minimum != Maximum.
  """
  uploaded = files.upload()
  for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(uploaded[fn])))
  # Fail with an explicit message instead of a bare KeyError on the lookup
  # below when the chosen file is not the one this cell expects.
  if fname not in uploaded:
    raise KeyError('expected an upload named "{}" but received: {}'.format(fname, list(uploaded)))
  # keep_default_na=False disables pandas' default NaN markers when parsing.
  tab = pd.read_excel(io.BytesIO(uploaded[fname]),keep_default_na=False)
  my_dict = {}
  for _, row in tab.iterrows():
    minimum = row['Minimum']
    maximum = row['Maximum']
    # The key uses a single position, so every variant must span one base.
    assert minimum == maximum, 'row spans more than one position: {} != {}'.format(minimum, maximum)
    key = (row['Chromosome'], minimum, row['Change'])
    my_dict[key] = (
        float(row['Variant Frequency']),
        row['Polymorphism Type'],
        row['Amino Acid Change'],
        row['CDS'],
        row['CDS Codon Number'],
        row['Protein Effect'],
    )
  print('Read', len(my_dict), 'records')
  return my_dict

def dict_union(X_dict,Y_dict):
  """Return the union (X u Y) of two SNP dictionaries as a new dict.

  Every key of either input appears in the result. For a key present in both,
  the record with the strictly higher frequency (element 0) is kept; on a tie
  the record from Y_dict wins. Keys of Y_dict come first in the result,
  followed by the keys found only in X_dict. Neither input is modified.
  """
  merged = dict(Y_dict)
  for snp, x_record in X_dict.items():
    # X only replaces Y's record when X is new or has the higher frequency
    if snp not in Y_dict or x_record[0] > Y_dict[snp][0]:
      merged[snp] = x_record
  return merged

def dict_print(my_dict):
  """Print a SNP dictionary as comma-separated rows under a header line.

  Keys are (chromosome, position, change) and values are 6-element records.
  The position is written twice, once for the Minimum column and once for the
  Maximum column. The listing ends with two blank lines.
  """
  print('Change, Chromosome, Minimum, Maximum, Variant Frequency, Polymorphism Type, Amino Acid Change, CDS, CDS Codon Number, Protein Effect')
  for snp, record in my_dict.items():
    columns = (
        snp[2], snp[0], snp[1], snp[1],
        record[0], record[1], record[2], record[3], record[4], record[5],
    )
    print(", ".join(map(str, columns)))
  print('\n')

def dict_difference(X_dict,Y_dict,min_x_freq=0.8,min_y_freq=0.27):
  """Compute X - Y for two SNP dictionaries and print the result as CSV.

  A SNP of X_dict is kept when its frequency (element 0 of its record) is at
  least `min_x_freq` and it is either absent from Y_dict or present there with
  a frequency below `min_y_freq`. Every other SNP of X_dict is listed as
  discarded together with the reason.

  Args:
    X_dict: SNPs to filter, keyed by (chromosome, position, change).
    Y_dict: SNPs to subtract, keyed the same way.
    min_x_freq: minimum frequency for a SNP of X to be considered
      (default 0.8, i.e. 80%).
    min_y_freq: minimum frequency for a SNP of Y to count as present
      (default 0.27, i.e. 27%).

  Returns:
    None. The kept SNPs are printed with dict_print, followed by a CSV listing
    of the discarded SNPs.
  """
  difference_dict = {}
  discarded_dict = {}
  # With the default threshold this is exactly 'below 80%'.
  below_reason = 'below {:g}%'.format(min_x_freq * 100)
  for snp, record in X_dict.items():
    if not (record[0] >= min_x_freq):
      # too rare in X to be considered at all
      discarded_dict[snp] = below_reason
    elif snp in Y_dict and Y_dict[snp][0] >= min_y_freq:
      # frequent enough in Y to count as shared
      discarded_dict[snp] = 'SNP in common'
    else:
      difference_dict[snp] = record
  # print the CSVs
  dict_print(difference_dict)
  print('Change, Chromosome, Minimum, Maximum, Reason Discarded')
  for snp, reason in discarded_dict.items():
    print(", ".join(map(str, (snp[2], snp[0], snp[1], snp[1], reason))))

In [None]:
###
### READ A (BdWA1)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
A_dict = read_file('A_BdWA_1 SNPs.xlsx')
# Show the parsed records.
print(A_dict)

Saving A_BdWA_1 SNPs.xlsx to A_BdWA_1 SNPs.xlsx
User uploaded file "A_BdWA_1 SNPs.xlsx" with length 39434 bytes
Read 345 records
{(1, 113883, 'T -> C'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1778266, 'G -> A'): (1.0, 'SNP (transition)', '', 'CDS', 588, 'None'), (1, 113855, 'T -> A'): (0.995, 'SNP (transversion)', '', '', '', ''), (1, 1762188, 'T -> C'): (0.993, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 1629254, 'C -> T'): (0.987, 'SNP (transition)', 'R -> Q', 'CDS', 837, 'Substitution'), (1, 1786550, 'A -> T'): (0.909, 'SNP (transversion)', '', '', '', ''), (1, 1786558, 'T -> A'): (0.899, 'SNP (transversion)', 'K -> I', 'CDS', 837, 'Substitution'), (1, 1786564, 'C -> A'): (0.892, 'SNP (transversion)', 'S -> I', 'CDS', 835, 'Substitution'), (1, 1786551, 'C -> T'): (0.89, 'SNP (transition)', '', 'CDS', 839, 'None'), (1, 1629286, 'T -> C'): (0.71, 'SNP (transition)', '', 'CDS', 826, 'None'), (1, 1629299, 'T -> A'): (0.707, 'SNP (transversion)', 'E -> V', 'CDS', 822, 'Subst

In [None]:
###
### READ B (FAT_R_P1)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
# Note: the expected file name contains a space before '.xlsx'.
B_dict = read_file('B_FAT_R_P1 SNPs .xlsx')
# Show the parsed records.
print(B_dict)

Saving B_FAT_R_P1 SNPs .xlsx to B_FAT_R_P1 SNPs  (1).xlsx
User uploaded file "B_FAT_R_P1 SNPs .xlsx" with length 76640 bytes
Read 820 records
{(1, 113883, 'T -> C'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1629196, 'G -> A'): (1.0, 'SNP (transition)', '', 'CDS', 856, 'None'), (1, 1691220, 'A -> G'): (1.0, 'SNP (transition)', 'T -> A', 'CDS', 451, 'Substitution'), (1, 1691234, 'C -> T'): (1.0, 'SNP (transition)', '', 'CDS', 455, 'None'), (1, 1759501, 'G -> C'): (1.0, 'SNP (transversion)', 'D -> H', 'CDS', 584, 'Substitution'), (1, 1764093, 'C -> T'): (1.0, 'SNP (transition)', '', 'CDS', 101, 'None'), (1, 1769370, 'G -> A'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1780320, 'T -> C'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1784033, 'T -> C'): (1.0, 'SNP (transition)', 'R -> G', 'CDS', 682, 'Substitution'), (1, 1842485, 'G -> T'): (1.0, 'SNP (transversion)', '', '', '', ''), (1, 2282973, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 2199, 'None'), (1, 2283050, 'A ->

In [None]:
###
### READ C (FAT_R_P2)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
C_dict = read_file('C_FAT_R_P2.xlsx')
# Show the parsed records.
print(C_dict)

Saving C_FAT_R_P2.xlsx to C_FAT_R_P2.xlsx
User uploaded file "C_FAT_R_P2.xlsx" with length 28357 bytes
Read 209 records
{(1, 1762188, 'T -> C'): (0.98, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 113855, 'T -> A'): (0.972, 'SNP (transversion)', '', '', '', ''), (1, 113883, 'T -> C'): (0.971, 'SNP (transition)', '', '', '', ''), (1, 1770891, 'T -> C'): (0.544, 'SNP (transition)', '', 'CDS', 355, 'None'), (1, 1770895, 'G -> A'): (0.504, 'SNP (transition)', 'E -> K', 'CDS', 357, 'Substitution'), (1, 1770887, 'G -> A'): (0.496, 'SNP (transition)', 'R -> H', 'CDS', 354, 'Substitution'), (1, 1770867, 'A -> T'): (0.493, 'SNP (transversion)', '', 'CDS', 347, 'None'), (1, 1762339, 'A -> G'): (0.25, 'SNP (transition)', 'N -> D', 'CDS', 523, 'Substitution'), (2, 1587081, 'A -> G'): (0.764, 'SNP (transition)', '', '', '', ''), (2, 1576292, 'C -> T'): (0.552, 'SNP (transition)', '', '', '', ''), (2, 1576310, 'C -> T'): (0.452, 'SNP (transition)', '', '', '', ''), (2, 1576517, 'A -> T'): (0.421

In [None]:
###
### COMPUTE Unique SNPs for FAT_R_P1 = B - (A u C)
###
# Merge the samples to subtract; for a SNP present in both, dict_union keeps
# the record with the higher frequency.
AunionC_dict = dict_union(A_dict,C_dict)
# Prints the SNPs of B (frequency >= 0.8) that the merged set does not contain
# at frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(B_dict,AunionC_dict)

In [None]:
###
### COMPUTE Unique SNPs for FAT_R_P2 = C - (A u B)
###
# Merge the samples to subtract; for a SNP present in both, dict_union keeps
# the record with the higher frequency.
AunionB_dict = dict_union(A_dict,B_dict)
# Prints the SNPs of C (frequency >= 0.8) that the merged set does not contain
# at frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(C_dict, AunionB_dict)

In [None]:
###
### READ D (FAT_R_C1)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
D_dict = read_file('D_FAT_RC1 SNPs.xlsx')
# Show the parsed records.
print(D_dict)

Saving D_FAT_RC1 SNPs.xlsx to D_FAT_RC1 SNPs.xlsx
User uploaded file "D_FAT_RC1 SNPs.xlsx" with length 65663 bytes
Read 618 records
{(1, 1692042, 'G -> A'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1769370, 'G -> A'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1788868, 'T -> C'): (1.0, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1842323, 'C -> T'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1767129, 'G -> T'): (0.996, 'SNP (transversion)', 'K -> N', 'CDS', 106, 'Substitution'), (1, 1691220, 'A -> G'): (0.994, 'SNP (transition)', 'T -> A', 'CDS', 451, 'Substitution'), (1, 1762802, 'G -> C'): (0.994, 'SNP (transversion)', 'S -> T', 'CDS', 677, 'Substitution'), (1, 1784033, 'T -> C'): (0.994, 'SNP (transition)', 'R -> G', 'CDS', 682, 'Substitution'), (1, 1788864, 'T -> C'): (0.994, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1629196, 'G -> A'): (0.993, 'SNP (transition)', '', 'CDS', 856, 'None'), (1, 1691363, 'G -> C'): (0.993, 'SNP (transversion)', 'L 

In [None]:
###
### READ E (FAT_R_C2)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
E_dict = read_file('E_FAT_R_C2.xlsx')
# Show the parsed records.
print(E_dict)

Saving E_FAT_R_C2.xlsx to E_FAT_R_C2.xlsx
User uploaded file "E_FAT_R_C2.xlsx" with length 83603 bytes
Read 908 records
{(1, 2282973, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 2199, 'None'), (1, 2286546, 'T -> C'): (1.0, 'SNP (transition)', '', 'CDS', 2030, 'None'), (1, 113883, 'T -> C'): (0.995, 'SNP (transition)', '', '', '', ''), (1, 1762188, 'T -> C'): (0.995, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 1780320, 'T -> C'): (0.995, 'SNP (transition)', '', '', '', ''), (1, 1842323, 'C -> T'): (0.995, 'SNP (transition)', '', '', '', ''), (1, 1842427, 'A -> T'): (0.995, 'SNP (transversion)', '', '', '', ''), (1, 1842485, 'G -> T'): (0.995, 'SNP (transversion)', '', '', '', ''), (1, 1691363, 'G -> C'): (0.994, 'SNP (transversion)', 'L -> F', 'CDS', 498, 'Substitution'), (1, 1788796, 'G -> A'): (0.994, 'SNP (transition)', '', 'CDS', 101, 'None'), (1, 1691220, 'A -> G'): (0.993, 'SNP (transition)', 'T -> A', 'CDS', 451, 'Substitution'), (1, 1788864, 'T -> C'): (0.99, 'SNP (tran

In [None]:
###
### READ F (FAT_R_C3)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
F_dict = read_file('F_FAT_R_C3.xlsx')
# Show the parsed records.
print(F_dict)

Saving F_FAT_R_C3.xlsx to F_FAT_R_C3.xlsx
User uploaded file "F_FAT_R_C3.xlsx" with length 64341 bytes
Read 639 records
{(1, 1629118, 'C -> T'): (1.0, 'SNP (transition)', '', 'CDS', 882, 'None'), (1, 1691390, 'A -> T'): (1.0, 'SNP (transversion)', '', 'CDS', 507, 'None'), (1, 1764025, 'A -> G'): (1.0, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1767129, 'G -> T'): (1.0, 'SNP (transversion)', 'K -> N', 'CDS', 106, 'Substitution'), (1, 2282973, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 2199, 'None'), (1, 2286546, 'T -> C'): (1.0, 'SNP (transition)', '', 'CDS', 2030, 'None'), (1, 117631, 'C -> A'): (0.993, 'SNP (transversion)', '', 'CDS', 613, 'None'), (1, 1767042, 'A -> G'): (0.992, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1767046, 'A -> G'): (0.992, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1842485, 'G -> T'): (0.992, 'SNP (transversion)', '', '', '', ''), (1, 2286469, 'T -> C'): (0.992, 'SNP (transition)', 'D -> G', 'CDS', 2056, 'Subst

In [None]:
###
### COMPUTE Unique SNPs for FAT_R_C1 = D - (A u B u C u E u F)
###
# Build the union of every sample except D, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuE_dict = dict_union(C_dict, E_dict)
AuBuCuE_dict = dict_union(AuB_dict, CuE_dict)
AuBuCuEuF_dict = dict_union(AuBuCuE_dict, F_dict)
# Prints the SNPs of D (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(D_dict, AuBuCuEuF_dict)

In [None]:
###
### COMPUTE Unique SNPs for FAT_R_C2 = E - (A u B u C u D u F)
###
# Build the union of every sample except E, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
AuBuCuDuF_dict = dict_union(AuBuCuD_dict, F_dict)
# Prints the SNPs of E (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(E_dict, AuBuCuDuF_dict)

In [None]:
###
### COMPUTE Unique SNPs for FAT_R_C3 = F - (A u B u C u D u E)
###
# Build the union of every sample except F, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
AuBuCuDuE_dict = dict_union(AuBuCuD_dict, E_dict)
# Prints the SNPs of F (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(F_dict, AuBuCuDuE_dict)

In [None]:
###
### COMPUTE Common SNPs in FAT_R_Clones = D u E u F
###
# NOTE(review): "Common" is computed here as the UNION of the three clones
# (every SNP seen in any of D, E, F, with its highest frequency), and no
# frequency threshold is applied. Confirm an intersection is not intended.
DuE_dict = dict_union(D_dict, E_dict)
# DuEuF_dict is read again by the cells that combine it with H u I u J.
DuEuF_dict = dict_union(DuE_dict, F_dict)
dict_print(DuEuF_dict)

In [None]:
###
### READ G (Fos_FAT_R_P)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
G_dict = read_file('G_Fos_FAT_R_P.xlsx')
# Show the parsed records.
print(G_dict)

Saving G_Fos_FAT_R_P.xlsx to G_Fos_FAT_R_P.xlsx
User uploaded file "G_Fos_FAT_R_P.xlsx" with length 56261 bytes
Read 537 records
{(1, 1764021, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1767042, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1762188, 'T -> C'): (0.997, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 1691390, 'A -> T'): (0.995, 'SNP (transversion)', '', 'CDS', 507, 'None'), (1, 1691363, 'G -> C'): (0.994, 'SNP (transversion)', 'L -> F', 'CDS', 498, 'Substitution'), (1, 1764025, 'A -> G'): (0.993, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1842355, 'C -> T'): (0.992, 'SNP (transition)', '', '', '', ''), (1, 113883, 'T -> C'): (0.991, 'SNP (transition)', '', '', '', ''), (1, 1842323, 'C -> T'): (0.991, 'SNP (transition)', '', '', '', ''), (1, 1842326, 'T -> G'): (0.991, 'SNP (transversion)', '', '', '', ''), (1, 2871378, 'C -> T'): (0.991, 'SNP (transition)', 'S -> L', 'CDS', 70, 'Substitution'), (1, 1784033, 'T 

In [None]:
###
### READ H (Fos_FAT_R_C1)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
H_dict = read_file('H_Fos_FAT_R_C1.xlsx')
# Show the parsed records.
print(H_dict)

Saving H_Fos_FAT_R_C1.xlsx to H_Fos_FAT_R_C1.xlsx
User uploaded file "H_Fos_FAT_R_C1.xlsx" with length 78156 bytes
Read 814 records
{(1, 113883, 'T -> C'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 129018, 'T -> A'): (1.0, 'SNP (transversion)', '', '', '', ''), (1, 1478081, 'C -> T'): (1.0, 'SNP (transition)', 'A -> T', 'CDS', 434, 'Substitution'), (1, 1691220, 'A -> G'): (1.0, 'SNP (transition)', 'T -> A', 'CDS', 451, 'Substitution'), (1, 1692020, 'G -> A'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1692042, 'G -> A'): (1.0, 'SNP (transition)', '', '', '', ''), (1, 1762802, 'G -> C'): (1.0, 'SNP (transversion)', 'S -> T', 'CDS', 677, 'Substitution'), (1, 1764025, 'A -> G'): (1.0, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1764093, 'C -> T'): (1.0, 'SNP (transition)', '', 'CDS', 101, 'None'), (1, 1767042, 'A -> G'): (1.0, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1767046, 'A -> G'): (1.0, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1

In [None]:
###
### READ I (Fos_FAT_R_C2)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
I_dict = read_file('I_Fos_FAT_R_C2.xlsx')
# Show the parsed records.
print(I_dict)

Saving I_Fos_FAT_R_C2.xlsx to I_Fos_FAT_R_C2.xlsx
User uploaded file "I_Fos_FAT_R_C2.xlsx" with length 87879 bytes
Read 962 records
{(1, 1788755, 'T -> C'): (1.0, 'SNP (transition)', 'D -> G', 'CDS', 115, 'Substitution'), (1, 1762188, 'T -> C'): (0.996, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 1764108, 'G -> T'): (0.996, 'SNP (transversion)', 'K -> N', 'CDS', 106, 'Substitution'), (1, 1767129, 'G -> T'): (0.996, 'SNP (transversion)', 'K -> N', 'CDS', 106, 'Substitution'), (1, 1842388, 'G -> T'): (0.996, 'SNP (transversion)', '', '', '', ''), (1, 1629254, 'C -> T'): (0.993, 'SNP (transition)', 'R -> Q', 'CDS', 837, 'Substitution'), (1, 1783348, 'T -> C'): (0.993, 'SNP (transition)', '', '', '', ''), (1, 1842485, 'G -> T'): (0.992, 'SNP (transversion)', '', '', '', ''), (1, 1689642, 'T -> G'): (0.991, 'SNP (transversion)', '', '', '', ''), (1, 1842384, 'A -> G'): (0.989, 'SNP (transition)', '', '', '', ''), (1, 1788796, 'G -> A'): (0.988, 'SNP (transition)', '', 'CDS', 101, 'None

In [None]:
###
### READ J (Fos_FAT_R_C3)
###
# read_file opens the Colab upload widget and returns the parsed SNP table as
# a dict keyed by (chromosome, position, change).
J_dict = read_file('J_FOS_FAT_R_C3.xlsx')
# Show the parsed records.
print(J_dict)

Saving J_FOS_FAT_R_C3.xlsx to J_FOS_FAT_R_C3.xlsx
User uploaded file "J_FOS_FAT_R_C3.xlsx" with length 76659 bytes
Read 789 records
{(1, 1767046, 'A -> G'): (1.0, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1767042, 'A -> G'): (0.993, 'SNP (transition)', '', 'CDS', 77, 'None'), (1, 1842485, 'G -> T'): (0.992, 'SNP (transversion)', '', '', '', ''), (1, 113883, 'T -> C'): (0.988, 'SNP (transition)', '', '', '', ''), (1, 1762188, 'T -> C'): (0.988, 'SNP (transition)', '', 'CDS', 472, 'None'), (1, 1784033, 'T -> C'): (0.985, 'SNP (transition)', 'R -> G', 'CDS', 682, 'Substitution'), (1, 1692042, 'G -> A'): (0.984, 'SNP (transition)', '', '', '', ''), (1, 1780320, 'T -> C'): (0.984, 'SNP (transition)', '', '', '', ''), (1, 1842323, 'C -> T'): (0.984, 'SNP (transition)', '', '', '', ''), (1, 1842388, 'G -> T'): (0.984, 'SNP (transversion)', '', '', '', ''), (1, 1788864, 'T -> C'): (0.983, 'SNP (transition)', 'I -> V', 'CDS', 79, 'Substitution'), (1, 1629196, 'G -> A'): (0.9

In [None]:
###
### COMPUTE Unique SNPs for Fos_FAT_R_P = G - (A u B u C u D u E u F)
###
# Build the union of samples A..F, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
EuF_dict = dict_union(E_dict, F_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
AuBuCuDuEuF_dict = dict_union(AuBuCuD_dict,EuF_dict)
# Prints the SNPs of G (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(G_dict, AuBuCuDuEuF_dict)

In [None]:
###
### COMPUTE Unique SNPs for Fos_FAT_R_C1 = H - (A u B u C u D u E u F u G u I u J)
###
# Build the union of every sample except H, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
EuF_dict = dict_union(E_dict, F_dict)
GuI_dict = dict_union(G_dict, I_dict)
EuFuGuI_dict = dict_union(EuF_dict,GuI_dict)
AuBuCuDuEuFuGuI_dict = dict_union(AuBuCuD_dict,EuFuGuI_dict)
AuBuCuDuEuFuGuIuJ_dict = dict_union(AuBuCuDuEuFuGuI_dict,J_dict)
# Prints the SNPs of H (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(H_dict,AuBuCuDuEuFuGuIuJ_dict)

In [None]:
###
### COMPUTE Unique SNPs for Fos_FAT_R_C2 = I - (A u B u C u D u E u F u G u H u J)
###
# Build the union of every sample except I, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
EuF_dict = dict_union(E_dict, F_dict)
GuH_dict = dict_union(G_dict, H_dict)
EuFuGuH_dict = dict_union(EuF_dict,GuH_dict)
AuBuCuDuEuFuGuH_dict = dict_union(AuBuCuD_dict,EuFuGuH_dict)
AuBuCuDuEuFuGuHuJ_dict = dict_union(AuBuCuDuEuFuGuH_dict,J_dict)
# Prints the SNPs of I (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(I_dict,AuBuCuDuEuFuGuHuJ_dict)

In [None]:
###
### COMPUTE Unique SNPs for Fos_FAT_R_C3 = J - (A u B u C u D u E u F u G u H u I)
###
# Build the union of every sample except J, two dictionaries at a time
# (dict_union takes exactly two inputs and keeps the higher frequency).
AuB_dict = dict_union(A_dict, B_dict)
CuD_dict = dict_union(C_dict, D_dict)
AuBuCuD_dict = dict_union(AuB_dict, CuD_dict)
EuF_dict = dict_union(E_dict, F_dict)
GuH_dict = dict_union(G_dict, H_dict)
EuFuGuH_dict = dict_union(EuF_dict,GuH_dict)
AuBuCuDuEuFuGuH_dict = dict_union(AuBuCuD_dict,EuFuGuH_dict)
AuBuCuDuEuFuGuHuI_dict = dict_union(AuBuCuDuEuFuGuH_dict,I_dict)
# Prints the SNPs of J (frequency >= 0.8) that the union does not contain at
# frequency >= 0.27, followed by the discarded SNPs. Returns nothing.
dict_difference(J_dict,AuBuCuDuEuFuGuHuI_dict)

In [None]:
###
### COMPUTE Common SNPs in Fos_FAT_R_Clones = H u I u J
###
# NOTE(review): "Common" is computed here as the UNION of the three clones
# (every SNP seen in any of H, I, J, with its highest frequency), and no
# frequency threshold is applied. Confirm an intersection is not intended.
HuI_dict = dict_union(H_dict,I_dict)
# HuIuJ_dict is read again by the cells that combine it with D u E u F.
HuIuJ_dict = dict_union(HuI_dict,J_dict)
dict_print(HuIuJ_dict)

In [None]:
###
### COMPUTE Common SNPs = D u E u F u H u I u J
###
# Depends on DuEuF_dict and HuIuJ_dict, which are defined in earlier cells;
# run those cells first. The result is the union of all six clone samples.
DuEuFuHuIuJ_dict = dict_union(DuEuF_dict,HuIuJ_dict)
dict_print(DuEuFuHuIuJ_dict)

In [None]:
###
### COMPUTE Common SNPs = (D u E u F) intersection (H u I u J)
###
# Depends on DuEuF_dict and HuIuJ_dict, which are defined in earlier cells.
# Keep every SNP present in both unions, with whichever record has the higher
# frequency; on a tie the H u I u J record is kept.
DuEuFintHuIuJ_dict = {}
for snp, fat_record in DuEuF_dict.items():
  if snp not in HuIuJ_dict:
    continue
  fos_record = HuIuJ_dict[snp]
  DuEuFintHuIuJ_dict[snp] = fat_record if fat_record[0] > fos_record[0] else fos_record
dict_print(DuEuFintHuIuJ_dict)