# Aggregate vireo results from samples that are genotyped

In [1]:
# Location of vireo files
# Each path is a template: the literal token 'SAMPLE' is substituted with a
# library ID (via gsub) right before the file is read.
# Per-cell vireo donor calls for one library.
path_barcodes = '/nfs/users/nfs_l/lg18/team292/lg18/endometriosis/data/genotypes/vireo_donors_demultiplexing/SAMPLE/donor_ids.tsv'
# Per-cell souporcell cluster calls, used for the library where vireo failed.
path_barcodes_souporcell = '/nfs/users/nfs_l/lg18/team292/lg18/endometriosis/data/genotypes/souporcell_donors_demultiplexing/SAMPLE/clusters.tsv'
# Donor-matching table for one library; read below with columns Patient / variable / value.
path_donors = '/nfs/users/nfs_l/lg18/team292/lg18/endometriosis/data/genotypes/vireo_match_donors/SAMPLE_donor_assignement.csv'

In [2]:
# Load the L-F demultiplexing table and tag every row with its origin
LF = read.csv('demultiplexed_LF.csv', stringsAsFactors = FALSE)
LF[['source']] = 'LFHandfield'

In [3]:
# Library-level metadata (row names = library IDs) linking libraries to genotypes
metadata = read.csv('metadata-libraries.csv', stringsAsFactors = FALSE, row.names = 1)

# Keep the multiplexed libraries of this study; which() drops NA comparisons just like subset()
is_target_library = metadata$multiplexed == 'yes' & metadata$StudyName == 'Marecckovaa'
genotypes_meta = metadata[which(is_target_library), c('genotype0', 'genotype1', 'genotyped')]

# Discard libraries whose `genotyped` field contains 'no'
genotypes_meta = genotypes_meta[!grepl('no', genotypes_meta$genotyped), ]
genotypes_meta$sclibary = rownames(genotypes_meta)

# These are the libraries that Vireo did not manage to deconvolute
vireo_failed_libraries = c('UA_Endo12894973', 'UA_Endo12894974')
genotypes_meta = genotypes_meta[!(genotypes_meta$sclibary %in% vireo_failed_libraries), ]

## build a dictionary linking vireo donors to genotypes

In [18]:
# for genotyped donors
# Builds vireodonor2genotype[[library]][[vireo donor]] = genotype ID for every
# library listed in genotypes_meta.
vireodonor2genotype = list()
for (sam in rownames(genotypes_meta) ){
    # The two genotypes pooled in this library (selected by name rather than by position)
    multiplexed_genotypes = as.character(genotypes_meta[sam, c('genotype0', 'genotype1')])
    donors = read.delim(gsub('SAMPLE', sam, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)

    # Genotypes present in the matching table can be matched directly;
    # the remaining ones are assigned by elimination.
    genotyped_genotypes = intersect(donors$variable, multiplexed_genotypes)
    nongenotyped_genotypes = setdiff(multiplexed_genotypes, genotyped_genotypes)

    # Explicit list, so the per-library entry is a list on every R version
    # (downstream cells access it with `$donor0` / `$donor1`).
    donor2genotype = list()

    for(geno in genotyped_genotypes){
        # Row with the lowest `value` for this genotype
        # (presumably a genotype distance - TODO confirm against the matching script)
        best_match = subset(donors, variable %in% geno)
        best_match = best_match[which.min(best_match$value), ]
        if( nrow(best_match) > 0 ){
            donor = as.character(best_match$Patient)
            if (donor %in% names(donor2genotype)) {
                # Two genotypes matching the same donor used to overwrite each other silently
                stop('Library ', sam, ': genotypes ', donor2genotype[[donor]], ' and ', geno,
                     ' both match ', donor)
            }
            donor2genotype[[donor]] = geno
        }
    }
    for (geno in nongenotyped_genotypes){
        patient = setdiff(c('donor0', 'donor1') , names(donor2genotype))
        if (length(patient) != 1) {
            # Assignment by elimination is only defined when exactly one donor is left;
            # `[[patient]]` with 0 or 2 names would fail or index recursively.
            stop('Library ', sam, ': cannot assign genotype ', geno, ' by elimination (',
                 length(patient), ' unassigned donors)')
        }
        donor2genotype[[patient]] = geno
    }

    # As before, libraries without any assignment get no entry
    if (length(donor2genotype) > 0) {
        vireodonor2genotype[[sam]] = donor2genotype
    }
}
# vireodonor2genotype

In [19]:
# For samples not genotyped - sharing genotypes
# The target library shares one genotype with an already resolved library, so the
# target donor that best matches that library's donor inherits the shared genotype.
genotype_shared = 'FX9006'
genotype_other = 'FX9014'
sample_target = 'UA_Endo12865961'
sample_genotyped = 'UA_Endo11460963'
donor_in_genotyped = names(which(vireodonor2genotype[[sample_genotyped]] == genotype_shared))
if (length(donor_in_genotyped) != 1) {
    stop(genotype_shared, ' is not assigned to exactly one donor in ', sample_genotyped)
}


donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_genotyped.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_genotyped), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_genotyped)
}


# Written in one go as an explicit list, so re-running the cell is idempotent
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(donor_in_target, other_donor_in_target)
)

vireodonor2genotype[sample_target]

In [20]:
# For HASHED samples not genotyped - sharing genotypes - NOTE this one was done using souporcell as Vireo failed
genotype_shared = 'FX1249'
genotype_other = 'FX1254'
sample_target = 'UA_Endo12680034'
sample_hashed = 'UA_Endo12680033'


# Cross-tabulate the hashing genotype calls against the souporcell clusters of the hashed library
hashing_demultiplexing = subset(LF, library == sample_hashed)
barcodes_genotype = read.delim(gsub('SAMPLE', sample_hashed, path_barcodes_souporcell), stringsAsFactors = F, sep = '\t', row.names = 1)
hashing_demultiplexing$donor = barcodes_genotype$assignment[ match(hashing_demultiplexing$barcode, rownames(barcodes_genotype)) ]
asso = table(hashing_demultiplexing[, c('genotype', 'donor') ])
asso
# Only single clusters are candidates: columns such as '0/1' are doublet assignments
# and must never be picked as the donor carrying the shared genotype.
singlet_clusters = grep('/', colnames(asso), fixed = TRUE, invert = TRUE, value = TRUE)
donor_in_hashed = singlet_clusters[ which.max(asso[ genotype_shared , singlet_clusters ]) ]
donor_in_hashed
cat(genotype_shared, 'genotype in sample ', sample_hashed,  ' is donor ', donor_in_hashed)

donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_hashed.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_hashed), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_hashed)
}


# Cluster labels are prefixed with 'donor' so the keys match the donor0/donor1 convention.
# Written in one go as an explicit list, so re-running the cell is idempotent.
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(paste0('donor', donor_in_target), paste0('donor', other_donor_in_target))
)

vireodonor2genotype[sample_target]

         donor
genotype     0  0/1    1  1/0
  doublet   32  326   49  323
  FX1249  4814  341    6   97
  FX1254    10   73 6053  209

FX1249 genotype in sample  UA_Endo12680033  is donor  0

In [21]:
# For HASHED samples not genotyped - sharing genotypes
genotype_shared = 'SE02'
genotype_other = 'FX1285'
sample_target = 'UA_Endo12961679'
sample_hashed = 'UA_Endo13066782'


# Cross-tabulate the hashing genotype calls against the vireo donor calls of the hashed library
hashing_demultiplexing = subset(LF, library == sample_hashed)
barcodes_genotype = read.delim(gsub('SAMPLE', sample_hashed, path_barcodes), stringsAsFactors = F, sep = '\t', row.names = 1)
hashing_demultiplexing$donor = barcodes_genotype$donor_id[ match(hashing_demultiplexing$barcode, rownames(barcodes_genotype)) ]
asso = table(hashing_demultiplexing[, c('genotype', 'donor') ])
# Only real donors are candidates: the 'doublet' / 'unassigned' columns must never be picked
donor_columns = grep('^donor[0-9]+$', colnames(asso), value = TRUE)
donor_in_hashed = donor_columns[ which.max(asso[ genotype_shared , donor_columns ]) ]
cat(genotype_shared, 'genotype in sample ', sample_hashed,  ' is donor ', donor_in_hashed)

donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_hashed.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_hashed), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_hashed)
}


# Written in one go as an explicit list, so re-running the cell is idempotent
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(donor_in_target, other_donor_in_target)
)

vireodonor2genotype[sample_target]

SE02 genotype in sample  UA_Endo13066782  is donor  donor1

In [22]:
# Add vireo results from HASHED samples not genotyped - sharing genotypes
genotype_shared = 'SE02'
genotype_other = 'FX1285'
sample_target = 'UA_Endo12961680'
sample_hashed = 'UA_Endo13066782'


# Cross-tabulate the hashing genotype calls against the vireo donor calls of the hashed library
hashing_demultiplexing = subset(LF, library == sample_hashed)
barcodes_genotype = read.delim(gsub('SAMPLE', sample_hashed, path_barcodes), stringsAsFactors = F, sep = '\t', row.names = 1)
hashing_demultiplexing$donor = barcodes_genotype$donor_id[ match(hashing_demultiplexing$barcode, rownames(barcodes_genotype)) ]
asso = table(hashing_demultiplexing[, c('genotype', 'donor') ])
# Only real donors are candidates: the 'doublet' / 'unassigned' columns must never be picked
donor_columns = grep('^donor[0-9]+$', colnames(asso), value = TRUE)
donor_in_hashed = donor_columns[ which.max(asso[ genotype_shared , donor_columns ]) ]
cat(genotype_shared, 'genotype in sample ', sample_hashed,  ' is donor ', donor_in_hashed)

donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_hashed.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_hashed), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_hashed)
}


# Written in one go as an explicit list, so re-running the cell is idempotent
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(donor_in_target, other_donor_in_target)
)

vireodonor2genotype[sample_target]

SE02 genotype in sample  UA_Endo13066782  is donor  donor1

In [23]:
# Add vireo results from HASHED samples not genotyped - sharing genotypes
genotype_shared = 'FX1289'
genotype_other = 'SE03'
sample_target = 'UA_Endo12961681'
sample_hashed = 'UA_Endo13066782'


# Cross-tabulate the hashing genotype calls against the vireo donor calls of the hashed library
hashing_demultiplexing = subset(LF, library == sample_hashed)
barcodes_genotype = read.delim(gsub('SAMPLE', sample_hashed, path_barcodes), stringsAsFactors = F, sep = '\t', row.names = 1)
hashing_demultiplexing$donor = barcodes_genotype$donor_id[ match(hashing_demultiplexing$barcode, rownames(barcodes_genotype)) ]
asso = table(hashing_demultiplexing[, c('genotype', 'donor') ])
# Only real donors are candidates: the 'doublet' / 'unassigned' columns must never be picked
donor_columns = grep('^donor[0-9]+$', colnames(asso), value = TRUE)
donor_in_hashed = donor_columns[ which.max(asso[ genotype_shared , donor_columns ]) ]
cat(genotype_shared, 'genotype in sample ', sample_hashed,  ' is donor ', donor_in_hashed)

donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_hashed.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_hashed), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_hashed)
}


# Written in one go as an explicit list, so re-running the cell is idempotent
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(donor_in_target, other_donor_in_target)
)

vireodonor2genotype[sample_target]

FX1289 genotype in sample  UA_Endo13066782  is donor  donor0

In [24]:
# Add vireo results from HASHED samples not genotyped - sharing genotypes
genotype_shared = 'FX1289'
genotype_other = 'SE03'
sample_target = 'UA_Endo12961682'
sample_hashed = 'UA_Endo13066782'


# Cross-tabulate the hashing genotype calls against the vireo donor calls of the hashed library
hashing_demultiplexing = subset(LF, library == sample_hashed)
barcodes_genotype = read.delim(gsub('SAMPLE', sample_hashed, path_barcodes), stringsAsFactors = F, sep = '\t', row.names = 1)
hashing_demultiplexing$donor = barcodes_genotype$donor_id[ match(hashing_demultiplexing$barcode, rownames(barcodes_genotype)) ]
asso = table(hashing_demultiplexing[, c('genotype', 'donor') ])
# Only real donors are candidates: the 'doublet' / 'unassigned' columns must never be picked
donor_columns = grep('^donor[0-9]+$', colnames(asso), value = TRUE)
donor_in_hashed = donor_columns[ which.max(asso[ genotype_shared , donor_columns ]) ]
cat(genotype_shared, 'genotype in sample ', sample_hashed,  ' is donor ', donor_in_hashed)

donors2align = read.delim(gsub('SAMPLE', sample_target, path_donors), stringsAsFactors = F, sep = ',', row.names = 1)
# Take the lowest `value` AMONG the rows that refer to donor_in_hashed.
# intersect(which.min(<all rows>), <matching rows>) was empty whenever the global
# minimum of the table belonged to the other reference donor.
candidates = donors2align[ which(donors2align$variable == donor_in_hashed), ]
donor_in_target = candidates$Patient[ which.min(candidates$value) ]
other_donor_in_target = setdiff(donors2align$Patient, donor_in_target)
if (length(donor_in_target) != 1 || length(other_donor_in_target) != 1) {
    stop('Cannot resolve the two donors of ', sample_target, ' against ', sample_hashed)
}


# Written in one go as an explicit list, so re-running the cell is idempotent
vireodonor2genotype[[sample_target]] = setNames(
    list(genotype_shared, genotype_other),
    c(donor_in_target, other_donor_in_target)
)

vireodonor2genotype[sample_target]

FX1289 genotype in sample  UA_Endo13066782  is donor  donor0

## Assign barcodes to genotypes

In [25]:
# Per-library barcode tables with vireo donor labels translated into genotype IDs.
# 'UA_Endo12680034' is left out here because its calls come from souporcell.
barcodes_list = list()
vireo_libraries = setdiff(names(vireodonor2genotype), 'UA_Endo12680034')
for (sam in vireo_libraries){
    barcodes = read.delim(gsub('SAMPLE', sam, path_barcodes), stringsAsFactors = FALSE)
    barcodes = barcodes[, 1:2]   # first two columns only; `cell` and `donor_id` are used below
    barcodes$library = sam
    barcodes$barcode = barcodes$cell
    barcodes$genotype = barcodes$donor_id

    # Translate the donor labels; every other label is left untouched
    donor2genotype = vireodonor2genotype[[sam]]
    is_donor0 = barcodes$genotype == 'donor0'
    barcodes$genotype[is_donor0] = donor2genotype$donor0
    is_donor1 = barcodes$genotype == 'donor1'
    barcodes$genotype[is_donor1] = donor2genotype$donor1

    barcodes_list[[sam]] = barcodes
}

In [28]:
# Add souporcell sample
# Reshape the souporcell calls of this library into the same columns as the vireo tables.
sam = 'UA_Endo12680034'
barcodes = read.delim(gsub('SAMPLE', sam, path_barcodes_souporcell), stringsAsFactors = F)
# Cluster number -> 'donorN'; doublet / unassigned cells keep their status as label
barcodes$donor_id = paste0('donor', barcodes$assignment)
barcodes$donor_id[ barcodes$status  == 'doublet' ] = 'doublet'
barcodes$donor_id[ barcodes$status  == 'unassigned' ] = 'unassigned'
barcodes$cell = barcodes$barcode
barcodes$library = sam
barcodes$genotype = barcodes$donor_id
# Translate from donor_id, not from the raw assignment: a cell whose status is
# 'doublet' or 'unassigned' but whose assignment is '0' or '1' previously had its
# status label overwritten with a donor genotype.
# NOTE(review): souporcell appears to report a best-guess cluster for unassigned
# cells - confirm against clusters.tsv.
barcodes$genotype[ barcodes$donor_id == 'donor0' ] = vireodonor2genotype[[sam]][['donor0']]
barcodes$genotype[ barcodes$donor_id == 'donor1' ] = vireodonor2genotype[[sam]][['donor1']]
# Same columns, same order, as the tables already stored in barcodes_list
barcodes_list[[sam]] = barcodes[, names(barcodes_list[[1]]) ]

# Save Results

In [30]:
library(reshape2)
# Stack the per-library tables into one table; every existing column is declared an id column
id_columns = names(barcodes_list[[1]])
BARCODES = melt(barcodes_list, id.vars = id_columns)
BARCODES[['source']] = 'LGarciaAlonso'
write.csv(
    BARCODES,
    file = '/nfs/users/nfs_l/lg18/team292/lg18/endometriosis/data/genotypes/demultiplexed_vireo.csv',
    row.names = FALSE
)

In [31]:
# Merge with L-F hashing deconvolution
# L-F rows are kept only for libraries that have no genotype-based calls in BARCODES
has_genotype_calls = LF$library %in% BARCODES$library
LFf = LF[!has_genotype_calls, ]
merged_df = rbind(LFf, BARCODES[, colnames(LFf)])
write.csv(
    merged_df,
    file = '/nfs/users/nfs_l/lg18/team292/lg18/endometriosis/data/genotypes/demultiplexed_merged.csv',
    row.names = FALSE
)

# Compare VIREO predictions to L-F

In [63]:
# Assigned cells only, with two comparison keys:
#   id         = library_barcode_genotype (agreement on the call itself)
#   barcode_id = library_barcode          (same cell, whatever the call)
barcodes_s = BARCODES[, c('library', 'barcode', 'genotype') ]
barcodes_s = subset(barcodes_s, genotype != 'unassigned' )
barcodes_s$id = paste(barcodes_s$library, barcodes_s$barcode, barcodes_s$genotype, sep = '_')
barcodes_s$barcode_id = paste(barcodes_s$library, barcodes_s$barcode, sep = '_')
table(barcodes_s$library)


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
           9431            1970           18384           17526           14459 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
          15418           15036           20779           28040           21865 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
          15583           19951           25817            6073           21730 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
          21358           16584            6079           13270           17098 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
          17838           15084           15760           20344           19947 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
          18122           12032           18432           14665           15761 
UA_Endo12680031 UA_Endo1268

In [64]:
# Reload the L-F calls, restricted to the libraries under comparison, and build the
# same two keys as in barcodes_s.
# NOTE: this overwrites the LF object loaded at the top of the notebook (which carries
# the extra `source` column), so the merge cell should not be re-run after this one.
LF = read.delim('demultiplexed_LF.csv', stringsAsFactors = F, sep = ',')
LF = subset(LF, LF$library %in% barcodes_s$library )
# Keys are built from explicitly named columns. Pasting every column of LF (and taking
# columns 1:2 by position) only matched the barcodes_s keys while the CSV happened to
# contain exactly library, barcode, genotype in that order.
LF$id = paste(LF$library, LF$barcode, LF$genotype, sep = '_')
LF$barcode_id = paste(LF$library, LF$barcode, sep = '_')
table(LF$library)


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
           9621            2068           18262           20158           12914 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
          14362           13954           19359           27560           20506 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
          14618           17122           24461           11318           18979 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
          19565           16016            8864           12956           16630 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
          22917           14308           13755           14350           15025 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
          11883           11248           16927           12631           13828 
UA_Endo12680031 UA_Endo1268

In [65]:
# L-F call for the same cell (NA when the cell has no row in LF),
# and whether the two methods give exactly the same call
lf_row = match(barcodes_s$barcode_id, LF$barcode_id)
barcodes_s$pred_LF = LF$genotype[lf_row]
barcodes_s$pred_match = is.element(barcodes_s$id, LF$id)
table(barcodes_s$pred_match)


 FALSE   TRUE 
187656 503363 

In [67]:
# retrieve missmatch
missmatch = subset(barcodes_s, ! pred_match )
table(missmatch$library)
cat('Ratio missmatch per libary')
# Both tables share the same library levels, so the division stays aligned (and does
# not fail) when a library has no mismatches at all.
library_levels = sort(unique(barcodes_s$library))
table(factor(missmatch$library, levels = library_levels)) / table(factor(barcodes_s$library, levels = library_levels))
# How many doublets?
table(missmatch$genotype)
# How many missmatch?
table(is.na(missmatch$pred_LF))

# Overall ratios, computed from the data instead of typed in from a previous run
n_cells = nrow(barcodes_s)

nrow(missmatch) / n_cells # missmatch ratio

sum(missmatch$genotype == 'doublet') / n_cells # missmatch %  I say "doublet"

sum(is.na(missmatch$pred_LF)) / n_cells # missmatch % L-F says NA


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
            261             287            1319            1178           13882 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
           1873            2052            2641            1905            2303 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
          10258           11168            2436            5061            6227 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
           3339            8131            2802             932            1338 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
           4946            1561           15191            6954            5573 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
           7562           11489            2201            2838            2363 
UA_Endo12680031 UA_Endo1268

Ratio missmatch per libary


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
     0.02767469      0.14568528      0.07174717      0.06721442      0.96009406 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
     0.12148139      0.13647247      0.12709948      0.06793866      0.10532815 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
     0.65828146      0.55977144      0.09435643      0.83336078      0.28656236 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
     0.15633486      0.49029185      0.46093107      0.07023361      0.07825477 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
     0.27727324      0.10348714      0.96389594      0.34182068      0.27939038 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
     0.41728286      0.95487035      0.11941189      0.19352199      0.14992704 
UA_Endo12680031 UA_Endo1268


doublet  EX0045  EX0277  EX0288  EX0366  EX0540  EX0661  EX0789  EX0795  FX0011 
  96997     179    5229    5394     229     813    8863    5418     611     325 
 FX0022  FX0028  FX0033  FX0052  FX0056  FX0060  FX1074  FX1087  FX1090  FX1097 
    204     145     133   10592     264     178     699    3742     462     587 
 FX1100  FX1104  FX1106  FX1109  FX1111  FX1119  FX1122  FX1125  FX1129  FX1130 
    255     241     138    1830     152     253    2626     312     369     429 
 FX1136  FX1141  FX1146  FX1150  FX1156  FX1158  FX1160  FX1163  FX1170  FX1176 
    242    2023     481     189     706    2451    1218     115    1057     935 
 FX1205  FX1215  FX1223  FX1233  FX1249  FX1254  FX1259  FX1285  FX1289  FX9001 
   2868    4681    1460     682      76      11    1862      75      52    1814 
 FX9005  FX9006  FX9007  FX9008  FX9009  FX9011  FX9013  FX9014  FX9020  FX9021 
    841    7937     578     999     193     863     117    2548     127    1231 
 FX9022  FX9024  FX9027    


FALSE  TRUE 
91505 96151 

In [68]:
# missmatch due to 'doublet' in vireo
# Those are removed here, so only mismatches where vireo called a donor genotype remain.
missmatch = subset(missmatch, genotype != 'doublet')
table(missmatch$library)
cat('Ratio genotype swap per libary')
# Both tables share the same library levels, so the division stays aligned (and does
# not fail) when a library has no remaining mismatches.
library_levels = sort(unique(barcodes_s$library))
table(factor(missmatch$library, levels = library_levels)) / table(factor(barcodes_s$library, levels = library_levels))


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
            131             269             303             487           13460 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
            883            1771            1294             484             912 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
            533             592            1082            3905             391 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
            326             562             375             271             382 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
           1856             856           12425             858            1167 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
           1317           10623             331             431             574 
UA_Endo12680031 UA_Endo1268

Ratio genotype swap per libary


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
     0.01389036      0.13654822      0.01648172      0.02778729      0.93090808 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
     0.05727072      0.11778399      0.06227441      0.01726106      0.04171050 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
     0.03420394      0.02967270      0.04191037      0.64301004      0.01799356 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
     0.01526360      0.03388808      0.06168778      0.02042200      0.02234179 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
     0.10404754      0.05674887      0.78838832      0.04217460      0.05850504 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
     0.07267410      0.88289561      0.01795790      0.02938970      0.03641901 
UA_Endo12680031 UA_Endo1268

In [70]:
# missmatch due to NAs in L-F
# Those are removed here, so only cells with a different L-F call remain.
missmatch = subset(missmatch, ! is.na(missmatch$pred_LF) )
nrow(missmatch)
table(missmatch$library)
# Both tables share the same library levels, so the division stays aligned (and does
# not fail) when a library has no remaining mismatches.
library_levels = sort(unique(barcodes_s$library))
table(factor(missmatch$library, levels = library_levels)) / table(factor(barcodes_s$library, levels = library_levels))

# Computed from the data instead of typed in from a previous run
nrow(missmatch) / nrow(barcodes_s) # missmatch % where L-F gives a different (non-NA) call


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
             23             209              47             141           11334 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
            116             210              93             124             100 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
            131             125              81            3756              89 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
             60             145              90              33              42 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
            533             138           12195              89              72 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
             92           10340              61              54              56 
UA_Endo12680031 UA_Endo1268


UA_Endo10298210 UA_Endo10298211 UA_Endo10298212 UA_Endo10298213 UA_Endo11173480 
    0.002438766     0.106091371     0.002556571     0.008045190     0.783871637 
UA_Endo11173481 UA_Endo11173482 UA_Endo11282193 UA_Endo11460960 UA_Endo11460961 
    0.007523674     0.013966480     0.004475673     0.004422254     0.004573519 
UA_Endo11460962 UA_Endo11460963 UA_Endo11596522 UA_Endo12061415 UA_Endo12061703 
    0.008406597     0.006265350     0.003137468     0.618475218     0.004095720 
UA_Endo12061704 UA_Endo12061705 UA_Endo12061706 UA_Endo12269810 UA_Endo12269811 
    0.002809252     0.008743367     0.014805067     0.002486812     0.002456428 
UA_Endo12269812 UA_Endo12269813 UA_Endo12269814 UA_Endo12296823 UA_Endo12296824 
    0.029880031     0.009148767     0.773794416     0.004374754     0.003609565 
UA_Endo12449298 UA_Endo12449299 UA_Endo12449300 UA_Endo12449301 UA_Endo12604667 
    0.005076702     0.859375000     0.003309462     0.003682237     0.003553074 
UA_Endo12680031 UA_Endo1268

In [71]:
# Libraries where more than 20% of the assigned cells still disagree with L-F.
# Both tables share the same library levels, so the division stays aligned (and does
# not fail) when a library has no remaining mismatches.
library_levels = sort(unique(barcodes_s$library))
x = table(factor(missmatch$library, levels = library_levels)) / table(factor(barcodes_s$library, levels = library_levels))
x[ x > 0.2 ]


UA_Endo11173480 UA_Endo12061415 UA_Endo12269814 UA_Endo12449299 UA_Endo12865961 
      0.7838716       0.6184752       0.7737944       0.8593750       0.2080399 
UA_Endo12865964 UA_Endo12865965 UA_Endo12961682 
      0.2158500       0.4336984       0.2464358 