In [1]:
import polars as pl
from pathlib import Path
import plotly.express as px

In [2]:
OPC_ID_TO_MOHD_ACCESSION = pl.read_csv("./OPC_ID_to_MOHD_accession.tsv", separator="\t").with_columns(
    pl.col("Sample").str.replace("MOHD_", ""),
    pl.col('OPC_ID').str.split('_').list.get(0).alias('Site'),
    pl.col('OPC_ID').str.split('_').list.get(1).alias('Kit_ID'),
    pl.col('OPC_ID').str.split('_').list.get(2).alias('Protocol'),
    pl.col('OPC_ID').str.split('_').list.get(3).alias('Aliquot_Number'),
).drop('OPC_ID')
multiqc_df = pl.read_csv("./general_stats_table.tsv", separator="\t", has_header=True)
qc_df = multiqc_df.join(OPC_ID_TO_MOHD_ACCESSION, on="Sample", how="inner")

display(OPC_ID_TO_MOHD_ACCESSION)
display(multiqc_df)
display(qc_df)


Sample,Site,Kit_ID,Protocol,Aliquot_Number
str,str,str,str,str
"""EA100001""","""CCH""","""0001""","""BC""","""01"""
"""EA100002""","""CCH""","""0002""","""BC""","""01"""
"""EA100003""","""CCH""","""0003""","""BC""","""01"""
"""EA100004""","""CKD""","""0001""","""BC""","""01"""
"""EA100005""","""CKD""","""0001""","""PC""","""01"""
…,…,…,…,…
"""EA100150""","""UIC""","""014""","""PC""","""01"""
"""EA100151""","""UIC""","""086""","""PC""","""01"""
"""EA100152""","""UIC""","""089""","""PC""","""01"""
"""EA100153""","""UIC""","""095""","""PC""","""01"""


Sample,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter
str,i64,i64,f64,f64,f64,f64,f64,f64
"""EA100001""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273
"""EA100002""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981
"""EA100003""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851
"""EA100004""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565
"""EA100005""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779
…,…,…,…,…,…,…,…,…
"""EA100150""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228
"""EA100151""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329
"""EA100152""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087
"""EA100153""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641


Sample,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Site,Kit_ID,Protocol,Aliquot_Number
str,i64,i64,f64,f64,f64,f64,f64,f64,str,str,str,str
"""EA100001""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,"""CCH""","""0001""","""BC""","""01"""
"""EA100002""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,"""CCH""","""0002""","""BC""","""01"""
"""EA100003""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,"""CCH""","""0003""","""BC""","""01"""
"""EA100004""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,"""CKD""","""0001""","""BC""","""01"""
"""EA100005""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,"""CKD""","""0001""","""PC""","""01"""
…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,"""UIC""","""014""","""PC""","""01"""
"""EA100151""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,"""UIC""","""086""","""PC""","""01"""
"""EA100152""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,"""UIC""","""089""","""PC""","""01"""
"""EA100153""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,"""UIC""","""095""","""PC""","""01"""


Curated general qc data, now add FRiP data and TSS enrichment data

In [3]:
frag_len_files = list(Path("../results/frag_len").glob("*.txt"))
frag_len_files_list = sorted([str(x) for x in frag_len_files])
print(len(frag_len_files_list))

154


In [4]:
def parse_fragment_data(text):
    """Parse nucleosome fragment data into a dictionary."""
    data = {}
    
    for line in text.strip().split('\n'):
        if 'Total fragments:' in line:
            data['Total'] = int(line.split(':')[1].strip())
        elif 'fragments' in line:
            # Extract category name (before the parenthesis)
            category = line.split('(')[0].strip().rstrip(':').title()
            
            # Extract count (before 'fragments')
            count = int(line.split(':')[1].split('fragments')[0].strip())
            
            # Extract percentage
            percentage = float(line.split('(')[-1].rstrip('%)'))
            
            data[category] = float(percentage / 100)
    
    return data

In [None]:
records = []

for file in frag_len_files_list:
    sample = file.split("/")[-1].split("-")[0]
    with open(file, "r") as f:
        frag_len_string = f.read()
    frag_len_dict = parse_fragment_data(frag_len_string)
    # print(frag_len_dict)
    records.append(dict(Sample=sample, **frag_len_dict))
    
print(records)

frag_len_df = pl.from_records(records)
display(frag_len_df)

{'Total': 19349974, 'Nucleosome-Free': 0.3093, 'Mono-Nucleosomal': 0.5051, 'Di-Nucleosomal': 0.1358, 'Tri-Nucleosomal': 0.051500000000000004}
{'Total': 25183864, 'Nucleosome-Free': 0.48840000000000006, 'Mono-Nucleosomal': 0.3714, 'Di-Nucleosomal': 0.1039, 'Tri-Nucleosomal': 0.0391}
{'Total': 21937166, 'Nucleosome-Free': 0.37310000000000004, 'Mono-Nucleosomal': 0.4558, 'Di-Nucleosomal': 0.1281, 'Tri-Nucleosomal': 0.0452}
{'Total': 23740910, 'Nucleosome-Free': 0.47619999999999996, 'Mono-Nucleosomal': 0.3779, 'Di-Nucleosomal': 0.1081, 'Tri-Nucleosomal': 0.0409}
{'Total': 25998038, 'Nucleosome-Free': 0.4765, 'Mono-Nucleosomal': 0.38689999999999997, 'Di-Nucleosomal': 0.098, 'Tri-Nucleosomal': 0.0404}
{'Total': 16864010, 'Nucleosome-Free': 0.265, 'Mono-Nucleosomal': 0.5443, 'Di-Nucleosomal': 0.1423, 'Tri-Nucleosomal': 0.050199999999999995}
{'Total': 21413757, 'Nucleosome-Free': 0.425, 'Mono-Nucleosomal': 0.42560000000000003, 'Di-Nucleosomal': 0.111, 'Tri-Nucleosomal': 0.0404}
{'Total': 28384

Sample,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal
str,i64,f64,f64,f64,f64
"""EA100001""",19349974,0.3093,0.5051,0.1358,0.0515
"""EA100002""",25183864,0.4884,0.3714,0.1039,0.0391
"""EA100003""",21937166,0.3731,0.4558,0.1281,0.0452
"""EA100004""",23740910,0.4762,0.3779,0.1081,0.0409
"""EA100005""",25998038,0.4765,0.3869,0.098,0.0404
…,…,…,…,…,…
"""EA100150""",38277208,0.5987,0.2971,0.0884,0.0187
"""EA100151""",61518770,0.7664,0.1744,0.0453,0.0168
"""EA100152""",52388174,0.6608,0.2712,0.0637,0.0081
"""EA100153""",65689488,0.7714,0.1831,0.0367,0.0121


In [6]:
qc_df = qc_df.join(frag_len_df, on="Sample", how="inner")
display(qc_df)

Sample,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Site,Kit_ID,Protocol,Aliquot_Number,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal
str,i64,i64,f64,f64,f64,f64,f64,f64,str,str,str,str,i64,f64,f64,f64,f64
"""EA100001""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,"""CCH""","""0001""","""BC""","""01""",19349974,0.3093,0.5051,0.1358,0.0515
"""EA100002""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,"""CCH""","""0002""","""BC""","""01""",25183864,0.4884,0.3714,0.1039,0.0391
"""EA100003""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,"""CCH""","""0003""","""BC""","""01""",21937166,0.3731,0.4558,0.1281,0.0452
"""EA100004""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,"""CKD""","""0001""","""BC""","""01""",23740910,0.4762,0.3779,0.1081,0.0409
"""EA100005""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,"""CKD""","""0001""","""PC""","""01""",25998038,0.4765,0.3869,0.098,0.0404
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,"""UIC""","""014""","""PC""","""01""",38277208,0.5987,0.2971,0.0884,0.0187
"""EA100151""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,"""UIC""","""086""","""PC""","""01""",61518770,0.7664,0.1744,0.0453,0.0168
"""EA100152""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,"""UIC""","""089""","""PC""","""01""",52388174,0.6608,0.2712,0.0637,0.0081
"""EA100153""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,"""UIC""","""095""","""PC""","""01""",65689488,0.7714,0.1831,0.0367,0.0121


In [9]:
frip_score_files = list(Path("../results/frip_all/0.05").glob("*"))
frip_score_files_list = sorted([str(x) for x in frip_score_files])
print(len(frip_score_files_list))

154


In [10]:
def parse_frip_score(frip_data):
    """Parse FRiP score and return just the value."""
    return float(frip_data.split(':')[1].strip())

In [12]:
records = []

for file in frip_score_files_list:
    sample = file.split("/")[-1].split("-")[0]
    with open(file, "r") as f:
        frag_len_string = f.read()
    frip_score = parse_frip_score(frag_len_string)
    frip_score_dict = {"FRiP Score (0.05)": frip_score}
    # print(frag_len_dict)
    records.append(dict(Sample=sample, **frip_score_dict))
    
print(records)

frip_score_df = pl.from_records(records)
display(frip_score_df)

[{'Sample': 'EA100001', 'FRiP Score (0.05)': 0.06426428871597052}, {'Sample': 'EA100002', 'FRiP Score (0.05)': 0.08604824210425484}, {'Sample': 'EA100003', 'FRiP Score (0.05)': 0.035935694788110664}, {'Sample': 'EA100004', 'FRiP Score (0.05)': 0.06032742041051978}, {'Sample': 'EA100005', 'FRiP Score (0.05)': 0.1282895609164408}, {'Sample': 'EA100006', 'FRiP Score (0.05)': 0.055725454953028435}, {'Sample': 'EA100007', 'FRiP Score (0.05)': 0.06270703775546331}, {'Sample': 'EA100008', 'FRiP Score (0.05)': 0.17211825213019738}, {'Sample': 'EA100009', 'FRiP Score (0.05)': 0.041659405156999965}, {'Sample': 'EA100010', 'FRiP Score (0.05)': 0.10586508261488543}, {'Sample': 'EA100011', 'FRiP Score (0.05)': 0.2527326254933624}, {'Sample': 'EA100012', 'FRiP Score (0.05)': 0.037687991190672496}, {'Sample': 'EA100013', 'FRiP Score (0.05)': 0.04176776679636001}, {'Sample': 'EA100014', 'FRiP Score (0.05)': 0.08269282510190112}, {'Sample': 'EA100015', 'FRiP Score (0.05)': 0.08269282510190112}, {'Sampl

Sample,FRiP Score (0.05)
str,f64
"""EA100001""",0.064264
"""EA100002""",0.086048
"""EA100003""",0.035936
"""EA100004""",0.060327
"""EA100005""",0.12829
…,…
"""EA100150""",0.053717
"""EA100151""",0.117867
"""EA100152""",0.058016
"""EA100153""",0.158825


In [13]:
qc_df = qc_df.join(frip_score_df, on="Sample", how="inner")
display(qc_df)

Sample,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Site,Kit_ID,Protocol,Aliquot_Number,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal,FRiP Score (0.05)
str,i64,i64,f64,f64,f64,f64,f64,f64,str,str,str,str,i64,f64,f64,f64,f64,f64
"""EA100001""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,"""CCH""","""0001""","""BC""","""01""",19349974,0.3093,0.5051,0.1358,0.0515,0.064264
"""EA100002""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,"""CCH""","""0002""","""BC""","""01""",25183864,0.4884,0.3714,0.1039,0.0391,0.086048
"""EA100003""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,"""CCH""","""0003""","""BC""","""01""",21937166,0.3731,0.4558,0.1281,0.0452,0.035936
"""EA100004""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,"""CKD""","""0001""","""BC""","""01""",23740910,0.4762,0.3779,0.1081,0.0409,0.060327
"""EA100005""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,"""CKD""","""0001""","""PC""","""01""",25998038,0.4765,0.3869,0.098,0.0404,0.12829
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,"""UIC""","""014""","""PC""","""01""",38277208,0.5987,0.2971,0.0884,0.0187,0.053717
"""EA100151""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,"""UIC""","""086""","""PC""","""01""",61518770,0.7664,0.1744,0.0453,0.0168,0.117867
"""EA100152""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,"""UIC""","""089""","""PC""","""01""",52388174,0.6608,0.2712,0.0637,0.0081,0.058016
"""EA100153""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,"""UIC""","""095""","""PC""","""01""",65689488,0.7714,0.1831,0.0367,0.0121,0.158825


In [17]:
for column in qc_df.columns:
    print(f'"{column}",')

"Sample",
"Fragment Length",
"Number of Peaks",
"Duplication",
"% Duplication",
"Reads After Filtering",
"GC content",
"% PF",
"% Adapter",
"Site",
"Kit_ID",
"Protocol",
"Aliquot_Number",
"Total",
"Nucleosome-Free",
"Mono-Nucleosomal",
"Di-Nucleosomal",
"Tri-Nucleosomal",
"FRiP Score (0.05)",


In [20]:
final_qc_df = qc_df.select(
    "Sample",
    "Site",
    "Kit_ID",
    "Protocol",
    "Aliquot_Number",
    "Fragment Length",
    "Number of Peaks",
    "Duplication",
    "% Duplication",
    "Reads After Filtering",
    "GC content",
    "% PF",
    "% Adapter",
    "Total",
    "Nucleosome-Free",
    "Mono-Nucleosomal",
    "Di-Nucleosomal",
    "Tri-Nucleosomal",
    "FRiP Score (0.05)",
)
display(final_qc_df)
final_qc_df.write_csv("final_qc_data.tsv", separator="\t")

Sample,Site,Kit_ID,Protocol,Aliquot_Number,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal,FRiP Score (0.05)
str,str,str,str,str,i64,i64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64
"""EA100001""","""CCH""","""0001""","""BC""","""01""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,19349974,0.3093,0.5051,0.1358,0.0515,0.064264
"""EA100002""","""CCH""","""0002""","""BC""","""01""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,25183864,0.4884,0.3714,0.1039,0.0391,0.086048
"""EA100003""","""CCH""","""0003""","""BC""","""01""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,21937166,0.3731,0.4558,0.1281,0.0452,0.035936
"""EA100004""","""CKD""","""0001""","""BC""","""01""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,23740910,0.4762,0.3779,0.1081,0.0409,0.060327
"""EA100005""","""CKD""","""0001""","""PC""","""01""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,25998038,0.4765,0.3869,0.098,0.0404,0.12829
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""","""UIC""","""014""","""PC""","""01""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,38277208,0.5987,0.2971,0.0884,0.0187,0.053717
"""EA100151""","""UIC""","""086""","""PC""","""01""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,61518770,0.7664,0.1744,0.0453,0.0168,0.117867
"""EA100152""","""UIC""","""089""","""PC""","""01""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,52388174,0.6608,0.2712,0.0637,0.0081,0.058016
"""EA100153""","""UIC""","""095""","""PC""","""01""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,65689488,0.7714,0.1831,0.0367,0.0121,0.158825


In [22]:
final_qc_df["Site"].unique().to_list()

['CKD', 'CCH', 'UIC', 'MOM', 'LEO', 'EXP']

In [3]:
final_qc_df = pl.read_csv("final_qc_data.tsv", separator="\t")
ccre_qc_df = pl.read_csv("./run_ccre_pipeline/ccres_qc.tsv", separator="\t")

display(final_qc_df, ccre_qc_df)

Sample,Site,Kit_ID,Protocol,Aliquot_Number,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal,FRiP Score (0.05)
str,str,i64,str,str,i64,i64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64
"""EA100001""","""CCH""",1,"""BC""","""01""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,19349974,0.3093,0.5051,0.1358,0.0515,0.064264
"""EA100002""","""CCH""",2,"""BC""","""01""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,25183864,0.4884,0.3714,0.1039,0.0391,0.086048
"""EA100003""","""CCH""",3,"""BC""","""01""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,21937166,0.3731,0.4558,0.1281,0.0452,0.035936
"""EA100004""","""CKD""",1,"""BC""","""01""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,23740910,0.4762,0.3779,0.1081,0.0409,0.060327
"""EA100005""","""CKD""",1,"""PC""","""01""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,25998038,0.4765,0.3869,0.098,0.0404,0.12829
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""","""UIC""",14,"""PC""","""01""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,38277208,0.5987,0.2971,0.0884,0.0187,0.053717
"""EA100151""","""UIC""",86,"""PC""","""01""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,61518770,0.7664,0.1744,0.0453,0.0168,0.117867
"""EA100152""","""UIC""",89,"""PC""","""01""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,52388174,0.6608,0.2712,0.0637,0.0081,0.058016
"""EA100153""","""UIC""",95,"""PC""","""01""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,65689488,0.7714,0.1831,0.0367,0.0121,0.158825


sample,no_ccres_with_zscore_gt_1.64
str,i64
"""EA100001""",62047
"""EA100002""",60086
"""EA100003""",55790
"""EA100004""",54474
"""EA100005""",62934
…,…
"""EA100150""",60839
"""EA100151""",66037
"""EA100152""",64203
"""EA100153""",51045


In [4]:
full_qc_df = final_qc_df.join(ccre_qc_df, left_on='Sample', right_on='sample', how='inner')
display(full_qc_df)

Sample,Site,Kit_ID,Protocol,Aliquot_Number,Fragment Length,Number of Peaks,Duplication,% Duplication,Reads After Filtering,GC content,% PF,% Adapter,Total,Nucleosome-Free,Mono-Nucleosomal,Di-Nucleosomal,Tri-Nucleosomal,FRiP Score (0.05),no_ccres_with_zscore_gt_1.64
str,str,i64,str,str,i64,i64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,f64,f64,i64
"""EA100001""","""CCH""",1,"""BC""","""01""",150,41752,2.0177,2.20324,45.957004,46.9936,97.245974,20.618273,19349974,0.3093,0.5051,0.1358,0.0515,0.064264,62047
"""EA100002""","""CCH""",2,"""BC""","""01""",150,63644,4.043,3.6359,61.387944,47.7256,97.486743,27.65981,25183864,0.4884,0.3714,0.1039,0.0391,0.086048,60086
"""EA100003""","""CCH""",3,"""BC""","""01""",150,27619,2.3736,2.25876,52.243236,46.4458,97.493723,22.411851,21937166,0.3731,0.4558,0.1281,0.0452,0.035936,55790
"""EA100004""","""CKD""",1,"""BC""","""01""",150,39249,2.5404,2.37357,56.691706,47.4691,97.340168,23.979565,23740910,0.4762,0.3779,0.1081,0.0409,0.060327,54474
"""EA100005""","""CKD""",1,"""PC""","""01""",150,62584,2.6227,3.81483,66.748842,48.2961,97.209476,32.502779,25998038,0.4765,0.3869,0.098,0.0404,0.12829,62934
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""EA100150""","""UIC""",14,"""PC""","""01""",150,35530,2.5364,2.91598,94.121214,45.9381,97.438476,33.126228,38277208,0.5987,0.2971,0.0884,0.0187,0.053717,60839
"""EA100151""","""UIC""",86,"""PC""","""01""",150,158011,3.5312,4.40308,156.058938,47.9666,97.887899,40.166329,61518770,0.7664,0.1744,0.0453,0.0168,0.117867,66037
"""EA100152""","""UIC""",89,"""PC""","""01""",150,46203,3.4814,3.6489,130.349206,46.0682,97.796966,30.234087,52388174,0.6608,0.2712,0.0637,0.0081,0.058016,64203
"""EA100153""","""UIC""",95,"""PC""","""01""",150,234934,4.2465,4.72518,166.81182,48.309,97.900565,38.437641,65689488,0.7714,0.1831,0.0367,0.0121,0.158825,51045


In [None]:
# help(full_qc_df)

In [None]:
plt = px.scatter(data_frame=full_qc_df, x="FRiP Score (0.05)", y="no_ccres_with_zscore_gt_1.64", hover_data="Sample")
plt.show()
plt.to_html("")