In [4]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path


In [5]:
# Anchor every path on the kernel's current working directory.
# NOTE(review): Path.cwd() is the process working directory; it equals the
# notebook's own folder only if the kernel was started there — confirm.
script_dir = Path.cwd()

# The originator features CSV is expected one level up, under results/summary.
variants_file = script_dir.parent.joinpath(
    "results", "summary", "all_originator_features.csv"
)

# Load with pandas' default CSV parsing (no explicit index column or dtypes).
# NOTE(review): the rendered frame carries an "Unnamed: 0" column, which looks
# like an index written out by an earlier to_csv call — verify before using it.
variants = pd.read_csv(variants_file)

# Bare trailing expression so the notebook renders the frame as rich output.
variants

Unnamed: 0,CHROM,POS,REF,ALT,QUAL,FILTER,Patient_ID,Sample_ID
0,chr1,752894,T,C,228.78,.,271251,ORIGINATOR
1,chr1,762273,G,A,8175.77,.,271251,ORIGINATOR
2,chr1,762589,G,C,1325.77,.,271251,ORIGINATOR
3,chr1,762592,C,G,1276.77,.,271251,ORIGINATOR
4,chr1,762601,T,C,1010.77,.,271251,ORIGINATOR
...,...,...,...,...,...,...,...,...
1078470,chrY,21154323,G,A,146.03,.,965255,ORIGINATOR
1078471,chrY,21154426,G,A,768.89,.,965255,ORIGINATOR
1078472,chrY,21154466,T,A,751.77,.,965255,ORIGINATOR
1078473,chrY,21154569,A,G,232.77,.,965255,ORIGINATOR


In [6]:
# The divergence metrics CSV is expected one level above the working directory,
# under results/summary. Relies on `script_dir` assigned in an earlier cell.
divergence_file = script_dir.parent.joinpath(
    "results", "summary", "all_divergence_metrics.csv"
)

# Load with pandas' default CSV parsing (no explicit index column or dtypes).
# NOTE(review): the rendered frame carries an "Unnamed: 0" column, which looks
# like an index written out by an earlier to_csv call — verify before using it.
divergence = pd.read_csv(divergence_file)

# Bare trailing expression so the notebook renders the frame as rich output.
divergence

Unnamed: 0,patient_id,originator_sample,passage_sample,passage_number,originator_specimen,passage_specimen,onco_tree_code,total_originator_variants,total_passage_variants,shared_variants,originator_only_variants,passage_only_variants,total_union_variants,jaccard_similarity,jaccard_distance,variant_gain_rate,variant_loss_rate,relative_divergence,mutation_burden_change
0,271251,ORIGINATOR,CJNX25_RG-T63,P2,349-R,349-R,CCRCC,96972,92494,87287,9685,5207,102179,0.854256,0.145744,0.053696,0.099874,0.145744,-0.046178
1,361957,ORIGINATOR,QQY,P0,307-R,307-R,CCRCC,96941,92116,86510,10431,5606,102547,0.843613,0.156387,0.057829,0.107602,0.156387,-0.049773
2,628781,ORIGINATOR,QAJPP5C49,P2,278-R,278-R,CCRCC,99004,92493,87880,11124,4613,103617,0.848123,0.151877,0.046594,0.112359,0.151877,-0.065765
3,361957,ORIGINATOR,QQYJC4G43GX2,P3,307-R,307-R,CCRCC,96941,91691,86332,10609,5359,102300,0.84391,0.15609,0.055281,0.109438,0.15609,-0.054157
4,965255,ORIGINATOR,CAA,P0,017-R,017-R,CCRCC,92786,93971,86704,6082,7267,100053,0.866581,0.133419,0.07832,0.065549,0.133419,0.012771
5,668183,ORIGINATOR,HG4,P0,249-R,249-R,CCRCC,98677,87468,82146,16531,5322,103999,0.789873,0.210127,0.053934,0.167526,0.210127,-0.113593
6,965255,ORIGINATOR,CAFE09,P1,017-R,017-R,CCRCC,92786,93298,86344,6442,6954,99740,0.865691,0.134309,0.074947,0.069429,0.134309,0.005518
7,361957,ORIGINATOR,QQYJC4G39,P2,307-R,307-R,CCRCC,96941,92687,87035,9906,5652,102593,0.848352,0.151648,0.058304,0.102186,0.151648,-0.043882
8,668183,ORIGINATOR,HG5,P0,249-R,249-R,CCRCC,98677,87088,81897,16780,5191,103868,0.788472,0.211528,0.052606,0.17005,0.211528,-0.117444
9,628781,ORIGINATOR,QFNPK2_RG-J80,P2,278-R,278-R,CCRCC,99004,92788,88533,10471,4255,103259,0.857388,0.142612,0.042978,0.105763,0.142612,-0.062785
