In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index

In [None]:
# Load the two input cohorts that the rest of this section compares.
# NOTE(review): absolute Windows drive paths - this cell only runs on a machine
# where the g: drive holds these files; consider a configurable data directory.
df1 = pd.read_csv('g:\\001.csv')
df2 = pd.read_csv('g:\\002.csv')

In [None]:
# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['CD74', 'PSMB1', 'HSP90B1', 'HSPA5', 'CCT7','RALA','MDH1','NFE2L2','ATP5MC1','PCMT1','ZFAND5']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
# NOTE(review): `df1`/`df2` are rebound here, so re-running this cell without
# re-running the load cell fails (the `days_to_death` column is gone).
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

In [None]:
# Collect (df1 index label, df2 index label) pairs in which both patients had
# the event and the df1 patient's survival time is strictly shorter.
# Event rows are filtered once up front rather than re-testing every row pair
# through nested iterrows(), which builds a Series per row on every pass.
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

In [None]:
# Fresh Cox proportional-hazards model with default settings; it is fitted in the next cell.
cph = CoxPHFitter()

In [None]:
# NOTE(review): the fit on df1 below is thrown away - `cph` is rebound to a new
# penalized model on the following line, so only the df2 fit survives this cell.
# Confirm whether a separate df1 model was intended.
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

In [None]:
# Predictions for each cohort from the model currently bound to `cph`
# (the df2-fitted penalized model when the cells run in order).
# The partial hazard is negated, so a larger stored score means lower predicted risk.

# Cohort 1
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_survival_1_values = predicted_survival_1.to_numpy()
predicted_hazard_1 = -cph.predict_partial_hazard(df1)

# Cohort 2
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_survival_2_values = predicted_survival_2.to_numpy()
predicted_hazard_2 = -cph.predict_partial_hazard(df2)

In [None]:
# Stack both cohorts into one frame; ignore_index renumbers rows 0..n-1 with df1's rows first.
df = pd.concat([df1, df2], ignore_index=True)

In [None]:
# Rebind `cph` to an unpenalized Cox model fitted on the pooled frame; the
# later cells of this section predict with this model when run in order.
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')

In [None]:
# Negated partial hazard for every pooled row: despite the name, a larger value
# here means a lower predicted hazard.
predicted_hazard = -cph.predict_partial_hazard(df)

In [None]:
# Concordance index of the negated-hazard score against the observed times.
# It is scored on the same pooled rows used for the fit above (in-sample), and
# the value is only stored here, not displayed.
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Predicted survival curves for every row of the pooled frame; the plotting
# cell below reads one column per patient and uses the index as the time axis.
predicted_survival = cph.predict_survival_function(df)

In [None]:
# Draw one predicted survival curve per patient: df1's patients (the first
# len(df1) columns) as solid lines, df2's patients (the following columns) dashed.
plt.figure(figsize=(10, 6))
timeline = predicted_survival.index
n_first = len(df1)
first_half_curves = predicted_survival.iloc[:, :n_first]
second_half_curves = predicted_survival.iloc[:, n_first:n_first + len(df2)]

for i, (_, curve) in enumerate(first_half_curves.items()):
    plt.step(timeline, curve, where="post", label=f"Individual {i + 1} (First Half)")

for i, (_, curve) in enumerate(second_half_curves.items()):
    plt.step(timeline, curve, where="post", linestyle="--", label=f"Individual {i + 1} (Second Half)")

# Every patient contributes its own legend entry.
plt.title('Cox Survival Curves for the Entire Dataset')
plt.xlabel('Time (in months)')
plt.ylabel('Survival Probability')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Per-patient predicted survival curves for each cohort from the model currently
# bound to `cph` (the pooled fit when cells run top to bottom). This rebinds the
# predicted_survival_1/2 names created by an earlier cell.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_survival_2 = cph.predict_survival_function(df2)

In [None]:
# Plot every patient's predicted survival curve, one colour per cohort.
# Passing the 2-D value array with a single string label would attach that label
# to every patient line (one legend entry per patient) and let the lines of both
# cohorts share the default colour cycle. Each cohort therefore gets one fixed
# colour, and only its first line carries the legend label.
group_styles = (
    (predicted_survival_1, 'tab:blue', 'Group 1'),
    (predicted_survival_2, 'tab:orange', 'Group 2'),
)
for group_curves, group_color, group_label in group_styles:
    # where='post': each predicted value holds from its time point until the next one,
    # matching the per-patient plot earlier in the notebook.
    patient_lines = plt.step(group_curves.index, group_curves.values, where='post', color=group_color)
    if patient_lines:
        patient_lines[0].set_label(group_label)
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` (defined with the cohort
# frames) instead of a second hand-typed copy of the same gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)

In [None]:
# Plot the two cohort-average predicted survival curves.
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one.
# The default ('pre') would draw it over the preceding interval instead; this
# also matches the per-patient plot earlier in the notebook.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Cox analysis plot of BLCA')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test comparing the observed survival of the two cohorts; the
# p-value is annotated on the figure drawn in a later cell.
results = logrank_test(
    durations_A=df1['SurvivalTime'],
    durations_B=df2['SurvivalTime'],
    event_observed_A=df1['event'],
    event_observed_B=df2['event'],
)
p_value = results.p_value

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` (defined with the cohort
# frames) instead of a second hand-typed copy of the same gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)

In [None]:
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# Final BLCA figure: cohort-average predicted curves plus the log-rank p-value
# computed in an earlier cell from the observed survival times.
# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('BLCA')
plt.text(30, 0.83, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two BRCA cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\BRCA1.csv')
df2 = pd.read_csv('g:\\BRCA2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['SLC39A9','SEL1L','CCDC124','TINF2','SDC1','AP2S1','MGST1','LYPLA2','FAM50A']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

In [None]:
# Collect (df1 index label, df2 index label) pairs in which both patients had
# the event and the df1 patient's survival time is strictly shorter.
# Event rows are filtered once up front rather than re-testing every row pair
# through nested iterrows(), which builds a Series per row on every pass.
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the predictions below use that
# model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# df1 predictions from the df2-fitted model; the partial hazard is negated so a
# larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values

In [None]:
# Cohort-2 predictions from whichever model `cph` holds on entry
# (the df2-fitted penalized model when the cells run in order).
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_survival_2_values = predicted_survival_2.to_numpy()
predicted_hazard_2 = -cph.predict_partial_hazard(df2)

# Pool both cohorts, refit an unpenalized model on the pooled rows, and score it
# on those same rows. The negated partial hazard serves as the concordance score.
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(df['SurvivalTime'], predicted_hazard, df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('BRCA')
plt.text(30, 0.83, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two HNSC cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\HNSC1.csv')
df2 = pd.read_csv('g:\\HNSC2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['PCMT1','TMSB10','KRT16P2','SNX3','SLC44A1','DIP2B']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('HNSC')
plt.text(30, 0.79, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two KIRC cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\KIRC1.csv')
df2 = pd.read_csv('g:\\KIRC2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['PDK4','ERLEC1']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('KIRC')
plt.text(30, 0.79, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two LIHC cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\LIHC1.csv')
df2 = pd.read_csv('g:\\LIHC2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['ARF5','MRPS35','CPB2','C6','RHOA','CSDE1','PON1','GDI2','SPP2','STRAP','ITIH1','SEC61A1','C8B','PABPC1','YBX1','SCARB1','NDUFB4','CTNNA1','LASP1']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('LIHC')
plt.text(30, 0.78, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two LUAD cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\LUAD1.csv')
df2 = pd.read_csv('g:\\LUAD2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['PFKP','TYROBP','GDE1','CDC42','RAB20','TFPI']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('LUAD')
plt.text(30, 0.78, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two LUSC cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\LUSC1.csv')
df2 = pd.read_csv('g:\\LUSC2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['PLAU','SERPINB1','SLC39A9']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('LUSC')
plt.text(30, 0.77, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two OV cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\OV1.csv')
df2 = pd.read_csv('g:\\OV2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['ISOC1','CCNI','RTF2','PSMC4','CCNG1','MPZL2','PCMT1','PPP2CB','NOP58','TUSC3']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
# Axis labels match the SKCM figure later in the notebook.
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('OV')
plt.text(30, 0.8, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Load the two SKCM cohort files.
# NOTE(review): absolute g: drive paths - only runs where that drive holds the CSVs.
df1 = pd.read_csv('g:\\SKCM1.csv')
df2 = pd.read_csv('g:\\SKCM2.csv')

# Average month length in days; converts `days_to_death` into months.
DAYS_PER_MONTH = 30.44

# Genes used as Cox covariates for this section.
selected_genes = ['MPV17','PHF23','AP001324.1','POLDIP2','LYPLA2','CDK5','GPKOW','TXNDC11']
gene_columns = ['event', 'SurvivalTime'] + selected_genes

# Derive the survival time in months and keep only the modelling columns.
# assign() builds a new frame, so the loaded data is not mutated in place.
df1 = df1.assign(SurvivalTime=df1['days_to_death'] / DAYS_PER_MONTH)[gene_columns]
df2 = df2.assign(SurvivalTime=df2['days_to_death'] / DAYS_PER_MONTH)[gene_columns]

# (df1 index label, df2 index label) pairs in which both patients had the event
# and the df1 patient's survival time is strictly shorter. Event rows are
# filtered once instead of re-testing every row pair through nested iterrows().
event_times_1 = df1.loc[df1['event'] == 1, 'SurvivalTime']
event_times_2 = df2.loc[df2['event'] == 1, 'SurvivalTime']
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1.items()
    for j, time_2 in event_times_2.items()
    if time_1 < time_2
]

# NOTE(review): the unpenalized fit on df1 is discarded - `cph` is rebound to
# the penalized df2 model two lines later, and the per-cohort predictions below
# use that model. Confirm whether a separate df1 model was intended.
cph = CoxPHFitter()
cph.fit(df1, duration_col='SurvivalTime', event_col='event')
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')

# Per-cohort predictions from the df2-fitted model; the partial hazard is
# negated so a larger score means lower predicted risk.
predicted_survival_1 = cph.predict_survival_function(df1)
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# Pool both cohorts, refit an unpenalized model, and compute its concordance
# index on the same pooled rows (in-sample).
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
# Average across patients (columns) at each time point (row).
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
plt.xlim(0, 110)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test
# Log-rank test on the observed survival times of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average predicted survival from the model currently bound to `cph`.
# The covariate columns come from `selected_genes` instead of a re-typed gene list.
predicted_survival_1 = cph.predict_survival_function(df1[selected_genes])
predicted_survival_2 = cph.predict_survival_function(df2[selected_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# Time span covered by the two mean curves (computed but not used in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# NOTE(review): the legend assumes df1 is the low-survival cohort - confirm.
plt.xlim(0, 60)
# where='post': each value holds from its own time point until the next one
# (the default 'pre' would draw it over the preceding interval).
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('SKCM')
plt.text(30, 0.83, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# --- STAD: load both cohorts and keep only the modelling columns ---------------
df1 = pd.read_csv('g:\\STAD1.csv')
df2 = pd.read_csv('g:\\STAD2.csv')
# Days -> months (the downstream plots label this axis 'months').
df1['SurvivalTime'] = df1['days_to_death'] / 30.44
df2['SurvivalTime'] = df2['days_to_death'] / 30.44
selected_genes = ['NFIX','TTYH3','RAB11B','TCF3','CHPF','AL928646.1','ZBTB7A','TLE5','TSPAN15','CYB561','UPF1','SCAMP4']
gene_columns = ['event', 'SurvivalTime'] + selected_genes
df1 = df1[gene_columns]
df2 = df2[gene_columns]

# --- Cross-cohort pairs: (i, j) where both subjects had the event and the
# cohort-1 subject's time is strictly shorter than the cohort-2 subject's.
# Only event rows can ever match, so they are filtered once up front instead of
# materialising a row Series for every (row1, row2) combination via iterrows().
# Pair order is unchanged: cohort-1 order outermost, cohort-2 order innermost.
event_times_1 = list(df1.loc[df1['event'] == 1, 'SurvivalTime'].items())
event_times_2 = list(df2.loc[df2['event'] == 1, 'SurvivalTime'].items())
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1
    for j, time_2 in event_times_2
    if time_1 < time_2
]

# --- Per-cohort predictions from the penalised model fitted on cohort 2 --------
# An unpenalised fit on df1 used to precede this; it was replaced by the model
# below before anything read it, so that wasted fit has been dropped.
cph = CoxPHFitter(penalizer=0.01)
cph.fit(df2, duration_col='SurvivalTime', event_col='event')
predicted_survival_1 = cph.predict_survival_function(df1)
# Negated partial hazard: larger score = lower predicted risk, the orientation
# concordance_index expects (higher score <-> longer survival).
predicted_hazard_1 = -cph.predict_partial_hazard(df1)
predicted_survival_1_values = predicted_survival_1.values
predicted_survival_2 = cph.predict_survival_function(df2)
predicted_hazard_2 = -cph.predict_partial_hazard(df2)
predicted_survival_2_values = predicted_survival_2.values

# --- Pooled model and its concordance on the same pooled data ------------------
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
# This C-index is computed on the data the model was fitted on (in-sample).
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])

In [None]:
import matplotlib.pyplot as plt
# Overlay the cohort-average Cox-predicted survival curves for the two groups.
# The gene panel is written once so both cohorts are guaranteed to use the same covariates.
curve_genes = ['NFIX','TTYH3','RAB11B','TCF3','CHPF','AL928646.1','ZBTB7A','TLE5','TSPAN15','CYB561','UPF1','SCAMP4']
predicted_survival_1 = cph.predict_survival_function(df1[curve_genes])
predicted_survival_2 = cph.predict_survival_function(df2[curve_genes])
# Rows are time points and columns are individuals, so the row-wise mean is the
# average predicted survival probability of the cohort at each time point.
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)
# where='post': a survival value applies from its time point until the next one.
# The matplotlib default ('pre') would draw every drop one interval too early.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Group 1')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='Group 2')
plt.xlim(0, 110)
plt.xlabel('Time')
plt.ylabel('Survival Probability')
plt.title('Average Cox Survival Curves for Group 1 and Group 2')
plt.legend()
plt.show()

In [None]:
import matplotlib.pyplot as plt
from lifelines.statistics import logrank_test

# Log-rank test on the observed durations/events of the two cohorts.
results = logrank_test(df1['SurvivalTime'], df2['SurvivalTime'], event_observed_A=df1['event'], event_observed_B=df2['event'])
p_value = results.p_value

# Cohort-average Cox-predicted survival curves, using one shared gene panel.
curve_genes = ['NFIX','TTYH3','RAB11B','TCF3','CHPF','AL928646.1','ZBTB7A','TLE5','TSPAN15','CYB561','UPF1','SCAMP4']
predicted_survival_1 = cph.predict_survival_function(df1[curve_genes])
predicted_survival_2 = cph.predict_survival_function(df2[curve_genes])
mean_survival_1 = predicted_survival_1.mean(axis=1)
mean_survival_2 = predicted_survival_2.mean(axis=1)

# Overall time span covered by the two curves (not used by the plot in this cell).
min_time = min(mean_survival_1.index.min(), mean_survival_2.index.min())
max_time = max(mean_survival_1.index.max(), mean_survival_2.index.max())

# where='post': a survival value applies from its time point until the next one.
# The matplotlib default ('pre') would draw every drop one interval too early.
plt.step(mean_survival_1.index, mean_survival_1.values, where='post', label='Low survival group', color='red')
plt.step(mean_survival_2.index, mean_survival_2.values, where='post', label='High survival group', color='blue')
plt.xlim(0, 60)
plt.xlabel('months')
plt.ylabel('Survival Probability')
plt.title('STAD')
# NOTE(review): the p-value comes from the observed data, while the curves are
# model predictions -- confirm this pairing is what the figure is meant to show.
plt.text(30, 0.77, f'Log-rank p-value: {p_value:.2e}', fontsize=12, color='black', bbox=dict(facecolor='white', alpha=0.8))
plt.legend()
plt.show()

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index
# Read the two BLCA cohort tables (cohort 1 -> df1, cohort 2 -> df2).
df1, df2 = map(pd.read_csv, ('g:\\33x33BLCA1.csv', 'g:\\33x33BLCA2.csv'))

In [None]:
import pandas as pd
# Re-read both BLCA cohorts so this cell always starts from the raw tables.
df1, df2 = map(pd.read_csv, ('g:\\33x33BLCA1.csv', 'g:\\33x33BLCA2.csv'))

# Columns that must not be used as covariates: outcome/identifier columns plus
# a fixed list of genes left out of the model.
columns_to_exclude = [
    'event', 'days_to_death', 'case_id', 'file_id',
    'GBP1P1', 'ST6GALNAC2', 'HOXD4', 'FGFR2', 'TLL1', 'FMO3', 'NAMPTP1', 'MAD1L1',
    'PRSS3', 'CD79B', 'AC005332.3', 'CYP4F22', 'EPN3', 'AC023043.4', 'DPF1', 'NOX1',
    'SOX8', 'DRD4', 'OR7E14P', 'RSPH9', 'SurvivalTime', 'CHRDL2', 'ACP3', 'CADM4',
    'FMO1', 'HES2', 'PLEKHB1', 'VSIG2', 'AL133520.1', 'PTMAP9', 'CD84', 'ST6GALNAC1',
    'CNTN1', 'SYN1', 'HSD17B6', 'MIR1244-2', 'AL161430.1', 'NRXN3', 'COL6A6',
    'RASGRF1', 'AC130371.2', 'HSPB6', 'AC020910.5', 'PTGES3P1', 'AC092794.1',
    'SLC25A4', 'NOS2', 'SLC1A7',
]

# Every remaining cohort-1 column, in file order, becomes a model covariate.
selected_genes = df1.columns[~df1.columns.isin(columns_to_exclude)].tolist()

In [None]:
# Days -> months, then keep only the outcome columns and the selected covariates.
# NOTE: this cell rebinds df1/df2 to the reduced frames, which no longer contain
# 'days_to_death'; re-run the loading cell before running this one a second time.
df1['SurvivalTime'] = df1['days_to_death'] / 30.44
df2['SurvivalTime'] = df2['days_to_death'] / 30.44
gene_columns = ['event', 'SurvivalTime'] + selected_genes
df1 = df1[gene_columns]
df2 = df2[gene_columns]

# Cross-cohort pairs: (i, j) where both subjects had the event and the cohort-1
# subject's time is strictly shorter than the cohort-2 subject's.
# Only event rows can ever match, so they are filtered once up front instead of
# materialising a full (wide) row Series for every (row1, row2) combination via
# iterrows(). Pair order is unchanged: cohort-1 order outermost, cohort-2 innermost.
event_times_1 = list(df1.loc[df1['event'] == 1, 'SurvivalTime'].items())
event_times_2 = list(df2.loc[df2['event'] == 1, 'SurvivalTime'].items())
matched_pairs = [
    (i, j)
    for i, time_1 in event_times_1
    for j, time_2 in event_times_2
    if time_1 < time_2
]

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
# Standardise the cohort-1 covariates; outcome columns are left untouched.
features = df1.drop(['event', 'SurvivalTime'], axis=1)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Keep df1's index on the scaled frame: the outcome columns below are assigned by
# index alignment, so a fresh 0..n-1 index would silently yield NaN outcomes if
# df1 is ever filtered or re-indexed upstream.
df_scaled1 = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)
df_scaled1[['event', 'SurvivalTime']] = df1[['event', 'SurvivalTime']]

In [None]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
# Standardise the cohort-2 covariates with a scaler fitted on cohort 2 itself;
# outcome columns are left untouched.
features = df2.drop(['event', 'SurvivalTime'], axis=1)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
# Keep df2's index on the scaled frame: the outcome columns below are assigned by
# index alignment, so a fresh 0..n-1 index would silently yield NaN outcomes if
# df2 is ever filtered or re-indexed upstream.
df_scaled2 = pd.DataFrame(scaled_features, columns=features.columns, index=features.index)
df_scaled2[['event', 'SurvivalTime']] = df2[['event', 'SurvivalTime']]

In [None]:
# Number of missing entries in each column of the cohort-2 frame.
missing_values = df2.isna().sum()

In [None]:
# Per-column summary statistics of the cohort-1 frame, kept for inspection.
descriptive_stats = df1.describe()

In [None]:
# Penalised Cox model fitted on the standardised cohort-2 frame.
# A fit on df_scaled1 used to precede this on the same fitter; it was overwritten
# by the fit below before anything read it, so that wasted fit has been dropped.
cph = CoxPHFitter(penalizer=0.1)
cph.fit(df_scaled2, duration_col='SurvivalTime', event_col='event')

# The model was trained on standardised covariates, so it must be fed the
# standardised cohort-1 frame; passing the raw df1 would put the covariates on a
# different scale from the one the coefficients were estimated on.
predicted_survival_1 = cph.predict_survival_function(df_scaled1)
# Negated partial hazard: larger score = lower predicted risk.
predicted_hazard_1 = -cph.predict_partial_hazard(df_scaled1)
predicted_survival_1_values = predicted_survival_1.values

In [None]:
# `cph` here is the model fitted in the previous cell on standardised covariates,
# so cohort 2 is scored using its standardised frame rather than the raw df2.
predicted_survival_2 = cph.predict_survival_function(df_scaled2)
# Negated partial hazard: larger score = lower predicted risk.
predicted_hazard_2 = -cph.predict_partial_hazard(df_scaled2)
predicted_survival_2_values = predicted_survival_2.values

# Pooled model: fitted and scored on the raw (unscaled) covariates of both
# cohorts, so fit and prediction share the same scale here.
df = pd.concat([df1, df2], ignore_index=True)
cph = CoxPHFitter()
cph.fit(df, duration_col='SurvivalTime', event_col='event')
predicted_hazard = -cph.predict_partial_hazard(df)
# This C-index is computed on the data the model was fitted on (in-sample).
c_index = concordance_index(event_times=df['SurvivalTime'], predicted_scores=predicted_hazard, event_observed=df['event'])