In [22]:
import pandas as pd
import numpy as np
from scipy.stats import expon, weibull_min, gamma, lognorm

In [23]:
# csv leri okuma
pfs_data = pd.read_csv("Progression_Free_Survival.csv")
os_data = pd.read_csv("Overall_Survival.csv")

In [24]:
# Sütun isimlerini ayarlama
pfs_data.columns = ["Months", "Progression_Free_Survival"]
os_data.columns = ["Months", "Overall_Survival"]

In [25]:
# Bireysel hasta düzeyinde veri oluşturma fonksiyonu
def generate_individual_data(data, survival_type):
    individual_data = []
    for index, row in data.iterrows():
        months = row['Months']
        survival_rate = row[survival_type] / 100  # Yüzdeyi olasılığa dönüştürme
        for i in range(int(survival_rate*100)):
            individual_data.append(months)
    return pd.DataFrame({'Months': individual_data})


individual_pfs_data = generate_individual_data(pfs_data, 'Progression_Free_Survival')
individual_os_data = generate_individual_data(os_data, 'Overall_Survival')


In [26]:
def fit_distribution(data):

    distributions = {'expon': expon, 'weibull_min': weibull_min, 'gamma': gamma, 'lognorm': lognorm}
    best_params = {}
    for name, dist in distributions.items():
        params = dist.fit(data)
        best_params[name] = params
    return best_params


pfs_params = fit_distribution(individual_pfs_data)
os_params = fit_distribution(individual_os_data)

  return np.sum((1 + np.log(shifted/scale)/shape**2)/shifted)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [27]:
# İstenilen 250 aylık period için ölüm ilerleme olasılıkları oluşturma

def generate_death_progression_probabilities(data, params, period=251):
    time_points = list(range(1, period + 1))
    probabilities = []
    for t in time_points:
        prob = 0
        for dist, dist_params in params.items():
            if dist == 'expon':
                prob += expon.pdf(t, *dist_params)
            elif dist == 'weibull_min':
                prob += weibull_min.pdf(t, *dist_params)
            elif dist == 'gamma':
                prob += gamma.pdf(t, *dist_params)
            elif dist == 'lognorm':
                prob += lognorm.pdf(t, *dist_params)
        probabilities.append(prob)
    return pd.DataFrame({'Time': time_points, 'Probability': probabilities})


pfs_death_probabilities = generate_death_progression_probabilities(individual_pfs_data['Months'], pfs_params)
os_death_probabilities = generate_death_progression_probabilities(individual_os_data['Months'], os_params)

In [28]:
# Time adında hata verdi ID ekleme
pfs_death_probabilities['ID'] = pfs_death_probabilities.index
os_death_probabilities['ID'] = os_death_probabilities.index

In [29]:
# Csv ye kaydetme
pfs_death_probabilities[['ID', 'Probability']].to_csv('sample_submission.csv', index=False)

In [30]:
data = pd.read_csv("sample_submission.csv")

In [31]:
# ID sütununda değeri 0 olan satırları düşür
data = data[data['ID'] != 0]

# Veri çerçevesinin 'Probability' sütunundaki değerlerin ortalamasını hesapla
average_probability = data['Probability'].mean()

print("Olasılık değerlerinin ortalaması:", average_probability)


Olasılık değerlerinin ortalaması: 0.011900126211595684


In [32]:
data.head()

Unnamed: 0,ID,Probability
1,1,0.371392
2,2,0.392121
3,3,0.417383
4,4,0.415538
5,5,0.374981


In [33]:
new_row = {'ID': 0, 'Probability': average_probability}
print(average_probability)
# Yeni satırı veri çerçevesine ekleyin
data = data.append(new_row, ignore_index=True)
data


0.011900126211595684


  data = data.append(new_row, ignore_index=True)


Unnamed: 0,ID,Probability
0,1.0,0.371392
1,2.0,0.392121
2,3.0,0.417383
3,4.0,0.415538
4,5.0,0.374981
...,...,...
246,247.0,0.000116
247,248.0,0.000116
248,249.0,0.000115
249,250.0,0.000115


In [34]:
#İstenilen olasılığı hesaplama
data['Probability'] = 1- data['Probability']

In [35]:
data.head()


Unnamed: 0,ID,Probability
0,1.0,0.628608
1,2.0,0.607879
2,3.0,0.582617
3,4.0,0.584462
4,5.0,0.625019


In [36]:
# Son satırı al
last_row = data.iloc[-1]

# Son satırı veri setinden çıkar
data = data.iloc[:-1]

# Veri setine ilk satırı ekleyin
data = pd.concat([last_row.to_frame().T, data], ignore_index=True)

# Veri setini yeniden indeksleme
data.reset_index(drop=True, inplace=True)

# Veri setini kontrol etmek için yazdırma
print(data)


        ID  Probability
0      0.0     0.988100
1      1.0     0.628608
2      2.0     0.607879
3      3.0     0.582617
4      4.0     0.584462
..     ...          ...
246  246.0     0.999883
247  247.0     0.999884
248  248.0     0.999884
249  249.0     0.999885
250  250.0     0.999885

[251 rows x 2 columns]


In [37]:
# Güncellenmiş veriyi sample_submission.csv dosyasına yaz
data.to_csv("sample_submission.csv", index=False)

print("Veriler başarıyla güncellendi ve sample_submission.csv dosyasına yazıldı.")


Veriler başarıyla güncellendi ve sample_submission.csv dosyasına yazıldı.
