In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [19]:
# Gerekli kütüphanelerin import edilmesi
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Eğitim ve test verilerinin okunması
train_df = pd.read_csv('/kaggle/input/spaceship-titanic/train.csv')
test_df = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')

# PassengerId'yi kaydetme
PassengerId = test_df['PassengerId']

# Eğitim ve test verilerini birleştirme
combined_df = pd.concat([train_df.drop('Transported', axis=1), test_df], sort=False)

# Veri ön işleme fonksiyonu
def preprocess_data(df):
    # Cabin'den Deck, CabinNum ve Side özelliklerini çıkarma
    df['Deck'] = df['Cabin'].apply(lambda x: x.split('/')[0] if pd.notnull(x) else 'Unknown')
    df['CabinNum'] = df['Cabin'].apply(lambda x: x.split('/')[1] if pd.notnull(x) else 'Unknown')
    df['Side'] = df['Cabin'].apply(lambda x: x.split('/')[2] if pd.notnull(x) else 'Unknown')
    df.drop('Cabin', axis=1, inplace=True)
    
    # CabinNum'u sayısal değere çevirme
    df['CabinNum'] = pd.to_numeric(df['CabinNum'], errors='coerce').fillna(0)
    
    # Toplam harcama özelliği oluşturma
    spend_cols = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']
    df['TotalSpend'] = df[spend_cols].sum(axis=1)
    
    # Grup ve Grup Boyutunu PassengerId'den çıkarma
    df['Group'] = df['PassengerId'].apply(lambda x: x.split('_')[0])
    df['GroupSize'] = df.groupby('Group')['Group'].transform('count')
    
    # IsAlone özelliği
    df['IsAlone'] = df['GroupSize'] == 1
    
    # Gereksiz sütunları kaldırma
    df.drop(['PassengerId', 'Name'], axis=1, inplace=True)
    
    # Eksik sayısal değerleri doldurma
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns
    df[num_cols] = df[num_cols].fillna(0)
    
    # Eksik kategorik değerleri doldurma
    cat_cols = df.select_dtypes(include=['object', 'bool']).columns
    for col in cat_cols:
        df[col].fillna(df[col].mode()[0], inplace=True)
    
    # Kategorik özellikleri Label Encoding ile kodlama
    for col in cat_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
    
    return df

# Veri ön işlemenin uygulanması
combined_df = preprocess_data(combined_df)

# Eğitim ve test setlerine ayrılması
X_train = combined_df.iloc[:train_df.shape[0], :]
X_test = combined_df.iloc[train_df.shape[0]:, :]
y_train = train_df['Transported'].astype(int)

# Modelin eğitilmesi
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Doğrulama seti oluşturma ve modelin değerlendirilmesi
X_train_split, X_valid, y_train_split, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)
y_pred = model.predict(X_valid)
accuracy = accuracy_score(y_valid, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")

# Test seti üzerinde tahminler
test_predictions = model.predict(X_test)

# Tahminleri True/False'a çevirme
test_predictions = test_predictions.astype(bool)

# Submission dosyasının oluşturulması
submission = pd.DataFrame({'PassengerId': PassengerId, 'Transported': test_predictions})

# Submission dosyasının kaydedilmesi
submission.to_csv('submission.csv', index=False)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
  df[col].fillna(df[col].mode()[0], inplace=True)


Validation Accuracy: 1.0000
