In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Adım 1: Veri setini yükle
data = pd.read_csv("house_prices.csv.zip")

# Adım 2: Veri Ön İşleme
# 'Carpet Area' sütununu temizleme
data['Carpet Area'] = data['Carpet Area'].str.replace('sqft', '', regex=False).str.strip()
data['Carpet Area'] = pd.to_numeric(data['Carpet Area'], errors='coerce')
data['Carpet Area'].fillna(data['Carpet Area'].median(), inplace=True)
data['Carpet Area'] = data['Carpet Area'].astype(int)

# 'location' sütunundaki nadir değerleri gruplandırma
location_counts = data['location'].value_counts()
top_10_locations = location_counts.head(10).index
data['location'] = data['location'].apply(lambda x: x if x in top_10_locations else 'Other')

# Yüksek kardinaliteli sütunları silme
columns_to_drop = ['Title', 'Description', 'Society', 'Floor', 'Super Area']
data.drop(columns=columns_to_drop, inplace=True)

# **ÖNEMLİ ADIM:** 'Amount(in rupees)' sütununu temizleme ve sayısal hale getirme
def convert_price(price):
    price = str(price).lower().strip()
    if 'lac' in price:
        return float(price.replace('lac', '').strip()) * 100000
    elif 'cr' in price:
        return float(price.replace('cr', '').strip()) * 10000000
    else:
        try:
            return float(price)
        except ValueError:
            return np.nan

data['Amount(in rupees)'] = data['Amount(in rupees)'].apply(convert_price)

# **YENİ ADIM:** 'Amount(in rupees)' sütunundaki NaN değerlerini medyan ile doldurma
data['Amount(in rupees)'].fillna(data['Amount(in rupees)'].median(), inplace=True)

# Adım 3: Veriyi özellikler (X) ve hedef (y) olarak ayırma
y = data['Amount(in rupees)']
X = data.drop(columns=['Amount(in rupees)'])

# Adım 4: Kalan tüm kategorik (metin) sütunlarını One-Hot Encoding ile kodlama
X = pd.get_dummies(X, columns=X.select_dtypes(include='object').columns)

# Adım 5: Geri kalan tüm NaN değerlerini doldurma
X.fillna(0, inplace=True) 

# Adım 6: Veriyi eğitim ve test setlerine bölme
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Adım 7: Modeli eğitme
model = LinearRegression()
model.fit(X_train, y_train)

# Başarı mesajı
print("Model başarıyla eğitildi.")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Carpet Area'].fillna(data['Carpet Area'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Amount(in rupees)'].fillna(data['Amount(in rupees)'].median(), inplace=True)


Model başarıyla eğitildi.
