In [1]:
pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.4.2-cp39-cp39-win_amd64.whl (10.6 MB)
     ---------------------------------------- 10.6/10.6 MB 1.5 MB/s eta 0:00:00
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.4.0-py3-none-any.whl (17 kB)
Collecting joblib>=1.2.0
  Downloading joblib-1.4.0-py3-none-any.whl (301 kB)
     -------------------------------------- 301.2/301.2 KB 2.7 MB/s eta 0:00:00
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.0 scikit-learn-1.4.2 threadpoolctl-3.4.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\ASUS\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [24]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [25]:
# Load the listings CSV into a DataFrame and preview the first five rows.
listings_path = 'house_listings.csv'
data = pd.read_csv(listings_path)
data.head()

Unnamed: 0,category,price,currency,price_1m2,title,address,floor,area,title_deed,repair,mortgage,url,room_number
0,Köhnə tikili,137 000,AZN,2 630 AZN/m²,"Satılır 3 otaqlı köhnə tikili 52 m², 8-ci kilo...",Elşən Süleymanov küç 137,7 / 9,52 m²,var,var,,https://bina.az/items/3858477,3.0
1,Yeni tikili,183 000,AZN,1 790 AZN/m²,"Satılır 2 otaqlı yeni tikili 102 m², Neftçilər m.",Mehdi Abbasov küçəsi,2 / 17,102 m²,yoxdur,var,,https://bina.az/items/3858493,2.0
2,Köhnə tikili,145 000,AZN,2 230 AZN/m²,"Satılır 2 otaqlı köhnə tikili 65 m², Nərimanov r.",Atatürk Prospekti,6 / 9,65 m²,var,var,var,https://bina.az/items/3858489,2.0
3,Köhnə tikili,190 000,AZN,2 000 AZN/m²,"Satılır 3 otaqlı köhnə tikili 95 m², Gənclik m.",Atatürk pr.,4 / 9,95 m²,var,var,var,https://bina.az/items/3858491,3.0
4,Yeni tikili,294 000,AZN,1 550 AZN/m²,"Satılır 3 otaqlı yeni tikili 190 m², Nəsimi r.",Möhsün Sənani küçəsi,6 / 16,190 m²,var,yoxdur,,https://bina.az/items/3858488,3.0


## Normalisasi Data

In [26]:
# Columns that hold prices as text, e.g. "137 000" and "2 630 AZN/m²".
price_columns = ['price', 'price_1m2']

# Parse the text prices into floats. The dtype guard keeps this cell re-runnable:
# once the columns are numeric, the .str accessor would raise AttributeError.
for column in price_columns:
    if not pd.api.types.is_numeric_dtype(data[column]):
        data[column] = (
            data[column]
            .str.replace(' AZN/m²', '', regex=False)  # unit suffix; no-op where absent
            .str.replace(' ', '', regex=False)  # space used as thousands separator
            .astype(float)
        )

# Normalise the house price and the price per square metre with Min-Max scaling.
# NOTE: this overwrites the parsed prices in place, so any later cell that reads
# 'price' or 'price_1m2' sees the scaled values, not AZN amounts.
scaler = MinMaxScaler()
data[price_columns] = scaler.fit_transform(data[price_columns])

# Show the normalised columns (rich display; pandas truncates long frames).
data[price_columns]

          price  price_1m2
0      0.021298   0.116922
1      0.030369   0.076321
2      0.022875   0.097588
3      0.031749   0.086471
4      0.052258   0.064720
...         ...        ...
35498  0.020903   0.090338
35499  0.042595   0.121272
35500  0.055216   0.101455
35501  0.051864   0.093721
35502  0.019917   0.094688

[35503 rows x 2 columns]


## Encoding Data

In [27]:
# One-hot encode the categorical columns 'category', 'title_deed', 'repair' and 'mortgage'.
categorical_columns = ['category', 'title_deed', 'repair', 'mortgage']
data = pd.get_dummies(data=data, columns=categorical_columns)

# Print the column labels after encoding.
print(data.columns)

Index(['price', 'currency', 'price_1m2', 'title', 'address', 'floor', 'area',
       'url', 'room_number', 'category_Köhnə tikili', 'category_Yeni tikili',
       'title_deed_var', 'title_deed_yoxdur', 'repair_var', 'repair_yoxdur',
       'mortgage_var'],
      dtype='object')


## Agregasi Data

In [29]:
def first_mode(values):
    """Return the first value reported by ``values.mode()``, or NaN if there is none.

    ``Series.mode()`` returns an empty Series when every value is missing;
    indexing that result with ``[0]`` would raise instead of yielding NaN.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float('nan')


# Mean, median and mode of the 'price' column, grouped by number of rooms and
# the two building-category indicator columns.
# NOTE(review): if the normalisation cell has run, 'price' holds min-max scaled
# values here, so these statistics are on the scaled price, not AZN amounts.
data_agregasi = data.groupby(
    ['room_number', 'category_Köhnə tikili', 'category_Yeni tikili']
).agg(
    avg_price=('price', 'mean'),
    median_price=('price', 'median'),
    mode_price=('price', first_mode),
)

# Save the aggregation result to an Excel file.
data_agregasi.to_excel('data_agregasi.xlsx')

# Show the aggregation result (rich display instead of print).
data_agregasi

                                                        avg_price  \
room_number category_Köhnə tikili category_Yeni tikili              
1.0         False                 True                   0.018380   
            True                  False                  0.012295   
2.0         False                 True                   0.026653   
            True                  False                  0.017951   
3.0         False                 True                   0.044644   
            True                  False                  0.027152   
4.0         False                 True                   0.074255   
            True                  False                  0.036024   
5.0         False                 True                   0.118087   
            True                  False                  0.048823   
6.0         False                 True                   0.160358   
            True                  False                  0.117555   
7.0         False                 