### Loading the dataset

In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
from matplotlib import rcParams

In [22]:
# Read the raw listings CSV directly from raw.githubusercontent.com
# (this cell needs network access).
df = pd.read_csv(
    "https://raw.githubusercontent.com/xKatyJane/Web_scraping-Property_data_analysis/"
    "refs/heads/main/Data/Property_data_idealista_20-02-2025.csv"
)

### Checking and cleaning the data

In [23]:
# (rows, columns) of the freshly loaded frame
df.shape

(4579, 10)

In [24]:
# Summary of column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4579 entries, 0 to 4578
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Title                4579 non-null   object 
 1   Price_EUR_per_month  4579 non-null   float64
 2   Neighbourhood        4579 non-null   object 
 3   Person               4579 non-null   object 
 4   Number_of_rooms      4337 non-null   float64
 5   Square_meters        4579 non-null   float64
 6   Exterior_interior    4189 non-null   object 
 7   Floor                4092 non-null   object 
 8   Lift                 4405 non-null   float64
 9   Garage               628 non-null    object 
dtypes: float64(4), object(6)
memory usage: 357.9+ KB


In [25]:
# Per-column dtypes (the same dtype information df.info() reports)
df.dtypes

Title                   object
Price_EUR_per_month    float64
Neighbourhood           object
Person                  object
Number_of_rooms        float64
Square_meters          float64
Exterior_interior       object
Floor                   object
Lift                   float64
Garage                  object
dtype: object

In [26]:
# Number of missing entries in each column before any cleaning
df.isna().sum()

Title                     0
Price_EUR_per_month       0
Neighbourhood             0
Person                    0
Number_of_rooms         242
Square_meters             0
Exterior_interior       390
Floor                   487
Lift                    174
Garage                 3951
dtype: int64

### Handling the missing values

In [27]:
# Replace missing values with explicit labels instead of dropping rows.
# NOTE(review): 'Lift' and 'Number_of_rooms' are float64 on load; filling them with
# the string 'unknown' leaves them as object columns that mix numbers and text.
columns_to_replace_NaNs = ['Floor', 'Exterior_interior', 'Lift', 'Number_of_rooms']
for column_name in columns_to_replace_NaNs:
    df[column_name] = df[column_name].fillna('unknown')

# A missing Garage entry is labelled 'No garage'.
df['Garage'] = df['Garage'].fillna('No garage')

In [28]:
# Re-count missing entries per column to confirm the fill left none behind
df.isna().sum()

Title                  0
Price_EUR_per_month    0
Neighbourhood          0
Person                 0
Number_of_rooms        0
Square_meters          0
Exterior_interior      0
Floor                  0
Lift                   0
Garage                 0
dtype: int64

#### All the NaNs have been filled: 'unknown' for Floor, Exterior_interior, Lift and Number_of_rooms, and 'No garage' for Garage

In [29]:
# Preview the first five rows after the missing values were filled
df.head()

Unnamed: 0,Title,Price_EUR_per_month,Neighbourhood,Person,Number_of_rooms,Square_meters,Exterior_interior,Floor,Lift,Garage
0,Alquiler de Piso en calle de Morales,1400.0,"Les Corts, Barcelona",Profesional,1.0,50.0,exterior,Bajo,0.0,No garage
1,Alquiler de Piso en calle de Loreto,3000.0,"Les Corts, Barcelona",Profesional,2.0,254.0,exterior,7.0,1.0,No garage
2,Alquiler de Piso en calle de la Riera Blanca,1500.0,"La Maternitat i Sant Ramon, Barcelona",Profesional,2.0,71.0,exterior,4.0,1.0,No garage
3,Alquiler de Piso en Passatge d'Albert Pinyol,1250.0,"Les Corts, Barcelona",Profesional,1.0,45.0,exterior,1.0,1.0,No garage
4,Alquiler de Ático en calle de l'Alfambra,1795.0,"Pedralbes, Barcelona",Profesional,2.0,70.0,exterior,6.0,1.0,No garage


### Checking the value distributions

In [30]:
# Listings per room count, most frequent first (includes the 'unknown' label)
df['Number_of_rooms'].value_counts()

Number_of_rooms
2.0        1773
1.0        1070
3.0        1003
4.0         333
unknown     242
5.0         105
6.0          30
7.0          14
9.0           5
10.0          4
Name: count, dtype: int64

In [31]:
# Listings per distinct surface value, most frequent first
df['Square_meters'].value_counts()

Square_meters
70.0     190
60.0     169
80.0     166
90.0     155
65.0     155
        ... 
421.0      1
611.0      1
382.0      1
195.0      1
147.0      1
Name: count, Length: 303, dtype: int64

In [32]:
# Listings per value of the Person column
df['Person'].value_counts()

Person
Profesional    4280
Particular      299
Name: count, dtype: int64

In [33]:
# Listings per exterior / interior label (includes the 'unknown' label)
df['Exterior_interior'].value_counts()

Exterior_interior
exterior    3849
unknown      390
interior     340
Name: count, dtype: int64

In [34]:
# Listings per garage label (missing entries were labelled 'No garage')
df['Garage'].value_counts()

Garage
No garage          3951
Garage included     445
Garage optional     183
Name: count, dtype: int64

In [35]:
# Split the combined "Neighbourhood, City" text into separate "Neighbourhood" and
# "City" columns.
# - rsplit with n=1 cuts only at the last comma, so the result never has more than
#   two parts even if a neighbourhood name itself contains a comma.
# - .str.strip() removes the space that follows the comma, so City holds
#   "Barcelona" rather than " Barcelona".
neighbourhood_and_city = df['Neighbourhood'].str.rsplit(',', n=1, expand=True)
df['Neighbourhood'] = neighbourhood_and_city[0].str.strip()
df['City'] = neighbourhood_and_city[1].str.strip()

In [36]:
# Listings per neighbourhood, most frequent first
df['Neighbourhood'].value_counts()

Neighbourhood
La Dreta de l'Eixample                    633
Sant Pere - Santa Caterina i la Ribera    351
El Raval                                  328
El Gòtic                                  286
Eixample                                  222
                                         ... 
La Vall d'Hebron - La Clota                 3
Horta                                       3
Porta                                       3
La Font d'En Fargues                        2
Les Roquetes                                2
Name: count, Length: 67, dtype: int64

In [37]:
# Listings per city, to check which cities the split produced
df['City'].value_counts()

City
 Barcelona    4579
Name: count, dtype: int64

In [38]:
# Preview the first five rows, now including the new City column
df.head()

Unnamed: 0,Title,Price_EUR_per_month,Neighbourhood,Person,Number_of_rooms,Square_meters,Exterior_interior,Floor,Lift,Garage,City
0,Alquiler de Piso en calle de Morales,1400.0,Les Corts,Profesional,1.0,50.0,exterior,Bajo,0.0,No garage,Barcelona
1,Alquiler de Piso en calle de Loreto,3000.0,Les Corts,Profesional,2.0,254.0,exterior,7.0,1.0,No garage,Barcelona
2,Alquiler de Piso en calle de la Riera Blanca,1500.0,La Maternitat i Sant Ramon,Profesional,2.0,71.0,exterior,4.0,1.0,No garage,Barcelona
3,Alquiler de Piso en Passatge d'Albert Pinyol,1250.0,Les Corts,Profesional,1.0,45.0,exterior,1.0,1.0,No garage,Barcelona
4,Alquiler de Ático en calle de l'Alfambra,1795.0,Pedralbes,Profesional,2.0,70.0,exterior,6.0,1.0,No garage,Barcelona


In [39]:
# NOTE(review): repeats the preview from the cell directly above; nothing changes
# df in between, so the displayed rows are identical.
df.head()

Unnamed: 0,Title,Price_EUR_per_month,Neighbourhood,Person,Number_of_rooms,Square_meters,Exterior_interior,Floor,Lift,Garage,City
0,Alquiler de Piso en calle de Morales,1400.0,Les Corts,Profesional,1.0,50.0,exterior,Bajo,0.0,No garage,Barcelona
1,Alquiler de Piso en calle de Loreto,3000.0,Les Corts,Profesional,2.0,254.0,exterior,7.0,1.0,No garage,Barcelona
2,Alquiler de Piso en calle de la Riera Blanca,1500.0,La Maternitat i Sant Ramon,Profesional,2.0,71.0,exterior,4.0,1.0,No garage,Barcelona
3,Alquiler de Piso en Passatge d'Albert Pinyol,1250.0,Les Corts,Profesional,1.0,45.0,exterior,1.0,1.0,No garage,Barcelona
4,Alquiler de Ático en calle de l'Alfambra,1795.0,Pedralbes,Profesional,2.0,70.0,exterior,6.0,1.0,No garage,Barcelona


In [40]:
# Write the cleaned frame to a CSV in the current working directory;
# index=False keeps the row index out of the file.
df.to_csv(path_or_buf='Property_data_cleaned.csv', index=False)