# PCA Analysis for Apartment and Room Listings
This notebook performs principal component analysis (PCA) separately on the apartment and room listing datasets, using a selected set of numeric variables.

## 1. Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

## 2. Load Datasets

In [None]:
# Locations of the two pre-filtered listing tables.
apartment_csv = '/mnt/data/df_apartment_filtered.csv'
room_csv = '/mnt/data/df_room_filtered.csv'

# Read each CSV into its own DataFrame.
df_apartment = pd.read_csv(apartment_csv)
df_room = pd.read_csv(room_csv)

# Last expression of the cell: the notebook displays the first rows of both.
df_apartment.head(), df_room.head()

## 3. Select Numeric Variables for PCA

In [None]:
# Numeric variables fed into the PCA; later cells reuse this list to label
# the component matrix columns.
cols = [
    'price',
    'minimum_nights',
    'number_of_reviews',
    'reviews_per_month',
    'availability_365',
]

# Keep only the selected columns and discard any row with a missing value
# in one of them.
apt = df_apartment.loc[:, cols].dropna()
room = df_room.loc[:, cols].dropna()

# Last expression of the cell: preview both cleaned tables.
apt.head(), room.head()

## 4. Standardize the Data (PCA Is Sensitive to Variable Scale)

In [None]:
# Use one scaler per dataset. Refitting a single shared scaler on the room
# data would overwrite the statistics learned from the apartment data, making
# it impossible to inverse-transform apartment values or scale new apartment
# rows later on.
apt_scaler = StandardScaler()
room_scaler = StandardScaler()

# Standardize each dataset using its own column statistics.
apt_scaled = apt_scaler.fit_transform(apt)
room_scaled = room_scaler.fit_transform(room)

# Backward compatibility: `scaler` previously ended this cell fitted on the
# room data, so keep that name pointing at the room scaler.
scaler = room_scaler

## 5. Run PCA

In [None]:
# Apartment listings: PCA is constructed with no arguments, so scikit-learn's
# defaults apply. Fit on the standardized data and keep the projected scores.
pca_apt = PCA()
apt_components = pca_apt.fit_transform(apt_scaled)

# Room listings: same procedure with an independent PCA model.
pca_room = PCA()
room_components = pca_room.fit_transform(room_scaled)

## 6. Explained Variance (Scree Plots)

In [None]:
def _plot_scree(pca, title):
    """Draw a scree plot of a fitted PCA's explained variance ratios.

    Args:
        pca: A fitted PCA object exposing ``explained_variance_ratio_``.
        title: Title shown above the plot.
    """
    ratios = pca.explained_variance_ratio_
    # Number components from 1 so the x-axis reads PC1, PC2, ... instead of
    # the implicit 0-based array index.
    component_numbers = list(range(1, len(ratios) + 1))

    plt.figure(figsize=(7, 5))
    plt.plot(component_numbers, ratios, marker='o')
    # One integer tick per component; avoids fractional tick labels.
    plt.xticks(component_numbers)
    plt.title(title)
    plt.xlabel("Principal Component")
    plt.ylabel("Explained Variance Ratio")
    plt.tight_layout()
    plt.show()


_plot_scree(pca_apt, "Apartment PCA – Explained Variance")
_plot_scree(pca_room, "Room PCA – Explained Variance")

## 7. PCA Component Loadings

In [None]:
# Wrap each fitted model's component matrix in a DataFrame whose columns are
# labelled with the input variable names. Rows keep the default integer
# index, one row per principal component.
loading_frames = [
    pd.DataFrame(data=fitted_pca.components_, columns=cols)
    for fitted_pca in (pca_apt, pca_room)
]
apt_loadings, room_loadings = loading_frames

# Last expression of the cell: display both tables.
apt_loadings, room_loadings