In [1]:
# Import necessary libraries
import pandas as pd
import os
import sys

# Make the parent of the current working directory importable. That directory
# is expected to contain the `utils` package, so this lets the notebook find
# utils/data_loader.py.
# The path is normalised (no trailing '..') and only added when missing, so
# re-running this cell does not pile up duplicate entries in sys.path.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Import our custom data loading function
from utils.data_loader import load_and_clean_data

# ---
# # 1. Project Introduction and Data Loading
#
# In this notebook, we'll perform an initial exploration of the Airbnb datasets for Berlin, Istanbul, and Munich. We'll use our custom `load_and_clean_data` function to load the data, which also handles basic cleaning and saves a processed version for later use.
# ---

# Load and clean the three city datasets in a fixed order (Berlin, Istanbul,
# Munich), collecting them in a dictionary keyed by the capitalised city name.
datasets = {
    city.capitalize(): load_and_clean_data(city)
    for city in ('berlin', 'istanbul', 'munich')
}

# Keep a dedicated name per city as well, for direct access
df_berlin = datasets['Berlin']
df_istanbul = datasets['Istanbul']
df_munich = datasets['Munich']

# ---
# # 2. Initial Data Inspection
#
# After loading and cleaning the data, it is good practice to inspect its structure, check the data types, and review a few rows to make sure everything looks correct.
# ---

# Print a short report for every loaded city: the first rows, the column
# overview produced by DataFrame.info(), and describe() summary statistics.
for name, df in datasets.items():
    # Report header followed by the first five rows
    print(f"--- Data Inspection for {name} ---", "Head of the DataFrame:", sep="\n")
    print(df.head(5))

    # Columns and dtypes; info() prints its report itself instead of returning it
    print()
    print("DataFrame Info:")
    df.info()

    # Summary statistics as returned by describe()
    print()
    print("Summary Statistics:")
    print(df.describe())

    # Visual separator between cities, with a blank line before and after
    print(f"\n{'=' * 50}\n")

Loading and cleaning raw data for Berlin...
Cleaned data for Berlin saved to: C:\Users\sonic\Documents\GitHub\Airbnb_Price_Analysis\data\berlin\processed\berlin_cleaned.csv
Loading and cleaning raw data for Istanbul...
Cleaned data for Istanbul saved to: C:\Users\sonic\Documents\GitHub\Airbnb_Price_Analysis\data\istanbul\processed\istanbul_cleaned.csv
Loading and cleaning raw data for Munich...
Cleaned data for Munich saved to: C:\Users\sonic\Documents\GitHub\Airbnb_Price_Analysis\data\munich\processed\munich_cleaned.csv
--- Data Inspection for Berlin ---
Head of the DataFrame:
      id  host_id  host_since           host_location  host_response_time  \
0   3176     3718  2008-10-19     Coledale, Australia  within a few hours   
1   9991    33852  2009-08-25         Berlin, Germany        within a day   
2  14325    55531  2009-11-18         Berlin, Germany        within a day   
4  17904    68997  2010-01-08  Rio de Janeiro, Brazil      within an hour   
6  22438    86159  2010-02-27 