In [1]:
pip install streamlit

Collecting protobuf<6,>=3.20 (from streamlit)
  Downloading protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl.metadata (592 bytes)
Downloading protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl (418 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.32.0
    Uninstalling protobuf-6.32.0:
      Successfully uninstalled protobuf-6.32.0
Successfully installed protobuf-5.29.5
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import joblib

# Load dataset
df = pd.read_csv("listings.csv")

# Clean the price column
df['price'] = df['price'].replace('[\$,]', '', regex=True)
df['price'] = pd.to_numeric(df['price'], errors='coerce')
df = df.dropna(subset=['price'])

# Optional: filter out price outliers
df = df[df['price'] < 500]

# Select features and target
features = ['room_type', 'neighbourhood_cleansed', 'minimum_nights', 'availability_365']
df_model = df[features + ['price']].dropna()

X = df_model[features]
y = df_model['price']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Categorical and numerical features
categorical = ['room_type', 'neighbourhood_cleansed']
numerical = ['minimum_nights', 'availability_365']

# Preprocessing and pipeline
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical)
], remainder='passthrough')

pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('model', LinearRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

#  Now save it
joblib.dump(pipeline, "airbnb_price_model.pkl")

  df['price'] = df['price'].replace('[\$,]', '', regex=True)


['airbnb_price_model.pkl']

In [3]:
import streamlit as st
import pandas as pd
import joblib

# Load the trained pipeline
model = joblib.load("airbnb_price_model.pkl")

# App title
st.title("Paris Airbnb Price Predictor 💶")
st.markdown("Enter listing details to estimate the nightly price.")

# User input
room_type = st.selectbox("Room Type", ['Entire home/apt', 'Private room', 'Shared room', 'Hotel room'])

# You can update these with actual values from your dataset if desired
neighbourhood = st.selectbox("Neighbourhood", [
    'Buttes-Montmartre', 'Louvre', 'Popincourt', 'Batignolles-Monceau',
    'Vaugirard', 'Panthéon', 'Temple', 'Palais-Bourbon', 'Élysée',
    'Observatoire', 'Bourse', 'Passy', 'Gobelins', 'Reuilly', 'Opéra'
])

minimum_nights = st.number_input("Minimum Nights", min_value=1, max_value=365, value=3)
availability_365 = st.slider("Availability (days/year)", 0, 365, 200)

# Predict button
if st.button("Predict Price"):
    input_df = pd.DataFrame([{
        'room_type': room_type,
        'neighbourhood_cleansed': neighbourhood,
        'minimum_nights': minimum_nights,
        'availability_365': availability_365
    }])

    predicted_price = model.predict(input_df)[0]
    st.success(f"💰 Estimated Price: €{predicted_price:.2f} per night")

2025-08-24 13:33:32.074 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
2025-08-24 13:33:32.074 Session state does not function when running a script without `streamlit run`


In [4]:
import joblib

# Save the trained pipeline model
joblib.dump(pipeline, "airbnb_price_model.pkl")


['airbnb_price_model.pkl']

In [5]:
joblib.dump(pipeline, "airbnb_price_model.pkl")

['airbnb_price_model.pkl']

In [6]:
import os
print(os.path.isfile("airbnb_price_model.pkl"))  # Should return True


True


In [7]:
joblib.dump(pipeline, "airbnb_price_model.pkl")

['airbnb_price_model.pkl']