In [208]:
# Import necessary libraries

import pandas as pd
import geopandas as gpd
import zipfile
import numpy as np
# Import the regular expressions module to work with text pattern matching and manipulation
import re


In [209]:
# Create a data frame from listings.csv.zip and neighbourhoods.geojson

with zipfile.ZipFile("../sources/listings.csv.zip", 'r') as zip_ref:
    df = pd.read_csv(zip_ref.open('listings.csv'))

gdf = gpd.read_file("../sources/neighbourhoods.geojson")

In [210]:
# Check df

df.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,3176,https://www.airbnb.com/rooms/3176,20250620182343,2025-06-21,city scrape,Fabulous Flat in great Location,This beautiful first floor apartment is situa...,The neighbourhood is famous for its variety of...,https://a0.muscache.com/pictures/airflow/Hosti...,3718,...,4.7,4.92,4.61,,f,1,1,0,0,0.76
1,9991,https://www.airbnb.com/rooms/9991,20250620182343,2025-06-21,city scrape,Geourgeous flat - outstanding views,4 bedroom with very large windows and outstand...,Prenzlauer Berg is an amazing neighbourhood wh...,https://a0.muscache.com/pictures/42799131/59c8...,33852,...,5.0,4.86,4.86,03/Z/RA/003410-18,f,1,1,0,0,0.06
2,14325,https://www.airbnb.com/rooms/14325,20250620182343,2025-06-21,city scrape,Studio Apartment in Prenzlauer Berg,The apartment is located on the upper second f...,,https://a0.muscache.com/pictures/508703/24988a...,55531,...,4.85,4.6,4.45,,f,4,4,0,0,0.14
3,16644,https://www.airbnb.com/rooms/16644,20250620182343,2025-06-21,previous scrape,In the Heart of Berlin - Kreuzberg,Light and sunny 2-Room-turn of the century-fla...,Our Part of Kreuzberg is just the best. Good v...,https://a0.muscache.com/pictures/73759174/e2ef...,64696,...,4.86,4.67,4.71,,f,2,2,0,0,0.26
4,17904,https://www.airbnb.com/rooms/17904,20250620182343,2025-06-21,city scrape,Beautiful Kreuzberg studio - 3 months minimum,"- apt is available starting September 1, 2024<...","The apartment is located in Kreuzberg, which i...",https://a0.muscache.com/pictures/d9a6f8be-54b9...,68997,...,4.92,4.88,4.65,,f,1,1,0,0,1.6


In [211]:
# Check columns

df.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'description', 'neighborhood_overview', 'picture_url', 'host_id',
       'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'ca

In [212]:

# Dropping unnecessary columns that are either metadata, URLs, images, verbose text, or too granular for analysis

columns_to_drop = [
    'listing_url',
    'scrape_id',
    'last_scraped',
    'source',
    'name',
    'description',
    'neighborhood_overview',
    'picture_url',
    'host_listings_count',
    'host_total_listings_count',
    'host_url',
    'host_name',
    'host_since',
    'host_location',
    'host_about',
    'host_response_time',
    'host_response_rate',
    'host_acceptance_rate',
    'host_is_superhost',
    'host_thumbnail_url',
    'host_picture_url',
    'host_neighbourhood',
    'host_verifications',
    'host_has_profile_pic',
    'host_identity_verified',
    'host_location',
    'neighbourhood',            # Has only 'Berlin, Germany', not actual neighborhood
    'bathrooms',                # Keep `bathrooms_text` instead, because `bathrooms` has many missing values and `bathrooms_text` not, plus it has info if a bathroom is shared or not
    'calendar_updated',
    'calendar_last_scraped',
    'availability_eoy',  
    'number_of_reviews_ly', 
    'number_of_reviews_ltm',
    'number_of_reviews_l30d',
    'estimated_occupancy_l365d',
    'estimated_revenue_l365d',
    'first_review',
    'last_review',
    'license',
    'minimum_minimum_nights',
    'maximum_minimum_nights',
    'minimum_maximum_nights',
    'maximum_maximum_nights',
    'minimum_nights_avg_ntm',
    'maximum_nights_avg_ntm',
    'has_availability',
    'availability_30',
    'availability_60',
    'availability_90',
    'availability_365',
    'instant_bookable',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms'
]

In [213]:
# Apply the column drop to the DataFrame

df.drop(columns=columns_to_drop, inplace=True)

In [214]:
# Check df

df.head()

Unnamed: 0,id,host_id,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,...,maximum_nights,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,3176,3718,Prenzlauer Berg Südwest,Pankow,52.53471,13.4181,Entire rental unit,Entire home/apt,2,1 bath,...,730,149,4.63,4.67,4.52,4.65,4.7,4.92,4.61,0.76
1,9991,33852,Prenzlauer Berg Südwest,Pankow,52.53269,13.41805,Entire rental unit,Entire home/apt,7,2.5 baths,...,14,7,5.0,5.0,5.0,5.0,5.0,4.86,4.86,0.06
2,14325,55531,Prenzlauer Berg Nordwest,Pankow,52.54813,13.40366,Entire rental unit,Entire home/apt,1,1 bath,...,1125,26,4.68,5.0,4.85,4.7,4.85,4.6,4.45,0.14
3,16644,64696,nördliche Luisenstadt,Friedrichshain-Kreuzberg,52.50312,13.43508,Entire condo,Entire home/apt,4,1 bath,...,365,48,4.72,4.86,4.86,4.93,4.86,4.67,4.71,0.26
4,17904,68997,Reuterstraße,Neukölln,52.49419,13.42166,Entire rental unit,Entire home/apt,2,1 bath,...,365,298,4.77,4.82,4.71,4.89,4.92,4.88,4.65,1.6


In [215]:
# Check df shape

df.shape


(14187, 25)

In [216]:
# Check df columns

df.columns

Index(['id', 'host_id', 'neighbourhood_cleansed',
       'neighbourhood_group_cleansed', 'latitude', 'longitude',
       'property_type', 'room_type', 'accommodates', 'bathrooms_text',
       'bedrooms', 'beds', 'amenities', 'price', 'minimum_nights',
       'maximum_nights', 'number_of_reviews', 'review_scores_rating',
       'review_scores_accuracy', 'review_scores_cleanliness',
       'review_scores_checkin', 'review_scores_communication',
       'review_scores_location', 'review_scores_value', 'reviews_per_month'],
      dtype='object')

In [217]:
# Check the number of null values in each column

df.isnull().sum()


id                                 0
host_id                            0
neighbourhood_cleansed             0
neighbourhood_group_cleansed       0
latitude                           0
longitude                          0
property_type                      0
room_type                          0
accommodates                       0
bathrooms_text                     7
bedrooms                        2023
beds                            5003
amenities                          0
price                           5004
minimum_nights                     0
maximum_nights                     0
number_of_reviews                  0
review_scores_rating            3349
review_scores_accuracy          3351
review_scores_cleanliness       3349
review_scores_checkin           3352
review_scores_communication     3350
review_scores_location          3352
review_scores_value             3354
reviews_per_month               3349
dtype: int64

In [218]:
# Get an overview of the dataset

df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14187 entries, 0 to 14186
Data columns (total 25 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            14187 non-null  int64  
 1   host_id                       14187 non-null  int64  
 2   neighbourhood_cleansed        14187 non-null  object 
 3   neighbourhood_group_cleansed  14187 non-null  object 
 4   latitude                      14187 non-null  float64
 5   longitude                     14187 non-null  float64
 6   property_type                 14187 non-null  object 
 7   room_type                     14187 non-null  object 
 8   accommodates                  14187 non-null  int64  
 9   bathrooms_text                14180 non-null  object 
 10  bedrooms                      12164 non-null  float64
 11  beds                          9184 non-null   float64
 12  amenities                     14187 non-null  object 
 13  p

In [219]:
# Remove $ and commas in `price` column, then convert to float

df['price'] = df['price'].replace(r'[\$,]', '', regex=True).astype(float).round(2)


In [220]:
# Check data types of each column

df.dtypes


id                                int64
host_id                           int64
neighbourhood_cleansed           object
neighbourhood_group_cleansed     object
latitude                        float64
longitude                       float64
property_type                    object
room_type                        object
accommodates                      int64
bathrooms_text                   object
bedrooms                        float64
beds                            float64
amenities                        object
price                           float64
minimum_nights                    int64
maximum_nights                    int64
number_of_reviews                 int64
review_scores_rating            float64
review_scores_accuracy          float64
review_scores_cleanliness       float64
review_scores_checkin           float64
review_scores_communication     float64
review_scores_location          float64
review_scores_value             float64
reviews_per_month               float64


In [221]:
# object columns to clean

cat_cols = df.select_dtypes(include='object').columns
print(cat_cols)


Index(['neighbourhood_cleansed', 'neighbourhood_group_cleansed',
       'property_type', 'room_type', 'bathrooms_text', 'amenities'],
      dtype='object')


In [222]:
# Check for leading/trailing spaces and inconsistent capitalization

for col in cat_cols:
    # Count entries with leading/trailing spaces
    n_spaces = (df[col].str.strip() != df[col]).sum()
    
    # Check capitalization inconsistency by comparing unique counts
    unique_lower = df[col].str.lower().nunique()
    unique_original = df[col].nunique()
    
    if n_spaces > 0:
        print(f"Column '{col}' has {n_spaces} entries with leading/trailing spaces")
    if unique_lower != unique_original:
        print(f"Column '{col}' has inconsistent capitalization")


Column 'bathrooms_text' has 7 entries with leading/trailing spaces


In [223]:
for col in cat_cols:
    df[col] = df[col].astype(str).str.strip()   # Remove leading/trailing spaces

In [224]:
# Change object type to string

# Select columns with object type

object_cols = df.select_dtypes(include=['object']).columns.tolist()

# Change to string

df[object_cols] = df[object_cols].astype('string')

In [225]:
df['neighbourhood_group_cleansed'].unique()


<StringArray>
[                  'Pankow', 'Friedrichshain-Kreuzberg',
                 'Neukölln',                    'Mitte',
     'Charlottenburg-Wilm.',   'Tempelhof - Schöneberg',
              'Lichtenberg',       'Treptow - Köpenick',
    'Steglitz - Zehlendorf',                  'Spandau',
            'Reinickendorf',    'Marzahn - Hellersdorf']
Length: 12, dtype: string

In [226]:
# Define your mapping (match keys exactly as they appear in your data)
name_map = {
    'Mitte': 'mitte',
    'Friedrichshain-Kreuzberg': 'friedrichshain-kreuzberg',
    'Pankow': 'pankow',
    'Charlottenburg-Wilm.': 'charlottenburg-wilmersdorf',
    'Spandau': 'spandau',
    'Steglitz - Zehlendorf': 'steglitz-zehlendorf',
    'Tempelhof - Schöneberg': 'tempelhof-schöneberg',
    'Neukölln': 'neukölln',
    'Treptow - Köpenick': 'treptow-köpenick',
    'Marzahn - Hellersdorf': 'marzahn-hellersdorf',
    'Lichtenberg': 'lichtenberg',
    'Reinickendorf': 'reinickendorf'
}

In [227]:
# Apply the mapping
df['neighbourhood_group_cleansed'] = df['neighbourhood_group_cleansed'].replace(name_map)

In [228]:
print(df['neighbourhood_group_cleansed'].unique())

<StringArray>
[                    'pankow',   'friedrichshain-kreuzberg',
                   'neukölln',                      'mitte',
 'charlottenburg-wilmersdorf',       'tempelhof-schöneberg',
                'lichtenberg',           'treptow-köpenick',
        'steglitz-zehlendorf',                    'spandau',
              'reinickendorf',        'marzahn-hellersdorf']
Length: 12, dtype: string


In [229]:
# Rename the column 'neighbourhood_group_cleansed' to 'district' for better readability and consistency

df.rename(columns={'neighbourhood_group_cleansed': 'district'}, inplace=True)

In [230]:
# Rename the column 'neighbourhood_cleansed' to 'neighborhood' for better readability and consistency

df.rename(columns={'neighbourhood_cleansed': 'neighborhood'}, inplace=True)

In [231]:
df.head()

Unnamed: 0,id,host_id,neighborhood,district,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,...,maximum_nights,number_of_reviews,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,3176,3718,Prenzlauer Berg Südwest,pankow,52.53471,13.4181,Entire rental unit,Entire home/apt,2,1 bath,...,730,149,4.63,4.67,4.52,4.65,4.7,4.92,4.61,0.76
1,9991,33852,Prenzlauer Berg Südwest,pankow,52.53269,13.41805,Entire rental unit,Entire home/apt,7,2.5 baths,...,14,7,5.0,5.0,5.0,5.0,5.0,4.86,4.86,0.06
2,14325,55531,Prenzlauer Berg Nordwest,pankow,52.54813,13.40366,Entire rental unit,Entire home/apt,1,1 bath,...,1125,26,4.68,5.0,4.85,4.7,4.85,4.6,4.45,0.14
3,16644,64696,nördliche Luisenstadt,friedrichshain-kreuzberg,52.50312,13.43508,Entire condo,Entire home/apt,4,1 bath,...,365,48,4.72,4.86,4.86,4.93,4.86,4.67,4.71,0.26
4,17904,68997,Reuterstraße,neukölln,52.49419,13.42166,Entire rental unit,Entire home/apt,2,1 bath,...,365,298,4.77,4.82,4.71,4.89,4.92,4.88,4.65,1.6


In [232]:
# Count duplicates

num_duplicates = df.duplicated().sum()
print(f"Number of duplicate rows: {num_duplicates}")

Number of duplicate rows: 0


In [233]:
if df['id'].is_unique:
    print("ID column is unique")
else:
    print("ID column has duplicates")


ID column is unique


In [234]:
df['bathrooms_text'].unique()

<StringArray>
[           '1 bath',         '2.5 baths',           '2 baths',
     '1 shared bath',  '1.5 shared baths',         '1.5 baths',
    '1 private bath',    '0 shared baths',    '2 shared baths',
           '0 baths',               'nan',  'Shared half-bath',
           '3 baths',         '4.5 baths',         'Half-bath',
 'Private half-bath',           '8 baths',         '3.5 baths',
  '2.5 shared baths',           '5 baths',         '8.5 baths',
    '4 shared baths',    '3 shared baths',           '4 baths',
  '8.5 shared baths',           '6 baths',          '10 baths',
          '15 baths',    '8 shared baths']
Length: 29, dtype: string

In [235]:
# Separate `bathrooms_text` into `bathrooms` and `is_shared`
 
# Extract the number (float), preserving NaNs

df['bathrooms'] = df['bathrooms_text'].str.extract(r'(\d+\.?\d*)')
df['bathrooms'] = df['bathrooms'].astype(float)

# Fill NaNs in `bathrooms` where "half" is mentioned in the text

mask_half_bath = (
    df['bathrooms'].isna() &
    df['bathrooms_text'].fillna('').str.contains('half', case=False)
)
df.loc[mask_half_bath, 'bathrooms'] = 0.5

# Determine if the bathroom is shared

df['is_shared'] = df['bathrooms_text'].apply(
    lambda x: 1 if isinstance(x, str) and 'shared' in x.lower()
    else 0 if isinstance(x, str)
    else pd.NA
).astype('Int64')

In [236]:
# Drop `bathrooms_text` column

df = df.drop(columns=['bathrooms_text'])

In [237]:
# Change gdf neighbourhood column to neighborhood for joining two dfs

gdf = gdf.rename(columns={'neighbourhood': 'neighborhood'})

In [238]:
# Join df with gdf

gdf_subset = gdf[['neighborhood', 'geometry']]
df = df.merge(gdf_subset, on='neighborhood', how='left')

In [239]:
df.head()

Unnamed: 0,id,host_id,neighborhood,district,latitude,longitude,property_type,room_type,accommodates,bedrooms,...,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month,bathrooms,is_shared,geometry
0,3176,3718,Prenzlauer Berg Südwest,pankow,52.53471,13.4181,Entire rental unit,Entire home/apt,2,1.0,...,4.67,4.52,4.65,4.7,4.92,4.61,0.76,1.0,0,"MULTIPOLYGON (((13.41253 52.54089, 13.41409 52..."
1,9991,33852,Prenzlauer Berg Südwest,pankow,52.53269,13.41805,Entire rental unit,Entire home/apt,7,4.0,...,5.0,5.0,5.0,5.0,4.86,4.86,0.06,2.5,0,"MULTIPOLYGON (((13.41253 52.54089, 13.41409 52..."
2,14325,55531,Prenzlauer Berg Nordwest,pankow,52.54813,13.40366,Entire rental unit,Entire home/apt,1,0.0,...,5.0,4.85,4.7,4.85,4.6,4.45,0.14,1.0,0,"MULTIPOLYGON (((13.40354 52.5402, 13.40339 52...."
3,16644,64696,nördliche Luisenstadt,friedrichshain-kreuzberg,52.50312,13.43508,Entire condo,Entire home/apt,4,1.0,...,4.86,4.86,4.93,4.86,4.67,4.71,0.26,1.0,0,"MULTIPOLYGON (((13.4443 52.50066, 13.44266 52...."
4,17904,68997,Reuterstraße,neukölln,52.49419,13.42166,Entire rental unit,Entire home/apt,2,0.0,...,4.82,4.71,4.89,4.92,4.88,4.65,1.6,1.0,0,"MULTIPOLYGON (((13.43515 52.48076, 13.43492 52..."


In [240]:
# Add `district_id` column:

district_map = {
    "mitte": "01",
    "friedrichshain-kreuzberg": "02",
    "pankow": "03",
    "charlottenburg-wilmersdorf": "04",
    "spandau": "05",
    "steglitz-zehlendorf": "06",
    "tempelhof-schöneberg": "07",
    "neukölln": "08",
    "treptow-köpenick": "09",
    "marzahn-hellersdorf": "10",
    "lichtenberg": "11",
    "reinickendorf": "12"
}

df["district_id"] = df["district"].map(district_map).astype(str)

In [241]:
# Save to csv (if needed):

# df.to_csv('listings_clean.csv')

In [242]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14187 entries, 0 to 14186
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype   
---  ------                       --------------  -----   
 0   id                           14187 non-null  int64   
 1   host_id                      14187 non-null  int64   
 2   neighborhood                 14187 non-null  object  
 3   district                     14187 non-null  string  
 4   latitude                     14187 non-null  float64 
 5   longitude                    14187 non-null  float64 
 6   property_type                14187 non-null  string  
 7   room_type                    14187 non-null  string  
 8   accommodates                 14187 non-null  int64   
 9   bedrooms                     12164 non-null  float64 
 10  beds                         9184 non-null   float64 
 11  amenities                    14187 non-null  string  
 12  price                        9183 non-null   float64 
 13  m

In [243]:
# Creating table and populating it
# Import necessary libraries

import psycopg2
from sqlalchemy import create_engine, text
import warnings

warnings.filterwarnings("ignore")

In [244]:
# Create connection and engine to establish a connection

with open("../../../_db_login/layered_db_url.txt", "r") as file:
    DATABASE_URL = file.read().strip()

engine = create_engine(DATABASE_URL)

In [245]:
# Set search path to avoid PostGIS issues during table creation/upload

with engine.connect() as conn:
    conn.execute(text('''SET search_path TO public, berlin_data;
'''))
    conn.commit()

In [246]:
# Create empty short_time_listings table with constraints

with engine.connect() as conn:
    conn.execute(text('''
    CREATE TABLE IF NOT EXISTS berlin_data.short_time_listings (
        district_id VARCHAR(2) NOT NULL,
        district VARCHAR(32) NOT NULL,
        id BIGINT NOT NULL PRIMARY KEY,
        host_id BIGINT,
        neighborhood VARCHAR(50) NOT NULL,
        latitude DECIMAL(9, 6) CHECK (latitude BETWEEN -90 AND 90),
        longitude DECIMAL(9, 6) CHECK (longitude BETWEEN -180 AND 180),
        property_type VARCHAR(50),
        room_type VARCHAR(50),
        accommodates INT,
        bedrooms DECIMAL(5, 1),
        beds DECIMAL(5, 1),
        bathrooms DECIMAL(5, 1),
        is_shared SMALLINT,
        amenities TEXT,
        price DECIMAL(10, 2),
        minimum_nights INT,
        maximum_nights INT,
        number_of_reviews INT,
        review_scores_rating DECIMAL (5, 2),
        review_scores_accuracy DECIMAL (5, 2),
        review_scores_cleanliness DECIMAL (5, 2),
        review_scores_checkin DECIMAL (5, 2),
        review_scores_communication DECIMAL (5, 2),
        review_scores_location DECIMAL (5, 2),
        review_scores_value DECIMAL (5, 2),
        reviews_per_month DECIMAL (5, 2),
        geometry GEOMETRY(MultiPolygon, 4326),
        CONSTRAINT district_id_fk
            FOREIGN KEY (district_id)
            REFERENCES berlin_data.districts(district_id)
            ON DELETE RESTRICT
            ON UPDATE CASCADE
    )
    '''))
    conn.commit()

In [247]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14187 entries, 0 to 14186
Data columns (total 28 columns):
 #   Column                       Non-Null Count  Dtype   
---  ------                       --------------  -----   
 0   id                           14187 non-null  int64   
 1   host_id                      14187 non-null  int64   
 2   neighborhood                 14187 non-null  object  
 3   district                     14187 non-null  string  
 4   latitude                     14187 non-null  float64 
 5   longitude                    14187 non-null  float64 
 6   property_type                14187 non-null  string  
 7   room_type                    14187 non-null  string  
 8   accommodates                 14187 non-null  int64   
 9   bedrooms                     12164 non-null  float64 
 10  beds                         9184 non-null   float64 
 11  amenities                    14187 non-null  string  
 12  price                        9183 non-null   float64 
 13  m

In [248]:
# Convert DataFrame to GeoDataFrame to handle geometry (for PostGIS upload)

if 'geometry' in df.columns:
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")
else:
    # If you only have lat/lon, create geometry column:
    from shapely.geometry import Point
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df.longitude, df.latitude),
        crs="EPSG:4326"
    )

In [249]:
# Use to_postgis (not to_sql) to preserve geometry type during upload

from geoalchemy2 import Geometry

gdf.to_postgis(
    name='short_time_listings',
    con=engine,
    schema='berlin_data',
    if_exists='append',
    index=False
)