In [1]:
from housing_pricer.scraping.utilities.data_manager import DataManager
from housing_pricer.data_processing.data_processing_utils import format_json_to_dataframe
import pandas as pd
import plotly.express as px
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
import os

def lookup_raw_listing(data_manager: DataManager, listing_id: str) -> dict:
    """Return the first raw entry whose ``"id"`` equals ``listing_id``.

    Entries are read one at a time from ``data_manager.load_data()`` and the
    scan stops at the first match.

    Args:
        data_manager: Object whose ``load_data()`` yields the raw entries.
        listing_id: Value compared against each entry's ``"id"`` field,
            e.g. ``"bostad/93631"``.

    Returns:
        The first matching entry.

    Raises:
        RuntimeError: If no entry carries the requested id.
    """
    for entry in data_manager.load_data():
        if entry["id"] == listing_id:
            return entry
    # Name the id that was searched for so a failed lookup can be diagnosed.
    raise RuntimeError(f"Missing entry: no raw listing with id {listing_id!r}")

# Lift pandas' column cap so wide frames are displayed with every column.
pd.set_option('display.max_columns', None)

RAW_DATA_STORAGE = "../scraping/data_storage"
LISTINGS_DF_PATH = "listings_dataframe.pickle"

# Build the DataFrame from the raw scrape only when no pickled copy exists
# yet; otherwise reuse the pickle and skip the conversion step.
if not os.path.exists(LISTINGS_DF_PATH):
    data = DataManager(base_dir=RAW_DATA_STORAGE, data_filename="scraped_data").load_data()
    listings = format_json_to_dataframe(data)
    listings.to_pickle(LISTINGS_DF_PATH)
else:
    listings = pd.read_pickle(LISTINGS_DF_PATH)

Processing scraped JSON content to dataframe...: 968638it [00:32, 29738.10it/s]


In [4]:
def get_sold_property_details(entry):
    """Return the first value in ``entry["data"]`` whose key has a known prefix.

    Keys are visited in the mapping's own iteration order; the first one that
    starts with any of the accepted prefixes wins, regardless of where that
    prefix sits in the tuple below.

    Args:
        entry: Mapping with a ``"data"`` mapping of string keys to values.

    Returns:
        The value stored under the first matching key, or ``None`` when no
        key matches.
    """
    accepted_prefixes = (
        "SoldProperty:",
        "Listing:",
        "ResidenceWithSoldProperty:",
        "Residence:",
    )
    matching_values = (
        value
        for key, value in entry["data"].items()
        if key.startswith(accepted_prefixes)
    )
    return next(matching_values, None)


# Fetch the raw scraped entry with id "bostad/93631" for inspection.
listing = lookup_raw_listing(
    DataManager(base_dir=RAW_DATA_STORAGE),
    "bostad/93631",
)

In [7]:
# Extract the property-details record from the raw listing; the helper
# returns None when no key in listing["data"] has a recognised prefix.
property_details = get_sold_property_details(listing)
# Bare last expression so the notebook renders the value as the cell output.
property_details

{'__typename': 'SoldProperty',
 'residenceId': '93631',
 'id': '5137876',
 'breadcrumbs': [{'__typename': 'BreadcrumbItem',
   'label': 'Västra Götalands län',
   'url': '/sok/slutpriser?areaIds=23&objectType=Lägenhet'},
  {'__typename': 'BreadcrumbItem',
   'label': 'Skövde kommun',
   'url': '/sok/slutpriser?areaIds=401&objectType=Lägenhet'},
  {'__typename': 'BreadcrumbItem',
   'label': 'Havstenavägen',
   'url': '/sok/slutpriser?areaIds=90339&objectType=Lägenhet'},
  {'__typename': 'BreadcrumbItem',
   'label': 'Riksbyggen BRF Skövdehus nr 12',
   'url': '/bostadsrattsforening/56469'}],
 'breadcrumbsJsonLd': '{"@context":"http://schema.org","@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Västra Götalands län","item":"https://www.booli.se/sok/slutpriser?areaIds=23&objectType=Lägenhet"},{"@type":"ListItem","position":2,"name":"Skövde kommun","item":"https://www.booli.se/sok/slutpriser?areaIds=401&objectType=Lägenhet"},{"@type":"ListItem","positio

In [2]:
# Bare expression: render the listings DataFrame as this cell's output.
listings

Unnamed: 0,url_listing_type,url_listing_id,market_status,booli_id,sold_date,days_listed,residence_type,address,tenure_form,apartment_number,urban_area,municipality,living_area,construction_year,list_price,sold_price,sold_price_type,first_price,booli_valuation,booli_valuation_lb,booli_valuation_ub,monthly_payment,rent,operating_cost,energy_class,floor,building_floors,latitude,longitude,has_solar_panels,agency_id,agent_id,booli_ids_of_previous_sales,n_previous_sales
0,bostad,93631,Slutpris,5137876,2023-12-18,6.0,Lägenhet,Havstenavägen 9A,,1202,Havstena,Skövde,62.0,1964.0,895000.0,750000.0,Slutpris,895 000,,,,,3692.0,,E,3,,58.404068,13.845960,,20,11905,[5137876],1
1,annons,5174850,Slutpris,5174850,2023-12-18,18.0,Lägenhet,Rörstrandsgatan 37,,1102,Vasastan,Stockholm,66.0,1929.0,6850000.0,7250000.0,Slutpris,6 850 000,,,,,1774.0,483.0,,1,,59.340639,18.028567,,840,5762,[5174850],1
2,annons,5172906,Slutpris,5172906,2023-12-18,27.0,Lägenhet,Sicklastråket 15,,,Sickla,Nacka,35.0,2020.0,,3132500.0,Slutpris,,,,,,2501.0,,,,,59.305984,18.117877,,26,12358,[5172906],1
3,bostad,720112,Slutpris,5180749,2023-12-18,16.0,Lägenhet,Sandhamnsgatan 21,,1201,Gärdet,Stockholm,41.0,1943.0,3350000.0,3450000.0,Slutpris,3 350 000,,,,,1485.0,617.0,F,2,,59.345380,18.109405,,947,9277,"[5180749, 3723302]",2
4,bostad,641258,Slutpris,5182218,2023-12-18,10.0,Lägenhet,Kransbindarvägen 12,,1102,Midsommarkransen,Stockholm,50.0,1939.0,3150000.0,3575000.0,Slutpris,3 150 000,,,,,3059.0,350.0,F,2,,59.302136,18.002433,,840,13737,[5182218],1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968633,bostad,1585253,Ej på marknaden,1585253,,,Kedjehus,Frösögatan 92,,,Helsingborgs kommun,,82.0,1990.0,,,,,2820000.0,2560000.0,3080000.0,,,,,,,55.965969,12.778081,,,,"[-9560136, -7800535, 1583813, 138797]",4
968634,bostad,2261395,Ej på marknaden,2261395,,,Radhus,Carl Thunbergs väg 27,,,Jönköpings kommun,,116.0,1974.0,,,,,4460000.0,3940000.0,4970000.0,,,2383.0,E,,,57.802823,14.146297,,,,[3063349],1
968635,bostad,3767451,Ej på marknaden,3767451,,,Villa,Björkön 1:152,,,Sundsvalls kommun,,88.0,1990.0,,,,,5320000.0,4480000.0,6150000.0,,,,,,,62.224093,17.569335,,,,[-9548700],1
968636,bostad,3139535,Ej på marknaden,3139535,,,Villa,Spänstvägen 38,,,Hisingen,,151.0,2020.0,,,,,6860000.0,6390000.0,7340000.0,,,,,,,57.770553,11.847665,,,,"[-9544131, -9402929]",2
