# Bolig data from Dataforsyningen

## DAGI

Pandas indstillinger

In [21]:
import pandas as pd
# Notebook-wide pandas display settings: never truncate the column list or the
# content of a cell, but cap rendered frames at 50 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 50
pd.options.display.max_colwidth = None



Kommunekoder i Danmark

In [31]:
# Location of the CSV file with municipality names and codes
path = r"C:\Users\viet-intel\boligpriser\data\input\kommunekoder.csv"

# The file is read without a header row, so the column labels are supplied
# here; every value is loaded as a string
column_names = ['navn', 'kommunekode']
kommune_df = pd.read_csv(
    path,
    header=None,
    names=column_names,
    index_col=False,
    sep=',',
    dtype=str,
    encoding='latin-1',
)

# Preview the first three and the last three rows
display(kommune_df.head(3), kommune_df.tail(3))

Unnamed: 0,navn,kommunekode
0,Albertslund,165
1,Allerød,201
2,Assens,420
3,Ballerup,151
4,Billund,530


Unnamed: 0,navn,kommunekode
93,Vordingborg,390
94,Ærø,492
95,Aabenraa,580
96,Aalborg,851
97,Århus,751


Udvælgelse af kommunekoder i Region Hovedstaden

In [32]:
# Names of the municipalities (kommuner) in Region Hovedstaden
region_hovedstad_list = ["Albertslund", "Allerød", "Ballerup", "Bornholms Regionskommune", "Brøndby", 
                "Dragør", "Egedal", "Fredensborg", "Frederiksberg", "Frederikssund", "Furesø", 
                "Gentofte", "Gladsaxe", "Glostrup", "Gribskov", "Halsnæs", "Helsingør", "Herlev", 
                "Hillerød", "Hvidovre", "Høje-Taastrup", "Hørsholm", "Ishøj", "København", "Lyngby-Taarbæk", 
                "Rudersdal", "Rødovre", "Tårnby", "Vallensbæk"]

# Keep only the rows whose name appears in the list above
region_hovedstad_df = kommune_df[kommune_df['navn'].isin(region_hovedstad_list)]

# `isin` silently drops every name that has no exact match in the CSV, so list
# the requested municipalities that were not found instead of losing them unnoticed
unmatched_names = sorted(set(region_hovedstad_list) - set(region_hovedstad_df['navn']))
if unmatched_names:
    print(f"Warning: no kommunekode found for: {unmatched_names}")

# Municipality codes of the matched rows
region_hovedstad_kommunekode_list = region_hovedstad_df['kommunekode'].tolist()

print(region_hovedstad_kommunekode_list)

['165', '201', '151', '153', '155', '240', '210', '147', '250', '190', '157', '159', '161', '270', '217', '163', '219', '167', '169', '223', '183', '101', '230', '175', '185', '187']


## Address collection from Dataforsyningen in one dataframe

Hent alle adresser ud fra kommunekoder

In [None]:
import requests
import pandas as pd

# Municipality codes to download; only the first code is active, the remaining
# codes are commented out on the same line.
# NOTE(review): the active code is zero-padded to four digits while the
# commented-out codes are not - confirm which form the API expects.
test_list = ['0165']#, '201', '151', '153', '155', '240', '210', '147', '250', '190', '157', '159', '161', '270', '217', '163', '219', '167', '169', '223', '183', '101', '230', '175', '185', '187']

# Define a function to make an API request to the Danish API for housing data
def housing_data_call(kommunekode):
    """Download the addresses of one municipality from the Dataforsyningen API.

    Args:
        kommunekode: Municipality code placed in the ``kommunekode`` query
            parameter of the request.

    Returns:
        A DataFrame parsed from the CSV response, or ``None`` when the request
        fails (the error is printed).
    """
    # Local import so the function does not rely on another notebook cell
    from io import BytesIO

    # URL to API
    url = f"https://api.dataforsyningen.dk/adresser?kommunekode={kommunekode}&format=csv"

    try:
        # Certificate verification is left at the requests default (enabled).
        # The previous version switched it off for this request and then let
        # pandas fetch the same URL again, so it only hid TLS problems.
        response = requests.get(url)
        response.raise_for_status()  # Make sure the API call was successful
    except requests.exceptions.RequestException as e:
        print(f"Error while fetching data for kommune {kommunekode}: {e}")
        return None

    # Parse the body that was already downloaded; passing the URL to pandas
    # would transfer the whole data set a second time.
    # NOTE(review): 'col10' looks like a placeholder column name - confirm
    # which column was meant to be read as a string.
    return pd.read_csv(BytesIO(response.content), low_memory=False, dtype={'col10': 'str'})

# Accumulates one address DataFrame per successfully fetched municipality
data_frame_list = []

# Download every municipality in test_list; a failed download comes back as
# None and is skipped
for kommune in test_list:
    kommune_data = housing_data_call(kommune)
    if kommune_data is None:
        continue
    data_frame_list.append(kommune_data)

# Stack the per-municipality frames into one frame with a fresh running index
all_kommune_df = pd.concat(data_frame_list, ignore_index=True, axis=0)

display(all_kommune_df.head())
print("length:", len(all_kommune_df))


### Land (Jordstykker), data collection

In [37]:
# Define a function to make an API request to the Danish API for land data
def land_data_call(kommunekode):
    """Fetch the land parcels (jordstykker) of one municipality as a DataFrame.

    The CSV endpoint URL is handed directly to pandas, which downloads and
    parses it.

    Args:
        kommunekode: Municipality code placed in the ``kommunekode`` query
            parameter.

    Returns:
        The parsed DataFrame.
    """
    return pd.read_csv(
        f"https://api.dataforsyningen.dk/jordstykker?kommunekode={kommunekode}&format=csv",
        low_memory=False,
    )

# Accumulates one land-parcel DataFrame per municipality
land_data_frame_list = []

# Download the land parcels for every municipality code in Region Hovedstaden
for kommune in region_hovedstad_kommunekode_list:
    land_data_frame_list.append(land_data_call(kommune))

# Stack the per-municipality frames into one frame with a fresh running index
all_land_df = pd.concat(land_data_frame_list, ignore_index=True, axis=0)

display(all_land_df.head())
print("Length:", len(all_land_df))


Unnamed: 0,ændret,geo_ændret,geo_version,bbox_xmin,bbox_ymin,bbox_xmax,bbox_ymax,visueltcenter_x,visueltcenter_y,ejerlavkode,matrikelnr,kommunekode,kommunenavn,sognekode,sognenavn,regionskode,regionsnavn,retskredskode,retskredsnavn,udvidet_esrejendomsnr,esrejendomsnr,sfeejendomsnr,bfenummer,ejerlavnavn,featureid,fælleslod,moderjordstykke,registreretareal,arealberegningsmetode,vejareal,vejarealberegningsmetode,vandarealberegningsmetode
0,2023-03-29T21:04:30.316Z,2015-02-27T20:24:00.545Z,1,12.347892,55.673141,12.350759,55.674584,12.348973,55.673985,20551,11am,165,Albertslund,7148,Herstedøster,1084,Region Hovedstaden,0,,1650057000.0,57236.0,1302759,1302759.0,"Herstedvester By, Herstedvester",999291,,,13141,o,3160,b,ukendt
1,2023-03-29T21:04:30.316Z,2015-02-27T20:24:00.545Z,1,12.341121,55.664756,12.342912,55.6657,12.341721,55.665352,20551,12s,165,Albertslund,7155,Herstedvester,1084,Region Hovedstaden,0,,1650059000.0,58771.0,9874836,9874836.0,"Herstedvester By, Herstedvester",2647884,,,2393,o,1182,b,ukendt
2,2023-03-29T21:04:30.316Z,2022-07-13T21:48:52.247Z,4,12.358818,55.65878,12.362671,55.660253,12.359989,55.659484,20551,15ak,165,Albertslund,9134,Opstandelseskirkens,1084,Region Hovedstaden,0,,1650066000.0,66019.0,100095931,100095931.0,"Herstedvester By, Herstedvester",100107370,,,13755,o,7020,b,ukendt
3,2023-05-09T21:46:11.428Z,2023-05-09T21:46:11.428Z,5,12.36063,55.67063,12.365362,55.674651,12.363326,55.673775,20652,10a,165,Albertslund,7148,Herstedøster,1084,Region Hovedstaden,0,,1650036000.0,36042.0,2126175,2126175.0,"Herstedøster By, Herstedøster",1001576,,,50472,k,314,b,incl
4,2023-05-09T21:46:11.428Z,2023-05-09T21:46:11.428Z,2,12.360788,55.671963,12.362603,55.673198,12.361809,55.67265,20652,10dn,165,Albertslund,7148,Herstedøster,1084,Region Hovedstaden,0,,1650066000.0,66024.0,100174931,100174931.0,"Herstedøster By, Herstedøster",100195210,,1001576.0,10584,o,780,b,ukendt


Length: 346347


## BBR.info

Hent BBR-info ud fra kommunekode

In [None]:
import requests
import pandas as pd

# Endpoint of the BBR "enhed" REST service
api_url = "https://services.datafordeler.dk/BBR/BBRPublic/1/rest/enhed"

# NOTE: the DAR cell further down defines another function named `fetch_data`,
# which replaces this one in the kernel once that cell has been run.
def fetch_data(kommunekode):
    """Fetch the BBR units (enheder) of one municipality as a DataFrame.

    The Datafordeler credentials are read from the environment variables
    ``DATAFORDELER_USERNAME`` and ``DATAFORDELER_PASSWORD`` so that they are
    not stored in the notebook.

    Args:
        kommunekode: Municipality code sent as the ``Kommunekode`` parameter.

    Returns:
        A DataFrame built from the decoded JSON response, or ``None`` when the
        status code is not 200 or the response cannot be turned into a
        DataFrame (a message is printed in both cases).

    Raises:
        RuntimeError: If either credential environment variable is missing.
    """
    # Local import so the function does not rely on another notebook cell
    import os

    username = os.environ.get("DATAFORDELER_USERNAME")
    password = os.environ.get("DATAFORDELER_PASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Set the DATAFORDELER_USERNAME and DATAFORDELER_PASSWORD "
            "environment variables before calling the Datafordeler API."
        )

    # Only page 1 is requested, with a very large page size.
    # NOTE(review): confirm the service does not cap the page size; if it does,
    # rows beyond the first page are never fetched.
    params = {
        "username": username,
        "password": password,
        "Format": "JSON",
        "Kommunekode": kommunekode,
        "pagesize": 9999999,  # Specify the maximum page size
        "page": 1  # Request the first page
    }

    # Fetch data from the API
    response = requests.get(api_url, params=params)

    # Check the status code and proceed with processing the response
    if response.status_code == 200:
        try:
            data = response.json()
            df = pd.DataFrame(data)
            return df

        except ValueError:
            print(f"Failed to decode JSON for kommunekode {kommunekode}.")
            return None

    else:
        print(f"Failed to fetch data for kommunekode {kommunekode} from the API. Status code: {response.status_code}")
        return None

# Zero-padded municipality codes to download
kommunekode_list = ['0165', '0201', '0151', '0153', '0155', '0240', '0210', '0147', '0250', '0190', '0157', '0159', '0161', '0270', '0217', '0163', '0219', '0167', '0169', '0223', '0183', '0101', '0230', '0175', '0185', '0187']


# Accumulates one DataFrame per successfully fetched municipality
dataframes = []

# Download each municipality; a failed download comes back as None and is skipped
for kommunekode in kommunekode_list:
    df = fetch_data(kommunekode)
    if df is None:
        continue
    dataframes.append(df)

# Stack the per-municipality frames (each frame keeps its own row labels)
final_df = pd.concat(dataframes)

# Persist the combined frame as a pickle file
final_df.to_pickle(r"C:\Users\viet-intel\boligpriser\data\output\bbr_enhed.pkl")

# Preview the result and report the total number of rows
display(final_df.head())
print("Length:", len(final_df))


In [6]:
# Read the saved BBR units back from disk and report the number of rows
bbr_df = pd.read_pickle(r"C:\Users\viet-intel\boligpriser\data\output\bbr_enhed.pkl")

print(bbr_df.shape[0])

1165203


## DAR

DAR-information ud fra kommunekode

In [3]:
import os
import requests
import pandas as pd
import time

# Endpoint of the DAR "adresse" REST service
api_url = "https://services.datafordeler.dk/DAR/DAR/2.0.0/rest/adresse"

def fetch_data(kommunekode):
    """Download every page of DAR addresses for one municipality.

    Pages are requested with an increasing ``page`` number, starting at 1,
    until a page decodes to an empty result. A one second pause follows each
    request. The Datafordeler credentials are read from the environment
    variables ``DATAFORDELER_USERNAME`` and ``DATAFORDELER_PASSWORD`` so that
    they are not stored in the notebook.

    Args:
        kommunekode: Municipality code sent as the ``Kommunekode`` parameter.

    Returns:
        The concatenation of all page DataFrames (each page keeps its own row
        labels), or ``None`` when no page contained data or when any request
        fails; in the failure case the pages already downloaded are discarded
        and a message is printed.

    Raises:
        RuntimeError: If either credential environment variable is missing.
    """
    username = os.environ.get("DATAFORDELER_USERNAME")
    password = os.environ.get("DATAFORDELER_PASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Set the DATAFORDELER_USERNAME and DATAFORDELER_PASSWORD "
            "environment variables before calling the Datafordeler API."
        )

    # Query parameters shared by every page request
    params = {
        "username": username,
        "password": password,
        "Format": "JSON",
        "Kommunekode": kommunekode,
        "pagesize": 100000,  # Specify a reasonable page size
    }

    # One DataFrame per downloaded page
    dataframes = []

    # Page numbering starts at 1
    page = 1

    while True:
        # Update the 'page' parameter
        params["page"] = page

        # Fetch data from the API
        response = requests.get(api_url, params=params)
        time.sleep(1)  # Sleep for 1 second to prevent hitting rate limit

        # Check the status code and proceed with processing the response
        if response.status_code == 200:
            try:
                data = response.json()
                if not data:  # An empty page marks the end of the data
                    break
                df = pd.DataFrame(data)
                dataframes.append(df)

            except ValueError:
                print(f"Failed to decode JSON for kommunekode {kommunekode}.")
                return None

        else:
            print(f"Failed to fetch data for kommunekode {kommunekode} from the API. Status code: {response.status_code}")
            return None

        # Move on to the next page
        page += 1

    # Concatenate all pages; None signals that nothing was downloaded
    if dataframes:
        return pd.concat(dataframes)
    else:
        return None

# Municipality codes to download
kommunekode_list = ['0101'] 

# Folder that receives one pickle file per municipality
output_dir = r'C:\Users\viet-intel\boligpriser\data\output\Hovedstad'

# Create the folder up front: to_pickle does not create missing directories,
# and failing after the download would throw the fetched data away
os.makedirs(output_dir, exist_ok=True)

# Download each municipality and store it in its own pickle file
for kommunekode in kommunekode_list:
    df = fetch_data(kommunekode)
    if df is not None:
        # Report how many rows were downloaded
        print(f'Length of DataFrame for kommunekode {kommunekode}: {len(df)}')

        # Save the DataFrame to a pickle file named after the municipality code
        pickle_file = os.path.join(output_dir, f'kommune_{kommunekode}.pkl')
        df.to_pickle(pickle_file)

print("Data saved to individual pickle files.")


Length of DataFrame for kommunekode 0101: 464484
Data saved to individual pickle files.


Sammensætning af forskellige pkl filer

In [4]:
import glob
import pandas as pd
import os

# Directory containing the per-municipality pickle files
input_dir = r'C:\Users\viet-intel\boligpriser\data\output\Hovedstad'

# Output directory for the combined pickle file
output_dir = r'C:\Users\viet-intel\boligpriser\data\output'

# glob returns matches in arbitrary order; sorting makes the row order of the
# combined frame (and of the saved file) reproducible between runs
pickle_files = sorted(glob.glob(os.path.join(input_dir, '*.pkl')))

# Load the data from each pickle file and store it in a list
dataframes = [pd.read_pickle(pickle_file) for pickle_file in pickle_files]

# Concatenate all dataframes into one with a fresh running index
df_combined = pd.concat(dataframes, ignore_index=True)

# Save the combined DataFrame to a new pickle file
output_file = os.path.join(output_dir, 'DAR.pkl')
df_combined.to_pickle(output_file)

# Print the length of the combined DataFrame
print(f'Length of combined DataFrame: {len(df_combined)}')

print(f"Combined data saved to {output_file}.")



Length of combined DataFrame: 1144796
Combined data saved to C:\Users\viet-intel\boligpriser\data\output\DAR.pkl.


## BFE

Enkel søgning

In [7]:
# Imported here so the cell does not rely on an import made by another cell
import os

# Endpoint that looks up the BFE units of a single address id
url = "https://services.datafordeler.dk/DAR/DAR_BFE_Public/1/rest/adresseTilEnhedBfe"

# Credentials are read from the environment instead of being written into the URL
username = os.environ.get("DATAFORDELER_USERNAME")
password = os.environ.get("DATAFORDELER_PASSWORD")
if not username or not password:
    raise RuntimeError(
        "Set the DATAFORDELER_USERNAME and DATAFORDELER_PASSWORD "
        "environment variables before calling the Datafordeler API."
    )

params = {
    "username": username,
    "password": password,
    "Format": "JSON",
    "adresseId": "0a3f509e-d15b-32b8-e044-0003ba298018",
}

try:
    # Make the API call and get the JSON response
    response = requests.get(url, params=params)
    response.raise_for_status()  # Check for any errors in the API call

    # Print the JSON response
    print(response.json())

except requests.exceptions.RequestException as e:
    # NOTE(review): the exception text may include the full request URL,
    # credentials included - clear this output before sharing the notebook
    print("Error making the API call:", e)
except ValueError as ve:
    print("Error processing JSON response:", ve)


[]


Load pkl

In [1]:
import pandas as pd

# Read the combined DAR addresses back from disk
path = r"C:\Users\viet-intel\boligpriser\data\output\DAR.pkl"
DAR = pd.read_pickle(path)

# Show the available column names and the number of rows
print([*DAR.columns])
print(DAR.shape[0])

Lav ny dataframe med id og tom bfe kolonne

In [None]:
# Take the address id column as an independent copy; assigning a new column to
# a plain slice of DAR triggers pandas' SettingWithCopyWarning
bfe_id = DAR[['id_lokalId']].copy()

# Add an empty BFE column to the new frame
bfe_id['BFE'] = ""

display(bfe_id.head())
len(bfe_id)

Finder tilsvarende BFE nummer til hver adresse

In [None]:
import requests
import pandas as pd

def get_data_from_api(adresse_ids):
    """Query the EBR ``BFEnrAdresse`` service for the given address ids.

    The Datafordeler credentials are read from the environment variables
    ``DATAFORDELER_USERNAME`` and ``DATAFORDELER_PASSWORD`` so that they are
    not stored in the notebook. All query parameters are passed through
    ``params`` and are therefore URL-encoded by requests.

    Args:
        adresse_ids: Value sent as the ``Adresseid`` query parameter.

    Returns:
        The decoded JSON response, or ``None`` when the request fails or the
        body is not valid JSON (the error is printed).

    Raises:
        RuntimeError: If either credential environment variable is missing.
    """
    # Local import so the function does not rely on another notebook cell
    import os

    username = os.environ.get("DATAFORDELER_USERNAME")
    password = os.environ.get("DATAFORDELER_PASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Set the DATAFORDELER_USERNAME and DATAFORDELER_PASSWORD "
            "environment variables before calling the Datafordeler API."
        )

    url = "https://services.datafordeler.dk/EBR/Ejendomsbeliggenhed/1/rest/BFEnrAdresse"
    params = {
        "Adresseid": adresse_ids,
        "username": username,
        "password": password,
    }

    try:
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print("Error making the API call:", e)
        return None
    except ValueError as ve:
        print("Error processing JSON response:", ve)
        return None

def chunker(seq, size):
    """Split ``seq`` into consecutive slices of at most ``size`` items.

    Returns a generator of slices; the last slice is shorter when ``len(seq)``
    is not a multiple of ``size``.
    """
    offsets = range(0, len(seq), size)
    return (seq[start:start + size] for start in offsets)

# Look up the BFE data for the DAR addresses in batches and save the result.
# Reads `DAR` from an earlier cell.

# Take the ids of the first 100000 DAR addresses, stripped of surrounding whitespace
adresse_ids = DAR['id_lokalId'].iloc[:100000].str.strip().tolist()

# Collects the 'properties' dict of every feature returned by the API
df_list = []
# Query the API in batches of 20 ids, joined with "|" into a single parameter value
for chunk in chunker(adresse_ids, 20):
    adresse_ids_str = '|'.join(chunk)
    response_data = get_data_from_api(adresse_ids_str)
    # Batches that failed (None), came back empty, or have no 'features' key are skipped
    if response_data and 'features' in response_data:
        for feature in response_data['features']:
            properties = feature['properties']
            df_list.append(properties)

# Convert the list of dictionaries to a DataFrame
bfe_df = pd.DataFrame(df_list)

# Display the first few rows of the DataFrame and the number of rows
display(bfe_df.head())
print(len(bfe_df))

# Save the DataFrame to a pickle file
# NOTE(review): the EBR folder must already exist - confirm before a long run
bfe_df.to_pickle(r"C:\Users\viet-intel\boligpriser\data\output\EBR\bfe.pkl")

In [1]:
import pandas as pd
import os

# Folder with the EBR pickle files
dir_path = r'C:\Users\viet-intel\boligpriser\data\output\EBR'

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the combined frame reproducible between runs
file_list = sorted(f for f in os.listdir(dir_path) if f.endswith('.pkl'))

# Holds one DataFrame per pickle file
df_list = []

# Read each pickle file into a pandas DataFrame
for file in file_list:
    file_path = os.path.join(dir_path, file)
    df = pd.read_pickle(file_path)  # load pickle file
    df_list.append(df)

# Concatenate all of the data frames into one with a fresh running index
EBR_df = pd.concat(df_list, ignore_index=True)

# Print length of the combined DataFrame
print(f'Length of combined DataFrame: {len(EBR_df)}')

Length of combined DataFrame: 179463


In [2]:
# Preview the first rows of the combined EBR frame
EBR_df.head()


Unnamed: 0,id_namespace,id_lokalId,bestemtFastEjendomBFENr,Ejendomstype,adresseManueltAngivet,ESDHReferenceAdresse,ESDHReferenceKommune,kommuneManueltAngivet,kommuneinddelingKommunekode,betegnelse,...,status,forretningshaendelse,forretningsomraade,forretningsproces,virkningFra,virkningTil,virkningsaktoer,registreringFra,registreringTil,registreringsaktoer
0,http://data.gov.dk/Ejendomsbeliggenhedsregistret,fea8a5da-38ae-4504-afb6-a605d566e803,173846,Ejerlejlighed,False,,,False,101,,...,gældende,konverteretFraESR,52.20.05,konverteretFraESR,0001-01-01T15:00:00.000000+00:50,,Geodatastyrelsen,2019-04-28T13:21:47.972423+02:00,,Ejendomsbeliggenhedsregister
1,http://data.gov.dk/Ejendomsbeliggenhedsregistret,073e294d-7685-40ec-a802-87b2820983da,172353,Ejerlejlighed,False,,,False,101,,...,gældende,konverteretFraESR,52.20.05,konverteretFraESR,0001-01-01T15:00:00.000000+00:50,,Geodatastyrelsen,2019-04-28T13:21:46.321174+02:00,,Ejendomsbeliggenhedsregister
2,http://data.gov.dk/Ejendomsbeliggenhedsregistret,a4cdd92e-2fa0-4eb7-a599-68644e841806,172355,Ejerlejlighed,False,,,False,101,,...,gældende,konverteretFraESR,52.20.05,konverteretFraESR,0001-01-01T15:00:00.000000+00:50,,Geodatastyrelsen,2019-04-28T13:21:46.321737+02:00,,Ejendomsbeliggenhedsregister
3,http://data.gov.dk/Ejendomsbeliggenhedsregistret,2486fef4-13f9-4ee3-b78f-9785ea10d55d,172356,Ejerlejlighed,False,,,False,101,,...,gældende,konverteretFraESR,52.20.05,konverteretFraESR,0001-01-01T15:00:00.000000+00:50,,Geodatastyrelsen,2019-04-28T13:21:46.322216+02:00,,Ejendomsbeliggenhedsregister
4,http://data.gov.dk/Ejendomsbeliggenhedsregistret,2f57e964-cc7a-474c-a162-69161816ff50,172357,Ejerlejlighed,False,,,False,101,,...,gældende,konverteretFraESR,52.20.05,konverteretFraESR,0001-01-01T15:00:00.000000+00:50,,Geodatastyrelsen,2019-04-28T13:21:46.322340+02:00,,Ejendomsbeliggenhedsregister
