In [1]:
import pandas as pd
import requests
import bs4
import json
import requests 
import csv
import time
import concurrent.futures

In [2]:
def get_data_gov_map(street_codes=None, timeout=10):
    '''
    Download the real-estate deals of Tel-Aviv from GovMap, one street code per request.

    According to the original author's notes, the codes 50000000-50007000 cover the
    whole range of street codes in Tel-Aviv (some of them return no deals), and the
    data goes back to 1998.

    Parameters
    ----------
    street_codes : iterable of int, optional
        Street codes to query. Defaults to range(50000000, 50007000).
    timeout : float, optional
        Per-request timeout in seconds (default 10).

    Returns
    -------
    pandas.DataFrame
        All deals returned by the service, concatenated, without the columns
        DEALDATE, DEALTYPE, PROJECTNAME, CITY, CITYID, WKT and extent.
        An empty DataFrame is returned when no street code yielded any deal.

    Notes
    -----
    - Requests are sent sequentially; splitting them across threads would speed this up.
    - A failing street code is reported and skipped, it does not abort the download.
    '''
    url =  "https://ags.govmap.gov.il/Identify/GetDealsByStreetCodes"
    # Bounding box sent with every request (same values for all street codes).
    extent = {"xmin": 175110.0, "ymin": 659000.0, "xmax": 185730.0, "ymax": 674000.0}

    if street_codes is None:
        street_codes = range(50000000, 50007000)
    street_codes = list(street_codes)
    total = len(street_codes)
    frames = []

    for i, street_code in enumerate(street_codes):
        if i % 100 == 0:
            # Report progress against the real number of codes, not a hard-coded total.
            print(f'page:{i}/{total}')
        payload = {"StreetCodes": [street_code], "Extent": extent}

        try:
            response = requests.post(url, json=payload, timeout=timeout)
            response.raise_for_status()
            deals = response.json()["data"]
        except Exception as exc:
            # Best effort: keep going, but say why the code failed.
            # `Exception` (not a bare except) lets Ctrl-C interrupt the long loop.
            print(f"Oops! StreetCodes:{street_code} isn't working ({exc!r})")
            continue

        # Many street codes have no deals; skip empty / missing results.
        if deals:
            frames.append(pd.DataFrame(deals))

    if not frames:
        # pd.concat([]) raises ValueError, so return an empty frame explicitly.
        return pd.DataFrame()

    df = pd.concat(frames, ignore_index=True)

    cols = ["DEALDATE","DEALTYPE","PROJECTNAME","CITY","CITYID",'WKT','extent']
    # errors="ignore": a column absent from the response must not crash the run.
    return df.drop(columns=cols, errors="ignore")
    
    
# Save the Data into csv files

def split_and_save_df(df):
    '''
    Split the GovMap DataFrame into two halves and save each half as its own CSV
    file, because a single CSV holding all the rows is too large.

    The first len(df) // 2 rows go to Data/Real_Estate_TLV_GOVMAPS_1.csv and the
    remaining rows to Data/Real_Estate_TLV_GOVMAPS_2.csv (both without the index).
    The Data directory is created if it does not exist yet.
    '''
    import os  # local import so this function works without touching the import cell

    # to_csv does not create missing parent directories.
    os.makedirs("Data", exist_ok=True)

    middle_row = len(df) // 2

    df.iloc[:middle_row].to_csv("Data/Real_Estate_TLV_GOVMAPS_1.csv", index=False)
    df.iloc[middle_row:].to_csv("Data/Real_Estate_TLV_GOVMAPS_2.csv", index=False)
    

# gov_map_df = get_data_gov_map()
# split_and_save_df(gov_map_df)
# gov_map_df.to_csv("Data/Test.csv", index=False)


In [44]:
# Endpoint and request-body template for the nadlan.gov.il deals query.
# Both are passed to get_nandlan_data() further down in this cell.
url =  "https://www.nadlan.gov.il/Nadlan.REST/Main/GetAssestAndDeals"

# NOTE(review): the key spellings ("Assests", "Fillter", "Lable", "Lavel", "Spacial")
# are presumably dictated by the remote API - confirm before "fixing" any of them.
payload = {
    "MoreAssestsType": None,
    "FillterRoomNum": 0,
    "GridDisplayType": 0,
    "ResultLable": "תל אביב -יפו",
    "ResultType": 1,
    "ObjectID": "5000",
    "ObjectIDType": "text",
    "ObjectKey": "UNIQ_ID",
    "X": 180428.31832654,
    "Y": 665726.5550939,
    "Gush": "",
    "Parcel": "",
    "showLotParcel": False,
    "showLotAddress": False,
    "OriginalSearchString": "תל אביב  יפו",
    "CurrentLavel": 2,
    "Navs": [],
    "QueryMapParams": {
        "QueryToRun": None,
        "QueryObjectID": "5000",
        "QueryObjectType": "number",
        "QueryObjectKey": "SETL_CODE",
        "QueryDescLayerID": "KSHTANN_SETL_AREA",
        "SpacialWhereClause": True,
    },
    "isHistorical": False,
    # Placeholder only: fetch_data() sets "PageNo" to the requested page before each POST.
    "PageNo": 3900,
    "OrderByFilled": "DEALDATETIME",
    "OrderByDescending": True,
    "Distance": 0
}
# Landmarks noted by the original author, written as "page number - year".
# Presumably the deal year reached at that page when results are ordered by
# DEALDATETIME descending - TODO confirm.
# 4000 - 2009
# 4200 - 2008
# 4500 - 2007
# 5000 - 2005 
# 5100 - 2004 

def fetch_data(url, payload, page_no, lost_pages):
    '''
    Fetch one result page and return its rows as a DataFrame.

    Parameters
    ----------
    url : str
        Endpoint to POST to.
    payload : dict
        Request-body template. It is NOT modified: a copy with "PageNo" set to
        page_no is sent, so threads sharing the same template cannot overwrite
        each other's page number.
    page_no : int
        Page to request.
    lost_pages : list
        page_no is appended to this list when the request fails.

    Returns
    -------
    pandas.DataFrame or None
        DataFrame built from the "AllResults" entry of the JSON response, or
        None when the request, the JSON decoding or the lookup failed.
    '''
    if page_no % 10 == 0:
        print(f'page:{page_no}/5200')

    # Per-call copy: mutating the shared dict from several worker threads made
    # requests go out with another thread's PageNo (duplicated / missing pages).
    page_payload = {**payload, 'PageNo': page_no}

    try:
        with requests.Session() as session:
            # A timeout keeps one stalled connection from blocking a worker forever.
            response = session.post(url, json=page_payload, timeout=30)
            response.raise_for_status()
            json_data = response.json()
        return pd.DataFrame(json_data['AllResults'])
    except Exception as exc:
        # Best effort: record the page so it can be retried, and say what went wrong.
        print(f"Error: {page_no} ({exc!r})")
        lost_pages.append(page_no)
        return None

def get_nandlan_data(url, payload, num_pages, max_threads=8):
    '''
    Download real-estate deals from nadlan.gov.il, one result page per request,
    running the requests concurrently in a thread pool.

    According to the original author's note, the data covers Tel-Aviv deals from
    1998 until roughly two months before today.

    Parameters
    ----------
    url : str
        Endpoint passed on to fetch_data.
    payload : dict
        Request-body template. It is not modified; every page gets its own
        shallow copy with "PageNo" set.
    num_pages : int
        Pages 0 .. num_pages - 1 are requested.
    max_threads : int, optional
        Size of the thread pool (default 8).

    Returns
    -------
    pandas.DataFrame
        Rows of every page that returned data, in page order, without the columns
        DEALDATETIME, TREND_IS_NEGATIVE and TREND_FORMAT. An empty DataFrame is
        returned when no page produced any row.
    '''
    lost_pages = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = [
            # Each task gets its own payload copy so workers never share mutable state.
            executor.submit(fetch_data, url, dict(payload, PageNo=page_no), page_no, lost_pages)
            for page_no in range(num_pages)
        ]
        # Collect in submission order so the row order is reproducible between runs.
        page_frames = [future.result() for future in futures]

    frames = [
        page_df for page_df in page_frames
        if page_df is not None and page_df.shape[0] != 0
    ]

    if lost_pages:
        # Surface the failures instead of silently dropping them.
        print(f"Failed pages ({len(lost_pages)}): {sorted(lost_pages)}")

    if not frames:
        return pd.DataFrame()

    # Concatenate once; growing a DataFrame inside the loop is quadratic.
    df = pd.concat(frames, ignore_index=True)

    cols = ["DEALDATETIME","TREND_IS_NEGATIVE","TREND_FORMAT"]
    # errors="ignore": a column absent from the response must not crash the run.
    return df.drop(columns=cols, errors="ignore")


# Download 5200 result pages with 20 worker threads, then store the deals as CSV.
nadlan_df = get_nandlan_data(url, payload, num_pages=5200, max_threads=20)
nadlan_df.to_csv("Data/Nadlan.csv", index=False)

page:0/5200
(40, 20)


Unnamed: 0,DEALDATE,DEALDATETIME,FULLADRESS,DISPLAYADRESS,GUSH,DEALNATUREDESCRIPTION,ASSETROOMNUM,FLOORNO,DEALNATURE,DEALAMOUNT,NEWPROJECTTEXT,PROJECTNAME,BUILDINGYEAR,YEARBUILT,BUILDINGFLOORS,KEYVALUE,TYPE,POLYGON_ID,TREND_IS_NEGATIVE,TREND_FORMAT
0,2026-01-16,2026-01-16T00:00:00,"ילין דוד 9, תל אביב -יפו",ילין דוד 9,6213-819-13,,2,שלישית,40,3070000,1,,1960,,3,11075548702,1,6213-819,False,
20,2026-01-16,2026-01-16T00:00:00,"ילין דוד 9, תל אביב -יפו",ילין דוד 9,6213-819-13,,2,שלישית,40,3070000,1,,1960,,3,11075548702,1,6213-819,False,
21,2025-10-06,2025-10-06T00:00:00,"שטריקר 8, תל אביב -יפו",שטריקר 8,6212-365-2,דירה,3,,72,4009000,1,,2000,,2,11075548699,1,6212-365,False,
1,2025-10-06,2025-10-06T00:00:00,"שטריקר 8, תל אביב -יפו",שטריקר 8,6212-365-2,דירה,3,,72,4009000,1,,2000,,2,11075548699,1,6212-365,False,
22,2024-12-30,2024-12-30T00:00:00,"וולפסון 54, תל אביב -יפו",וולפסון 54,9021-20-18,,2,,43,2820000,1,,2022,,4,11075548696,1,9021-20,False,
