In [52]:
import pandas as pd
import requests
import bs4
import json
import requests 
import csv
import time
import concurrent.futures
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


In [53]:
url =  "https://www.nadlan.gov.il/Nadlan.REST/Main/GetAssestAndDeals"


payload = {
    "MoreAssestsType": None,
    "FillterRoomNum": 0,
    "GridDisplayType": 0,
    "ResultLable": "תל אביב -יפו",
    "ResultType": 1,
    "ObjectID": "5000",
    "ObjectIDType": "text",
    "ObjectKey": "UNIQ_ID",
    "X": 180428.31832654,
    "Y": 665726.5550939,
    "Gush": "",
    "Parcel": "",
    "showLotParcel": False,
    "showLotAddress": False,
    "OriginalSearchString": "תל אביב  יפו",
    "CurrentLavel": 2,
    "Navs": [],
    "QueryMapParams": {
        "QueryToRun": None,
        "QueryObjectID": "5000",
        "QueryObjectType": "number",
        "QueryObjectKey": "SETL_CODE",
        "QueryDescLayerID": "KSHTANN_SETL_AREA",
        "SpacialWhereClause": True,
    },
    "isHistorical": False,
    "PageNo": 1,
    "OrderByFilled": "DEALDATETIME",
    "OrderByDescending": True,
    "Distance": 0
}


def fetch_data(url, payload, page_no, lost_pages):
    payload['PageNo'] = page_no
    if page_no % 100 == 0:
        print(f'page:{page_no}/5200')
    try:
        retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retries)
        session = requests.Session()
        session.mount('https://', adapter)
        response = session.post(url, json=payload, timeout=120)
        json_data = response.json()

    except Exception as e:
        print(f"Error: {e}\npage{page_no}")
#         lost_pages.append(page_no)
        # Wait for 5 seconds before retrying
        time.sleep(5)
        # Retry the request
        try:
            response = session.post(url, json=payload, timeout=120)
            json_data = response.json()
        except Exception as e:
            print(f"Error: {e}\n page{page_no}")
#             lost_pages.append(page_no)
            return None

    return pd.DataFrame(json_data['AllResults'])
    
def get_nandlan_data(url, payload, number_of_pages, max_threads=10):
    '''
    This function gets all real estate deals in Tel-Aviv from nadlan.gov.il - from 1998 until today (- 2 months)
    We use threads to optimize the running time of this function.
    '''
    df = pd.DataFrame()
    lost_pages = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = []
        for page_no in range(1, number_of_pages):
            futures.append(executor.submit(fetch_data, url, payload, page_no, lost_pages))
        for future in concurrent.futures.as_completed(futures):
            new_df = future.result()
            if new_df is not None and new_df.shape[0] != 0:
                df = pd.concat([df, new_df], ignore_index=True)
    if df.shape[0] > 10:
        cols = ["DEALDATETIME", "TREND_IS_NEGATIVE", "TREND_FORMAT"]
        df.drop(cols, axis=1, inplace=True)
        
    return df


nadlan_df  = get_nandlan_data(url , payload ,3000,15)

page:100/5200
page:200/5200
page:300/5200
page:400/5200
page:500/5200
page:600/5200
page:700/5200
page:800/5200
page:900/5200
page:1000/5200
page:1100/5200
page:1200/5200
page:1300/5200
page:1400/5200
page:1500/5200
page:1600/5200
page:1700/5200
page:1800/5200
page:1900/5200
page:2000/5200
page:2100/5200
page:2200/5200
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2237
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2242
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2243
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2245
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2247
Error: HTTPSConnectionPool(host='www.nadlan.gov.il', port=443): Read timed out. (read timeout=120)
page2248
Error: HTTPSConnectionPool(host

In [54]:
def update_nadlan_df(df, print_summary=False):
    nadlan_old = pd.read_csv("Data/Nadlan.csv")
    nadlan_old.drop_duplicates(inplace=True)
    
    nadlan_old = nadlan_old.append(df, ignore_index=True)
    nadlan_old.drop_duplicates(inplace=True)
    
    if print_summary:
        new_rows = abs(df.shape[0] - nadlan_old.shape[0])
        print(f"Total new rows: {new_rows}")
        print(f'nadlan_old shape: {nadlan_old.shape}')

    nadlan_old.to_csv("Data/Nadlan.csv", index=False)
    return nadlan_old

df_update = update_nadlan_df(nadlan_df,print_summary=True)
print(f'df_update shape: {df_update.shape}')

Total new rows: 88728
nadlan_old shape: (148568, 17)
df_update shape: (148568, 17)


Unnamed: 0,DEALDATE,FULLADRESS,DISPLAYADRESS,GUSH,DEALNATUREDESCRIPTION,ASSETROOMNUM,FLOORNO,DEALNATURE,DEALAMOUNT,NEWPROJECTTEXT,PROJECTNAME,BUILDINGYEAR,YEARBUILT,BUILDINGFLOORS,KEYVALUE,TYPE,POLYGON_ID
0,02.03.2023,"בן עטר 7, תל אביב -יפו",בן עטר 7,7084-66-7,דירה בבית קומות,2.0,ראשונה,54.65,1300000,,,1950.0,,3.0,6152936450,1,7084-66
1,01.03.2023,"נרדור 6, תל אביב -יפו",נרדור 6,6997-168-13,דירה בבית קומות,3.0,שלישית,48.98,1840000,,,1960.0,,4.0,6153320450,1,6997-168
2,01.03.2023,"בועז 45, תל אביב -יפו",בועז 45,6135-26-10,דירה בבית קומות,3.0,,50.0,1500000,,,1950.0,,1.0,6152790800,1,6135-26
3,28.02.2023,"בארי 19, תל אביב -יפו",בארי 19,6217-34-3,דירה בבית קומות,4.0,ראשונה,94.0,6200000,,,2026.0,,,6153388950,1,6217-34
4,28.02.2023,"יוחננוף כהן שמואל 6, תל אביב -יפו",יוחננוף כהן שמואל 6,6970-101-29,דירה בבית קומות,5.0,שישית,93.7,2800000,,,2001.0,,10.0,6152820000,1,6970-101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152423,16.06.2013,"שלום עליכם 52, תל אביב -יפו",שלום עליכם 52,6906-76-6,דירה בבית קומות,2,ראשונה,58.09,1700000,,,1950,,4,5656346200,1,6906-76
152424,16.06.2013,"רח 2425 5, תל אביב -יפו",רח 2425 5,7224-9-68,משרד,,שמינית,216,5675000,,,2016,,12,5657415400,1,7224-9
152425,16.06.2013,"מעון 1, תל אביב -יפו",מעון 1,7051-155-271,דירה,2,שישית,53,1416051,1,רביעיית פלורנטין,2010,,9,11082499802,1,7051-155
152426,16.06.2013,,,7016-6-17,משרד,,ראשונה,25,60000,,,2013,,,5656352350,2,7016-6


In [3]:
def create_and_write_file():
    file_path = "status.txt"
    message = "get_nadlan deal pass-success"

    try:
        # Create a new file and open it in write mode
        with open(file_path, "w") as file:
            # Write the message to the file
            file.write(message)
        
        print("File created and written successfully!")
    except Exception as e:
        print("An error occurred while creating and writing the file:", str(e))

# Call the function to create and write the file
create_and_write_file()


File created and written successfully!
