In [5]:
import pandas as pd
import requests
import bs4
import json
import csv
import time
import concurrent.futures
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import ssl
print(ssl.OPENSSL_VERSION)

OpenSSL 1.1.1k  25 Mar 2021


In [6]:
url =  "https://www.nadlan.gov.il/Nadlan.REST/Main/GetAssestAndDeals"

payload = {
    "MoreAssestsType": None,
    "FillterRoomNum": 0,
    "GridDisplayType": 0,
    "ResultLable": "תל אביב -יפו",
    "ResultType": 1,
    "ObjectID": "5000",
    "ObjectIDType": "text",
    "ObjectKey": "UNIQ_ID",
    "X": 180428.31832654,
    "Y": 665726.5550939,
    "Gush": "",
    "Parcel": "",
    "showLotParcel": False,
    "showLotAddress": False,
    "OriginalSearchString": "תל אביב  יפו",
    "CurrentLavel": 2,
    "Navs": [],
    "QueryMapParams": {
        "QueryToRun": None,
        "QueryObjectID": "5000",
        "QueryObjectType": "number",
        "QueryObjectKey": "SETL_CODE",
        "QueryDescLayerID": "KSHTANN_SETL_AREA",
        "SpacialWhereClause": True,
    },
    "isHistorical": False,
    "PageNo": 1,
    "OrderByFilled": "DEALDATETIME",
    "OrderByDescending": True,
    "Distance": 0
}


def fetch_data(url, payload, page_no, lost_pages):
    payload['PageNo'] = page_no
    if page_no % 100 == 0:
        print(f'page:{page_no}/5200')
    try:
        retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retries)
        session = requests.Session()
        session.mount('https://', adapter)
        response = session.post(url, json=payload, timeout=120)
        json_data = response.json()

    except Exception as e:
        print(f"Error: {e}\npage{page_no}")
#         lost_pages.append(page_no)
        time.sleep(5)
        try:
            response = session.post(url, json=payload, timeout=120)
            json_data = response.json()
        except Exception as e:
            print(f"Error: {e}\n page{page_no}")
#             lost_pages.append(page_no)
            return None

    return pd.DataFrame(json_data['AllResults'])
    
def get_nandlan_data(url, payload, number_of_pages, max_threads=10):
    '''
    This function gets all real estate deals in Tel-Aviv from nadlan.gov.il - from 1998 until today (- 2 months)
    We use threads to optimize the running time of this function.
    '''
    df = pd.DataFrame()
    lost_pages = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = []
        for page_no in range(1, number_of_pages):
            futures.append(executor.submit(fetch_data, url, payload, page_no, lost_pages))
        for future in concurrent.futures.as_completed(futures):
            new_df = future.result()
            if new_df is not None and new_df.shape[0] != 0:
                df = pd.concat([df, new_df], ignore_index=True)
    if df.shape[0] > 10:
        cols = ["DEALDATETIME", "TREND_IS_NEGATIVE", "TREND_FORMAT"]
        df.drop(cols, axis=1, inplace=True)
        
    return df


nadlan_df  = get_nandlan_data(url , payload ,20,10)

In [7]:
def update_nadlan_df(df, print_summary=False):
    nadlan_old = pd.read_csv("../Data/Nadlan.csv")
    nadlan_old.drop_duplicates(inplace=True)
    
    nadlan_old_shape = nadlan_old.shape[0]
    
    nadlan_update = nadlan_old.append(df, ignore_index=True)
    nadlan_update.drop_duplicates(inplace=True)
    
    if print_summary:
        new_rows = abs(nadlan_update.shape[0] - nadlan_old_shape)
        print(f"Total new rows: {new_rows}")
        print(f'nadlan_old shape: {nadlan_update.shape}')

    nadlan_update.to_csv("../Data/Nadlan.csv", index=False)
    return nadlan_update

df_update = update_nadlan_df(nadlan_df,print_summary=True)
print(f'df_update shape: {df_update.shape}')

Total new rows: 280
nadlan_old shape: (110937, 17)
df_update shape: (110937, 17)


In [8]:
def create_and_write_file():
    file_path = "status.txt"
    message = "get_nadlan deal pass-success"

    try:
        # Create a new file and open it in write mode
        with open(file_path, "w") as file:
            # Write the message to the file
            file.write(message)
        
        print("File created and written successfully!")
    except Exception as e:
        print("An error occurred while creating and writing the file:", str(e))

# Call the function to create and write the file
create_and_write_file()


File created and written successfully!
