In [None]:
import pandas as pd
import requests
import bs4
import json
import requests 
import csv
import time
import concurrent.futures
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

In [None]:
def get_data_gov_map(street_codes=None, max_workers=10):
    '''
    Download real-estate deals from GovMap and return them as one DataFrame.

    One POST request is sent per street code to the GetDealsByStreetCodes
    endpoint, restricted to a fixed bounding box. The requests are spread
    over a thread pool because the work is network-bound.

    According to the original notebook notes, the default street codes cover
    Tel-Aviv and the service returns deals from 1998 until now; many of the
    codes in the default range return no deals.

    Parameters:
    - street_codes: iterable of street codes to query. Defaults to
      range(50000000, 50008000), i.e. 50000000 through 50007999.
    - max_workers: number of worker threads used for the requests.

    Returns:
    - A DataFrame holding the rows of every successful response, with a
      fresh 0..n-1 index. An empty DataFrame is returned when no street code
      produced any rows.

    Notes:
    - This is best-effort: a street code whose request or parsing fails is
      reported on stdout and contributes no rows; it does not abort the run.
    - The original notebook had a disabled step that dropped the columns
      "DEALDATE", "DEALTYPE", "PROJECTNAME", "CITY", "CITYID", 'WKT' and
      'extent'; it is still not applied here.
    '''

    url = "https://ags.govmap.gov.il/Identify/GetDealsByStreetCodes"
    # Bounding box sent with every request (only read, so safe to share
    # between worker threads).
    extent = {
        "xmin": 175110.0,
        "ymin": 659000.0,
        "xmax": 185730.0,
        "ymax": 674000.0
    }
    if street_codes is None:
        street_codes = range(50000000, 50008000)

    def process_street_code(street_code):
        payload = {
            "StreetCodes": [street_code],
            "Extent": extent
        }

        try:
            # Retry transient server errors up to 3 times with backoff.
            retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
            # The context manager closes the session (and its connection
            # pool) even when the request or the JSON decoding fails.
            with requests.Session() as session:
                session.mount('https://', HTTPAdapter(max_retries=retries))
                response = session.post(url, json=payload, timeout=30)
                data = response.json()
            return data["data"]
        except Exception:
            # Deliberately broad so one bad street code cannot abort the
            # whole download, but unlike a bare `except:` this lets
            # KeyboardInterrupt / SystemExit propagate.
            print(f"Oops! StreetCodes:{street_code} isn't working")
            return []

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in the same order as street_codes.
        results = list(executor.map(process_street_code, street_codes))

    # Skip empty/None payloads: they add no rows, and pd.concat raises on an
    # empty list of frames.
    frames = [pd.DataFrame(rows) for rows in results if rows]
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)
    
    
def split_and_save_df(df, output_dir="Data"):
    '''
    Split the GovMap DataFrame into two halves and save each half as a CSV.

    The data is split because a single CSV file would be too large. The
    first len(df) // 2 rows go to Real_Estate_TLV_GOVMAPS_1.csv and the
    remaining rows to Real_Estate_TLV_GOVMAPS_2.csv, so the second file gets
    the extra row when the row count is odd. The index is not written.

    Parameters:
    - df: the DataFrame to split and save.
    - output_dir: directory that receives the two files (default "Data").
      It is created if it does not exist yet.
    '''
    # Function-scope import so this block does not depend on the file's
    # import cell.
    import os

    # to_csv does not create missing directories, so make sure it exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    middle_row = len(df) // 2

    first_half = df.iloc[:middle_row]
    second_half = df.iloc[middle_row:]

    first_half.to_csv(os.path.join(output_dir, "Real_Estate_TLV_GOVMAPS_1.csv"), index=False)
    second_half.to_csv(os.path.join(output_dir, "Real_Estate_TLV_GOVMAPS_2.csv"), index=False)
    

# gov_map_df = get_data_gov_map()
# print(gov_map_df.shape)
# split_and_save_df(gov_map_df)
# gov_map_df.to_csv("Data/Test.csv", index=False)
# gov_map_df