In [1]:
import requests
import pandas as pd
from tabulate import tabulate
import re

In [2]:
# Fetch every project from the paginated project endpoint and flatten each
# one into a dict of the attributes we want to export. Results accumulate in
# `extracted_data` (one dict per project, in the order the API returns them).
extracted_data = []
base_url = "https://gateway.chotot.com/v1/public/api-pty/project"
limit = 200
offset = 0
offset_item = 0

# Compiled once up front; matches anything that looks like an HTML tag so the
# introduction text can be reduced to plain text.
html_tag_pattern = re.compile(r'<[^<]+?>')

# Page through the endpoint until it returns an empty page. This avoids
# hard-coding the total number of projects, which changes over time.
while True:
    # Construct URL with current offset and limit
    url = f"{base_url}?limit={limit}&offset={offset}"

    # A timeout keeps a stalled connection from hanging the script forever;
    # network-level failures are reported the same way as bad status codes.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}")
        break

    # Stop on any non-success status; data collected so far is kept.
    if response.status_code != 200:
        print("Error fetching data")
        break

    # Convert the JSON response content into a Python dictionary
    data = response.json()
    # `or []` also covers an explicit JSON null for 'projects'
    projects = data.get('projects') or []
    if not projects:
        # An empty page means we have walked past the last project.
        break

    # Advance by what was actually returned, so a short page (or a server-side
    # cap on the page size) never causes records to be skipped.
    offset += len(projects)

    # Process each project
    for project in projects:
        # 'facilities' may be missing or null; only dict entries carry a 'key'
        facilities = project.get('facilities') or []
        facilities_key = [
            facility.get('key')
            for facility in facilities
            if isinstance(facility, dict)
        ]
        surrounding_key = project.get('surrounding', [])

        # Prefer a non-empty string short_introduction; otherwise fall back to
        # introduction. Either field may be absent, null, or a non-string, in
        # which case the content is left empty rather than raising.
        short_introduction = project.get('short_introduction')
        introduction = project.get('introduction')

        if isinstance(short_introduction, str) and short_introduction:
            raw_text = short_introduction
        elif isinstance(introduction, str):
            raw_text = introduction
        else:
            raw_text = ''

        # Strip HTML tags and surrounding whitespace
        content = html_tag_pattern.sub('', raw_text).strip()

        # URL that addresses this single project by its absolute position
        project_url = f"{base_url}?limit=1&offset={offset_item}"
        offset_item += 1  # Increment offset_item for the next individual item

        # Create dictionary for extracted data
        extracted_project_data = {
            'project_oid': project.get('project_oid', ''),
            'area_v2': project.get('area_name', ''),
            'web_url': project.get('web_url', ''),
            'facilities_key': facilities_key,
            'surrounding_key': surrounding_key,
            'investor_id': project.get('investor_id', ''),
            'investor_name': project.get('investor_name', ''),
            'paid_time': project.get('paid_time', ''),
            'short_introduction_content': content,
            'full_url_with_params': project_url,  # Use current URL with specific params
        }

        # Append extracted data to list
        extracted_data.append(extracted_project_data)

# Preview: render the first five extracted records as a grid table, using the
# first record's keys as the column headers.
if not extracted_data:
    print("No data fetched")
else:
    headers = extracted_data[0].keys()
    rows = [list(record.values()) for record in extracted_data[:5]]
    print(tabulate(rows, headers=headers, tablefmt="grid"))

# Persist all records (not only the preview) to a CSV file, without the
# DataFrame index column.
csv_filename = "projects_data.csv"
df = pd.DataFrame(extracted_data)
df.to_csv(csv_filename, index=False)

print(f"Data exported to {csv_filename}")

+---------------+-----------------------+--------------------------------------------------------------------------------------+------------------------------------------------------------+----------------------------------------+---------------+------------------------------+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------