In [95]:
import requests
import time
import pandas as pd

def _observation_to_row(obs):
    """Flatten one observation dict from the API into a flat row dict."""
    # A key that is present with a null value bypasses dict.get's default,
    # so use `or` to fall back for both missing and null entries.
    taxon = obs.get("taxon") or {}
    geojson = obs.get("geojson") or {}
    coordinates = list(geojson.get("coordinates") or [])
    # Coordinates are read as [longitude, latitude]; pad short lists with None
    # so a malformed entry yields missing values instead of an IndexError.
    longitude, latitude = (coordinates + [None, None])[:2]
    return {
        "species_name": taxon.get("name"),
        "observed_on": obs.get("observed_on"),
        "latitude": latitude,
        "longitude": longitude,
        "time": obs.get("time_observed_at"),
        "place_guess": obs.get("place_guess"),
    }


def get_bird_observations(place_id, start_date, end_date, per_page=200,
                          nelat=43.85, nelng=-79.2, swlat=43.6, swlng=-79.6,
                          timeout=30):
    """Download bird observations from the iNaturalist observations endpoint.

    Pages through the results for taxon 3 ("Aves") restricted to a place, a
    bounding box and a date range, and collects a few fields per observation.

    Parameters
    ----------
    place_id : int
        Value sent as the ``place_id`` query parameter.
    start_date, end_date : str
        Values sent as the ``d1`` / ``d2`` query parameters.
    per_page : int, default 200
        Number of observations requested per page.
    nelat, nelng, swlat, swlng : float
        North-east and south-west corners of the bounding box. The defaults
        are the values that were previously hard-coded.
    timeout : float, default 30
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per observation with the columns ``species_name``,
        ``observed_on``, ``latitude``, ``longitude``, ``time`` and
        ``place_guess``. The columns are present even when nothing was
        fetched.

    Notes
    -----
    A non-200 response is reported with ``print`` and ends the paging loop;
    whatever was collected up to that point is still returned.
    """
    url = "https://api.inaturalist.org/v1/observations"
    columns = [
        "species_name", "observed_on", "latitude",
        "longitude", "time", "place_guess",
    ]
    all_observations = []
    page = 1

    while True:
        # Define query parameters
        params = {
            "taxon_id": 3,            # Taxon ID for "Aves" (birds)
            "place_id": place_id,     # Geographic place ID
            "nelat": nelat,           # Northeast latitude
            "nelng": nelng,           # Northeast longitude
            "swlat": swlat,           # Southwest latitude
            "swlng": swlng,           # Southwest longitude
            "d1": start_date,         # Start date
            "d2": end_date,           # End date
            "per_page": per_page,     # Number of observations per page
            "page": page,             # Current page
        }

        # Send GET request
        response = requests.get(url, params=params, timeout=timeout)

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            break

        # Parse JSON response
        data = response.json()
        results = data.get("results", [])

        if not results:
            break  # Exit loop if no more results

        all_observations.extend(_observation_to_row(obs) for obs in results)

        # Stop once the pages fetched so far cover the reported total. Use the
        # page size echoed in the response when present, in case it differs
        # from the requested one. If no total is reported, keep paging until
        # an empty page is returned.
        page_size = data.get("per_page") or per_page
        total_results = data.get("total_results")
        if total_results is not None and page * page_size >= total_results:
            break

        page += 1  # Increment page

    # Explicit columns keep the schema stable when no rows were collected.
    return pd.DataFrame(all_observations, columns=columns)

# Example usage: query parameters for the observation download
place_id = 204988
start_date = "2022-01-01"
end_date = "2023-12-31"

# Fetch data
bird_df = get_bird_observations(
    place_id=place_id,
    start_date=start_date,
    end_date=end_date,
)
# Keep only the trailing 14 characters of each timestamp string
# (e.g. "09:48:18-04:00"); missing timestamps stay None.
bird_df['time'] = bird_df['time'].map(
    lambda stamp: None if stamp is None else stamp[-14:]
)

In [96]:
# Show the fetched observations as this cell's output.
bird_df

Unnamed: 0,species_name,observed_on,latitude,longitude,time,place_guess
0,Anas platyrhynchos,2022-05-07,43.642487,-79.467125,09:48:18-04:00,"Grenadier Pond, Toronto, ON, CA"
1,Ardea alba,2023-08-27,43.642250,-79.457344,14:35:49-04:00,"Toronto, ON M6S 5A3, Canada"
2,Ardea herodias,2023-07-10,43.643810,-79.467775,,"High Park-Swansea, Toronto, ON, Canada"
3,Anas platyrhynchos,2022-05-07,43.642478,-79.467033,09:48:21-04:00,"High Park, Toronto, ON, CA"
4,Cygnus olor,2022-04-02,43.643753,-79.467812,09:52:43-04:00,"Grenadier Pond, Toronto, ON, CA"
...,...,...,...,...,...,...
3554,Buteo jamaicensis,2022-01-04,43.645008,-79.458663,15:59:00-05:00,"High Park-Swansea, Toronto, ON, Canada"
3555,Melospiza melodia,2022-01-04,43.645101,-79.459006,16:03:00-05:00,"High Park-Swansea, Toronto, ON, Canada"
3556,Corvus corax,2022-01-04,43.645058,-79.459207,16:23:00-05:00,"High Park-Swansea, Toronto, ON, Canada"
3557,Corvus brachyrhynchos,2022-01-04,43.644940,-79.459228,16:20:00-05:00,"High Park-Swansea, Toronto, ON, Canada"


In [97]:
# scientific to common name

def get_common_name(species_name, delay=1, timeout=30):
    """Look up the preferred common name for a scientific species name.

    Queries the iNaturalist taxa endpoint with ``rank="species"`` and reads
    ``preferred_common_name`` from the matching taxon.

    Parameters
    ----------
    species_name : str
        Scientific name to search for.
    delay : float, default 1
        Seconds to sleep after the request, to space out successive calls.
    timeout : float, default 30
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    str or None
        The common name; the string ``"No common name found"`` when the taxon
        has no ``preferred_common_name`` key; ``None`` when the name is
        missing or blank, the search returns nothing, the response status is
        not 200, or an exception occurs (the latter two are reported with
        ``print``).
    """
    # requests drops query parameters whose value is None, which would turn
    # the call into an unfiltered search and return an unrelated taxon. Skip
    # the request entirely for missing / non-string / blank names.
    if not isinstance(species_name, str) or not species_name.strip():
        return None

    url = "https://api.inaturalist.org/v1/taxa"
    try:
        response = requests.get(
            url,
            params={"q": species_name, "rank": "species"},
            timeout=timeout,
        )
        time.sleep(delay)

        if response.status_code != 200:
            print(f"Error: {response.status_code} for species: {species_name}")
            return None

        results = response.json().get("results", [])
        if not results:
            return None

        # Prefer the taxon whose scientific name matches exactly; fall back to
        # the first search hit when no exact match is present.
        taxon = next(
            (candidate for candidate in results
             if candidate.get("name") == species_name),
            results[0],
        )
        return taxon.get("preferred_common_name", "No common name found")
    except Exception as e:
        # Deliberately best-effort: one failed lookup must not abort the
        # whole mapping loop, so report it and return None.
        print(f"Exception occurred: {e}")
        return None
    
# Look up each distinct scientific name once; rows without a species name are
# skipped here and simply get a missing common_name from the left merge below.
unique_species = bird_df["species_name"].dropna().unique()

mapped_names = []
for species in unique_species:
    common_name = get_common_name(species, delay=1)
    mapped_names.append({"species_name": species, "common_name": common_name})

# Explicit columns keep the merge key present even when no species were found.
common_name_df = pd.DataFrame(mapped_names, columns=["species_name", "common_name"])

# Drop any common_name column left by a previous run of this cell so that
# re-running it does not produce common_name_x / common_name_y columns.
bird_df = (
    bird_df
    .drop(columns="common_name", errors="ignore")
    .merge(common_name_df, on="species_name", how="left")
)

In [98]:
# Show the observations again, now with the merged common_name column.
bird_df

Unnamed: 0,species_name,observed_on,latitude,longitude,time,place_guess,common_name
0,Anas platyrhynchos,2022-05-07,43.642487,-79.467125,09:48:18-04:00,"Grenadier Pond, Toronto, ON, CA",Mallard
1,Ardea alba,2023-08-27,43.642250,-79.457344,14:35:49-04:00,"Toronto, ON M6S 5A3, Canada",Great Egret
2,Ardea herodias,2023-07-10,43.643810,-79.467775,,"High Park-Swansea, Toronto, ON, Canada",Great Blue Heron
3,Anas platyrhynchos,2022-05-07,43.642478,-79.467033,09:48:21-04:00,"High Park, Toronto, ON, CA",Mallard
4,Cygnus olor,2022-04-02,43.643753,-79.467812,09:52:43-04:00,"Grenadier Pond, Toronto, ON, CA",Mute Swan
...,...,...,...,...,...,...,...
3554,Buteo jamaicensis,2022-01-04,43.645008,-79.458663,15:59:00-05:00,"High Park-Swansea, Toronto, ON, Canada",Red-tailed Hawk
3555,Melospiza melodia,2022-01-04,43.645101,-79.459006,16:03:00-05:00,"High Park-Swansea, Toronto, ON, Canada",Song Sparrow
3556,Corvus corax,2022-01-04,43.645058,-79.459207,16:23:00-05:00,"High Park-Swansea, Toronto, ON, Canada",Common Raven
3557,Corvus brachyrhynchos,2022-01-04,43.644940,-79.459228,16:20:00-05:00,"High Park-Swansea, Toronto, ON, Canada",American Crow
