# Notify on new catalog imagery

Automate the process of checking for newly added catalog imagery over a specified AOI by setting up notifications to your email address.

## Set up the notebook

### 1. Install dependencies

In [None]:
!pip install up42-py --upgrade -q
!pip install google-api-python-client google-auth-oauthlib google-auth-httplib2 --upgrade -q

import up42, time, base64, pathlib
import geopandas as gpd
from datetime import datetime
from collections import defaultdict
from shapely.geometry import mapping
from email.mime.text import MIMEText
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

### 2. Configure credentials

Run the cell below to create a `credentials.json` file in a directory named `.up42` in your home folder.

In [None]:
# Create ~/.up42/credentials.json (and the .up42 folder) unless they are already there
credentials_file_path = pathlib.Path.home() / ".up42" / "credentials.json"
credentials_file_path.parent.mkdir(parents=True, exist_ok=True)
credentials_file_path.touch(exist_ok=True)

# Show where the file lives so it can be opened and filled in
print(f"Credentials file is located at: {credentials_file_path}")

1. Click the link above to the created file and paste the following code:
    ```json
    {
        "username": "<your-email-address>",
        "password": "<your-password>"
    }
    ```

1. Retrieve the email address and password you use to log in to the console, and enter them as the values of `username` and `password`.
1. Save the `credentials.json` file.

### 3. Authenticate

In [None]:
# Authenticate with UP42 using the credentials file created above
up42.authenticate(cfg_file=credentials_file_path)

### 4. Set up sender credentials

The script sends email through the Gmail API using OAuth, so the sending account must be a Google account.

1. Go to [Google Console](https://console.cloud.google.com/) and create a new project or select an existing one.
1. Open your project and go to **APIs and services** → **Credentials**.
1. Click **+ Create credentials** → **OAuth client ID**.
1. Select **Desktop app** as the application type and click **Create**.
1. Open the newly created client ID. In the **Client secrets** section, download the JSON file.
1. Go to **Audience**. Click **Publish app** → **Confirm**.

Run the cell below to create a `google_credentials.json` file in a directory named `.up42` in your home folder.

In [None]:
# Create ~/.up42/google_credentials.json (and the .up42 folder) unless they are already there
google_credentials_file_path = pathlib.Path.home() / ".up42" / "google_credentials.json"
google_credentials_file_path.parent.mkdir(parents=True, exist_ok=True)
google_credentials_file_path.touch(exist_ok=True)

# Show where the file lives so the client ID JSON can be pasted into it
print(f"Credentials file is located at: {google_credentials_file_path}")

# Path the Gmail OAuth flow reads the client secrets from
GOOGLE_CREDENTIALS_PATH = google_credentials_file_path

Paste the contents of the downloaded client ID JSON into the `google_credentials.json` file and save it.

Specify the email address associated with your Google Cloud account:

In [None]:
# Email address of the Google account that sends the notifications
SENDER_EMAIL = "example@gmail.com"

### 5. Specify the receiving email address

Specify the email address where notifications will be sent. This address doesn't have to be a Google account.

In [None]:
# Email address that receives the notifications
RECEIVER_EMAIL = "your-email-for-alerts@any-domain.com"

## Step 1. Define search parameters

In [None]:
# Names of the UP42 catalog collections to monitor
COLLECTIONS_TO_SEARCH = [
    "pneo-hd15",
    "worldview-legion-hd",
    "landsat-8",
]

# Maximum cloud cover percentage from 0 to 100; the search keeps scenes
# whose cloud coverage is less than or equal to this value
MAX_CLOUD_COVER_PERCENTAGE = 30

# Start date for the search in YYYY-MM-DD format
SEARCH_START_DATE = "2025-09-20"

# Path to the AOI file; replace it with the location of your own GeoJSON.
# Only the first geometry in the file is used by the searches below.
CUSTOMER_AOI_PATH = "/Users/your-path/Desktop/Berlin.geojson"
customer_aoi = gpd.read_file(CUSTOMER_AOI_PATH)

## Step 2. Run the initial search

The first search establishes a baseline of existing imagery. The monitoring loop will compare future search results against this baseline to find new scenes.

### 1. Identify the host

In [None]:
# Fetch the archive collections, sorted by name
archive_collections = up42.CollectionType.ARCHIVE
sort_by_name = up42.CollectionSorting.name
data_collections = up42.ProductGlossary.get_collections(
    collection_type=archive_collections,
    sort_by=sort_by_name,
)

# Bucket each requested collection under the provider that hosts it
host_to_collections_map = defaultdict(lambda: {"host_obj": None, "collections": []})
for collection in data_collections:
    if collection.name not in COLLECTIONS_TO_SEARCH:
        continue

    # First provider flagged as host, or None when the collection has none
    host = next((provider for provider in collection.providers if provider.is_host), None)
    if host is None:
        print(f"Warning: Collection '{collection.name}' has no host provider.")
        continue

    host_entry = host_to_collections_map[host.title]
    host_entry["host_obj"] = host
    host_entry["collections"].append(collection.name)

print("Collections grouped by host for searching:")
for host_title, data in host_to_collections_map.items():
    print(f"- Host '{host_title}': {data['collections']}")

### 2. Perform the initial search

In [None]:
# Today's date is the upper bound of the baseline search
today_date = datetime.today().strftime('%Y-%m-%d')
aoi_geometry_dict = mapping(customer_aoi.geometry[0])

# Same cloud cover filter for every host
cloud_cover_filter = {"cloudCoverage": {"lte": MAX_CLOUD_COVER_PERCENTAGE}}

# Query each host once and pool the scenes it returns
all_initial_scenes = []
for host_title, data in host_to_collections_map.items():
    host, collections_for_host = data["host_obj"], data["collections"]

    print(f"Searching host '{host.title}' for collections: {collections_for_host}")
    try:
        scenes = list(
            host.search(
                collections=collections_for_host,
                intersects=aoi_geometry_dict,
                start_date=SEARCH_START_DATE,
                end_date=today_date,
                query=cloud_cover_filter,
            )
        )
    except Exception as e:
        # A failing host is reported and skipped so the other hosts are still searched
        print(f"An error occurred while searching host '{host_title}': {e}")
    else:
        all_initial_scenes.extend(scenes)
        print(f"Found {len(scenes)} scenes.\n")

# The pooled scenes become the baseline that later checks are compared against
previous_search_results_df = gpd.GeoDataFrame(all_initial_scenes)
print(f"Total of {len(previous_search_results_df)} images found in the initial search across all hosts.")

## Step 3. Set up the notification system

### 1. Define the email function

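This cell defines two functions: `gmail_authenticate` runs the Google OAuth flow and returns a Gmail API service object, and `send_email_notification` uses that service to send the formatted email. The sending function takes the Gmail service object, the sender and receiver addresses, the subject, and the body as input.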

In [None]:
# The only Gmail permission requested: sending email
SCOPES = ['https://www.googleapis.com/auth/gmail.send']

def gmail_authenticate():
    """Run the OAuth flow and return a Gmail API v1 service object.

    The client secrets are read from GOOGLE_CREDENTIALS_PATH, the JSON file
    downloaded from Google Cloud.
    """
    oauth_flow = InstalledAppFlow.from_client_secrets_file(GOOGLE_CREDENTIALS_PATH, SCOPES)
    user_credentials = oauth_flow.run_local_server(port=0)
    return build('gmail', 'v1', credentials=user_credentials)

def send_email_notification(service, sender_email, receiver_email, subject, body):
    """Send a plain-text email through the given Gmail service and print the sent message's ID."""
    # Assemble the MIME message
    mime_message = MIMEText(body)
    mime_message['to'] = receiver_email
    mime_message['from'] = sender_email
    mime_message['subject'] = subject

    # The whole message is submitted as URL-safe base64 text under the 'raw' key
    encoded_message = base64.urlsafe_b64encode(mime_message.as_bytes()).decode()
    send_request = service.users().messages().send(userId="me", body={'raw': encoded_message})
    sent_message = send_request.execute()
    print(f'Message ID: {sent_message["id"]}')

### 2. Define the checking function

The cell below performs a new search with the same parameters as the initial search. It then compares the scene IDs from the new search with the IDs from the previous search. If new IDs are found, it calls the `send_email_notification` function for each new image.

In [None]:
def check_for_new_imagery(previous_search_results_df, search_params, service, sender_email, receiver_email):
    """Re-run the search on every host and email an alert for each scene ID not seen before.

    Args:
        previous_search_results_df: Results of the previous search. May be an empty
            DataFrame without an 'id' column when that search found nothing.
        search_params: Tuple of (host map, AOI geometry dict, start date, query dict).
        service: Gmail service object passed on to send_email_notification.
        sender_email: Address the alert is sent from.
        receiver_email: Address the alert is sent to.

    Returns:
        The DataFrame of the latest search results, which becomes the baseline for
        the next check. If the search returns no scenes at all, the previous
        DataFrame is returned unchanged.
    """
    # Imported here because the notebook only imports the datetime class itself
    from datetime import timezone

    host_map, aoi_dict, start_date, query = search_params
    # Naive UTC timestamp: the same value datetime.utcnow() produced, without the deprecated call
    end_date_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    all_recent_scenes = []
    # Loop through each host and search for its collections
    for data in host_map.values():
        host = data["host_obj"]
        collections = data["collections"]

        scenes = list(host.search(
            collections=collections,
            intersects=aoi_dict,
            start_date=start_date,
            end_date=end_date_utc,
            query=query,
        ))
        all_recent_scenes.extend(scenes)

    # If no scenes are returned at all, we can stop early
    if not all_recent_scenes:
        print(f'No imagery found over AOI at {datetime.now()}')
        return previous_search_results_df # Return the existing dataframe

    most_recent_search_results_df = gpd.GeoDataFrame(all_recent_scenes)

    # A baseline built from zero scenes has no 'id' column. Treat it as "no known
    # scenes" so every scene found now is reported, instead of raising KeyError.
    if 'id' in previous_search_results_df.columns:
        previous_ids = set(previous_search_results_df['id'])
    else:
        previous_ids = set()

    # Find new scenes by comparing sets of IDs
    recent_ids = set(most_recent_search_results_df['id'])
    new_ids = recent_ids - previous_ids

    if new_ids:
        subject = "New UP42 imagery alert"
        # One email per new scene
        for new_id in new_ids:
            now_time = str(datetime.now())
            body = f'New catalog image available over your AOI.\n\nScene ID: {new_id}\nTime found: {now_time}'
            print(f"New imagery found: {new_id}. Sending a notification.")
            send_email_notification(service, sender_email, receiver_email, subject, body)
    else:
        now_time = str(datetime.now())
        print(f'No new imagery found over AOI at {now_time}')

    return most_recent_search_results_df

## Step 4. Run the monitoring loop

This final cell starts an infinite loop to monitor for new imagery. It will run the `check_for_new_imagery` function, wait for a specified interval, and repeat.

To stop the script, interrupt the kernel.

In [None]:
# Authenticate with Gmail to get the service object. This will open a browser window for you to log in.
print("Authenticating with Gmail...")
gmail_service = gmail_authenticate()
print("Authentication successful.")

# Convert the AOI geometry to a JSON-serializable dictionary
aoi_geometry_dict = mapping(customer_aoi.geometry[0])

# Bundle the search parameters in the order check_for_new_imagery unpacks them
search_params = (
    host_to_collections_map, # Pass the grouped hosts and collections
    aoi_geometry_dict,
    SEARCH_START_DATE,
    {"cloudCoverage": {"lte": MAX_CLOUD_COVER_PERCENTAGE}}
)

# Check for new imagery periodically.
# The KeyboardInterrupt handler wraps the whole loop so that interrupting the kernel
# stops monitoring cleanly at any point, including during the retry wait after an error.
try:
    while True:
        try:
            most_recent_search_results_df = check_for_new_imagery(
                previous_search_results_df,
                search_params,
                gmail_service,
                SENDER_EMAIL,
                RECEIVER_EMAIL
            )
            # The latest results become the baseline for the next check
            previous_search_results_df = most_recent_search_results_df
        except Exception as e:
            print(f"An error occurred: {e}")
            # Wait a shorter time (300 seconds) on error before retrying
            time.sleep(300)
            continue

        # Sleep for 3600 seconds (1 hour)
        print("Check complete. Waiting for 1 hour before the next check.")
        time.sleep(3600)
except KeyboardInterrupt:
    print("\nMonitoring stopped by user.")