In [None]:
# Extract Meta Data utilizing BC Data Catalogue API

import requests

def extract_metadata_from_json(url, timeout=10):
    """Download a JSON document and return its ``info -> description`` value.

    Args:
        url: Address of the JSON document to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The value stored under ``info -> description``, or ``None`` when the
        request fails, the body is not valid JSON, the document is not a JSON
        object, or the ``info`` / ``description`` entries are missing.
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for non-2xx status codes

        # Load JSON data
        json_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: Invalid JSON response - {e}")
        return None

    # A syntactically valid JSON body may still be a list, string or null;
    # calling .get() on those would raise AttributeError.
    if not isinstance(json_data, dict):
        print("Error: Unexpected JSON structure - top-level value is not an object")
        return None

    # Extract the metadata from the JSON structure
    info = json_data.get('info')
    if not isinstance(info, dict):
        return None
    return info.get('description')

if __name__ == "__main__":
    # JSON API specification for the BC Data Catalogue (bcgov/api-specs repository).
    url = "https://raw.githubusercontent.com/bcgov/api-specs/master/bcdc/bcdc.json"
    metadata = extract_metadata_from_json(url)

    # A missing or empty description is reported as a failure.
    if not metadata:
        print("Failed to extract metadata.")
    else:
        print("Metadata:", metadata, sep="\n")


Metadata:
This API provides live access to the BC Data Catalogue. Further documentation on the API is available from http://docs.ckan.org/en/latest/ Confirm the version of the API available from the catalogue by requesting https://catalogue.data.gov.bc.ca/api/3/action/status_show. 

Please note that you may experience issues when submitting requests to the delivery or test environment if using this [OpenAPI specification](https://github.com/bcgov/api-specs) in other API console viewers.


In [None]:
# Extract dataset related action details utilizing BC Data Catalogue API

import requests

def extract_dataset_metadata_from_json(url, timeout=10):
    """Download an API specification and map each path to an operation summary.

    Args:
        url: Address of the JSON specification to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A dict mapping each key of the document's ``paths`` object to the
        ``summary`` of one of its operations. When several operations under
        the same path carry a summary, the last one iterated wins. The dict is
        empty when ``paths`` is missing or holds no summaries. ``None`` is
        returned when the request fails, the body is not valid JSON, or the
        top-level value is not a JSON object.
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for non-2xx status codes

        # Load JSON data
        json_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: Invalid JSON response - {e}")
        return None

    if not isinstance(json_data, dict):
        print("Error: Unexpected JSON structure - top-level value is not an object")
        return None

    # Extract metadata at dataset level
    datasets_metadata = {}
    paths = json_data.get('paths')
    if not isinstance(paths, dict):
        return datasets_metadata

    for path, path_data in paths.items():
        if not isinstance(path_data, dict):
            continue
        for method_data in path_data.values():
            # A path item can also hold non-operation entries (e.g. a
            # "parameters" list or a plain-string "summary"). Only dict
            # entries can be indexed by 'summary'; a string would pass the
            # `in` substring test and then fail on indexing.
            if isinstance(method_data, dict) and 'summary' in method_data:
                datasets_metadata[path] = method_data['summary']

    return datasets_metadata

if __name__ == "__main__":
    # JSON API specification for the BC Data Catalogue (bcgov/api-specs repository).
    url = "https://raw.githubusercontent.com/bcgov/api-specs/master/bcdc/bcdc.json"
    dataset_metadata = extract_dataset_metadata_from_json(url)

    # Both None (error) and an empty dict (no summaries) count as failure.
    if not dataset_metadata:
        print("Failed to extract dataset metadata.")
    else:
        print("Dataset Metadata:")
        # One three-line record per path: path, summary, separator rule.
        for path, summary in dataset_metadata.items():
            print(f"Dataset: {path}", f"Summary: {summary}", "-" * 50, sep="\n")


Dataset Metadata:
Dataset: /action/tag_list
Summary: Get a list of tags
--------------------------------------------------
Dataset: /action/status_show
Summary: Get the site status
--------------------------------------------------
Dataset: /action/package_list
Summary: Get a list of all packages (datasets)
--------------------------------------------------
Dataset: /action/package_search
Summary: Find packages (datasets) matching query terms
--------------------------------------------------
Dataset: /action/package_show
Summary: Get metadata about one specific package (dataset)
--------------------------------------------------
Dataset: /action/package_activity_list
Summary: Get the activity stream of a package (dataset)
--------------------------------------------------
Dataset: /action/package_activity_list_html
Summary: Get the activity stream of a package (dataset), HTML format
--------------------------------------------------
Dataset: /action/package_autocomplete
Summary: Find 

In [None]:
# Python code snippet to extract datasets (i.e. BC Data Catalogue packages) matching the keywords "health" and "medical" (note that OR is not supported by default)

import requests

def search_packages_with_keywords(keywords, timeout=10):
    """Query the BC Data Catalogue ``package_search`` action.

    Args:
        keywords: Search expression sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The list found under ``result -> results`` (empty when absent) if the
        response reports ``success``; ``None`` when ``success`` is false or
        missing, the request fails, the body is not valid JSON, or the body is
        not a JSON object.
    """
    url = "https://catalogue.data.gov.bc.ca/api/3/action/package_search"
    try:
        # Pass the query via `params` so requests URL-encodes it; characters
        # such as '&', '#' or '+' in the keywords would otherwise corrupt the
        # query string.
        response = requests.get(url, params={'q': keywords}, timeout=timeout)
        response.raise_for_status()  # Raise an exception for non-2xx status codes

        json_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: Invalid JSON response - {e}")
        return None

    if not isinstance(json_data, dict) or not json_data.get('success', False):
        return None
    # `result` may be present but null; treat that like a missing entry.
    return (json_data.get('result') or {}).get('results', [])

if __name__ == "__main__":
    keywords = "health OR medical"
    packages = search_packages_with_keywords(keywords)

    # None (error / unsuccessful response) and an empty list are reported alike.
    if not packages:
        print("No packages found with the specified keywords.")
    else:
        print("Packages found:")
        # One record per package: name, title, description, separator rule.
        for package in packages:
            print(
                f"Package Name: {package.get('name')}",
                f"Title: {package.get('title')}",
                f"Description: {package.get('notes')}",
                "-" * 50,
                sep="\n",
            )


Packages found:
Package Name: youth-custody-services-separate-confinement-duration-including-medical-reasons-
Title: Youth Custody Services-- Separate Confinement Duration (including Medical Reasons)
Description: B.C. Youth Custody Services has two youth custody centres. This extract provides the number of instances where youth were separately confined.  This data shows the duration of each instance of separate confinement (including youth who were separately confined for a medical reason under advice of a health care professional).  
--------------------------------------------------
Package Name: youth-custody-services-separate-confinement-duration-excluding-medical-reasons-
Title: Youth Custody Services-- Separate Confinement Duration (excluding Medical Reasons)
Description: B.C. Youth Custody Services has two youth custody centres. This extract provides the number of instances where youth were separately confined for each FY.  This data shows the duration of each instance of separa

In [None]:
# As I added more keywords, I noticed that the package search doesn't support OR logic.
# Hence this code is discarded; the next code snippet uses a for loop to search for the keywords one by one.

import requests
import pandas as pd

def search_packages_with_keywords(keywords, timeout=10):
    """Run one ``package_search`` request against the BC Data Catalogue.

    Args:
        keywords: Search expression sent as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The list found under ``result -> results`` (empty when absent) if the
        response reports ``success``; ``None`` when ``success`` is false or
        missing, the request fails, the body is not valid JSON, or the body is
        not a JSON object.
    """
    url = "https://catalogue.data.gov.bc.ca/api/3/action/package_search"
    try:
        # Let requests build the query string so the keywords are URL-encoded;
        # interpolating them raw breaks on characters such as '&' or '#'.
        response = requests.get(url, params={'q': keywords}, timeout=timeout)
        response.raise_for_status()  # Raise an exception for non-2xx status codes

        json_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: Invalid JSON response - {e}")
        return None

    if not isinstance(json_data, dict) or not json_data.get('success', False):
        return None
    # `result` may be present but null; treat that like a missing entry.
    return (json_data.get('result') or {}).get('results', [])

if __name__ == "__main__":
    keywords = "health OR medical OR healthcare OR hospital"
    packages = search_packages_with_keywords(keywords)

    # None (error / unsuccessful response) and an empty list are reported alike.
    if not packages:
        print("No packages found with the specified keywords.")
    else:
        print("Packages found:")
        df = pd.DataFrame(packages)
        # Keep only the columns of interest for the report.
        df = df.loc[:, ['name', 'title', 'notes']]
        print(df)

        # Write the trimmed table to a spreadsheet, omitting the index column.
        excel_filename = "packages_search_results.xlsx"
        df.to_excel(excel_filename, index=False)
        print(f"Search results exported to {excel_filename}")


Packages found:
                                                name  \
0                                    hospitals-in-bc   
1    mental-health-and-substance-use-health-services   
2             b-c-covid-19-cases-by-health-authority   
3        metadata-for-chronic-disease-registry---e02   
4                       b-c-covid-19-lab-information   
5                british-columbia-covid-19-dashboard   
6  metadata-for-national-ambulatory-care-reportin...   
7                          b-c-covid-19-case-details   
8                    urgent-and-primary-care-centres   
9  motor-vehicle-serious-injuries-with-alcohol-an...   

                                               title  \
0                                    Hospitals in BC   
1    Mental Health and Substance Use Health Services   
2            B.C. COVID-19 Cases by Health Authority   
3        Metadata for Chronic Disease Registry - E02   
4                      B.C. COVID-19 Lab Information   
5                British Columb

In [None]:
# Revised code to extract datasets that contain at least one keyword from the keyword set

import requests
import pandas as pd

def search_packages_with_keywords(keywords, timeout=10):
    """Search the BC Data Catalogue once per keyword and merge the results.

    Args:
        keywords: Whitespace-separated keywords; each one is sent as its own
            ``package_search`` request (``q`` query parameter).
        timeout: Seconds to wait for each response before giving up. Without
            a timeout, ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A list of package dicts, in first-seen order. A package matched by
        several keywords appears only once (identified by its ``id``, falling
        back to ``name``). Responses that do not report ``success`` are
        skipped. ``None`` is returned if any request fails or returns invalid
        JSON; results gathered up to that point are discarded.
    """
    url = "https://catalogue.data.gov.bc.ca/api/3/action/package_search"
    try:
        keywords_list = keywords.split()
        packages = []
        seen = set()  # identifiers of packages already collected

        for keyword in keywords_list:
            # `params` makes requests URL-encode the keyword instead of
            # splicing it raw into the query string.
            response = requests.get(url, params={'q': keyword}, timeout=timeout)
            response.raise_for_status()  # Raise an exception for non-2xx status codes

            json_data = response.json()
            if not (isinstance(json_data, dict) and json_data.get('success', False)):
                continue

            for package in (json_data.get('result') or {}).get('results', []):
                # Overlapping keywords (e.g. "health" / "healthcare") return
                # the same package repeatedly; keep only the first occurrence.
                key = None
                if isinstance(package, dict):
                    key = package.get('id') or package.get('name')
                if key is not None:
                    if key in seen:
                        continue
                    seen.add(key)
                packages.append(package)

        return packages
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    except ValueError as e:
        print(f"Error: Invalid JSON response - {e}")
        return None

if __name__ == "__main__":
    keywords = "health medical healthcare hospital age geo population"
    packages = search_packages_with_keywords(keywords)

    # None (a request failed) and an empty list are reported alike.
    if not packages:
        print("No packages found with the specified keywords.")
    else:
        print("Packages found:")
        # One record per package: name, title, description, separator rule.
        for package in packages:
            print(
                f"Package Name: {package.get('name')}",
                f"Title: {package.get('title')}",
                f"Description: {package.get('notes')}",
                "-" * 50,
                sep="\n",
            )

        # Tabulate the search results, keeping only the columns of interest.
        df = pd.DataFrame(packages)
        df = df.loc[:, ['name', 'title', 'notes']]

        # Write the table to a spreadsheet, omitting the index column.
        excel_filename = "expanded_packages_search_results.xlsx"
        df.to_excel(excel_filename, index=False)
        print(f"Search results exported to {excel_filename}")


Packages found:
Package Name: mental-health-and-substance-use-health-services
Title: Mental Health and Substance Use Health Services
Description: The HealthLink BC Mental Health and Substance Use (MHSU) data set includes the following:  

Programs that offer early intervention, transitional care or other services that supplement and facilitate primary and adjunctive therapies; which offer community mental health education programs; or which link people who are in need of treatment with appropriate providers. 

Programs that provide preventive, diagnostic and treatment services in a variety of community and hospital-based settings to help people achieve, maintain and enhance a state of emotional well-being, personal empowerment and the skills to cope with everyday demands without excessive stress or reliance on alcohol or other drugs. Treatment may include emotional support, introspection and problem-solving assistance using a variety of modalities and approaches, and medication, as