In [126]:
import datetime
import time
from datetime import timedelta

import altair as alt
import ipywidgets as widgets
import pandas as pd
import requests
import requests_cache
from IPython.display import HTML, Markdown
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Shared HTTP session used by every API helper below. Responses are cached
# (expire_after = 1 day) and requests that fail with one of the listed status
# codes are retried up to 10 times with backoff.
retries = Retry(
    total=10,
    backoff_factor=1,
    status_forcelist=[502, 503, 504, 524, 429],
)
s = requests_cache.CachedSession(expire_after=timedelta(days=1))
s.mount("http://", HTTPAdapter(max_retries=retries))
s.mount("https://", HTTPAdapter(max_retries=retries))

In [130]:
# Dashboard header: title and introduction, in-page navigation links,
# the date the notebook was run, and the heading of the Overview section.
display(
    Markdown(
        """
# PROV Data dashboard

This experimental dashboard uses Public Record Office Victoria's [public API](https://prov.vic.gov.au/prov-collection-api) to provide an overview of the available data.
"""
    )
)

display(
    HTML(
        """
<ul class="browser-default">
    <li><a href="#Overview">Overview</a></li>
    <li><a href="#Functions">Functions</a></li>
    <li><a href="#Agencies">Agencies</a></li>
    <li><a href="#Series">Series</a></li>
    <li><a href="#Items">Items</a></li>
    <li><a href="#Images">Images</a></li>
</ul>
"""
    )
)

# "%-d" (day of month without zero padding) is a platform-specific strftime
# extension that is not supported everywhere (e.g. Windows). Format with the
# portable "%d" and strip the leading zero instead; the rendered text is the same.
display(
    Markdown(
        f"*Last updated: {datetime.datetime.now().strftime('%d %B %Y').lstrip('0')}*"
    )
)

display(
    Markdown(
        """
----

## Overview
"""
    )
)


# PROV Data dashboard

This experimental dashboard uses Public Record Office Victoria's [public API](https://prov.vic.gov.au/prov-collection-api) to provide an overview of the available data.


Last updated: 4 April 2025


----

## Overview


In [132]:
def get_total_results(params):
    """
    Return the number of results the PROV search API reports for a query.

    Parameters:
        params: dict of query-string parameters for the search endpoint,
            e.g. ``{"q": "category:Item"}``. The dict is not modified.

    Returns:
        The ``numFound`` value from the ``response`` section of the JSON payload.

    Raises:
        KeyError: if the payload has no ``response`` section. The message
            carries the HTTP status code, payload and headers for diagnosis.
    """
    api_url = "https://api.prov.vic.gov.au/search/query"
    # Only the count is needed, so don't ask for result rows unless the caller
    # explicitly sets "rows" (caller-supplied values take precedence).
    response = s.get(api_url, params={"rows": 0, **params})
    data = response.json()
    if "response" not in data:
        # Fail with the diagnostics attached instead of printing them and then
        # crashing on an uninformative KeyError('response').
        raise KeyError(
            f"No 'response' section in API payload (HTTP {response.status_code}): "
            f"{data!r}; headers: {dict(response.headers)!r}"
        )
    return data["response"]["numFound"]


def make_facet_chart(
    df, title, x_field="value", y_field="total", width=250, height=250
):
    """
    Build an Altair bar chart with one bar per facet value.

    Parameters:
        df: DataFrame containing the columns named by ``x_field`` and ``y_field``.
        title: chart title.
        x_field: column holding the facet values (nominal x axis, bar colour).
        y_field: column holding the counts (quantitative y axis).
        width, height: chart dimensions passed to ``.properties()``.

    Returns:
        The configured ``alt.Chart``.
    """
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(f"{x_field}:N", title=None),
            y=alt.Y(f"{y_field}:Q"),
            # Label the colour scale and tooltip with the x_field argument.
            # The previous code read a name `field` that is not defined in this
            # function, so it only worked when a notebook-level variable of
            # that name happened to exist.
            color=alt.Color(f"{x_field}:N", title=x_field, legend=None),
            tooltip=[
                alt.Tooltip(x_field, title=x_field),
                alt.Tooltip(y_field, format=","),
            ],
        )
        .properties(width=width, height=height, title=title)
    )
    return chart


def make_grid(field, category=None, query=None):
    """
    Show the facet counts for ``field`` as a table next to a bar chart.

    Parameters:
        field: name of the field to facet on.
        category: accepted but not used by this function.
        query: optional query passed through to ``get_facets``.

    Returns:
        A ``widgets.HBox`` holding the table output and the chart output.
    """
    table_out = widgets.Output()
    df = pd.DataFrame(get_facets(field, query=query))
    with table_out:
        display(df.style.format(thousands=",").hide())
    chart_out = widgets.Output()
    with chart_out:
        display(make_facet_chart(df, field, x_field=field))
    return widgets.HBox([table_out, chart_out])


def get_entity(category, id_field, id_value):
    """
    Fetch the first search result matching a category and an identifier.

    Parameters:
        category: value for the ``category`` field in the query.
        id_field: name of the identifier field to match.
        id_value: identifier value (sent as a quoted phrase).

    Returns:
        The first document in the API's ``response.docs`` list.

    Raises:
        IndexError: if the query returns no documents.
    """
    query = f'category:{category} AND {id_field}:"{id_value}"'
    response = s.get(
        "https://api.prov.vic.gov.au/search/query", params={"q": query}
    )
    docs = response.json()["response"]["docs"]
    return docs[0]


def get_series(series_id):
    """Return the Series record whose ``series_id`` matches, via ``get_entity``."""
    return get_entity("Series", "series_id", series_id)


def count_items_in_series(series_id, category):
    """
    Count the results of the given category that belong to a series.

    Parameters:
        series_id: value matched against the ``series_id`` field.
        category: value matched against the ``category`` field.

    Returns:
        The total reported by ``get_total_results`` for the combined query.
    """
    query = f"series_id:{series_id} AND category:{category}"
    return get_total_results({"q": query, "rows": 0})


def get_facets(facet_field, query=None, column="total"):
    """
    Get the facet counts for one field from the search API.

    Parameters:
        facet_field: field to facet on; also used as the key for the value
            in each returned dict.
        query: query string; a falsy value means ``*``.
        column: key under which each count is stored.

    Returns:
        A list of ``{facet_field: value, column: count}`` dicts, in the order
        returned by the API, omitting entries whose count is not above zero.
    """
    params = {
        "q": query if query else "*",
        "facet": "true",
        "facet.field": facet_field,
        "rows": 0,
    }
    response = s.get("https://api.prov.vic.gov.au/search/query", params=params)
    values = response.json()["facet_counts"]["facet_fields"][facet_field]
    facets = []
    # `values` is read as a flat list of alternating entries:
    # a facet value at each even index, its count at the following index.
    for i in range(0, len(values), 2):
        count = values[i + 1]
        if count > 0:
            facets.append({facet_field: values[i], column: count})
    return facets


def get_series_facets(query, column="items_digitised"):
    """Return ``series_id`` facet counts for ``query``, storing counts under ``column``."""
    return get_facets("series_id", query, column)


def totals_by_decade(query):
    """
    Count the results of ``query`` whose date range overlaps each decade.

    Decades run from 1830-1839 to 2020-2029. A record is counted for a decade
    when its ``start_dt`` is on or before the decade's last day and its
    ``end_dt`` is on or after the decade's first day. As before, the
    ``start_dt`` range also has a fixed lower bound of 1830-01-01.

    Parameters:
        query: base query that the date clauses are ANDed onto.

    Returns:
        A list of ``{"decade": "YYYY-YYYY", "total": count}`` dicts, one per decade.
    """
    totals = []
    # decade_end is the final year of each decade: 1839, 1849, ..., 2029.
    for decade_end in range(1839, 2039, 10):
        decade_start = decade_end - 9
        # The end_dt bound must be the FIRST day of the decade. Using the first
        # day of the decade's last year (as the previous code did) dropped
        # records that ended earlier within the decade, although they overlap it.
        date_query = (
            f"{query} AND start_dt:[1830-01-01 TO {decade_end}-12-31] "
            f"AND end_dt:[{decade_start}-01-01 TO *]"
        )
        total = get_total_results({"q": date_query, "rows": 0})
        totals.append({"decade": f"{decade_start}-{decade_end}", "total": total})
    return totals


def make_decade_chart(query, title, x_title="decade", y_title="total"):
    """
    Show per-decade totals for ``query`` as a table next to a bar chart.

    Parameters:
        query: base query passed to ``totals_by_decade``.
        title: chart title.
        x_title: axis/tooltip label for the decade.
        y_title: axis/tooltip label for the total.

    Returns:
        A ``widgets.HBox`` holding the table output and the chart output.
    """
    df_decades = pd.DataFrame(totals_by_decade(query))

    bars = alt.Chart(df_decades).mark_bar()
    bars = bars.encode(
        x=alt.X("decade:O", title=x_title),
        y=alt.Y("total:Q", title=y_title),
        tooltip=[
            alt.Tooltip("decade:O", title=x_title),
            alt.Tooltip("total:Q", title=y_title),
        ],
    )
    chart = bars.properties(title=title, padding=20, width=350, height=250)

    table_out = widgets.Output()
    with table_out:
        # Left-align both the cells and the header row of the table.
        styled = df_decades.style.format(thousands=",")
        styled = styled.hide().set_properties(**{"text-align": "left"})
        header_style = {"selector": "th", "props": [("text-align", "left")]}
        display(styled.set_table_styles([header_style]))

    chart_out = widgets.Output()
    with chart_out:
        display(chart)

    return widgets.HBox([table_out, chart_out])


def style_df(df, link_field=None):
    if link_field:
        df[link_field] = df[link_field].apply(
            lambda x: f'<a href="https://prov.vic.gov.au/archive/{x}">{x}</a>'
        )
    df = df.style.format(thousands=",").hide().set_properties(**{"text-align": "left"})
    df = df.set_table_styles([dict(selector="th", props=[("text-align", "left")])])
    return df

In [27]:
# Headline figure: total reported for a match-all ("*") query.
total_results = get_total_results({"q": "*"})
display(HTML(f"<h2 class='alert alert-info'>The PROV API currently returns {total_results:,} results.</h2>"))

In [90]:
# Introduce the `entity` and `category` fields charted in the next cell.
display(Markdown("""
The API includes information about a number of different entities described in the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model).

The `entity` and `category` fields tell you what sort of data you're dealing with. The charts below use facet counts from the API to find the number of results for each value in these fields.
"""))


The API includes information about a number of different entities described in the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model).

The `entity` and `category` fields tell you what sort of data you're dealing with. These charts use facet counts from the API to find the number of results for each value in these fields.


In [133]:
# Field name -> explanatory note, for the fields charted across all results.
all_fields = {
    "entity": "Top-level conceptual entities defined by the PROV's [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)",
    "category": "A more fine-grained grouping of the descriptive entities used in PROV data – the top-level `Record` entity is sub-divided into `Series`, `Consignment`, `Item`, and `Image`",
}

# Fields charted for series (used in the Series section).
series_fields = {
    "format": "Indicates whether a series is in physical or digital format",
    "location": "Indicates where the series is held – values are not exclusive, so a series can be `Online` and in `North Melbourne`",
    "rights_status": "Indicates whether a series' contents and metadata are available for public access",
}

# Fields charted for items (used in the Items section).
item_fields = {
    "format": "Indicates whether an item is in physical or digital format",
    "record_form": "Describes the physical format of an item",
    "location": "Indicates where the item is held – values are not exclusive, so an item can be `Online` and in `North Melbourne`",
    "rights_status": "Indicates whether an item's contents and metadata are available for public access",
}

# One sub-section per field: heading, note, then the table + chart grid.
for field, note in all_fields.items():
    display(Markdown(f"### {field.title().replace('_', ' ')}"))
    display(Markdown(note))
    display(make_grid(field))

### Entity

Top-level conceptual entities defined by the PROV's [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)

HBox(children=(Output(), Output()))

### Category

A more fine-grained grouping of the descriptive entities used in PROV data – the top-level `Record` entity is sub-divided into `Series`, `Consignment`, `Item`, and `Image`

HBox(children=(Output(), Output()))

In [97]:
# Functions section: heading and definition, then the total number of functions.
display(Markdown("""
----

## Functions

> The *function entity* in PROV’s ACM represents the major responsibilities of Victorian Government that may be managed by one or more agencies over time. Applying this entity helps to:
> - group together various records with the same administrative record context
> - links records to their provenance and complementary information.  

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)
"""))

total_functions = get_total_results({"q": "category:Function"})
display(HTML(f"<h3 class='alert alert-info'>There are {total_functions:,} functions.</h3>"))


----

## Functions

> The *function entity* in PROV’s ACM represents the major responsibilities of Victorian Government that may be managed by one or more agencies over time. Applying this entity helps to:
> - group together various records with the same administrative record context
> - links records to their provenance and complementary information.  

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)


In [104]:
# Heading and explanation for the agencies-by-function table.
display(
    Markdown("### Count of agencies by function"),
    Markdown(
        "Agencies are responsible for performing government functions. This table shows which functions are most frequently assigned to agencies."
    ),
)

### Count of agencies by function

Agencies are responsible for performing government functions. This table shows which functions are most frequently assigned to agencies.

In [102]:
# Take the first 20 `entity_id` facets for "Primary responsible agency"
# relationships and look up the title of each function.
functions = []
for facet in get_facets(
    "entity_id",
    'category:relatedEntity AND relationship:"Primary responsible agency"',
    "total_agencies",
)[:20]:
    try:
        # Facet ids look like "VF382"; the identifier is queried as "VF 382".
        function = get_entity(
            "Function",
            "identifier.PROV_ACM.id",
            facet["entity_id"].replace("VF", "VF "),
        )
        facet["function"] = function["title"]
    except IndexError:
        # get_entity found no matching record.
        facet["function"] = "[not found]"
    functions.append(facet)
df_functions = pd.DataFrame(functions)
df_functions = df_functions[["entity_id", "function", "total_agencies"]]
display(style_df(df_functions, link_field="entity_id"))

entity_id,function,total_agencies
VF382,Educational institutions,422
VF397,Municipalities,129
VF418,Water supply,109
VF381,Courts,60
VF416,Water and sewerage authorities,51
VF125,"Health, public",50
VF128,"Health, mental",37
VF122,Rural water supply,26
VF28,Cemeteries,25
VF11,Ports and harbours,21


In [98]:
# Agencies section: heading and definition, then the total number of agencies.
display(Markdown("""
----

## Agencies

> The *agent entity* in PROV’s ACM represents a Victorian Government agency—an administrative unit which has or had responsibility for the provision of at least one aspect of government administration. This entity helps to provide a description of a record’s context, namely who created the records and for what purpose.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)
"""))

total_agencies = get_total_results({"q": "category:Agency"})
display(HTML(f"<h3 class='alert alert-info'>There are {total_agencies:,} agencies.</h3>"))


----

## Agencies

> The *agent entity* in PROV’s ACM represents a Victorian Government agency—an administrative unit which has or had responsibility for the provision of at least one aspect of government administration. This entity helps to provide a description of a record’s context, namely who created the records and for what purpose.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)


In [111]:
# Heading and explanation for the agencies-by-decade chart.
display(
    Markdown(
        """

### Count of agencies by decade

This chart shows the number of agencies whose life dates overlap with each decade from 1830.

"""
    )
)



### Count of agencies by decade

This chart shows the number of agencies whose life dates overlap with each decade from 1830.



In [32]:
# Table + chart of agencies per decade (displayed as the cell's result).
make_decade_chart(
    query="category:Agency",
    title="Agencies by decade",
    y_title="number of agencies",
)

HBox(children=(Output(), Output()))

In [112]:
# Heading and explanation for the functions-by-agency table.
display(
    Markdown(
        """

### Count of functions by agency (top 20)

Agencies perform functions. This table shows which agencies are responsible for the most functions.

"""
    )
)



### Count of functions by agency (top 20)

Agencies perform functions. This table shows which agencies are responsible for the most functions.



In [41]:
# Take the first 20 `related_entity_id` facets for "Primary responsible agency"
# relationships and look up the title of each agency.
agencies = []
for facet in get_facets(
    "related_entity_id",
    'category:relatedEntity AND relationship:"Primary responsible agency"',
    "total_functions",
)[:20]:
    try:
        # Facet ids look like "VA475"; the identifier is queried as "VA 475".
        agency = get_entity(
            "Agency",
            "identifier.PROV_ACM.id",
            facet["related_entity_id"].replace("VA", "VA "),
        )
    except IndexError:
        # get_entity raises IndexError when no record matches. Show a
        # placeholder (as the functions table does) instead of aborting.
        facet["agency_title"] = "[not found]"
    else:
        facet["agency_title"] = agency["title"]
    facet["agency_id"] = facet["related_entity_id"]
    agencies.append(facet)
df_agencies = pd.DataFrame(agencies)[["agency_id", "agency_title", "total_functions"]]
display(style_df(df_agencies, link_field="agency_id"))

agency_id,agency_title,total_functions
VA475,Chief Secretary's Department,69
VA538,Department of Crown Lands and Survey,34
VA669,Public Works Department (previously the Department of the Commissioner of Public Works),29
VA1039,Department of Premier and Cabinet,25
VA3970,Department of Human Services,25
VA5037,Department of Health and Human Services,25
VA3972,Department of Natural Resources and the Environment,24
VA856,Colonial Secretary's Office,24
VA430,Department of Property and Services,23
VA5251,"Department of Jobs, Precincts and Regions",23


In [114]:
# Heading and explanation for the series-by-agency table.
display(
    Markdown(
        """

### Count of series by agency (top 20)

Agencies create and manage series. This chart shows the agencies that have created the most series.

"""
    )
)



### Count of series by agency (top 20)

Agencies create and manage series. This chart shows the agencies that have created the most series.



In [100]:
# Take the first 20 creating-agency facets across Series records and look up
# the title of each agency.
agencies = []
for facet in get_facets(
    "creating_agents.creating_agency_id", "category:Series", "total_series"
)[:20]:
    try:
        # The facet value is combined with a "VA " prefix to form the identifier.
        agency = get_entity(
            "Agency",
            "identifier.PROV_ACM.id",
            f"VA {facet['creating_agents.creating_agency_id']}",
        )
    except IndexError:
        # get_entity raises IndexError when no record matches. Show a
        # placeholder (as the functions table does) instead of aborting.
        facet["agency_title"] = "[not found]"
    else:
        facet["agency_title"] = agency["title"]
    facet["agency_id"] = f"VA{facet['creating_agents.creating_agency_id']}"
    agencies.append(facet)
df_agencies = pd.DataFrame(agencies)[["agency_id", "agency_title", "total_series"]]
display(style_df(df_agencies, link_field="agency_id"))

agency_id,agency_title,total_series
VA538,Department of Crown Lands and Survey,1254
VA2876,"Victorian Railways (also Victorian Railways Commissioners 1883-1973, Victorian Railways Board 1973-1983)",291
VA714,Education Department,255
VA865,Department of the Treasurer (also known as Treasury and Treasurer's Office),255
VA669,Public Works Department (previously the Department of the Commissioner of Public Works),190
VA2549,Supreme Court of Victoria,189
VA511,Melbourne (Town 1842-1847; City 1847-ct),185
VA475,Chief Secretary's Department,178
VA1002,"State Electricity Commission of Victoria I (previously known as the Electricity Commissioners, 1919 to 1921)",174
VA2984,Public Transport Corporation,122


In [62]:
# Series section: heading and definition, followed by the per-field sub-heading.
display(
    Markdown("""
----

## Series
        
> *Record Series*: a group of records which are recorded or maintained by the same agency (or agencies) and which:
> - are in the same numerical, alphabetical, chronological or other identifiable sequence;
> - or result from the same accumulation or filing process.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)
"""),
    Markdown("### Count of series by field"),
)

## Series


> *Record Series*: a group of records which are recorded or maintained by the same agency (or agencies) and which:
> - are in the same numerical, alphabetical, chronological or other identifiable sequence;
> - or result from the same accumulation or filing process.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)


### Counts by field

In [134]:
# One sub-section per series field: heading, note, then the table + chart grid.
for field, note in series_fields.items():
    section_title = field.title().replace("_", " ")
    display(Markdown(f"#### {section_title}"))
    display(Markdown(note))
    display(make_grid(field, query="category:Series"))

#### Format

Indicates whether a series is in physical or digital format

HBox(children=(Output(), Output()))

#### Location

Indicates where the series is held – values are not exclusive, so a series can be `Online` and in `North Melbourne`

HBox(children=(Output(), Output()))

#### Rights Status

Indicates whether a series' contents and metadata are available for public access

HBox(children=(Output(), Output()))

In [None]:
# Heading and explanation for the series-by-decade chart.
display(
    Markdown("### Count of series by decade"),
    Markdown(
        "This chart displays the number of series whose start and end dates overlap with each decade since 1830."
    ),
)

In [44]:
# Table + chart of series per decade (displayed as the cell's result).
make_decade_chart(
    query="category:Series",
    title="Series by decade",
    y_title="number of series",
)

HBox(children=(Output(), Output()))

In [115]:
# Heading and explanation for the items-by-series table.
display(
    Markdown(
        """

### Count of items by series (top 20)

Series contain items. This table shows which series contain the most items.

"""
    )
)



### Count of items by series (top 20)

Series contain items. This table shows which series contain the most items.



In [53]:
# Take the first 20 `series_id` facets across Item records and look up the
# title of each series.
digitised = []
for facet in get_series_facets("category:Item", "total_items")[:20]:
    try:
        series = get_series(facet["series_id"])
    except IndexError:
        # get_series raises IndexError when no Series record matches. Show a
        # placeholder (as the digital-items table does) instead of aborting.
        facet["series_title"] = "[not found]"
    else:
        facet["series_title"] = series["title"]
    # Prefix the id only after the lookup, which needs the bare facet value.
    facet["series_id"] = f"VPRS{facet['series_id']}"
    digitised.append(facet)
df_items = pd.DataFrame(digitised)[["series_id", "series_title", "total_items"]]
display(style_df(df_items, link_field="series_id"))

series_id,series_title,total_items
VPRS28,Probate and Administration Files,1430364
VPRS7591,Wills,943563
VPRS267,Civil Case Files,458736
VPRS24,Inquest Deposition Files,221147
VPRS17379,Probate and Administration Files (CourtView),218936
VPRS3183,Town Clerk's Correspondence Files II [MCC Series 120],200094
VPRS17865,Certified Plans of Subdivision - Lodged Plan (LP) series,140340
VPRS5357,Land Selection And Correspondence Files,137380
VPRS932,Trading Company Registration Files,128333
VPRS17075,Register Book of Crown Lease Titles,117202


In [63]:
# Items section: heading and definition.
display(Markdown("""
----

## Items

> *Record Item*: a discrete element [of] records managed within a ‘Series’. An Item represents a part of a recordkeeping system or a logical or convenient grouping of records. It may represent one record or multiple records such as a group of folios fastened together to form a file, a group of electronic files aggregated in a folder, or a single volume.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)
"""))

## Items


> *Record Item*: a discrete element [of] records managed within a ‘Series’. An Item represents a part of a recordkeeping system or a logical or convenient grouping of records. It may represent one record or multiple records such as a group of folios fastened together to form a file, a group of electronic files aggregated in a folder, or a single volume.

&mdash; see the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model)


In [116]:
# Fetch the Item and Series totals (in that order), then report the item count.
total_items, total_series = [
    get_total_results({"q": f"category:{category}"})
    for category in ("Item", "Series")
]
display(
    HTML(f"<h3 class='alert alert-info'>There are {total_items:,} items.</h3>")
)

In [70]:
# Sub-heading for the per-field item counts rendered by the next cell.
display(Markdown("### Count of items by field"))

### Counts by field

In [135]:
# One sub-section per item field: heading, note, then the table + chart grid.
for field, note in item_fields.items():
    section_title = field.title().replace("_", " ")
    display(Markdown(f"#### {section_title}"))
    display(Markdown(note))
    display(make_grid(field, query="category:Item"))

#### Format

Indicates whether an item is in physical or digital format

HBox(children=(Output(), Output()))

#### Record Form

Describes the physical format of an item

HBox(children=(Output(), Output()))

#### Location

Indicates where the item is held – values are not exclusive, so an item can be `Online` and in `North Melbourne`

HBox(children=(Output(), Output()))

#### Rights Status

Indicates whether an item's contents and metadata are available for public access

HBox(children=(Output(), Output()))

In [117]:
# Heading and explanation for the items-by-decade chart.
display(
    Markdown("### Count of items by decade"),
    Markdown(
        "This chart displays the number of items whose date range overlaps with each decade since 1830."
    ),
)

### Count of items by decade

This chart displays the number of items whose date range overlaps with each decade since 1830.

In [51]:
# Table + chart of items per decade (displayed as the cell's result).
make_decade_chart(
    query="category:Item",
    title="Items by decade",
    y_title="number of items",
)

HBox(children=(Output(), Output()))

In [68]:
# Sub-section heading for the digitised-item statistics that follow.
display(Markdown("### Digitised items"))

### Digitised items

In [79]:
# Items with any value in `iiif-manifest` are counted as digitised and
# reported alongside their share of all items.
total_items = get_total_results({"q": "category:Item"})
digitised_items = get_total_results(
    {"q": "iiif-manifest:[* TO *] AND category:Item"}
)
display(HTML(f"<h4 class='alert alert-info'>{digitised_items:,} items have been digitised ({digitised_items/total_items:.2%} of total items).</h4>"))

In [118]:
# Heading and explanation for the digitised-items-by-decade chart.
display(
    Markdown("#### Count of digitised items by decade"),
    Markdown(
        "This chart displays the number of digitised items whose date range overlaps with each decade since 1830."
    ),
)

#### Count of digitised items by decade

This chart displays the number of digitised items whose date range overlaps with each decade since 1830.

In [83]:
# Table + chart of digitised items per decade (displayed as the cell's result).
make_decade_chart(
    query="iiif-manifest:[* TO *] AND category:Item",
    title="Digitised items by decade",
)

HBox(children=(Output(), Output()))

In [119]:
# Heading and explanation for the digitised-items-by-series table.
display(
    Markdown(
        """

#### Count of digitised items by series (top 20)

This table shows which series contain the most digitised items.

"""
    )
)



#### Count of digitised items by series (top 20)

This table shows which series contain the most digitised items.



In [71]:
# For the first 20 `series_id` facets among digitised items, look up the series
# title and work out what share of the series' items is digitised.
digitised = []
for facet in get_series_facets(
    "iiif-manifest:[* TO *] AND category:Item", "items_digitised"
)[:20]:
    try:
        series = get_series(facet["series_id"])
    except IndexError:
        # get_series raises IndexError when no Series record matches. Show a
        # placeholder (as the digital-items table does) instead of aborting.
        facet["series_title"] = "[not found]"
    else:
        facet["series_title"] = series["title"]
    total_items = count_items_in_series(facet["series_id"], "Item")
    percent = facet["items_digitised"] / total_items
    # Prefix the id only after the lookups, which need the bare facet value.
    facet["series_id"] = f"VPRS{facet['series_id']}"
    facet["total_items"] = total_items
    facet["percent_digitised"] = f"{percent:.2%}"
    digitised.append(facet)
df_digitised_items = pd.DataFrame(digitised)[
    ["series_id", "series_title", "total_items", "items_digitised", "percent_digitised"]
]
display(style_df(df_digitised_items, link_field="series_id"))

series_id,series_title,total_items,items_digitised,percent_digitised
VPRS7591,Wills,943563,351756,37.28%
VPRS28,Probate and Administration Files,1430364,305314,21.35%
VPRS24,Inquest Deposition Files,221147,125164,56.60%
VPRS283,"Divorce Case Files, Melbourne",35072,19563,55.78%
VPRS19,Inward Registered Correspondence,20928,18884,90.23%
VPRS14517,Negatives of Photographs [Publications Branch],18283,18184,99.46%
VPRS12800,Photographic Collection: Railway Negatives: Alpha-numeric Systems,12332,12102,98.13%
VPRS12903,Photographic Negatives: Railways: Box Systems,11529,11093,96.22%
VPRS8168,Historic Plan Collection,11575,10932,94.44%
VPRS3183,Town Clerk's Correspondence Files II [MCC Series 120],200094,6461,3.23%


In [74]:
# Digital items sub-section: heading, explanation, then the total count.
display(
    Markdown("### Digital items"),
    Markdown("""
Items that have `format` set to `Digital` seem to be mostly born digital records – in other words, they were created as digital objects, rather than being physical records that were later digitised by scanning or imaging. These 'digital' records don't have IIIF manifests, and are encapsulated within XML files using the [VERS standard](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/vers-version-3). However, it seems that there are also some digitised images amongst the digital items.
"""),
)

total_born_digital = get_total_results({"q": "format:Digital AND category:Item"})
display(HTML(f"<h4 class='alert alert-info'>There are {total_born_digital:,} digital items.</h4>"))

### Digital items


Items that have `format` set to `Digital` seem to be mostly born digital records – in other words, they were created as digital objects, rather than being physical records that were later digitised by scanning or imaging. These 'digital' records don't have IIIF manifests, and are encapsulated within XML files using the [VERS standard](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/vers-version-3). However, it seems that there are also some digitised images amongst the digital items.


In [120]:
# Heading and explanation for the digital-items-by-decade chart.
display(
    Markdown("#### Count of digital items by decade"),
    Markdown(
        "This chart displays the number of digital items whose date range overlaps with each decade since 1830."
    ),
)

#### Count of digital items by decade

This chart displays the number of digital items whose date range overlaps with each decade since 1830.

In [67]:
# Table + chart of digital items per decade (displayed as the cell's result).
make_decade_chart(
    query="format:Digital AND category:Item",
    title="Digital items by decade",
)

HBox(children=(Output(), Output()))

In [None]:
# Heading and explanation for the digital-items-by-series table.
display(
    Markdown(
        """

#### Count of digital items by series (top 20)

This table shows which series contain the most digital items.

"""
    )
)

In [122]:
# For the first 20 `series_id` facets among items with format "Digital", look
# up the series title and work out what share of the series' items is digital.
digitised = []
for facet in get_series_facets("format:Digital AND category:Item", "digital_items")[:20]:
    try:
        series = get_series(facet["series_id"])
        facet["series_title"] = series["title"]
    except IndexError:
        # No Series record was found for this id.
        facet["series_title"] = "[not found]"
    total_items = count_items_in_series(facet["series_id"], "Item")
    percent = facet["digital_items"] / total_items
    # The dict literal is evaluated first, so the prefix is applied to the bare id.
    facet.update(
        {
            "series_id": f"VPRS{facet['series_id']}",
            "total_items": total_items,
            "percent_digital": f"{percent:.2%}",
        }
    )
    digitised.append(facet)
df_digitised_items = pd.DataFrame(digitised)
df_digitised_items = df_digitised_items[
    ["series_id", "series_title", "total_items", "digital_items", "percent_digital"]
]
display(style_df(df_digitised_items, link_field="series_id"))

https://api.prov.vic.gov.au/search/query?facet=true&facet.field=series_id&q=format%3ADigital+AND+category%3AItem&rows=0


series_id,series_title,total_items,digital_items,percent_digital
VPRS15687,[not found],22042,21846,99.11%
VPRS16497,Exhibits,18134,18134,100.00%
VPRS17684,[not found],6832,6716,98.30%
VPRS16171,Regional Land Office Parish and Township Plans Digitised Reference Set,5091,5091,100.00%
VPRS18741,[not found],2462,2462,100.00%
VPRS16493,[not found],1686,1686,100.00%
VPRS18241,Submissions [Published],1665,1665,100.00%
VPRS16681,2009 Victorian Bushfires Fire Recovery Photo Point Library,1263,1263,100.00%
VPRS18244,Witness Statements [Published],1145,1145,100.00%
VPRS18552,Performance and Assurance Audit Reports to Parliament,422,422,100.00%


In [80]:
# Images section: heading and explanation, then the total number of image records.
display(Markdown("""
----

## Images
        
There are a large number of results that have `category` set to `Image`. These 'image' records don't seem to be defined in the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model). The 'image' records describe individual pages from digitised items, attaching additional metadata such as the name of a person mentioned on that page. This means there can be multiple image records for a single page if, for example, the page includes a list of names.
"""))

total_images = get_total_results({"q": "category:Image"})
display(
    HTML(f"<h4 class='alert alert-info'>There are {total_images:,} images.</h4>")
)

## Images


There are a large number of results that have `category` set to `Image`. These 'image' records don't seem to be defined in the [PROV Archival Control Model](https://prov.vic.gov.au/recordkeeping-government/a-z-topics/archival-control-model). The 'image' records describe individual pages from digitised items, attaching additional metadata such as the name of a person mentioned on that page. This means there can be multiple image records for a single page if, for example, the page includes a list of names.


In [None]:
# Heading and explanation for the images-by-series table.
display(
    Markdown(
        """

#### Count of images by series (top 20)

This table shows which series contain the most images.

"""
    )
)

In [87]:
# For the first 20 `series_id` facets among Image records, look up the series
# title and count the digitised items in that series.
digitised = []
for facet in get_series_facets("category:Image", "total_images")[:20]:
    try:
        series = get_series(facet["series_id"])
    except IndexError:
        # get_series raises IndexError when no Series record matches. Show a
        # placeholder (as the digital-items table does) instead of aborting.
        facet["series_title"] = "[not found]"
    else:
        facet["series_title"] = series["title"]
    digitised_items = get_total_results(
        {
            "q": f"category:Item AND series_id:{facet['series_id']} AND iiif-manifest:[* TO *]"
        }
    )
    # Prefix the id only after the lookups, which need the bare facet value.
    facet["series_id"] = f"VPRS{facet['series_id']}"
    facet["digitised_items"] = digitised_items
    digitised.append(facet)
df_digitised_images = pd.DataFrame(digitised)[
    ["series_id", "series_title", "digitised_items", "total_images"]
]
display(style_df(df_digitised_images, link_field="series_id"))

series_id,series_title,digitised_items,total_images
VPRS948,"Outward Passengers to Interstate, U.K. and Foreign Ports (Refer to Microfilm Copy VPRS 3506)",246,1661194
VPRS947,Inward Overseas Passenger Lists (see Microfiche Copies: VPRS 7666 United Kingdom Ports; VPRS 7667 Foreign Ports; VPRS 13439 New Zealand Ports),267,1608518
VPRS14,"Register of Assisted Immigrants from the United Kingdom [refer to microform copy, VPRS 3502]",20,173167
VPRS4527,Ward Register (known as Children's Registers 1864 - 1887),156,55431
VPRS13579,Teacher Record Books,153,49106
VPRS515,Central Register of Male Prisoners,100,44285
VPRS5714,"Land Selection Files, Section 12 Closer Settlement Act 1938 [including obsolete and top numbered Closer Settlement and WW1 Discharged Soldier Settlement files]",5823,7721
VPRS8168,Historic Plan Collection,10932,7182
VPRS516,Central Register of Female Prisoners,15,6782
VPRS5357,Land Selection And Correspondence Files,491,322


In [125]:
# Footer: attribution and data licence.
display(
    Markdown(
        """

----

Created by [Tim Sherratt](https://timsherratt.au) for the [GLAM Workbench](https://glam-workbench.net).

Data from the PROV API is available under a CC-BY-NC licence.
"""
    )
)



----

Created by [Tim Sherratt](https://timsherratt.au) for the [GLAM Workbench](https://glam-workbench.net).

Data from the PROV API is available under a CC-BY-NC licence.
