### Update bib records via Alma API

Update several bib records through the Alma API: replace the URL in field 856 $u and create field 024 with the ARK ID.

The script reads the input file named 'SoSaGraphics_ARK_MMSIDs.json' (generated by another script) and loops through the list of ID-URL pairs. For each pair it downloads the bib record as MARCXML, finds the 856 field, replaces the URL, creates an identifier field (024), and uploads the updated bib record. It also adds a license field (540) and an access restriction field (506) when they are missing.
This script assumes there is only one field 856 $$u; if a record has several, every one of them is overwritten with the ARK URL.

To run this script, add the following:

Write your Alma API key in a file named config.py in the same folder as your script. The file should look like this:

(This file is listed in .gitignore, so the API keys are not publicly shared.)


In [44]:
# Alma API key for the SLSP NZ (sandbox or production).
# Replace the placeholder string with your own key; the script reads it as config.api_key_nz.

api_key_nz = "NZ_API_KEY"



This script assumes that field 856 contains the old links to the Sosa graphics (edoc.zhbluzern.ch) and can be overwritten (which is true for these ca. 2300 titles, but may have to be checked for another purpose). 

In [15]:
import requests
import urllib.request
import json
import xml.etree.ElementTree as ET
import re
import config

# Set your Alma API key, base API endpoint, input file name
api_key = config.api_key_nz
base_url = 'https://api-eu.hosted.exlibrisgroup.com/almaws/v1'
#filename = 'SoSaGraphics_ARK_MMSIDs_20230310.json'
filename = 'sosatest01.json'
#filename = 'sosatest10.json'

# Load the JSON input as a list of dictionaries; the update loop reads the
# keys 'networkID' and 'ARK' from each entry.
# The context manager closes the file handle as soon as the data is loaded,
# and the explicit encoding keeps the result independent of the platform default.
with open(filename, encoding='utf-8') as f:
    id_url_list = json.load(f)
# debug: print(id_url_list)

def _ensure_datafield(record, tag, ind1, ind2, subfields, match_message):
    """Append a datafield to *record* unless one with the same $a already exists.

    Args:
        record: the MARCXML <record> element to inspect and extend.
        tag: MARC tag of the datafield, e.g. "024".
        ind1: value of the first indicator attribute.
        ind2: value of the second indicator attribute.
        subfields: list of (code, text) tuples in output order; it must
            contain a subfield with code "a", whose text is used for the
            duplicate check.
        match_message: text printed when a matching field is already present.

    Returns:
        True if a new datafield was appended, False if a match was found.
    """
    match_value = dict(subfields)["a"]
    existing = record.findall(f".//datafield[@tag='{tag}']/subfield[@code='a']")
    if any(subfield.text == match_value for subfield in existing):
        print(match_message)
        return False

    datafield = ET.SubElement(record, "datafield", attrib={"ind1": ind1, "ind2": ind2, "tag": tag})
    for code, text in subfields:
        ET.SubElement(datafield, "subfield", attrib={"code": code}).text = text
    return True


# Loop through the list and update each bib record
for bib in id_url_list:
    nzid = bib['networkID']
    ark = "https://n2t.net/" + bib['ARK']

    # Construct the API URL to get the bib record in MARCXML format.
    # NOTE: the API key is part of the query string, so never print or log this URL.
    bib_url = f'{base_url}/bibs/{nzid}?view=full&expand=None&apikey={api_key}'

    # Send the request to get the bib record
    response = requests.get(bib_url)
    if not response.ok:
        # Without a valid record there is nothing to back up or to modify;
        # skip it instead of storing an error message as "backup" and crashing later.
        print(f'Bib record {nzid} could not be retrieved - Response code: {response.status_code}')
        continue

    # Create a backup of the untouched record. The raw bytes are written so the
    # file is identical to what the API returned, whatever the platform encoding is.
    backupfile = "backup-xml/" + nzid + ".xml"
    with open(backupfile, "wb") as f:
        f.write(response.content)

    # Parse the MARCXML data from the response content
    marcxml = ET.fromstring(response.content)
    record = marcxml.find("record")
    if record is None:
        print(f'Bib record {nzid} skipped - no <record> element in the response')
        continue

    # Replace the URL in subfield $u of every 856 field with the ARK URL
    for field in record.findall('.//datafield[@tag="856"]'):
        url_subfield = field.find('.//subfield[@code="u"]')
        if url_subfield is not None:
            url_subfield.text = ark
            # debug: print(url_subfield.text)

    # Identifier field: 024 7_ $a <ARK> $2 ark
    _ensure_datafield(
        record, "024", "7", " ",
        [("a", bib['ARK']), ("2", "ark")],
        "ark match",
    )

    # License field:
    # 540 $aPublic Domain Mark 1.0$uhttps://creativecommons.org/publicdomain/mark/1.0/$2cc
    _ensure_datafield(
        record, "540", " ", " ",
        [
            ("a", "Public Domain Mark 1.0"),
            ("u", "https://creativecommons.org/publicdomain/mark/1.0/"),
            ("2", "cc"),
        ],
        "license match",
    )

    # Restriction on access field:
    # 506 $$a Open Access $$f Unrestricted online access $$u http://purl.org/coar/access_right/c_abf2 $$2 star
    # ($f was previously built but never attached to the field; it is included now.)
    _ensure_datafield(
        record, "506", " ", " ",
        [
            ("a", "Open Access"),
            ("f", "Unrestricted online access"),
            ("u", "http://purl.org/coar/access_right/c_abf2"),
            ("2", "star"),
        ],
        "restriction on access match",
    )

    # Bring the appended fields into tag order. Elements without a 'tag'
    # attribute (the leader) sort as '000' and therefore stay in front;
    # sorted() is stable, so fields sharing a tag keep their relative order.
    record[:] = sorted(record, key=lambda field_or_contr: field_or_contr.get('tag', '000'))

    # Convert the updated MARCXML back to a string
    updated_marcxml = ET.tostring(marcxml, encoding='utf-8')

    # Construct the API URL to update the bib record
    update_url = f'{base_url}/bibs/{nzid}?apikey={api_key}'

    # Send the request to update the bib record with the updated MARCXML
    response = requests.put(update_url, data=updated_marcxml, headers={'Content-Type': 'application/xml'})

    # Report the outcome; only claim success when the API accepted the update
    if response.ok:
        print(f'Bib record {nzid} updated with URL {ark} - Response code: {response.status_code}')
    else:
        print(f'Bib record {nzid} NOT updated - Response code: {response.status_code}')
    #debug: break

ark match
license match
restriction on access match
Bib record 991062340789705501 updated with URL https://n2t.net/ark:/63274/zhb1rm0d - Response code: 200
