In [1]:
# Sample OAI-PMH request URL: ListIdentifiers on the nb.no repository for set "norart",
# oai_dc metadata, restricted to the window 2023-09-01 .. 2023-09-07.
test_url = "https://www.nb.no/oai/repository?verb=ListIdentifiers&from=2023-09-01&until=2023-09-07&set=norart&metadataPrefix=oai_dc"

In [35]:
import xml.etree.ElementTree as ET
import requests
from datetime import datetime, timedelta

class OAIHarvester:
    """Small OAI-PMH client that harvests ``ListRecords`` one calendar month at a time.

    Parameters
    ----------
    base_url : str
        Endpoint of the OAI-PMH repository (without query string).
    timeout : float or tuple or None, optional
        Timeout handed to ``requests.get`` so a stalled server cannot block the
        notebook forever. ``None`` disables the timeout.
    """

    # All OAI-PMH protocol elements live in this XML namespace, so ElementTree
    # lookups have to use the namespace-qualified ``{uri}tag`` form.
    OAI_NS = '{http://www.openarchives.org/OAI/2.0/}'

    def __init__(self, base_url, timeout=60):
        self.base_url = base_url
        self.timeout = timeout

    @staticmethod
    def _build_params(verb, **kwargs):
        """Build the query parameters for one request.

        A single trailing underscore is stripped from keyword names so that
        arguments which clash with Python keywords can be passed naturally:
        ``from_='2020-01-01'`` is sent to the server as ``from=2020-01-01``.
        """
        params = {'verb': verb}
        for name, value in kwargs.items():
            if len(name) > 1 and name.endswith('_'):
                name = name[:-1]
            params[name] = value
        return params

    def make_request(self, verb, **kwargs):
        """Send one OAI-PMH request and return the ``requests`` response.

        The response is returned as-is (no status check) so callers can inspect
        error responses themselves.
        """
        return requests.get(
            self.base_url,
            params=self._build_params(verb, **kwargs),
            timeout=self.timeout,
        )

    def generate_date_ranges(self, start_year, end_year):
        """Yield ``(first_day, last_day)`` datetimes for every calendar month
        from January of ``start_year`` through December of ``end_year``."""
        start_date = datetime(start_year, 1, 1)
        end_date = datetime(end_year, 12, 31)

        while start_date < end_date:
            # start_date is always the 1st of a month, so +32 days always lands
            # in the following month; stepping back from its 1st gives the last
            # day of the current month.
            end_of_month = start_date + timedelta(days=32)
            end_of_month = end_of_month.replace(day=1) - timedelta(days=1)
            yield (start_date, min(end_of_month, end_date))
            start_date = end_of_month + timedelta(days=1)

    def list_records(self, metadata_prefix, start_year, end_year, **kwargs):
        """Harvest all records between ``start_year`` and ``end_year``.

        One ``ListRecords`` request is issued per calendar month, following
        resumption tokens until the month is exhausted. Extra keyword arguments
        (for example ``set='norart'``) are forwarded to the initial request of
        each month.

        Returns
        -------
        list of xml.etree.ElementTree.Element
            The OAI-PMH ``<record>`` elements from every page.

        Notes
        -----
        A response that is not well-formed XML ends the harvest of the current
        month (a warning is emitted); the remaining months are still attempted.
        """
        import warnings

        record_path = './/' + self.OAI_NS + 'record'
        token_path = './/' + self.OAI_NS + 'resumptionToken'
        all_records = []

        for start_date, end_date in self.generate_date_ranges(start_year, end_year):
            window_from = start_date.strftime('%Y-%m-%d')
            window_until = end_date.strftime('%Y-%m-%d')
            resumption_token = None

            while True:
                if resumption_token:
                    # A resumptionToken is an exclusive argument: it must not be
                    # combined with metadataPrefix/from/until/set.
                    response = self.make_request('ListRecords', resumptionToken=resumption_token)
                else:
                    response = self.make_request(
                        'ListRecords',
                        metadataPrefix=metadata_prefix,
                        from_=window_from,  # translated to "from" by make_request
                        until=window_until,
                        **kwargs
                    )

                try:
                    root = ET.fromstring(response.content)
                except ET.ParseError as exc:
                    # Best effort: give up on this month only, but say so.
                    warnings.warn(
                        "Stopping harvest of %s..%s: response (HTTP status %s) is not valid XML: %s"
                        % (window_from, window_until, getattr(response, 'status_code', 'unknown'), exc),
                        stacklevel=2,
                    )
                    break

                all_records.extend(root.findall(record_path))

                token_element = root.find(token_path)
                next_token = (token_element.text or '').strip() if token_element is not None else ''
                if not next_token:
                    break
                resumption_token = next_token

        return all_records

# Usage example
# harvester = OAIHarvester('http://example.com/oai')
# records = harvester.list_records('oai_dc', 2020, 2022)
# for record in records:
#     print(record)


In [60]:
import xml.etree.ElementTree as ET
import requests
from datetime import datetime, timedelta

class OAIHarvester:
    """Small OAI-PMH client that saves every ``ListRecords`` page to disk.

    Parameters
    ----------
    base_url : str
        Endpoint of the OAI-PMH repository (without query string).
    timeout : float or tuple or None, optional
        Timeout handed to ``requests.get`` so a stalled server cannot block the
        notebook forever. ``None`` disables the timeout.
    """

    # All OAI-PMH protocol elements live in this XML namespace, so ElementTree
    # lookups have to use the namespace-qualified ``{uri}tag`` form.
    OAI_NS = '{http://www.openarchives.org/OAI/2.0/}'

    def __init__(self, base_url, timeout=60):
        self.base_url = base_url
        self.timeout = timeout

    def make_request(self, verb, **kwargs):
        """Send one OAI-PMH request and return the ``requests`` response.

        The response is returned as-is (no status check) so callers can inspect
        error responses themselves.
        """
        params = {'verb': verb}
        params.update(kwargs)
        return requests.get(self.base_url, params=params, timeout=self.timeout)

    def list_records(self, metadata_prefix, *, output_dir='xml', **kwargs):
        """Harvest ``ListRecords`` page by page, writing each page to a file.

        Pages are stored as ``<output_dir>/page<N>.xml`` (N starts at 1) and the
        harvest follows resumption tokens until the server stops sending one.
        Extra keyword arguments (for example ``set='norart'``) are forwarded to
        the initial request.

        Parameters
        ----------
        metadata_prefix : str
            Value of the OAI-PMH ``metadataPrefix`` argument.
        output_dir : str, optional
            Directory for the page files; created if it does not exist.

        Returns
        -------
        list of str
            Paths of the files written, in harvest order.

        Notes
        -----
        A response that is not well-formed XML is not written; it ends the
        harvest with a warning instead of an exception.
        """
        import os
        import warnings

        os.makedirs(output_dir, exist_ok=True)
        token_path = './/' + self.OAI_NS + 'resumptionToken'

        written = []
        resumption_token = None
        count = 1
        while True:
            if resumption_token:
                # A resumptionToken is an exclusive argument: it must not be
                # combined with metadataPrefix/set.
                response = self.make_request('ListRecords', resumptionToken=resumption_token)
            else:
                response = self.make_request('ListRecords', metadataPrefix=metadata_prefix, **kwargs)

            try:
                root = ET.fromstring(response.content)
            except ET.ParseError as exc:
                warnings.warn(
                    "Stopping harvest at page %d: response (HTTP status %s) is not valid XML: %s"
                    % (count, getattr(response, 'status_code', 'unknown'), exc),
                    stacklevel=2,
                )
                break

            # Element objects have no write() method (only ElementTree does).
            # Saving the raw bytes also keeps the server's original encoding and
            # namespace prefixes instead of ElementTree's re-serialised form.
            file = os.path.join(output_dir, "page" + str(count) + ".xml")
            with open(file, 'wb') as handle:
                handle.write(response.content)
            written.append(file)
            count += 1

            token_element = root.find(token_path)
            next_token = (token_element.text or '').strip() if token_element is not None else ''
            if not next_token:
                break
            resumption_token = next_token

        return written

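# Usage example
# harvester = OAIHarvester('http://example.com/oai')
# harvester.list_records('oai_dc', set='some_set')
# This version of list_records takes no year arguments; response pages are
# written to xml/page<N>.xml rather than collected into a list of records.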


In [58]:
# Fetch the first ListRecords page (MARC21, set "norart") directly with requests,
# without going through OAIHarvester; the result is kept in `response`.
test_url = "https://bibsys.alma.exlibrisgroup.com/view/oai/47BIBSYS_NETWORK/request"
verb = 'ListRecords'
kwargs = {'metadataPrefix': 'marc21', 'set': 'norart'}

# The verb goes first, followed by the verb-specific arguments.
params = {'verb': verb, **kwargs}
response = requests.get(test_url, params=params)

In [47]:
# Smoke test: issue a single ListRecords request through the harvester and keep
# the raw response in `res` for inspection.
harvester = OAIHarvester(test_url)
# Full harvest call, currently disabled:
# records = harvester.list_records('marc21', 2020, 2022, set='norart')
res = harvester.make_request('ListRecords', metadataPrefix='marc21', set='norart')

In [61]:
# Run the paged harvest of set "norart" in MARC21 format against `test_url`.
harvester = OAIHarvester(test_url)
records = harvester.list_records('marc21', set='norart')

list_records
resumption_token all@all@norart@marc21@15591023080002201
resumption_token all@all@norart@marc21@15591027810002201
resumption_token all@all@norart@marc21@15591029120002201
resumption_token all@all@norart@marc21@15591030900002201
resumption_token all@all@norart@marc21@15591037130002201
resumption_token all@all@norart@marc21@15591038130002201
resumption_token all@all@norart@marc21@15591041240002201
resumption_token all@all@norart@marc21@15591042930002201
resumption_token all@all@norart@marc21@15591044160002201
resumption_token all@all@norart@marc21@15591046580002201
resumption_token all@all@norart@marc21@20399462480002201
resumption_token all@all@norart@marc21@45133968800002201
resumption_token all@all@norart@marc21@57385540450002201
resumption_token all@all@norart@marc21@57738972010002201
resumption_token all@all@norart@marc21@58658307590002201
resumption_token all@all@norart@marc21@58715770070002201
resumption_token all@all@norart@marc21@58758725610002201
resumption_token a

KeyboardInterrupt: 

In [50]:
# Parse the body of `response` into an ElementTree element for interactive inspection.
root = ET.fromstring(response.content)
# all_records.extend(root.findall('.//record'))

In [56]:
# The response declares xmlns="http://www.openarchives.org/OAI/2.0/" as its default
# namespace, so the tag is written in {namespace}tag form for the search to match.
root.findall('.//{http://www.openarchives.org/OAI/2.0/}resumptionToken')

[<Element '{http://www.openarchives.org/OAI/2.0/}resumptionToken' at 0x7f61de344180>]

In [39]:
# Display the response body as text to see the structure of a ListRecords page.
response.text

'<?xml version="1.0" encoding="UTF-8"?><OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd" xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n  <responseDate>2023-12-13T14:38:10Z</responseDate>\n  <request verb="ListRecords" metadataPrefix="marc21" set="norart">https://bibsys-network.alma.exlibrisgroup.com/view/oai/47BIBSYS_NETWORK/request</request>\n<ListRecords><record><header><identifier>oai:urm_publish:991112256804702201</identifier><datestamp>2022-12-15T03:10:00Z</datestamp><setSpec>norart</setSpec><setSpec>oai_share_vde</setSpec><setSpec>oai_komplett</setSpec><setSpec>oai_komplett_bib</setSpec><setSpec>solstad</setSpec><setSpec>nasjonalbibliografien</setSpec></header><metadata>\n<record xmlns="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/sche

In [None]:
# NOTE(review): bare reference to requests.get with no call; it only evaluates to the
# function object. Looks like a leftover scratch cell; confirm it can be removed.
requests.get

In [14]:
# Display `res`; the repr shows the HTTP status code of the response.
res

<Response [500]>

In [9]:
# Display the body of `res` to read the server's error report.
res.text

'<html><head><title>Pivotal tc Runtime 3.1.2.RELEASE/7.0.64.B.RELEASE - Error report</title><style><!--H1 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:22px;} H2 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:16px;} H3 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:14px;} BODY {font-family:Tahoma,Arial,sans-serif;color:black;background-color:white;} B {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;} P {font-family:Tahoma,Arial,sans-serif;background:white;color:black;font-size:12px;}A {color : black;}A.name {color : black;}HR {color : #525D76;}--></style> </head><body><h1>HTTP Status 500 - java.lang.RuntimeException: Request cannot be executed; I/O reactor status: STOPPED</h1><HR size="1" noshade="noshade"><p><b>type</b> Status report</p><p><b>message</b> <u>java.lang.RuntimeException: Request cannot be executed; I/O reactor status: STOPPED</u></p><p

In [15]:
# Check that the Alma endpoint answers at all by sending a plain ListSets request
# (verb embedded directly in the URL), then display the response status.
test2_url = "https://bibsys.alma.exlibrisgroup.com/view/oai/47BIBSYS_NETWORK/request?verb=ListSets"

res = requests.get(test2_url)

res

<Response [200]>

In [18]:
# Same ListSets check, but stored under the name `test_url`.
# NOTE(review): `test_url` is also used as the base URL for OAIHarvester in other cells.
# With this value the URL already carries "?verb=ListSets", and make_request adds its
# own `verb` parameter on top; presumably that yields a request with two verbs.
# Confirm, and consider a separate name for this URL.
test_url = "https://bibsys.alma.exlibrisgroup.com/view/oai/47BIBSYS_NETWORK/request?verb=ListSets"

requests.get(test_url)

<Response [200]>

In [17]:
# Connectivity check: plain GET against the nb.no front page.
requests.get("https://www.nb.no")

<Response [200]>