In [1]:
# Sample OAI-PMH ListIdentifiers request against the nb.no repository,
# written one query parameter per line so the arguments are easy to scan.
test_url = (
    "https://www.nb.no/oai/repository"
    "?verb=ListIdentifiers"
    "&from=2023-09-01"
    "&until=2023-09-07"
    "&set=norart"
    "&metadataPrefix=oai_dc"
)

In [2]:
import logging
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta

import requests

class OAIHarvester:
    """Harvest records from an OAI-PMH endpoint one calendar month at a time.

    The harvester issues ``ListRecords`` requests against ``base_url``,
    restricting each request to a single month and following resumption
    tokens until the server stops returning one.

    Args:
        base_url: URL of the OAI-PMH endpoint that every request is sent to.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one a stalled server can block the harvest forever.
    """

    # Namespace (in ElementTree's ``{uri}`` form) of OAI-PMH 2.0 protocol elements.
    OAI_NS = '{http://www.openarchives.org/OAI/2.0/}'

    def __init__(self, base_url, timeout=30):
        self.base_url = base_url
        self.timeout = timeout

    def make_request(self, verb, **kwargs):
        """Send a GET request for ``verb`` and return the ``requests`` response.

        Args:
            verb: Value of the ``verb`` query parameter.
            **kwargs: Further query parameters. ``from_`` is sent as ``from``
                because ``from`` is a Python keyword and cannot be written as
                a keyword argument directly.

        Returns:
            The response object returned by ``requests.get``.
        """
        params = {'verb': verb}
        for key, value in kwargs.items():
            params['from' if key == 'from_' else key] = value
        return requests.get(self.base_url, params=params, timeout=self.timeout)

    def generate_date_ranges(self, start_year, end_year):
        """Yield ``(first_day, last_day)`` datetimes for each calendar month.

        The ranges run from 1 January of ``start_year`` up to 31 December of
        ``end_year``; nothing is yielded when ``start_year > end_year``.
        """
        start_date = datetime(start_year, 1, 1)
        end_date = datetime(end_year, 12, 31)

        while start_date < end_date:
            # start_date is always the 1st, so +32 days always lands in the
            # following month; snapping to day 1 and stepping back one day
            # gives the last day of the current month.
            end_of_month = start_date + timedelta(days=32)
            end_of_month = end_of_month.replace(day=1) - timedelta(days=1)
            yield (start_date, min(end_of_month, end_date))
            start_date = end_of_month + timedelta(days=1)

    def _records_in(self, root):
        """Return the ``record`` elements found below ``root``."""
        # OAI-PMH responses are namespaced, so a bare './/record' finds
        # nothing in them; the un-namespaced search is only a fallback for
        # documents that carry no namespace at all.
        return (root.findall('.//%srecord' % self.OAI_NS)
                or root.findall('.//record'))

    def _next_token(self, root):
        """Return the resumption token in ``root``, or ``None`` if there is none."""
        element = root.find('.//%sresumptionToken' % self.OAI_NS)
        if element is None:
            element = root.find('.//resumptionToken')
        # An element with no text (empty token) marks the end of the list.
        if element is None or not element.text:
            return None
        return element.text.strip() or None

    def list_records(self, metadata_prefix, start_year, end_year, **kwargs):
        """Collect every record returned by ``ListRecords`` for the given years.

        Args:
            metadata_prefix: Value sent as ``metadataPrefix``.
            start_year: First year to harvest (from 1 January).
            end_year: Last year to harvest (through 31 December).
            **kwargs: Extra query parameters for the first request of each
                month (for example ``set='norart'``).

        Returns:
            A list of ``xml.etree.ElementTree.Element`` objects, one per
            ``record`` element found. A month whose response is an HTTP error
            or is not well-formed XML is logged and skipped, so the result
            may be partial.

        Raises:
            Whatever ``make_request`` raises (for example connection errors or
            timeouts from ``requests``) propagates to the caller.
        """
        log = logging.getLogger(__name__)
        all_records = []

        for start_date, end_date in self.generate_date_ranges(start_year, end_year):
            from_day = start_date.strftime('%Y-%m-%d')
            until_day = end_date.strftime('%Y-%m-%d')
            resumption_token = None
            while True:
                if resumption_token:
                    response = self.make_request('ListRecords', resumptionToken=resumption_token)
                else:
                    # The protocol argument is named 'from', which is a Python
                    # keyword, hence the dict unpacking.
                    response = self.make_request(
                        'ListRecords',
                        metadataPrefix=metadata_prefix,
                        until=until_day,
                        **{'from': from_day},
                        **kwargs
                    )

                if not response.ok:
                    # Error pages are HTML, not OAI-PMH XML; report and move on
                    # to the next month instead of failing silently.
                    log.warning('ListRecords %s..%s failed with HTTP status %s; skipping',
                                from_day, until_day, response.status_code)
                    break

                try:
                    root = ET.fromstring(response.content)
                except ET.ParseError as exc:
                    log.warning('ListRecords %s..%s returned unparsable XML (%s); skipping',
                                from_day, until_day, exc)
                    break

                all_records.extend(self._records_in(root))

                resumption_token = self._next_token(root)
                if resumption_token is None:
                    break

        return all_records

# Usage example: harvest 'oai_dc' records for 2020 through 2022 from a
# placeholder endpoint and print each element that comes back.
harvester = OAIHarvester(base_url='http://example.com/oai')
records = harvester.list_records(
    metadata_prefix='oai_dc',
    start_year=2020,
    end_year=2022,
)
for record in records:
    print(record)


In [13]:
# One-week ListIdentifiers probe of the 'norart' set using the 'mods' metadata prefix.
test_url = "https://www.nb.no/oai/repository?verb=ListIdentifiers&from=2023-08-01&until=2023-08-07&set=norart&metadataPrefix=mods"

# Always pass a timeout: without one, requests waits indefinitely on an
# unresponsive server and the kernel appears to hang.
res = requests.get(test_url, timeout=30)

In [None]:
# NOTE(review): bare reference to the function, not a call — evaluating it only
# displays the function object. The cell was never executed (no execution
# count); looks like leftover scratch work that could be removed.
requests.get

In [14]:
# Display the response object; its repr shows the HTTP status code.
res

<Response [500]>

In [9]:
# Inspect the raw response body (an HTML error page for the failed request).
# NOTE(review): this cell's execution count (9) is lower than that of the cell
# assigning `res` (13), so the saved output may come from an earlier request —
# re-run the notebook top to bottom to confirm.
res.text

'<html><head><title>Pivotal tc Runtime 3.1.2.RELEASE/7.0.64.B.RELEASE - Error report</title><style><!--H1 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:22px;} H2 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:16px;} H3 {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;font-size:14px;} BODY {font-family:Tahoma,Arial,sans-serif;color:black;background-color:white;} B {font-family:Tahoma,Arial,sans-serif;color:white;background-color:#525D76;} P {font-family:Tahoma,Arial,sans-serif;background:white;color:black;font-size:12px;}A {color : black;}A.name {color : black;}HR {color : #525D76;}--></style> </head><body><h1>HTTP Status 500 - java.lang.RuntimeException: Request cannot be executed; I/O reactor status: STOPPED</h1><HR size="1" noshade="noshade"><p><b>type</b> Status report</p><p><b>message</b> <u>java.lang.RuntimeException: Request cannot be executed; I/O reactor status: STOPPED</u></p><p