From afc5c85b4319c3ad952ef5d12aa6b74abe0660e3 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Thu, 20 Nov 2025 15:29:27 +0100 Subject: [PATCH 01/10] created 2 commands: generate_submission_template.py and upload_submission.py --- generate_submission_template.py | 64 ++++++++++ rest_client/submission_client.py | 203 +++++++++++++++++++++++++++++++ upload_submission.py | 84 +++++++++++++ 3 files changed, 351 insertions(+) create mode 100644 generate_submission_template.py create mode 100644 rest_client/submission_client.py create mode 100644 upload_submission.py diff --git a/generate_submission_template.py b/generate_submission_template.py new file mode 100644 index 0000000..1a77d47 --- /dev/null +++ b/generate_submission_template.py @@ -0,0 +1,64 @@ +# This software is licenced under the BSD 3-Clause licence +# available at https://opensource.org/licenses/BSD-3-Clause +# and described in the LICENCE file in the root of this project + +""" +Python 3 application for Submission Template Generation, using the dspace.py API client library. 
+""" +import argparse +import os + +from rest_client.submission_client import SubmissionClient + +# Example system variables needed for authentication and submission template generation +# (all of these variables can be overwritten with command line arguments) +# AUTHORIZATION_TOKEN= +# DSPACE_API_ENDPOINT= +# SUBMISSION_DEFINITION_NAME= + +# Parse command-line arguments +parser = argparse.ArgumentParser(description="Command-line arguments") +parser.add_argument("filename", help="CSV template file name") +parser.add_argument("-t", "--token", help="Authorization token (optional), " + "or use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " + "or use the DSPACE_API_ENDPOINT env variable") +parser.add_argument("-s", "--submission-definition-name", help="Submission Definition Name(optional), " + "or use the SUBMISSION_DEFINITION_NAME env variable") +args = parser.parse_args() + +AUTHORIZATION_TOKEN = None +if args.token: + AUTHORIZATION_TOKEN = args.token +elif 'AUTHORIZATION_TOKEN' in os.environ: + AUTHORIZATION_TOKEN = os.environ['AUTHORIZATION_TOKEN'] + +if AUTHORIZATION_TOKEN is None: + print('No authorization token provided!') + exit(1) + +SUBMISSION_DEFINITION_NAME = 'traditional' +if args.submission_definition_name: + SUBMISSION_DEFINITION_NAME = args.submission_definition_name +elif 'SUBMISSION_DEFINITION_NAME' in os.environ: + SUBMISSION_DEFINITION_NAME = os.environ['SUBMISSION_DEFINITION_NAME'] + +API_ENDPOINT = 'http://localhost:8080/server/api' +if args.dspace_api_endpoint: + API_ENDPOINT = args.dspace_api_endpoint +elif 'DSPACE_API_ENDPOINT' in os.environ: + API_ENDPOINT = os.environ['DSPACE_API_ENDPOINT'] + +FILE_TYPE = 'csv' + +d = SubmissionClient(api_endpoint=API_ENDPOINT, authorization_token=AUTHORIZATION_TOKEN) + +# Authenticate against the DSpace client +authenticated = d.authenticate() +if not authenticated: + print('Error logging in! 
Giving up.') + exit(1) + +# for now, only CSV templates are generated +if FILE_TYPE == 'csv': + d.generateCsvTemplate(args.filename, SUBMISSION_DEFINITION_NAME) diff --git a/rest_client/submission_client.py b/rest_client/submission_client.py new file mode 100644 index 0000000..991b415 --- /dev/null +++ b/rest_client/submission_client.py @@ -0,0 +1,203 @@ +from dspace_rest_client.client import DSpaceClient +from enum import Enum +import logging +import csv +from requests import Response + +__all__ = ['SubmissionClient'] + +logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) +_logger = logging.getLogger('clarin.dspace') + +API_ENDPOINT = '' +AUTHORIZATION_TOKEN = '' + +class PatchOperation(Enum): + ADD = 'add' + REMOVE = 'remove' + REPLACE = 'replace' + MOVE = 'move' + +class SubmissionClient: + def __init__(self, api_endpoint = API_ENDPOINT, authorization_token = AUTHORIZATION_TOKEN): + self.authorization_token = authorization_token + self.api_endpoint = api_endpoint + self.dspaceClient = DSpaceClient(api_endpoint=self.api_endpoint, username = 'CLARIN_DSPACE_USER') + self.dspaceClient.auth_request_headers["Authorization"] = "Bearer " + self.authorization_token + self.valid_operations = [member.value for member in PatchOperation] + + def authenticate(self, retry=False): + if self.authorization_token == '': + print('No authorization token provided!') + return False + return self.dspaceClient.authenticate() + + def create_community(self, parent, data): + return self.dspaceClient.create_community(parent, data) + + def get_item(self, uuid): + return self.dspaceClient.get_item(uuid) + + def create_submission(self, parent): + url = f'{self.api_endpoint}/submission/workspaceitems' + if not parent: + _logger.error('Need a parent UUID!') + return None + params = {'owningCollection': parent} + + r = self.dspaceClient.api_post(url, params, None) + if r.status_code == 201: + # 201 Created - success! 
+ _logger.info(f'New submission with id {r.json()["id"]} created successfully!') + else: + _logger.error(f'create operation failed: {r.status_code}: {r.text} ({url})') + return r + + def create_submission_from_csv(self, parent, csv_file_path): + payload = self.parse_submission_payload_csv(csv_file_path) + if len(payload) > 0: + return self.create_submission_from_payload(parent, payload) + else: + the_response = Response() + the_response.status_code = 400 + the_response._content = b'{ "error" : 400, "message" : "No metadata found in csv file" }' + the_response.url = self.api_endpoint + '/submission/workspaceitems?owningCollection=' + parent + return the_response + + def create_submission_from_payload(self, parent, payload): + create_response = self.create_submission(parent) + if create_response.status_code == 201: + workspace_item_id = create_response.json()['id'] + patch_response = self.patch_metadata(workspace_item_id, payload) + if patch_response.status_code == 200: + patch_response.status_code = 201 + return patch_response + + return create_response + + def patch_metadata(self, workspace_item_id, data): + url = f'{self.api_endpoint}/submission/workspaceitems/{workspace_item_id}' + if not data: + _logger.error('No data provided for patch operation!') + return None + if data.__class__ != list or len(data) == 0: + _logger.error('Input data should be in the form of the list of operations') + return None + + for operation in data: + print(operation) + path = operation['path'] if 'path' in operation else None + if not path: + _logger.error('Need valid path eg. 
/withdrawn or /metadata/dc.title/0') + return None + op = operation['op'] if 'op' in operation else None + value = operation['value'] if 'value' in operation else None + if op not in self.valid_operations: + _logger.error('Invalid operation name: {}'.format(op)) + return None + if value is None and op != PatchOperation.REMOVE.value: + # missing value required for add/replace/move operations + _logger.error('Missing required "value" argument for add/replace/move operations') + return None + if op == PatchOperation.REPLACE.value and not isinstance(value, dict): + # value should be object in replace operation + _logger.error('Invalid value format for replace operation - should be object') + return None + if op == PatchOperation.ADD.value and data.__class__ != list: + # value should be list in add operation + _logger.error('Invalid value format for add operation - should be list') + return None + + # perform patch request + r = self.dspaceClient.session.patch(url, json = data, headers=self.dspaceClient.request_headers) + self.dspaceClient.update_token(r) + + if r.status_code == 200: + # 200 Success + _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + else: + _logger.error(r.text) + # Return the raw API response + return r + + def parse_submission_payload_csv(self, file_path): + operations = [] + with open(file_path, 'r') as file: + reader = csv.reader(file) + section_path = '' + operation_map = {} + for row in reader: + print(row) + if row[0] == '__section__': + section_path = '/sections/' + row[1] + elif len(row) > 1 and row[1] is not None and row[1] != '': + path = section_path + '/' + row[0] + if operation_map.get(path) is None: + value = [] + for num in range(1, len(row)): + value.append({ + 'value': row[num] + }) + operation = { + 'op': 'add', + 'path': path, + 'value': value + } + operation_map[path] = operation + else: + operation = operation_map.get(path) + for num in range(1, len(row)): + operation['value'].append({ + 'value': 
row[num] + }) + + for key in operation_map: + operations.append(operation_map[key]) + print(f'Submission payload:\n{operations}') + return operations + + def get_submission_form_names(self, submission_definition_id): + url = f'{self.api_endpoint}/config/submissiondefinitions/{submission_definition_id}?embed=sections' + r = self.dspaceClient.session.get(url, headers=self.dspaceClient.request_headers) + if r is not None and r.status_code == 200: + _logger.info(f'successful retrieval of submission definition {submission_definition_id}') + form_names = [] + sections = r.json().get('_embedded', {}).get('sections', {}).get('_embedded', {}).get('sections', []) + for section in sections: + if section.get('sectionType') == 'submission-form': + form_names.append(section.get('id')) + return form_names + + return None + + def generateCsvTemplate(self, csv_file_name, submission_definition_name): + submission_form_names = self.get_submission_form_names(submission_definition_name) + if submission_form_names is not None and len(submission_form_names) > 0: + csv_lines = [] + for form_name in submission_form_names: + url = f'{self.api_endpoint}/config/submissionforms/{form_name}' + r = self.dspaceClient.session.get(url, headers=self.dspaceClient.request_headers) + if r is not None and r.status_code == 200: + _logger.info(f'successful retrieval of submission form {form_name}') + rows = r.json().get('rows', []) + if len(rows) > 0: + csv_lines.append(['__section__', form_name]) + for row in rows: + fields = row.get('fields', []) + if len(fields) > 0: + for field in fields: + selectableMetadata = field.get("selectableMetadata", []) + if len(selectableMetadata) > 0: + metadata_field = selectableMetadata[0]["metadata"] + if metadata_field is not None: + csv_lines.append([metadata_field,'']) + + if len(csv_lines) > 0: + with open(csv_file_name, 'w', newline='') as file: + writer = csv.writer(file) + writer.writerows(csv_lines) + + _logger.info(f'CSV template for submission definition: 
"{submission_definition_name}" ' + f'written to: "{csv_file_name}"') + else: + _logger.error(f'No submission forms found for submission definition name: "{submission_definition_name}"') \ No newline at end of file diff --git a/upload_submission.py b/upload_submission.py new file mode 100644 index 0000000..f2ac15a --- /dev/null +++ b/upload_submission.py @@ -0,0 +1,84 @@ +# This software is licenced under the BSD 3-Clause licence +# available at https://opensource.org/licenses/BSD-3-Clause +# and described in the LICENCE file in the root of this project + +""" +Python 3 application for Submission Upload, using the dspace.py API client library. +""" +import argparse +import os + +from rest_client.submission_client import SubmissionClient + +# Example system variables needed for authentication and submission upload +# (all of these variables can be overwritten with command line arguments) +# AUTHORIZATION_TOKEN= +# DSPACE_API_ENDPOINT= +# DSPACE_COLLECTION_ID= + +# Parse command-line arguments +parser = argparse.ArgumentParser(description="Command-line arguments") +parser.add_argument("filename", help="CSV file name") +parser.add_argument("-t", "--token", help="Authorization token (optional), " + "or use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " + "or use the DSPACE_API_ENDPOINT env variable") +parser.add_argument("-c", "--collection-id", help="DSpace Collection ID (optional), " + "or use the DSPACE_COLLECTION_ID env variable") +args = parser.parse_args() + +if not os.path.exists(args.filename): + print(f'File \"{args.filename}\" does not exist!') + exit(1) + +AUTHORIZATION_TOKEN = None +if args.token: + AUTHORIZATION_TOKEN = args.token +elif 'AUTHORIZATION_TOKEN' in os.environ: + AUTHORIZATION_TOKEN = os.environ['AUTHORIZATION_TOKEN'] + +if AUTHORIZATION_TOKEN is None: + print('No authorization token provided!') + exit(1) + +API_ENDPOINT = 'http://localhost:8080/server/api' +if 
args.dspace_api_endpoint: + API_ENDPOINT = args.dspace_api_endpoint +elif 'DSPACE_API_ENDPOINT' in os.environ: + API_ENDPOINT = os.environ['DSPACE_API_ENDPOINT'] + +DSPACE_COLLECTION_ID = None +if args.collection_id: + DSPACE_COLLECTION_ID = args.collection_id +elif 'DSPACE_COLLECTION_ID' in os.environ: + DSPACE_COLLECTION_ID = os.environ['DSPACE_COLLECTION_ID'] + +if DSPACE_COLLECTION_ID is None: + print('No DSpace collection id provided!') + exit(1) + +FILE_TYPE = 'csv' + +d = SubmissionClient(api_endpoint=API_ENDPOINT, authorization_token=AUTHORIZATION_TOKEN) + +# Authenticate against the DSpace client +authenticated = d.authenticate() +if not authenticated: + print('Error logging in! Giving up.') + exit(1) + +# for now, only CSV files are supported +if FILE_TYPE == 'csv': + submissionResponse = d.create_submission_from_csv(DSPACE_COLLECTION_ID, args.filename) + if submissionResponse is not None: + if submissionResponse.status_code == 201: + print(f'Submission \"{submissionResponse.json()["_embedded"]["item"]["name"]}\" ' + f'with id {submissionResponse.json()["id"]} created successfully.') + else: + print(f'Submission creation failed with status code {submissionResponse.status_code}') + if submissionResponse.request and submissionResponse.request.method: + print(f'Method: {submissionResponse.request.method}') + if submissionResponse.url: + print(f'Request URL: {submissionResponse.url}') + if submissionResponse.text: + print(f'Reason: {submissionResponse.text}') \ No newline at end of file From 79b4a450fd1159587045cc4f1d9a9d257e5a0ba8 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 11:50:23 +0100 Subject: [PATCH 02/10] utility for submission files upload --- .gitignore | 1 + generate_submission_template.py | 14 +++-- rest_client/submission_client.py | 39 +++++++++--- upload_submission_files.py | 63 +++++++++++++++++++ ...ission.py => upload_submission_metadata.py | 4 -- 5 files changed, 106 insertions(+), 15 deletions(-) create mode 100644 
upload_submission_files.py rename upload_submission.py => upload_submission_metadata.py (96%) diff --git a/.gitignore b/.gitignore index 0dc7c57..0c9c5aa 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ __pycache__/ Pipfile.lock __pypackages__/ .env +.envcmd .venv env/ venv/ diff --git a/generate_submission_template.py b/generate_submission_template.py index 1a77d47..f051b7e 100644 --- a/generate_submission_template.py +++ b/generate_submission_template.py @@ -20,11 +20,13 @@ parser = argparse.ArgumentParser(description="Command-line arguments") parser.add_argument("filename", help="CSV template file name") parser.add_argument("-t", "--token", help="Authorization token (optional), " - "or use the AUTHORIZATION_TOKEN env variable") + "or use the AUTHORIZATION_TOKEN env variable") parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " - "or use the DSPACE_API_ENDPOINT env variable") + "or use the DSPACE_API_ENDPOINT env variable") parser.add_argument("-s", "--submission-definition-name", help="Submission Definition Name(optional), " - "or use the SUBMISSION_DEFINITION_NAME env variable") + "or use the SUBMISSION_DEFINITION_NAME env variable") +parser.add_argument("-r", "--resource-type", help="Resource Type (optional), " + "sample values: corpus (default), lexicalConceptualResource, languageDescription, toolService") args = parser.parse_args() AUTHORIZATION_TOKEN = None @@ -49,6 +51,10 @@ elif 'DSPACE_API_ENDPOINT' in os.environ: API_ENDPOINT = os.environ['DSPACE_API_ENDPOINT'] +RESOURCE_TYPE = 'corpus' +if args.resource_type: + RESOURCE_TYPE = args.resource_type + FILE_TYPE = 'csv' d = SubmissionClient(api_endpoint=API_ENDPOINT, authorization_token=AUTHORIZATION_TOKEN) @@ -61,4 +67,4 @@ # for now, only CSV templates are generated if FILE_TYPE == 'csv': - d.generateCsvTemplate(args.filename, SUBMISSION_DEFINITION_NAME) + d.generateCsvTemplate(args.filename, SUBMISSION_DEFINITION_NAME, RESOURCE_TYPE) diff --git 
a/rest_client/submission_client.py b/rest_client/submission_client.py index 991b415..6450fdc 100644 --- a/rest_client/submission_client.py +++ b/rest_client/submission_client.py @@ -2,6 +2,10 @@ from enum import Enum import logging import csv +import os +# from requests import requests +import requests +from requests import Request from requests import Response __all__ = ['SubmissionClient'] @@ -170,7 +174,7 @@ def get_submission_form_names(self, submission_definition_id): return None - def generateCsvTemplate(self, csv_file_name, submission_definition_name): + def generateCsvTemplate(self, csv_file_name, submission_definition_name, resource_type): submission_form_names = self.get_submission_form_names(submission_definition_name) if submission_form_names is not None and len(submission_form_names) > 0: csv_lines = [] @@ -186,11 +190,17 @@ def generateCsvTemplate(self, csv_file_name, submission_definition_name): fields = row.get('fields', []) if len(fields) > 0: for field in fields: - selectableMetadata = field.get("selectableMetadata", []) - if len(selectableMetadata) > 0: - metadata_field = selectableMetadata[0]["metadata"] - if metadata_field is not None: - csv_lines.append([metadata_field,'']) + type_bind = field.get("typeBind", []) + # check if the resource_type is in the typeBind list + if len(type_bind) == 0 or resource_type in type_bind: + selectable_metadata = field.get("selectableMetadata", []) + if len(selectable_metadata) > 0: + metadata_field = selectable_metadata[0]["metadata"] + if metadata_field is not None: + if metadata_field == 'dc.type': + csv_lines.append([metadata_field,resource_type]) + else: + csv_lines.append([metadata_field,'']) if len(csv_lines) > 0: with open(csv_file_name, 'w', newline='') as file: @@ -200,4 +210,19 @@ def generateCsvTemplate(self, csv_file_name, submission_definition_name): _logger.info(f'CSV template for submission definition: "{submission_definition_name}" ' f'written to: "{csv_file_name}"') else: - _logger.error(f'No 
submission forms found for submission definition name: "{submission_definition_name}"') \ No newline at end of file + _logger.error(f'No submission forms found for submission definition name: "{submission_definition_name}"') + + def upload_file_to_workspace_item(self, workspace_item_id, file_paths): + url = f'{self.api_endpoint}/submission/workspaceitems/{workspace_item_id}' + for file_path in file_paths: + # the API only allows to upload one file per request + file = (os.path.basename(file_path), open(file_path, 'rb')) + files = {'file': file} + req = Request('POST', url, files = files) + prepared_req = self.dspaceClient.session.prepare_request(req) + r = self.dspaceClient.session.send(prepared_req) + if r.status_code == 201: + # 201 Created - success! + print(f'File "{file_path}" uploaded successfully to workspace item {workspace_item_id}') + else: + print(f'File upload for "{file_path}" failed: {r.status_code}: {r.text} ({url})') \ No newline at end of file diff --git a/upload_submission_files.py b/upload_submission_files.py new file mode 100644 index 0000000..29a6b88 --- /dev/null +++ b/upload_submission_files.py @@ -0,0 +1,63 @@ +# This software is licenced under the BSD 3-Clause licence +# available at https://opensource.org/licenses/BSD-3-Clause +# and described in the LICENCE file in the root of this project + +""" +Python 3 application for uploading submission files, using the dspace.py API client library. 
+""" +import argparse +import os + +from rest_client.submission_client import SubmissionClient + +# Example system variables needed for authentication and submission files upload +# (all of these variables can be overwritten with command line arguments) +# AUTHORIZATION_TOKEN= +# DSPACE_API_ENDPOINT= + +# Parse command-line arguments +parser = argparse.ArgumentParser(description="Command-line arguments") +parser.add_argument("-s", "--submission-id", required = True, help="submission ID (required)") +parser.add_argument("-f", "--files", nargs="+", required = True, help="Files to upload (required") +parser.add_argument("-t", "--token", help="Authorization token (optional), " + "or use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " + "or use the DSPACE_API_ENDPOINT env variable") +args = parser.parse_args() + +SUBMISSION_ID = args.submission_id +FILES = args.files + +if SUBMISSION_ID is None: + print('No submission-id parameter provided!') + exit(1) + +if FILES is None: + print('No files parameter provided!') + exit(1) + +AUTHORIZATION_TOKEN = None +if args.token: + AUTHORIZATION_TOKEN = args.token +elif 'AUTHORIZATION_TOKEN' in os.environ: + AUTHORIZATION_TOKEN = os.environ['AUTHORIZATION_TOKEN'] + +if AUTHORIZATION_TOKEN is None: + print('No authorization token provided!') + exit(1) + +API_ENDPOINT = 'http://localhost:8080/server/api' +if args.dspace_api_endpoint: + API_ENDPOINT = args.dspace_api_endpoint +elif 'DSPACE_API_ENDPOINT' in os.environ: + API_ENDPOINT = os.environ['DSPACE_API_ENDPOINT'] + +d = SubmissionClient(api_endpoint=API_ENDPOINT, authorization_token=AUTHORIZATION_TOKEN) + +# Authenticate against the DSpace client +authenticated = d.authenticate() +if not authenticated: + print('Error logging in! 
Giving up.') + exit(1) + +d.upload_file_to_workspace_item(SUBMISSION_ID, args.files) \ No newline at end of file diff --git a/upload_submission.py b/upload_submission_metadata.py similarity index 96% rename from upload_submission.py rename to upload_submission_metadata.py index f2ac15a..e9e101a 100644 --- a/upload_submission.py +++ b/upload_submission_metadata.py @@ -27,10 +27,6 @@ "or use the DSPACE_COLLECTION_ID env variable") args = parser.parse_args() -if not os.path.exists(args.filename): - print(f'File \"{args.filename}\" does not exist!') - exit(1) - AUTHORIZATION_TOKEN = None if args.token: AUTHORIZATION_TOKEN = args.token From ad05b9a9da7316bc5b7a4159a89a6bbd5863ffb4 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 12:27:06 +0100 Subject: [PATCH 03/10] make command line arguments consistent --- ...> generate_submission_metadata_template.py | 33 ++++++++++++------- upload_submission_files.py | 10 +++--- upload_submission_metadata.py | 21 +++++++----- 3 files changed, 40 insertions(+), 24 deletions(-) rename generate_submission_template.py => generate_submission_metadata_template.py (60%) diff --git a/generate_submission_template.py b/generate_submission_metadata_template.py similarity index 60% rename from generate_submission_template.py rename to generate_submission_metadata_template.py index f051b7e..3c5def3 100644 --- a/generate_submission_template.py +++ b/generate_submission_metadata_template.py @@ -18,17 +18,23 @@ # Parse command-line arguments parser = argparse.ArgumentParser(description="Command-line arguments") -parser.add_argument("filename", help="CSV template file name") -parser.add_argument("-t", "--token", help="Authorization token (optional), " - "or use the AUTHORIZATION_TOKEN env variable") -parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " - "or use the DSPACE_API_ENDPOINT env variable") -parser.add_argument("-s", "--submission-definition-name", help="Submission Definition 
Name(optional), " - "or use the SUBMISSION_DEFINITION_NAME env variable") -parser.add_argument("-r", "--resource-type", help="Resource Type (optional), " - "sample values: corpus (default), lexicalConceptualResource, languageDescription, toolService") +parser.add_argument("-m", "--submission-metadata", + help="Template name for submission metadata, in CSV format (optional). Default: submission.csv") +parser.add_argument("-t", "--token", + help="Authorization token, or use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", + help="DSpace API Endpoint, or use the DSPACE_API_ENDPOINT env variable") +parser.add_argument("-s", "--submission-definition-name", + help="Submission Definition Name, or use the SUBMISSION_DEFINITION_NAME env variable") +parser.add_argument("-r", "--resource-type", + help="Resource Type (optional), " + "sample values: corpus (default), lexicalConceptualResource, languageDescription, toolService") args = parser.parse_args() +SUBMISSION_METADATA = 'submission.csv' +if args.submission_metadata: + SUBMISSION_METADATA = args.submission_metadata + AUTHORIZATION_TOKEN = None if args.token: AUTHORIZATION_TOKEN = args.token @@ -39,12 +45,17 @@ print('No authorization token provided!') exit(1) -SUBMISSION_DEFINITION_NAME = 'traditional' +# SUBMISSION_DEFINITION_NAME = traditional +SUBMISSION_DEFINITION_NAME = None if args.submission_definition_name: SUBMISSION_DEFINITION_NAME = args.submission_definition_name elif 'SUBMISSION_DEFINITION_NAME' in os.environ: SUBMISSION_DEFINITION_NAME = os.environ['SUBMISSION_DEFINITION_NAME'] +if SUBMISSION_DEFINITION_NAME is None: + print('No submission definition name provided!') + exit(1) + API_ENDPOINT = 'http://localhost:8080/server/api' if args.dspace_api_endpoint: API_ENDPOINT = args.dspace_api_endpoint @@ -67,4 +78,4 @@ # for now, only CSV templates are generated if FILE_TYPE == 'csv': - d.generateCsvTemplate(args.filename, SUBMISSION_DEFINITION_NAME, RESOURCE_TYPE) + 
d.generateCsvTemplate(SUBMISSION_METADATA, SUBMISSION_DEFINITION_NAME, RESOURCE_TYPE) diff --git a/upload_submission_files.py b/upload_submission_files.py index 29a6b88..87fdabe 100644 --- a/upload_submission_files.py +++ b/upload_submission_files.py @@ -19,10 +19,10 @@ parser = argparse.ArgumentParser(description="Command-line arguments") parser.add_argument("-s", "--submission-id", required = True, help="submission ID (required)") parser.add_argument("-f", "--files", nargs="+", required = True, help="Files to upload (required") -parser.add_argument("-t", "--token", help="Authorization token (optional), " - "or use the AUTHORIZATION_TOKEN env variable") -parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " - "or use the DSPACE_API_ENDPOINT env variable") +parser.add_argument("-t", "--token", + help="Authorization token (optional), r use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", + help="DSpace API Endpoint (optional), or use the DSPACE_API_ENDPOINT env variable") args = parser.parse_args() SUBMISSION_ID = args.submission_id @@ -60,4 +60,4 @@ print('Error logging in! 
Giving up.') exit(1) -d.upload_file_to_workspace_item(SUBMISSION_ID, args.files) \ No newline at end of file +d.upload_file_to_workspace_item(SUBMISSION_ID, FILES) \ No newline at end of file diff --git a/upload_submission_metadata.py b/upload_submission_metadata.py index e9e101a..4346b2b 100644 --- a/upload_submission_metadata.py +++ b/upload_submission_metadata.py @@ -18,15 +18,20 @@ # Parse command-line arguments parser = argparse.ArgumentParser(description="Command-line arguments") -parser.add_argument("filename", help="CSV file name") -parser.add_argument("-t", "--token", help="Authorization token (optional), " - "or use the AUTHORIZATION_TOKEN env variable") -parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint (optional), " - "or use the DSPACE_API_ENDPOINT env variable") -parser.add_argument("-c", "--collection-id", help="DSpace Collection ID (optional), " - "or use the DSPACE_COLLECTION_ID env variable") +parser.add_argument("-m", "--submission-metadata", + help="Submission metadata file name, in CSV format (optional). 
Default: submission.csv") +parser.add_argument("-t", "--token", + help="Authorization token, or use the AUTHORIZATION_TOKEN env variable") +parser.add_argument("-e", "--dspace-api-endpoint", + help="DSpace API, or use the DSPACE_API_ENDPOINT env variable") +parser.add_argument("-c", "--collection-id", + help="DSpace Collection ID, or use the DSPACE_COLLECTION_ID env variable") args = parser.parse_args() +SUBMISSION_METADATA = 'submission.csv' +if args.submission_metadata: + SUBMISSION_METADATA = args.submission_metadata + AUTHORIZATION_TOKEN = None if args.token: AUTHORIZATION_TOKEN = args.token @@ -65,7 +70,7 @@ # for now, only CSV files are supported if FILE_TYPE == 'csv': - submissionResponse = d.create_submission_from_csv(DSPACE_COLLECTION_ID, args.filename) + submissionResponse = d.create_submission_from_csv(DSPACE_COLLECTION_ID, SUBMISSION_METADATA) if submissionResponse is not None: if submissionResponse.status_code == 201: print(f'Submission \"{submissionResponse.json()["_embedded"]["item"]["name"]}\" ' From d13089b17f0e4fffebea62222ef2bc20a2d978ee Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 16:22:31 +0100 Subject: [PATCH 04/10] allow upload/update submission with files in one command --- rest_client/submission_client.py | 26 +++++------ ...ission_metadata.py => upload_submission.py | 43 +++++++++++++------ upload_submission_files.py | 2 +- 3 files changed, 45 insertions(+), 26 deletions(-) rename upload_submission_metadata.py => upload_submission.py (57%) diff --git a/rest_client/submission_client.py b/rest_client/submission_client.py index 6450fdc..fa6a891 100644 --- a/rest_client/submission_client.py +++ b/rest_client/submission_client.py @@ -3,8 +3,6 @@ import logging import csv import os -# from requests import requests -import requests from requests import Request from requests import Response @@ -22,6 +20,15 @@ class PatchOperation(Enum): REPLACE = 'replace' MOVE = 'move' +def handle_failed_response(operation_name, response: 
Response): + print(f"{operation_name} failed with status code {response.status_code}") + if response.request and response.request.method: + print(f"Method: {response.request.method}") + if response.url: + print(f"Request URL: {response.url}") + if response.text: + print(f"Reason: {response.text}") + class SubmissionClient: def __init__(self, api_endpoint = API_ENDPOINT, authorization_token = AUTHORIZATION_TOKEN): self.authorization_token = authorization_token @@ -59,21 +66,14 @@ def create_submission(self, parent): def create_submission_from_csv(self, parent, csv_file_path): payload = self.parse_submission_payload_csv(csv_file_path) - if len(payload) > 0: - return self.create_submission_from_payload(parent, payload) - else: - the_response = Response() - the_response.status_code = 400 - the_response._content = b'{ "error" : 400, "message" : "No metadata found in csv file" }' - the_response.url = self.api_endpoint + '/submission/workspaceitems?owningCollection=' + parent - return the_response + return self.create_submission_from_payload(parent, payload) def create_submission_from_payload(self, parent, payload): create_response = self.create_submission(parent) if create_response.status_code == 201: workspace_item_id = create_response.json()['id'] patch_response = self.patch_metadata(workspace_item_id, payload) - if patch_response.status_code == 200: + if patch_response is not None and patch_response.status_code == 200: patch_response.status_code = 201 return patch_response @@ -88,6 +88,7 @@ def patch_metadata(self, workspace_item_id, data): _logger.error('Input data should be in the form of the list of operations') return None + print("Patch operations:") for operation in data: print(operation) path = operation['path'] if 'path' in operation else None @@ -118,7 +119,7 @@ def patch_metadata(self, workspace_item_id, data): if r.status_code == 200: # 200 Success - _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + _logger.info(f'Successful 
patch update to {r.json()["type"]} {r.json()["id"]}') else: _logger.error(r.text) # Return the raw API response @@ -157,7 +158,6 @@ def parse_submission_payload_csv(self, file_path): for key in operation_map: operations.append(operation_map[key]) - print(f'Submission payload:\n{operations}') return operations def get_submission_form_names(self, submission_definition_id): diff --git a/upload_submission_metadata.py b/upload_submission.py similarity index 57% rename from upload_submission_metadata.py rename to upload_submission.py index 4346b2b..dc64965 100644 --- a/upload_submission_metadata.py +++ b/upload_submission.py @@ -9,6 +9,7 @@ import os from rest_client.submission_client import SubmissionClient +from rest_client.submission_client import handle_failed_response # Example system variables needed for authentication and submission upload # (all of these variables can be overwritten with command line arguments) @@ -20,6 +21,9 @@ parser = argparse.ArgumentParser(description="Command-line arguments") parser.add_argument("-m", "--submission-metadata", help="Submission metadata file name, in CSV format (optional). 
Default: submission.csv") +parser.add_argument("-s", "--submission-id", help="submission ID (optional)," + " if provided, metadata + files will be uploaded to this submission") +parser.add_argument("-f", "--files", nargs="+", help="Files to upload (optional)") parser.add_argument("-t", "--token", help="Authorization token, or use the AUTHORIZATION_TOKEN env variable") parser.add_argument("-e", "--dspace-api-endpoint", @@ -32,6 +36,9 @@ if args.submission_metadata: SUBMISSION_METADATA = args.submission_metadata +FILES = args.files or [] +SUBMISSION_ID = args.submission_id + AUTHORIZATION_TOKEN = None if args.token: AUTHORIZATION_TOKEN = args.token @@ -70,16 +77,28 @@ # for now, only CSV files are supported if FILE_TYPE == 'csv': - submissionResponse = d.create_submission_from_csv(DSPACE_COLLECTION_ID, SUBMISSION_METADATA) - if submissionResponse is not None: - if submissionResponse.status_code == 201: - print(f'Submission \"{submissionResponse.json()["_embedded"]["item"]["name"]}\" ' - f'with id {submissionResponse.json()["id"]} created successfully.') + if not SUBMISSION_ID: + submission_response = d.create_submission_from_csv(DSPACE_COLLECTION_ID, SUBMISSION_METADATA) + if submission_response is not None: + if submission_response.status_code == 201: + submission_id = submission_response.json()['id'] + submission_name = submission_response.json()['_embedded']['item']['name'] or "Untitled" + print(f'Submission \"{submission_name}\" with id {submission_id} created successfully.') + if len(FILES) > 0: + d.upload_file_to_workspace_item(submission_id, FILES) + else: + handle_failed_response("Submission create", submission_response) + else: + payload = d.parse_submission_payload_csv(SUBMISSION_METADATA) + if len(payload) > 0: + submission_response = d.patch_metadata(SUBMISSION_ID, payload) + if submission_response is not None: + if submission_response.status_code == 200: + submission_name = submission_response.json()['_embedded']['item']['name'] or "Untitled" + 
print(f'Submission \"{submission_name}\" with id {SUBMISSION_ID} updated successfully.') + else: + handle_failed_response("Submission update", submission_response) else: - print(f'Submission creation failed with status code {submissionResponse.status_code}') - if submissionResponse.request and submissionResponse.request.method: - print(f'Method: {submissionResponse.request.method}') - if submissionResponse.url: - print(f'Request URL: {submissionResponse.url}') - if submissionResponse.text: - print(f'Reason: {submissionResponse.text}') \ No newline at end of file + print("No metadata found in csv file.") + if len(FILES) > 0: + d.upload_file_to_workspace_item(SUBMISSION_ID, FILES) \ No newline at end of file diff --git a/upload_submission_files.py b/upload_submission_files.py index 87fdabe..f7048b8 100644 --- a/upload_submission_files.py +++ b/upload_submission_files.py @@ -18,7 +18,7 @@ # Parse command-line arguments parser = argparse.ArgumentParser(description="Command-line arguments") parser.add_argument("-s", "--submission-id", required = True, help="submission ID (required)") -parser.add_argument("-f", "--files", nargs="+", required = True, help="Files to upload (required") +parser.add_argument("-f", "--files", nargs="+", required = True, help="Files to upload (required)") parser.add_argument("-t", "--token", help="Authorization token (optional), r use the AUTHORIZATION_TOKEN env variable") parser.add_argument("-e", "--dspace-api-endpoint", From 0ce36bdba1c7393aa36d702c26ce2fb806be2585 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 17:16:57 +0100 Subject: [PATCH 05/10] Enhance README with environment variables and usage examples --- README.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/README.md b/README.md index 9830e36..a55c947 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,92 @@ # clarin-submission-python DSpace python library to allow ingestion of metadata with files to 
create new submission + +## Environment variables used by library +Example: +
+AUTHORIZATION_TOKEN = ey....0uYw
+DSPACE_API_ENDPOINT = 'https://lindat.mff.cuni.cz/repository/server/api'
+DSPACE_COLLECTION_ID = 34352d2c-3296-4448-aeb5-d18f2e179126
+SUBMISSION_DEFINITION_NAME = traditional
+
+ +Where: + +AUTHORIZATION_TOKEN is the persistence token used for authentication. The token can be created by admin or by submitter
+DSPACE_API_ENDPOINT is the base URL for DSpace Server API Endpoints
+DSPACE_COLLECTION_ID is the collection UUID string, specifying the (parent) collection where the submission data will be stored
+SUBMISSION_DEFINOTION_NAME is the submition definition name used by the collection. The submition definition specifies the submission metadata form used in submission requests
+ +All of these environment variables will be given to user by admin. + +## Create Submission Metadata Template file (CSV format) + +
+python generate_submission_metadata_template.py [-h] [-m SUBMISSION_METADATA]
+                                                [-t TOKEN]
+                                                [-e DSPACE_API_ENDPOINT]
+                                                [-s SUBMISSION_DEFINITION_NAME]
+                                                [-r RESOURCE_TYPE]
+
+Command-line arguments
+
+options:
+  -h, --help            show this help message and exit
+  -m, --submission-metadata SUBMISSION_METADATA
+                        Template name for submission metadata, in CSV format
+                        (optional). Default: submission.csv
+  -t, --token TOKEN     Authorization token, or use the AUTHORIZATION_TOKEN
+                        env variable
+  -e, --dspace-api-endpoint DSPACE_API_ENDPOINT
+                        DSpace API Endpoint, or use the DSPACE_API_ENDPOINT
+                        env variable
+  -s, --submission-definition-name SUBMISSION_DEFINITION_NAME
+                        Submission Definition Name, or use the
+                        SUBMISSION_DEFINITION_NAME env variable
+  -r, --resource-type RESOURCE_TYPE
+                        Resource Type (optional), sample values: corpus
+                        (default), lexicalConceptualResource,
+                        languageDescription, toolService
+
+ +Example: +
+python generate_submission_metadata_template.py -m /User/John/Submissions/submission-data.csv -r toolService
+
+This command creates a submission-data.csv file, in /Users/John/Submissions directory, for submissions with toolService resource type + +## Submission Metadata Template file (CSV format) + +File generated by previous command, can be completed by adding the metadata values for individual keys: + +Example: +
+__section__,traditionalpageone
+dc.type,toolService
+dc.title,DEMO Submission
+dc.source.uri,
+local.demo.uri,
+dc.relation.isreferencedby,
+dc.date.issued,
+dc.publisher,
+dc.contributor.author,John Lizard,Mark Hagues,"Emily, Smith Lion"
+local.contact.person,
+local.sponsor,
+__section__,traditionalpagetwo
+dc.description,
+dc.language.iso,
+dc.subject,
+metashare.ResourceInfo#ContentInfo.detailedType,
+metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent,
+__section__,specialFields
+local.submission.note,
+dc.relation.replaces,
+
+Note, in this case, only the following metadata values are defined: dc.type, dc.title and dc.contributor.author where
+dc.type single value is automatically defined during metadata template file generation
+dc.title (single) value is defined by user
+dc.contributor.author (list) values are defined by user
+ +So, this way user may define any number of metadata values, where each metadata value can be set as a single value or a list of values.
+The \_\_section\_\_ values define the individual submission sections, where the metadata belong to + From a57cf9dc60ac26f8241041f01b433f1d523ae95c Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 17:39:44 +0100 Subject: [PATCH 06/10] updating README file --- README.md | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/README.md b/README.md index a55c947..1b34647 100644 --- a/README.md +++ b/README.md @@ -90,3 +90,76 @@ Note, in this case, only the following metadata values are defined: dc.type, So, this way user may define any number of metadata values, where each metadata value can be set as a single value or a list of values.
The \_\_section\_\_ values define the individual submission sections, where the metadata belong to +## Submission Metadata + Files Upload (metadata in CSV format) + +
+python upload_submission.py [-h] [-m SUBMISSION_METADATA] [-s SUBMISSION_ID]
+                            [-f FILES [FILES ...]] [-t TOKEN]
+                            [-e DSPACE_API_ENDPOINT] [-c COLLECTION_ID]
+
+Command-line arguments
+
+options:
+  -h, --help            show this help message and exit
+  -m, --submission-metadata SUBMISSION_METADATA
+                        Submission metadata file name, in CSV format
+                        (optional). Default: submission.csv
+  -s, --submission-id SUBMISSION_ID
+                        submission ID (optional), if provided, metadata +
+                        files will be uploaded to this submission
+  -f, --files FILES [FILES ...]
+                        Files to upload (optional)
+  -t, --token TOKEN     Authorization token, or use the AUTHORIZATION_TOKEN
+                        env variable
+  -e, --dspace-api-endpoint DSPACE_API_ENDPOINT
+                        DSpace API, or use the DSPACE_API_ENDPOINT env
+                        variable
+  -c, --collection-id COLLECTION_ID
+                        DSpace Collection ID, or use the DSPACE_COLLECTION_ID
+                        env variable
+
+ +Example 1: +
+python upload_submission.py -m /User/John/Submissions/submission-data.csv -f articles.zip sample-video.mp4 logo.png
+
+ +In this case new submission will be created with metadata defined in submission-data.csv file and +3 files (bitstreams): articles.zip, sample-video.mp4, logo.png will be uploaded to created submission. + +Example 2: +
+python upload_submission.py -s 7401 -m /User/John/Submissions/new-submission-data.csv -f new-video.mp4
+
+ +In this case existing submission with ID 7401 will be updated with metadata defined in new-submission-data.csv +file and one video file: new-video.mp4 will be uploaded to this submission. + +## Submission File(s) upload + +
+python upload_submission_files.py [-h] -s SUBMISSION_ID -f FILES [FILES ...]
+                                  [-t TOKEN] [-e DSPACE_API_ENDPOINT]
+
+Command-line arguments
+
+options:
+  -h, --help            show this help message and exit
+  -s, --submission-id SUBMISSION_ID
+                        submission ID (required)
+  -f, --files FILES [FILES ...]
+                        Files to upload (required)
+  -t, --token TOKEN     Authorization token (optional), or use the<br>
+                        AUTHORIZATION_TOKEN env variable
+  -e, --dspace-api-endpoint DSPACE_API_ENDPOINT
+                        DSpace API Endpoint (optional), or use the
+                        DSPACE_API_ENDPOINT env variable
+
+ + +Example: +
+python upload_submission_files.py -s 7401 -f new-logo.png examples.zip
+
+ +In this case two files (bitstreams): new-logo.png and examples.zip will be uploaded to submission 7401. From 43f1bc0611554ad9fddc30fd0b0cebc94bae6e4e Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Tue, 25 Nov 2025 18:01:33 +0100 Subject: [PATCH 07/10] improve README file --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b34647..ff534f0 100644 --- a/README.md +++ b/README.md @@ -82,13 +82,13 @@ __section__,specialFields local.submission.note, dc.relation.replaces, -Note, in this case, only the following metadata values are defined: dc.type, dc.title and dc.contributor.author where
-dc.type single value is automatically defined during metadata template file generation
+Note, here, only the following metadata values are defined: dc.type, dc.title and dc.contributor.author where
+dc.type (single) value is automatically defined during metadata template file generation
dc.title (single) value is defined by user
dc.contributor.author (list) values are defined by user
So, this way user may define any number of metadata values, where each metadata value can be set as a single value or a list of values.
-The \_\_section\_\_ values define the individual submission sections, where the metadata belong to +The \_\_section\_\_ lines define the individual submission sections, where the metadata belong to ## Submission Metadata + Files Upload (metadata in CSV format) @@ -162,4 +162,4 @@ Example: python upload_submission_files.py -s 7401 -f new-logo.png examples.zip -In this case two files (bitstreams): new-logo.png and examples.zip will be uploaded to submission 7401. +In this case two files (bitstreams): new-logo.png and examples.zip will be uploaded to existing submission with ID 7401. From 54d9c616935699972b44a37ee80e09e4a105f21b Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Wed, 26 Nov 2025 11:43:55 +0100 Subject: [PATCH 08/10] code cleanup: static,private methods --- generate_submission_metadata_template.py | 2 +- rest_client/submission_client.py | 120 +++++++++++------------ upload_submission.py | 5 +- 3 files changed, 60 insertions(+), 67 deletions(-) diff --git a/generate_submission_metadata_template.py b/generate_submission_metadata_template.py index 3c5def3..397f1f0 100644 --- a/generate_submission_metadata_template.py +++ b/generate_submission_metadata_template.py @@ -78,4 +78,4 @@ # for now, only CSV templates are generated if FILE_TYPE == 'csv': - d.generateCsvTemplate(SUBMISSION_METADATA, SUBMISSION_DEFINITION_NAME, RESOURCE_TYPE) + d.generate_csv_template(SUBMISSION_METADATA, SUBMISSION_DEFINITION_NAME, RESOURCE_TYPE) diff --git a/rest_client/submission_client.py b/rest_client/submission_client.py index fa6a891..63cb4d3 100644 --- a/rest_client/submission_client.py +++ b/rest_client/submission_client.py @@ -6,7 +6,7 @@ from requests import Request from requests import Response -__all__ = ['SubmissionClient'] +__all__ = ['SubmissionClient', 'handle_failed_response', 'parse_submission_payload_csv'] logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) _logger = logging.getLogger('clarin.dspace') @@ -29,6 +29,42 @@ def 
handle_failed_response(operation_name, response: Response): if response.text: print(f"Reason: {response.text}") + +def parse_submission_payload_csv(file_path): + operations = [] + with open(file_path, 'r') as file: + reader = csv.reader(file) + section_path = '' + operation_map = {} + for row in reader: + print(row) + if row[0] == '__section__': + section_path = '/sections/' + row[1] + elif len(row) > 1 and row[1] is not None and row[1] != '': + path = section_path + '/' + row[0] + if operation_map.get(path) is None: + value = [] + for num in range(1, len(row)): + value.append({ + 'value': row[num] + }) + operation = { + 'op': 'add', + 'path': path, + 'value': value + } + operation_map[path] = operation + else: + operation = operation_map.get(path) + for num in range(1, len(row)): + operation['value'].append({ + 'value': row[num] + }) + + for key in operation_map: + operations.append(operation_map[key]) + return operations + class SubmissionClient: def __init__(self, api_endpoint = API_ENDPOINT, authorization_token = AUTHORIZATION_TOKEN): self.authorization_token = authorization_token @@ -43,12 +79,6 @@ def authenticate(self, retry=False): return False return self.dspaceClient.authenticate() - def create_community(self, parent, data): - return self.dspaceClient.create_community(parent, data) - - def get_item(self, uuid): - return self.dspaceClient.get_item(uuid) - def create_submission(self, parent): url = f'{self.api_endpoint}/submission/workspaceitems' if not parent: @@ -65,10 +95,10 @@ def create_submission(self, parent): return r def create_submission_from_csv(self, parent, csv_file_path): - payload = self.parse_submission_payload_csv(csv_file_path) - return self.create_submission_from_payload(parent, payload) + payload = parse_submission_payload_csv(csv_file_path) + return self._create_submission_from_payload(parent, payload) - def create_submission_from_payload(self, parent, payload): + def _create_submission_from_payload(self, parent, payload): 
create_response = self.create_submission(parent) if create_response.status_code == 201: workspace_item_id = create_response.json()['id'] @@ -125,57 +155,8 @@ def patch_metadata(self, workspace_item_id, data): # Return the raw API response return r - def parse_submission_payload_csv(self, file_path): - operations = [] - with open(file_path, 'r') as file: - reader = csv.reader(file) - section_path = '' - operation_map = {} - for row in reader: - print(row) - if row[0] == '__section__': - section_path = '/sections/' + row[1] - elif len(row) > 1 and row[1] is not None and row[1] != '': - path = section_path + '/' + row[0] - if operation_map.get(path) is None: - value = [] - for num in range(1, len(row)): - value.append({ - 'value': row[num] - }) - operation = { - 'op': 'add', - 'path': path, - 'value': value - } - operation_map[path] = operation - else: - operation = operation_map.get(path) - for num in range(1, len(row)): - operation['value'].append({ - 'value': row[num] - }) - - for key in operation_map: - operations.append(operation_map[key]) - return operations - - def get_submission_form_names(self, submission_definition_id): - url = f'{self.api_endpoint}/config/submissiondefinitions/{submission_definition_id}?embed=sections' - r = self.dspaceClient.session.get(url, headers=self.dspaceClient.request_headers) - if r is not None and r.status_code == 200: - _logger.info(f'successful retrieval of submission definition {submission_definition_id}') - form_names = [] - sections = r.json().get('_embedded', {}).get('sections', {}).get('_embedded', {}).get('sections', []) - for section in sections: - if section.get('sectionType') == 'submission-form': - form_names.append(section.get('id')) - return form_names - - return None - - def generateCsvTemplate(self, csv_file_name, submission_definition_name, resource_type): - submission_form_names = self.get_submission_form_names(submission_definition_name) + def generate_csv_template(self, csv_file_name, 
submission_definition_name, resource_type): + submission_form_names = self._get_submission_form_names(submission_definition_name) if submission_form_names is not None and len(submission_form_names) > 0: csv_lines = [] for form_name in submission_form_names: @@ -212,13 +193,26 @@ def generateCsvTemplate(self, csv_file_name, submission_definition_name, resourc else: _logger.error(f'No submission forms found for submission definition name: "{submission_definition_name}"') + def _get_submission_form_names(self, submission_definition_id): + url = f'{self.api_endpoint}/config/submissiondefinitions/{submission_definition_id}?embed=sections' + r = self.dspaceClient.session.get(url, headers=self.dspaceClient.request_headers) + if r is not None and r.status_code == 200: + _logger.info(f'successful retrieval of submission definition {submission_definition_id}') + form_names = [] + sections = r.json().get('_embedded', {}).get('sections', {}).get('_embedded', {}).get('sections', []) + for section in sections: + if section.get('sectionType') == 'submission-form': + form_names.append(section.get('id')) + return form_names + + return None + def upload_file_to_workspace_item(self, workspace_item_id, file_paths): url = f'{self.api_endpoint}/submission/workspaceitems/{workspace_item_id}' for file_path in file_paths: # the API only allows to upload one file per request file = (os.path.basename(file_path), open(file_path, 'rb')) - files = {'file': file} - req = Request('POST', url, files = files) + req = Request('POST', url, files = {'file': file}) prepared_req = self.dspaceClient.session.prepare_request(req) r = self.dspaceClient.session.send(prepared_req) if r.status_code == 201: diff --git a/upload_submission.py b/upload_submission.py index dc64965..8fecfff 100644 --- a/upload_submission.py +++ b/upload_submission.py @@ -8,8 +8,7 @@ import argparse import os -from rest_client.submission_client import SubmissionClient -from rest_client.submission_client import handle_failed_response 
+from rest_client.submission_client import SubmissionClient, handle_failed_response, parse_submission_payload_csv # Example system variables needed for authentication and submission upload # (all of these variables can be overwritten with command line arguments) @@ -89,7 +88,7 @@ else: handle_failed_response("Submission create", submission_response) else: - payload = d.parse_submission_payload_csv(SUBMISSION_METADATA) + payload = parse_submission_payload_csv(SUBMISSION_METADATA) if len(payload) > 0: submission_response = d.patch_metadata(SUBMISSION_ID, payload) if submission_response is not None: From 7e17620849e5cd4eba8fac60820a8cad0fad4f44 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Thu, 27 Nov 2025 15:39:37 +0100 Subject: [PATCH 09/10] update README file --- README.md | 66 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index ff534f0..fde370e 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,35 @@ -# clarin-submission-python +## clarin-submission-python DSpace python library to allow ingestion of metadata with files to create new submission -## Environment variables used by library +### Environment variables used by library Example: -
+```
 AUTHORIZATION_TOKEN = ey....0uYw
 DSPACE_API_ENDPOINT = 'https://lindat.mff.cuni.cz/repository/server/api'
 DSPACE_COLLECTION_ID = 34352d2c-3296-4448-aeb5-d18f2e179126
 SUBMISSION_DEFINITION_NAME = traditional
-
+``` Where: -AUTHORIZATION_TOKEN is the persistence token used for authentication. The token can be created by admin or by submitter
-DSPACE_API_ENDPOINT is the base URL for DSpace Server API Endpoints
-DSPACE_COLLECTION_ID is the collection UUID string, specifying the (parent) collection where the submission data will be stored
-SUBMISSION_DEFINOTION_NAME is the submition definition name used by the collection. The submition definition specifies the submission metadata form used in submission requests
+**AUTHORIZATION_TOKEN** is the persistence token used for authentication. The token can be created by admin or by submitter -All of these environment variables will be given to user by admin. +**DSPACE_API_ENDPOINT** is the base URL for DSpace Server API Endpoints -## Create Submission Metadata Template file (CSV format) +**DSPACE_COLLECTION_ID** is the collection UUID string, specifying the (parent) collection where the submission data will be stored -
+**SUBMISSION_DEFINITION_NAME** is the submission definition name used by the collection. The submission definition specifies the submission metadata form used in submission requests
+
+All of these environment variables will be given to user by admin.  
+Note that every script allows these environment variables to be overridden by providing the corresponding command-line arguments:<br>
+- **-t, --token** to override AUTHORIZATION_TOKEN
+- **-e, --dspace-api-endpoint** to override DSPACE_API_ENDPOINT
+- **-c, --collection-id** to override DSPACE_COLLECTION_ID
+- **-s, --submission-definition-name** to override SUBMISSION_DEFINITION_NAME
+
+### Create Submission Metadata Template file (CSV format)
+
+```
 python generate_submission_metadata_template.py [-h] [-m SUBMISSION_METADATA]
                                                 [-t TOKEN]
                                                 [-e DSPACE_API_ENDPOINT]
@@ -47,20 +55,20 @@ options:
                         Resource Type (optional), sample values: corpus
                         (default), lexicalConceptualResource,
                         languageDescription, toolService
-
+``` Example: -
+```
 python generate_submission_metadata_template.py -m /User/John/Submissions/submission-data.csv -r toolService
-
+``` This command creates a submission-data.csv file, in /Users/John/Submissions directory, for submissions with toolService resource type -## Submission Metadata Template file (CSV format) +### Submission Metadata Template file (CSV format) File generated by previous command, can be completed by adding the metadata values for individual keys: Example: -
+```
 __section__,traditionalpageone
 dc.type,toolService
 dc.title,DEMO Submission
@@ -81,7 +89,7 @@ metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent,
 __section__,specialFields
 local.submission.note,
 dc.relation.replaces,
-
+``` Note, here, only the following metadata values are defined: dc.type, dc.title and dc.contributor.author where
dc.type (single) value is automatically defined during metadata template file generation
dc.title (single) value is defined by user
@@ -90,9 +98,9 @@ Note, here, only the following metadata values are defined: dc.type, dc.title So, this way user may define any number of metadata values, where each metadata value can be set as a single value or a list of values.
The \_\_section\_\_ lines define the individual submission sections, where the metadata belong to -## Submission Metadata + Files Upload (metadata in CSV format) +### Submission Metadata + Files Upload (metadata in CSV format) -
+```
 python upload_submission.py [-h] [-m SUBMISSION_METADATA] [-s SUBMISSION_ID]
                             [-f FILES [FILES ...]] [-t TOKEN]
                             [-e DSPACE_API_ENDPOINT] [-c COLLECTION_ID]
@@ -117,27 +125,27 @@ options:
   -c, --collection-id COLLECTION_ID
                         DSpace Collection ID, or use the DSPACE_COLLECTION_ID
                         env variable
-
+``` Example 1: -
+```
 python upload_submission.py -m /User/John/Submissions/submission-data.csv -f articles.zip sample-video.mp4 logo.png
-
+``` In this case new submission will be created with metadata defined in submission-data.csv file and 3 files (bitstreams): articles.zip, sample-video.mp4, logo.png will be uploaded to created submission. Example 2: -
+```
 python upload_submission.py -s 7401 -m /User/John/Submissions/new-submission-data.csv -f new-video.mp4
-
+``` In this case existing submission with ID 7401 will be updated with metadata defined in new-submission-data.csv file and one video file: new-video.mp4 will be uploaded to this submission. -## Submission File(s) upload +### Submission File(s) upload -
+```
 python upload_submission_files.py [-h] -s SUBMISSION_ID -f FILES [FILES ...]
                                   [-t TOKEN] [-e DSPACE_API_ENDPOINT]
 
@@ -154,12 +162,12 @@ options:
   -e, --dspace-api-endpoint DSPACE_API_ENDPOINT
                         DSpace API Endpoint (optional), or use the
                         DSPACE_API_ENDPOINT env variable
-
+``` Example: -
+```
 python upload_submission_files.py -s 7401 -f new-logo.png examples.zip
-
+``` In this case two files (bitstreams): new-logo.png and examples.zip will be uploaded to existing submission with ID 7401. From 01f59bec9a782a3a123d0c6450d28fab0600f353 Mon Sep 17 00:00:00 2001 From: Milan Kuchtiak Date: Thu, 27 Nov 2025 16:11:01 +0100 Subject: [PATCH 10/10] improve README, change submission-definition-name parameter to -d --- README.md | 38 ++++++++++++------------ generate_submission_metadata_template.py | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index fde370e..3d55781 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ ## clarin-submission-python -DSpace python library to allow ingestion of metadata with files to create new submission +DSpace python library to allow ingestion of metadata with files to create new submission. ### Environment variables used by library Example: @@ -9,7 +9,6 @@ DSPACE_API_ENDPOINT = 'https://lindat.mff.cuni.cz/repository/server/api' DSPACE_COLLECTION_ID = 34352d2c-3296-4448-aeb5-d18f2e179126 SUBMISSION_DEFINITION_NAME = traditional ``` - Where: **AUTHORIZATION_TOKEN** is the persistence token used for authentication. 
The token can be created by admin or by submitter @@ -25,7 +24,7 @@ Note that every script allows to override these environment variables by providi - **-t, --token** to override AUTHORIZATION_TOKEN - **-e, --dspace-api-endpoint** to override DSPACE_API_ENDPOINT - **-c, --collection-id** to override DSPACE_COLLECTION_ID -- **-s, --submission-definition-name** to override SUBMISSION_DEFINITION_NAME +- **-d, --submission-definition-name** to override SUBMISSION_DEFINITION_NAME ### Create Submission Metadata Template file (CSV format) @@ -48,7 +47,7 @@ options: -e, --dspace-api-endpoint DSPACE_API_ENDPOINT DSpace API Endpoint, or use the DSPACE_API_ENDPOINT env variable - -s, --submission-definition-name SUBMISSION_DEFINITION_NAME + -d, --submission-definition-name SUBMISSION_DEFINITION_NAME Submission Definition Name, or use the SUBMISSION_DEFINITION_NAME env variable -r, --resource-type RESOURCE_TYPE @@ -61,14 +60,14 @@ Example: ``` python generate_submission_metadata_template.py -m /User/John/Submissions/submission-data.csv -r toolService ``` -This command creates a submission-data.csv file, in /Users/John/Submissions directory, for submissions with toolService resource type +This command creates a **submission-data.csv** file, in **/Users/John/Submissions** directory, for submissions with **toolService** resource type. ### Submission Metadata Template file (CSV format) -File generated by previous command, can be completed by adding the metadata values for individual keys: +File generated by previous command, can be completed by adding the metadata values for individual keys. Example: -``` +
 __section__,traditionalpageone
 dc.type,toolService
 dc.title,DEMO Submission
@@ -89,14 +88,15 @@ metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent,
 __section__,specialFields
 local.submission.note,
 dc.relation.replaces,
-```
-Note, here, only the following metadata values are defined: dc.type, dc.title and dc.contributor.author where
-dc.type (single) value is automatically defined during metadata template file generation
-dc.title (single) value is defined by user
-dc.contributor.author (list) values are defined by user
+
+ +Note that here only the following metadata values are defined: **dc.type, dc.title and dc.contributor.author**, where +- **dc.type** (single) value is automatically defined during metadata template file generation +- **dc.title** (single) value is defined by the user +- **dc.contributor.author** (list) values are defined by the user -So, this way user may define any number of metadata values, where each metadata value can be set as a single value or a list of values.
-The \_\_section\_\_ lines define the individual submission sections, where the metadata belong to +This way, the user may define any number of metadata values, where each metadata value can be set as a single value or a list of values. +The **\_\_section\_\_** lines define the individual submission sections to which the metadata belong. ### Submission Metadata + Files Upload (metadata in CSV format) @@ -132,16 +132,16 @@ Example 1: python upload_submission.py -m /User/John/Submissions/submission-data.csv -f articles.zip sample-video.mp4 logo.png ``` -In this case new submission will be created with metadata defined in submission-data.csv file and -3 files (bitstreams): articles.zip, sample-video.mp4, logo.png will be uploaded to created submission. +In this case, a new submission will be created with the metadata defined in the **submission-data.csv** file, and +3 files (bitstreams), **articles.zip, sample-video.mp4, logo.png**, will be uploaded to the created submission. Example 2: ``` python upload_submission.py -s 7401 -m /User/John/Submissions/new-submission-data.csv -f new-video.mp4 ``` -In this case existing submission with ID 7401 will be updated with metadata defined in new-submission-data.csv -file and one video file: new-video.mp4 will be uploaded to this submission. +In this case, the existing submission with ID 7401 will be updated with the metadata defined in the **new-submission-data.csv** +file, and one video file, **new-video.mp4**, will be uploaded to this submission. ### Submission File(s) upload @@ -170,4 +170,4 @@ Example: python upload_submission_files.py -s 7401 -f new-logo.png examples.zip ``` -In this case two files (bitstreams): new-logo.png and examples.zip will be uploaded to existing submission with ID 7401. +In this case, two files (bitstreams), **new-logo.png** and **examples.zip**, will be uploaded to the existing submission with ID 7401. 
diff --git a/generate_submission_metadata_template.py b/generate_submission_metadata_template.py index 397f1f0..6aa5078 100644 --- a/generate_submission_metadata_template.py +++ b/generate_submission_metadata_template.py @@ -24,7 +24,7 @@ help="Authorization token, or use the AUTHORIZATION_TOKEN env variable") parser.add_argument("-e", "--dspace-api-endpoint", help="DSpace API Endpoint, or use the DSPACE_API_ENDPOINT env variable") -parser.add_argument("-s", "--submission-definition-name", +parser.add_argument("-d", "--submission-definition-name", help="Submission Definition Name, or use the SUBMISSION_DEFINITION_NAME env variable") parser.add_argument("-r", "--resource-type", help="Resource Type (optional), "