From 1b5630446c869e8440a69409b7f4471dc853ecd8 Mon Sep 17 00:00:00 2001 From: jm Date: Mon, 15 Apr 2024 16:29:54 +0200 Subject: [PATCH 01/23] added get_items --- dspace_rest_client/client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 8ef88ed..938e688 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -687,6 +687,21 @@ def create_collection(self, parent, data): params = {'parent': parent} return Collection(api_resource=parse_json(self.create_dso(url, params, data))) + def get_items(self): + """ + Get all items + @return: list of Item objects + """ + url = f'{self.API_ENDPOINT}/core/items' + items = list() + r = self.api_get(url) + r_json = parse_json(r) + if '_embedded' in r_json: + if 'items' in r_json['_embedded']: + for item_resource in r_json['_embedded']['items']: + items.append(Item(item_resource)) + return items + def get_item(self, uuid): """ Get an item, given its UUID From 7442996c6364c30a28cbf30c6a2d224742188237 Mon Sep 17 00:00:00 2001 From: jm Date: Mon, 15 Apr 2024 16:35:00 +0200 Subject: [PATCH 02/23] do not require pysolr --- dspace_rest_client/client.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 107cc94..299c501 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -19,7 +19,6 @@ import requests from requests import Request -import pysolr import os from uuid import UUID from .models import * @@ -95,7 +94,12 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.USERNAME = username self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint - self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) + self.solr = None + try: + import pysolr + self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) + except Exception: + pass # If fake_user_agent was specified, use this string that is known (as of 2023-12-03) to succeed with # requests to Cloudfront-protected API endpoints (tested on demo.dspace.org) # Otherwise, the user agent will be the more helpful and accurate default of 'DSpace Python REST Client' From 5e7e83163eadb8a28e0ffc0fd786938c3ab5627a Mon Sep 17 00:00:00 2001 From: MajoBerger Date: Fri, 19 Apr 2024 14:57:47 +0200 Subject: [PATCH 03/23] log in again, when logged off by timeout --- dspace_rest_client/client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 299c501..fa80504 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -207,6 +207,21 @@ def api_post(self, url, params, json, retry=False): logging.debug("Retrying request with updated CSRF token") return self.api_post(url, params=params, json=json, retry=True) + # we need to log in again, if there is login error. This is a bad + # solution copied from the past + elif r.status_code == 401: + r_json = parse_json(r) + if 'message' in r_json and 'Authentication is required' in r_json['message']: + if retry: + logging.error( + 'API Post: Already retried... something must be wrong') + else: + logging.debug("API Post: Retrying request with updated CSRF token") + # try to authenticate + self.authenticate() + # Try to authenticate and repeat the request 3 times - + # if it won't happen log error + return self.api_post(url, params=params, json=json, retry=False) return r def api_post_uri(self, url, params, uri_list, retry=False): From 426a80f3b129a1ec03c5fb50190e3ecf8757ef82 Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 3 Jul 2024 12:29:20 +0200 Subject: [PATCH 04/23] [linter] fix ruff --- console.py | 4 +--- dspace_rest_client/client.py | 18 +++++++++--------- dspace_rest_client/models.py | 6 ------ example.py | 14 +++++++------- example_gets.py | 3 +-- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/console.py b/console.py index 795d6a7..150aaec 100644 --- a/console.py +++ b/console.py @@ -1,7 +1,5 @@ from dspace_rest_client.client import DSpaceClient -from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import code -import logging import os # The DSpace client will look for the same environment variables but we can also look for them here explicitly @@ -22,7 +20,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) code.interact(local=locals()) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 299c501..2ab1886 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -307,15 +307,15 @@ def api_patch(self, url, operation, path, value, retry=False): @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ if url is None: - logging.error(f'Missing required URL argument') + logging.error('Missing required URL argument') return None if path is None: - logging.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + logging.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language') return None if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE or operation == self.PatchOperation.MOVE) and value is None: # missing value required for add/replace/move operations - logging.error(f'Missing required "value" argument for add/replace/move operations') + logging.error('Missing required "value" argument for add/replace/move operations') return None # compile patch data @@ -464,8 +464,8 @@ def update_dso(self, dso, params=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) ' + 'are supported by generic update_dso PUT.') return dso try: # Get self URI from HAL links @@ -511,12 +511,12 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error(f'Need a DSO or a URL to delete') + logging.error('Need a DSO or a URL to delete') return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' + 'are supported by generic update_dso PUT.') return dso # Get self URI from HAL links url = dso.links['self']['href'] @@ -957,7 +957,7 @@ def create_user(self, user, token=None): def delete_user(self, user): if not isinstance(user, User): - logging.error(f'Must be a valid user') + logging.error('Must be a valid user') return None return self.delete_dso(user) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index de463ac..21e3a3c 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -9,14 +9,8 @@ @author Kim Shepherd """ -import code import json -import logging -import requests -from requests import Request -import os -from uuid import UUID __all__ = ['DSpaceObject', 'HALResource', 'ExternalDataObject', 'SimpleDSpaceObject', 'Community', 'Collection', 'Item', 'Bundle', 'Bitstream', 'User', 'Group'] diff --git a/example.py b/example.py index 461078f..c526c07 100644 --- a/example.py +++ b/example.py @@ -31,7 +31,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) # Put together some basic Community data. @@ -58,7 +58,7 @@ if isinstance(new_community, Community) and new_community.uuid is not None: print(f'New community created! Handle: {new_community.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Update the community metadata @@ -93,7 +93,7 @@ if isinstance(new_collection, Collection) and new_collection.uuid is not None: print(f'New collection created! Handle: {new_collection.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Put together some basic Item data. @@ -146,7 +146,7 @@ if isinstance(new_item, Item) and new_item.uuid is not None: print(f'New item created! Handle: {new_item.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Add a single metadata field+value to the item (PATCH operation) @@ -159,7 +159,7 @@ if isinstance(new_bundle, Bundle) and new_bundle.uuid is not None: print(f'New bundle created! UUID: {new_bundle.uuid}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Create and upload a new bitstream using the LICENSE.txt file in this project @@ -181,10 +181,10 @@ if isinstance(new_bitstream, Bitstream) and new_bitstream.uuid is not None: print(f'New bitstream created! UUID: {new_bitstream.uuid}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) -print(f'All finished with example data creation. Visit your test repository to review created objects') +print('All finished with example data creation. Visit your test repository to review created objects') # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs diff --git a/example_gets.py b/example_gets.py index a6a6c77..fdd23fc 100644 --- a/example_gets.py +++ b/example_gets.py @@ -7,7 +7,6 @@ """ from dspace_rest_client.client import DSpaceClient -from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream # Example variables needed for authentication and basic API requests # SET THESE TO MATCH YOUR TEST SYSTEM BEFORE RUNNING THE EXAMPLE SCRIPT @@ -30,7 +29,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) # Retrieving objects - now that we know there is some data in the repository we can demonstrate From 6cd46a34095cf52d52fe0fdc119fcf7552136f36 Mon Sep 17 00:00:00 2001 From: jm Date: Thu, 24 Oct 2024 20:23:34 +0200 Subject: [PATCH 05/23] make logging configurable --- dspace_rest_client/client.py | 113 ++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 299c501..a9ec5f0 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -26,6 +26,7 @@ __all__ = ['DSpaceClient'] logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) +_logger = logging.getLogger("dspace.client") def parse_json(response): @@ -38,7 +39,7 @@ def parse_json(response): try: response_json = response.json() except ValueError as err: - logging.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') return response_json @@ -131,16 +132,16 @@ def authenticate(self, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it if retry: - logging.error(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.error(f'Too many retries updating token: {r.status_code}: {r.text}') return False else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.authenticate(retry=True) if r.status_code == 401: # 401 Unauthorized # If we get a 401, this means a general authentication failure - logging.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') + _logger.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') return False # Update headers with new bearer token if present @@ -152,7 +153,7 @@ def authenticate(self, retry=False): if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: - logging.info(f'Authenticated successfully as {self.USERNAME}') + _logger.info(f'Authenticated successfully as {self.USERNAME}') return r_json['authenticated'] # Default, return false @@ -202,9 +203,9 @@ def api_post(self, url, params, json, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_post(url, params=params, json=json, retry=True) return r @@ -230,9 +231,9 @@ def api_post_uri(self, url, params, uri_list, retry=False): r_json = r.json() if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) return r @@ -255,14 +256,14 @@ def api_put(self, url, params, json, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) # Parse response r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_put(url, params=params, json=json, retry=True) return r @@ -284,14 +285,14 @@ def api_delete(self, url, params, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) # Parse response r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_delete(url, params=params, retry=True) return r @@ -307,15 +308,15 @@ def api_patch(self, url, operation, path, value, retry=False): @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ if url is None: - logging.error(f'Missing required URL argument') + _logger.error(f'Missing required URL argument') return None if path is None: - logging.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + _logger.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') return None if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE or operation == self.PatchOperation.MOVE) and value is None: # missing value required for add/replace/move operations - logging.error(f'Missing required "value" argument for add/replace/move operations') + _logger.error(f'Missing required "value" argument for add/replace/move operations') return None # compile patch data @@ -339,17 +340,17 @@ def api_patch(self, url, operation, path, value, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_patch(url, operation, path, value, True) elif r.status_code == 200: # 200 Success - logging.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') # Return the raw API response return r @@ -396,7 +397,7 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, dso = DSpaceObject(resource) dsos.append(dso) except (TypeError, ValueError) as err: - logging.error(f'error parsing search result json {err}') + _logger.error(f'error parsing search result json {err}') return dsos @@ -410,7 +411,7 @@ def fetch_resource(self, url, params=None): """ r = self.api_get(url, params, None) if r.status_code != 200: - logging.error(f'Error encountered fetching resource: {r.text}') + _logger.error(f'Error encountered fetching resource: {r.text}') return None # ValueError / JSON handling moved to static method return parse_json(r) @@ -429,7 +430,7 @@ def get_dso(self, url, uuid): url = f'{url}/{uuid}' return self.api_get(url, None, None) except ValueError: - logging.error(f'Invalid DSO UUID: {uuid}') + _logger.error(f'Invalid DSO UUID: {uuid}') return None def create_dso(self, url, params, data): @@ -446,9 +447,9 @@ def create_dso(self, url, params, data): if r.status_code == 201: # 201 Created - success! new_dso = parse_json(r) - logging.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') + _logger.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') else: - logging.error(f'create operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'create operation failed: {r.status_code}: {r.text} ({url})') return r def update_dso(self, dso, params=None): @@ -464,7 +465,7 @@ def update_dso(self, dso, params=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' + _logger.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' f'are supported by generic update_dso PUT.') return dso try: @@ -489,14 +490,14 @@ def update_dso(self, dso, params=None): if r.status_code == 200: # 200 OK - success! updated_dso = dso_type(parse_json(r)) - logging.info(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') + _logger.debug(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') return updated_dso else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') return None except ValueError as e: - logging.error("Error parsing DSO response", exc_info=True) + _logger.error("Error parsing DSO response", exc_info=True) return None def delete_dso(self, dso=None, url=None, params=None): @@ -511,11 +512,11 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error(f'Need a DSO or a URL to delete') + _logger.error(f'Need a DSO or a URL to delete') return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' + _logger.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' f'are supported by generic update_dso PUT.') return dso # Get self URI from HAL links @@ -525,13 +526,13 @@ def delete_dso(self, dso=None, url=None, params=None): r = self.api_delete(url, params=params) if r.status_code == 204: # 204 No Content - success! - logging.info(f'{url} was deleted sucessfully!') + _logger.info(f'{url} was deleted sucessfully!') return r else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') return None except ValueError as e: - logging.error(f'Error deleting DSO {dso.uuid}: {e}') + _logger.error(f'Error deleting DSO {dso.uuid}: {e}') return None # PAGINATION @@ -570,7 +571,7 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None): for resource in resources: bundles.append(Bundle(resource)) except ValueError as err: - logging.error(f'error parsing bundle results: {err}') + _logger.error(f'error parsing bundle results: {err}') return bundles @@ -607,7 +608,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): url = bundle.links['bitstreams']['href'] else: url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + _logger.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') # Perform the actual request. By now, our URL and parameter should be properly set params = {} if size is not None: @@ -658,23 +659,23 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat r = self.session.send(prepared_req) if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug('Updating token to ' + t) + _logger.debug('Updating token to ' + t) self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) if r.status_code == 403: r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.error('Already retried... something must be wrong') + _logger.error('Already retried... something must be wrong') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.create_bitstream(bundle, name, path, mime, metadata, True) if r.status_code == 201 or r.status_code == 200: # Success return Bitstream(api_resource=parse_json(r)) else: - logging.error(f'Error creating bitstream: {r.status_code}: {r.text}') + _logger.error(f'Error creating bitstream: {r.status_code}: {r.text}') return None def download_bitstream(self, uuid=None): @@ -715,14 +716,14 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False): url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid community UUID: {uuid}') + _logger.error(f'Invalid community UUID: {uuid}') return None if top: # Set new URL url = f'{url}/search/top' - logging.debug(f'Performing get on {url}') + _logger.debug(f'Performing get on {url}') # Perform actual get r_json = self.fetch_resource(url, params) # Empty list @@ -778,7 +779,7 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None) url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid collection UUID: {uuid}') + _logger.error(f'Invalid collection UUID: {uuid}') return None if community is not None: @@ -845,7 +846,7 @@ def get_item(self, uuid): url = f'{url}/{uuid}' return self.api_get(url, None, None) except ValueError: - logging.error(f'Invalid item UUID: {uuid}') + _logger.error(f'Invalid item UUID: {uuid}') return None def get_items(self): @@ -882,11 +883,11 @@ def create_item(self, parent, item): """ url = f'{self.API_ENDPOINT}/core/items' if parent is None: - logging.error('Need a parent UUID!') + _logger.error('Need a parent UUID!') return None params = {'owningCollection': parent} if not isinstance(item, Item): - logging.error('Need a valid item') + _logger.error('Need a valid item') return None return Item(api_resource=parse_json(self.create_dso(url, params=params, data=item.as_dict()))) @@ -898,7 +899,7 @@ def update_item(self, item): @return: """ if not isinstance(item, Item): - logging.error('Need a valid item') + _logger.error('Need a valid item') return None return self.update_dso(item, params=None) @@ -916,7 +917,7 @@ def add_metadata(self, dso, field, value, language=None, authority=None, confide """ if dso is None or field is None or value is None or not isinstance(dso, DSpaceObject): # TODO: separate these tests, and add better error handling - logging.error('Invalid or missing DSpace object, field or value string') + _logger.error('Invalid or missing DSpace object, field or value string') return self dso_type = type(dso) @@ -957,7 +958,7 @@ def create_user(self, user, token=None): def delete_user(self, user): if not isinstance(user, User): - logging.error(f'Must be a valid user') + _logger.error(f'Must be a valid user') return None return self.delete_dso(user) @@ -997,7 +998,7 @@ def create_group(self, group): def start_workflow(self, workspace_item): url = f'{self.API_ENDPOINT}/workflow/workflowitems' res = parse_json(self.api_post_uri(url, params=None, uri_list=workspace_item)) - logging.debug(res) + _logger.debug(res) # TODO: WIP def update_token(self, r): @@ -1009,11 +1010,11 @@ def update_token(self, r): :return: """ if not self.session: - logging.debug('Session state not found, setting...') + _logger.debug('Session state not found, setting...') self.session = requests.Session() if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug(f'Updating XSRF token to {t}') + _logger.debug(f'Updating XSRF token to {t}') # Update headers and cookies self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) @@ -1024,7 +1025,7 @@ def get_short_lived_token(self): @return: short lived Authorization token """ if not self.session: - logging.debug('Session state not found, setting...') + _logger.debug('Session state not found, setting...') self.session = requests.Session() url = f'{self.API_ENDPOINT}/authn/shortlivedtokens' @@ -1033,7 +1034,7 @@ def get_short_lived_token(self): if r_json is not None and 'token' in r_json: return r_json['token'] - logging.error('Could not retrieve short-lived token') + _logger.error('Could not retrieve short-lived token') return None def solr_query(self, query, filters=None, fields=None, start=0, rows=999999999): From 3279d5bd5b10c28815f10592fe1e0fa6c4e1e36e Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 5 Feb 2025 18:29:59 +0100 Subject: [PATCH 06/23] removed duplicate code --- dspace_rest_client/client.py | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 2633b19..3ee69bb 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -833,21 +833,6 @@ def create_collection(self, parent, data): params = {'parent': parent} return Collection(api_resource=parse_json(self.create_dso(url, params, data))) - def get_items(self): - """ - Get all items - @return: list of Item objects - """ - url = f'{self.API_ENDPOINT}/core/items' - items = list() - r = self.api_get(url) - r_json = parse_json(r) - if '_embedded' in r_json: - if 'items' in r_json['_embedded']: - for item_resource in r_json['_embedded']['items']: - items.append(Item(item_resource)) - return items - def get_item(self, uuid): """ Get an item, given its UUID @@ -864,29 +849,27 @@ def get_item(self, uuid): _logger.error(f'Invalid item UUID: {uuid}') return None - def get_items(self): + def get_items(self, page=0, size=20): """ Get all archived items for a logged-in administrator. Admin only! Usually you will want to use search or browse methods instead of this method @return: A list of items, or an error """ url = f'{self.API_ENDPOINT}/core/items' - # Empty item list - items = list() - # Perform the actual request - r_json = self.fetch_resource(url) - # Empty list items = list() + params = {} + if size is not None: + params['size'] = size + if page is not None: + params['page'] = page + r = self.api_get(url, params=params) + r_json = parse_json(response=r) if '_embedded' in r_json: - # This is a list of items if 'collections' in r_json['_embedded']: for item_resource in r_json['_embedded']['items']: items.append(Item(item_resource)) elif 'uuid' in r_json: - # This is a single item items.append(Item(r_json)) - - # Return list (populated or empty) return items def create_item(self, parent, item): From a906e75a301188410a0d9ddb98d7ec3908e755c9 Mon Sep 17 00:00:00 2001 From: jm Date: Sun, 16 Mar 2025 20:56:58 +0100 Subject: [PATCH 07/23] added remove_metadata and fixed get_item --- dspace_rest_client/client.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 3ee69bb..bab4bde 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -462,7 +462,7 @@ def create_dso(self, url, params, data): if r.status_code == 201: # 201 Created - success! new_dso = parse_json(r) - _logger.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') + _logger.info(f'Object type[{new_dso["type"]}] uuid:[{new_dso["uuid"]}] created successfully!') else: _logger.error(f'create operation failed: {r.status_code}: {r.text} ({url})') return r @@ -839,12 +839,13 @@ def get_item(self, uuid): @param uuid: the UUID of the item @return: the raw API response """ - # TODO - return constructed Item object instead, handling errors here? url = f'{self.API_ENDPOINT}/core/items' try: id = UUID(uuid).version url = f'{url}/{uuid}' - return self.api_get(url, None, None) + r = self.api_get(url, None, None) + r_json = parse_json(response=r) + return Item(r_json) except ValueError: _logger.error(f'Invalid item UUID: {uuid}') return None @@ -936,6 +937,25 @@ def add_metadata(self, dso, field, value, language=None, authority=None, confide return dso_type(api_resource=parse_json(r)) + def remove_metadata(self, dso, field): + """ + Remove metadata + """ + if dso is None or field is None or not isinstance(dso, DSpaceObject): + # TODO: separate these tests, and add better error handling + _logger.error('Invalid or missing DSpace object, field or value string') + return self + + dso_type = type(dso) + + # Place can be 0+ integer, or a hyphen - meaning "last" + path = f'/metadata/{field}' + url = dso.links['self']['href'] + + r = self.api_patch(url=url, operation=self.PatchOperation.REMOVE, path=path, value=None) + return dso_type(api_resource=parse_json(r)) + + def create_user(self, user, token=None): """ Create a user From 1415456cf4e464a3ced77e70e61a3af79507dccd Mon Sep 17 00:00:00 2001 From: jm Date: Mon, 17 Mar 2025 16:09:47 +0100 Subject: [PATCH 08/23] added owningCollection and request validation --- dspace_rest_client/client.py | 45 +++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index bab4bde..f421b6c 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -96,6 +96,7 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint self.solr = None + self._last_err = None try: import pysolr self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) @@ -114,6 +115,10 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.request_headers = {'Content-type': 'application/json', 'User-Agent': self.USER_AGENT} self.list_request_headers = {'Content-type': 'text/uri-list', 'User-Agent': self.USER_AGENT} + @property + def last_err(self): + return self._last_err + def authenticate(self, retry=False): """ Authenticate with the DSpace REST API. As with other operations, perform XSRF refreshes when necessary. @@ -159,6 +164,25 @@ def authenticate(self, retry=False): # Default, return false return False + def verify_response(self, r, id_str: str, as_json: bool = False): + """ + Verify response from API. If response is not 200, log error and return False. + """ + if r.status_code != 200: + _logger.error(f'Error response [{id_str}]: {r.status_code}: {r.text} ... [ {r.url} ]') + self._last_err = r + return False + + if as_json: + try: + r.json() + except ValueError: + _logger.error(f'Error parsing JSON response [{id_str}]: {r.text} ... [ {r.url} ]') + return False + + return True + + def refresh_token(self): """ If the DSPACE-XSRF-TOKEN appears, we need to update our local stored token and re-send our API request @@ -176,6 +200,7 @@ def api_get(self, url, params=None, data=None, headers=None): @param headers: any override headers (eg. with short-lived token for download) @return: Response from API """ + self._last_err = None if headers is None: headers = self.request_headers r = self.session.get(url, params=params, data=data, headers=headers) @@ -192,6 +217,7 @@ def api_post(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ + self._last_err = None r = self.session.post(url, json=json, params=params, headers=self.request_headers) self.update_token(r) @@ -235,6 +261,7 @@ def api_post_uri(self, url, params, uri_list, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ + self._last_err = None r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers) self.update_token(r) @@ -263,6 +290,7 @@ def api_put(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ + self._last_err = None r = self.session.put(url, params=params, json=json, headers=self.request_headers) self.update_token(r) @@ -292,6 +320,7 @@ def api_delete(self, url, params, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ + self._last_err = None r = self.session.delete(url, params=params, headers=self.request_headers) self.update_token(r) @@ -322,6 +351,7 @@ def api_patch(self, url, operation, path, value, retry=False): @return: @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ + self._last_err = None if url is None: logging.error('Missing required URL argument') return None @@ -873,6 +903,20 @@ def get_items(self, page=0, size=20): items.append(Item(r_json)) return items + def get_owningCollection(self, item_uuid): + """ + Get owningCollection + """ + url = f'{self.API_ENDPOINT}/core/items/{item_uuid}/owningCollection' + try: + r = self.api_get(url, None, None) + self.verify_response(r, f"item:{item_uuid}", True) + r_json = parse_json(response=r) + return Collection(r_json) + except ValueError: + _logger.error(f'Invalid owningCollection for UUID: {item_uuid}') + return None + def create_item(self, parent, item): """ Create an item beneath the given parent collection @@ -942,7 +986,6 @@ def remove_metadata(self, dso, field): Remove metadata """ if dso is None or field is None or not isinstance(dso, DSpaceObject): - # TODO: separate these tests, and add better error handling _logger.error('Invalid or missing DSpace object, field or value string') return self From bbc348310e5ff1f8c47e2559da885a23ca260a41 Mon Sep 17 00:00:00 2001 From: jm Date: Tue, 18 Mar 2025 13:36:08 +0100 Subject: [PATCH 09/23] enable result count, otherwise non authenticated might get into problems --- dspace_rest_client/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index f421b6c..a37caeb 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -401,7 +401,7 @@ def api_patch(self, url, operation, path, value, retry=False): return r # PAGINATION - def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, sort=None, dso_type=None): + def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, sort=None, dso_type=None, details=None): """ Do a basic search with optional query, filters and dsoType params. @param query: query string @@ -436,6 +436,8 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, # instead lots of 'does this key exist, etc etc' checks, just go for it and wrap in a try? try: + if details is not None: + details["page"] = r_json['_embedded']['searchResult']['page'] results = r_json['_embedded']['searchResult']['_embedded']['objects'] for result in results: resource = result['_embedded']['indexableObject'] @@ -653,7 +655,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): url = bundle.links['bitstreams']['href'] else: url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - _logger.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + _logger.info(f'Cannot find bundle bitstream links, will try to construct manually: {url}') # Perform the actual request. By now, our URL and parameter should be properly set params = {} if size is not None: From 89b6757d24283f8580d1dc8780dbbd27df6a152a Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 5 Nov 2025 23:18:01 +0100 Subject: [PATCH 10/23] docs: fix typos across repo - Fix 'bistreams' -> 'bitstreams' in docstring - Fix 'sucessfully' -> 'successfully' in log messages (2 instances) These spelling errors were found in comments and log messages and do not affect code behavior. --- dspace_rest_client/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index a37caeb..cb9c510 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -6,7 +6,7 @@ DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, particularly when creating, updating, retrieving and deleting DSpace Objects. This client library is a work in progress and currently only implements the most basic functionality. -It was originally created to assist with a migration of container structure, items and bistreams from a non-DSpace +It was originally created to assist with a migration of container structure, items and bitstreams from a non-DSpace system to a new DSpace 7 repository. It needs a lot of expansion: resource policies and permissions, validation of prepared objects and responses, @@ -537,7 +537,7 @@ def update_dso(self, dso, params=None): if r.status_code == 200: # 200 OK - success! updated_dso = dso_type(parse_json(r)) - _logger.debug(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') + _logger.debug(f'{updated_dso.type} {updated_dso.uuid} updated successfully!') return updated_dso else: _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') @@ -573,7 +573,7 @@ def delete_dso(self, dso=None, url=None, params=None): r = self.api_delete(url, params=params) if r.status_code == 204: # 204 No Content - success! - _logger.info(f'{url} was deleted sucessfully!') + _logger.info(f'{url} was deleted successfully!') return r else: _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') From 9c332d27d0ccaee2a04718153f6556479b88c749 Mon Sep 17 00:00:00 2001 From: jm Date: Tue, 20 Jan 2026 10:05:01 +0100 Subject: [PATCH 11/23] added resourcepolicy specific for dtq --- dspace_rest_client/client.py | 21 +++++++++++++++++++++ dspace_rest_client/models.py | 28 +++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index cb9c510..3540d7b 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -463,6 +463,27 @@ def fetch_resource(self, url, params=None): # ValueError / JSON handling moved to static method return parse_json(r) + def get_resourcepolicy(self, uuid, action='READ'): + """ + Fetch resource policies for a given resource UUID and action. + @param uuid: resource UUID to search for + @param action: action name to filter by (default: READ) + @return: Parsed JSON response from fetch_resource or None if error + """ + try: + # Validate UUID + id = UUID(uuid).version + url = f'{self.API_ENDPOINT}/authz/resourcepolicies/search/resource' + params = {'uuid': uuid} + if action is not None: + params['action'] = action + r_json = self.fetch_resource(url, params=params) + arr = r_json['_embedded'].get('resourcepolicies') or [] + return [ResourcePolicy(x) for x in arr] + except ValueError: + _logger.error(f'Invalid resource UUID: {uuid}') + return None + def get_dso(self, url, uuid): """ Base 'get DSpace Object' function. diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 21e3a3c..4f0530b 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -13,7 +13,7 @@ __all__ = ['DSpaceObject', 'HALResource', 'ExternalDataObject', 'SimpleDSpaceObject', 'Community', - 'Collection', 'Item', 'Bundle', 'Bitstream', 'User', 'Group'] + 'Collection', 'Item', 'Bundle', 'Bitstream', 'User', 'Group', 'ResourcePolicy'] class HALResource: @@ -512,3 +512,29 @@ def __init__(self, api_resource): super(RelationshipType, self).__init__(api_resource) +class ResourcePolicy(AddressableHALResource): + """ + DQ specific. Extends Addressable HAL Resource to model a resource policy. + """ + def __init__(self, api_resource: dict): + super(ResourcePolicy, self).__init__(api_resource) + api_resource = api_resource or {} + self.name = api_resource.get('name') + self.description = api_resource.get('description') + self.startDate = api_resource.get('startDate') + self.endDate = api_resource.get('endDate') + self.type = api_resource.get('type') + self.action = api_resource.get('action') + self.policyType = api_resource.get('policyType') + + def as_dict(self): + return { + 'id': self.id, + 'name': self.name, + 'type': self.type, + 'description': self.description, + 'startDate': self.startDate, + 'endDate': self.endDate, + 'action': self.action, + 'policyType': self.policyType, + } \ No newline at end of file From e46b57ae7509f49415d1825c58534117a67102f4 Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 21 Jan 2026 14:12:10 +0100 Subject: [PATCH 12/23] add None check --- dspace_rest_client/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 3540d7b..9e3196d 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -478,6 +478,9 @@ def get_resourcepolicy(self, uuid, action='READ'): if action is not None: params['action'] = action r_json = self.fetch_resource(url, params=params) + if '_embedded' not in (r_json or {}): + _logger.debug(f"No resource policies found for resource UUID: {uuid} [{url}]") + return None arr = r_json['_embedded'].get('resourcepolicies') or [] return [ResourcePolicy(x) for x in arr] except ValueError: From 5c4e9f0ec7a452202d911c5b45af5fd65c590670 Mon Sep 17 00:00:00 2001 From: jm Date: Thu, 22 Jan 2026 14:44:32 +0100 Subject: [PATCH 13/23] add resourcePolicy from d --- dspace_rest_client/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 4f0530b..07e8949 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -28,7 +28,9 @@ def __init__(self, api_resource=None): Default constructor @param api_resource: optional API resource (JSON) from a GET response or successful POST can populate instance """ + self._from_d = None if api_resource is not None: + self._from_d = api_resource if 'type' in api_resource: self.type = api_resource['type'] if '_links' in api_resource: @@ -135,6 +137,10 @@ def __init__(self, api_resource=None, dso=None): if '_links' in api_resource: self.links = api_resource['_links'].copy() + @property + def resourcePolicies(self): + return (self._from_d or {}).get('resourcePolicies') + def add_metadata(self, field, value, language=None, authority=None, confidence=-1, place=None): """ Add metadata to a DSO. This is performed on the local object only, it is not an API operation (see patch) From 259a9757764884a1c71c3dee980c2b63a9155706 Mon Sep 17 00:00:00 2001 From: Juraj Roka <95219754+jr-rk@users.noreply.github.com> Date: Thu, 12 Feb 2026 16:33:46 +0100 Subject: [PATCH 14/23] add group info to resourcePolicy --- dspace_rest_client/models.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 07e8949..26cbb45 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -532,7 +532,12 @@ def __init__(self, api_resource: dict): self.type = api_resource.get('type') self.action = api_resource.get('action') self.policyType = api_resource.get('policyType') - + self.groupName = None + self.groupUUID = None + if '_embedded' in api_resource: + if 'group' in api_resource['_embedded']: + self.groupName = api_resource['_embedded']['group'].get('name') + self.groupUUID = api_resource['_embedded']['group'].get('uuid') def as_dict(self): return { 'id': self.id, @@ -543,4 +548,6 @@ def as_dict(self): 'endDate': self.endDate, 'action': self.action, 'policyType': self.policyType, + 'groupName': self.groupName, + 'groupUUID': self.groupUUID, } \ No newline at end of file From 283e39c856e3cdc65cf3be55a98a0ec697020956 Mon Sep 17 00:00:00 2001 From: Juraj Roka <95219754+jr-rk@users.noreply.github.com> Date: Fri, 13 Feb 2026 14:25:32 +0100 Subject: [PATCH 15/23] fix group properties adding --- dspace_rest_client/models.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 26cbb45..903dd8a 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -532,9 +532,11 @@ def __init__(self, api_resource: dict): self.type = api_resource.get('type') self.action = api_resource.get('action') self.policyType = api_resource.get('policyType') - self.groupName = None - self.groupUUID = None - if '_embedded' in api_resource: + # Check for direct groupName/groupUUID (cached format from as_dict()) + self.groupName = api_resource.get('groupName') + self.groupUUID = api_resource.get('groupUUID') + # If not found, try extracting from _embedded structure (live API format) + if self.groupName is None and '_embedded' in api_resource: if 'group' in api_resource['_embedded']: self.groupName = api_resource['_embedded']['group'].get('name') self.groupUUID = api_resource['_embedded']['group'].get('uuid') From 59402c0d2ec83d3a54d7087a027b57f5eaadf0bd Mon Sep 17 00:00:00 2001 From: jm Date: Fri, 13 Feb 2026 23:39:19 +0100 Subject: [PATCH 16/23] fix: upstream bugfixes and proxy support Backports critical bugfixes and proxy support from upstream the-library-code/dspace-rest-python. Bugfixes: - Fix User model trailing commas that turned fields into tuples - Fix get_items() using wrong embedded key ('collections' -> 'items') - Fix InProgressSubmission step assigned from lastModified instead of step - Fix InProgressSubmission type assigned from lastModified instead of type - Fix EntityType type field overwriting label - Fix parse_json to handle None response safely Improvements: - Add proxy support via PROXY_URL env var and proxies constructor param - Add proxies to all HTTP methods (GET, POST, PUT, DELETE, PATCH, send) - Add proxies to authenticate status check GET - Add params parameter to api_patch method - Add embedded attribute to HALResource base class - Add ITER_PAGE_SIZE class variable (preparation for pagination) - Add upstream_ref/ to .gitignore --- .gitignore | 1 + dspace_rest_client/client.py | 51 ++++++++++++++++++++++++------------ dspace_rest_client/models.py | 22 +++++++++------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 0dc7c57..f0de7dc 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pypackages__/ env/ venv/ .idea/ +upstream_ref/ diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 9e3196d..80408b7 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -16,11 +16,12 @@ """ import json import logging +import os +from uuid import UUID import requests from requests import Request -import os -from uuid import UUID + from .models import * __all__ = ['DSpaceClient'] @@ -37,9 +38,13 @@ def parse_json(response): """ response_json = None try: - response_json = response.json() + if response is not None: + response_json = response.json() except ValueError as err: - _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + if response is not None: + _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + else: + _logger.error(f'Error parsing response JSON: {err}. Response is None') return response_json @@ -73,6 +78,8 @@ class DSpaceClient: if 'USER_AGENT' in os.environ: USER_AGENT = os.environ['USER_AGENT'] verbose = False + ITER_PAGE_SIZE = 20 + PROXY_DICT = dict(http=os.environ["PROXY_URL"],https=os.environ["PROXY_URL"]) if "PROXY_URL" in os.environ else dict() # Simple enum for patch operation types class PatchOperation: @@ -82,7 +89,7 @@ class PatchOperation: MOVE = 'move' def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWORD, solr_endpoint=SOLR_ENDPOINT, - solr_auth=SOLR_AUTH, fake_user_agent=False): + solr_auth=SOLR_AUTH, fake_user_agent=False, proxies=PROXY_DICT): """ Accept optional API endpoint, username, password arguments using the OS environment variables as defaults :param api_endpoint: base path to DSpace REST API, eg. http://localhost:8080/server/api @@ -95,6 +102,7 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.USERNAME = username self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint + self.proxies = proxies self.solr = None self._last_err = None try: @@ -128,7 +136,8 @@ def authenticate(self, retry=False): # Set headers for requests made during authentication # Get and update CSRF token r = self.session.post(self.LOGIN_URL, data={'user': self.USERNAME, 'password': self.PASSWORD}, - headers=self.auth_request_headers) + headers=self.auth_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -154,7 +163,8 @@ def authenticate(self, retry=False): self.session.headers.update({'Authorization': r.headers.get('Authorization')}) # Get and check authentication status - r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers) + r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers, + proxies=self.proxies) if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: @@ -203,7 +213,8 @@ def api_get(self, url, params=None, data=None, headers=None): self._last_err = None if headers is None: headers = self.request_headers - r = self.session.get(url, params=params, data=data, headers=headers) + r = self.session.get(url, params=params, data=data, headers=headers, + proxies=self.proxies) self.update_token(r) return r @@ -218,7 +229,8 @@ def api_post(self, url, params, json, retry=False): @return: Response from API """ self._last_err = None - r = self.session.post(url, json=json, params=params, headers=self.request_headers) + r = self.session.post(url, json=json, params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -262,7 +274,8 @@ def api_post_uri(self, url, params, uri_list, retry=False): @return: Response from API """ self._last_err = None - r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers) + r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -291,7 +304,8 @@ def api_put(self, url, params, json, retry=False): @return: Response from API """ self._last_err = None - r = self.session.put(url, params=params, json=json, headers=self.request_headers) + r = self.session.put(url, params=params, json=json, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -321,7 +335,8 @@ def api_delete(self, url, params, retry=False): @return: Response from API """ self._last_err = None - r = self.session.delete(url, params=params, headers=self.request_headers) + r = self.session.delete(url, params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -341,12 +356,13 @@ def api_delete(self, url, params, retry=False): return r - def api_patch(self, url, operation, path, value, retry=False): + def api_patch(self, url, operation, path, value, params=None, retry=False): """ @param url: DSpace REST API URL @param operation: 'add', 'remove', 'replace', or 'move' (see PatchOperation enumeration) @param path: path to perform operation - eg, metadata, withdrawn, etc. @param value: new value for add or replace operations, or 'original' path for move operations + @param params: Optional parameters @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md @@ -377,7 +393,8 @@ def api_patch(self, url, operation, path, value, retry=False): # set headers # perform patch request - r = self.session.patch(url, json=[data], headers=self.request_headers) + r = self.session.patch(url, json=[data], params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -392,7 +409,7 @@ def api_patch(self, url, operation, path, value, retry=False): _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: _logger.debug("Retrying request with updated CSRF token") - return self.api_patch(url, operation, path, value, True) + return self.api_patch(url, operation, path, value, params, True) elif r.status_code == 200: # 200 Success _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') @@ -727,7 +744,7 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat h.update({'Content-Encoding': 'gzip', 'User-Agent': self.USER_AGENT}) req = Request('POST', url, data=payload, headers=h, files=files) prepared_req = self.session.prepare_request(req) - r = self.session.send(prepared_req) + r = self.session.send(prepared_req, proxies=self.proxies) if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] _logger.debug('Updating token to ' + t) @@ -922,7 +939,7 @@ def get_items(self, page=0, size=20): r = self.api_get(url, params=params) r_json = parse_json(response=r) if '_embedded' in r_json: - if 'collections' in r_json['_embedded']: + if 'items' in r_json['_embedded']: for item_resource in r_json['_embedded']['items']: items.append(Item(item_resource)) elif 'uuid' in r_json: diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 903dd8a..f1c8550 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -37,6 +37,10 @@ def __init__(self, api_resource=None): self.links = api_resource['_links'].copy() else: self.links = {'self': {'href': None}} + if '_embedded' in api_resource: + self.embedded = api_resource['_embedded'].copy() + else: + self.embedded = {} class AddressableHALResource(HALResource): id = None @@ -421,12 +425,12 @@ class User(SimpleDSpaceObject): Extends DSpaceObject to implement specific attributes and methods for users (aka. EPersons) """ type = 'user' - name = None, - netid = None, - lastActive = None, - canLogIn = False, - email = None, - requireCertificate = False, + name = None + netid = None + lastActive = None + canLogIn = False + email = None + requireCertificate = False selfRegistered = False def __init__(self, api_resource=None): @@ -473,11 +477,11 @@ def __init__(self, api_resource): if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: - self.step = api_resource['lastModified'] + self.step = api_resource['step'] if 'sections' in api_resource: self.sections = api_resource['sections'].copy() if 'type' in api_resource: - self.lastModified = api_resource['lastModified'] + self.type = api_resource['type'] def as_dict(self): parent_dict = super(InProgressSubmission, self).as_dict() @@ -508,7 +512,7 @@ def __init__(self, api_resource): if 'label' in api_resource: self.label = api_resource['label'] if 'type' in api_resource: - self.label = api_resource['type'] + self.type = api_resource['type'] class RelationshipType(AddressableHALResource): """ From c7ca664cc9d748188d1550a6943380a4f756a234 Mon Sep 17 00:00:00 2001 From: Jozef Misutka <332350+vidiecan@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:57:18 +0100 Subject: [PATCH 17/23] Remove upstream_ref from .gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index f0de7dc..0dc7c57 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,3 @@ __pypackages__/ env/ venv/ .idea/ -upstream_ref/ From 291bc468f12c2fda1edbc220ea6361e45a27751a Mon Sep 17 00:00:00 2001 From: jm Date: Tue, 17 Feb 2026 00:38:04 +0100 Subject: [PATCH 18/23] add rp by ai --- dspace_rest_client/client.py | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 9e3196d..1810074 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -487,6 +487,52 @@ def get_resourcepolicy(self, uuid, action='READ'): _logger.error(f'Invalid resource UUID: {uuid}') return None + def create_resourcepolicy( + self, resource_uuid, group_uuid, action='READ', + policy_name=None, start_date=None, end_date=None, + ): + """ + Create a new resource policy for a given DSpace resource. + Uses POST /authz/resourcepolicies?resource=&resource-type=bitstream + @param resource_uuid: UUID of the target bitstream (or other resource) + @param group_uuid: UUID of the group to grant access to + @param action: action name (default: READ) + @param policy_name: optional policy name + @param start_date: optional start date string (ISO 8601) + @param end_date: optional end date string (ISO 8601) + @return: ResourcePolicy on success, None on failure + """ + try: + UUID(resource_uuid) + UUID(group_uuid) + except ValueError: + _logger.error(f'Invalid UUID: resource={resource_uuid}, group={group_uuid}') + return None + + url = f'{self.API_ENDPOINT}/authz/resourcepolicies' + params = {'resource': resource_uuid, 'resource-type': 'bitstream'} + data = { + 'action': action, + 'name': policy_name, + 'startDate': start_date, + 'endDate': end_date, + } + # Link to the group via the eperson-group URI + group_uri = f'{self.API_ENDPOINT}/eperson/groups/{group_uuid}' + + r = self.api_post(url, params=params, json=data) + if r.status_code == 201: + rp = ResourcePolicy(parse_json(r)) + _logger.info(f'Created resource policy id={rp.id} for resource {resource_uuid}') + # Now link the group to the newly created policy + rp_group_url = f'{self.API_ENDPOINT}/authz/resourcepolicies/{rp.id}/group' + self.api_post_uri(rp_group_url, params=None, uri_list=group_uri) + return rp + + _logger.error( + f'Failed to create resource policy: {r.status_code}: {r.text}') + return None + def get_dso(self, url, uuid): """ Base 'get DSpace Object' function. From 3b3063a50633be122f6667284ecdfd637c41086c Mon Sep 17 00:00:00 2001 From: Juraj Roka <95219754+jr-rk@users.noreply.github.com> Date: Tue, 17 Feb 2026 17:30:07 +0100 Subject: [PATCH 19/23] fix rp create/delete --- dspace_rest_client/client.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 1810074..4719a2c 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -478,9 +478,12 @@ def get_resourcepolicy(self, uuid, action='READ'): if action is not None: params['action'] = action r_json = self.fetch_resource(url, params=params) - if '_embedded' not in (r_json or {}): - _logger.debug(f"No resource policies found for resource UUID: {uuid} [{url}]") + if r_json is None: + _logger.error(f"API call failed for resource UUID: {uuid}") return None + if '_embedded' not in r_json: + _logger.debug(f"No resource policies found for resource UUID: {uuid} [{url}]") + return [] arr = r_json['_embedded'].get('resourcepolicies') or [] return [ResourcePolicy(x) for x in arr] except ValueError: @@ -489,17 +492,16 @@ def get_resourcepolicy(self, uuid, action='READ'): def create_resourcepolicy( self, resource_uuid, group_uuid, action='READ', - policy_name=None, start_date=None, end_date=None, + start_date=None, end_date=None, ): """ Create a new resource policy for a given DSpace resource. - Uses POST /authz/resourcepolicies?resource=&resource-type=bitstream + Uses POST /api/authz/resourcepolicies?resource=&group= @param resource_uuid: UUID of the target bitstream (or other resource) @param group_uuid: UUID of the group to grant access to @param action: action name (default: READ) - @param policy_name: optional policy name - @param start_date: optional start date string (ISO 8601) - @param end_date: optional end date string (ISO 8601) + @param start_date: optional start date string (ISO 8601, YYYY-MM-DD) + @param end_date: optional end date string (ISO 8601, YYYY-MM-DD) @return: ResourcePolicy on success, None on failure """ try: @@ -510,23 +512,23 @@ def create_resourcepolicy( return None url = f'{self.API_ENDPOINT}/authz/resourcepolicies' - params = {'resource': resource_uuid, 'resource-type': 'bitstream'} + params = { + 'resource': resource_uuid, + 'group': group_uuid, + } data = { 'action': action, - 'name': policy_name, - 'startDate': start_date, - 'endDate': end_date, + 'type': 'resourcepolicy', } - # Link to the group via the eperson-group URI - group_uri = f'{self.API_ENDPOINT}/eperson/groups/{group_uuid}' + if start_date is not None: + data['startDate'] = start_date + if end_date is not None: + data['endDate'] = end_date r = self.api_post(url, params=params, json=data) - if r.status_code == 201: + if r.status_code in (200, 201): rp = ResourcePolicy(parse_json(r)) _logger.info(f'Created resource policy id={rp.id} for resource {resource_uuid}') - # Now link the group to the newly created policy - rp_group_url = f'{self.API_ENDPOINT}/authz/resourcepolicies/{rp.id}/group' - self.api_post_uri(rp_group_url, params=None, uri_list=group_uri) return rp _logger.error( From 1121bc7e28611db577fa3dfeb8709091c1f0971e Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 18 Feb 2026 10:02:35 +0100 Subject: [PATCH 20/23] added repr to ResourcePolicy --- dspace_rest_client/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index 903dd8a..e57a339 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -552,4 +552,7 @@ def as_dict(self): 'policyType': self.policyType, 'groupName': self.groupName, 'groupUUID': self.groupUUID, - } \ No newline at end of file + } + + def __repr__(self): + return f"ResourcePolicy: {self.name} [{self.groupName}] [action: {self.action}] [type: {self.type}]" \ No newline at end of file From 9f3cd618249fafce8cfa8f20fbeec3f997d9867b Mon Sep 17 00:00:00 2001 From: jm Date: Thu, 19 Feb 2026 15:47:16 +0100 Subject: [PATCH 21/23] reauth --- dspace_rest_client/client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 4719a2c..fe4684a 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -781,14 +781,13 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat _logger.debug('Updating token to ' + t) self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) - if r.status_code == 403: + if not retry and r.status_code in (401, 403): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: - if retry: - _logger.error('Already retried... something must be wrong') - else: - _logger.debug("Retrying request with updated CSRF token") - return self.create_bitstream(bundle, name, path, mime, metadata, True) + _logger.debug("Retrying request with updated CSRF token") + else: + self.authenticate() + return self.create_bitstream(bundle, name, path, mime, metadata, True) if r.status_code == 201 or r.status_code == 200: # Success From e1c05a99bb598740ea4d072a58d576b562408766 Mon Sep 17 00:00:00 2001 From: jm Date: Mon, 23 Feb 2026 11:29:02 +0100 Subject: [PATCH 22/23] better logs --- dspace_rest_client/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index ff9f0dd..8e238f2 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -496,15 +496,14 @@ def get_resourcepolicy(self, uuid, action='READ'): params['action'] = action r_json = self.fetch_resource(url, params=params) if r_json is None: - _logger.error(f"API call failed for resource UUID: {uuid}") return None if '_embedded' not in r_json: _logger.debug(f"No resource policies found for resource UUID: {uuid} [{url}]") return [] arr = r_json['_embedded'].get('resourcepolicies') or [] return [ResourcePolicy(x) for x in arr] - except ValueError: - _logger.error(f'Invalid resource UUID: {uuid}') + except ValueError as e: + _logger.error(f'Invalid resource UUID: {uuid} - {e}') return None def create_resourcepolicy( From 51b65fe7970348b6b6e78a1adf90b1f2601264ed Mon Sep 17 00:00:00 2001 From: jm Date: Tue, 24 Feb 2026 12:45:03 +0100 Subject: [PATCH 23/23] add timeout --- dspace_rest_client/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 8e238f2..b2d3a89 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -218,7 +218,7 @@ def api_get(self, url, params=None, data=None, headers=None): self.update_token(r) return r - def api_post(self, url, params, json, retry=False): + def api_post(self, url, params, json, retry=False, timeout=None): """ Perform a POST request. Refresh XSRF token if necessary. POSTs are typically used to create objects. @@ -230,7 +230,7 @@ def api_post(self, url, params, json, retry=False): """ self._last_err = None r = self.session.post(url, json=json, params=params, headers=self.request_headers, - proxies=self.proxies) + proxies=self.proxies, timeout=timeout) self.update_token(r) if r.status_code == 403: @@ -244,7 +244,7 @@ def api_post(self, url, params, json, retry=False): _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: _logger.debug("Retrying request with updated CSRF token") - return self.api_post(url, params=params, json=json, retry=True) + return self.api_post(url, params=params, json=json, retry=True, timeout=timeout) # we need to log in again, if there is login error. This is a bad # solution copied from the past @@ -260,7 +260,7 @@ def api_post(self, url, params, json, retry=False): self.authenticate() # Try to authenticate and repeat the request 3 times - # if it won't happen log error - return self.api_post(url, params=params, json=json, retry=False) + return self.api_post(url, params=params, json=json, retry=False, timeout=timeout) return r def api_post_uri(self, url, params, uri_list, retry=False):