diff --git a/console.py b/console.py index 795d6a7..150aaec 100644 --- a/console.py +++ b/console.py @@ -1,7 +1,5 @@ from dspace_rest_client.client import DSpaceClient -from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream import code -import logging import os # The DSpace client will look for the same environment variables but we can also look for them here explicitly @@ -22,7 +20,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) code.interact(local=locals()) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index c651bff..b2d3a89 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -6,7 +6,7 @@ DSpace REST API client library. Intended to make interacting with DSpace in Python 3 easier, particularly when creating, updating, retrieving and deleting DSpace Objects. This client library is a work in progress and currently only implements the most basic functionality. -It was originally created to assist with a migration of container structure, items and bistreams from a non-DSpace +It was originally created to assist with a migration of container structure, items and bitstreams from a non-DSpace system to a new DSpace 7 repository. It needs a lot of expansion: resource policies and permissions, validation of prepared objects and responses, @@ -16,17 +16,18 @@ """ import json import logging +import os +from uuid import UUID import requests from requests import Request -import pysolr -import os -from uuid import UUID + from .models import * __all__ = ['DSpaceClient'] logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) +_logger = logging.getLogger("dspace.client") def parse_json(response): @@ -37,9 +38,13 @@ def parse_json(response): """ response_json = None try: - response_json = response.json() + if response is not None: + response_json = response.json() except ValueError as err: - logging.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + if response is not None: + _logger.error(f'Error parsing response JSON: {err}. Body text: {response.text}') + else: + _logger.error(f'Error parsing response JSON: {err}. Response is None') return response_json @@ -73,6 +78,8 @@ class DSpaceClient: if 'USER_AGENT' in os.environ: USER_AGENT = os.environ['USER_AGENT'] verbose = False + ITER_PAGE_SIZE = 20 + PROXY_DICT = dict(http=os.environ["PROXY_URL"],https=os.environ["PROXY_URL"]) if "PROXY_URL" in os.environ else dict() # Simple enum for patch operation types class PatchOperation: @@ -82,7 +89,7 @@ class PatchOperation: MOVE = 'move' def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWORD, solr_endpoint=SOLR_ENDPOINT, - solr_auth=SOLR_AUTH, fake_user_agent=False): + solr_auth=SOLR_AUTH, fake_user_agent=False, proxies=PROXY_DICT): """ Accept optional API endpoint, username, password arguments using the OS environment variables as defaults :param api_endpoint: base path to DSpace REST API, eg. http://localhost:8080/server/api @@ -95,7 +102,14 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.USERNAME = username self.PASSWORD = password self.SOLR_ENDPOINT = solr_endpoint - self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) + self.proxies = proxies + self.solr = None + self._last_err = None + try: + import pysolr + self.solr = pysolr.Solr(url=solr_endpoint, always_commit=True, timeout=300, auth=solr_auth) + except Exception: + pass # If fake_user_agent was specified, use this string that is known (as of 2023-12-03) to succeed with # requests to Cloudfront-protected API endpoints (tested on demo.dspace.org) # Otherwise, the user agent will be the more helpful and accurate default of 'DSpace Python REST Client' @@ -109,6 +123,10 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.request_headers = {'Content-type': 'application/json', 'User-Agent': self.USER_AGENT} self.list_request_headers = {'Content-type': 'text/uri-list', 'User-Agent': self.USER_AGENT} + @property + def last_err(self): + return self._last_err + def authenticate(self, retry=False): """ Authenticate with the DSpace REST API. As with other operations, perform XSRF refreshes when necessary. @@ -118,7 +136,8 @@ def authenticate(self, retry=False): # Set headers for requests made during authentication # Get and update CSRF token r = self.session.post(self.LOGIN_URL, data={'user': self.USERNAME, 'password': self.PASSWORD}, - headers=self.auth_request_headers) + headers=self.auth_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -127,16 +146,16 @@ def authenticate(self, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it if retry: - logging.error(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.error(f'Too many retries updating token: {r.status_code}: {r.text}') return False else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.authenticate(retry=True) if r.status_code == 401: # 401 Unauthorized # If we get a 401, this means a general authentication failure - logging.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') + _logger.error(f'Authentication failure: invalid credentials for user {self.USERNAME}') return False # Update headers with new bearer token if present @@ -144,16 +163,36 @@ def authenticate(self, retry=False): self.session.headers.update({'Authorization': r.headers.get('Authorization')}) # Get and check authentication status - r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers) + r = self.session.get(f'{self.API_ENDPOINT}/authn/status', headers=self.request_headers, + proxies=self.proxies) if r.status_code == 200: r_json = parse_json(r) if 'authenticated' in r_json and r_json['authenticated'] is True: - logging.info(f'Authenticated successfully as {self.USERNAME}') + _logger.info(f'Authenticated successfully as {self.USERNAME}') return r_json['authenticated'] # Default, return false return False + def verify_response(self, r, id_str: str, as_json: bool = False): + """ + Verify response from API. If response is not 200, log error and return False. + """ + if r.status_code != 200: + _logger.error(f'Error response [{id_str}]: {r.status_code}: {r.text} ... [ {r.url} ]') + self._last_err = r + return False + + if as_json: + try: + r.json() + except ValueError: + _logger.error(f'Error parsing JSON response [{id_str}]: {r.text} ... [ {r.url} ]') + return False + + return True + + def refresh_token(self): """ If the DSPACE-XSRF-TOKEN appears, we need to update our local stored token and re-send our API request @@ -171,13 +210,15 @@ def api_get(self, url, params=None, data=None, headers=None): @param headers: any override headers (eg. with short-lived token for download) @return: Response from API """ + self._last_err = None if headers is None: headers = self.request_headers - r = self.session.get(url, params=params, data=data, headers=headers) + r = self.session.get(url, params=params, data=data, headers=headers, + proxies=self.proxies) self.update_token(r) return r - def api_post(self, url, params, json, retry=False): + def api_post(self, url, params, json, retry=False, timeout=None): """ Perform a POST request. Refresh XSRF token if necessary. POSTs are typically used to create objects. @@ -187,7 +228,9 @@ def api_post(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ - r = self.session.post(url, json=json, params=params, headers=self.request_headers) + self._last_err = None + r = self.session.post(url, json=json, params=params, headers=self.request_headers, + proxies=self.proxies, timeout=timeout) self.update_token(r) if r.status_code == 403: @@ -198,11 +241,26 @@ def api_post(self, url, params, json, retry=False): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") - return self.api_post(url, params=params, json=json, retry=True) + _logger.debug("Retrying request with updated CSRF token") + return self.api_post(url, params=params, json=json, retry=True, timeout=timeout) + # we need to log in again, if there is login error. This is a bad + # solution copied from the past + elif r.status_code == 401: + r_json = parse_json(r) + if 'message' in r_json and 'Authentication is required' in r_json['message']: + if retry: + logging.error( + 'API Post: Already retried... something must be wrong') + else: + logging.debug("API Post: Retrying request with updated CSRF token") + # try to authenticate + self.authenticate() + # Try to authenticate and repeat the request 3 times - + # if it won't happen log error + return self.api_post(url, params=params, json=json, retry=False, timeout=timeout) return r def api_post_uri(self, url, params, uri_list, retry=False): @@ -215,7 +273,9 @@ def api_post_uri(self, url, params, uri_list, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ - r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers) + self._last_err = None + r = self.session.post(url, data=uri_list, params=params, headers=self.list_request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -226,9 +286,9 @@ def api_post_uri(self, url, params, uri_list, retry=False): r_json = r.json() if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) return r @@ -243,7 +303,9 @@ def api_put(self, url, params, json, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ - r = self.session.put(url, params=params, json=json, headers=self.request_headers) + self._last_err = None + r = self.session.put(url, params=params, json=json, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -251,14 +313,14 @@ def api_put(self, url, params, json, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) # Parse response r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_put(url, params=params, json=json, retry=True) return r @@ -272,7 +334,9 @@ def api_delete(self, url, params, retry=False): @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ - r = self.session.delete(url, params=params, headers=self.request_headers) + self._last_err = None + r = self.session.delete(url, params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -280,38 +344,40 @@ def api_delete(self, url, params, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) # Parse response r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_delete(url, params=params, retry=True) return r - def api_patch(self, url, operation, path, value, retry=False): + def api_patch(self, url, operation, path, value, params=None, retry=False): """ @param url: DSpace REST API URL @param operation: 'add', 'remove', 'replace', or 'move' (see PatchOperation enumeration) @param path: path to perform operation - eg, metadata, withdrawn, etc. @param value: new value for add or replace operations, or 'original' path for move operations + @param params: Optional parameters @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ + self._last_err = None if url is None: - logging.error(f'Missing required URL argument') + logging.error('Missing required URL argument') return None if path is None: - logging.error(f'Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + logging.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language') return None if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE or operation == self.PatchOperation.MOVE) and value is None: # missing value required for add/replace/move operations - logging.error(f'Missing required "value" argument for add/replace/move operations') + logging.error('Missing required "value" argument for add/replace/move operations') return None # compile patch data @@ -327,7 +393,8 @@ def api_patch(self, url, operation, path, value, retry=False): # set headers # perform patch request - r = self.session.patch(url, json=[data], headers=self.request_headers) + r = self.session.patch(url, json=[data], params=params, headers=self.request_headers, + proxies=self.proxies) self.update_token(r) if r.status_code == 403: @@ -335,23 +402,23 @@ def api_patch(self, url, operation, path, value, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") - return self.api_patch(url, operation, path, value, True) + _logger.debug("Retrying request with updated CSRF token") + return self.api_patch(url, operation, path, value, params, True) elif r.status_code == 200: # 200 Success - logging.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') + _logger.info(f'successful patch update to {r.json()["type"]} {r.json()["id"]}') # Return the raw API response return r # PAGINATION - def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, sort=None, dso_type=None): + def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, sort=None, dso_type=None, details=None): """ Do a basic search with optional query, filters and dsoType params. @param query: query string @@ -386,13 +453,15 @@ def search_objects(self, query=None, scope=None, filters=None, page=0, size=20, # instead lots of 'does this key exist, etc etc' checks, just go for it and wrap in a try? try: + if details is not None: + details["page"] = r_json['_embedded']['searchResult']['page'] results = r_json['_embedded']['searchResult']['_embedded']['objects'] for result in results: resource = result['_embedded']['indexableObject'] dso = DSpaceObject(resource) dsos.append(dso) except (TypeError, ValueError) as err: - logging.error(f'error parsing search result json {err}') + _logger.error(f'error parsing search result json {err}') return dsos @@ -406,11 +475,82 @@ def fetch_resource(self, url, params=None): """ r = self.api_get(url, params, None) if r.status_code != 200: - logging.error(f'Error encountered fetching resource: {r.text}') + _logger.error(f'Error encountered fetching resource: {r.text}') return None # ValueError / JSON handling moved to static method return parse_json(r) + def get_resourcepolicy(self, uuid, action='READ'): + """ + Fetch resource policies for a given resource UUID and action. + @param uuid: resource UUID to search for + @param action: action name to filter by (default: READ) + @return: Parsed JSON response from fetch_resource or None if error + """ + try: + # Validate UUID + id = UUID(uuid).version + url = f'{self.API_ENDPOINT}/authz/resourcepolicies/search/resource' + params = {'uuid': uuid} + if action is not None: + params['action'] = action + r_json = self.fetch_resource(url, params=params) + if r_json is None: + return None + if '_embedded' not in r_json: + _logger.debug(f"No resource policies found for resource UUID: {uuid} [{url}]") + return [] + arr = r_json['_embedded'].get('resourcepolicies') or [] + return [ResourcePolicy(x) for x in arr] + except ValueError as e: + _logger.error(f'Invalid resource UUID: {uuid} - {e}') + return None + + def create_resourcepolicy( + self, resource_uuid, group_uuid, action='READ', + start_date=None, end_date=None, + ): + """ + Create a new resource policy for a given DSpace resource. + Uses POST /api/authz/resourcepolicies?resource=&group= + @param resource_uuid: UUID of the target bitstream (or other resource) + @param group_uuid: UUID of the group to grant access to + @param action: action name (default: READ) + @param start_date: optional start date string (ISO 8601, YYYY-MM-DD) + @param end_date: optional end date string (ISO 8601, YYYY-MM-DD) + @return: ResourcePolicy on success, None on failure + """ + try: + UUID(resource_uuid) + UUID(group_uuid) + except ValueError: + _logger.error(f'Invalid UUID: resource={resource_uuid}, group={group_uuid}') + return None + + url = f'{self.API_ENDPOINT}/authz/resourcepolicies' + params = { + 'resource': resource_uuid, + 'group': group_uuid, + } + data = { + 'action': action, + 'type': 'resourcepolicy', + } + if start_date is not None: + data['startDate'] = start_date + if end_date is not None: + data['endDate'] = end_date + + r = self.api_post(url, params=params, json=data) + if r.status_code in (200, 201): + rp = ResourcePolicy(parse_json(r)) + _logger.info(f'Created resource policy id={rp.id} for resource {resource_uuid}') + return rp + + _logger.error( + f'Failed to create resource policy: {r.status_code}: {r.text}') + return None + def get_dso(self, url, uuid): """ Base 'get DSpace Object' function. @@ -425,7 +565,7 @@ def get_dso(self, url, uuid): url = f'{url}/{uuid}' return self.api_get(url, None, None) except ValueError: - logging.error(f'Invalid DSO UUID: {uuid}') + _logger.error(f'Invalid DSO UUID: {uuid}') return None def create_dso(self, url, params, data): @@ -442,9 +582,9 @@ def create_dso(self, url, params, data): if r.status_code == 201: # 201 Created - success! new_dso = parse_json(r) - logging.info(f'{new_dso["type"]} {new_dso["uuid"]} created successfully!') + _logger.info(f'Object type[{new_dso["type"]}] uuid:[{new_dso["uuid"]}] created successfully!') else: - logging.error(f'create operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'create operation failed: {r.status_code}: {r.text} ({url})') return r def update_dso(self, dso, params=None): @@ -460,8 +600,8 @@ def update_dso(self, dso, params=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) ' + 'are supported by generic update_dso PUT.') return dso try: # Get self URI from HAL links @@ -485,14 +625,14 @@ def update_dso(self, dso, params=None): if r.status_code == 200: # 200 OK - success! updated_dso = dso_type(parse_json(r)) - logging.info(f'{updated_dso.type} {updated_dso.uuid} updated sucessfully!') + _logger.debug(f'{updated_dso.type} {updated_dso.uuid} updated successfully!') return updated_dso else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') return None except ValueError as e: - logging.error("Error parsing DSO response", exc_info=True) + _logger.error("Error parsing DSO response", exc_info=True) return None def delete_dso(self, dso=None, url=None, params=None): @@ -507,12 +647,12 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error(f'Need a DSO or a URL to delete') + logging.error('Need a DSO or a URL to delete') return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error(f'Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' - f'are supported by generic update_dso PUT.') + logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' + 'are supported by generic update_dso PUT.') return dso # Get self URI from HAL links url = dso.links['self']['href'] @@ -521,13 +661,13 @@ def delete_dso(self, dso=None, url=None, params=None): r = self.api_delete(url, params=params) if r.status_code == 204: # 204 No Content - success! - logging.info(f'{url} was deleted sucessfully!') + _logger.info(f'{url} was deleted successfully!') return r else: - logging.error(f'update operation failed: {r.status_code}: {r.text} ({url})') + _logger.error(f'update operation failed: {r.status_code}: {r.text} ({url})') return None except ValueError as e: - logging.error(f'Error deleting DSO {dso.uuid}: {e}') + _logger.error(f'Error deleting DSO {dso.uuid}: {e}') return None # PAGINATION @@ -566,7 +706,7 @@ def get_bundles(self, parent=None, uuid=None, page=0, size=20, sort=None): for resource in resources: bundles.append(Bundle(resource)) except ValueError as err: - logging.error(f'error parsing bundle results: {err}') + _logger.error(f'error parsing bundle results: {err}') return bundles @@ -603,7 +743,7 @@ def get_bitstreams(self, uuid=None, bundle=None, page=0, size=20, sort=None): url = bundle.links['bitstreams']['href'] else: url = f'{self.API_ENDPOINT}/core/bundles/{bundle.uuid}/bitstreams' - logging.warning(f'Cannot find bundle bitstream links, will try to construct manually: {url}') + _logger.info(f'Cannot find bundle bitstream links, will try to construct manually: {url}') # Perform the actual request. By now, our URL and parameter should be properly set params = {} if size is not None: @@ -651,26 +791,25 @@ def create_bitstream(self, bundle=None, name=None, path=None, mime=None, metadat h.update({'Content-Encoding': 'gzip', 'User-Agent': self.USER_AGENT}) req = Request('POST', url, data=payload, headers=h, files=files) prepared_req = self.session.prepare_request(req) - r = self.session.send(prepared_req) + r = self.session.send(prepared_req, proxies=self.proxies) if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug('Updating token to ' + t) + _logger.debug('Updating token to ' + t) self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) - if r.status_code == 403: + if not retry and r.status_code in (401, 403): r_json = parse_json(r) if 'message' in r_json and 'CSRF token' in r_json['message']: - if retry: - logging.error('Already retried... something must be wrong') - else: - logging.debug("Retrying request with updated CSRF token") - return self.create_bitstream(bundle, name, path, mime, metadata, True) + _logger.debug("Retrying request with updated CSRF token") + else: + self.authenticate() + return self.create_bitstream(bundle, name, path, mime, metadata, True) if r.status_code == 201 or r.status_code == 200: # Success return Bitstream(api_resource=parse_json(r)) else: - logging.error(f'Error creating bitstream: {r.status_code}: {r.text}') + _logger.error(f'Error creating bitstream: {r.status_code}: {r.text}') return None def download_bitstream(self, uuid=None): @@ -711,14 +850,14 @@ def get_communities(self, uuid=None, page=0, size=20, sort=None, top=False): url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid community UUID: {uuid}') + _logger.error(f'Invalid community UUID: {uuid}') return None if top: # Set new URL url = f'{url}/search/top' - logging.debug(f'Performing get on {url}') + _logger.debug(f'Performing get on {url}') # Perform actual get r_json = self.fetch_resource(url, params) # Empty list @@ -774,7 +913,7 @@ def get_collections(self, uuid=None, community=None, page=0, size=20, sort=None) url = f'{url}/{uuid}' params = None except ValueError: - logging.error(f'Invalid collection UUID: {uuid}') + _logger.error(f'Invalid collection UUID: {uuid}') return None if community is not None: @@ -819,41 +958,54 @@ def get_item(self, uuid): @param uuid: the UUID of the item @return: the raw API response """ - # TODO - return constructed Item object instead, handling errors here? url = f'{self.API_ENDPOINT}/core/items' try: id = UUID(uuid).version url = f'{url}/{uuid}' - return self.api_get(url, None, None) + r = self.api_get(url, None, None) + r_json = parse_json(response=r) + return Item(r_json) except ValueError: - logging.error(f'Invalid item UUID: {uuid}') + _logger.error(f'Invalid item UUID: {uuid}') return None - def get_items(self): + def get_items(self, page=0, size=20): """ Get all archived items for a logged-in administrator. Admin only! Usually you will want to use search or browse methods instead of this method @return: A list of items, or an error """ url = f'{self.API_ENDPOINT}/core/items' - # Empty item list - items = list() - # Perform the actual request - r_json = self.fetch_resource(url) - # Empty list items = list() + params = {} + if size is not None: + params['size'] = size + if page is not None: + params['page'] = page + r = self.api_get(url, params=params) + r_json = parse_json(response=r) if '_embedded' in r_json: - # This is a list of items - if 'collections' in r_json['_embedded']: + if 'items' in r_json['_embedded']: for item_resource in r_json['_embedded']['items']: items.append(Item(item_resource)) elif 'uuid' in r_json: - # This is a single item items.append(Item(r_json)) - - # Return list (populated or empty) return items + def get_owningCollection(self, item_uuid): + """ + Get owningCollection + """ + url = f'{self.API_ENDPOINT}/core/items/{item_uuid}/owningCollection' + try: + r = self.api_get(url, None, None) + self.verify_response(r, f"item:{item_uuid}", True) + r_json = parse_json(response=r) + return Collection(r_json) + except ValueError: + _logger.error(f'Invalid owningCollection for UUID: {item_uuid}') + return None + def create_item(self, parent, item): """ Create an item beneath the given parent collection @@ -863,11 +1015,11 @@ def create_item(self, parent, item): """ url = f'{self.API_ENDPOINT}/core/items' if parent is None: - logging.error('Need a parent UUID!') + _logger.error('Need a parent UUID!') return None params = {'owningCollection': parent} if not isinstance(item, Item): - logging.error('Need a valid item') + _logger.error('Need a valid item') return None return Item(api_resource=parse_json(self.create_dso(url, params=params, data=item.as_dict()))) @@ -879,7 +1031,7 @@ def update_item(self, item): @return: """ if not isinstance(item, Item): - logging.error('Need a valid item') + _logger.error('Need a valid item') return None return self.update_dso(item, params=None) @@ -897,7 +1049,7 @@ def add_metadata(self, dso, field, value, language=None, authority=None, confide """ if dso is None or field is None or value is None or not isinstance(dso, DSpaceObject): # TODO: separate these tests, and add better error handling - logging.error('Invalid or missing DSpace object, field or value string') + _logger.error('Invalid or missing DSpace object, field or value string') return self dso_type = type(dso) @@ -918,6 +1070,24 @@ def add_metadata(self, dso, field, value, language=None, authority=None, confide return dso_type(api_resource=parse_json(r)) + def remove_metadata(self, dso, field): + """ + Remove metadata + """ + if dso is None or field is None or not isinstance(dso, DSpaceObject): + _logger.error('Invalid or missing DSpace object, field or value string') + return self + + dso_type = type(dso) + + # Place can be 0+ integer, or a hyphen - meaning "last" + path = f'/metadata/{field}' + url = dso.links['self']['href'] + + r = self.api_patch(url=url, operation=self.PatchOperation.REMOVE, path=path, value=None) + return dso_type(api_resource=parse_json(r)) + + def create_user(self, user, token=None): """ Create a user @@ -938,7 +1108,7 @@ def create_user(self, user, token=None): def delete_user(self, user): if not isinstance(user, User): - logging.error(f'Must be a valid user') + logging.error('Must be a valid user') return None return self.delete_dso(user) @@ -978,7 +1148,7 @@ def create_group(self, group): def start_workflow(self, workspace_item): url = f'{self.API_ENDPOINT}/workflow/workflowitems' res = parse_json(self.api_post_uri(url, params=None, uri_list=workspace_item)) - logging.debug(res) + _logger.debug(res) # TODO: WIP def update_token(self, r): @@ -990,11 +1160,11 @@ def update_token(self, r): :return: """ if not self.session: - logging.debug('Session state not found, setting...') + _logger.debug('Session state not found, setting...') self.session = requests.Session() if 'DSPACE-XSRF-TOKEN' in r.headers: t = r.headers['DSPACE-XSRF-TOKEN'] - logging.debug(f'Updating XSRF token to {t}') + _logger.debug(f'Updating XSRF token to {t}') # Update headers and cookies self.session.headers.update({'X-XSRF-Token': t}) self.session.cookies.update({'X-XSRF-Token': t}) @@ -1005,7 +1175,7 @@ def get_short_lived_token(self): @return: short lived Authorization token """ if not self.session: - logging.debug('Session state not found, setting...') + _logger.debug('Session state not found, setting...') self.session = requests.Session() url = f'{self.API_ENDPOINT}/authn/shortlivedtokens' @@ -1014,7 +1184,7 @@ def get_short_lived_token(self): if r_json is not None and 'token' in r_json: return r_json['token'] - logging.error('Could not retrieve short-lived token') + _logger.error('Could not retrieve short-lived token') return None def solr_query(self, query, filters=None, fields=None, start=0, rows=999999999): diff --git a/dspace_rest_client/models.py b/dspace_rest_client/models.py index de463ac..0f64357 100644 --- a/dspace_rest_client/models.py +++ b/dspace_rest_client/models.py @@ -9,17 +9,11 @@ @author Kim Shepherd """ -import code import json -import logging -import requests -from requests import Request -import os -from uuid import UUID __all__ = ['DSpaceObject', 'HALResource', 'ExternalDataObject', 'SimpleDSpaceObject', 'Community', - 'Collection', 'Item', 'Bundle', 'Bitstream', 'User', 'Group'] + 'Collection', 'Item', 'Bundle', 'Bitstream', 'User', 'Group', 'ResourcePolicy'] class HALResource: @@ -34,13 +28,19 @@ def __init__(self, api_resource=None): Default constructor @param api_resource: optional API resource (JSON) from a GET response or successful POST can populate instance """ + self._from_d = None if api_resource is not None: + self._from_d = api_resource if 'type' in api_resource: self.type = api_resource['type'] if '_links' in api_resource: self.links = api_resource['_links'].copy() else: self.links = {'self': {'href': None}} + if '_embedded' in api_resource: + self.embedded = api_resource['_embedded'].copy() + else: + self.embedded = {} class AddressableHALResource(HALResource): id = None @@ -141,6 +141,10 @@ def __init__(self, api_resource=None, dso=None): if '_links' in api_resource: self.links = api_resource['_links'].copy() + @property + def resourcePolicies(self): + return (self._from_d or {}).get('resourcePolicies') + def add_metadata(self, field, value, language=None, authority=None, confidence=-1, place=None): """ Add metadata to a DSO. This is performed on the local object only, it is not an API operation (see patch) @@ -421,12 +425,12 @@ class User(SimpleDSpaceObject): Extends DSpaceObject to implement specific attributes and methods for users (aka. EPersons) """ type = 'user' - name = None, - netid = None, - lastActive = None, - canLogIn = False, - email = None, - requireCertificate = False, + name = None + netid = None + lastActive = None + canLogIn = False + email = None + requireCertificate = False selfRegistered = False def __init__(self, api_resource=None): @@ -473,11 +477,11 @@ def __init__(self, api_resource): if 'lastModified' in api_resource: self.lastModified = api_resource['lastModified'] if 'step' in api_resource: - self.step = api_resource['lastModified'] + self.step = api_resource['step'] if 'sections' in api_resource: self.sections = api_resource['sections'].copy() if 'type' in api_resource: - self.lastModified = api_resource['lastModified'] + self.type = api_resource['type'] def as_dict(self): parent_dict = super(InProgressSubmission, self).as_dict() @@ -508,7 +512,7 @@ def __init__(self, api_resource): if 'label' in api_resource: self.label = api_resource['label'] if 'type' in api_resource: - self.label = api_resource['type'] + self.type = api_resource['type'] class RelationshipType(AddressableHALResource): """ @@ -518,3 +522,41 @@ def __init__(self, api_resource): super(RelationshipType, self).__init__(api_resource) +class ResourcePolicy(AddressableHALResource): + """ + DQ specific. Extends Addressable HAL Resource to model a resource policy. + """ + def __init__(self, api_resource: dict): + super(ResourcePolicy, self).__init__(api_resource) + api_resource = api_resource or {} + self.name = api_resource.get('name') + self.description = api_resource.get('description') + self.startDate = api_resource.get('startDate') + self.endDate = api_resource.get('endDate') + self.type = api_resource.get('type') + self.action = api_resource.get('action') + self.policyType = api_resource.get('policyType') + # Check for direct groupName/groupUUID (cached format from as_dict()) + self.groupName = api_resource.get('groupName') + self.groupUUID = api_resource.get('groupUUID') + # If not found, try extracting from _embedded structure (live API format) + if self.groupName is None and '_embedded' in api_resource: + if 'group' in api_resource['_embedded']: + self.groupName = api_resource['_embedded']['group'].get('name') + self.groupUUID = api_resource['_embedded']['group'].get('uuid') + def as_dict(self): + return { + 'id': self.id, + 'name': self.name, + 'type': self.type, + 'description': self.description, + 'startDate': self.startDate, + 'endDate': self.endDate, + 'action': self.action, + 'policyType': self.policyType, + 'groupName': self.groupName, + 'groupUUID': self.groupUUID, + } + + def __repr__(self): + return f"ResourcePolicy: {self.name} [{self.groupName}] [action: {self.action}] [type: {self.type}]" \ No newline at end of file diff --git a/example.py b/example.py index 461078f..c526c07 100644 --- a/example.py +++ b/example.py @@ -31,7 +31,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) # Put together some basic Community data. @@ -58,7 +58,7 @@ if isinstance(new_community, Community) and new_community.uuid is not None: print(f'New community created! Handle: {new_community.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Update the community metadata @@ -93,7 +93,7 @@ if isinstance(new_collection, Collection) and new_collection.uuid is not None: print(f'New collection created! Handle: {new_collection.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Put together some basic Item data. @@ -146,7 +146,7 @@ if isinstance(new_item, Item) and new_item.uuid is not None: print(f'New item created! Handle: {new_item.handle}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Add a single metadata field+value to the item (PATCH operation) @@ -159,7 +159,7 @@ if isinstance(new_bundle, Bundle) and new_bundle.uuid is not None: print(f'New bundle created! UUID: {new_bundle.uuid}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) # Create and upload a new bitstream using the LICENSE.txt file in this project @@ -181,10 +181,10 @@ if isinstance(new_bitstream, Bitstream) and new_bitstream.uuid is not None: print(f'New bitstream created! UUID: {new_bitstream.uuid}') else: - print(f'Error! Giving up.') + print('Error! Giving up.') exit(1) -print(f'All finished with example data creation. Visit your test repository to review created objects') +print('All finished with example data creation. Visit your test repository to review created objects') # Retrieving objects - now that we know there is some data in the repository we can demonstrate # some simple ways of retrieving and iterating DSOs diff --git a/example_gets.py b/example_gets.py index a6a6c77..fdd23fc 100644 --- a/example_gets.py +++ b/example_gets.py @@ -7,7 +7,6 @@ """ from dspace_rest_client.client import DSpaceClient -from dspace_rest_client.models import Community, Collection, Item, Bundle, Bitstream # Example variables needed for authentication and basic API requests # SET THESE TO MATCH YOUR TEST SYSTEM BEFORE RUNNING THE EXAMPLE SCRIPT @@ -30,7 +29,7 @@ # Authenticate against the DSpace client authenticated = d.authenticate() if not authenticated: - print(f'Error logging in! Giving up.') + print('Error logging in! Giving up.') exit(1) # Retrieving objects - now that we know there is some data in the repository we can demonstrate