From df3202ee7a1c341152aa307385af7a29a8305ac6 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Thu, 3 Jul 2025 09:46:25 +0200 Subject: [PATCH 1/7] Added post method to pass data as text/plain content type --- dspace_rest_client/client.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 4bb14fa..6149896 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -113,6 +113,7 @@ def __init__(self, api_endpoint=API_ENDPOINT, username=USERNAME, password=PASSWO self.auth_request_headers = {'User-Agent': self.USER_AGENT} self.request_headers = {'Content-type': 'application/json', 'User-Agent': self.USER_AGENT} self.list_request_headers = {'Content-type': 'text/uri-list', 'User-Agent': self.USER_AGENT} + self.text_plain_request_headers = {'Content-type': 'text/plain', 'User-Agent': self.USER_AGENT} def authenticate(self, retry=False): """ @@ -253,6 +254,35 @@ def api_post_uri(self, url, params, uri_list, retry=False): return r + def api_post_text(self, url, params, data, retry=False): + """ + Perform a POST request. Refresh XSRF token if necessary. + POSTs are typically used to create objects. + @param data: Data in text/plain format to send as POST body + @param url: DSpace REST API URL + @param params: Any parameters to include (eg ?parent=abbc-....) + @param uri_list: One or more URIs referencing objects + @param retry: Has this method already been retried? Used if we need to refresh XSRF. + @return: Response from API + """ + r = self.session.post(url, data=data, params=params, headers=self.text_plain_request_headers) + self.update_token(r) + + if r.status_code == 403: + # 403 Forbidden + # If we had a CSRF failure, retry the request with the updated token + # After speaking in #dev it seems that these do need occasional refreshes but I suspect + # it's happening too often for me, so check for accidentally triggering it + r_json = r.json() + if 'message' in r_json and 'CSRF token' in r_json['message']: + if retry: + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + else: + _logger.debug("Retrying request with updated CSRF token") + return self.api_post_text(url, params=params, data=data, retry=True) + + return r + def api_put(self, url, params, json, retry=False): """ Perform a PUT request. Refresh XSRF token if necessary. From d1d78d417516ed998cc4b5c9bc49c9cc87f0f60e Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Thu, 3 Jul 2025 09:48:34 +0200 Subject: [PATCH 2/7] Updated doc --- dspace_rest_client/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 6149896..8cd0f71 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -261,7 +261,6 @@ def api_post_text(self, url, params, data, retry=False): @param data: Data in text/plain format to send as POST body @param url: DSpace REST API URL @param params: Any parameters to include (eg ?parent=abbc-....) - @param uri_list: One or more URIs referencing objects @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ From 26535b958153a71b3b9fec7ae655fe40f52ecff5 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 4 Jul 2025 15:15:34 +0200 Subject: [PATCH 3/7] Retry the request with authentication when the user is logged out --- dspace_rest_client/client.py | 71 +++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 8cd0f71..3c7ea4a 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -168,18 +168,49 @@ def refresh_token(self): r = self.api_post(self.LOGIN_URL, None, None) self.update_token(r) - def api_get(self, url, params=None, data=None, headers=None): + def api_get(self, url, params=None, data=None, headers=None, retry=False): """ Perform a GET request. Refresh XSRF token if necessary. @param url: DSpace REST API URL @param params: any parameters to include (eg ?page=0) @param data: any data to supply (typically not relevant for GET) @param headers: any override headers (eg. with short-lived token for download) + @param retry: Has this method already been retried? Used if we need to refresh XSRF. @return: Response from API """ if headers is None: headers = self.request_headers r = self.session.get(url, params=params, data=data, headers=headers) + + if r.status_code == 403: + # 403 Forbidden + # If we had a CSRF failure, retry the request with the updated token + # After speaking in #dev it seems that these do need occasional refreshes but I suspect + # it's happening too often for me, so check for accidentally triggering it + r_json = parse_json(r) + if 'message' in r_json and 'CSRF token' in r_json['message']: + if retry: + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + else: + _logger.debug("Retrying request with updated CSRF token") + return self.api_get(url, params=params, data=data, headers=headers, retry=True) + + # we need to log in again, if there is login error. This is a bad + # solution copied from the past + elif r.status_code == 401: + r_json = parse_json(r) + if 'message' in r_json and 'Authentication is required' in r_json['message']: + if retry: + logging.error( + 'API Post: Already retried... something must be wrong') + else: + logging.debug("API Post: Retrying request with updated CSRF token") + # try to authenticate + self.authenticate() + # Try to authenticate and repeat the request 3 times - + # if it won't happen log error + return self.api_get(url, params=params, data=data, headers=headers, retry=True) + self.update_token(r) return r @@ -252,33 +283,21 @@ def api_post_uri(self, url, params, uri_list, retry=False): _logger.debug("Retrying request with updated CSRF token") return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) - return r - - def api_post_text(self, url, params, data, retry=False): - """ - Perform a POST request. Refresh XSRF token if necessary. - POSTs are typically used to create objects. - @param data: Data in text/plain format to send as POST body - @param url: DSpace REST API URL - @param params: Any parameters to include (eg ?parent=abbc-....) - @param retry: Has this method already been retried? Used if we need to refresh XSRF. - @return: Response from API - """ - r = self.session.post(url, data=data, params=params, headers=self.text_plain_request_headers) - self.update_token(r) - - if r.status_code == 403: - # 403 Forbidden - # If we had a CSRF failure, retry the request with the updated token - # After speaking in #dev it seems that these do need occasional refreshes but I suspect - # it's happening too often for me, so check for accidentally triggering it - r_json = r.json() - if 'message' in r_json and 'CSRF token' in r_json['message']: + # we need to log in again, if there is login error. This is a bad + # solution copied from the past + elif r.status_code == 401: + r_json = parse_json(r) + if 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: - _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + logging.error( + 'API Post: Already retried... something must be wrong') else: - _logger.debug("Retrying request with updated CSRF token") - return self.api_post_text(url, params=params, data=data, retry=True) + logging.debug("API Post: Retrying request with updated CSRF token") + # try to authenticate + self.authenticate() + # Try to authenticate and repeat the request 3 times - + # if it won't happen log error + return self.api_post_uri(url, params=params, uri_list=uri_list, retry=True) return r From a47ca2936a0eef93456779b7c11b7e8e20a0be04 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 4 Jul 2025 15:35:48 +0200 Subject: [PATCH 4/7] Add a null check after parse_json(r) before accessing r_json['message'], since parse_json may return null and cause a TypeError. --- dspace_rest_client/client.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 3c7ea4a..61a1cfa 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -199,7 +199,7 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): # solution copied from the past elif r.status_code == 401: r_json = parse_json(r) - if 'message' in r_json and 'Authentication is required' in r_json['message']: + if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: logging.error( 'API Post: Already retried... something must be wrong') @@ -244,7 +244,7 @@ def api_post(self, url, params, json, retry=False): # solution copied from the past elif r.status_code == 401: r_json = parse_json(r) - if 'message' in r_json and 'Authentication is required' in r_json['message']: + if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: logging.error( 'API Post: Already retried... something must be wrong') @@ -276,7 +276,7 @@ def api_post_uri(self, url, params, uri_list, retry=False): # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it r_json = r.json() - if 'message' in r_json and 'CSRF token' in r_json['message']: + if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: @@ -322,7 +322,7 @@ def api_put(self, url, params, json, retry=False): _logger.debug(r.text) # Parse response r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: @@ -351,7 +351,7 @@ def api_put_uri(self, url, params, uri_list, retry=False): logging.debug(r.text) # Parse response r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: @@ -380,7 +380,7 @@ def api_delete(self, url, params, retry=False): _logger.debug(r.text) # Parse response r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: @@ -434,7 +434,7 @@ def api_patch(self, url, operation, path, value, retry=False): # it's happening too often for me, so check for accidentally triggering it _logger.debug(r.text) r_json = parse_json(r) - if 'message' in r_json and 'CSRF token' in r_json['message']: + if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: From fe4e8ad9e64b94483b1232bfb08308717b941592 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 4 Jul 2025 15:37:11 +0200 Subject: [PATCH 5/7] Call self.update_token(r) before retrying the request to ensure the refreshed CSRF token is applied. --- dspace_rest_client/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 61a1cfa..1ad145c 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -181,7 +181,8 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): if headers is None: headers = self.request_headers r = self.session.get(url, params=params, data=data, headers=headers) - + self.update_token(r) + if r.status_code == 403: # 403 Forbidden # If we had a CSRF failure, retry the request with the updated token @@ -211,7 +212,6 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): # if it won't happen log error return self.api_get(url, params=params, data=data, headers=headers, retry=True) - self.update_token(r) return r def api_post(self, url, params, json, retry=False): From c2d9cff4e8f36d57f9e4618622902ff6a9e66a13 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 4 Jul 2025 15:39:02 +0200 Subject: [PATCH 6/7] =?UTF-8?q?Use=20the=20module=E2=80=99s=20=5Flogger=20?= =?UTF-8?q?instead=20of=20the=20root=20logging=20to=20maintain=20consisten?= =?UTF-8?q?t=20logging=20configuration.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dspace_rest_client/client.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 1ad145c..5647f25 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -182,7 +182,7 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): headers = self.request_headers r = self.session.get(url, params=params, data=data, headers=headers) self.update_token(r) - + if r.status_code == 403: # 403 Forbidden # If we had a CSRF failure, retry the request with the updated token @@ -202,10 +202,10 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): r_json = parse_json(r) if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: - logging.error( + _logger.error( 'API Post: Already retried... something must be wrong') else: - logging.debug("API Post: Retrying request with updated CSRF token") + _logger.debug("API Post: Retrying request with updated CSRF token") # try to authenticate self.authenticate() # Try to authenticate and repeat the request 3 times - @@ -246,10 +246,10 @@ def api_post(self, url, params, json, retry=False): r_json = parse_json(r) if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: - logging.error( + _logger.error( 'API Post: Already retried... something must be wrong') else: - logging.debug("API Post: Retrying request with updated CSRF token") + _logger.debug("API Post: Retrying request with updated CSRF token") # try to authenticate self.authenticate() # Try to authenticate and repeat the request 3 times - @@ -289,10 +289,10 @@ def api_post_uri(self, url, params, uri_list, retry=False): r_json = parse_json(r) if 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: - logging.error( + _logger.error( 'API Post: Already retried... something must be wrong') else: - logging.debug("API Post: Retrying request with updated CSRF token") + _logger.debug("API Post: Retrying request with updated CSRF token") # try to authenticate self.authenticate() # Try to authenticate and repeat the request 3 times - @@ -348,14 +348,14 @@ def api_put_uri(self, url, params, uri_list, retry=False): # If we had a CSRF failure, retry the request with the updated token # After speaking in #dev it seems that these do need occasional refreshes but I suspect # it's happening too often for me, so check for accidentally triggering it - logging.debug(r.text) + _logger.debug(r.text) # Parse response r_json = parse_json(r) if r_json and 'message' in r_json and 'CSRF token' in r_json['message']: if retry: - logging.warning(f'Too many retries updating token: {r.status_code}: {r.text}') + _logger.warning(f'Too many retries updating token: {r.status_code}: {r.text}') else: - logging.debug("Retrying request with updated CSRF token") + _logger.debug("Retrying request with updated CSRF token") return self.api_put_uri(url, params=params, uri_list=uri_list, retry=True) return r @@ -400,15 +400,15 @@ def api_patch(self, url, operation, path, value, retry=False): @see https://github.com/DSpace/RestContract/blob/main/metadata-patch.md """ if url is None: - logging.error('Missing required URL argument') + _logger.error('Missing required URL argument') return None if path is None: - logging.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language') + _logger.error('Need valid path eg. /withdrawn or /metadata/dc.title/0/language') return None if (operation == self.PatchOperation.ADD or operation == self.PatchOperation.REPLACE or operation == self.PatchOperation.MOVE) and value is None: # missing value required for add/replace/move operations - logging.error('Missing required "value" argument for add/replace/move operations') + _logger.error('Missing required "value" argument for add/replace/move operations') return None # compile patch data @@ -557,7 +557,7 @@ def update_dso(self, dso, params=None): return None dso_type = type(dso) if not isinstance(dso, SimpleDSpaceObject): - logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) ' + _logger.error('Only SimpleDSpaceObject types (eg Item, Collection, Community) ' 'are supported by generic update_dso PUT.') return dso try: @@ -604,11 +604,11 @@ def delete_dso(self, dso=None, url=None, params=None): """ if dso is None: if url is None: - logging.error('Need a DSO or a URL to delete') + _logger.error('Need a DSO or a URL to delete') return None else: if not isinstance(dso, SimpleDSpaceObject): - logging.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' + _logger.error('Only SimpleDSpaceObject types (eg Item, Collection, Community, EPerson) ' 'are supported by generic update_dso PUT.') return dso # Get self URI from HAL links @@ -1036,7 +1036,7 @@ def remove_metadata(self, dso, field, place): """ if dso is None or field is None or place is None or not isinstance(dso, DSpaceObject): # TODO: separate these tests, and add better error handling - logging.error('Invalid or missing DSpace object, field or value string') + _logger.error('Invalid or missing DSpace object, field or value string') return self dso_type = type(dso) @@ -1070,7 +1070,7 @@ def create_user(self, user, token=None): def delete_user(self, user): if not isinstance(user, User): - logging.error('Must be a valid user') + _logger.error('Must be a valid user') return None return self.delete_dso(user) From b477570a92aa67d4d3f14a18c7d3fa086f8afb0f Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Fri, 4 Jul 2025 15:39:52 +0200 Subject: [PATCH 7/7] The log message incorrectly references API Post in the GET method; update it to API Get for clarity. --- dspace_rest_client/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dspace_rest_client/client.py b/dspace_rest_client/client.py index 5647f25..3922e2d 100644 --- a/dspace_rest_client/client.py +++ b/dspace_rest_client/client.py @@ -203,9 +203,9 @@ def api_get(self, url, params=None, data=None, headers=None, retry=False): if r_json and 'message' in r_json and 'Authentication is required' in r_json['message']: if retry: _logger.error( - 'API Post: Already retried... something must be wrong') + 'API Get: Already retried... something must be wrong') else: - _logger.debug("API Post: Retrying request with updated CSRF token") + _logger.debug("API Get: Retrying request with updated CSRF token") # try to authenticate self.authenticate() # Try to authenticate and repeat the request 3 times -