stackitcloud · a-klos · Sep 10, 2025
diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/file_uploader.py
@@ -8,6 +8,7 @@
 
 
 class FileUploader(UploaderBase):
+    """Base class for file uploader API endpoints."""
 
     @abstractmethod
     async def upload_file(
@@ -16,7 +17,7 @@ async def upload_file(
         file: UploadFile,
     ) -> None:
         """
-        Uploads a source file for content extraction.
+        Upload a source file for content extraction.
 
         Parameters
         ----------

diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/source_uploader.py
@@ -21,7 +21,7 @@ async def upload_source(
         timeout: Optional[float],
     ) -> None:
         """
-        Uploads the parameters for source content extraction.
+        Upload the parameters for source content extraction.
 
         Parameters
         ----------

diff --git a/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py b/libs/admin-api-lib/src/admin_api_lib/api_endpoints/uploader_base.py
@@ -7,9 +7,7 @@ class UploaderBase:
     """Base class for uploader API endpoints."""
 
     def __init__(self):
-        """
-        Initialize the UploaderBase.
-        """
+        """Initialize the UploaderBase."""
         self._background_threads = []
 
     def _prune_background_threads(self) -> list[Thread]:

diff --git a/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py b/libs/admin-api-lib/src/admin_api_lib/apis/admin_api.py
@@ -149,7 +149,7 @@ async def upload_file(
     request: Request,
 ) -> None:
     """
-    Uploads user selected sources.
+    Upload user selected sources.
 
     Parameters
     ----------
@@ -181,7 +181,7 @@ async def upload_source(
     key_value_pair: List[KeyValuePair] = Body(None, description="The key-value pairs for the source"),
 ) -> None:
     """
-    Uploads user selected sources.
+    Upload user selected sources.
 
     Parameters
     ----------

diff --git a/...dmin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py b/...dmin-api-lib/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py
@@ -2,7 +2,6 @@
 
 import io
 import logging
-import traceback
 
 from fastapi import HTTPException, Response, status
 
@@ -54,10 +53,8 @@ async def adocument_reference_id_get(self, identification: str) -> Response:
                 self._file_service.download_file(identification, document_buffer)
                 logger.debug("DONE retrieving document with id: %s", identification)
                 document_data = document_buffer.getvalue()
-            except Exception as e:
-                logger.error(
-                    "Error retrieving document with id: %s. Error: %s %s", identification, e, traceback.format_exc()
-                )
+            except Exception:
+                logger.exception("Error retrieving document with id: %s", identification)
                 raise ValueError(f"Document with id '{identification}' not found.")
             finally:
                 document_buffer.close()

diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py
@@ -1,6 +1,7 @@
+"""Module for the default file uploader implementation."""
+
 import logging
 from pathlib import Path
-import traceback
 import urllib
 import tempfile
 import asyncio
@@ -78,7 +79,7 @@ async def upload_file(
         file: UploadFile,
     ) -> None:
         """
-        Uploads a source file for content extraction.
+        Upload a source file for content extraction.
 
         Parameters
         ----------
@@ -109,7 +110,7 @@ async def upload_file(
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
         except Exception as e:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
             raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
     def _log_task_exception(self, task: asyncio.Task) -> None:
@@ -124,19 +125,16 @@ def _log_task_exception(self, task: asyncio.Task) -> None:
         if task.done() and not task.cancelled():
             try:
                 task.result()  # This will raise the exception if one occurred
-            except Exception as e:
-                logger.error("Background task failed with exception: %s", str(e))
-                logger.debug("Background task exception traceback: %s", traceback.format_exc())
+            except Exception:
+                logger.exception("Background task failed with exception")
 
     def _prune_background_tasks(self) -> None:
-        """
-        Remove completed background tasks from the list.
-        """
+        """Remove completed background tasks from the list."""
         self._background_tasks = [task for task in self._background_tasks if not task.done()]
 
     def _check_if_already_in_processing(self, source_name: str) -> None:
         """
-        Checks if the source is already in processing state.
+        Check if the source is already in processing state.
 
         Parameters
         ----------
@@ -196,9 +194,9 @@ async def _handle_source_upload(
             await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
             self._key_value_store.upsert(source_name, Status.READY)
             logger.info("Source uploaded successfully: %s", source_name)
-        except Exception as e:
+        except Exception:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
 
     def _add_file_url(self, file_name: str, base_url: str, chunked_documents: list[Document]):
         document_url = f"{base_url.rstrip('/')}/document_reference/{urllib.parse.quote_plus(file_name)}"
@@ -229,6 +227,6 @@ async def _asave_new_document(
 
                 self._file_service.upload_file(Path(temp_file_path), filename)
                 return filename
-        except Exception as e:
-            logger.error("Error during document saving: %s %s", e, traceback.format_exc())
+        except Exception:
+            logger.exception("Error during document saving")
             self._key_value_store.upsert(source_name, Status.ERROR)
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py b/libs/admin-api-lib/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py
@@ -1,3 +1,5 @@
+"""Module for the default source uploader implementation."""
+
 import logging
 import asyncio
 from threading import Thread
@@ -28,6 +30,7 @@
 
 
 class DefaultSourceUploader(SourceUploader):
+    """Default implementation of the SourceUploader."""
 
     def __init__(
         self,
@@ -78,7 +81,7 @@ async def upload_source(
         kwargs: list[KeyValuePair],
     ) -> None:
         """
-        Uploads the parameters for source content extraction.
+        Upload the parameters for source content extraction.
 
         Parameters
         ----------
@@ -95,7 +98,6 @@ async def upload_source(
         -------
         None
         """
-
         self._prune_background_threads()
 
         source_name = f"{source_type}:{sanitize_document_name(name)}"
@@ -111,12 +113,12 @@ async def upload_source(
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
         except Exception as e:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
             raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
     def _check_if_already_in_processing(self, source_name: str) -> None:
         """
-        Checks if the source is already in processing state.
+        Check if the source is already in processing state.
 
         Parameters
         ----------
@@ -197,6 +199,6 @@ async def _handle_source_upload(
             await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
             self._key_value_store.upsert(source_name, Status.READY)
             logger.info("Source uploaded successfully: %s", source_name)
-        except Exception as e:
+        except Exception:
             self._key_value_store.upsert(source_name, Status.ERROR)
-            logger.error("Error while uploading %s = %s", source_name, str(e))
+            logger.exception("Error while uploading %s", source_name)
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py b/libs/admin-api-lib/src/admin_api_lib/impl/file_services/s3_service.py
@@ -1,7 +1,6 @@
 """Class to handle I/O with S3 storage."""
 
 import logging
-import traceback
 from pathlib import Path
 from typing import BinaryIO
 
@@ -125,7 +124,7 @@ def delete_file(self, file_name: str) -> None:
         try:
             file_name = f"/{file_name}" if not file_name.startswith("/") else file_name
             self._s3_client.delete_object(Bucket=self._s3_settings.bucket, Key=file_name)
-            logger.info(f"File {file_name} successfully deleted.")
-        except Exception as e:
-            logger.error("Error deleting file %s: %s %s" % (file_name, e, traceback.format_exc()))
+            logger.info("File %s successfully deleted.", file_name)
+        except Exception:
+            logger.exception("Error deleting file %s", file_name)
             raise
diff --git a/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py b/libs/admin-api-lib/src/admin_api_lib/impl/summarizer/langchain_summarizer.py
@@ -1,7 +1,6 @@
 """Module for the LangchainSummarizer class."""
 
 import logging
-import traceback
 from typing import Optional
 
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -66,7 +65,7 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
         assert query, "Query is empty: %s" % query  # noqa S101
         config = ensure_config(config)
         tries_remaining = config.get("configurable", {}).get("tries_remaining", 3)
-        logger.debug("Tries remaining %d" % tries_remaining)
+        logger.debug("Tries remaining %d", tries_remaining)
 
         if tries_remaining < 0:
             raise Exception("Summary creation failed.")
@@ -81,8 +80,8 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
                     # Extract content from AIMessage if it's not already a string
                     content = result.content if hasattr(result, "content") else str(result)
                     outputs.append(content)
-                except Exception as e:
-                    logger.error("Error in summarizing langchain doc: %s %s", e, traceback.format_exc())
+                except Exception:
+                    logger.exception("Error in summarizing langchain doc")
                     config["tries_remaining"] = tries_remaining - 1
                     result = await self._create_chain().ainvoke({"text": langchain_document.page_content}, config)
                     # Extract content from AIMessage if it's not already a string
@@ -93,8 +92,9 @@ async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig]
             return outputs[0]
         summary = " ".join(outputs)
         logger.debug(
-            "Reduced number of chars from %d to %d"
-            % (len("".join([x.page_content for x in langchain_documents])), len(summary))
+            "Reduced number of chars from %d to %d",
+            len("".join([x.page_content for x in langchain_documents])),
+            len(summary),
         )
         return await self.ainvoke(summary, config)
 

diff --git a/libs/extractor-api-lib/pyproject.toml b/libs/extractor-api-lib/pyproject.toml
@@ -32,8 +32,8 @@ per-file-ignores = """
   ./src/extractor_api_lib/apis/extractor_api.py: B008,WOT001,
   ./src/extractor_api_lib/impl/extractor_api_impl.py: B008,
   ./src/extractor_api_lib/container.py: CCE002,CCE001,
-  ./src/extractor_api_lib/apis/extractor_api_base.py: WOT001,
-  ./tests/*: S101,E501,
+  ./src/extractor_api_lib/apis/extractor_api_base.py: WOT001,D105,
+  ./tests/*: S101,E501,D105,D100,D102,
 """
 
 [tool.black]

diff --git a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/file_extractor.py
@@ -1,3 +1,5 @@
+"""Module for the FileExtractor class."""
+
 from abc import ABC, abstractmethod
 from extractor_api_lib.models.extraction_request import ExtractionRequest
 from extractor_api_lib.models.information_piece import InformationPiece

diff --git a/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/api_endpoints/source_extractor.py
@@ -1,3 +1,5 @@
+"""Module for the SourceExtractor class."""
+
 from abc import ABC, abstractmethod
 
 from extractor_api_lib.models.extraction_parameters import ExtractionParameters

diff --git a/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/extractors/information_extractor.py
@@ -13,7 +13,14 @@ class InformationExtractor(ABC):
 
     @property
     @abstractmethod
-    def extractor_type(self) -> ExtractorTypes: ...
+    def extractor_type(self) -> ExtractorTypes:
+        """Return the type of the extractor.
+
+        Returns
+        -------
+        ExtractorTypes
+            The type of the extractor.
+        """
 
     @abstractmethod
     async def aextract_content(

diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/api_endpoints/general_file_extractor.py
@@ -3,7 +3,6 @@
 import logging
 from pathlib import Path
 import tempfile
-import traceback
 
 
 from extractor_api_lib.api_endpoints.file_extractor import FileExtractor
@@ -76,5 +75,5 @@ async def aextract_information(self, extraction_request: ExtractionRequest) -> l
                 )
                 return [self._mapper.map_internal_to_external(x) for x in results if x.page_content is not None]
-        except Exception as e:
-            logger.error("Error during document parsing: %s %s", e, traceback.format_exc())
-            raise e
+        except Exception:
+            logger.exception("Error during document parsing")
+            raise
diff --git a/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py b/libs/extractor-api-lib/src/extractor_api_lib/impl/extractors/confluence_extractor.py
@@ -34,6 +34,13 @@ def __init__(
 
     @property
     def extractor_type(self) -> ExtractorTypes:
+        """Return the type of the extractor.
+
+        Returns
+        -------
+        ExtractorTypes
+            The type of the extractor.
+        """
         return ExtractorTypes.CONFLUENCE
 
     async def aextract_content(

diff --git a/.../extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py b/.../extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/pdf_extractor.py
@@ -154,7 +154,7 @@ async def aextract_content(self, file_path: Path, name: str) -> list[InternalInf
                     )
                     pdf_elements += new_pdf_elements
 
-        logger.info(f"Extraction completed. Found {len(pdf_elements)} information pieces.")
+        logger.info("Extraction completed. Found %d information pieces.", len(pdf_elements))
         return pdf_elements
 
     def _is_text_based(self, page: Page) -> bool:
@@ -200,8 +200,8 @@ def _extract_tables_from_text_page(
                 table_df = pd.DataFrame(table_data)
                 try:
                     converted_table = self._dataframe_converter.convert(table_df)
-                except TypeError as e:
-                    logger.error(f"Error while converting table to string: {e}")
+                except TypeError:
+                    logger.exception("Error while converting table to string")
                     continue
                 if not converted_table.strip():
                     continue
@@ -215,8 +215,8 @@ def _extract_tables_from_text_page(
                         information_id=hash_datetime(),
                     )
                 )
-        except Exception as e:
-            logger.warning(f"Failed to find tables on page {page_index}: {e}")
+        except Exception:
+            logger.warning("Failed to find tables on page %d", page_index, exc_info=True)
 
         return table_elements
 
@@ -321,19 +321,19 @@ def _extract_tables_from_scanned_page(
                                     },
                                 )
                             )
-                    except Exception as e:
-                        logger.warning(f"Failed to convert Camelot table {i + 1}: {e}")
+                    except Exception:
+                        logger.warning("Failed to convert Camelot table %d", i + 1, exc_info=True)
 
-        except Exception as e:
-            logger.debug(f"Camelot table extraction failed for page {page_index}: {e}")
+        except Exception:
+            logger.debug("Camelot table extraction failed for page %d", page_index, exc_info=True)
 
         return table_elements
 
     def _extract_text_from_text_page(self, page: Page) -> str:
         try:
             return page.extract_text() or ""
-        except Exception as e:
-            logger.warning(f"Failed to extract text with pdfplumber: {e}")
+        except Exception:
+            logger.warning("Failed to extract text with pdfplumber", exc_info=True)
             return ""
 
     def _extract_content_from_page(