From 22cad43bdb163da1a8298f8cbc3d743138369d08 Mon Sep 17 00:00:00 2001 From: Andreas Klos Date: Thu, 5 Jun 2025 12:44:30 +0200 Subject: [PATCH] fix: standardize parameter name for document processing in MSDocsExtractor and XMLExtractor --- .../impl/extractors/file_extractors/ms_docs_extractor.py | 2 +- .../impl/extractors/file_extractors/xml_extractor.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/ms_docs_extractor.py b/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/ms_docs_extractor.py index 5201c62..1db8210 100644 --- a/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/ms_docs_extractor.py +++ b/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/ms_docs_extractor.py @@ -92,7 +92,7 @@ async def aextract_content(self, file_path: Path, name: str) -> list[InternalInf infer_table_structure=True, ) - return self._process_elements(elements, file_path.name) + return self._process_elements(elements, name) def _process_elements(self, elements: list[Element], document_name: str) -> list[InternalInformationPiece]: processed_elements: list[InternalInformationPiece] = [] diff --git a/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/xml_extractor.py b/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/xml_extractor.py index d72292a..997ede2 100644 --- a/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/xml_extractor.py +++ b/extractor-api-lib/src/extractor_api_lib/impl/extractors/file_extractors/xml_extractor.py @@ -61,7 +61,7 @@ async def aextract_content(self, file_path: Path, name: str) -> list[InternalInf A list of processed information pieces extracted from the XML file. """ elements = partition_xml(filename=file_path.as_posix(), xml_keep_tags=False) - return self._process_elements(elements, file_path.name) + return self._process_elements(elements, name) def _process_elements(self, elements: list[Element], document_name: str) -> list[InternalInformationPiece]: processed_elements: list[InternalInformationPiece] = []