From da18e84912114fc8b3e47b9c5fc6b02d6822e7b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 3 Apr 2026 13:00:02 +0200 Subject: [PATCH 1/2] docs: readthedocs in extensions java --- .docs/README.md | 33 +++++++++ .docs/_static/custom.css | 3 + .docs/conf.py | 51 +++++++++++++ .docs/content/0.overview.md | 47 ++++++++++++ .docs/content/1.client.md | 136 ++++++++++++++++++++++++++++++++++ .docs/content/2.worker.md | 144 ++++++++++++++++++++++++++++++++++++ .docs/content/3.examples.md | 3 + .docs/index.rst | 11 +++ .docs/requirements.txt | 4 + .readthedocs.yaml | 21 ++++++ 10 files changed, 453 insertions(+) create mode 100644 .docs/README.md create mode 100644 .docs/_static/custom.css create mode 100644 .docs/conf.py create mode 100644 .docs/content/0.overview.md create mode 100644 .docs/content/1.client.md create mode 100644 .docs/content/2.worker.md create mode 100644 .docs/content/3.examples.md create mode 100644 .docs/index.rst create mode 100644 .docs/requirements.txt create mode 100644 .readthedocs.yaml diff --git a/.docs/README.md b/.docs/README.md new file mode 100644 index 0000000..c0af7e5 --- /dev/null +++ b/.docs/README.md @@ -0,0 +1,33 @@ +# ArmoniK Docs + +Docs for ArmoniK + +## Installation + +> Be aware to be at the root of the repository + +```bash +python -m venv .venv-doc +``` + +Then activate the virtual environment: + +```bash +source .venv-doc/bin/activate +``` + +And install dependencies: + +```bash +pip install -r .docs/requirements.txt +``` + +## Usage + +To build the docs locally, run the following command: + +```bash +sphinx-build -M html .docs build +``` + +Outputs can be found in `build/html/index.html`. 
diff --git a/.docs/_static/custom.css b/.docs/_static/custom.css new file mode 100644 index 0000000..5bdc6a0 --- /dev/null +++ b/.docs/_static/custom.css @@ -0,0 +1,3 @@ +.wy-nav-content { + max-width: 100% !important; +} \ No newline at end of file diff --git a/.docs/conf.py b/.docs/conf.py new file mode 100644 index 0000000..564d000 --- /dev/null +++ b/.docs/conf.py @@ -0,0 +1,51 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "ArmoniK.Extensions.Java" +copyright = "2021-%Y, ANEO" +author = "ANEO" +release = "main" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["myst_parser", "sphinxcontrib.mermaid"] + +templates_path = ["_templates"] +exclude_patterns = ["requirements.txt", "README.md"] +suppress_warnings = ["myst.header"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] +html_css_files = ["custom.css"] +html_search = True +html_extra_path = ["api/client/apidocs"] + +# -- Options for source files ------------------------------------------------ +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-source-files +source_suffix = { + ".rst": "restructuredtext", + ".txt": "markdown", + ".md": "markdown", +} + +# -- Options MyST Parser ------------------------------------------------ +myst_fence_as_directive = ["mermaid"] +myst_heading_anchors = 3 + +# -- Options to show "Edit on GitHub" 
button --------------------------------- +html_context = { + "display_github": True, # Integrate GitHub + "github_user": "aneoconsulting", # Username + "github_repo": "ArmoniK.Extensions.Java", # Repo name + "github_version": "main", # Version + "conf_py_path": "/.docs/", # Path in the checkout to the docs root +} diff --git a/.docs/content/0.overview.md b/.docs/content/0.overview.md new file mode 100644 index 0000000..67bc219 --- /dev/null +++ b/.docs/content/0.overview.md @@ -0,0 +1,47 @@ +# ArmoniK Java SDK — Overview + +## Client–Worker Model + +ArmoniK implements a **Client–Worker** distributed processing model. Responsibilities are clearly separated: + +**Client Responsibilities** +- Create and manage sessions. +- Submit tasks and define execution dependencies (task graphs). +- Manage input/output blobs and error handling. + +**Worker Responsibilities** +- Implement the computation logic for each task. +- Read input blobs and produce output blobs. +- Optionally submit new tasks (dynamic workflows). + +## SDK Goals + +- **Simplify** the development of Clients and Workers. +- Provide **pre-built Worker containers** following unified conventions. +- Offer a **high-level Worker API** with consistent lifecycle and error handling. +- Provide **simplified Client APIs** to create sessions, submit tasks, and manage data. +- Promote **best practices**: idempotency, error reporting, observability. +- Ensure **compatibility and long-term maintainability** across ArmoniK versions. +- Ensure **interoperability**: clients and workers can be mixed across languages. + +## Interoperability + +The SDK is designed to enable cross-language interoperability — you can mix clients and workers across SDK languages. + +**Example:** A Java client orchestrates tasks executed by Python or C# workers. 
+ +Two conventions make this possible: + +**Convention 1 — Payload structure** + +The payload is a JSON association table mapping logical names to blob IDs: +```json +{"inputs": {"A": "id1"}, "outputs": {"result": "id2"}} +``` + +**Convention 2 — Task options** + +Dynamic library loading is enabled via standardized task options carrying: +- Path to the library +- Symbol (class/function name) +- Blob ID for the library artifact \ No newline at end of file diff --git a/.docs/content/1.client.md b/.docs/content/1.client.md new file mode 100644 index 0000000..821d935 --- /dev/null +++ b/.docs/content/1.client.md @@ -0,0 +1,136 @@ +# ArmoniK Java SDK — Client + +## Role of the Client SDK + +The client SDK provides a language-friendly interface to interact with ArmoniK's control plane. It allows you to: + +- Create and manage **sessions**. +- Define and submit **tasks**, optionally with dependencies. +- Create and manage **input and output blobs**. +- Monitor task progress and retrieve results. +- Use optional asynchronous callback mechanisms. +- Ensure consistent workflow semantics: retries, error propagation, idempotency. + +--- + +## Entry Point: `ArmoniKClient` + +The main entry point is `ArmoniKClient`, configured via `ArmoniKConfig` which describes: +- Endpoints +- TLS configuration +- Retry policy + +Use try-with-resources for automatic cleanup: + +```java +try (ArmoniKClient client = new ArmoniKClient(config)) { + // ... +} +``` + +**Available operations on `ArmoniKClient`:** +- Create a new `Session` +- Retrieve an existing `Session` +- Cancel a `Session` +- Close a `Session` + +--- + +## Session Creation + +Define a session with `SessionDefinition`, then submit it to get a `SessionHandle`. 
+ +`SessionDefinition` components: + +| Component | Description | +|---|---| +| Partition IDs | Where tasks can execute | +| `TaskConfiguration` | Default configuration for all tasks | +| `BlobCompletionListener` | Callback when an output blob is `COMPLETED` or `ABORTED` | + +--- + +## Blob Completion Listener + +`BlobCompletionListener` is a callback interface for async result notifications. Register it in `SessionDefinition` to enable event-driven result handling. + +```java +BlobCompletionListener listener = new BlobCompletionListener() { + @Override + public void onBlobSuccess(Blob blob) { /* blob is ready */ } + + @Override + public void onBlobError(BlobError error) { /* handle error */ } +}; +``` + +--- + +## Working with Blobs + +| Type | Description | +|---|---| +| `InputBlobDefinition` | Data sent to workers | +| `OutputBlobDefinition` | Placeholder for expected results | +| `BlobHandle` | Reference to an already uploaded/downloaded blob | + +Input data can come from: +- In-memory byte array +- File +- Custom source by implementing the `BlobData` interface + +`SessionHandle` can also create session-scoped blobs directly. + +--- + +## Task Submission + +Create a `TaskDefinition` specifying: +- **Inputs**: `InputBlobDefinition` or `BlobHandle` +- **Outputs**: `OutputBlobDefinition` +- **Optional**: override `TaskConfiguration` +- **Optional**: `WorkerLibrary` for dynamic loading + +Submit it to `SessionHandle`. This is **non-blocking** and returns a `TaskHandle` immediately. + +--- + +## WorkerLibrary (Dynamic Loading) + +`WorkerLibrary` describes a dynamically loaded worker library for task execution. It has three components: + +| Field | Description | Example | +|---|---|---| +| `path` | Path to the artifact inside the zip | `"MyWorker.jar"` | +| `symbol` | Fully qualified class name | `"com.example.MyProcessor"` | +| `libraryHandle` | `BlobHandle` referencing the uploaded library zip | — | + +--- + +## Typical Workflow + +``` +1. 
Create ArmoniKClient with config +2. Create SessionDefinition (with partitions, TaskConfiguration, listener) +3. Open SessionHandle +4. Define InputBlobDefinition and OutputBlobDefinition +5. Create and submit TaskDefinition +6. Wait for completion or react to BlobCompletionListener callbacks +7. Read output blobs +8. Close session and client +``` + +--- + +## Task Submission Flow (General) + +1. The client creates a **session** to group related tasks. +2. Input data is uploaded as **blobs**. +3. The client defines one or more **task definitions**, each referencing: + - input blobs (data dependencies) + - expected output blobs + - optional task configuration +4. Tasks are submitted to the control plane. +5. Workers execute tasks, read inputs, and produce outputs. +6. The control plane signals task completion. +7. The client retrieves output blobs or submits new tasks to continue the workflow. diff --git a/.docs/content/2.worker.md b/.docs/content/2.worker.md new file mode 100644 index 0000000..576cbbb --- /dev/null +++ b/.docs/content/2.worker.md @@ -0,0 +1,144 @@ +# ArmoniK Java SDK — Worker + +## Project Structure + +The Java Worker SDK is split into two Maven modules: + +| Module | Purpose | +|---|---| +| `armonik-worker-domain` | SDK for implementing `TaskProcessor`, used for dynamic loading | +| `armonik-worker` | Allows static `TaskProcessor` implementation | + +**Key entry points:** +- `TaskProcessor` interface — implement your task logic here. +- `ArmoniKWorker` class — handles server startup. + +--- + +## `TaskProcessor` Interface + +`TaskProcessor` is a `@FunctionalInterface` with a single method: + +```java +TaskOutcome processTask(TaskContext context); +``` + +**Return values:** + +| Value | Meaning | +|---|---| +| `TaskOutcome.SUCCESS` | Task completed successfully | +| `TaskOutcome.error(String)` | Business logic failure with a message | + +**Error handling strategy:** +- Return `SUCCESS` for successful execution. 
+- Return `error(message)` for expected business failures.
+- Let exceptions propagate for infrastructure/unexpected issues.
+
+---
+
+## `TaskContext` — Input/Output Access
+
+### Accessing inputs and outputs by logical name
+
+```java
+TaskInput input = context.getInput("A");
+TaskOutput output = context.getOutput("result");
+
+boolean hasInput = context.hasInput("A");
+boolean hasOutput = context.hasOutput("result");
+```
+
+### `TaskInput` — Read operations
+
+| Method | Description |
+|---|---|
+| `rawData()` | Read entire blob as `byte[]` (cached if ≤ 8 MiB) |
+| `stream()` | Read as `InputStream` (large files, bypasses cache) |
+| `asString(Charset)` | Read as `String` with specified encoding |
+| `size()` | Get blob size in bytes |
+
+### `TaskOutput` — Write operations
+
+| Method | Description |
+|---|---|
+| `write(byte[])` | Write byte array |
+| `write(InputStream)` | Write from stream |
+| `write(String, Charset)` | Write string with specified encoding |
+
+---
+
+## `TaskContext` — Advanced Features
+
+### Blob creation
+
+```java
+context.createBlob(InputBlobDefinition definition);
+```
+
+Creates a session-scoped blob from within the worker.
+
+### Subtask submission
+
+```java
+context.submitTask(TaskDefinition definition);
+```
+
+Submits a child task from within the current task (dynamic workflows).
+
+**Subtask input sources:**
+- Parent task input (reuse)
+- Existing blob
+- New `InputBlobDefinition`
+
+**Subtask output targets:**
+- Parent task output (delegation)
+- New `OutputBlobDefinition`
+
+---
+
+## Deployment Modes
+
+### Dynamic Mode
+
+Uses a pre-built Docker image that loads your JAR at runtime.
+
+**Advantages:**
+- Supports multiple task types.
+- No Docker rebuild required when changing logic.
+
+**Requirements:**
+- Your class must implement `TaskProcessor`.
+- The JAR is uploaded as a blob and referenced via `WorkerLibrary` in task options.
+
+**Runtime flow:**
+1. Receive task with `WorkerLibrary` specification.
+2. 
Unzip the blob containing the library (fat JAR). +3. Load the JAR with a dedicated `ClassLoader`. +4. Locate `TaskProcessor` by fully qualified class name. +5. Instantiate the processor. +6. Execute `processTask(TaskContext)`. +7. Return execution status. +8. Unload library and clean up. + +--- + +### Static Mode + +Build a custom Docker image with your processor embedded. + +**Advantages:** +- Simpler setup, no `WorkerLibrary` needed. +- Self-contained image. + +**Limitations:** +- Supports a single task type. +- Requires rebuilding the Docker image for any logic change. + +**Setup steps:** +1. Implement the `TaskProcessor` interface. +2. Create a `Main` class. +3. Configure with `ArmoniKWorker.builder().withTaskProcessor(...)`. +4. Build a fat JAR (Maven Shade or Gradle Shadow). +5. Create a Docker image with the JAR. +6. Deploy to ArmoniK (edit or create a partition). diff --git a/.docs/content/3.examples.md b/.docs/content/3.examples.md new file mode 100644 index 0000000..ee33153 --- /dev/null +++ b/.docs/content/3.examples.md @@ -0,0 +1,3 @@ +# Examples + +Please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. \ No newline at end of file diff --git a/.docs/index.rst b/.docs/index.rst new file mode 100644 index 0000000..7f0c0bb --- /dev/null +++ b/.docs/index.rst @@ -0,0 +1,11 @@ +ArmoniK.Extensions.Java documentation +===================================== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + :glob: + :numbered: + + content/* + api/* diff --git a/.docs/requirements.txt b/.docs/requirements.txt new file mode 100644 index 0000000..c1fa8a3 --- /dev/null +++ b/.docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx==8.2.3 +myst_parser==4.0.1 +sphinxcontrib-mermaid==1.0.0 +sphinx-rtd-theme==3.0.2 \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..037d4d5 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,21 @@ + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Move anchors out of the titles +# Build documentation in the ".docs/" directory with Sphinx +sphinx: + configuration: .docs/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: .docs/requirements.txt From 13115f0d30407ed00c771f78a792cfa28b09a243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 3 Apr 2026 14:28:31 +0200 Subject: [PATCH 2/2] expand on the example section --- .docs/content/3.examples.md | 113 +++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/.docs/content/3.examples.md b/.docs/content/3.examples.md index ee33153..851c0da 100644 --- a/.docs/content/3.examples.md +++ b/.docs/content/3.examples.md @@ -1,3 +1,114 @@ # Examples -Please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. \ No newline at end of file +For the actual code and instructions on how to execute them, +please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. 
+ + +## Sample Overview + +| Sample | Description | Key concept illustrated | +|---|---|---| +| `HelloWorld` | Submit a single task with inline input | Basic task submission | +| `SharedBlob` | Reuse one blob across multiple tasks | Blob reuse | +| `TaskDependencies` | Chain tasks using outputs as inputs | Task graph via blobs | +| `DynamicLibrary` | Load a library JAR at runtime | Dynamic worker loading | + +## Task Graphs via Blobs + +The blob abstraction is the mechanism that enables directed acyclic graphs (DAGs) of tasks in ArmoniK. The key insight is that an `OutputBlobDefinition` does not hold data — it is a **named placeholder** for a result that does not exist yet. When that placeholder is given to a subsequent task as an input, ArmoniK records the dependency and will not schedule the consuming task until the producing task has written to that blob. + +### How it works + +1. Submit task A, declaring an output blob `result_a`. +2. Use the `BlobHandle` for `result_a` as the **input** of task B. +3. Submit task B — it will only run once task A has completed and written `result_a`. + +``` +Task A ──writes──► blob result_a ──read by──► Task B +``` + +The control plane enforces this ordering automatically. No polling or manual synchronisation is needed on the client side. + +### `TaskDependencies` example + +The `TaskDependencies` sample illustrates a simple fan-in graph: + +``` +task1 (1+2=3) ──┐ + ├──► task3 (3+7=10) +task2 (3+4=7) ──┘ +``` + +The client submits all three tasks upfront. Tasks 1 and 2 are independent; task 3 declares the outputs of tasks 1 and 2 as its inputs, so the control plane holds task 3 until both predecessors complete. 
+ +On the client side, this is expressed by passing the `BlobHandle` of a previous task's output directly into the next `TaskDefinition`: + +```java +// Task 1 — independent +var taskDef1 = new TaskDefinition() + .withInput("num1", InputBlobDefinition.from("1".getBytes(UTF_8))) + .withInput("num2", InputBlobDefinition.from("2".getBytes(UTF_8))) + .withOutput("result"); +var task1 = session.submitTask(taskDef1); + +// Task 2 — independent +var taskDef2 = new TaskDefinition() + .withInput("num1", InputBlobDefinition.from("3".getBytes(UTF_8))) + .withInput("num2", InputBlobDefinition.from("4".getBytes(UTF_8))) + .withOutput("result"); +var task2 = session.submitTask(taskDef2); + +// Task 3 — depends on outputs of task 1 and task 2 +var taskDef3 = new TaskDefinition() + .withInput("num1", task1.getOutput("result")) // BlobHandle from task 1 + .withInput("num2", task2.getOutput("result")) // BlobHandle from task 2 + .withOutput("result"); +session.submitTask(taskDef3); +``` + +`task1.getOutput("result")` returns a `BlobHandle` — a reference to an output blob that has been declared but not yet written. Passing it as input to task 3 is what expresses the data dependency to the control plane. + +### `SharedBlob` example + +The `SharedBlob` sample shows a complementary pattern: a single blob uploaded once and reused as input across multiple tasks. This avoids re-uploading the same data and lets several tasks read the same value concurrently. + +```java +var sharedBlob = session.createBlob(InputBlobDefinition.from("2".getBytes(UTF_8))); + +session.submitTask(new TaskDefinition() + .withInput("num1", sharedBlob) + .withInput("num2", InputBlobDefinition.from("2".getBytes(UTF_8))) + .withOutput("result")); + +session.submitTask(new TaskDefinition() + .withInput("num1", sharedBlob) + .withInput("num2", InputBlobDefinition.from("3".getBytes(UTF_8))) + .withOutput("result")); +``` + +### Dynamic graphs from workers + +Task graphs are not limited to what the client defines upfront. 
A worker can itself call `context.submitTask(...)`, passing one of its own outputs as the input of a new child task. This enables workflows whose shape is determined at runtime — for example, a recursive divide-and-conquer where each task spawns sub-tasks only after inspecting its input.
+
+See [Worker — Subtask submission](2.worker.md#taskcontext--advanced-features) for the worker-side API.
+
+
+## Result Retrieval
+
+Output blobs are surfaced through the `BlobCompletionListener` registered on the session. When any output blob transitions to `COMPLETED` or `ABORTED`, the listener is called asynchronously:
+
+```java
+public class SimpleBlobListener implements BlobCompletionListener {
+    @Override
+    public void onBlobSuccess(Blob blob) {
+        System.out.println("Data: " + new String(blob.data(), UTF_8));
+    }
+
+    @Override
+    public void onBlobError(BlobError error) {
+        System.out.println("Error: " + error.cause().getMessage());
+    }
+}
+```
+
+In a graph workflow, each leaf output blob fires an `onBlobSuccess` callback independently as soon as the task that produces it finishes, even if other branches of the graph are still running.