From da18e84912114fc8b3e47b9c5fc6b02d6822e7b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 3 Apr 2026 13:00:02 +0200 Subject: [PATCH 1/2] docs: readthedocs in extensions java --- .docs/README.md | 33 +++++++++ .docs/_static/custom.css | 3 + .docs/conf.py | 51 +++++++++++++ .docs/content/0.overview.md | 47 ++++++++++++ .docs/content/1.client.md | 136 ++++++++++++++++++++++++++++++++++ .docs/content/2.worker.md | 144 ++++++++++++++++++++++++++++++++++++ .docs/content/3.examples.md | 3 + .docs/index.rst | 11 +++ .docs/requirements.txt | 4 + .readthedocs.yaml | 21 ++++++ 10 files changed, 453 insertions(+) create mode 100644 .docs/README.md create mode 100644 .docs/_static/custom.css create mode 100644 .docs/conf.py create mode 100644 .docs/content/0.overview.md create mode 100644 .docs/content/1.client.md create mode 100644 .docs/content/2.worker.md create mode 100644 .docs/content/3.examples.md create mode 100644 .docs/index.rst create mode 100644 .docs/requirements.txt create mode 100644 .readthedocs.yaml diff --git a/.docs/README.md b/.docs/README.md new file mode 100644 index 0000000..c0af7e5 --- /dev/null +++ b/.docs/README.md @@ -0,0 +1,33 @@ +# ArmoniK Docs + +Docs for ArmoniK + +## Installation + +> Be aware to be at the root of the repository + +```bash +python -m venv .venv-doc +``` + +Then activate the virtual environment: + +```bash +source .venv-doc/bin/activate +``` + +And install dependencies: + +```bash +pip install -r .docs/requirements.txt +``` + +## Usage + +To build the docs locally, run the following command: + +```bash +sphinx-build -M html .docs build +``` + +Outputs can be found in `build/html/index.html`. 
diff --git a/.docs/_static/custom.css b/.docs/_static/custom.css new file mode 100644 index 0000000..5bdc6a0 --- /dev/null +++ b/.docs/_static/custom.css @@ -0,0 +1,3 @@ +.wy-nav-content { + max-width: 100% !important; +} \ No newline at end of file diff --git a/.docs/conf.py b/.docs/conf.py new file mode 100644 index 0000000..564d000 --- /dev/null +++ b/.docs/conf.py @@ -0,0 +1,51 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "ArmoniK.Extensions.Java" +copyright = "2021-%Y, ANEO" +author = "ANEO" +release = "main" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["myst_parser", "sphinxcontrib.mermaid"] + +templates_path = ["_templates"] +exclude_patterns = ["requirements.txt", "README.md"] +suppress_warnings = ["myst.header"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] +html_css_files = ["custom.css"] +html_search = True +html_extra_path = ["api/client/apidocs"] + +# -- Options for source files ------------------------------------------------ +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-source-files +source_suffix = { + ".rst": "restructuredtext", + ".txt": "markdown", + ".md": "markdown", +} + +# -- Options MyST Parser ------------------------------------------------ +myst_fence_as_directive = ["mermaid"] +myst_heading_anchors = 3 + +# -- Options to show "Edit on GitHub" 
button --------------------------------- +html_context = { + "display_github": True, # Integrate GitHub + "github_user": "aneoconsulting", # Username + "github_repo": "ArmoniK.Extensions.Java", # Repo name + "github_version": "main", # Version + "conf_py_path": "/.docs/", # Path in the checkout to the docs root +} diff --git a/.docs/content/0.overview.md b/.docs/content/0.overview.md new file mode 100644 index 0000000..67bc219 --- /dev/null +++ b/.docs/content/0.overview.md @@ -0,0 +1,47 @@ +# ArmoniK Java SDK — Overview + +## Client–Worker Model + +ArmoniK implements a **Client–Worker** distributed processing model. Responsibilities are clearly separated: + +**Client Responsibilities** +- Create and manage sessions. +- Submit tasks and define execution dependencies (task graphs). +- Manage input/output blobs and error handling. + +**Worker Responsibilities** +- Implement the computation logic for each task. +- Read input blobs and produce output blobs. +- Optionally submit new tasks (dynamic workflows). + +## SDK Goals + +- **Simplify** the development of Clients and Workers. +- Provide **pre-built Worker containers** following unified conventions. +- Offer a **high-level Worker API** with consistent lifecycle and error handling. +- Provide **simplified Client APIs** to create sessions, submit tasks, and manage data. +- Promote **best practices**: idempotency, error reporting, observability. +- Ensure **compatibility and long-term maintainability** across ArmoniK versions. +- Ensure **interoperability**: clients and workers can be mixed across languages. + +## Interoperability + +The SDK is designed to enable cross-language interoperability — you can mix clients and workers across SDK languages. + +**Example:** A Java client orchestrates tasks executed by Python or C# workers. 
+ +Two conventions make this possible: + +**Convention 1 — Payload structure** + +The payload is a JSON association table mapping logical names to blob IDs: +```json +{"inputs": {"A": "id1"}, "outputs": {"result": "id2"}} +``` + +**Convention 2 — Task options** + +Dynamic library loading is enabled via standardized task options carrying: +- Path to the library +- Symbol (class/function name) +- Blob ID for the library artifact \ No newline at end of file diff --git a/.docs/content/1.client.md b/.docs/content/1.client.md new file mode 100644 index 0000000..821d935 --- /dev/null +++ b/.docs/content/1.client.md @@ -0,0 +1,136 @@ +# ArmoniK Java SDK — Client + +## Role of the Client SDK + +The client SDK provides a language-friendly interface to interact with ArmoniK's control plane. It allows you to: + +- Create and manage **sessions**. +- Define and submit **tasks**, optionally with dependencies. +- Create and manage **input and output blobs**. +- Monitor task progress and retrieve results. +- Use optional asynchronous callback mechanisms. +- Ensure consistent workflow semantics: retries, error propagation, idempotency. + +--- + +## Entry Point: `ArmoniKClient` + +The main entry point is `ArmoniKClient`, configured via `ArmoniKConfig` which describes: +- Endpoints +- TLS configuration +- Retry policy + +Use try-with-resources for automatic cleanup: + +```java +try (ArmoniKClient client = new ArmoniKClient(config)) { + // ... +} +``` + +**Available operations on `ArmoniKClient`:** +- Create a new `Session` +- Retrieve an existing `Session` +- Cancel a `Session` +- Close a `Session` + +--- + +## Session Creation + +Define a session with `SessionDefinition`, then submit it to get a `SessionHandle`. 
+ +`SessionDefinition` components: + +| Component | Description | +|---|---| +| Partition IDs | Where tasks can execute | +| `TaskConfiguration` | Default configuration for all tasks | +| `BlobCompletionListener` | Callback when an output blob is `COMPLETED` or `ABORTED` | + +--- + +## Blob Completion Listener + +`BlobCompletionListener` is a callback interface for async result notifications. Register it in `SessionDefinition` to enable event-driven result handling. + +```java +BlobCompletionListener listener = new BlobCompletionListener() { + @Override + public void onBlobSuccess(Blob blob) { /* blob is ready */ } + + @Override + public void onBlobError(BlobError error) { /* handle error */ } +}; +``` + +--- + +## Working with Blobs + +| Type | Description | +|---|---| +| `InputBlobDefinition` | Data sent to workers | +| `OutputBlobDefinition` | Placeholder for expected results | +| `BlobHandle` | Reference to an already uploaded/downloaded blob | + +Input data can come from: +- In-memory byte array +- File +- Custom source by implementing the `BlobData` interface + +`SessionHandle` can also create session-scoped blobs directly. + +--- + +## Task Submission + +Create a `TaskDefinition` specifying: +- **Inputs**: `InputBlobDefinition` or `BlobHandle` +- **Outputs**: `OutputBlobDefinition` +- **Optional**: override `TaskConfiguration` +- **Optional**: `WorkerLibrary` for dynamic loading + +Submit it to `SessionHandle`. This is **non-blocking** and returns a `TaskHandle` immediately. + +--- + +## WorkerLibrary (Dynamic Loading) + +`WorkerLibrary` describes a dynamically loaded worker library for task execution. It has three components: + +| Field | Description | Example | +|---|---|---| +| `path` | Path to the artifact inside the zip | `"MyWorker.jar"` | +| `symbol` | Fully qualified class name | `"com.example.MyProcessor"` | +| `libraryHandle` | `BlobHandle` referencing the uploaded library zip | — | + +--- + +## Typical Workflow + +``` +1. 
Create ArmoniKClient with config +2. Create SessionDefinition (with partitions, TaskConfiguration, listener) +3. Open SessionHandle +4. Define InputBlobDefinition and OutputBlobDefinition +5. Create and submit TaskDefinition +6. Wait for completion or react to BlobCompletionListener callbacks +7. Read output blobs +8. Close session and client +``` + +--- + +## Task Submission Flow (General) + +1. The client creates a **session** to group related tasks. +2. Input data is uploaded as **blobs**. +3. The client defines one or more **task definitions**, each referencing: + - input blobs (data dependencies) + - expected output blobs + - optional task configuration +4. Tasks are submitted to the control plane. +5. Workers execute tasks, read inputs, and produce outputs. +6. The control plane signals task completion. +7. The client retrieves output blobs or submits new tasks to continue the workflow. diff --git a/.docs/content/2.worker.md b/.docs/content/2.worker.md new file mode 100644 index 0000000..576cbbb --- /dev/null +++ b/.docs/content/2.worker.md @@ -0,0 +1,144 @@ +# ArmoniK Java SDK — Worker + +## Project Structure + +The Java Worker SDK is split into two Maven modules: + +| Module | Purpose | +|---|---| +| `armonik-worker-domain` | SDK for implementing `TaskProcessor`, used for dynamic loading | +| `armonik-worker` | Allows static `TaskProcessor` implementation | + +**Key entry points:** +- `TaskProcessor` interface — implement your task logic here. +- `ArmoniKWorker` class — handles server startup. + +--- + +## `TaskProcessor` Interface + +`TaskProcessor` is a `@FunctionalInterface` with a single method: + +```java +TaskOutcome processTask(TaskContext context); +``` + +**Return values:** + +| Value | Meaning | +|---|---| +| `TaskOutcome.SUCCESS` | Task completed successfully | +| `TaskOutcome.error(String)` | Business logic failure with a message | + +**Error handling strategy:** +- Return `SUCCESS` for successful execution. 
+- Return `error(message)` for expected business failures.
+- Let exceptions propagate for infrastructure/unexpected issues.
+
+---
+
+## `TaskContext` — Input/Output Access
+
+### Accessing inputs and outputs by logical name
+
+```java
+TaskInput input = context.getInput("A");
+TaskOutput output = context.getOutput("result");
+
+boolean hasInput = context.hasInput("A");
+boolean hasOutput = context.hasOutput("result");
+```
+
+### `TaskInput` — Read operations
+
+| Method | Description |
+|---|---|
+| `rawData()` | Read entire blob as `byte[]` (cached if ≤ 8 MiB) |
+| `stream()` | Read as `InputStream` (large files, bypasses cache) |
+| `asString(Charset)` | Read as `String` with specified encoding |
+| `size()` | Get blob size in bytes |
+
+### `TaskOutput` — Write operations
+
+| Method | Description |
+|---|---|
+| `write(byte[])` | Write byte array |
+| `write(InputStream)` | Write from stream |
+| `write(String, Charset)` | Write string with specified encoding |
+
+---
+
+## `TaskContext` — Advanced Features
+
+### Blob creation
+
+```java
+context.createBlob(InputBlobDefinition definition);
+```
+
+Creates a session-scoped blob from within the worker.
+
+### Subtask submission
+
+```java
+context.submitTask(TaskDefinition definition);
+```
+
+Submits a child task from within the current task (dynamic workflows).
+
+**Subtask input sources:**
+- Parent task input (reuse)
+- Existing blob
+- New `InputBlobDefinition`
+
+**Subtask output targets:**
+- Parent task output (delegation)
+- New `OutputBlobDefinition`
+
+---
+
+## Deployment Modes
+
+### Dynamic Mode
+
+Uses a pre-built Docker image that loads your JAR at runtime.
+
+**Advantages:**
+- Supports multiple task types.
+- No Docker rebuild required when changing logic.
+
+**Requirements:**
+- Your class must implement `TaskProcessor`.
+- The JAR is uploaded as a blob and referenced via `WorkerLibrary` in task options.
+
+**Runtime flow:**
+1. Receive task with `WorkerLibrary` specification.
+2. 
Unzip the blob containing the library (fat JAR). +3. Load the JAR with a dedicated `ClassLoader`. +4. Locate `TaskProcessor` by fully qualified class name. +5. Instantiate the processor. +6. Execute `processTask(TaskContext)`. +7. Return execution status. +8. Unload library and clean up. + +--- + +### Static Mode + +Build a custom Docker image with your processor embedded. + +**Advantages:** +- Simpler setup, no `WorkerLibrary` needed. +- Self-contained image. + +**Limitations:** +- Supports a single task type. +- Requires rebuilding the Docker image for any logic change. + +**Setup steps:** +1. Implement the `TaskProcessor` interface. +2. Create a `Main` class. +3. Configure with `ArmoniKWorker.builder().withTaskProcessor(...)`. +4. Build a fat JAR (Maven Shade or Gradle Shadow). +5. Create a Docker image with the JAR. +6. Deploy to ArmoniK (edit or create a partition). diff --git a/.docs/content/3.examples.md b/.docs/content/3.examples.md new file mode 100644 index 0000000..ee33153 --- /dev/null +++ b/.docs/content/3.examples.md @@ -0,0 +1,3 @@ +# Examples + +Please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. \ No newline at end of file diff --git a/.docs/index.rst b/.docs/index.rst new file mode 100644 index 0000000..7f0c0bb --- /dev/null +++ b/.docs/index.rst @@ -0,0 +1,11 @@ +ArmoniK.Extensions.Java documentation +===================================== + +.. 
toctree:: + :maxdepth: 2 + :caption: Contents: + :glob: + :numbered: + + content/* + api/* diff --git a/.docs/requirements.txt b/.docs/requirements.txt new file mode 100644 index 0000000..c1fa8a3 --- /dev/null +++ b/.docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx==8.2.3 +myst_parser==4.0.1 +sphinxcontrib-mermaid==1.0.0 +sphinx-rtd-theme==3.0.2 \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..037d4d5 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,21 @@ + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Move anchors out of the titles +# Build documentation in the ".docs/" directory with Sphinx +sphinx: + configuration: .docs/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: .docs/requirements.txt From 13115f0d30407ed00c771f78a792cfa28b09a243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Fri, 3 Apr 2026 14:28:31 +0200 Subject: [PATCH 2/2] expand on the example section --- .docs/content/3.examples.md | 113 +++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/.docs/content/3.examples.md b/.docs/content/3.examples.md index ee33153..851c0da 100644 --- a/.docs/content/3.examples.md +++ b/.docs/content/3.examples.md @@ -1,3 +1,114 @@ # Examples -Please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. \ No newline at end of file +For the actual code and instructions on how to execute them, +please check the Java samples in our dedicated [Samples](https://github.com/aneoconsulting/ArmoniK.Samples) repository. 
+ + +## Sample Overview + +| Sample | Description | Key concept illustrated | +|---|---|---| +| `HelloWorld` | Submit a single task with inline input | Basic task submission | +| `SharedBlob` | Reuse one blob across multiple tasks | Blob reuse | +| `TaskDependencies` | Chain tasks using outputs as inputs | Task graph via blobs | +| `DynamicLibrary` | Load a library JAR at runtime | Dynamic worker loading | + +## Task Graphs via Blobs + +The blob abstraction is the mechanism that enables directed acyclic graphs (DAGs) of tasks in ArmoniK. The key insight is that an `OutputBlobDefinition` does not hold data — it is a **named placeholder** for a result that does not exist yet. When that placeholder is given to a subsequent task as an input, ArmoniK records the dependency and will not schedule the consuming task until the producing task has written to that blob. + +### How it works + +1. Submit task A, declaring an output blob `result_a`. +2. Use the `BlobHandle` for `result_a` as the **input** of task B. +3. Submit task B — it will only run once task A has completed and written `result_a`. + +``` +Task A ──writes──► blob result_a ──read by──► Task B +``` + +The control plane enforces this ordering automatically. No polling or manual synchronisation is needed on the client side. + +### `TaskDependencies` example + +The `TaskDependencies` sample illustrates a simple fan-in graph: + +``` +task1 (1+2=3) ──┐ + ├──► task3 (3+7=10) +task2 (3+4=7) ──┘ +``` + +The client submits all three tasks upfront. Tasks 1 and 2 are independent; task 3 declares the outputs of tasks 1 and 2 as its inputs, so the control plane holds task 3 until both predecessors complete. 
+ +On the client side, this is expressed by passing the `BlobHandle` of a previous task's output directly into the next `TaskDefinition`: + +```java +// Task 1 — independent +var taskDef1 = new TaskDefinition() + .withInput("num1", InputBlobDefinition.from("1".getBytes(UTF_8))) + .withInput("num2", InputBlobDefinition.from("2".getBytes(UTF_8))) + .withOutput("result"); +var task1 = session.submitTask(taskDef1); + +// Task 2 — independent +var taskDef2 = new TaskDefinition() + .withInput("num1", InputBlobDefinition.from("3".getBytes(UTF_8))) + .withInput("num2", InputBlobDefinition.from("4".getBytes(UTF_8))) + .withOutput("result"); +var task2 = session.submitTask(taskDef2); + +// Task 3 — depends on outputs of task 1 and task 2 +var taskDef3 = new TaskDefinition() + .withInput("num1", task1.getOutput("result")) // BlobHandle from task 1 + .withInput("num2", task2.getOutput("result")) // BlobHandle from task 2 + .withOutput("result"); +session.submitTask(taskDef3); +``` + +`task1.getOutput("result")` returns a `BlobHandle` — a reference to an output blob that has been declared but not yet written. Passing it as input to task 3 is what expresses the data dependency to the control plane. + +### `SharedBlob` example + +The `SharedBlob` sample shows a complementary pattern: a single blob uploaded once and reused as input across multiple tasks. This avoids re-uploading the same data and lets several tasks read the same value concurrently. + +```java +var sharedBlob = session.createBlob(InputBlobDefinition.from("2".getBytes(UTF_8))); + +session.submitTask(new TaskDefinition() + .withInput("num1", sharedBlob) + .withInput("num2", InputBlobDefinition.from("2".getBytes(UTF_8))) + .withOutput("result")); + +session.submitTask(new TaskDefinition() + .withInput("num1", sharedBlob) + .withInput("num2", InputBlobDefinition.from("3".getBytes(UTF_8))) + .withOutput("result")); +``` + +### Dynamic graphs from workers + +Task graphs are not limited to what the client defines upfront. 
A worker can itself call `context.submitTask(...)`, passing one of its own outputs as the input of a new child task. This enables workflows whose shape is determined at runtime — for example, a recursive divide-and-conquer where each task spawns sub-tasks only after inspecting its input.
+
+See [Worker — Subtask submission](2.worker.md#taskcontext--advanced-features) for the worker-side API.
+
+
+## Result Retrieval
+
+Output blobs are surfaced through the `BlobCompletionListener` registered on the session. When any output blob transitions to `COMPLETED` or `ABORTED`, the listener is called asynchronously:
+
+```java
+public class SimpleBlobListener implements BlobCompletionListener {
+    @Override
+    public void onBlobSuccess(Blob blob) {
+        System.out.println("Data: " + new String(blob.data(), UTF_8));
+    }
+
+    @Override
+    public void onBlobError(BlobError error) {
+        System.out.println("Error: " + error.cause().getMessage());
+    }
+}
+```
+
+In a graph workflow, each leaf output blob fires an `onBlobSuccess` callback independently as soon as the task that produces it finishes, even if other branches of the graph are still running.