Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7b2bdb9
Transformers.js models working in d8; WIP other shells, another model
danleh Feb 10, 2025
6d48daa
fix paths
danleh Aug 14, 2025
8bab51e
both distilbert and Whisper running in d8
danleh Feb 11, 2025
e1fc06b
cleanup dependencies, TODOs
danleh Feb 11, 2025
25bb946
make it compatible with all shells
danleh Feb 11, 2025
9ae0fb4
reuse same TextEncoder/Decoder polyfill as other wasm line item
danleh Feb 12, 2025
18b6918
WIP adapt to runner changes, new preloading code
danleh Aug 18, 2025
0e86613
Merge branch 'main' into transformersjs-rebase
danleh Aug 18, 2025
06610f5
fix both tasks, use preloading from runner/JetStream global
danleh Aug 18, 2025
abec374
fix accidentally committed dir
danleh Aug 18, 2025
1c425af
fix browser fetch with blob preloading, fix print calls
danleh Aug 19, 2025
8f14f8f
print is no longer used
danleh Aug 20, 2025
7aa469a
shorten audio clip to speed up whisper task
danleh Aug 20, 2025
f8147cd
shorten iterations for bert task
danleh Aug 20, 2025
1d122da
add output validation
danleh Aug 20, 2025
c9c67c0
polyfill performance.timeOrigin for transformersjs
danleh Aug 20, 2025
71e1ff0
update transformersjs dependency to latest
danleh Aug 20, 2025
f3bcfb4
cleanup, rebuild
danleh Aug 20, 2025
fb0ab2b
fix module resolution with blob URL/preload
danleh Aug 20, 2025
65b7e97
add tag to transformersjs workloads
danleh Sep 8, 2025
28b203c
Merge branch 'main' into transformersjs-rebase
danleh Sep 9, 2025
5b5e6c5
fix duplicate TextDecoder polyfill
danleh Sep 9, 2025
6b52dad
fix new allowUtf16 test
danleh Sep 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions JetStreamDriver.js
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,7 @@ class Scripts {
this.add(`
performance.mark ??= function(name) { return { name }};
performance.measure ??= function() {};
performance.timeOrigin ??= performance.now();
`);
}

Expand Down Expand Up @@ -2205,6 +2206,56 @@ let BENCHMARKS = [
worstCaseCount: 2,
tags: ["Default", "Wasm"],
}),
// Transformers.js text/NLP workload: DistilBERT fine-tuned on SST-2 (uint8
// ONNX weights), driven by transformersjs/benchmark.js + task-bert.js.
new AsyncBenchmark({
name: "transformersjs-bert-wasm",
files: [
// TextEncoder/TextDecoder polyfill, loaded before the workload scripts.
"./polyfills/fast-text-encoding/1.0.3/text.js",
"./transformersjs/benchmark.js",
"./transformersjs/task-bert.js",
],
// These keys are read by transformersjs/benchmark.js via `JetStream.preload`.
preload: {
transformersJsModule: "./transformersjs/build/transformers.js",

// ONNX runtime: JS glue module and the matching Wasm binary.
onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",

// Model weights, config and tokenizer files.
modelWeights: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/onnx/model_uint8.onnx",
modelConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/config.json",
modelTokenizer: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer.json",
modelTokenizerConfig: "./transformersjs/build/models/Xenova/distilbert-base-uncased-finetuned-sst-2-english/tokenizer_config.json",
},
iterations: 30,
allowUtf16: true,
tags: ["Default", "Wasm", "transformersjs"],
}),
// Transformers.js audio/speech-to-text workload: Whisper tiny.en (quantized
// ONNX encoder/decoder), driven by transformersjs/benchmark.js + task-whisper.js.
new AsyncBenchmark({
name: "transformersjs-whisper-wasm",
files: [
// TextEncoder/TextDecoder polyfill, loaded before the workload scripts.
"./polyfills/fast-text-encoding/1.0.3/text.js",
"./transformersjs/benchmark.js",
"./transformersjs/task-whisper.js",
],
// These keys are read by transformersjs/benchmark.js via `JetStream.preload`.
preload: {
transformersJsModule: "./transformersjs/build/transformers.js",

// ONNX runtime: JS glue module and the matching Wasm binary.
onnxJsModule: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.mjs",
onnxWasmBinary: "./transformersjs/build/onnxruntime-web/ort-wasm-simd-threaded.wasm",

// Model weights plus the config/tokenizer/preprocessor files.
modelEncoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/encoder_model_quantized.onnx",
modelDecoderWeights: "./transformersjs/build/models/Xenova/whisper-tiny.en/onnx/decoder_model_merged_quantized.onnx",
modelConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/config.json",
modelTokenizer: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer.json",
modelTokenizerConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/tokenizer_config.json",
modelPreprocessorConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/preprocessor_config.json",
modelGenerationConfig: "./transformersjs/build/models/Xenova/whisper-tiny.en/generation_config.json",

// Audio input; benchmark.js hands its buffer to the task as `inputFile`.
inputFile: "./transformersjs/build/inputs/jfk.raw",
},
iterations: 5,
worstCaseCount: 1,
allowUtf16: true,
tags: ["Default", "Wasm", "transformersjs"],
}),
new WasmLegacyBenchmark({
name: "tfjs-wasm",
files: [
Expand Down
2 changes: 2 additions & 0 deletions transformersjs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/util/node_modules/
/util/package-lock.json
12 changes: 12 additions & 0 deletions transformersjs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
- Two tasks: one text/NLP (sentiment classification with DistilBERT), one audio processing/speech-to-text (Whisper).
- Everything in `build/` is generated or an upstream library.
- Everything in `util/` is tooling for building and preparing the benchmark.

# Licenses

- Transformers.js: Apache 2.0, https://github.com/huggingface/transformers.js/blob/main/LICENSE
- ONNX runtime: MIT, https://github.com/microsoft/onnxruntime/blob/main/LICENSE
- `text-encoding` Polyfill: Unlicense OR Apache 2.0, https://github.com/inexorabletash/text-encoding/blob/master/LICENSE.md
- Model `DistilBERT base uncased finetuned SST-2`: Apache 2.0, https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english
- Model `openai/whisper-tiny.en`: Apache 2.0, https://huggingface.co/openai/whisper-tiny.en
- Audio file for speech-to-text task: Public domain, https://www.jfklibrary.org/learn/about-jfk/historic-speeches/inaugural-address
116 changes: 116 additions & 0 deletions transformersjs/benchmark.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2025 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Polyfills that Transformers.js / the ONNX runtime needs in JavaScript shells.

/**
 * Minimal `URL` replacement: it only records the value it was constructed
 * with. No parsing, normalization, or resolution against `base` is performed.
 */
class URL {
  /** The `url` constructor argument, stored verbatim. */
  href;

  /**
   * @param {*} url - Stored as-is in `href`.
   * @param {*} base - Accepted for signature compatibility; unused.
   */
  constructor(url, base) {
    this.href = url;
  }
}

// Installed unconditionally, replacing any `URL` the environment provides.
globalThis.URL = URL;

// Polyfill fetch for shell-compatibility and to cache / preload model weights etc.

// Cache of preloaded resources, keyed by the URL that is later passed to
// fetch(). Initialized in init() below due to async.
let preload = { /* Initialized in init() below due to async. */ };

// The environment's own fetch (available in browsers), or a stub that fails
// for shells that have none.
const originalFetch = globalThis.fetch ?? function(url) {
  throw new Error("no fetch available");
};

/**
 * Replacement for the global `fetch()`.
 *
 * If `url` is an own key of `preload` with a truthy value, resolves to a
 * minimal Response-like object (`ok`, `status`, `arrayBuffer()`, `blob()`)
 * backed by the cached data. Otherwise delegates to the original fetch,
 * forwarding all arguments.
 *
 * @param {*} url - Resource to load; used as the lookup key into `preload`.
 * @param {...*} rest - Remaining fetch arguments (e.g. the `init` options);
 *     only used when delegating to the original fetch.
 * @returns {Promise<object>} The mocked response, or whatever the original
 *     fetch returns.
 */
globalThis.fetch = async function(url, ...rest) {
  // Redirect some paths to cached/preloaded resources.
  // Own-property check: keys such as "toString" or "constructor" must not
  // match members inherited from Object.prototype.
  const cached = Object.hasOwn(preload, url) ? preload[url] : undefined;
  if (cached) {
    return {
      ok: true,
      status: 200,
      // Like Response.arrayBuffer(), always hand back a Promise, so callers
      // may use `.then()` as well as `await`.
      async arrayBuffer() { return cached; },
      async blob() {
        return {
          size: cached.byteLength,
          async arrayBuffer() { return cached; },
        };
      },
    };
  }

  // This should only be called in the browser, where fetch() is available.
  return originalFetch(url, ...rest);
};

// JetStream benchmark harness. Reused for two different Transformers.js tasks.
// Assumes `initPipeline(pipelineFromTransformersJs)`,
// `doTask(initializedPipeline, inputArrayBuffer)`, and `validate(output)` are
// in the global scope.

/**
 * JetStream workload wrapper around a Transformers.js pipeline.
 *
 * Relies on the globals `JetStream` (runner), `preload` (cache used by the
 * fetch polyfill), and the task-specific `initPipeline`, `doTask`, and
 * `validate` functions.
 */
class Benchmark {
  transformersJsModule;
  wasmBinary;
  pipeline;
  inputFile;
  output;

  /**
   * Loads the Transformers.js module and the ONNX runtime Wasm binary, fills
   * the `preload` cache for every entry of `JetStream.preload`, and reads the
   * optional task input.
   */
  async init() {
    this.transformersJsModule = await JetStream.dynamicImport(JetStream.preload.transformersJsModule);
    this.wasmBinary = await JetStream.getBinary(JetStream.preload.onnxWasmBinary);

    // Key the cache by the resource URL, which is what fetch() is later
    // called with.
    for (const url of Object.values(JetStream.preload)) {
      preload[url] = await JetStream.getBinary(url);
    }

    if ('inputFile' in JetStream.preload) {
      this.inputFile = (await JetStream.getBinary(JetStream.preload.inputFile)).buffer;
    }
  }

  /**
   * Runs the task once and stores its result in `this.output`. The first call
   * additionally configures Transformers.js and creates the pipeline.
   */
  async runIteration() {
    // Initialize the inference pipeline in the first iteration.
    if (!this.pipeline) {
      // TODO: Profile startup only: What is taking so much time here?
      const { env, pipeline } = this.transformersJsModule;

      env.allowRemoteModels = false;
      env.allowLocalModels = true;
      env.localModelPath = './transformersjs/build/models/';

      // Always select the Wasm backend, nothing else.
      delete env.backends.onnx.webgl;
      delete env.backends.onnx.webgpu;

      // Single-threaded only for now, since we cannot spawn workers in shells.
      // TODO: Implement sufficiently powerful workers in shells (or provide
      // polyfills).
      env.backends.onnx.wasm.numThreads = 1;

      // Do not specify path prefix, because this loads the JSEP build by default.
      // TODO: Do we want the JSEP build because it's the default online, or the
      // non-asyncified one, since it's the smaller / more performant one?
      // env.backends.onnx.wasm.wasmPaths = 'build/onnxruntime-web/';
      // So instead, give the ONNX runtime files directly:
      env.backends.onnx.wasm.wasmPaths = {
        // The ONNX runtime module is dynamically imported relative to the
        // Transformers.js module above, hence strip the prefix.
        // With preloading, this is an (absolute) blob URL, so the replace is a nop.
        mjs: JetStream.preload.onnxJsModule.replace('./transformersjs/build/', './')
      };
      // Give it the wasmBinary directly instead of a path, such that the
      // ONNX runtime uses asynchronous (not streaming) Wasm instantiation.
      // (To keep the shell and browser results comparable, and streaming
      // instantiation is not available in shells.)
      env.backends.onnx.wasm.wasmBinary = this.wasmBinary;

      this.pipeline = await initPipeline(pipeline);
    }

    this.output = await doTask(this.pipeline, this.inputFile);
  }

  /** Passes the most recent output to the task's global `validate()`. */
  validate() {
    validate(this.output);
  }
}
9 changes: 9 additions & 0 deletions transformersjs/build.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Built on 2025-08-20T13:30:51Z
Installing Node dependencies...
Download and convert audio input(s)...
Converted 4.25s of audio
from 'jfk.wav', 2 channel(s), 44100 Hz, 16 bit, 176000 samples
to 'build/inputs/jfk.raw', 1 channel(s), 16000 Hz, 32 bit float, 68000 samples, 272000 bytes
Download and run model(s)...
Copy library files into build/...
Building done
50 changes: 50 additions & 0 deletions transformersjs/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/bash
#
# Regenerates everything under build/: installs the Node tooling in util/,
# downloads and converts the audio input, downloads the models, and copies the
# Transformers.js and ONNX runtime library files. Progress is recorded in
# build.log.

set -euo pipefail

# All paths below are relative to this script's directory. Change into it so
# that `rm -rf build/` and friends never operate on the caller's working
# directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

rm -rf build/
mkdir -p build/{models,inputs,onnxruntime-web}/

# Optional: clean all node packages as well.
rm -rf util/node_modules/

touch build.log
BUILD_LOG="$(realpath build.log)"
# No `-a` here: the first write starts a fresh log.
echo "Built on $(date -u '+%Y-%m-%dT%H:%M:%SZ')" | tee "$BUILD_LOG"

echo "Installing Node dependencies..." | tee -a "$BUILD_LOG"
pushd util/
npm install
popd

echo "Download and convert audio input(s)..." | tee -a "$BUILD_LOG"
# -O: always write to jfk.wav. Without it, wget saves to jfk.wav.1 when a stale
# jfk.wav from an aborted run exists, and the stale file would get converted.
wget -O jfk.wav https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav | tee -a "$BUILD_LOG"
# Shorten the audio file to one sentence in the middle, to speed up a single iteration.
node util/convert-audio.mjs jfk.wav build/inputs/jfk.raw 52000 120000 | tee -a "$BUILD_LOG"
rm jfk.wav

echo "Download and run model(s)..." | tee -a "$BUILD_LOG"
# This automatically places the model files in `build/models/`.
node util/test-models.mjs

echo "Copy library files into build/..." | tee -a "$BUILD_LOG"

cp util/node_modules/@huggingface/transformers/dist/transformers.js build/
git apply transformers.js.patch

# Transformers.js packages the ONNX runtime JSEP build by default, even when
# only using the Wasm backend, which would be fine with the non-JSEP build.
# JSEP uses ASYNCIFY, which isn't optimal. And it's a much larger Wasm binary.
# cp util/node_modules/@huggingface/transformers/dist/ort-wasm-simd-threaded.jsep.{mjs,wasm} build/

# There is also an ONNX runtime build in the onnxruntime-web package.
# TODO(dlehmann): Discuss with upstream Transformers.js folks, whether they can
# use the non-JSEP build if one requests the Wasm backend.
# TODO(dlehmann): Measure performance difference between the two.
cp util/node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.{mjs,wasm} build/onnxruntime-web/

# TODO: Compress model data (and maybe Wasm modules) with zstd.
# Either decompress with native APIs available in browsers or JS/Wasm polyfill?
# E.g., https://github.com/101arrowz/fzstd or https://github.com/fabiospampinato/zstandard-wasm or https://github.com/donmccurdy/zstddec-wasm

echo "Building done" | tee -a "$BUILD_LOG"
Binary file added transformersjs/build/inputs/jfk.raw
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
"activation": "gelu",
"architectures": [
"DistilBertForSequenceClassification"
],
"attention_dropout": 0.1,
"dim": 768,
"dropout": 0.1,
"finetuning_task": "sst-2",
"hidden_dim": 3072,
"id2label": {
"0": "NEGATIVE",
"1": "POSITIVE"
},
"initializer_range": 0.02,
"label2id": {
"NEGATIVE": 0,
"POSITIVE": 1
},
"max_position_embeddings": 512,
"model_type": "distilbert",
"n_heads": 12,
"n_layers": 6,
"output_past": true,
"pad_token_id": 0,
"qa_dropout": 0.1,
"seq_classif_dropout": 0.2,
"sinusoidal_pos_embds": false,
"tie_weights_": true,
"transformers_version": "4.29.2",
"vocab_size": 30522
}
Binary file not shown.
Loading
Loading