-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
80 lines (67 loc) · 2.45 KB
/
run.py
File metadata and controls
80 lines (67 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# /// script
# requires-python = ">=3.10,<3.13"
# dependencies = [
# "docling[vlm]>=2.64.0",
# ]
# ///
import logging
import time
# Keep logging at WARNING overall, but switch the inline MLX model's logger to
# DEBUG so its tokens/sec figures are shown for each page.
logging.basicConfig(level=logging.WARNING)
mlx_logger = logging.getLogger("docling.models.vlm_models_inline.mlx_model")
mlx_logger.setLevel(logging.DEBUG)
from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import AcceleratorOptions, VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
# Download the PDF up front so network time isn't included in the benchmark.
import os
import tempfile
import urllib.request

pdf_url = "https://arxiv.org/pdf/2501.17887"
pdf_path = os.path.join(tempfile.gettempdir(), "benchmark.pdf")
# A copy left in the temp directory by an earlier run is reused as-is.
if not os.path.exists(pdf_path):
    # Fetch into a sibling ".part" file and rename it into place only once the
    # download has finished, so an interrupted transfer never leaves a
    # truncated benchmark.pdf behind for later runs to pick up.
    partial_path = pdf_path + ".part"
    try:
        urllib.request.urlretrieve(pdf_url, partial_path)
        os.replace(partial_path, pdf_path)
    finally:
        # After a successful rename the partial file no longer exists; this
        # only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
source = pdf_path
# --- Phase 1: Setup (model loading, converter init) ---
# perf_counter() is a monotonic, high-resolution clock, which makes it the
# appropriate choice for measuring elapsed intervals.
t_setup_start = time.perf_counter()
# Work on a copy so the shared GRANITEDOCLING_MLX spec is not modified.
vlm_options = vlm_model_specs.GRANITEDOCLING_MLX.model_copy()
vlm_options.track_generated_tokens = True
pipeline_options = VlmPipelineOptions(
    vlm_options=vlm_options,
    generate_page_images=True,
    generate_picture_images=False,
)
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        ),
    }
)
# DocumentConverter creates its pipelines lazily, on first use. Build the PDF
# pipeline explicitly here so that model loading is counted as setup time
# instead of being folded into the conversion timing of Phase 2.
converter.initialize_pipeline(InputFormat.PDF)
t_setup = time.perf_counter() - t_setup_start
# --- Phase 2: Conversion ---
# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be distorted by system clock adjustments during a long conversion.
t_convert_start = time.perf_counter()
result = converter.convert(source=source)
t_convert = time.perf_counter() - t_convert_start
doc = result.document
# Page count is taken from the conversion result's page list.
num_pages = len(result.pages)
# Summary report: fixed header and timings first, throughput afterwards.
rule = "=" * 50
for summary_line in (
    rule,
    "BENCHMARK RESULTS",
    rule,
    f"Source: {source}",
    f"Pages: {num_pages}",
    f"Setup time: {t_setup:.2f}s",
    f"Conversion time: {t_convert:.2f}s",
    f"Total time: {t_setup + t_convert:.2f}s",
):
    print(summary_line)
# Throughput is only reported when at least one page was converted, which
# also keeps the per-page division away from a zero page count.
if num_pages > 0:
    pages_per_sec = num_pages / t_convert
    sec_per_page = t_convert / num_pages
    print(f"Pages/sec: {pages_per_sec:.2f}")
    print(f"Sec/page: {sec_per_page:.2f}")
# --- Per-page VLM details ---
divider = "─" * 50
print("\n" + divider)
print("PER-PAGE VLM DETAILS")
print(divider)
for page in result.pages:
    response = page.predictions.vlm_response
    # Pages without a VLM response are left out of the listing.
    if not response:
        continue
    # Report a rate of 0 when no positive generation time was recorded,
    # instead of dividing by it.
    if response.generation_time > 0:
        tokens_per_sec = response.num_tokens / response.generation_time
    else:
        tokens_per_sec = 0
    print(f" Page {page.page_no}: {response.num_tokens} tokens in {response.generation_time:.2f}s ({tokens_per_sec:.1f} tok/s)")