google · copybara-service · May 17, 2026 · May 17, 2026
diff --git a/docs/conf.py b/docs/conf.py
@@ -43,7 +43,7 @@
     '_build',
     'Thumbs.db',
     '.DS_Store',
-    'tutorials/dataset_basic_tutorial.md',
+    '**/*_tutorial.md',  # ipynb files will be used instead.
 ]
 
 # Suppress warning in exception basic_data_tutorial
@@ -116,6 +116,8 @@
     'tutorials/data_sources/bagz_data_source_tutorial.ipynb',
     'tutorials/data_sources/huggingface_dataset_tutorial.ipynb',
     'tutorials/data_sources/pytorch_dataset_tutorial.ipynb',
+    'tutorials/performance_debugging.ipynb',
+    'dataset/performance_debugging.ipynb',
 ]
 
 

diff --git a/docs/grain.data_loader.rst b/docs/grain.data_loader.rst
@@ -1,5 +1,5 @@
 ``grain`` DataLoader
-=================
+====================
 
 .. automodule:: grain._src.python.data_loader
 .. currentmodule:: grain

diff --git a/docs/index.md b/docs/index.md
@@ -44,6 +44,7 @@ not depend on TensorFlow.
 :maxdepth: 1
 :hidden:
 :caption: Get started
+overview
 installation
 api_choice
 ```

diff --git a/docs/tutorials/data_loader_tutorial.md b/docs/tutorials/data_loader_tutorial.md
@@ -11,6 +11,8 @@ kernelspec:
   name: python3
 ---
 
+
+
 +++ {"id": "qGiXX-sg4l9o"}
 
 # `DataLoader` guide
@@ -96,7 +98,7 @@ index_sampler = grain.IndexSampler(
 ## Data source
 A data source is responsible for reading individual records from underlying files / storage system. We provide the following data sources:
 
-*   `ArrayRecordDataSource`: reads records from [ArrayRecord](go/array-record-design) files.
+*   `ArrayRecordDataSource`: reads records from [ArrayRecord](https://github.com/google/array_record) files.
 *   `tfds.data_source`: data source for [TFDS](https://www.tensorflow.org/datasets) datasets without a TensorFlow dependency.
 
 
@@ -106,7 +108,7 @@ Below, we show an example using a TFDS data source, but using other data sources
 
 ## TFDS Data source
 
-```{code-cell}
+``` {code-cell}
 ---
 executionInfo:
   elapsed: 38785

diff --git a/grain/_src/python/dataset/dataset.py b/grain/_src/python/dataset/dataset.py
@@ -223,9 +223,10 @@ def range(
 
     Input arguments are interpreted the same way as in Python built-in
     ``range``:
-      - ``range(n)`` => start=0, stop=n, step=1
-      - ``range(m, n)`` => start=m, stop=n, step=1
-      - ``range(m, n, p)`` => start=m, stop=n, step=p
+
+    - ``range(n)`` => start=0, stop=n, step=1
+    - ``range(m, n)`` => start=m, stop=n, step=1
+    - ``range(m, n, p)`` => start=m, stop=n, step=p
 
     The produced values are consistent with the built-in `range` function::
 
@@ -572,8 +573,9 @@ def seed(self, seed: int) -> MapDataset[T]:
     When default seed generation is enabled by calling ``ds.seed``, every
     downstream random transformation will be automatically seeded with a unique
     seed by default. This simplifies seed management, making it easier to avoid:
-     - Having to provide a seed in multiple transformations.
-     - Accidentally reusing the same seed across transformations.
+
+    - Having to provide a seed in multiple transformations.
+    - Accidentally reusing the same seed across transformations.
 
     It is recommended to call this right after the source. ``ds.seed`` has to be
     called before any random transformations (such as ``shuffle`` or
@@ -1079,8 +1081,9 @@ def seed(self, seed: int) -> IterDataset[T]:
     When default seed generation is enabled by calling ``ds.seed``, every
     downstream random transformation will be automatically seeded with a unique
     seed by default. This simplifies seed management, making it easier to avoid:
-     - Having to provide a seed in multiple transformations.
-     - Accidentally reusing the same seed across transformations.
+
+    - Having to provide a seed in multiple transformations.
+    - Accidentally reusing the same seed across transformations.
 
     It is recommended to call this right after the source. ``ds.seed`` has to be
     called before any random transformations (such as ``random_map`` that rely

diff --git a/grain/_src/python/dataset/transformations/packing_concat_then_split.py b/grain/_src/python/dataset/transformations/packing_concat_then_split.py
@@ -601,7 +601,7 @@ class ConcatThenSplitIterDataset(dataset.IterDataset):
   packed element. Positions indicate the position within the unpacked sequence.
 
   Features can be "meta features" in which case they are never split
-  and we do not create *_positions and *_segment_ids features for them.
+  and we do not create ``*_positions`` and ``*_segment_ids`` features for them.
   """
 
   def __init__(
@@ -623,8 +623,8 @@ def __init__(
       meta_features: Set of feature names that are considered meta features.
         Meta features are never split and will be duplicated when other features
         of the same element are split. Otherwise, meta features are packed
-        normally (they have their own sequence length). No *_positions and
-        *_segment_ids features are created for meta features.
+        normally (they have their own sequence length). No ``*_positions`` and
+        ``*_segment_ids`` features are created for meta features.
       split_full_length_features: Whether full-length features are split, or
         they are considered packed and passed through in priority. Setting
         split_full_length_features=False is an optimization when some sequences