Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
13a47a1
decode view batch
paleolimbot Apr 5, 2026
e17d7cb
sketch refcounted arrays
paleolimbot Apr 5, 2026
6c5fb39
decode array works
paleolimbot Apr 5, 2026
06c2c60
remove clone hack
paleolimbot Apr 5, 2026
0e69e58
add note about copying
paleolimbot Apr 5, 2026
2d4796f
revert change
paleolimbot Apr 7, 2026
8fca3fe
aligned dictionary batch
paleolimbot Apr 7, 2026
8cae5be
add header docs and shared init function
paleolimbot Apr 7, 2026
85439ca
start dictionaries with a zero size dictionary
paleolimbot Apr 7, 2026
c9fcd6e
skip the two unconstructible test cases
paleolimbot Apr 7, 2026
1da9798
mostly there
paleolimbot Apr 7, 2026
70cce65
maybe actual failures
paleolimbot Apr 7, 2026
09ad0e4
fewer failures
paleolimbot Apr 7, 2026
51be30e
comment
paleolimbot Apr 7, 2026
12e9ac8
unregister uuid
paleolimbot Apr 7, 2026
1d526f3
check extension type with dictionary storage
paleolimbot Apr 7, 2026
5e4f273
hack to make the integration tests pass
paleolimbot Apr 10, 2026
40fbdbd
maybe fix namespace build and field metadata for dictionaries
paleolimbot Apr 12, 2026
29c0414
add definition for non-shared decoding
paleolimbot Apr 13, 2026
89c3d20
with internals
paleolimbot Apr 13, 2026
c05a471
implement support in the reader
paleolimbot Apr 13, 2026
400f5cf
simpler test usage
paleolimbot Apr 13, 2026
7a1b5f2
whoops
paleolimbot Apr 13, 2026
a51fc05
fix reader cleanup
paleolimbot Apr 17, 2026
67a534c
add reader tests
paleolimbot Apr 17, 2026
87798ed
implement dictionary index validation
paleolimbot Apr 17, 2026
282727e
dev
paleolimbot Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/nanoarrow/common/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -1513,10 +1513,26 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
}

// Dictionary validation not implemented
// Dictionary index validation
if (array_view->dictionary != NULL) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->dictionary, error));
// TODO: validate the indices

// Validate that all non-null indices are within the dictionary bounds
int64_t dictionary_length = array_view->dictionary->length;
for (int64_t i = 0; i < array_view->length; i++) {
if (ArrowArrayViewIsNull(array_view, i)) {
continue;
}

int64_t index = ArrowArrayViewGetIntUnsafe(array_view, i);
if (index < 0 || index >= dictionary_length) {
ArrowErrorSet(error,
"[%" PRId64 "] Expected dictionary index >= 0 and < %" PRId64
" but found value %" PRId64,
i, dictionary_length, index);
return EINVAL;
}
}
}

return NANOARROW_OK;
Expand Down
54 changes: 54 additions & 0 deletions src/nanoarrow/common/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,60 @@ TEST(ArrayTest, ArrayTestAllocateDictionary) {
ArrowArrayRelease(&array);
}

// Full validation of a dictionary-encoded array must accept indices that fall
// inside the dictionary and reject indices that are too large or negative.
TEST(ArrayTest, ArrayTestValidateDictionaryIndices) {
  struct ArrowSchema schema;
  struct ArrowArray array;
  struct ArrowArrayView array_view;
  struct ArrowError error;

  // Schema: int32 indices whose dictionary is a string array
  ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_INT32), NANOARROW_OK);
  ASSERT_EQ(ArrowSchemaAllocateDictionary(&schema), NANOARROW_OK);
  ASSERT_EQ(ArrowSchemaInitFromType(schema.dictionary, NANOARROW_TYPE_STRING),
            NANOARROW_OK);

  // Both the view and the array under construction are derived from that schema
  ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, &error), NANOARROW_OK);

  // Dictionary values are ["zero", "one"]; the indices are [0, 1, 0]
  ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendString(array.dictionary, "zero"_asv), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendString(array.dictionary, "one"_asv), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendInt(&array, 0), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayAppendInt(&array, 0), NANOARROW_OK);
  ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);

  // Shorthand for running full-level validation on the current view contents
  const auto validate_full = [&]() {
    return ArrowArrayViewValidate(&array_view, NANOARROW_VALIDATION_LEVEL_FULL, &error);
  };

  // Case 1: every index is within [0, 2), so validation succeeds
  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
  EXPECT_EQ(validate_full(), NANOARROW_OK);

  // The remaining cases overwrite element 1 of the index buffer (buffer 1) in place
  auto* index_data = reinterpret_cast<int32_t*>(ArrowArrayBuffer(&array, 1)->data);

  // Case 2: index 2 is one past the end of a dictionary of length 2
  const char* too_large_message =
      "[1] Expected dictionary index >= 0 and < 2 but found value 2";
  index_data[1] = 2;
  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
  EXPECT_EQ(validate_full(), EINVAL);
  EXPECT_STREQ(error.message, too_large_message);

  // Case 3: a negative index is rejected as well
  const char* negative_message =
      "[1] Expected dictionary index >= 0 and < 2 but found value -1";
  index_data[1] = -1;
  ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
  EXPECT_EQ(validate_full(), EINVAL);
  EXPECT_STREQ(error.message, negative_message);

  ArrowArrayViewReset(&array_view);
  ArrowSchemaRelease(&schema);
  ArrowArrayRelease(&array);
}

TEST(ArrayTest, ArrayTestInitFromSchema) {
struct ArrowArray array;
struct ArrowSchema schema;
Expand Down
Loading
Loading