Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .github/workflows/build-and-test-device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,11 @@ jobs:
- name: Install build dependencies
if: matrix.config.label == 'with-cuda'
run: |
# Install minimal CUDA packages needed for building (avoid full cuda-toolkit
# which includes nsight-systems and other large tools that exhaust disk space)
sudo apt-get update && \
sudo apt-get install -y cmake build-essential cuda-toolkit tzdata
sudo apt-get install -y cmake build-essential tzdata \
cuda-nvcc-13-2 cuda-cudart-dev-13-2 libcublas-dev-13-2

# Install newer cmake for building Arrow C++
pip install cmake
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/build-and-test-ipc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,8 @@ jobs:
with:
name: nanoarrow-ipc-memcheck
path: build/Testing/Temporary/MemoryChecker.*.log

- name: Run integration test validation for arrow-testing files
if: matrix.config.label == 'default-build'
run: |
./ci/scripts/run-ipc-integration-tests.sh build
164 changes: 164 additions & 0 deletions ci/scripts/run-ipc-integration-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# This script runs the nanoarrow_ipc_integration VALIDATE command against
# all test files from the arrow-testing repository's integration directory.
#
# Usage:
# export NANOARROW_ARROW_TESTING_DIR=/path/to/arrow-testing
# ./ci/scripts/run-ipc-integration-tests.sh [build_dir]
#
# Arguments:
# build_dir: Optional path to the build directory containing
# nanoarrow_ipc_integration. Defaults to "build".

# Resolve the repository root relative to this script so it can be invoked
# from any working directory.
REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
BUILD_DIR="${1:-${REPO_ROOT}/build}"

if [ -z "${NANOARROW_ARROW_TESTING_DIR}" ]; then
  echo "Error: NANOARROW_ARROW_TESTING_DIR environment variable not set"
  echo "Please set it to the path of a checkout of apache/arrow-testing"
  exit 1
fi

if [ ! -d "${NANOARROW_ARROW_TESTING_DIR}" ]; then
  echo "Error: NANOARROW_ARROW_TESTING_DIR does not exist: ${NANOARROW_ARROW_TESTING_DIR}"
  exit 1
fi

INTEGRATION_BIN="${BUILD_DIR}/nanoarrow_ipc_integration"
if [ ! -x "${INTEGRATION_BIN}" ]; then
  echo "Error: nanoarrow_ipc_integration not found at ${INTEGRATION_BIN}"
  echo "Please build the project first or specify the build directory as an argument"
  exit 1
fi

DATA_DIR="${NANOARROW_ARROW_TESTING_DIR}/data/arrow-ipc-stream/integration"

# Create a temp directory for decompressed JSON files. Abort if mktemp fails
# so the trap below never runs `rm -rf` on an empty/unexpected path.
TEMP_DIR="$(mktemp -d)" || exit 1
# Single-quote the trap so TEMP_DIR is expanded (and properly quoted) when
# the trap fires, not when it is installed.
trap 'rm -rf "${TEMP_DIR}"' EXIT

# Track results
PASSED=0
FAILED=0
SKIPPED=0

# Known files that are expected to be skipped (unsupported types)
SKIP_PATTERNS=(
  "generated_list_view"        # ListView not supported
  "generated_binary_view"      # BinaryView not supported
  "generated_run_end_encoded"  # REE not supported
)

# Function to check if a file should be skipped
# Return success (0) when the given basename contains any entry of the
# global SKIP_PATTERNS array (i.e. the file exercises a feature nanoarrow
# does not yet support); return failure (1) otherwise.
should_skip() {
  local name="$1"
  local pattern
  for pattern in "${SKIP_PATTERNS[@]}"; do
    case "${name}" in
      *"${pattern}"*) return 0 ;;
    esac
  done
  return 1
}

# Function to run VALIDATE for a given test file
# Run the nanoarrow_ipc_integration VALIDATE command for a single test case.
#
# Arguments:
#   $1 (subdir)   - subdirectory under DATA_DIR (e.g. an arrow-format version)
#   $2 (basename) - test file basename without extension
#
# Return codes:
#   0 - validation passed
#   1 - validation failed
#   2 - skipped (missing .arrow_file or JSON input)
#
# Reads globals: DATA_DIR, TEMP_DIR, INTEGRATION_BIN.
run_validate() {
local subdir="$1"
local basename="$2"

local arrow_file="${DATA_DIR}/${subdir}/${basename}.arrow_file"
local json_gz="${DATA_DIR}/${subdir}/${basename}.json.gz"
# Default decompression target; the flattened subdir_basename name keeps
# everything in a single directory inside TEMP_DIR.
local json_file="${TEMP_DIR}/${subdir}_${basename}.json"

# We require .arrow_file format (with ARROW1 magic and footer)
if [ ! -f "${arrow_file}" ]; then
return 2 # Skip - no arrow file
fi

# Check if JSON exists (possibly gzipped); prefer the gzipped copy, else
# fall back to an uncompressed .json used in place.
if [ -f "${json_gz}" ]; then
gunzip -c "${json_gz}" > "${json_file}"
elif [ -f "${DATA_DIR}/${subdir}/${basename}.json" ]; then
json_file="${DATA_DIR}/${subdir}/${basename}.json"
else
return 2 # Skip - no JSON file
fi

# The integration binary takes its configuration via environment variables;
# suppress its output and report only the exit status.
if COMMAND=VALIDATE ARROW_PATH="${arrow_file}" JSON_PATH="${json_file}" "${INTEGRATION_BIN}" > /dev/null 2>&1; then
return 0 # Pass
else
return 1 # Fail
fi
}

echo "=== Running IPC Integration Tests ==="
echo "Using arrow-testing at: ${NANOARROW_ARROW_TESTING_DIR}"
echo "Using integration binary at: ${INTEGRATION_BIN}"
echo ""

# Find all subdirectories in the integration directory
for subdir_path in "${DATA_DIR}"/*/; do
# Guard against a literal, unexpanded glob when DATA_DIR has no subdirs
[ -d "${subdir_path}" ] || continue
subdir=$(basename "${subdir_path}")

# Skip versions before 1.0.0
if [[ "${subdir}" == 0.* ]]; then
continue
fi

echo "=== Testing ${subdir} ==="

# Find all unique basenames (from .arrow_file files)
for arrow_file in "${subdir_path}"*.arrow_file; do
# Guard against a literal, unexpanded glob when no .arrow_file exists
[ -f "${arrow_file}" ] || continue

basename=$(basename "${arrow_file}" .arrow_file)

# Check if this file should be skipped
if should_skip "${basename}"; then
((SKIPPED++))
continue
fi

# Capture the three-way return code (0=pass, 1=fail, 2=skip) immediately,
# before any other command can overwrite $?
run_validate "${subdir}" "${basename}"
result=$?

if [ $result -eq 0 ]; then
echo "  PASS: ${basename}"
((PASSED++))
elif [ $result -eq 2 ]; then
echo "  SKIP: ${basename} (missing files)"
((SKIPPED++))
else
echo "  FAIL: ${basename}"
((FAILED++))
fi
done
echo ""
done

echo "=== Summary ==="
echo "Passed:  ${PASSED}"
echo "Failed:  ${FAILED}"
echo "Skipped: ${SKIPPED}"

# Propagate failure to CI; falling off the end exits 0 when nothing failed
if [ ${FAILED} -gt 0 ]; then
exit 1
fi
9 changes: 5 additions & 4 deletions examples/cmake-scenarios/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ WIN_DLL_NANOARROW_INSTALLED="$(pwd)/scratch/nanoarrow_install/bin"
# The mismatched_shared_libs test is static-only, so no DLL path needed
for dir in scratch/build*; do
# Special cases where we have to set PATH on Windows
if [ "${dir}" = "scratch/build_against_fetched_shared" ] && [ "${OSTYPE}" = "msys" ]; then
# OSTYPE can be "msys" (Git Bash) or "cygwin" (Cygwin/GitHub Actions)
if [ "${dir}" = "scratch/build_against_fetched_shared" ] && [[ "${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
PATH="${PATH}:${WIN_DLL_NANOARROW_FETCHED}" ./${dir}/Debug/minimal_cpp_app
elif [ "${dir}" = "scratch/build_shared" ] && [ "${OSTYPE}" = "msys" ]; then
elif [ "${dir}" = "scratch/build_shared" ] && [[ "${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
PATH="${PATH}:${WIN_DLL_NANOARROW_BUILT}" ./${dir}/Debug/minimal_cpp_app
elif [ "${dir}" = "scratch/build_against_install_shared" ] && [ "${OSTYPE}" = "msys" ]; then
elif [ "${dir}" = "scratch/build_against_install_shared" ] && [[ "${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
PATH="${PATH}:${WIN_DLL_NANOARROW_INSTALLED}" ./${dir}/Debug/minimal_cpp_app
elif [ "${OSTYPE}" = "msys" ]; then
elif [[ "${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
./${dir}/Debug/minimal_cpp_app
else
./${dir}/minimal_cpp_app
Expand Down
41 changes: 36 additions & 5 deletions src/nanoarrow/integration/ipc_integration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,45 @@ struct MaterializedArrayStream {

NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowSchemaDeepCopy(&decoder->footer->schema, schema.get()), error);
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderSetSchema(decoder.get(), &decoder->footer->schema, error));
NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetSchemaWithDictionaries(
decoder.get(), &decoder->footer->schema, &decoder->footer->dictionaries, error));
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowIpcDecoderSetEndianness(decoder.get(), decoder->endianness), error);

// Initialize dictionaries storage
nanoarrow::ipc::UniqueDictionaries dictionaries;
NANOARROW_RETURN_NOT_OK(ArrowIpcDictionariesInit(
dictionaries.get(), &decoder->footer->dictionaries, error));

// Move both block buffers out of the footer BEFORE decoding any headers,
// because ArrowIpcDecoderDecodeHeader resets the footer
nanoarrow::UniqueBuffer dictionary_blocks;
nanoarrow::UniqueBuffer record_batch_blocks;
ArrowBufferMove(&decoder->footer->dictionary_blocks, dictionary_blocks.get());
ArrowBufferMove(&decoder->footer->record_batch_blocks, record_batch_blocks.get());

// Read dictionary blocks
for (int i = 0; i < dictionary_blocks->size_bytes / sizeof(struct ArrowIpcFileBlock);
i++) {
const auto& block =
reinterpret_cast<struct ArrowIpcFileBlock*>(dictionary_blocks->data)[i];
struct ArrowBufferView metadata_view = {
{bytes.data() + block.offset},
block.metadata_length,
};
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderDecodeHeader(decoder.get(), metadata_view, error));

struct ArrowBufferView body_view = {
{metadata_view.data.as_uint8 + metadata_view.size_bytes},
block.body_length,
};
NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeDictionary(
decoder.get(), body_view, NANOARROW_VALIDATION_LEVEL_FULL, dictionaries.get(),
error));
}

// Read record batch blocks
for (int i = 0;
i < record_batch_blocks->size_bytes / sizeof(struct ArrowIpcFileBlock); i++) {
const auto& block =
Expand All @@ -219,9 +250,9 @@ struct MaterializedArrayStream {
block.body_length,
};
nanoarrow::UniqueArray batch;
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderDecodeArray(decoder.get(), body_view, -1, batch.get(),
NANOARROW_VALIDATION_LEVEL_FULL, error));
NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeArrayWithDictionaries(
decoder.get(), body_view, -1, dictionaries.get(), batch.get(),
NANOARROW_VALIDATION_LEVEL_FULL, error));
batches.push_back(std::move(batch));
}

Expand Down
26 changes: 26 additions & 0 deletions src/nanoarrow/ipc/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -1225,10 +1225,16 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema* schema, ns(Field_table_t)
case ns(Type_FixedSizeBinary):
return ArrowIpcDecoderSetTypeFixedSizeBinary(schema, ns(Field_type_get(field)),
error);
case ns(Type_BinaryView):
ArrowErrorSet(error, "BinaryView not yet supported in IPC reader");
return ENOTSUP;
case ns(Type_Utf8):
return ArrowIpcDecoderSetTypeSimple(schema, NANOARROW_TYPE_STRING, error);
case ns(Type_LargeUtf8):
return ArrowIpcDecoderSetTypeSimple(schema, NANOARROW_TYPE_LARGE_STRING, error);
case ns(Type_Utf8View):
ArrowErrorSet(error, "Utf8View not yet supported in IPC reader");
return ENOTSUP;
case ns(Type_Date):
return ArrowIpcDecoderSetTypeDate(schema, ns(Field_type_get(field)), error);
case ns(Type_Time):
Expand All @@ -1248,11 +1254,18 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema* schema, ns(Field_table_t)
case ns(Type_FixedSizeList):
return ArrowIpcDecoderSetTypeFixedSizeList(schema, ns(Field_type_get(field)),
error);
case ns(Type_ListView):
case ns(Type_LargeListView):
ArrowErrorSet(error, "ListView/LargeListView not yet supported in IPC reader");
return ENOTSUP;
case ns(Type_Map):
return ArrowIpcDecoderSetTypeMap(schema, ns(Field_type_get(field)), error);
case ns(Type_Union):
return ArrowIpcDecoderSetTypeUnion(schema, ns(Field_type_get(field)), n_children,
error);
case ns(Type_RunEndEncoded):
ArrowErrorSet(error, "RunEndEncoded not yet supported in IPC reader");
return ENOTSUP;
default:
ArrowErrorSet(error, "Unrecognized Field type with value %d", type_type);
return EINVAL;
Expand Down Expand Up @@ -1885,6 +1898,19 @@ ArrowErrorCode ArrowIpcDecoderDecodeFooter(struct ArrowIpcDecoder* decoder,
record_batches[i].body_length = ns(Block_bodyLength(blocks + i));
}

blocks = ns(Footer_dictionaries(footer));
n = ns(Block_vec_len(blocks));
NANOARROW_RETURN_NOT_OK(ArrowBufferResize(&private_data->footer.dictionary_blocks,
sizeof(struct ArrowIpcFileBlock) * n,
/*shrink_to_fit=*/0));
struct ArrowIpcFileBlock* dictionaries =
(struct ArrowIpcFileBlock*)private_data->footer.dictionary_blocks.data;
for (int64_t i = 0; i < n; i++) {
dictionaries[i].offset = ns(Block_offset(blocks + i));
dictionaries[i].metadata_length = ns(Block_metaDataLength(blocks + i));
dictionaries[i].body_length = ns(Block_bodyLength(blocks + i));
}

decoder->footer = &private_data->footer;
return NANOARROW_OK;
}
Expand Down
2 changes: 2 additions & 0 deletions src/nanoarrow/ipc/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ ArrowErrorCode ArrowIpcEncoderEncodeSimpleRecordBatch(
// Initialize a footer to a valid empty state: no schema (release == NULL
// marks it unset), empty record-batch and dictionary block buffers, and an
// empty dictionary-encoding registry. Pair with ArrowIpcFooterReset() to
// release anything attached afterwards.
void ArrowIpcFooterInit(struct ArrowIpcFooter* footer) {
footer->schema.release = NULL;
ArrowBufferInit(&footer->record_batch_blocks);
ArrowBufferInit(&footer->dictionary_blocks);
ArrowIpcDictionaryEncodingsInit(&footer->dictionaries);
}

Expand All @@ -637,6 +638,7 @@ void ArrowIpcFooterReset(struct ArrowIpcFooter* footer) {
ArrowSchemaRelease(&footer->schema);
}
ArrowBufferReset(&footer->record_batch_blocks);
ArrowBufferReset(&footer->dictionary_blocks);
ArrowIpcDictionaryEncodingsReset(&footer->dictionaries);
}

Expand Down
Loading
Loading