-
Notifications
You must be signed in to change notification settings - Fork 3.3k
API: Harden variant binary parsing against malformed input #16568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,28 +35,44 @@ static SerializedArray from(VariantMetadata metadata, byte[] bytes) { | |
| } | ||
|
|
||
| static SerializedArray from(VariantMetadata metadata, ByteBuffer value, int header) { | ||
| return from(metadata, value, header, 0); | ||
| } | ||
|
|
||
| static SerializedArray from(VariantMetadata metadata, ByteBuffer value, int header, int depth) { | ||
| Preconditions.checkArgument( | ||
| value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); | ||
| BasicType basicType = VariantUtil.basicType(header); | ||
| Preconditions.checkArgument( | ||
| basicType == BasicType.ARRAY, "Invalid array, basic type: " + basicType); | ||
| return new SerializedArray(metadata, value, header); | ||
| return new SerializedArray(metadata, value, header, depth); | ||
| } | ||
|
|
||
| private final VariantMetadata metadata; | ||
| private final ByteBuffer value; | ||
| private final int offsetSize; | ||
| private final int offsetListOffset; | ||
| private final int dataOffset; | ||
| private final int depth; | ||
| private final VariantValue[] array; | ||
|
|
||
| private SerializedArray(VariantMetadata metadata, ByteBuffer value, int header) { | ||
| private SerializedArray(VariantMetadata metadata, ByteBuffer value, int header, int depth) { | ||
| this.metadata = metadata; | ||
| this.value = value; | ||
| this.depth = depth; | ||
| this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); | ||
| int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; | ||
| Preconditions.checkArgument( | ||
| value.remaining() >= HEADER_SIZE + numElementsSize, | ||
| "Invalid variant array: buffer too small for element count field"); | ||
| int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); | ||
| Preconditions.checkArgument( | ||
| numElements >= 0, "Invalid variant array: negative element count %s", numElements); | ||
| this.offsetListOffset = HEADER_SIZE + numElementsSize; | ||
| long offsetTableEnd = (long) offsetListOffset + ((long) numElements + 1L) * offsetSize; | ||
| Preconditions.checkArgument( | ||
| offsetTableEnd <= value.remaining(), | ||
| "Invalid variant array: element count %s exceeds buffer", | ||
| numElements); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've just realised that this and the parquet-java hardening don't worry about leftover data. "don't do that" is implicit the policy there, being as it is useless. I wonder what the rust parquet reader does. |
||
| this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); | ||
| this.array = new VariantValue[numElements]; | ||
| } | ||
|
|
@@ -75,8 +91,16 @@ public VariantValue get(int index) { | |
| int next = | ||
| VariantUtil.readLittleEndianUnsigned( | ||
| value, offsetListOffset + (offsetSize * (1 + index)), offsetSize); | ||
| long dataLen = value.remaining() - (long) dataOffset; | ||
| Preconditions.checkArgument( | ||
| offset >= 0 && next >= offset && next <= dataLen, | ||
| "Invalid variant array: offset range [%s, %s] out of data region [0, %s]", | ||
| offset, | ||
| next, | ||
| dataLen); | ||
| array[index] = | ||
| VariantValue.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); | ||
| VariantUtil.fromBuffer( | ||
| metadata, VariantUtil.slice(value, dataOffset + offset, next - offset), depth + 1); | ||
| } | ||
| return array[index]; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,8 +31,37 @@ class VariantUtil { | |
| private static final int BASIC_TYPE_OBJECT = 2; | ||
| private static final int BASIC_TYPE_ARRAY = 3; | ||
|
|
||
| /** | ||
| * Maximum permitted nesting depth of a Variant value. The top-level value is depth 0, so a | ||
| * Variant may contain up to {@code MAX_VARIANT_DEPTH} nested levels. | ||
| */ | ||
| static final int MAX_VARIANT_DEPTH = 500; | ||
|
|
||
| private VariantUtil() {} | ||
|
|
||
| static VariantValue fromBuffer(VariantMetadata metadata, ByteBuffer value, int depth) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a javadoc, mention that the function includes validation, including depth limits |
||
| Preconditions.checkArgument( | ||
| depth <= MAX_VARIANT_DEPTH, | ||
| "Invalid variant: nesting depth %s exceeds maximum %s", | ||
| depth, | ||
| MAX_VARIANT_DEPTH); | ||
| Preconditions.checkArgument(value.remaining() >= 1, "Invalid variant: empty value buffer"); | ||
| int header = readByte(value, 0); | ||
| BasicType basicType = basicType(header); | ||
| switch (basicType) { | ||
| case PRIMITIVE: | ||
| return SerializedPrimitive.from(value, header); | ||
| case SHORT_STRING: | ||
| return SerializedShortString.from(value, header); | ||
| case OBJECT: | ||
| return SerializedObject.from(metadata, value, header, depth); | ||
| case ARRAY: | ||
| return SerializedArray.from(metadata, value, header, depth); | ||
| } | ||
|
|
||
| throw new UnsupportedOperationException("Unsupported basic type: " + basicType); | ||
| } | ||
|
|
||
| /** A hacky absolute put for ByteBuffer */ | ||
| static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { | ||
| int originalPosition = buffer.position(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could this be private?