Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions parquet/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,10 @@ enum Encoding {
/// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may
/// perform poorly for large values of N.
BYTE_STREAM_SPLIT = 9;
/// Adaptive Lossless floating-Point encoding (ALP).
///
/// Currently specified for FLOAT and DOUBLE.
ALP = 10;
}
);

Expand All @@ -654,6 +658,7 @@ impl FromStr for Encoding {
"DELTA_BYTE_ARRAY" | "delta_byte_array" => Ok(Encoding::DELTA_BYTE_ARRAY),
"RLE_DICTIONARY" | "rle_dictionary" => Ok(Encoding::RLE_DICTIONARY),
"BYTE_STREAM_SPLIT" | "byte_stream_split" => Ok(Encoding::BYTE_STREAM_SPLIT),
"ALP" | "alp" => Ok(Encoding::ALP),
_ => Err(general_err!("unknown encoding: {}", s)),
}
}
Expand Down Expand Up @@ -791,6 +796,7 @@ fn i32_to_encoding(val: i32) -> Encoding {
7 => Encoding::DELTA_BYTE_ARRAY,
8 => Encoding::RLE_DICTIONARY,
9 => Encoding::BYTE_STREAM_SPLIT,
10 => Encoding::ALP,
_ => panic!("Impossible encoding {val}"),
}
}
Expand Down Expand Up @@ -2137,6 +2143,7 @@ mod tests {
);
assert_eq!(Encoding::DELTA_BYTE_ARRAY.to_string(), "DELTA_BYTE_ARRAY");
assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY");
assert_eq!(Encoding::ALP.to_string(), "ALP");
}

#[test]
Expand Down Expand Up @@ -2438,6 +2445,8 @@ mod tests {
assert_eq!(encoding, Encoding::RLE_DICTIONARY);
encoding = "BYTE_STREAM_SPLIT".parse().unwrap();
assert_eq!(encoding, Encoding::BYTE_STREAM_SPLIT);
encoding = "alp".parse().unwrap();
assert_eq!(encoding, Encoding::ALP);

// test lowercase
encoding = "byte_stream_split".parse().unwrap();
Expand Down Expand Up @@ -2573,6 +2582,7 @@ mod tests {
Encoding::PLAIN_DICTIONARY,
Encoding::RLE_DICTIONARY,
Encoding::BYTE_STREAM_SPLIT,
Encoding::ALP,
];
encodings_roundtrip(encodings.into());
}
Expand Down
9 changes: 8 additions & 1 deletion parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ use super::rle::RleDecoder;
use crate::basic::*;
use crate::data_type::private::ParquetValueType;
use crate::data_type::*;
use crate::encodings::decoding::alp::AlpDecoder;
use crate::encodings::decoding::byte_stream_split_decoder::{
ByteStreamSplitDecoder, VariableWidthByteStreamSplitDecoder,
};
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::bit_util::{self, BitReader};

mod alp;
mod byte_stream_split_decoder;

pub(crate) mod private {
Expand Down Expand Up @@ -63,7 +65,8 @@ pub(crate) mod private {
Encoding::RLE
| Encoding::DELTA_BINARY_PACKED
| Encoding::DELTA_BYTE_ARRAY
| Encoding::DELTA_LENGTH_BYTE_ARRAY => Err(general_err!(
| Encoding::DELTA_LENGTH_BYTE_ARRAY
| Encoding::ALP => Err(general_err!(
"Encoding {} is not supported for type",
encoding
)),
Expand Down Expand Up @@ -116,6 +119,7 @@ pub(crate) mod private {
) -> Result<Box<dyn Decoder<T>>> {
match encoding {
Encoding::BYTE_STREAM_SPLIT => Ok(Box::new(ByteStreamSplitDecoder::new())),
Encoding::ALP => Ok(Box::new(AlpDecoder::new())),
_ => get_decoder_default(descr, encoding),
}
}
Expand All @@ -127,6 +131,7 @@ pub(crate) mod private {
) -> Result<Box<dyn Decoder<T>>> {
match encoding {
Encoding::BYTE_STREAM_SPLIT => Ok(Box::new(ByteStreamSplitDecoder::new())),
Encoding::ALP => Ok(Box::new(AlpDecoder::new())),
_ => get_decoder_default(descr, encoding),
}
}
Expand Down Expand Up @@ -1135,6 +1140,8 @@ mod tests {
create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_LENGTH_BYTE_ARRAY, None);
create_and_check_decoder::<ByteArrayType>(Encoding::DELTA_BYTE_ARRAY, None);
create_and_check_decoder::<BoolType>(Encoding::RLE, None);
create_and_check_decoder::<FloatType>(Encoding::ALP, None);
create_and_check_decoder::<DoubleType>(Encoding::ALP, None);

// error when initializing
create_and_check_decoder::<Int32Type>(
Expand Down
Loading
Loading