Skip to content

Commit cf7638f

Browse files
Dandandan authored and jorgecarleitao committed
ARROW-11349: [Rust] Add from_iter_values to create arrays from (non null) values
The idea of this PR is to add a function `from_iter_values` that (just like `from_iter`) creates an array from an iterator, but over items of type `T` instead of `Option<T>`. I have seen some places in DataFusion (especially `to_array_of_size`) where an `Array` is generated from a `Vec` of items, which could be replaced by this. The other iterators have some memory / time overhead in both creating and manipulating the null buffer (and, in the case of `Vec`, in allocating / dropping the `Vec`).

Closes #9293 from Dandandan/array_iter_non_null

Authored-by: Heres, Daniel <danielheres@gmail.com>
Signed-off-by: Jorge C. Leitao <jorgecarleitao@gmail.com>
1 parent 10f4ada commit cf7638f

2 files changed

Lines changed: 67 additions & 0 deletions

File tree

rust/arrow/src/array/array_primitive.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,21 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
9494
let offset = i + self.offset();
9595
unsafe { *self.raw_values.as_ptr().add(offset) }
9696
}
97+
98+
/// Creates a PrimitiveArray based on an iterator of values without nulls
99+
pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
100+
let val_buf: Buffer = iter.into_iter().collect();
101+
let data = ArrayData::new(
102+
T::DATA_TYPE,
103+
val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
104+
None,
105+
None,
106+
0,
107+
vec![val_buf],
108+
vec![],
109+
);
110+
PrimitiveArray::from(Arc::new(data))
111+
}
97112
}
98113

99114
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
@@ -820,6 +835,18 @@ mod tests {
820835
}
821836
}
822837

838+
#[test]
fn test_primitive_from_iter_values() {
    // An array built with `from_iter_values` must hold every item of the
    // source iterator, in order, and contain no nulls.
    let arr: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);

    assert_eq!(10, arr.len());
    assert_eq!(0, arr.null_count());
    for (idx, expected) in (0..10i32).enumerate() {
        assert_eq!(expected, arr.value(idx));
    }
}
849+
823850
#[test]
824851
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
825852
(values buffer)")]

rust/arrow/src/array/array_string.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,36 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
146146
pub(crate) fn from_opt_vec(v: Vec<Option<&str>>) -> Self {
147147
v.into_iter().collect()
148148
}
149+
150+
/// Creates a `GenericStringArray` based on an iterator of values without nulls
151+
pub fn from_iter_values<Ptr, I: IntoIterator<Item = Ptr>>(iter: I) -> Self
152+
where
153+
Ptr: AsRef<str>,
154+
{
155+
let iter = iter.into_iter();
156+
let (_, data_len) = iter.size_hint();
157+
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.
158+
159+
let mut offsets =
160+
MutableBuffer::new((data_len + 1) * std::mem::size_of::<OffsetSize>());
161+
let mut values = MutableBuffer::new(0);
162+
163+
let mut length_so_far = OffsetSize::zero();
164+
offsets.push(length_so_far);
165+
166+
for i in iter {
167+
let s = i.as_ref();
168+
length_so_far += OffsetSize::from_usize(s.len()).unwrap();
169+
offsets.push(length_so_far);
170+
values.extend_from_slice(s.as_bytes());
171+
}
172+
let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
173+
.len(data_len)
174+
.add_buffer(offsets.into())
175+
.add_buffer(values.into())
176+
.build();
177+
Self::from(array_data)
178+
}
149179
}
150180

151181
impl<'a, Ptr, OffsetSize: StringOffsetSizeTrait> FromIterator<Option<Ptr>>
@@ -411,6 +441,7 @@ mod tests {
411441
);
412442
}
413443

444+
#[test]
414445
fn test_string_array_from_iter() {
415446
let data = vec![Some("hello"), None, Some("arrow")];
416447
// from Vec<Option<&str>>
@@ -424,4 +455,13 @@ mod tests {
424455
assert_eq!(array1, array2);
425456
assert_eq!(array2, array3);
426457
}
458+
459+
#[test]
fn test_string_array_from_iter_values() {
    // Every value of the source vector must be readable back at its index.
    let data = vec!["hello", "hello2"];
    let array1 = StringArray::from_iter_values(data.iter());

    for (idx, expected) in data.iter().enumerate() {
        assert_eq!(array1.value(idx), *expected);
    }
}
427467
}

0 commit comments

Comments
 (0)