Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,21 @@ std::string DataTypeTimeStampTzSerDe::to_olap_string(const Field& field) const {
return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
}

void DataTypeTimeStampTzSerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString::Chars& chars,
int64_t row_num) const {
const auto type = static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ);
const auto& data_ref = assert_cast<const ColumnTimeStampTz&>(src_column).get_data_at(row_num);
const auto sc = static_cast<uint8_t>(_scale);

const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(uint8_t) + data_ref.size;
chars.resize(new_size);
memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&sc),
sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(uint8_t), data_ref.data,
data_ref.size);
}

} // namespace doris
4 changes: 4 additions & 0 deletions be/src/core/data_type_serde/data_type_timestamptz_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ class DataTypeTimeStampTzSerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_
int64_t start, int64_t end, Arena& arena,
const FormatOptions& options) const override;

// Override needed: paired reader skips a scale byte; the inherited number-serde writer omits it.
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

std::string to_olap_string(const Field& field) const override;

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,25 @@

#include "core/assert_cast.h"
#include "core/column/column.h"
#include "core/column/column_nullable.h"
#include "core/column/column_string.h"
#include "core/data_type/common_data_type_serder_test.h"
#include "core/data_type/common_data_type_test.h"
#include "core/data_type/data_type.h"
#include "core/data_type_serde/data_type_timestamptz_serde.h"
#include "core/field.h"
#include "testutil/test_util.h"
#include "util/slice.h"
#include "util/string_util.h"
namespace doris {
static std::string test_data_dir;

static auto serde_tz_0 = std::make_shared<DataTypeTimeStampTzSerDe>(0);
static auto serde_tz_3 = std::make_shared<DataTypeTimeStampTzSerDe>(3);
static auto serde_tz_6 = std::make_shared<DataTypeTimeStampTzSerDe>(6);

static ColumnTimeStampTz::MutablePtr column_tz_0;
static ColumnTimeStampTz::MutablePtr column_tz_3;
static ColumnTimeStampTz::MutablePtr column_tz_6;

class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
Expand All @@ -51,6 +56,7 @@ class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
test_data_dir = root_dir + "/be/test/data/vec/columns";

column_tz_0 = ColumnTimeStampTz::create();
column_tz_3 = ColumnTimeStampTz::create();
column_tz_6 = ColumnTimeStampTz::create();

load_columns_data();
Expand All @@ -70,6 +76,7 @@ class DataTypeTimeStampTzSerDeTest : public ::testing::Test {
EXPECT_TRUE(!column->empty());
};
test_func(column_tz_0->get_ptr(), serde_tz_0, "TIMESTAMPTZ(0).csv");
test_func(column_tz_3->get_ptr(), serde_tz_3, "TIMESTAMPTZ(3).csv");
test_func(column_tz_6->get_ptr(), serde_tz_6, "TIMESTAMPTZ(6).csv");

std::cout << "loading test dataset done" << std::endl;
Expand Down Expand Up @@ -189,6 +196,82 @@ TEST_F(DataTypeTimeStampTzSerDeTest, serdes) {
}
};
test_func(*serde_tz_0, column_tz_0);
test_func(*serde_tz_3, column_tz_3);
test_func(*serde_tz_6, column_tz_6);
}

// Roundtrip via write_one_cell_to_binary + DataTypeSerDe::deserialize_binary_to_*
// (the variant sparse-column path). Regression for DORIS-25915: the writer
// previously inherited DataTypeNumberSerDe's default, which omitted the scale
// byte, while the reader expected a scale byte — a 1-byte layout mismatch that
// silently corrupted timestamptz values stored in variant sparse paths.
TEST_F(DataTypeTimeStampTzSerDeTest, binary_roundtrip) {
auto test_func = [&](const DataTypeTimeStampTzSerDe& serde,
const ColumnTimeStampTz::MutablePtr& source_column, uint32_t scale) {
const size_t row_count = source_column->size();
ASSERT_GT(row_count, 0);

// Per-row layout: [type:1][scale:1][value:8] = 10 bytes.
constexpr size_t kBytesPerRow =
sizeof(uint8_t) + sizeof(uint8_t) + sizeof(TimestampTzValue::underlying_value);

auto chars_col = ColumnString::create();
ColumnString::Chars& chars = chars_col->get_chars();
std::vector<size_t> row_offsets;
row_offsets.reserve(row_count + 1);
row_offsets.push_back(0);
for (size_t i = 0; i < row_count; ++i) {
serde.write_one_cell_to_binary(*source_column, chars, i);
row_offsets.push_back(chars.size());
}

// Layout: each row contributes exactly 10 bytes; first byte is the
// TIMESTAMPTZ FieldType; second is the scale.
ASSERT_EQ(chars.size(), row_count * kBytesPerRow);
for (size_t i = 0; i < row_count; ++i) {
ASSERT_EQ(row_offsets[i + 1] - row_offsets[i], kBytesPerRow);
EXPECT_EQ(chars[row_offsets[i]],
static_cast<uint8_t>(FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ));
EXPECT_EQ(chars[row_offsets[i] + 1], static_cast<uint8_t>(scale));
}

// Roundtrip via deserialize_binary_to_column (column path).
{
auto inner_col = ColumnTimeStampTz::create();
auto null_map = ColumnUInt8::create();
auto nullable_col = ColumnNullable::create(std::move(inner_col), std::move(null_map));
for (size_t i = 0; i < row_count; ++i) {
const uint8_t* start = chars.data() + row_offsets[i];
const uint8_t* end =
DataTypeSerDe::deserialize_binary_to_column(start, *nullable_col);
EXPECT_EQ(end - start, kBytesPerRow);
}
const auto& read_col =
assert_cast<const ColumnTimeStampTz&>(nullable_col->get_nested_column());
for (size_t i = 0; i < row_count; ++i) {
EXPECT_EQ(read_col.get_element(i), source_column->get_element(i))
<< "row " << i << " mismatched after binary roundtrip";
}
}

// Roundtrip via deserialize_binary_to_field (field path); scale
// should propagate into FieldInfo.
{
for (size_t i = 0; i < row_count; ++i) {
const uint8_t* start = chars.data() + row_offsets[i];
Field f;
FieldInfo info;
const uint8_t* end = DataTypeSerDe::deserialize_binary_to_field(start, f, info);
EXPECT_EQ(end - start, kBytesPerRow);
EXPECT_EQ(info.scale, static_cast<int>(scale));
EXPECT_EQ(f.get<TYPE_TIMESTAMPTZ>(), source_column->get_element(i))
<< "row " << i << " mismatched after field roundtrip";
}
}
};

test_func(*serde_tz_0, column_tz_0, 0);
test_func(*serde_tz_3, column_tz_3, 3);
test_func(*serde_tz_6, column_tz_6, 6);
}
} // namespace doris
16 changes: 16 additions & 0 deletions be/test/data/vec/columns/TIMESTAMPTZ(3).csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
NULL
0000-01-01 00:00:00 +00:00
0000-01-01 00:00:00.000 +00:00
0000-01-01 00:00:00.001 +00:00
0000-01-01 00:00:00.123 +00:00
0000-01-01 00:00:00.999 +00:00
2023-08-08 20:20:20 +00:00
2023-08-08 20:20:20.000 +00:00
2023-08-08 20:20:20.001 +00:00
2023-08-08 20:20:20.123 +00:00
2023-08-08 20:20:20.999 +00:00
9999-12-31 23:59:59 +08:00
9999-12-31 23:59:59.000 +08:00
9999-12-31 23:59:59.001 +08:00
9999-12-31 23:59:59.123 +08:00
9999-12-31 23:59:59.999 +08:00
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,12 @@ public abstract class Type {
structSubTypes.add(MAP);
structSubTypes.add(STRUCT);

// Before adding a type here, the BE side must implement these three serde
// adapters for the type so values falling to the variant sparse path can
// round-trip correctly. Omitting any of them silently corrupts sparse reads.
// * DataTypeXxxSerDe::write_one_cell_to_binary
// * DataTypeXxxSerDe::deserialize_binary_to_field
// * DataTypeXxxSerDe::deserialize_binary_to_column
variantSubTypes = Lists.newArrayList();
variantSubTypes.add(BOOLEAN);
variantSubTypes.addAll(integerTypes);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_all --
1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00 2003-12-21 07:29:00.000000+08:00

-- !select_cast_ts0 --
1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00

Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Reproduces DORIS-25915:
// When VARIANT has typed paths that exceed variant_max_subcolumns_count and
// variant_enable_typed_paths_to_sparse=true, timestamptz values that fall to
// the sparse path are read back as only the timezone suffix (e.g. "+00:00")
// instead of the full timestamp.
suite("test_variant_timestamptz_sparse", "p0"){
sql " set time_zone = '+08:00' "
sql " set default_variant_enable_doc_mode = false "

sql "DROP TABLE IF EXISTS test_variant_timestamptz_sparse_repro"

sql """
CREATE TABLE test_variant_timestamptz_sparse_repro (
pk int,
var VARIANT<
'c1':bigint,
'c2':bigint,
'd1':date,
'd2':date,
'dt0':datetime(0),
'dt0n':datetime(0),
'dt3':datetime(3),
'dt3n':datetime(3),
'dt6':datetime(6),
'dt6n':datetime(6),
'ts0':timestamptz(0),
'ts0n':timestamptz(0),
'ts3':timestamptz(3),
'ts3n':timestamptz(3),
'ts6':timestamptz(6),
'ts6n':timestamptz(6),
'v1':string,
'v2':string,
properties("variant_max_subcolumns_count" = "2","variant_enable_typed_paths_to_sparse" = "true")
> NULL,
INDEX c1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c1"),
INDEX c2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c2"),
INDEX d1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d1"),
INDEX d2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d2"),
INDEX dt0 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0"),
INDEX dt0n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0n"),
INDEX dt3 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3"),
INDEX dt3n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3n"),
INDEX dt6 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6"),
INDEX dt6n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6n"),
INDEX v1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v1"),
INDEX v2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v2")
) ENGINE=OLAP
DUPLICATE KEY(pk)
DISTRIBUTED BY HASH(pk) BUCKETS 1
PROPERTIES("replication_num" = "1");
"""

sql """
INSERT INTO test_variant_timestamptz_sparse_repro VALUES
(1, '{"c1":1699516324,"c2":1690122421,"d1":"2024-02-25","d2":"2024-11-27","dt0":"2002-12-25 12:42:19","dt0n":"2015-02-05 02:36:13","dt3":"2015-02-22 02:09:01","dt3n":"2015-09-16 02:55:07","dt6":"2001-09-19 09:53:52","dt6n":"2003-12-21 02:29:00","ts0":"2003-12-21 02:29:00 +00:00","ts0n":"2003-12-21 02:29:00 +00:00","ts3":"2003-12-21 02:29:00 +00:00","ts3n":"2003-12-21 02:29:00 -05:00","ts6":"2003-12-21 02:29:00 -05:00","ts6n":"2003-12-21 02:29:00 +03:00","v1":"2024-12-10 10:16:19","v2":"2023-12-31 23:59:59"}');
"""

sql "SYNC"

// Pre-fix: every column that fell to sparse returned just the timezone
// suffix ("+08:00"). Post-fix: each ts* path round-trips as a full timestamp.
qt_select_all """
SELECT
pk,
CAST(var['ts0'] AS string) AS str_ts0,
CAST(var['ts0n'] AS string) AS str_ts0n,
CAST(var['ts3'] AS string) AS str_ts3,
CAST(var['ts3n'] AS string) AS str_ts3n,
CAST(var['ts6'] AS string) AS str_ts6,
CAST(var['ts6n'] AS string) AS str_ts6n
FROM test_variant_timestamptz_sparse_repro
ORDER BY pk;
"""

qt_select_cast_ts0 """
SELECT
pk,
CAST(var['ts0'] AS timestamptz(0)) AS cast_ts0,
CAST(var['ts3'] AS timestamptz(3)) AS cast_ts3,
CAST(var['ts6'] AS timestamptz(6)) AS cast_ts6
FROM test_variant_timestamptz_sparse_repro
ORDER BY pk;
"""
}
Loading