Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,16 @@ Status DataTypeDateSerDe<T>::from_olap_string(const std::string& str, Field& fie
? DatelikeTargetType::DATE_TIME
: DatelikeTargetType::DATE > (StringRef(str), res, options.timezone, params))
[[unlikely]] {
return Status::InvalidArgument("parse date or datetime fail, string: '{}'", str);
// Some paths (e.g. partial update) may write default values into the zonemap. For date-related types that default
// is the numeric value 0 of the underlying representation, which corresponds to the date 0000-00-00 and is not always valid.
// When parsing zonemap strings we therefore swallow the parse failure and return a default value instead of an error.
// The value itself does not matter: it will be replaced after compaction.
res = VecDateTimeValue::FIRST_DAY;
Comment thread
zclllyybb marked this conversation as resolved.
if constexpr (IsDatetime) {
res.to_datetime();
} else {
Comment thread
zclllyybb marked this conversation as resolved.
res.cast_to_date();
}
}
field = Field::create_field<T>(std::move(res));
return Status::OK();
Expand Down
8 changes: 7 additions & 1 deletion be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,20 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef& str, IColumn& column,
// "2023-10-15 14:30:00.123000" => scale 6, microsecond = 123000
// "2023-10-15 14:30:00.123" => scale 3, microsecond = 123000
//
// On parse failure, falls back to MIN_DATETIME_V2.
// On parse failure, falls back to MIN_DATETIME_V2, the packed lower-bound
// DateTimeV2 value. This is MIN_DATE_V2 shifted into the DateTimeV2 date part,
// not VecDateTimeValue::FIRST_DAY, which belongs to the V1 representation.
Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str, Field& field,
const FormatOptions& options) const {
CastParameters params {.status = Status::OK(), .is_strict = false};

DateV2Value<DateTimeV2ValueType> res;
std::string date_format = "%Y-%m-%d %H:%i:%s.%f";

// Some paths (e.g. partial update) may write default values into the zonemap. For date-related types that default
// is the numeric value 0 of the underlying representation, which corresponds to the date 0000-00-00 and is not always valid.
// When parsing zonemap strings we therefore swallow the parse failure and return a default value instead of an error.
// The value itself does not matter: it will be replaced after compaction.
if (!res.from_date_format_str(date_format.data(), date_format.size(), str.data(), str.size())) {
res = DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2);
}
Expand Down
6 changes: 5 additions & 1 deletion be/src/core/data_type_serde/data_type_datev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ Status DataTypeDateV2SerDe::from_string_batch(const ColumnString& col_str, Colum
// uint32_t value = (year << 9) | (month << 5) | day
//
// Expected input format: "YYYY-MM-DD", e.g. "2023-10-15"
// On parse failure, falls back to MIN_DATE_V2.
// On parse failure, falls back to MIN_DATE_V2, the packed lower-bound DateV2 value.
Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field& field,
const FormatOptions& options) const {
CastParameters params {.status = Status::OK(), .is_strict = false};
Expand All @@ -243,6 +243,10 @@ Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field& fiel
tm time_tm;
char* tmp = strptime(str.c_str(), "%Y-%m-%d", &time_tm);

// Some paths (e.g. partial update) may write default values into the zonemap. For date-related types that default
// is the numeric value 0 of the underlying representation, which corresponds to the date 0000-00-00 and is not always valid.
// When parsing zonemap strings we therefore swallow the parse failure and return a default value instead of an error.
// The value itself does not matter: it will be replaced after compaction.
if (nullptr != tmp) {
uint32_t value =
((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) | time_tm.tm_mday;
Expand Down
62 changes: 61 additions & 1 deletion be/test/storage/olap_type_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,25 @@ class OlapTypeTest : public testing::Test {
}
};

template <typename CheckField>
void expect_from_storage_string_paths(const DataTypePtr& data_type, const std::string& input,
CheckField&& check_field) {
auto serde = data_type->get_serde();
for (int path = 0; path < 3; ++path) {
Field field;
const char* path_name = path == 0 ? "from_olap_string"
: path == 1 ? "from_fe_string"
: "from_zonemap_string";
auto status =
path == 0 ? serde->from_olap_string(input, field, DataTypeSerDe::FormatOptions())
: path == 1 ? serde->from_fe_string(input, field)
Comment thread
zclllyybb marked this conversation as resolved.
: serde->from_zonemap_string(input, field);
ASSERT_TRUE(status.ok()) << data_type->get_name() << " " << path_name
<< " failed: " << status.to_string();
check_field(field);
}
}

// deserialize float string serialized by old version of Doris
TEST_F(OlapTypeTest, deser_float_old) {
std::vector<float> normal_input_values = {
Expand Down Expand Up @@ -607,6 +626,47 @@ TEST_F(OlapTypeTest, ser_deser_double) {
}
}

// For each of the four date-like types (DATE, DATETIME, DATEV2, DATETIMEV2) an
// unparsable string is passed to expect_from_storage_string_paths, and the Field
// it produces must carry the expected type and equal that type's fallback value:
//   - DATE / DATETIME:     VecDateTimeValue::FIRST_DAY cast to date / datetime
//   - DATEV2 / DATETIMEV2: MIN_DATE_V2 / MIN_DATETIME_V2
TEST_F(OlapTypeTest, datelike_storage_string_parse_failure_defaults) {
    const std::string unparsable = "not-a-valid-value";

    // DATE (V1)
    {
        VecDateTimeValue first_day_as_date = VecDateTimeValue::FIRST_DAY;
        first_day_as_date.cast_to_date();
        const auto want = Field::create_field<TYPE_DATE>(first_day_as_date);
        const auto date_type = DataTypeFactory::instance().create_data_type(TYPE_DATE, false);
        expect_from_storage_string_paths(date_type, unparsable, [&](const Field& got) {
            ASSERT_EQ(got.get_type(), TYPE_DATE);
            EXPECT_TRUE(got == want);
        });
    }

    // DATETIME (V1)
    {
        VecDateTimeValue first_day_as_datetime = VecDateTimeValue::FIRST_DAY;
        first_day_as_datetime.to_datetime();
        const auto want = Field::create_field<TYPE_DATETIME>(first_day_as_datetime);
        const auto datetime_type =
                DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false);
        expect_from_storage_string_paths(datetime_type, unparsable, [&](const Field& got) {
            ASSERT_EQ(got.get_type(), TYPE_DATETIME);
            EXPECT_TRUE(got == want);
        });
    }

    // DATEV2
    {
        const auto want =
                Field::create_field<TYPE_DATEV2>(DateV2Value<DateV2ValueType>(MIN_DATE_V2));
        const auto datev2_type = DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false);
        expect_from_storage_string_paths(datev2_type, unparsable, [&](const Field& got) {
            ASSERT_EQ(got.get_type(), TYPE_DATEV2);
            EXPECT_TRUE(got == want);
        });
    }

    // DATETIMEV2
    {
        const auto want = Field::create_field<TYPE_DATETIMEV2>(
                DateV2Value<DateTimeV2ValueType>(MIN_DATETIME_V2));
        const auto datetimev2_type =
                DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, false, 0, 6);
        expect_from_storage_string_paths(datetimev2_type, unparsable, [&](const Field& got) {
            ASSERT_EQ(got.get_type(), TYPE_DATETIMEV2);
            EXPECT_TRUE(got == want);
        });
    }
}

// =============================================================================
// Tests for to_olap_string / from_zonemap_string on DataTypeSerDe
//
Expand Down Expand Up @@ -2023,4 +2083,4 @@ TEST_F(OlapTypeTest, timestamptz_type) {
<< "serde mismatch for TIMESTAMPTZ expected=" << tc.expected;
}
}
} // namespace doris
} // namespace doris
Loading