diff --git a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp index 3ed1b7ecb43fa2..d8fbaa0ece4cdf 100644 --- a/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp +++ b/be/src/core/data_type_serde/data_type_date_or_datetime_serde.cpp @@ -435,7 +435,16 @@ Status DataTypeDateSerDe::from_olap_string(const std::string& str, Field& fie ? DatelikeTargetType::DATE_TIME : DatelikeTargetType::DATE > (StringRef(str), res, options.timezone, params)) [[unlikely]] { - return Status::InvalidArgument("parse date or datetime fail, string: '{}'", str); + // In paths such as partial update, default values may be written into the zonemap. For date-related types these + // defaults are the numeric default value 0, which corresponds to the date 0000-00-00 and is not always valid. + // So when parsing zonemap strings we swallow the failure and fall back to VecDateTimeValue::FIRST_DAY. The value + // itself does not matter: it will be replaced after compaction. + res = VecDateTimeValue::FIRST_DAY; + if constexpr (IsDatetime) { + res.to_datetime(); + } else { + res.cast_to_date(); + } } field = Field::create_field(std::move(res)); return Status::OK(); diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp index ff8c33045e4dd6..bc2b0b94dcad4e 100644 --- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp +++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp @@ -134,7 +134,9 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef& str, IColumn& column, // "2023-10-15 14:30:00.123000" => scale 6, microsecond = 123000 // "2023-10-15 14:30:00.123" => scale 3, microsecond = 123000 // -// On parse failure, falls back to MIN_DATETIME_V2. +// On parse failure, falls back to MIN_DATETIME_V2, the packed lower-bound +// DateTimeV2 value. 
This is MIN_DATE_V2 shifted into the DateTimeV2 date part, +// not VecDateTimeValue::FIRST_DAY, which belongs to the V1 representation. Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str, Field& field, const FormatOptions& options) const { CastParameters params {.status = Status::OK(), .is_strict = false}; @@ -142,6 +144,10 @@ Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str, Field& DateV2Value res; std::string date_format = "%Y-%m-%d %H:%i:%s.%f"; + // In paths such as partial update, default values may be written into the zonemap. For date-related types these + // defaults are the numeric default value 0, which corresponds to the date 0000-00-00 and is not always valid. + // So when parsing zonemap strings we swallow the failure and fall back to MIN_DATETIME_V2. The value + // itself does not matter: it will be replaced after compaction. if (!res.from_date_format_str(date_format.data(), date_format.size(), str.data(), str.size())) { res = DateV2Value(MIN_DATETIME_V2); } diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp b/be/src/core/data_type_serde/data_type_datev2_serde.cpp index bd90bf91ab1949..3f86248a22ecb0 100644 --- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp +++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp @@ -236,7 +236,7 @@ Status DataTypeDateV2SerDe::from_string_batch(const ColumnString& col_str, Colum // uint32_t value = (year << 9) | (month << 5) | day // // Expected input format: "YYYY-MM-DD", e.g. "2023-10-15" -// On parse failure, falls back to MIN_DATE_V2. +// On parse failure, falls back to MIN_DATE_V2, the packed lower-bound DateV2 value. 
Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field& field, const FormatOptions& options) const { CastParameters params {.status = Status::OK(), .is_strict = false}; @@ -245,6 +245,10 @@ Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field& fiel tm time_tm; char* tmp = strptime(str.c_str(), "%Y-%m-%d", &time_tm); + // In paths such as partial update, default values may be written into the zonemap. For date-related types these + // defaults are the numeric default value 0, which corresponds to the date 0000-00-00 and is not always valid. + // So when parsing zonemap strings we swallow the failure and fall back to MIN_DATE_V2. The value + // itself does not matter: it will be replaced after compaction. if (nullptr != tmp) { uint32_t value = ((time_tm.tm_year + 1900) << 9) | ((time_tm.tm_mon + 1) << 5) | time_tm.tm_mday; diff --git a/be/test/storage/olap_type_test.cpp b/be/test/storage/olap_type_test.cpp index 8789c267097b06..05775b693e4430 100644 --- a/be/test/storage/olap_type_test.cpp +++ b/be/test/storage/olap_type_test.cpp @@ -47,6 +47,25 @@ class OlapTypeTest : public testing::Test { } }; +template <typename CheckField> +void expect_from_storage_string_paths(const DataTypePtr& data_type, const std::string& input, + CheckField&& check_field) { + auto serde = data_type->get_serde(); + for (int path = 0; path < 3; ++path) { + Field field; + const char* path_name = path == 0 ? "from_olap_string" + : path == 1 ? "from_fe_string" + : "from_zonemap_string"; + auto status = + path == 0 ? serde->from_olap_string(input, field, DataTypeSerDe::FormatOptions()) + : path == 1 ? 
serde->from_fe_string(input, field) + : serde->from_zonemap_string(input, field); + ASSERT_TRUE(status.ok()) << data_type->get_name() << " " << path_name + << " failed: " << status.to_string(); + check_field(field); + } +} + // deserialize float string serialized by old version of Doris TEST_F(OlapTypeTest, deser_float_old) { std::vector normal_input_values = { @@ -607,6 +626,47 @@ TEST_F(OlapTypeTest, ser_deser_double) { } } +TEST_F(OlapTypeTest, datelike_storage_string_parse_failure_defaults) { + const std::string invalid = "not-a-valid-value"; + + VecDateTimeValue datev1_default = VecDateTimeValue::FIRST_DAY; + datev1_default.cast_to_date(); + const auto expected_datev1 = Field::create_field(datev1_default); + expect_from_storage_string_paths(DataTypeFactory::instance().create_data_type(TYPE_DATE, false), + invalid, [&](const Field& field) { + ASSERT_EQ(field.get_type(), TYPE_DATE); + EXPECT_TRUE(field == expected_datev1); + }); + + VecDateTimeValue datetimev1_default = VecDateTimeValue::FIRST_DAY; + datetimev1_default.to_datetime(); + const auto expected_datetimev1 = Field::create_field(datetimev1_default); + expect_from_storage_string_paths( + DataTypeFactory::instance().create_data_type(TYPE_DATETIME, false), invalid, + [&](const Field& field) { + ASSERT_EQ(field.get_type(), TYPE_DATETIME); + EXPECT_TRUE(field == expected_datetimev1); + }); + + const auto expected_datev2 = + Field::create_field(DateV2Value(MIN_DATE_V2)); + expect_from_storage_string_paths( + DataTypeFactory::instance().create_data_type(TYPE_DATEV2, false), invalid, + [&](const Field& field) { + ASSERT_EQ(field.get_type(), TYPE_DATEV2); + EXPECT_TRUE(field == expected_datev2); + }); + + const auto expected_datetimev2 = + Field::create_field(DateV2Value(MIN_DATETIME_V2)); + expect_from_storage_string_paths( + DataTypeFactory::instance().create_data_type(TYPE_DATETIMEV2, false, 0, 6), invalid, + [&](const Field& field) { + ASSERT_EQ(field.get_type(), TYPE_DATETIMEV2); + EXPECT_TRUE(field == 
expected_datetimev2); + }); +} + // ============================================================================= // Tests for to_olap_string / from_zonemap_string on DataTypeSerDe // @@ -2023,4 +2083,4 @@ TEST_F(OlapTypeTest, timestamptz_type) { << "serde mismatch for TIMESTAMPTZ expected=" << tc.expected; } } -} // namespace doris \ No newline at end of file +} // namespace doris