Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,6 @@ BUCKAROO_DEPS
# Vim
*.swp
*.swo

# clangd cache
/.cache/clangd
10 changes: 10 additions & 0 deletions clickhouse/columns/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,26 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSetti

case TypeAst::Tuple: {
std::vector<ColumnRef> columns;
std::vector<std::string> names;

columns.reserve(ast.elements.size());
names.reserve(ast.elements.size());
bool any_named = false;
for (const auto& elem : ast.elements) {
if (auto col = CreateColumnFromAst(elem, settings)) {
columns.push_back(col);
names.push_back(elem.element_name);
if (!elem.element_name.empty()) {
any_named = true;
}
} else {
return nullptr;
}
}

if (any_named) {
return std::make_shared<ColumnTuple>(columns, std::move(names));
}
return std::make_shared<ColumnTuple>(columns);
}

Expand Down
19 changes: 17 additions & 2 deletions clickhouse/columns/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns)
{
}

ColumnTuple::ColumnTuple(const std::vector<ColumnRef>& columns,
std::vector<std::string> names)
: Column(Type::CreateTuple(CollectTypes(columns), std::move(names)))
, columns_(columns)
{
}

size_t ColumnTuple::TupleSize() const {
return columns_.size();
}
Expand Down Expand Up @@ -48,7 +55,11 @@ ColumnRef ColumnTuple::Slice(size_t begin, size_t len) const {
sliced_columns.push_back(column->Slice(begin, len));
}

return std::make_shared<ColumnTuple>(sliced_columns);
const auto& names = this->Type()->As<TupleType>()->GetItemNames();
if (names.empty()) {
return std::make_shared<ColumnTuple>(sliced_columns);
}
return std::make_shared<ColumnTuple>(sliced_columns, names);
}

ColumnRef ColumnTuple::CloneEmpty() const {
Expand All @@ -59,7 +70,11 @@ ColumnRef ColumnTuple::CloneEmpty() const {
result_columns.push_back(column->CloneEmpty());
}

return std::make_shared<ColumnTuple>(result_columns);
const auto& names = this->Type()->As<TupleType>()->GetItemNames();
if (names.empty()) {
return std::make_shared<ColumnTuple>(result_columns);
}
return std::make_shared<ColumnTuple>(result_columns, names);
}

bool ColumnTuple::LoadPrefix(InputStream* input, size_t rows) {
Expand Down
2 changes: 2 additions & 0 deletions clickhouse/columns/tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ namespace clickhouse {
class ColumnTuple : public Column {
public:
ColumnTuple(const std::vector<ColumnRef>& columns);
ColumnTuple(const std::vector<ColumnRef>& columns,
std::vector<std::string> names);

/// Returns count of columns in the tuple.
size_t TupleSize() const;
Expand Down
8 changes: 8 additions & 0 deletions clickhouse/types/type_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ bool TypeAst::operator==(const TypeAst & other) const {
return meta == other.meta
&& code == other.code
&& name == other.name
&& element_name == other.element_name
&& value == other.value
&& value_string == other.value_string
&& std::equal(elements.begin(), elements.end(), other.elements.begin(), other.elements.end());
}

Expand Down Expand Up @@ -167,6 +169,12 @@ bool TypeParser::Parse(TypeAst* type) {
break;
}
case Token::Name:
if (!type_->name.empty()) {
// A second Name token on the same element means the
// previous one was a field name in a named-tuple element
// (e.g. "a" in "Tuple(a Int32, …)").
type_->element_name = std::move(type_->name);
}
type_->meta = GetTypeMeta(token.value);
type_->name = token.value.to_string();
type_->code = GetTypeCode(type_->name);
Expand Down
3 changes: 3 additions & 0 deletions clickhouse/types/type_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ struct TypeAst {
/// Type's name.
/// Need to cache TypeAst, so can't use StringView for name.
std::string name;
/// Name of this element inside its parent (e.g. field name inside a named
/// Tuple). Empty for unnamed elements.
std::string element_name;
Comment on lines +34 to +36
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would make more sense to add a vector here, called all_names, store both the name and the type in it, and keep the name field set to the last value as it currently works.

/// Type's name.
/// Need to cache TypeAst, so can't use StringView for name.
std::string name;
/// List of all names assigned to the element, for example 
/// named tuples, have name and type, `Tuple(i Int64, s String)`.
std::vector<std::string> all_names

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally I dont really like that, because it

  • Creates two spots containing the same information
  • Does not make it clear through the type-system/name that the entries in all_names are rather different entities (field name, vs type name)

/// Value associated with the node,
/// used for fixed-width types and enum values.
int64_t value = 0;
Expand Down
31 changes: 29 additions & 2 deletions clickhouse/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,11 @@ TypeRef Type::CreateTuple(const std::vector<TypeRef>& item_types) {
return TypeRef(new TupleType(item_types));
}

TypeRef Type::CreateTuple(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names) {
return TypeRef(new TupleType(item_types, std::move(item_names)));
}

TypeRef Type::CreateEnum8(const std::vector<EnumItem>& enum_items) {
return TypeRef(new EnumType(Type::Enum8, enum_items));
}
Expand Down Expand Up @@ -447,6 +452,11 @@ NullableType::NullableType(TypeRef nested_type) : Type(Nullable), nested_type_(n
TupleType::TupleType(const std::vector<TypeRef>& item_types) : Type(Tuple), item_types_(item_types) {
}

TupleType::TupleType(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names)
: Type(Tuple), item_types_(item_types), item_names_(std::move(item_names)) {
}

/// class LowCardinalityType
LowCardinalityType::LowCardinalityType(TypeRef nested_type) : Type(LowCardinality), nested_type_(nested_type) {
}
Expand All @@ -456,13 +466,30 @@ LowCardinalityType::~LowCardinalityType() {

std::string TupleType::GetName() const {
Comment thread
IyeOnline marked this conversation as resolved.
std::string result("Tuple(");
bool has_complete_names = item_names_.size() == item_types_.size();
if (has_complete_names) {
Comment thread
IyeOnline marked this conversation as resolved.
Outdated
for (const auto& item_name : item_names_) {
if (item_name.empty()) {
has_complete_names = false;
break;
}
}
}

if (!item_types_.empty()) {
result += item_types_[0]->GetName();
if (has_complete_names) {
result += item_names_[0] + " " + item_types_[0]->GetName();
} else {
result += item_types_[0]->GetName();
}
}

for (size_t i = 1; i < item_types_.size(); ++i) {
result += ", " + item_types_[i]->GetName();
if (has_complete_names) {
result += ", " + item_names_[i] + " " + item_types_[i]->GetName();
} else {
result += ", " + item_types_[i]->GetName();
}
}

result += ")";
Expand Down
10 changes: 10 additions & 0 deletions clickhouse/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ class Type {

static TypeRef CreateTuple(const std::vector<TypeRef>& item_types);

static TypeRef CreateTuple(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names);

Comment thread
IyeOnline marked this conversation as resolved.
static TypeRef CreateEnum8(const std::vector<EnumItem>& enum_items);

static TypeRef CreateEnum16(const std::vector<EnumItem>& enum_items);
Expand Down Expand Up @@ -293,14 +296,21 @@ class NullableType : public Type {
class TupleType : public Type {
public:
explicit TupleType(const std::vector<TypeRef>& item_types);
TupleType(const std::vector<TypeRef>& item_types,
std::vector<std::string> item_names);

std::string GetName() const;

/// Type of nested Tuple element type.
std::vector<TypeRef> GetTupleType() const { return item_types_; }

/// Field names for named tuples. Same length as GetTupleType() when
/// populated, or empty when the tuple has no field names.
const std::vector<std::string>& GetItemNames() const { return item_names_; }

private:
std::vector<TypeRef> item_types_;
std::vector<std::string> item_names_;
};

class LowCardinalityType : public Type {
Expand Down
34 changes: 33 additions & 1 deletion ut/type_parser_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,28 @@ TEST(TypeParserCase, ParseTuple) {
auto element = ast.elements.begin();
for (size_t i = 0; i < 2; ++i) {
ASSERT_EQ(element->name, names[i]);
ASSERT_TRUE(element->element_name.empty());
++element;
}
}

TEST(TypeParserCase, ParseNamedTuple) {
TypeAst ast;
TypeParser("Tuple(a UInt8, b String)").Parse(&ast);
ASSERT_EQ(ast.meta, TypeAst::Tuple);
ASSERT_EQ(ast.name, "Tuple");
ASSERT_EQ(ast.code, Type::Tuple);
ASSERT_EQ(ast.elements.size(), 2u);

ASSERT_EQ(ast.elements[0].element_name, "a");
ASSERT_EQ(ast.elements[0].name, "UInt8");
ASSERT_EQ(ast.elements[0].code, Type::UInt8);

ASSERT_EQ(ast.elements[1].element_name, "b");
ASSERT_EQ(ast.elements[1].name, "String");
ASSERT_EQ(ast.elements[1].code, Type::String);
}

TEST(TypeParserCase, ParseDecimal) {
TypeAst ast;
TypeParser("Decimal(12, 5)").Parse(&ast);
Expand Down Expand Up @@ -167,6 +185,20 @@ TEST(TypeParserCase, ParseDateTime_MINSK_TIMEZONE) {
ASSERT_EQ(ast.elements[0].meta, TypeAst::Terminal);
}

TEST(TypeParserCase, EqualityIncludesValueString) {
TypeAst utc;
TypeAst minsk;
ASSERT_TRUE(TypeParser("DateTime('UTC')").Parse(&utc));
ASSERT_TRUE(TypeParser("DateTime('Europe/Minsk')").Parse(&minsk));
ASSERT_NE(utc, minsk);

TypeAst enum_one;
TypeAst enum_two;
ASSERT_TRUE(TypeParser("Enum8('ONE' = 1)").Parse(&enum_one));
ASSERT_TRUE(TypeParser("Enum8('TWO' = 1)").Parse(&enum_two));
ASSERT_NE(enum_one, enum_two);
}

TEST(TypeParserCase, LowCardinality_String) {
TypeAst ast;
ASSERT_TRUE(TypeParser("LowCardinality(String)").Parse(&ast));
Expand Down Expand Up @@ -194,7 +226,7 @@ TEST(TypeParserCase, LowCardinality_FixedString) {
ASSERT_EQ(ast.elements[0].name, "FixedString");
ASSERT_EQ(ast.elements[0].value, 0);
ASSERT_EQ(ast.elements[0].elements.size(), 1u);
auto param = TypeAst{TypeAst::Number, Type::Void, "", 10, {}, {}};
auto param = TypeAst{TypeAst::Number, Type::Void, "", "", 10, {}, {}};
ASSERT_EQ(ast.elements[0].elements[0], param);
}

Expand Down
60 changes: 60 additions & 0 deletions ut/types_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,66 @@ TEST(TypesCase, NullableType) {
ASSERT_EQ(Type::CreateNullable(nested)->As<NullableType>()->GetNestedType(), nested);
}

TEST(TypesCase, TupleTypeItemNames) {
auto unnamed = Type::CreateTuple({
Type::CreateSimple<int32_t>(),
Type::CreateString()});
ASSERT_TRUE(unnamed->As<TupleType>()->GetItemNames().empty());

auto named = Type::CreateTuple(
{Type::CreateSimple<int32_t>(), Type::CreateString()},
{"a", "b"});
const auto& names = named->As<TupleType>()->GetItemNames();
ASSERT_EQ(names.size(), 2u);
ASSERT_EQ(names[0], "a");
ASSERT_EQ(names[1], "b");
}

TEST(TypesCase, TupleTypeNameIncludesFieldNames) {
auto named = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()},
{"a", "b"});
ASSERT_EQ(named->GetName(), "Tuple(a UInt8, b String)");

auto partially_named = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()},
{"a", ""});
ASSERT_EQ(partially_named->GetName(), "Tuple(UInt8, String)");

auto mismatched_names = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()},
{"a"});
ASSERT_EQ(mismatched_names->GetName(), "Tuple(UInt8, String)");
}

TEST(TypesCase, TupleTypeNamesFromFactory) {
auto col = CreateColumnByType("Tuple(a UInt8, b String)");
ASSERT_NE(col, nullptr);
const auto& names = col->Type()->As<TupleType>()->GetItemNames();
ASSERT_EQ(names.size(), 2u);
ASSERT_EQ(names[0], "a");
ASSERT_EQ(names[1], "b");

auto col_unnamed = CreateColumnByType("Tuple(UInt8, String)");
ASSERT_NE(col_unnamed, nullptr);
ASSERT_TRUE(col_unnamed->Type()->As<TupleType>()->GetItemNames().empty());
}

TEST(TypesCase, TupleTypeEqualityIncludesFieldNames) {
auto unnamed = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()});
auto named_ab = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()},
{"a", "b"});
auto named_xy = Type::CreateTuple(
{Type::CreateSimple<uint8_t>(), Type::CreateString()},
{"x", "y"});

ASSERT_TRUE(named_ab->IsEqual(named_ab));
ASSERT_FALSE(named_ab->IsEqual(unnamed));
ASSERT_FALSE(named_ab->IsEqual(named_xy));
}

TEST(TypesCase, EnumTypes) {
auto enum8 = Type::CreateEnum8({{"One", 1}, {"Two", 2}});
ASSERT_EQ(enum8->GetName(), "Enum8('One' = 1, 'Two' = 2)");
Expand Down
Loading