Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
3,778 changes: 3,778 additions & 0 deletions bench.exs

Large diffs are not rendered by default.

78 changes: 61 additions & 17 deletions c_src/exml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,41 @@
#include <thread>
#include <vector>

using ustring = std::vector<unsigned char>;
// Stateless allocator satisfying the standard Allocator requirements that
// obtains storage through enif_alloc() and releases it through enif_free(),
// so containers using it do not go through the default operator new/delete.
template <typename T>
struct enif_allocator {
  using value_type = T;

  enif_allocator() noexcept = default;

  // Rebinding/converting constructor: there is no state to carry over.
  template <typename U>
  enif_allocator(const enif_allocator<U> &) noexcept {}

  // Returns uninitialised storage for `n` objects of type T.
  // Throws std::bad_alloc when `n * sizeof(T)` would overflow std::size_t
  // or when enif_alloc() hands back a null pointer.
  T *allocate(std::size_t n) {
    // Largest element count whose byte size still fits in std::size_t.
    const std::size_t max_elements = std::size_t(-1) / sizeof(T);
    if (n <= max_elements) {
      if (void *raw = enif_alloc(n * sizeof(T)))
        return static_cast<T *>(raw);
    }
    throw std::bad_alloc();
  }

  // Releases storage previously obtained from allocate(); the element count
  // is not needed by enif_free() and is ignored.
  void deallocate(T *p, std::size_t) noexcept {
    enif_free(p);
  }
};

// enif_allocator carries no state, so any two instances (even for different
// value types) are interchangeable and always compare equal.
template <typename T, typename U>
constexpr bool operator==(const enif_allocator<T> & /*lhs*/,
                          const enif_allocator<U> & /*rhs*/) noexcept {
  return true;
}

// Counterpart of operator==: since every pair of enif_allocator instances
// compares equal, inequality is always false.
template <typename T, typename U>
constexpr bool operator!=(const enif_allocator<T> &lhs,
                          const enif_allocator<U> &rhs) noexcept {
  return !(lhs == rhs);
}

template <typename T>
using nif_vector = std::vector<T, enif_allocator<T>>;

using ustring = nif_vector<unsigned char>;

class xml_document {
public:
Expand Down Expand Up @@ -82,12 +116,15 @@ namespace {
} // namespace

struct Parser {
// Trailing bytes after the XML terminator so 128-bit simd skip can load 16 bytes safely.
static constexpr std::size_t kSimdTailPadding = 15;

ustring stream_tag;
std::uint64_t max_element_size = 0;
bool infinite_stream = false;

static thread_local std::vector<unsigned char> buffer;
static thread_local std::vector<ERL_NIF_TERM> term_buffer;
static thread_local nif_vector<unsigned char> buffer;
static thread_local nif_vector<ERL_NIF_TERM> term_buffer;

bool copy_buffer(ErlNifEnv *env, ERL_NIF_TERM buf) {
buffer.clear();
Expand All @@ -107,6 +144,7 @@ struct Parser {
}

buffer.push_back('\0');
buffer.resize(buffer.size() + kSimdTailPadding, '\0');
return true;
}

Expand All @@ -116,8 +154,8 @@ struct Parser {
}
};

thread_local std::vector<unsigned char> Parser::buffer;
thread_local std::vector<ERL_NIF_TERM> Parser::term_buffer;
thread_local nif_vector<unsigned char> Parser::buffer;
thread_local nif_vector<ERL_NIF_TERM> Parser::term_buffer;

struct ParseCtx {
ErlNifEnv *env;
Expand Down Expand Up @@ -170,7 +208,7 @@ ERL_NIF_TERM merge_data_nodes(ParseCtx &ctx,
}

void append_pending_data_nodes(ParseCtx &ctx,
std::vector<ERL_NIF_TERM> &children,
nif_vector<ERL_NIF_TERM> &children,
rapidxml::xml_node<unsigned char> *node,
const std::size_t pending) {
if (pending == 0)
Expand All @@ -186,7 +224,7 @@ ERL_NIF_TERM make_xmlel(ParseCtx &ctx, rapidxml::xml_node<unsigned char> *node);

ERL_NIF_TERM get_children_tuple(ParseCtx &ctx,
rapidxml::xml_node<unsigned char> *node) {
std::vector<ERL_NIF_TERM> &children = Parser::term_buffer;
nif_vector<ERL_NIF_TERM> &children = Parser::term_buffer;
std::size_t begin = children.size();

rapidxml::xml_node<unsigned char> *first_data_node = nullptr;
Expand Down Expand Up @@ -392,18 +430,20 @@ bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children,
return true;
}

using NifPrintBuffer =
rapidxml::PrintBuffer<unsigned char, enif_allocator<unsigned char>, RAPIDXML_STATIC_POOL_SIZE>;

ERL_NIF_TERM node_to_binary(ErlNifEnv *env,
rapidxml::xml_node<unsigned char> &node,
int flags) {
static thread_local std::vector<unsigned char> print_buffer;
static thread_local NifPrintBuffer print_buffer;
print_buffer.clear();

rapidxml::print(std::back_inserter(print_buffer), node, flags);
rapidxml::print(print_buffer, node, flags);

ERL_NIF_TERM ret_binary;
unsigned char *data =
enif_make_new_binary(env, print_buffer.size(), &ret_binary);
std::copy(print_buffer.begin(), print_buffer.end(), data);
unsigned char *data = enif_make_new_binary(env, print_buffer.size(), &ret_binary);
std::memcpy(data, print_buffer.data(), print_buffer.size());
return ret_binary;
}

Expand Down Expand Up @@ -546,8 +586,9 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int,
parseStreamOpen();
} else if (has_stream_closing_tag(parser, offset)) {
doc.clear();
// no data after closing tag
result.rest = &*Parser::buffer.rbegin();
// no data after closing tag (rest at logical terminator, not SIMD tail padding)
result.rest =
Parser::buffer.data() + Parser::buffer.size() - Parser::kSimdTailPadding - 1;
element = make_stream_end_tuple(ctx);
} else {
parseElement();
Expand All @@ -561,7 +602,8 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int,
if (result.eof) {
// Return an error if an incomplete element has at least max_element_size characters.
if (parser->max_element_size &&
Parser::buffer.size() - offset > parser->max_element_size) {
Parser::buffer.size() - Parser::kSimdTailPadding - offset >
parser->max_element_size) {
error_msg = "element too big";
} else {
result.rest = Parser::buffer.data() + offset;
Expand All @@ -572,8 +614,10 @@ static ERL_NIF_TERM parse_next(ErlNifEnv *env, int,
}

if (!error_msg) {
// Return an error when null character is found.
std::size_t rest_size = &Parser::buffer.back() - result.rest;
// Return an error when null character is found (compare against logical end, excluding SIMD padding).
const std::size_t logical_size = Parser::buffer.size() - Parser::kSimdTailPadding;
const unsigned char *logical_last = Parser::buffer.data() + logical_size - 1;
std::size_t rest_size = logical_last - result.rest;
if (std::strlen(reinterpret_cast<const char*>(result.rest)) != rest_size)
error_msg = "null character found in buffer";
}
Expand Down
Loading