blob: 65bcd78b9a92c08638df4d3fce17b3cc49026901 [file] [log] [blame]
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "tools/cddl/parse.h"
#include <algorithm>
#include <cctype>
#include <cstring>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <utility>
#include <vector>
#include "tools/cddl/logging.h"
static_assert(sizeof(std::string_view::size_type) == sizeof(size_t),
"We assume string_view's size_type is the same as size_t. If "
"not, the following file needs to be refactored");
// All of the parsing methods in this file that operate on Parser are named
// either Parse* or Skip* and are named according to the CDDL grammar in
// grammar.abnf. Similarly, methods like ParseMemberKey1 attempt to parse the
// first choice in the memberkey rule.
struct Parser {
std::string_view data;
std::vector<std::unique_ptr<AstNode>> nodes;
};
std::string_view StripLeadingAsciiWhitespace(std::string_view view) {
while (!view.empty() &&
std::isspace(static_cast<unsigned char>(view.front()))) {
view.remove_prefix(1);
}
return view;
}
AstNode* AddNode(Parser* p,
AstNode::Type type,
std::string_view text,
AstNode* children = nullptr) {
p->nodes.push_back(std::make_unique<AstNode>());
AstNode* node = p->nodes.back().get();
node->children = children;
node->sibling = nullptr;
node->type = type;
node->text = text;
return node;
}
bool IsBinaryDigit(char x) {
return '0' == x || x == '1';
}
// Determines if the given character matches regex '[a-zA-Z@_$]'.
bool IsExtendedAlpha(char x) {
return std::isalpha(static_cast<unsigned char>(x)) || x == '@' || x == '_' ||
x == '$';
}
bool IsNewline(char x) {
return x == '\r' || x == '\n';
}
bool IsWhitespaceOrSemicolon(char c) {
return c == ' ' || c == ';' || c == '\r' || c == '\n';
}
std::string_view SkipNewline(std::string_view view) {
size_t index = 0;
while (index < view.length() && IsNewline(view[index])) {
++index;
}
return view.substr(index);
}
std::optional<std::string_view> ParseTypeKeyFromComment(Parser* p);
// Skips over a comment that makes up the remainder of the current line.
std::string_view SkipComment(std::string_view view, bool skip_type_key = true) {
size_t index = 0;
if (!view.empty() && view[index] == ';') {
if (!skip_type_key) {
Parser p = {view};
if (ParseTypeKeyFromComment(&p).has_value()) {
return view;
}
}
++index;
while (index < view.length() && !IsNewline(view[index])) {
CHECK(std::isprint(static_cast<unsigned char>(view[index])));
++index;
}
while (index < view.length() && IsNewline(view[index])) {
++index;
}
}
return view.substr(index);
}
void SkipWhitespaceImpl(Parser* p, bool skip_type_key = true) {
std::string_view view = p->data;
std::string_view new_view;
while (true) {
new_view = SkipComment(view, skip_type_key);
if (new_view.data() == view.data()) {
new_view = StripLeadingAsciiWhitespace(view);
}
if (new_view.data() == view.data() && new_view.size() == view.size()) {
break;
}
view = new_view;
}
p->data = new_view;
}
void SkipWhitespace(Parser* p, bool skip_comments = true) {
if (!skip_comments) {
p->data = StripLeadingAsciiWhitespace(p->data);
return;
}
SkipWhitespaceImpl(p);
}
// This is only used for the start of the file to avoid losing the first type
// key.
void SkipStartWhitespace(Parser* p) {
SkipWhitespaceImpl(p, false);
}
bool TrySkipNewline(Parser* p) {
std::string_view new_view = SkipNewline(p->data);
bool is_changed = p->data.size() != new_view.size();
p->data = new_view;
return is_changed;
}
bool TrySkipCharacter(Parser* p, char c) {
if (!p->data.empty() && p->data.front() == c) {
p->data.remove_prefix(1);
return true;
}
return false;
}
enum class AssignType {
kInvalid = -1,
kAssign,
kAssignT,
kAssignG,
};
AssignType ParseAssignmentType(Parser* p) {
std::string_view it = p->data;
if (it.empty()) {
return AssignType::kInvalid;
}
if (it[0] == '=') {
p->data.remove_prefix(1);
return AssignType::kAssign;
} else if (it[0] == '/') {
if (it.size() >= 3 && it[1] == '/' && it[2] == '=') {
p->data.remove_prefix(3);
return AssignType::kAssignG;
} else if (it.size() >= 2 && it[1] == '=') {
p->data.remove_prefix(2);
return AssignType::kAssignT;
}
}
return AssignType::kInvalid;
}
AstNode* ParseType1(Parser* p);
AstNode* ParseType(Parser* p, bool skip_comments = true);
AstNode* ParseId(Parser* p);
void SkipUint(Parser* p) {
std::string_view view = p->data;
bool is_binary = false;
size_t index = 0;
if (view.starts_with("0b")) {
is_binary = true;
index = 2;
} else if (view.starts_with("0x")) {
index = 2;
}
while (index < view.length() &&
std::isdigit(static_cast<unsigned char>(view[index]))) {
if (is_binary) {
CHECK(IsBinaryDigit(view[index]));
}
++index;
}
p->data = view.substr(index);
}
AstNode* ParseNumber(Parser* p) {
Parser p_speculative{p->data};
if (p_speculative.data.empty()) {
return nullptr;
}
if (!std::isdigit(static_cast<unsigned char>(p_speculative.data[0])) &&
p_speculative.data[0] != '-') {
// TODO(btolsch): Add support for hexfloat, fraction, exponent.
return nullptr;
}
if (p_speculative.data[0] == '-') {
p_speculative.data.remove_prefix(1);
}
SkipUint(&p_speculative);
AstNode* node =
AddNode(p, AstNode::Type::kNumber,
p->data.substr(0, p->data.size() - p_speculative.data.size()));
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
AstNode* ParseText(Parser* p) {
return nullptr;
}
AstNode* ParseBytes(Parser* p) {
return nullptr;
}
// Returns whether `c` could be the first character in a valid "value" string.
// This is not a guarantee however, since 'h' and 'b' could also indicate the
// start of an ID, but value needs to be tried first.
bool IsValue(char c) {
return (c == '-' ||
std::isdigit(static_cast<unsigned char>(c)) || // FIRST(number)
c == '"' || // FIRST(text)
c == '\'' || c == 'h' || c == 'b'); // FIRST(bytes)
}
AstNode* ParseValue(Parser* p) {
AstNode* node = ParseNumber(p);
if (!node) {
node = ParseText(p);
}
if (!node) {
ParseBytes(p);
}
return node;
}
// Determines whether an occurrence operator (such as '*' or '?') prefacing
// the group definition occurs before the next whitespace character, and
// creates a new Occurrence node if so.
AstNode* ParseOccur(Parser* p) {
Parser p_speculative{p->data};
if (p_speculative.data.empty()) {
return nullptr;
}
if (p_speculative.data[0] == '?' || p_speculative.data[0] == '+') {
p_speculative.data.remove_prefix(1);
} else {
SkipUint(&p_speculative);
if (p_speculative.data.empty() || p_speculative.data[0] != '*') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
SkipUint(&p_speculative);
}
AstNode* node =
AddNode(p, AstNode::Type::kOccur,
p->data.substr(0, p->data.size() - p_speculative.data.size()));
p->data = p_speculative.data;
return node;
}
std::optional<std::string_view> ParseTypeKeyFromComment(Parser* p) {
Parser p_speculative{p->data};
if (!TrySkipCharacter(&p_speculative, ';')) {
return std::nullopt;
}
SkipWhitespace(&p_speculative, false);
constexpr std::string_view kTypeKeyPrefix = "type key";
if (!p_speculative.data.starts_with(kTypeKeyPrefix)) {
return std::nullopt;
}
p_speculative.data.remove_prefix(kTypeKeyPrefix.size());
SkipWhitespace(&p_speculative, false);
Parser p_speculative2{p_speculative.data};
while (!p_speculative2.data.empty() &&
std::isdigit(static_cast<unsigned char>(p_speculative2.data[0]))) {
p_speculative2.data.remove_prefix(1);
}
auto result = p_speculative.data.substr(
0, p_speculative.data.size() - p_speculative2.data.size());
p->data = p_speculative2.data;
return result;
}
AstNode* ParseMemberKeyFromComment(Parser* p) {
Parser p_speculative{p->data};
if (!TrySkipCharacter(&p_speculative, ';')) {
return nullptr;
}
SkipWhitespace(&p_speculative, false);
AstNode* value = ParseId(&p_speculative);
if (!value) {
return nullptr;
}
while (!p_speculative.data.empty() && (p_speculative.data.front() == ' ' ||
p_speculative.data.front() == '\t')) {
p_speculative.data.remove_prefix(1);
}
if (!TrySkipNewline(&p_speculative)) {
return nullptr;
}
AstNode* node = AddNode(p, AstNode::Type::kMemberKey, value->text, value);
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
AstNode* ParseMemberKey1(Parser* p) {
Parser p_speculative{p->data};
if (!ParseType1(&p_speculative)) {
return nullptr;
}
SkipWhitespace(&p_speculative);
if (p_speculative.data.size() < 2 || p_speculative.data[0] != '=' ||
p_speculative.data[1] != '>') {
return nullptr;
}
p_speculative.data.remove_prefix(2);
AstNode* node =
AddNode(p, AstNode::Type::kMemberKey,
p->data.substr(0, p->data.size() - p_speculative.data.size()));
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
AstNode* ParseMemberKey2(Parser* p) {
Parser p_speculative{p->data};
AstNode* id = ParseId(&p_speculative);
if (!id) {
return nullptr;
}
SkipWhitespace(&p_speculative);
if (p_speculative.data.empty() || p_speculative.data[0] != ':') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
AstNode* node = AddNode(
p, AstNode::Type::kMemberKey,
p->data.substr(0, p->data.size() - p_speculative.data.size()), id);
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
AstNode* ParseMemberKey3(Parser* p) {
Parser p_speculative{p->data};
AstNode* value = ParseValue(&p_speculative);
if (!value) {
return nullptr;
}
SkipWhitespace(&p_speculative);
if (p_speculative.data.empty() || p_speculative.data[0] != ':') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
AstNode* node = AddNode(
p, AstNode::Type::kMemberKey,
p->data.substr(0, p->data.size() - p_speculative.data.size()), value);
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
// Iteratively tries all valid member key formats, retuning a Node representing
// the member key if found or nullptr if not.
AstNode* ParseMemberKey(Parser* p) {
AstNode* node = ParseMemberKey1(p);
if (!node) {
node = ParseMemberKey2(p);
}
if (!node) {
node = ParseMemberKey3(p);
}
return node;
}
AstNode* ParseGroupEntry(Parser* p);
bool SkipOptionalComma(Parser* p) {
SkipWhitespace(p);
if (!p->data.empty() && p->data[0] == ',') {
p->data.remove_prefix(1);
SkipWhitespace(p);
}
return true;
}
// Parse the group contained inside of other brackets. Since the brackets around
// the group are optional inside of other brackets, we can't directly call
// ParseGroupEntry(...) and instead need this wrapper around it.
AstNode* ParseGroupChoice(Parser* p) {
Parser p_speculative{p->data};
AstNode* tail = nullptr;
AstNode* group_node =
AddNode(&p_speculative, AstNode::Type::kGrpchoice, std::string_view());
std::string_view group_node_text = p_speculative.data;
while (true) {
std::string_view orig = p_speculative.data;
AstNode* group_entry = ParseGroupEntry(&p_speculative);
if (!group_entry) {
p_speculative.data = orig;
if (!group_node->children) {
return nullptr;
}
group_node->text = group_node_text.substr(
0, group_node_text.size() - p_speculative.data.size());
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return group_node;
}
if (!group_node->children) {
group_node->children = group_entry;
}
if (tail) {
tail->sibling = group_entry;
}
tail = group_entry;
if (!SkipOptionalComma(&p_speculative)) {
return nullptr;
}
}
}
AstNode* ParseGroup(Parser* p) {
std::string_view orig = p->data;
AstNode* group_choice = ParseGroupChoice(p);
if (!group_choice) {
return nullptr;
}
return AddNode(p, AstNode::Type::kGroup,
orig.substr(0, orig.size() - p->data.size()), group_choice);
}
// Parse optional range operator .. (inlcusive) or ... (exclusive)
// ABNF rule: rangeop = "..." / ".."
AstNode* ParseRangeop(Parser* p) {
std::string_view view = p->data;
if (view.starts_with("...")) {
// rangeop ...
p->data.remove_prefix(3);
return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 3));
} else if (view.starts_with("..")) {
// rangeop ..
p->data.remove_prefix(2);
return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 2));
}
return nullptr;
}
// Parse optional control operator .id
// ABNF rule: ctlop = "." id
AstNode* ParseCtlop(Parser* p) {
std::string_view view = p->data;
std::string_view initial_data = p->data;
if (!view.starts_with(".")) {
return nullptr;
}
p->data.remove_prefix(1);
AstNode* id = ParseId(p);
if (!id) {
return nullptr;
}
return AddNode(p, AstNode::Type::kCtlop,
view.substr(0, initial_data.size() - p->data.size()), id);
}
AstNode* ParseType2(Parser* p) {
std::string_view orig = p->data;
std::string_view it = p->data;
if (it.empty()) {
return nullptr;
}
AstNode* node = AddNode(p, AstNode::Type::kType2, std::string_view());
if (IsValue(it[0])) {
AstNode* value = ParseValue(p);
if (!value) {
if (it[0] == 'h' || it[0] == 'b') {
AstNode* id = ParseId(p);
if (!id) {
return nullptr;
}
id->type = AstNode::Type::kTypename;
node->children = id;
} else {
return nullptr;
}
} else {
node->children = value;
}
} else if (IsExtendedAlpha(it[0])) {
AstNode* id = ParseId(p);
if (!id) {
return nullptr;
}
if (!p->data.empty() && p->data[0] == '<') {
std::cerr << "It looks like you're trying to use a generic argument, "
"which we don't support"
<< std::endl;
return nullptr;
}
id->type = AstNode::Type::kTypename;
node->children = id;
} else if (it[0] == '(') {
p->data.remove_prefix(1);
SkipWhitespace(p);
if (!p->data.empty() && p->data[0] == ')') {
std::cerr << "It looks like you're trying to provide an empty Type (), "
"which we don't support"
<< std::endl;
return nullptr;
}
AstNode* type = ParseType(p);
if (!type) {
return nullptr;
}
SkipWhitespace(p);
if (p->data.empty() || p->data[0] != ')') {
return nullptr;
}
p->data.remove_prefix(1);
node->children = type;
} else if (it[0] == '{') {
p->data.remove_prefix(1);
SkipWhitespace(p);
if (!p->data.empty() && p->data[0] == '}') {
std::cerr << "It looks like you're trying to provide an empty Group {}, "
"which we don't support"
<< std::endl;
return nullptr;
}
AstNode* group = ParseGroup(p);
if (!group) {
return nullptr;
}
SkipWhitespace(p);
if (p->data.empty() || p->data[0] != '}') {
return nullptr;
}
p->data.remove_prefix(1);
node->children = group;
} else if (it[0] == '[') {
p->data.remove_prefix(1);
SkipWhitespace(p);
AstNode* group = ParseGroup(p);
if (!group) {
return nullptr;
}
SkipWhitespace(p);
if (p->data.empty() || p->data[0] != ']') {
return nullptr;
}
p->data.remove_prefix(1);
node->children = group;
} else if (it[0] == '~') {
p->data.remove_prefix(1);
SkipWhitespace(p);
if (!ParseId(p)) {
return nullptr;
}
if (!p->data.empty() && p->data[0] == '<') {
std::cerr << "It looks like you're trying to use a generic argument, "
"which we don't support"
<< std::endl;
return nullptr;
}
} else if (it[0] == '&') {
p->data.remove_prefix(1);
SkipWhitespace(p);
if (!p->data.empty() && p->data[0] == '(') {
p->data.remove_prefix(1);
SkipWhitespace(p);
if (!p->data.empty() && p->data[0] == ')') {
std::cerr << "It looks like you're trying to provide an empty Type &(),"
" which we don't support"
<< std::endl;
return nullptr;
}
AstNode* group = ParseGroup(p);
if (!group) {
return nullptr;
}
SkipWhitespace(p);
if (p->data.empty() || p->data[0] != ')') {
return nullptr;
}
p->data.remove_prefix(1);
node->children = group;
} else {
AstNode* id = ParseId(p);
if (id) {
if (!p->data.empty() && p->data[0] == '<') {
std::cerr << "It looks like you're trying to use a generic argument, "
"which we don't support"
<< std::endl;
return nullptr;
}
id->type = AstNode::Type::kGroupname;
node->children = id;
} else {
return nullptr;
}
}
} else if (it[0] == '#') {
p->data.remove_prefix(1);
if (!p->data.empty() && p->data[0] == '6') {
p->data.remove_prefix(1);
if (!p->data.empty() && p->data[0] == '.') {
p->data.remove_prefix(1);
SkipUint(p);
}
if (p->data.empty() || p->data[0] != '(') {
return nullptr;
}
p->data.remove_prefix(1);
SkipWhitespace(p);
AstNode* type = ParseType(p);
if (!type) {
return nullptr;
}
SkipWhitespace(p);
if (p->data.empty() || p->data[0] != ')') {
return nullptr;
}
p->data.remove_prefix(1);
node->children = type;
} else if (!p->data.empty() &&
std::isdigit(static_cast<unsigned char>(p->data[0]))) {
std::cerr << "# MAJOR unimplemented" << std::endl;
return nullptr;
} else {
// '#' followed by something else? "p->data = it" logic in original was:
// it was incremented.
// If we fall through, p->data is updated.
// Original: p->data = it; (it was incremented at start of # block)
// Here p->data is already incremented.
}
} else {
return nullptr;
}
node->text = orig.substr(0, orig.size() - p->data.size());
return node;
}
AstNode* ParseType1(Parser* p) {
std::string_view orig = p->data;
AstNode* type2 = ParseType2(p);
if (!type2) {
return nullptr;
}
SkipWhitespace(p, false);
AstNode* rangeop_or_ctlop = ParseRangeop(p);
if (!rangeop_or_ctlop) {
rangeop_or_ctlop = ParseCtlop(p);
}
if (rangeop_or_ctlop) {
SkipWhitespace(p, false);
AstNode* param = ParseType2(p);
if (!param) {
return nullptr;
}
type2->sibling = rangeop_or_ctlop;
rangeop_or_ctlop->sibling = param;
}
return AddNode(p, AstNode::Type::kType1,
orig.substr(0, orig.size() - p->data.size()), type2);
}
// Different valid types for a call are specified as type1 / type2, so we split
// at the '/' character and process each allowed type separately.
AstNode* ParseType(Parser* p, bool skip_comments) {
Parser p_speculative{p->data};
// Parse all allowed types into a linked list starting in type1's sibling ptr.
AstNode* type1 = ParseType1(&p_speculative);
if (!type1) {
return nullptr;
}
SkipWhitespace(&p_speculative, skip_comments);
AstNode* tail = type1;
while (!p_speculative.data.empty() && p_speculative.data[0] == '/') {
p_speculative.data.remove_prefix(1);
SkipWhitespace(&p_speculative, skip_comments);
AstNode* next_type1 = ParseType1(&p_speculative);
if (!next_type1) {
return nullptr;
}
tail->sibling = next_type1;
tail = next_type1;
SkipWhitespace(&p_speculative, skip_comments);
}
// Create a new AstNode with all parsed types.
AstNode* node = AddNode(
p, AstNode::Type::kType,
p->data.substr(0, p->data.size() - p_speculative.data.size()), type1);
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
AstNode* ParseId(Parser* p) {
std::string_view id = p->data;
// If the id doesnt start with a valid name character, return null.
if (id.empty() || !IsExtendedAlpha(id[0])) {
return nullptr;
}
// Read through the name character by character and make sure it's valid.
size_t index = 1;
while (true) {
if (index < id.size() && (id[index] == '-' || id[index] == '.')) {
++index;
if (index < id.size() && !IsExtendedAlpha(id[index]) &&
!std::isdigit(static_cast<unsigned char>(id[index]))) {
return nullptr;
}
++index;
} else if (index < id.size() &&
(IsExtendedAlpha(id[index]) ||
std::isdigit(static_cast<unsigned char>(id[index])))) {
++index;
} else {
break;
}
}
// Create and return a new node with the parsed data.
AstNode* node = AddNode(p, AstNode::Type::kId, p->data.substr(0, index));
p->data.remove_prefix(index);
return node;
}
AstNode* UpdateNodesForGroupEntry(Parser* p,
Parser* p_speculative,
AstNode* occur,
AstNode* member_key,
AstNode* type) {
AstNode* node = AddNode(p, AstNode::Type::kGrpent, std::string_view());
if (occur) {
node->children = occur;
if (member_key) {
occur->sibling = member_key;
member_key->sibling = type;
} else {
occur->sibling = type;
}
} else if (member_key) {
node->children = member_key;
member_key->sibling = type;
} else {
node->children = type;
}
node->text = p->data.substr(0, p->data.size() - p_speculative->data.size());
p->data = p_speculative->data;
std::move(p_speculative->nodes.begin(), p_speculative->nodes.end(),
std::back_inserter(p->nodes));
return node;
}
// Parse a group entry of form <id_num>: <type> ; <name>
AstNode* ParseGroupEntryWithNameInComment(Parser* p) {
Parser p_speculative{p->data};
AstNode* occur = ParseOccur(&p_speculative);
if (occur) {
SkipWhitespace(&p_speculative, false);
}
AstNode* member_key_num = ParseValue(&p_speculative);
if (!member_key_num) {
return nullptr;
}
SkipWhitespace(&p_speculative, false);
if (p_speculative.data.empty() || p_speculative.data[0] != ':') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
SkipWhitespace(&p_speculative, false);
AstNode* type = ParseType(&p_speculative, false);
if (!type) {
return nullptr;
}
SkipWhitespace(&p_speculative, false);
AstNode* member_key = ParseMemberKeyFromComment(&p_speculative);
if (!member_key) {
return nullptr;
}
member_key->integer_member_key_text = member_key_num->text;
return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
}
AstNode* ParseGroupEntryWithNameAsId(Parser* p) {
Parser p_speculative{p->data};
AstNode* occur = ParseOccur(&p_speculative);
if (occur) {
SkipWhitespace(&p_speculative);
}
AstNode* member_key = ParseMemberKey(&p_speculative);
if (member_key) {
SkipWhitespace(&p_speculative);
}
AstNode* type = ParseType(&p_speculative);
if (!type) {
return nullptr;
}
return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
}
// NOTE: This should probably never be hit, why is it in the grammar?
AstNode* ParseGroupEntryWithGroupReference(Parser* p) {
Parser p_speculative{p->data};
// Check for an occurance indicator ('?', '*', "+") before the sub-group
// definition.
AstNode* occur = ParseOccur(&p_speculative);
if (occur) {
SkipWhitespace(&p_speculative);
}
// Parse the ID of the sub-group.
AstNode* id = ParseId(&p_speculative);
if (!id) {
return nullptr;
}
id->type = AstNode::Type::kGroupname;
if (!p_speculative.data.empty() && p_speculative.data[0] == '<') {
std::cerr << "It looks like you're trying to use a generic argument, "
"which we don't support"
<< std::endl;
return nullptr;
}
// Create a new node containing this sub-group reference.
AstNode* node = AddNode(p, AstNode::Type::kGrpent, std::string_view());
if (occur) {
occur->sibling = id;
node->children = occur;
} else {
node->children = id;
}
node->text = p->data.substr(0, p->data.size() - p_speculative.data.size());
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
// Recursively parse a group entry that's an inline-defined group of the form
// '(...<some contents>...)'.
AstNode* ParseGroupEntryWithInlineGroupDefinition(Parser* p) {
Parser p_speculative{p->data};
AstNode* occur = ParseOccur(&p_speculative);
if (occur) {
SkipWhitespace(&p_speculative);
}
if (p_speculative.data.empty() || p_speculative.data[0] != '(') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
SkipWhitespace(&p_speculative);
AstNode* group = ParseGroup(&p_speculative); // Recursive call here.
if (!group) {
return nullptr;
}
SkipWhitespace(&p_speculative);
if (p_speculative.data.empty() || p_speculative.data[0] != ')') {
return nullptr;
}
p_speculative.data.remove_prefix(1);
AstNode* node = AddNode(p, AstNode::Type::kGrpent, std::string_view());
if (occur) {
node->children = occur;
occur->sibling = group;
} else {
node->children = group;
}
node->text = p->data.substr(0, p->data.size() - p_speculative.data.size());
p->data = p_speculative.data;
std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
std::back_inserter(p->nodes));
return node;
}
// Recursively parse the group assignemnt.
AstNode* ParseGroupEntry(Parser* p) {
// Parse a group entry of form '#: type ; name'
AstNode* node = ParseGroupEntryWithNameInComment(p);
// Parse a group entry of form 'id: type'.
if (!node) {
node = ParseGroupEntryWithNameAsId(p);
}
// Parse a group entry of form 'subgroupName'.
if (!node) {
node = ParseGroupEntryWithGroupReference(p);
}
// Parse a group entry of the form: '(' <some contents> ')'.
// NOTE: This is the method hit during the top-level group parsing, and the
// recursive call occurs inside this method.
if (!node) {
node = ParseGroupEntryWithInlineGroupDefinition(p);
}
// Return the results of the recursive call.
return node;
}
AstNode* ParseRule(Parser* p) {
std::string_view start = p->data;
// Parse the type key, if it's present
std::optional<std::string_view> type_key = ParseTypeKeyFromComment(p);
SkipWhitespace(p);
// Use the parser to extract the id and data.
AstNode* id = ParseId(p);
if (!id) {
Logger::Error("No id found!");
return nullptr;
}
if (!p->data.empty() && p->data[0] == '<') {
std::cerr << "It looks like you're trying to use a generic parameter, "
"which we don't support"
<< std::endl;
return nullptr;
}
// Determine the type of assignment being done to this variable name (ie '=').
SkipWhitespace(p);
std::string_view assign_start = p->data;
AssignType assign_type = ParseAssignmentType(p);
if (assign_type != AssignType::kAssign) {
Logger::Error("No assignment operator found! assign_type: " +
std::to_string(static_cast<int>(assign_type)));
return nullptr;
}
AstNode* assign_node =
AddNode(p,
(assign_type == AssignType::kAssign) ? AstNode::Type::kAssign
: (assign_type == AssignType::kAssignT) ? AstNode::Type::kAssignT
: AstNode::Type::kAssignG,
assign_start.substr(0, assign_start.size() - p->data.size()));
id->sibling = assign_node;
// Parse the object type being assigned.
SkipWhitespace(p);
AstNode* type = ParseType(p, false); // Try to parse it as a type.
id->type = AstNode::Type::kTypename;
if (!type) { // If it's not a type, try and parse it as a group.
type = ParseGroupEntry(p);
id->type = AstNode::Type::kGroupname;
}
if (!type) { // if it's not a type or a group, exit as failure.
Logger::Error("No node type found!");
return nullptr;
}
assign_node->sibling = type;
SkipWhitespace(p, false);
// Return the results.
auto rule_node = AddNode(p, AstNode::Type::kRule,
start.substr(0, start.size() - p->data.size()), id);
rule_node->type_key = type_key;
return rule_node;
}
// Iteratively parse the CDDL spec into a tree structure.
ParseResult ParseCddl(std::string_view data) {
while (!data.empty() && data.back() == 0) {
data.remove_suffix(1);
}
if (data.empty()) {
return {nullptr, {}};
}
Parser p = {data};
SkipStartWhitespace(&p);
AstNode* root = nullptr;
AstNode* tail = nullptr;
do {
AstNode* next = ParseRule(&p);
if (!next) {
Logger::Error(
std::string("Failed to parse next node. Failed starting at: '") +
std::string(p.data) + "'");
return {nullptr, {}};
} else {
Logger::Log(std::string("Processed text \"") + std::string(next->text) +
"\" into node: ");
DumpAst(next);
}
if (!root) {
root = next;
}
if (tail) {
tail->sibling = next;
}
tail = next;
} while (!p.data.empty());
return {root, std::move(p.nodes)};
}
// Recursively print out the AstNode graph.
void DumpAst(AstNode* node, int indent_level) {
while (node) {
// Prefix with '-'s so the levels of the graph are clear.
std::string node_text = "";
for (int i = 0; i <= indent_level; ++i) {
node_text += "--";
}
// Print the type.
switch (node->type) {
case AstNode::Type::kRule:
node_text += "kRule";
break;
case AstNode::Type::kTypename:
node_text += "kTypename";
break;
case AstNode::Type::kGroupname:
node_text += "kGroupname";
break;
case AstNode::Type::kAssign:
node_text += "kAssign";
break;
case AstNode::Type::kAssignT:
node_text += "kAssignT";
break;
case AstNode::Type::kAssignG:
node_text += "kAssignG";
break;
case AstNode::Type::kType:
node_text += "kType";
break;
case AstNode::Type::kGrpent:
node_text += "kGrpent";
break;
case AstNode::Type::kType1:
node_text += "kType1";
break;
case AstNode::Type::kType2:
node_text += "kType2";
break;
case AstNode::Type::kValue:
node_text += "kValue";
break;
case AstNode::Type::kGroup:
node_text += "kGroup";
break;
case AstNode::Type::kUint:
node_text += "kUint";
break;
case AstNode::Type::kDigit:
node_text += "kDigit";
break;
case AstNode::Type::kRangeop:
node_text += "kRangeop";
break;
case AstNode::Type::kCtlop:
node_text += "kCtlop";
break;
case AstNode::Type::kGrpchoice:
node_text += "kGrpchoice";
break;
case AstNode::Type::kOccur:
node_text += "kOccur";
break;
case AstNode::Type::kMemberKey:
node_text += "kMemberKey";
break;
case AstNode::Type::kId:
node_text += "kId";
break;
case AstNode::Type::kNumber:
node_text += "kNumber";
break;
case AstNode::Type::kText:
node_text += "kText";
break;
case AstNode::Type::kBytes:
node_text += "kBytes";
break;
case AstNode::Type::kBool:
node_text += "kBool";
break;
case AstNode::Type::kFloat:
node_text += "kFloat";
break;
case AstNode::Type::kFloat64:
node_text += "kFloat64";
break;
case AstNode::Type::kInt:
node_text += "kInt";
break;
case AstNode::Type::kOther:
node_text += "kOther";
break;
}
if (node->type_key != std::nullopt) {
node_text +=
" (type key=\"" + std::string(node->type_key.value()) + "\")";
}
node_text += ": ";
// Print the contents.
int size = static_cast<int>(node->text.size());
std::string_view text = node->text.data();
for (int i = 0; i < size; ++i) {
if (text[i] == ' ' || text[i] == '\n') {
node_text += " ";
while (i < size - 1 && (text[i + 1] == ' ' || text[i + 1] == '\n')) {
++i;
}
continue;
} else {
node_text += text[i];
}
}
Logger::Log(node_text);
// Recursively print children then iteratively print siblings.
DumpAst(node->children, indent_level + 1);
node = node->sibling;
}
}