| 1 | #include <assert.h>
|
|---|
| 2 | #include <iostream>
|
|---|
| 3 | #include <peglib.h>
|
|---|
| 4 | #include <ctime>
|
|---|
| 5 |
|
|---|
| 6 | using namespace peg;
|
|---|
| 7 | using namespace std;
|
|---|
| 8 |
|
|---|
| 9 | int main(void) {
|
|---|
| 10 | // (2) Make a parser
|
|---|
| 11 | parser parser;
|
|---|
| 12 | parser.set_logger([](size_t line, size_t col, const string& msg, const string &rule) {
|
|---|
| 13 | cerr << line << ":" << col << ": " << msg << "\n";
|
|---|
| 14 | });
|
|---|
| 15 | auto grammar = (R"---(
|
|---|
| 16 | command_list <- command (";" command)*
|
|---|
| 17 | command <- "color" atom_specifier < "#" [a-f0-9]{6} >
|
|---|
| 18 | atom_specifier <- as_term "&" atom_specifier / as_term "|" atom_specifier / as_term
|
|---|
| 19 | as_term <- "(" atom_specifier ")" zone_selector? / "~" as_term zone_selector? / SELECTOR_NAME / model_list
|
|---|
| 20 | model_list <- model+
|
|---|
| 21 | model <- ("#!" / "#") model_hierarchy ("##" attribute_list)? model_parts* zone_selector? / attribute_list model_parts* zone_selector* / model_parts zone_selector*
|
|---|
| 22 | # should be negative lookbehind for white space before the '.' in the below...
|
|---|
| 23 | model_hierarchy <- < model_range_list (!Space "." !Space model_hierarchy)* >
|
|---|
| 24 | model_range_list <- model_range ("," model_range_list)*
|
|---|
| 25 | model_range <- MODEL_SPEC_START "-" MODEL_SPEC_END / MODEL_SPEC_ANY
|
|---|
| 26 | model_parts <- chain+
|
|---|
| 27 | chain <- "/" part_list ("//" attribute_list)? residue* / "//" attribute_list residue* / residue+
|
|---|
| 28 | residue <- ":" part_list ("::" attribute_list)? atom* / "::" attribute_list atom* / atom+
|
|---|
| 29 | part_list <- PART_RANGE_LIST "," part_list / PART_RANGE_LIST
|
|---|
| 30 | # atom ranges are not allowed
|
|---|
| 31 | atom <- "@" atom_list ("@@" attribute_list)? / "@@" attribute_list
|
|---|
| 32 | atom_list <- ATOM_NAME "," atom_list / ATOM_NAME
|
|---|
| 33 | attribute_list <- attr_test ("," attr_test)*
|
|---|
| 34 | attr_test <- ATTR_NAME ATTR_OPERATOR ATTR_VALUE / ATTR_NAME / "~" ATTR_NAME
|
|---|
| 35 | zone_selector <- ZONE_OPERATOR < [0-9]* "." [0-9]+ >
|
|---|
| 36 | # think about ranges in these character sets
|
|---|
| 37 | ATOM_NAME <- < [^#/:@; \t\n]+ >
|
|---|
| 38 | ATTR_NAME <- < [a-zA-Z_] [a-zA-Z0-9]* >
|
|---|
| 39 | ATTR_OPERATOR <- ">=" | ">" | "<=" | "<" | "==" | "=" | "!==" | "!=" | "<>"
|
|---|
| 40 | EndOfLine <- "\r\n" / "\n" / "\r"
|
|---|
| 41 | Space <- ' ' / '\t' / EndOfLine
|
|---|
| 42 | # Outer token delimiters to prevent automatic whitespace elimination inside quotes
|
|---|
| 43 | ATTR_VALUE <- < '"' < [^"]+ > '"' > / < "'" < [^']+ > "'" > / < [^#/:@,;"']+ >
|
|---|
| 44 | # limit model numbers to 5 digits to avoid conflicts with hex colors
|
|---|
| 45 | MODEL_SPEC <- < < [0-9]{1,5} > ![0-9A-Fa-f] >
|
|---|
| 46 | MODEL_SPEC_ANY <- MODEL_SPEC / "*"
|
|---|
| 47 | MODEL_SPEC_END <- MODEL_SPEC / "end" / "*"
|
|---|
| 48 | MODEL_SPEC_START <- MODEL_SPEC / "start" / "*"
|
|---|
| 49 | RANGE_CHAR <- [^#/:@,;- \t\n]
|
|---|
| 50 | PART_RANGE_LIST <- < "-"? RANGE_CHAR+ ("-" RANGE_CHAR+)? >
|
|---|
| 51 | SELECTOR_NAME <- < [a-zA-Z_][-+a-zA-Z0-9_]* >
|
|---|
| 52 | ZONE_OPERATOR <- "@>" | "@<" | ":>" | ":<" | "/>" | "/<" | "#>" | "#<"
|
|---|
| 53 | %whitespace <- [ \t\r\n]*
|
|---|
| 54 | )---");
|
|---|
| 55 | auto t0 = clock();
|
|---|
| 56 | auto ok = parser.load_grammar(grammar);
|
|---|
| 57 | auto t1 = clock();
|
|---|
| 58 | std::cerr << "load grammar: " << (t1-t0) / (float)CLOCKS_PER_SEC << " seconds\n";
|
|---|
| 59 | assert(ok);
|
|---|
| 60 |
|
|---|
| 61 | assert(static_cast<bool>(parser) == true);
|
|---|
| 62 |
|
|---|
| 63 | // (4) Parse
|
|---|
| 64 | auto do_packrat = true;
|
|---|
| 65 | if (do_packrat)
|
|---|
| 66 | parser.enable_packrat_parsing(); // Enable packrat parsing.
|
|---|
| 67 |
|
|---|
| 68 | string test_cmd = "";
|
|---|
| 69 | test_cmd += "color @@serial_number=1";
|
|---|
| 70 | for (int i = 2; i <= 8956; ++i) {
|
|---|
| 71 | test_cmd += "|@@serial_number=";
|
|---|
| 72 | test_cmd += to_string(i);
|
|---|
| 73 | }
|
|---|
| 74 | test_cmd += " #ff8654";
|
|---|
| 75 | std::cerr << "command string is " << test_cmd.size() << " characters\n";
|
|---|
| 76 |
|
|---|
| 77 |
|
|---|
| 78 | t0 = clock();
|
|---|
| 79 | parser.parse(test_cmd);
|
|---|
| 80 | t1 = clock();
|
|---|
| 81 | std::cerr << "parse (" << (do_packrat ? "" : "no ") << "packrat): " << (t1-t0) / (float)CLOCKS_PER_SEC << " seconds\n";
|
|---|
| 82 | }
|
|---|