From 3bee8b9acf2fd406e53df239a58c7257b3e879b7 Mon Sep 17 00:00:00 2001 From: Ian Gulliver Date: Thu, 26 Jun 2014 22:48:03 -0700 Subject: [PATCH] Checkpoint: working parsing --- grammars/mediawiki.js | 50 ++--- recentrunes.js | 438 +++++++++++++++++++++--------------------- test.js | 15 +- verify.sh | 15 ++ 4 files changed, 270 insertions(+), 248 deletions(-) create mode 100755 verify.sh diff --git a/grammars/mediawiki.js b/grammars/mediawiki.js index 59f9589..02675b1 100644 --- a/grammars/mediawiki.js +++ b/grammars/mediawiki.js @@ -1,27 +1,27 @@ var mediawiki = { - 'b': [rr.Literal("'''"), rr.Ref('wikichunk'), rr.Literal("'''")], - 'del': [rr.Literal(''), rr.Ref('wikichunk'), rr.Literal('')], - 'h2': [rr.StartOfLine(), rr.Literal('== '), rr.SingleLineText(), rr.Literal(' =='), rr.EndOfLine()], - 'h3': [rr.StartOfLine(), rr.Literal('=== '), rr.SingleLineText(), rr.Literal(' ==='), rr.EndOfLine()], - 'h4': [rr.StartOfLine(), rr.Literal('==== '), rr.SingleLineText(), rr.Literal(' ===='), rr.EndOfLine()], - 'h5': [rr.StartOfLine(), rr.Literal('===== '), rr.SingleLineText(), rr.Literal(' ====='), rr.EndOfLine()], - 'h6': [rr.StartOfLine(), rr.Literal('====== '), rr.SingleLineText(), rr.Literal(' ======'), rr.EndOfLine()], - 'hr': [rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()], - 'i': [rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")], - 'nowiki': [rr.Literal(''), rr.MultiLineText(), rr.Literal('')], - 'text': [rr.MultiLineText()], - 'wikichunk': [rr.Or( - // 'b', - // 'del', - // 'h2', - 'h3', - // 'h4', - // 'h5', - // 'h6', - // 'hr', - // 'i', - // 'nowiki', - 'text' - )], - 'wikidoc': [rr.ZeroOrMore('wikichunk')], + 'b': rr.Node('b', rr.Sequence(rr.Literal("'''"), rr.Ref('wikichunk'), rr.Literal("'''"))), + 'del': rr.Sequence(rr.Literal(''), rr.Ref('wikichunk'), rr.Literal('')), + 'h2': rr.Sequence(rr.StartOfLine(), rr.Literal('== '), rr.SingleLineText(), rr.Literal(' =='), rr.EndOfLine()), + 'h3': rr.Node('h3', rr.Sequence(rr.StartOfLine(), rr.Literal('=== '), rr.SingleLineText(), rr.Literal(' ==='), rr.EndOfLine())), + 'h4': rr.Sequence(rr.StartOfLine(), rr.Literal('==== '), rr.SingleLineText(), rr.Literal(' ===='), rr.EndOfLine()), + 'h5': rr.Sequence(rr.StartOfLine(), rr.Literal('===== '), rr.SingleLineText(), rr.Literal(' ====='), rr.EndOfLine()), + 'h6': rr.Sequence(rr.StartOfLine(), rr.Literal('====== '), rr.SingleLineText(), rr.Literal(' ======'), rr.EndOfLine()), + 'hr': rr.Sequence(rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()), + 'i': rr.Sequence(rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")), + 'nowiki': rr.Sequence(rr.Literal(''), rr.MultiLineText(), rr.Literal('')), + 'text': rr.MultiLineText(), + 'wikichunk': rr.Or( + rr.Ref('b'), + rr.Ref('del'), + rr.Ref('h2'), + rr.Ref('h3'), + rr.Ref('h4'), + rr.Ref('h5'), + rr.Ref('h6'), + rr.Ref('hr'), + rr.Ref('i'), + rr.Ref('nowiki'), + rr.Ref('text') + ), + 'wikidoc': rr.Node('wikidoc', rr.ZeroOrMore(rr.Ref('wikichunk'))), }; diff --git a/recentrunes.js b/recentrunes.js index 468e639..c37d3a3 100644 --- a/recentrunes.js +++ b/recentrunes.js @@ -1,6 +1,19 @@ var rr = {}; +rr.iterableFromArray_ = function(arr) { + var i = 0; + return { + 'next': function() { + if (i < arr.length) { + return { 'done': false, 'value': arr[i++] }; + } else { + return { 'done': true }; + } + }.bind(this) + } +}; + rr.Literal_ = function(value) { this.value_ = value; @@ -8,20 +21,12 @@ rr.Literal_ = function(value) { rr.Literal_.prototype.match = function(context) { if (context.stringAfter(this.value_.length) == this.value_) { - context.advance(this.value_.length); - return []; + return rr.iterableFromArray_([{ + 'context': context.advance(this.value_.length), + 'nodes': [] + }]); } else { - return null; - } -}; - -rr.Literal_.prototype.search = function(context) { - console.log(context.stringAfter()); - var index = context.stringAfter().indexOf(this.value_); - if (index == -1) { - return null; - } else { - return index; + return rr.iterableFromArray_([]); } }; @@ -37,12 +42,8 @@ rr.Ref_ = function(key) { this.key_ = key; }; -rr.Ref_.prototype.minimize = function(parser) { - return parser.minimize(this.key_); -}; - rr.Ref_.prototype.match = function(context) { - return context.parser.parse(this.key_, context); + return context.rules[this.key_].match(context); }; rr.Ref = function(key) { @@ -53,33 +54,66 @@ rr.Ref.cache = {}; +rr.Node_ = function(name, child) { + this.name_ = name; + this.child_ = child; +}; + +rr.Node_.prototype.match = function(context) { + var iterator = this.child_.match(context); + return { + 'next': function() { + var next = iterator.next(); + if (next['done']) { + return { 'done': true }; + } + var node = document.createElement(this.name_); + var nodes = next['value']['nodes']; + for (var i = 0; i < nodes.length; i++) { + node.appendChild(nodes[i]); + } + node.normalize(); + return { + 'done': false, + 'value': { + 'context': next['value']['context'], + 'nodes': [node] + } + } + }.bind(this) + } +}; + +rr.Node = function(name, child) { + return new rr.Node_(name, child); +}; + + + rr.EndOfLine_ = function() { }; rr.EndOfLine_.prototype.match = function(context) { if (context.atEnd()) { - return []; + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': [] + }]); } if (context.stringAfter(1) == '\n') { - context.advance(1); + return rr.iterableFromArray_([{ + 'context': context.advance(1), + 'nodes': [] + }]); return []; } if (context.stringBefore(1) == '\n') { - return []; - } - return null; -}; - -rr.EndOfLine_.prototype.search = function(context) { - if (context.atEnd()) { - return 0; - } - var loc = context.stringAfter().indexOf('\n'); - if (loc == -1) { - return context.remaining(); - } else { - return loc; + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': [] + }]); } + return rr.iterableFromArray_([]); }; rr.EndOfLine = function() { @@ -94,16 +128,15 @@ rr.EndOfText_ = function() { rr.EndOfText_.prototype.match = function(context) { if (context.atEnd()) { - return null; + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': [] + }]); } else { - return []; + return rr.iterableFromArray_([]); } }; -rr.EndOfText_.prototype.search = function(context) { - return context.remaining(); -}; - rr.EndOfText = function() { return rr.EndOfText.cache; }; @@ -114,14 +147,26 @@ rr.EndOfText.cache = new rr.EndOfText_(); rr.MultiLineText_ = function() { }; -rr.MultiLineText_.prototype.minimize = function() { - return true; -}; - rr.MultiLineText_.prototype.match = function(context) { - var ret = [document.createTextNode(context.stringAfter())]; - context.advance(context.remaining()); - return ret; + var i = 1; + return { + 'next': function() { + if (i <= context.remaining()) { + var newNode = document.createTextNode(context.stringAfter(i)); + var ret = { + 'done': false, + 'value': { + 'nodes': [newNode], + 'context': context.advance(i) + } + }; + i++; + return ret; + } else { + return { 'done': true }; + } + }.bind(this) + } }; rr.MultiLineText = function() { @@ -135,25 +180,30 @@ rr.Or_ = function(options) { this.options_ = options; }; -rr.Or_.prototype.minimize = function(parser) { - for (var i = 0; i < this.options_.length; i++) { - var option = this.options_[i]; - if (parser.minimize(option)) { - return true; - } - } - return false; -}; - rr.Or_.prototype.match = function(context) { - for (var i = 0; i < this.options_.length; i++) { - var option = this.options_[i]; - var result = context.parser.parse(option, context); - if (result) { - return result; - } + var i = 0; + var lastIterator = null; + return { + 'next': function() { + if (lastIterator) { + var next = lastIterator.next(); + if (!next['done']) { + return next; + } + } + for (; i < this.options_.length; i++) { + var option = this.options_[i]; + lastIterator = option.match(context); + var next = lastIterator.next(); + if (next['done']) { + continue; + } else { + return next; + } + } + return { 'done': true }; + }.bind(this) } - return null; }; rr.Or = function() { @@ -165,18 +215,30 @@ rr.Or = function() { rr.SingleLineText_ = function() { }; -rr.SingleLineText_.prototype.minimize = function() { - return true; -} - rr.SingleLineText_.prototype.match = function(context) { - var newLine = context.stringAfter().indexOf('\n'); - if (newLine == -1) { - newLine = context.remaining(); + var i = 1; + return { + 'next': function() { + if (i <= context.remaining()) { + var newString = context.stringAfter(i); + if (newString.indexOf('\n') != -1) { + return { 'done': true }; + } + var newNode = document.createTextNode(newString); + var ret = { + 'done': false, + 'value': { + 'nodes': [newNode], + 'context': context.advance(i) + } + }; + i++; + return ret; + } else { + return {'done': true }; + } + }.bind(this) } - var ret = [document.createTextNode(context.stringAfter(newLine))]; - context.advance(newLine); - return ret; }; rr.SingleLineText = function() { @@ -191,27 +253,24 @@ rr.StartOfLine_ = function() { rr.StartOfLine_.prototype.match = function(context) { if (context.atStart()) { - return []; + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': [] + }]); } if (context.stringAfter(1) == '\n') { - context.advance(1); - return []; + return rr.iterableFromArray_([{ + 'context': context.advance(1), + 'nodes': [] + }]); } if (context.stringBefore(1) == '\n') { - return []; + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': [] + }]); } - return null; -}; - -rr.StartOfLine_.prototype.search = function(context) { - if (context.atStart()) { - return 0; - } - var loc = context.stringAfter().indexOf('\n'); - if (loc == -1) { - return null; - } - return loc + 1; + return rr.iterableFromArray_([]); }; rr.StartOfLine = function() { @@ -221,47 +280,93 @@ rr.StartOfLine.cache = new rr.StartOfLine_(); -rr.ZeroOrMore_ = function(key) { - this.key_ = key; -}; - -rr.ZeroOrMore_.prototype.minimize = function(parser) { - return parser.minimize(this.key_); +rr.ZeroOrMore_ = function(child) { + this.child_ = child; }; rr.ZeroOrMore_.prototype.match = function(context) { - var ret = []; - while (context.inputIndex < context.input.length) { - var result = context.parser.parse(this.key_, context); - if (!result) { + var nodes = []; + while (!context.atEnd()) { + var next = this.child_.match(context).next(); + if (next['done']) { break; } - result.forEach(function(child) { - ret.push(child); - }); - }; - return ret; + context = next['value']['context']; + Array.prototype.push.apply(nodes, next['value']['nodes']); + } + return rr.iterableFromArray_([{ + 'context': context, + 'nodes': nodes + }]); }; -rr.ZeroOrMore = function(key) { - return (rr.ZeroOrMore.cache[key] || - (rr.ZeroOrMore.cache[key] = new rr.ZeroOrMore_(key))); +rr.ZeroOrMore = function(child) { + return new rr.ZeroOrMore_(child); }; -rr.ZeroOrMore.cache = {}; -rr.Context = function(parser, input, inputIndex) { - this.parser = parser; + +rr.Sequence_ = function(children) { + this.child_ = children[0]; + if (children.length > 1) { + this.next_ = rr.Sequence.apply(null, children.slice(1)); + } else { + this.next_ = null; + } +}; + +rr.Sequence_.prototype.match = function(context) { + var childIterator = this.child_.match(context); + if (!this.next_) { + return childIterator; + } + var currentChildValue = null; + var nextIterator = null; + return { + 'next': function() { + while (true) { + if (!currentChildValue) { + currentChildValue = childIterator.next(); + if (currentChildValue['done']) { + return { 'done': true }; + } + nextIterator = null; + } + if (!nextIterator) { + nextIterator = this.next_.match(currentChildValue['value']['context']); + } + var nextAppendValue = nextIterator.next(); + if (nextAppendValue['done']) { + currentChildValue = null; + continue; + } + return { + 'done': false, + 'value': { + 'context': nextAppendValue['value']['context'], + 'nodes': currentChildValue['value']['nodes'].concat( + nextAppendValue['value']['nodes']) + } + } + } + }.bind(this) + } +}; + +rr.Sequence = function() { + return new rr.Sequence_(Array.prototype.slice.call(arguments)); +}; + + + +rr.Context = function(rules, input, inputIndex) { + this.rules = rules; this.input = input; this.inputIndex = inputIndex || 0; }; rr.Context.prototype.copy = function() { - return new rr.Context(this.parser, this.input, this.inputIndex); -}; - -rr.Context.prototype.truncate = function(numChars) { - this.input = this.input.slice(this.inputIndex, this.inputIndex + numChars); + return new rr.Context(this.rules, this.input, this.inputIndex); }; rr.Context.prototype.stringAfter = function(numChars) { @@ -293,102 +398,7 @@ rr.Context.prototype.remaining = function() { }; rr.Context.prototype.advance = function(numChars) { - console.log('advance', numChars); - this.inputIndex += numChars; -}; - - -var RecentRunes = function(dictionary) { - this.dictionary_ = dictionary; -}; - -RecentRunes.prototype.parseString = function(nodeType, input) { - var context = new rr.Context(this, input); - var ret = this.parse(nodeType, context); - if (ret) { - return ret[0]; - } else { - return null; - } -}; - -RecentRunes.prototype.minimize = function(nodeType) { - var rules = this.dictionary_[nodeType]; - for (var i = 0; i < rules.length; i++) { - if (rules.minimize && rules.minimize(this)) { - return true; - } - } - return false; -}; - -RecentRunes.prototype.parse = function(nodeType, origContext) { - var context = origContext.copy(); - var ret = document.createElement(nodeType); - var rules = this.dictionary_[nodeType]; - rules = []; - var lastRuleMinimize = false; - for (var i = 0; i < rules.length; i++) { - console.log('nodeType:', nodeType, 'rule:', i); - var rule = rules[i]; - if (rule.minimize && rule.minimize(this)) { - if (lastRuleMinimize) { - // Two minimize rules in a row is ambiguous - return null; - } - lastRuleMinimize = rule; - continue; - } - if (lastRuleMinimize) { - // Check if this rule can find a match in the string - var loc = rule.search(context); - if (loc == null) { - console.log('search fail'); - return null; - } - - // Check if the previous rule will match the interim data - var prevContext = context.copy(); - prevContext.truncate(loc); - var prevMatch = lastRuleMinimize.match(prevContext); - if (!prevMatch) { - console.log('prevMatch fail'); - return null; - }; - context.advance(prevContext.inputIndex - context.inputIndex); - prevMatch.forEach(function(child) { - ret.appendChild(child); - }); - - lastRuleMinimize = false; - } - console.log(context); - var match = rule.match(context); - if (!match) { - console.log('rule fail'); - return null; - } - match.forEach(function(child) { - ret.appendChild(child); - }); - }; - - if (lastRuleMinimize) { - var prevContext = context.copy(); - prevContext.truncate(loc); - var lastMatch = lastRuleMinimize.match(prevContext); - if (!lastMatch) { - return null; - } - context.advance(prevContext.inputIndex - context.inputIndex); - lastMatch.forEach(function(child) { - ret.appendChild(child); - }); - } - - console.log('nodeType:', nodeType, 'context:', context); - - origContext.advance(context.inputIndex - origContext.inputIndex); - - return [ret]; + var context = this.copy(); + context.inputIndex += numChars; + return context; }; diff --git a/test.js b/test.js index 3a27948..5e48876 100644 --- a/test.js +++ b/test.js @@ -1,12 +1,9 @@ asyncTest('Simple', function() { -// expect(1); - var parser = new RecentRunes(mediawiki); - console.log('foo'); - var result = parser.parseString('wikidoc', -'=== Heading ===\n\ + var context = new rr.Context(mediawiki, +"=== Heading ===\n\ This is a wiki doc.\n\ -How about some bold and bold italic.\n\ -I would also love some nowiki foo'); - console.log(result); - document.body.appendChild(result); +How about some '''bold and bold italic'''.\n\ +I would also love some nowiki foo"); + var iterable = context.rules['wikidoc'].match(context); + console.log(iterable.next()); }); diff --git a/verify.sh b/verify.sh new file mode 100755 index 0000000..fba5b62 --- /dev/null +++ b/verify.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +curl \ + --silent \ + --data compilation_level=ADVANCED_OPTIMIZATIONS \ + --data output_format=json \ + --data output_info=errors \ + --data output_info=warnings \ + --data language=ECMASCRIPT5 \ + --data warning_level=verbose \ + --data-urlencode "js_code@recentrunes.js" \ + http://closure-compiler.appspot.com/compile +echo + +gjslint --strict recentrunes.js