Checkpoint: working parsing

This commit is contained in:
Ian Gulliver
2014-06-26 22:48:03 -07:00
parent 8d445df2c4
commit 3bee8b9acf
4 changed files with 270 additions and 248 deletions

View File

@@ -1,27 +1,27 @@
var mediawiki = { var mediawiki = {
'b': [rr.Literal("'''"), rr.Ref('wikichunk'), rr.Literal("'''")], 'b': rr.Node('b', rr.Sequence(rr.Literal("'''"), rr.Ref('wikichunk'), rr.Literal("'''"))),
'del': [rr.Literal('<strike>'), rr.Ref('wikichunk'), rr.Literal('</strike>')], 'del': rr.Sequence(rr.Literal('<strike>'), rr.Ref('wikichunk'), rr.Literal('</strike>')),
'h2': [rr.StartOfLine(), rr.Literal('== '), rr.SingleLineText(), rr.Literal(' =='), rr.EndOfLine()], 'h2': rr.Sequence(rr.StartOfLine(), rr.Literal('== '), rr.SingleLineText(), rr.Literal(' =='), rr.EndOfLine()),
'h3': [rr.StartOfLine(), rr.Literal('=== '), rr.SingleLineText(), rr.Literal(' ==='), rr.EndOfLine()], 'h3': rr.Node('h3', rr.Sequence(rr.StartOfLine(), rr.Literal('=== '), rr.SingleLineText(), rr.Literal(' ==='), rr.EndOfLine())),
'h4': [rr.StartOfLine(), rr.Literal('==== '), rr.SingleLineText(), rr.Literal(' ===='), rr.EndOfLine()], 'h4': rr.Sequence(rr.StartOfLine(), rr.Literal('==== '), rr.SingleLineText(), rr.Literal(' ===='), rr.EndOfLine()),
'h5': [rr.StartOfLine(), rr.Literal('===== '), rr.SingleLineText(), rr.Literal(' ====='), rr.EndOfLine()], 'h5': rr.Sequence(rr.StartOfLine(), rr.Literal('===== '), rr.SingleLineText(), rr.Literal(' ====='), rr.EndOfLine()),
'h6': [rr.StartOfLine(), rr.Literal('====== '), rr.SingleLineText(), rr.Literal(' ======'), rr.EndOfLine()], 'h6': rr.Sequence(rr.StartOfLine(), rr.Literal('====== '), rr.SingleLineText(), rr.Literal(' ======'), rr.EndOfLine()),
'hr': [rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()], 'hr': rr.Sequence(rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()),
'i': [rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")], 'i': rr.Sequence(rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")),
'nowiki': [rr.Literal('<nowiki>'), rr.MultiLineText(), rr.Literal('</nowiki>')], 'nowiki': rr.Sequence(rr.Literal('<nowiki>'), rr.MultiLineText(), rr.Literal('</nowiki>')),
'text': [rr.MultiLineText()], 'text': rr.MultiLineText(),
'wikichunk': [rr.Or( 'wikichunk': rr.Or(
// 'b', rr.Ref('b'),
// 'del', rr.Ref('del'),
// 'h2', rr.Ref('h2'),
'h3', rr.Ref('h3'),
// 'h4', rr.Ref('h4'),
// 'h5', rr.Ref('h5'),
// 'h6', rr.Ref('h6'),
// 'hr', rr.Ref('hr'),
// 'i', rr.Ref('i'),
// 'nowiki', rr.Ref('nowiki'),
'text' rr.Ref('text')
)], ),
'wikidoc': [rr.ZeroOrMore('wikichunk')], 'wikidoc': rr.Node('wikidoc', rr.ZeroOrMore(rr.Ref('wikichunk'))),
}; };

View File

@@ -1,6 +1,19 @@
var rr = {}; var rr = {};
/**
 * Wraps an array in a minimal iterator object.
 *
 * Every call to next() hands back the following element as
 * { 'done': false, 'value': element }. Once the index has reached the
 * array's length, every call answers { 'done': true }.
 *
 * @param {Array} arr Values to hand out, in order.
 * @return {{next: function(): Object}} Iterator over arr.
 * @private
 */
rr.iterableFromArray_ = function(arr) {
  var position = 0;
  var step = function() {
    // Guard first: nothing left to hand out.
    if (position >= arr.length) {
      return { 'done': true };
    }
    var current = arr[position];
    position += 1;
    return { 'done': false, 'value': current };
  };
  return { 'next': step.bind(this) };
};
rr.Literal_ = function(value) { rr.Literal_ = function(value) {
this.value_ = value; this.value_ = value;
@@ -8,20 +21,12 @@ rr.Literal_ = function(value) {
rr.Literal_.prototype.match = function(context) { rr.Literal_.prototype.match = function(context) {
if (context.stringAfter(this.value_.length) == this.value_) { if (context.stringAfter(this.value_.length) == this.value_) {
context.advance(this.value_.length); return rr.iterableFromArray_([{
return []; 'context': context.advance(this.value_.length),
'nodes': []
}]);
} else { } else {
return null; return rr.iterableFromArray_([]);
}
};
rr.Literal_.prototype.search = function(context) {
console.log(context.stringAfter());
var index = context.stringAfter().indexOf(this.value_);
if (index == -1) {
return null;
} else {
return index;
} }
}; };
@@ -37,12 +42,8 @@ rr.Ref_ = function(key) {
this.key_ = key; this.key_ = key;
}; };
rr.Ref_.prototype.minimize = function(parser) {
return parser.minimize(this.key_);
};
rr.Ref_.prototype.match = function(context) { rr.Ref_.prototype.match = function(context) {
return context.parser.parse(this.key_, context); return context.rules[this.key_].match(context);
}; };
rr.Ref = function(key) { rr.Ref = function(key) {
@@ -53,33 +54,66 @@ rr.Ref.cache = {};
/**
 * Rule that wraps whatever its child rule matches in a freshly created
 * DOM element.
 *
 * @param {string} name Tag name handed to document.createElement.
 * @param {Object} child Rule whose match(context) results get wrapped.
 * @constructor
 * @private
 */
rr.Node_ = function(name, child) {
  this.name_ = name;
  this.child_ = child;
};


/**
 * Lazily enumerates the child's matches, turning each into one element.
 *
 * For every result the child iterator yields, a new element named
 * this.name_ is created, the result's nodes are appended to it in order,
 * normalize() is called on it, and a result carrying the child's context
 * plus a one-element 'nodes' array is handed back. When the child iterator
 * reports done, so does this one.
 *
 * @param {Object} context Parse position, passed straight to the child.
 * @return {{next: function(): Object}} Iterator of wrapped results.
 */
rr.Node_.prototype.match = function(context) {
  var self = this;
  var childMatches = this.child_.match(context);

  var wrapNext = function() {
    var step = childMatches.next();
    if (step['done']) {
      return { 'done': true };
    }

    var matched = step['value'];
    var element = document.createElement(self.name_);
    var children = matched['nodes'];
    var index = 0;
    while (index < children.length) {
      element.appendChild(children[index]);
      index++;
    }
    element.normalize();

    var wrapped = {
      'context': matched['context'],
      'nodes': [element]
    };
    return { 'done': false, 'value': wrapped };
  };

  return { 'next': wrapNext };
};


/**
 * Factory for Node rules; always builds a new instance.
 *
 * @param {string} name Tag name for the wrapping element.
 * @param {Object} child Rule to wrap.
 * @return {rr.Node_} The new rule.
 */
rr.Node = function(name, child) {
  var rule = new rr.Node_(name, child);
  return rule;
};
rr.EndOfLine_ = function() { rr.EndOfLine_ = function() {
}; };
rr.EndOfLine_.prototype.match = function(context) { rr.EndOfLine_.prototype.match = function(context) {
if (context.atEnd()) { if (context.atEnd()) {
return []; return rr.iterableFromArray_([{
'context': context,
'nodes': []
}]);
} }
if (context.stringAfter(1) == '\n') { if (context.stringAfter(1) == '\n') {
context.advance(1); return rr.iterableFromArray_([{
'context': context.advance(1),
'nodes': []
}]);
return []; return [];
} }
if (context.stringBefore(1) == '\n') { if (context.stringBefore(1) == '\n') {
return []; return rr.iterableFromArray_([{
} 'context': context,
return null; 'nodes': []
}; }]);
rr.EndOfLine_.prototype.search = function(context) {
if (context.atEnd()) {
return 0;
}
var loc = context.stringAfter().indexOf('\n');
if (loc == -1) {
return context.remaining();
} else {
return loc;
} }
return rr.iterableFromArray_([]);
}; };
rr.EndOfLine = function() { rr.EndOfLine = function() {
@@ -94,16 +128,15 @@ rr.EndOfText_ = function() {
rr.EndOfText_.prototype.match = function(context) { rr.EndOfText_.prototype.match = function(context) {
if (context.atEnd()) { if (context.atEnd()) {
return null; return rr.iterableFromArray_([{
'context': context,
'nodes': []
}]);
} else { } else {
return []; return rr.iterableFromArray_([]);
} }
}; };
rr.EndOfText_.prototype.search = function(context) {
return context.remaining();
};
rr.EndOfText = function() { rr.EndOfText = function() {
return rr.EndOfText.cache; return rr.EndOfText.cache;
}; };
@@ -114,14 +147,26 @@ rr.EndOfText.cache = new rr.EndOfText_();
rr.MultiLineText_ = function() { rr.MultiLineText_ = function() {
}; };
rr.MultiLineText_.prototype.minimize = function() {
return true;
};
rr.MultiLineText_.prototype.match = function(context) { rr.MultiLineText_.prototype.match = function(context) {
var ret = [document.createTextNode(context.stringAfter())]; var i = 1;
context.advance(context.remaining()); return {
return ret; 'next': function() {
if (i <= context.remaining()) {
var newNode = document.createTextNode(context.stringAfter(i));
var ret = {
'done': false,
'value': {
'nodes': [newNode],
'context': context.advance(i)
}
};
i++;
return ret;
} else {
return { 'done': true };
}
}.bind(this)
}
}; };
rr.MultiLineText = function() { rr.MultiLineText = function() {
@@ -135,25 +180,30 @@ rr.Or_ = function(options) {
this.options_ = options; this.options_ = options;
}; };
rr.Or_.prototype.minimize = function(parser) {
for (var i = 0; i < this.options_.length; i++) {
var option = this.options_[i];
if (parser.minimize(option)) {
return true;
}
}
return false;
};
rr.Or_.prototype.match = function(context) { rr.Or_.prototype.match = function(context) {
for (var i = 0; i < this.options_.length; i++) { var i = 0;
var option = this.options_[i]; var lastIterator = null;
var result = context.parser.parse(option, context); return {
if (result) { 'next': function() {
return result; if (lastIterator) {
} var next = lastIterator.next();
if (!next['done']) {
return next;
}
}
for (; i < this.options_.length; i++) {
var option = this.options_[i];
lastIterator = option.match(context);
var next = lastIterator.next();
if (next['done']) {
continue;
} else {
return next;
}
}
return { 'done': true };
}.bind(this)
} }
return null;
}; };
rr.Or = function() { rr.Or = function() {
@@ -165,18 +215,30 @@ rr.Or = function() {
rr.SingleLineText_ = function() { rr.SingleLineText_ = function() {
}; };
rr.SingleLineText_.prototype.minimize = function() {
return true;
}
rr.SingleLineText_.prototype.match = function(context) { rr.SingleLineText_.prototype.match = function(context) {
var newLine = context.stringAfter().indexOf('\n'); var i = 1;
if (newLine == -1) { return {
newLine = context.remaining(); 'next': function() {
if (i <= context.remaining()) {
var newString = context.stringAfter(i);
if (newString.indexOf('\n') != -1) {
return { 'done': true };
}
var newNode = document.createTextNode(newString);
var ret = {
'done': false,
'value': {
'nodes': [newNode],
'context': context.advance(i)
}
};
i++;
return ret;
} else {
return {'done': true };
}
}.bind(this)
} }
var ret = [document.createTextNode(context.stringAfter(newLine))];
context.advance(newLine);
return ret;
}; };
rr.SingleLineText = function() { rr.SingleLineText = function() {
@@ -191,27 +253,24 @@ rr.StartOfLine_ = function() {
rr.StartOfLine_.prototype.match = function(context) { rr.StartOfLine_.prototype.match = function(context) {
if (context.atStart()) { if (context.atStart()) {
return []; return rr.iterableFromArray_([{
'context': context,
'nodes': []
}]);
} }
if (context.stringAfter(1) == '\n') { if (context.stringAfter(1) == '\n') {
context.advance(1); return rr.iterableFromArray_([{
return []; 'context': context.advance(1),
'nodes': []
}]);
} }
if (context.stringBefore(1) == '\n') { if (context.stringBefore(1) == '\n') {
return []; return rr.iterableFromArray_([{
'context': context,
'nodes': []
}]);
} }
return null; return rr.iterableFromArray_([]);
};
rr.StartOfLine_.prototype.search = function(context) {
if (context.atStart()) {
return 0;
}
var loc = context.stringAfter().indexOf('\n');
if (loc == -1) {
return null;
}
return loc + 1;
}; };
rr.StartOfLine = function() { rr.StartOfLine = function() {
@@ -221,47 +280,93 @@ rr.StartOfLine.cache = new rr.StartOfLine_();
rr.ZeroOrMore_ = function(key) { rr.ZeroOrMore_ = function(child) {
this.key_ = key; this.child_ = child;
};
rr.ZeroOrMore_.prototype.minimize = function(parser) {
return parser.minimize(this.key_);
}; };
rr.ZeroOrMore_.prototype.match = function(context) { rr.ZeroOrMore_.prototype.match = function(context) {
var ret = []; var nodes = [];
while (context.inputIndex < context.input.length) { while (!context.atEnd()) {
var result = context.parser.parse(this.key_, context); var next = this.child_.match(context).next();
if (!result) { if (next['done']) {
break; break;
} }
result.forEach(function(child) { context = next['value']['context'];
ret.push(child); Array.prototype.push.apply(nodes, next['value']['nodes']);
}); }
}; return rr.iterableFromArray_([{
return ret; 'context': context,
'nodes': nodes
}]);
}; };
rr.ZeroOrMore = function(key) { rr.ZeroOrMore = function(child) {
return (rr.ZeroOrMore.cache[key] || return new rr.ZeroOrMore_(child);
(rr.ZeroOrMore.cache[key] = new rr.ZeroOrMore_(key)));
}; };
rr.ZeroOrMore.cache = {};
rr.Context = function(parser, input, inputIndex) {
this.parser = parser; rr.Sequence_ = function(children) {
this.child_ = children[0];
if (children.length > 1) {
this.next_ = rr.Sequence.apply(null, children.slice(1));
} else {
this.next_ = null;
}
};
/**
 * Matches this sequence's first rule followed by the rest of the sequence,
 * lazily enumerating every combination.
 *
 * For each result of this.child_, the remainder (this.next_) is matched
 * starting from that result's context, and each of its results is combined
 * with the child's: the context comes from the remainder, the nodes are the
 * child's nodes followed by the remainder's nodes. When the remainder runs
 * out, the next child result is tried.
 *
 * When there is no remainder (this.next_ is null), the child's own iterator
 * is returned unchanged.
 *
 * @param {Object} context Position at which to start matching.
 * @return {{next: function(): Object}} Iterator of
 *     { 'done': false, 'value': { 'context': ..., 'nodes': [...] } }
 *     results, followed by { 'done': true }.
 */
rr.Sequence_.prototype.match = function(context) {
  var childIterator = this.child_.match(context);
  if (!this.next_) {
    return childIterator;
  }
  var currentChildValue = null;
  var nextIterator = null;
  // Latched once the child iterator reports done. Without it, a further
  // next() call would find the child's done-result still stored in
  // currentChildValue and either read ['value']['context'] off it (a
  // TypeError when the child never matched at all) or poll iterators that
  // have already finished.
  var exhausted = false;
  return {
    'next': function() {
      while (!exhausted) {
        if (!currentChildValue) {
          // Advance to the child's next alternative.
          currentChildValue = childIterator.next();
          if (currentChildValue['done']) {
            exhausted = true;
            break;
          }
          // New child result, so the remainder must be re-matched from it.
          nextIterator = null;
        }
        if (!nextIterator) {
          nextIterator = this.next_.match(
              currentChildValue['value']['context']);
        }
        var nextAppendValue = nextIterator.next();
        if (nextAppendValue['done']) {
          // Remainder has no more results for this child result.
          currentChildValue = null;
          continue;
        }
        return {
          'done': false,
          'value': {
            'context': nextAppendValue['value']['context'],
            'nodes': currentChildValue['value']['nodes'].concat(
                nextAppendValue['value']['nodes'])
          }
        };
      }
      return { 'done': true };
    }.bind(this)
  };
};
/**
 * Factory for Sequence rules.
 *
 * Collects all call arguments into a real array and hands that array to the
 * rr.Sequence_ constructor.
 *
 * @param {...Object} var_args Rules to match one after another.
 * @return {rr.Sequence_} The new rule.
 */
rr.Sequence = function() {
  var children = [];
  for (var i = 0; i < arguments.length; i++) {
    children.push(arguments[i]);
  }
  return new rr.Sequence_(children);
};
rr.Context = function(rules, input, inputIndex) {
this.rules = rules;
this.input = input; this.input = input;
this.inputIndex = inputIndex || 0; this.inputIndex = inputIndex || 0;
}; };
rr.Context.prototype.copy = function() { rr.Context.prototype.copy = function() {
return new rr.Context(this.parser, this.input, this.inputIndex); return new rr.Context(this.rules, this.input, this.inputIndex);
};
rr.Context.prototype.truncate = function(numChars) {
this.input = this.input.slice(this.inputIndex, this.inputIndex + numChars);
}; };
rr.Context.prototype.stringAfter = function(numChars) { rr.Context.prototype.stringAfter = function(numChars) {
@@ -293,102 +398,7 @@ rr.Context.prototype.remaining = function() {
}; };
rr.Context.prototype.advance = function(numChars) { rr.Context.prototype.advance = function(numChars) {
console.log('advance', numChars); var context = this.copy();
this.inputIndex += numChars; context.inputIndex += numChars;
}; return context;
/**
 * Parser object that holds on to a rule dictionary.
 *
 * @param {Object} dictionary Stored as-is on the instance as dictionary_.
 * @constructor
 */
var RecentRunes = function(dictionary) {
  this.dictionary_ = dictionary;
};
/**
 * Parses a whole input string as the given node type.
 *
 * Builds a new rr.Context over the input with this parser attached, runs
 * this.parse on it, and unwraps the outcome.
 *
 * @param {string} nodeType Key passed on to this.parse.
 * @param {string} input Text to parse.
 * @return {*} Element 0 of what this.parse returned, or null when
 *     this.parse returned a falsy value.
 */
RecentRunes.prototype.parseString = function(nodeType, input) {
  var startContext = new rr.Context(this, input);
  var parsed = this.parse(nodeType, startContext);
  return parsed ? parsed[0] : null;
};
/**
 * Reports whether any rule listed for a node type is a "minimize" rule.
 *
 * Looks up the rule list for nodeType in this.dictionary_ and asks each rule
 * in turn. A rule counts when it has a minimize method and that method,
 * called with this parser, returns a truthy value.
 *
 * NOTE(review): a rule's minimize may call back into this method (a rule
 * that forwards to parser.minimize with another key), so a grammar whose
 * rules refer to each other in a cycle would recurse without end; confirm
 * against the grammar in use.
 *
 * @param {string} nodeType Key into the rule dictionary.
 * @return {boolean} True as soon as one rule reports minimize, else false.
 */
RecentRunes.prototype.minimize = function(nodeType) {
  var rules = this.dictionary_[nodeType];
  for (var i = 0; i < rules.length; i++) {
    // Ask the individual rule, not the rule list: the list itself has no
    // minimize property, so testing it made this method always return false.
    var rule = rules[i];
    if (rule.minimize && rule.minimize(this)) {
      return true;
    }
  }
  return false;
};
RecentRunes.prototype.parse = function(nodeType, origContext) {
var context = origContext.copy();
var ret = document.createElement(nodeType);
var rules = this.dictionary_[nodeType];
rules = [];
var lastRuleMinimize = false;
for (var i = 0; i < rules.length; i++) {
console.log('nodeType:', nodeType, 'rule:', i);
var rule = rules[i];
if (rule.minimize && rule.minimize(this)) {
if (lastRuleMinimize) {
// Two minimize rules in a row is ambiguous
return null;
}
lastRuleMinimize = rule;
continue;
}
if (lastRuleMinimize) {
// Check if this rule can find a match in the string
var loc = rule.search(context);
if (loc == null) {
console.log('search fail');
return null;
}
// Check if the previous rule will match the interim data
var prevContext = context.copy();
prevContext.truncate(loc);
var prevMatch = lastRuleMinimize.match(prevContext);
if (!prevMatch) {
console.log('prevMatch fail');
return null;
};
context.advance(prevContext.inputIndex - context.inputIndex);
prevMatch.forEach(function(child) {
ret.appendChild(child);
});
lastRuleMinimize = false;
}
console.log(context);
var match = rule.match(context);
if (!match) {
console.log('rule fail');
return null;
}
match.forEach(function(child) {
ret.appendChild(child);
});
};
if (lastRuleMinimize) {
var prevContext = context.copy();
prevContext.truncate(loc);
var lastMatch = lastRuleMinimize.match(prevContext);
if (!lastMatch) {
return null;
}
context.advance(prevContext.inputIndex - context.inputIndex);
lastMatch.forEach(function(child) {
ret.appendChild(child);
});
}
console.log('nodeType:', nodeType, 'context:', context);
origContext.advance(context.inputIndex - origContext.inputIndex);
return [ret];
}; };

15
test.js
View File

@@ -1,12 +1,9 @@
asyncTest('Simple', function() { asyncTest('Simple', function() {
// expect(1); var context = new rr.Context(mediawiki,
var parser = new RecentRunes(mediawiki); "=== Heading ===\n\
console.log('foo');
var result = parser.parseString('wikidoc',
'=== Heading ===\n\
This is a wiki doc.\n\ This is a wiki doc.\n\
How about some <b>bold and <i>bold italic</i></b>.\n\ How about some '''bold and <i>bold italic</i>'''.\n\
I would also love some <nowiki>nowiki <b>foo</b></nowiki>'); I would also love some <nowiki>nowiki <b>foo</b></nowiki>");
console.log(result); var iterable = context.rules['wikidoc'].match(context);
document.body.appendChild(result); console.log(iterable.next());
}); });

15
verify.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/sh
# Checks recentrunes.js in two passes:
#   1. POSTs the file to the Closure Compiler web endpoint, requesting JSON
#      output with errors and warnings at the verbose warning level.
#   2. Runs gjslint in strict mode on the same file.
# The script's exit status is that of the final gjslint command.

curl -s \
  -d compilation_level=ADVANCED_OPTIMIZATIONS \
  -d output_format=json \
  -d output_info=errors \
  -d output_info=warnings \
  -d language=ECMASCRIPT5 \
  -d warning_level=verbose \
  --data-urlencode "js_code@recentrunes.js" \
  http://closure-compiler.appspot.com/compile

# Emit a line break between the compiler response and the lint output.
printf '\n'

gjslint --strict recentrunes.js