Start of parsing of mediawiki text.

This commit is contained in:
Ian Gulliver
2014-05-20 16:40:35 -07:00
parent bbabf59505
commit 835979f95e
3 changed files with 183 additions and 29 deletions

View File

@@ -9,7 +9,7 @@ var mediawiki = {
'hr': [rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()], 'hr': [rr.StartOfLine(), rr.Literal('----'), rr.EndOfLine()],
'i': [rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")], 'i': [rr.Literal("''"), rr.Ref('wikichunk'), rr.Literal("''")],
'nowiki': [rr.Literal('<nowiki>'), rr.MultiLineText(), rr.Literal('</nowiki>')], 'nowiki': [rr.Literal('<nowiki>'), rr.MultiLineText(), rr.Literal('</nowiki>')],
'nowikiword': [rr.WordText(), rr.Literal('<nowiki/>')], 'text': [rr.MultiLineText()],
'wikichunk': [rr.Or( 'wikichunk': [rr.Or(
'b', 'b',
'del', 'del',
@@ -21,8 +21,7 @@ var mediawiki = {
'hr', 'hr',
'i', 'i',
'nowiki', 'nowiki',
'nowikiword', 'text'
rr.MultiLineText()
)], )],
'wikidoc': [rr.ZeroOrMore('wikichunk')], 'wikidoc': [rr.ZeroOrMore('wikichunk')],
}; };

View File

@@ -1,5 +1,7 @@
var rr = {}; var rr = {};
rr.Literal_ = function(value) { rr.Literal_ = function(value) {
this.value_ = value; this.value_ = value;
}; };
@@ -28,10 +30,19 @@ rr.Literal = function(value) {
rr.Literal.cache = {}; rr.Literal.cache = {};
rr.Ref_ = function(key) { rr.Ref_ = function(key) {
this.key_ = key; this.key_ = key;
}; };
/**
 * Asks the parser whether the rule set this reference points at is a
 * "minimize" rule set.
 * @param {Object} parser Object exposing minimize(key).
 * @return {*} Whatever parser.minimize returns for the referenced key.
 */
rr.Ref_.prototype.minimize = function(parser) {
  var target = this.key_;
  return parser.minimize(target);
};
/**
 * Matches by handing the input to the parser under the referenced key.
 * fullInput and inputIndex are accepted but not forwarded.
 * @param {string} input Remaining input to match against.
 * @param {string} fullInput Unused here.
 * @param {number} inputIndex Unused here.
 * @param {Object} parser Object exposing parse(key, input).
 * @return {*} Whatever parser.parse returns.
 */
rr.Ref_.prototype.match = function(input, fullInput, inputIndex, parser) {
  var target = this.key_;
  var outcome = parser.parse(target, input);
  return outcome;
};
rr.Ref = function(key) { rr.Ref = function(key) {
return (rr.Ref.cache[key] || return (rr.Ref.cache[key] ||
(rr.Ref.cache[key] = new rr.Ref_(key))); (rr.Ref.cache[key] = new rr.Ref_(key)));
@@ -39,19 +50,74 @@ rr.Ref = function(key) {
rr.Ref.cache = {}; rr.Ref.cache = {};
rr.EndOfLine_ = function() { rr.EndOfLine_ = function() {
}; };
/**
 * Tests whether the current position is an end of line.
 *
 * Succeeds with a zero-length match when no input remains, or when the
 * character just before inputIndex in fullInput is a newline; succeeds with
 * a one-character match when the input starts with a newline.
 *
 * @param {string} input Remaining input.
 * @param {string} fullInput Complete input that `input` was sliced from.
 * @param {number} inputIndex Offset of `input` within `fullInput`.
 * @return {Array} [consumedLength, null] on success, or null on failure.
 */
rr.EndOfLine_.prototype.match = function(input, fullInput, inputIndex) {
  var consumed;
  if (input.length == 0) {
    consumed = 0;
  } else if (input[0] == '\n') {
    // The newline itself is consumed.
    consumed = 1;
  } else if (inputIndex > 0 && fullInput[inputIndex - 1] == '\n') {
    consumed = 0;
  } else {
    return null;
  }
  return [consumed, null];
};
/**
 * Finds the offset of the next end of line within the input.
 * @param {string} input Remaining input.
 * @param {string} fullInput Unused here.
 * @param {number} inputIndex Unused here.
 * @return {number} Index of the first newline, or the input length when
 *     the input contains no newline (0 for empty input).
 */
rr.EndOfLine_.prototype.search = function(input, fullInput, inputIndex) {
  if (input.length == 0) {
    return 0;
  }
  var newlineAt = input.indexOf('\n');
  return (newlineAt == -1) ? input.length : newlineAt;
};
rr.EndOfLine = function() { rr.EndOfLine = function() {
return rr.EndOfLine.cache; return rr.EndOfLine.cache;
} };
rr.EndOfLine.cache = new rr.EndOfLine_(); rr.EndOfLine.cache = new rr.EndOfLine_();
/**
 * Rule that succeeds only when there is no input left.
 * @constructor
 */
rr.EndOfText_ = function() {
};

/**
 * @param {string} input Remaining input.
 * @return {Array} [0, null] when the input is empty, otherwise null.
 */
rr.EndOfText_.prototype.match = function(input) {
  return input.length ? null : [0, null];
};

/**
 * @param {string} input Remaining input.
 * @return {number} The length of the input, i.e. the offset of its end.
 */
rr.EndOfText_.prototype.search = function(input) {
  var endOffset = input.length;
  return endOffset;
};

/**
 * Factory returning the single shared EndOfText rule instance.
 * @return {rr.EndOfText_}
 */
rr.EndOfText = function() {
  var shared = rr.EndOfText.cache;
  return shared;
};

// One instance is created up front and handed out by every factory call.
rr.EndOfText.cache = new rr.EndOfText_();
rr.MultiLineText_ = function() { rr.MultiLineText_ = function() {
}; };
rr.MultiLineText_.prototype.minimize = true; rr.MultiLineText_.prototype.minimize = function() {
return true;
};
/**
 * Consumes the entire input as text.
 * @param {string} input Remaining input.
 * @return {Array} [input.length, textNode], where textNode is a DOM text
 *     node created from the input.
 */
rr.MultiLineText_.prototype.match = function(input) {
  var consumed = input.length;
  var textNode = document.createTextNode(input);
  return [consumed, textNode];
};
rr.MultiLineText = function() { rr.MultiLineText = function() {
return rr.MultiLineText.cache; return rr.MultiLineText.cache;
@@ -59,19 +125,44 @@ rr.MultiLineText = function() {
rr.MultiLineText.cache = new rr.MultiLineText_(); rr.MultiLineText.cache = new rr.MultiLineText_();
rr.Or_ = function(options) { rr.Or_ = function(options) {
this.options_ = options; this.options_ = options;
}; };
/**
 * Reports whether any of the alternatives is a "minimize" rule set.
 * Alternatives are checked in order and the scan stops at the first hit.
 * @param {Object} parser Object exposing minimize(option).
 * @return {boolean} true if parser.minimize is truthy for some option.
 */
rr.Or_.prototype.minimize = function(parser) {
  var choices = this.options_;
  var idx = 0;
  while (idx < choices.length) {
    if (parser.minimize(choices[idx])) {
      return true;
    }
    idx += 1;
  }
  return false;
};
/**
 * Tries each alternative in order and returns the first truthy parse result.
 * fullInput and inputIndex are accepted but not used.
 * @param {string} input Remaining input.
 * @param {string} fullInput Unused here.
 * @param {number} inputIndex Unused here.
 * @param {Object} parser Object exposing parse(option, input).
 * @return {*} The first truthy result of parser.parse, or null if every
 *     alternative fails.
 */
rr.Or_.prototype.match = function(input, fullInput, inputIndex, parser) {
  var choices = this.options_;
  for (var idx = 0; idx < choices.length; idx++) {
    var outcome = parser.parse(choices[idx], input);
    if (!outcome) {
      continue;
    }
    return outcome;
  }
  return null;
};
rr.Or = function() { rr.Or = function() {
return new rr.Or_(arguments); return new rr.Or_(arguments);
}; };
rr.SingleLineText_ = function() { rr.SingleLineText_ = function() {
}; };
rr.SingleLineText_.prototype.minimize = true; rr.SingleLineText_.prototype.minimize = function() {
return true;
}
rr.SingleLineText_.prototype.match = function(input) { rr.SingleLineText_.prototype.match = function(input) {
var newLine = input.indexOf('\n'); var newLine = input.indexOf('\n');
@@ -88,30 +179,65 @@ rr.SingleLineText = function() {
rr.SingleLineText.cache = new rr.SingleLineText_(); rr.SingleLineText.cache = new rr.SingleLineText_();
rr.StartOfLine_ = function() { rr.StartOfLine_ = function() {
}; };
/**
 * Tests whether the current position is a start of line.
 *
 * Succeeds with a zero-length match at offset 0 of the full input, or when
 * the character just before inputIndex in fullInput is a newline; succeeds
 * with a one-character match when the input itself starts with a newline.
 *
 * @param {string} input Remaining input.
 * @param {string} fullInput Complete input that `input` was sliced from.
 * @param {number} inputIndex Offset of `input` within `fullInput`.
 * @return {Array} [consumedLength, null] on success, or null on failure.
 */
rr.StartOfLine_.prototype.match = function(input, fullInput, inputIndex) {
  var consumed;
  if (inputIndex == 0) {
    consumed = 0;
  } else if (input[0] == '\n') {
    // The leading newline is consumed.
    consumed = 1;
  } else if (fullInput[inputIndex - 1] == '\n') {
    consumed = 0;
  } else {
    return null;
  }
  return [consumed, null];
};
/**
 * Finds the offset of the next start of line within the input.
 * @param {string} input Remaining input.
 * @param {string} fullInput Unused here.
 * @param {number} inputIndex Offset of `input` within the full input.
 * @return {?number} 0 when inputIndex is 0; otherwise the offset just past
 *     the first newline, or null when the input contains no newline.
 */
rr.StartOfLine_.prototype.search = function(input, fullInput, inputIndex) {
  if (inputIndex == 0) {
    return 0;
  }
  var newlineAt = input.indexOf('\n');
  return (newlineAt == -1) ? null : newlineAt + 1;
};
rr.StartOfLine = function() { rr.StartOfLine = function() {
return rr.StartOfLine.cache; return rr.StartOfLine.cache;
}; };
rr.StartOfLine.cache = new rr.StartOfLine_(); rr.StartOfLine.cache = new rr.StartOfLine_();
/**
 * Word-text rule.
 * NOTE(review): no match() method is defined in this block; confirm whether
 * one exists elsewhere or whether this rule is being retired.
 * @constructor
 */
rr.WordText_ = function() {
};

/**
 * Marks this rule as a "minimize" rule.
 *
 * This is a function rather than the boolean `true` so that callers which
 * invoke `rule.minimize(parser)` do not throw a TypeError; callers that
 * only test `rule.minimize` for truthiness keep working.
 *
 * @return {boolean} Always true.
 */
rr.WordText_.prototype.minimize = function() {
  return true;
};

/**
 * Factory returning the single shared WordText rule instance.
 * @return {rr.WordText_}
 */
rr.WordText = function() {
  return rr.WordText.cache;
};

rr.WordText.cache = new rr.WordText_();
rr.ZeroOrMore_ = function(key) { rr.ZeroOrMore_ = function(key) {
this.key_ = key; this.key_ = key;
}; };
/**
 * Asks the parser whether the repeated rule set is a "minimize" rule set.
 * @param {Object} parser Object exposing minimize(key).
 * @return {*} Whatever parser.minimize returns for the repeated key.
 */
rr.ZeroOrMore_.prototype.minimize = function(parser) {
  var repeatedKey = this.key_;
  return parser.minimize(repeatedKey);
};
/**
 * Repeatedly parses the repeated key against the input, collecting the
 * resulting nodes under a single <group> element.
 *
 * Parsing stops at the first failed parse, when the input is exhausted, or
 * when a parse succeeds without consuming anything. Zero repetitions is a
 * valid outcome, so this never returns null.
 *
 * @param {string} input Remaining input.
 * @param {string} fullInput Unused here.
 * @param {number} inputIndex Unused here.
 * @param {Object} parser Object exposing parse(key, input), returning
 *     [consumedLength, node] or a falsy value.
 * @return {Array} [consumedLength, groupElement].
 */
rr.ZeroOrMore_.prototype.match = function(input, fullInput, inputIndex, parser) {
  var ret = document.createElement('group');
  var parseIndex = 0;
  // Run until all input is consumed. The previous bound of
  // `input.length - 1` left a final single character unparsed.
  while (parseIndex < input.length) {
    var result = parser.parse(this.key_, input.slice(parseIndex));
    if (!result) {
      break;
    }
    if (!(result[0] > 0)) {
      // A repetition that consumed nothing would repeat forever at the same
      // offset; treat it as the end of the sequence.
      break;
    }
    parseIndex += result[0];
    if (result[1]) {
      ret.appendChild(result[1]);
    }
  }
  return [parseIndex, ret];
};
rr.ZeroOrMore = function(key) { rr.ZeroOrMore = function(key) {
return (rr.ZeroOrMore.cache[key] || return (rr.ZeroOrMore.cache[key] ||
(rr.ZeroOrMore.cache[key] = new rr.ZeroOrMore_(key))); (rr.ZeroOrMore.cache[key] = new rr.ZeroOrMore_(key)));
@@ -119,10 +245,21 @@ rr.ZeroOrMore = function(key) {
rr.ZeroOrMore.cache = {}; rr.ZeroOrMore.cache = {};
var RecentRunes = function(dictionary) { var RecentRunes = function(dictionary) {
this.dictionary_ = dictionary; this.dictionary_ = dictionary;
}; };
/**
 * Reports whether any rule registered under nodeType is a "minimize" rule.
 *
 * Each rule is inspected individually. A rule counts as minimize when its
 * `minimize` member is a function that returns a truthy value for this
 * parser, or when it is a plain truthy flag.
 *
 * Rules may call back into this method (the reference, alternation and
 * repetition rules do), so a grammar that refers to itself would recurse
 * without end. Node types whose evaluation is already in progress are
 * therefore reported as not-minimize on re-entry, leaving the decision to
 * the remaining rules.
 *
 * @param {string} nodeType Key into the grammar dictionary.
 * @return {boolean} true if at least one rule under nodeType is minimize.
 */
RecentRunes.prototype.minimize = function(nodeType) {
  var rules = this.dictionary_[nodeType];
  // Node types currently being evaluated further up the call stack.
  var active = this.minimizeActive_ || (this.minimizeActive_ = {});
  if (active[nodeType] === true) {
    return false;
  }
  active[nodeType] = true;
  try {
    for (var i = 0; i < rules.length; i++) {
      var rule = rules[i];
      var flag = rule.minimize;
      if (typeof flag == 'function') {
        flag = rule.minimize(this);
      }
      if (flag) {
        return true;
      }
    }
    return false;
  } finally {
    active[nodeType] = false;
  }
};
RecentRunes.prototype.parse = function(nodeType, input) { RecentRunes.prototype.parse = function(nodeType, input) {
var ret = document.createElement(nodeType); var ret = document.createElement(nodeType);
var rules = this.dictionary_[nodeType]; var rules = this.dictionary_[nodeType];
@@ -130,12 +267,12 @@ RecentRunes.prototype.parse = function(nodeType, input) {
var lastRuleMinimize = false; var lastRuleMinimize = false;
for (var i = 0; i < rules.length; i++) { for (var i = 0; i < rules.length; i++) {
var rule = rules[i]; var rule = rules[i];
if (rule.minimize) { if (rule.minimize && rule.minimize(this)) {
if (lastRuleMinimize) { if (lastRuleMinimize) {
// Two minimize rules in a row is ambiguous // Two minimize rules in a row is ambiguous
return null; return null;
} }
lastRuleMinimize = true; lastRuleMinimize = rule;
continue; continue;
} }
if (lastRuleMinimize) { if (lastRuleMinimize) {
@@ -146,17 +283,21 @@ RecentRunes.prototype.parse = function(nodeType, input) {
} }
// Check if the previous rule will match the interim data // Check if the previous rule will match the interim data
var prevMatch = rules[i - 1].match( var prevMatch = lastRuleMinimize.match(
input.slice(inputIndex, inputIndex + loc)); input.slice(inputIndex, inputIndex + loc),
if (!prevMatch) { input, inputIndex, this);
if (!prevMatch || prevMatch[0] != loc) {
return null; return null;
}; };
inputIndex += prevMatch[0]; inputIndex += prevMatch[0];
if (prevMatch[1]) { if (prevMatch[1]) {
ret.appendChild(prevMatch[1]); ret.appendChild(prevMatch[1]);
} }
lastRuleMinimize = false;
} }
var match = rule.match(input.slice(inputIndex)); var match = rule.match(
input.slice(inputIndex), input, inputIndex, this);
if (!match) { if (!match) {
return null; return null;
} }
@@ -165,5 +306,18 @@ RecentRunes.prototype.parse = function(nodeType, input) {
ret.appendChild(match[1]); ret.appendChild(match[1]);
} }
}; };
if (lastRuleMinimize) {
var lastMatch = lastRuleMinimize.match(
input.slice(inputIndex), input, inputIndex, this);
if (!lastMatch || lastMatch[0] != input.length - inputIndex) {
return null;
}
inputIndex += lastMatch[0];
if (lastMatch[1]) {
ret.appendChild(lastMatch[1]);
}
}
return [inputIndex, ret]; return [inputIndex, ret];
}; };

15
test.js
View File

@@ -1,10 +1,11 @@
test('Simple', function() { test('Simple', function() {
expect(0); expect(0);
var grammar = { var parser = new RecentRunes(mediawiki);
'rule1': [rr.Literal('=== '), rr.SingleLineText(), rr.Literal(' ===')], var result = parser.parse('wikidoc',
'rule2': [rr.SingleLineText(), rr.Literal('=')], '=== Heading ===\n\
}; This is a wiki doc.\n\
var parser = new RecentRunes(grammar); How about some <b>bold and <i>bold italic</i></b>.\n\
console.log(parser.parse('rule1', '=== bar ===')); I would also love some <nowiki>nowiki <b>foo</b></nowiki>');
console.log(parser.parse('rule2', 'foo=\nbar=')); console.log(result);
document.body.appendChild(result[1]);
}); });