Python Save() and SavedLiteral(). Split factory and matcher.

This commit is contained in:
Ian Gulliver
2014-08-03 11:25:29 -07:00
parent b11a659f0c
commit d0ab66a823
3 changed files with 76 additions and 19 deletions

View File

@@ -117,7 +117,6 @@ class Element(object):
return ''.join(x.getTextContent() for x in self.childNodes)
def __str__(self):
# TODO: attributes
values = map(str, self.childNodes)
return '<%s%s>%s</%s>' % (
self.nodeName,
@@ -278,6 +277,37 @@ class Ref(Matcher):
return context.rules[self._key].match(context)
class SaveAndDiscard(Matcher):
    """Matcher that stores its child's matched text under a key.

    Each result produced by the child is turned into a result that carries
    no nodes; the text content of the child's nodes is kept only as a saved
    value on the resulting context.
    """

    def __init__(self, key, child):
        self._child = child
        self._key = key

    def match(self, context):
        """Yield one node-less MatchResult per result of the child matcher.

        The text content of every node in the child's result is concatenated
        and stored under this matcher's key via ``context.saveValue``.
        """
        for childResult in self._child.match(context):
            text = ''.join(
                node.getTextContent() for node in childResult.nodes)
            # NOTE(review): the new context is derived from the incoming
            # `context`, not from the child's result -- confirm this is the
            # intended starting point for whatever matcher runs next.
            yield MatchResult(context.saveValue(self._key, text), [])
class SavedLiteral(Matcher):
    """Matcher that matches the literal text previously saved under a key.

    Instances are cached per key: constructing ``SavedLiteral(key)`` twice
    with the same key returns the same object.
    """

    # Maps key -> the single SavedLiteral instance created for that key.
    _cache = {}

    def __new__(cls, key):
        """Return the cached instance for `key`, creating it on first use."""
        if key not in cls._cache:
            # Only `cls` is forwarded: object.__new__ rejects extra
            # arguments when __new__ is overridden (TypeError on Python 3,
            # DeprecationWarning on Python 2.6+). The key itself is stored
            # by __init__.
            # NOTE(review): assumes Matcher does not define a __new__ that
            # requires `key` -- confirm against the base class.
            cls._cache[key] = super(SavedLiteral, cls).__new__(cls)
        return cls._cache[key]

    def __init__(self, key):
        # Runs on every construction, including cache hits; it re-assigns
        # the same key, so repeated calls are harmless.
        self._key = key

    def match(self, context):
        """Match the value saved under this key as a Literal.

        The value is read from `context` at match time, so the text matched
        depends on what has been saved on that particular context.
        """
        value = context.getValue(self._key)
        return Literal(value).match(context)
class SequentialPair(Matcher):
def __init__(self, child1, child2):
self._child1 = child1
@@ -354,6 +384,11 @@ def OneOrMore(child):
return SequentialPair(child, ZeroOrMore(child))
def Save(key, saveChild, matchChild):
    """Build a matcher that saves `saveChild`'s text, then runs `matchChild`.

    `saveChild` is wrapped in a SaveAndDiscard under `key`, and that wrapper
    is placed ahead of `matchChild` in a SequentialPair.
    """
    return SequentialPair(SaveAndDiscard(key, saveChild), matchChild)
def Sequence(*children):
if len(children) == 1:
return children[0]
@@ -458,13 +493,14 @@ def ApplyFilters(node, filters):
class Context(object):
def __init__(self, rules, string, inputIndex=0):
def __init__(self, rules, string, inputIndex=0, savedValues=None):
self.rules = rules
self.string = string
self.inputIndex = inputIndex
self.savedValues = dict(savedValues or {})
def copy(self):
return Context(self.rules, self.string, self.inputIndex)
return Context(self.rules, self.string, self.inputIndex, self.savedValues)
def stringAfter(self, numChars=None):
if numChars is None:
@@ -494,6 +530,14 @@ class Context(object):
context.inputIndex += numChars
return context
def getValue(self, key):
    """Return the value stored under `key` in this context's saved values.

    A key that has not been saved propagates the lookup error raised by
    ``self.savedValues``.
    """
    saved = self.savedValues
    return saved[key]
def saveValue(self, key, value):
    """Return a copy of this context with `value` stored under `key`.

    The assignment is made on the object returned by ``self.copy()``; that
    object is what gets returned.
    """
    updated = self.copy()
    updated.savedValues[key] = value
    return updated
class Parser(object):
@classmethod
@@ -536,6 +580,8 @@ class rr(object):
'Node': Node,
'Or': Or,
'Ref': Ref,
'Save': Save,
'SavedLiteral': SavedLiteral,
'SequentialPair': SequentialPair,
'StartOfLine': StartOfLine,
'ZeroOrMore': ZeroOrMore,

View File

@@ -475,7 +475,7 @@ rr.Ref.cache_ = {};
* @param {rr.typeMatcher} child
* @private
*/
rr.Save_ = function(key, child) {
rr.SaveAndDiscard_ = function(key, child) {
this.key_ = key;
this.child_ = child;
};
@@ -485,7 +485,7 @@ rr.Save_ = function(key, child) {
* @param {rr.Context} context
* @return {rr.typeIterator}
*/
rr.Save_.prototype.match = function(context) {
rr.SaveAndDiscard_.prototype.match = function(context) {
var iterator = this.child_.match(context);
return {
'next': function() {
@@ -510,17 +510,6 @@ rr.Save_.prototype.match = function(context) {
};
/**
 * Builds a sequential pair: a rr.Save_ wrapping saveChild under key,
 * followed by matchChild.
 *
 * @param {string} key
 * @param {rr.typeMatcher} saveChild
 * @param {rr.typeMatcher} matchChild
 * @return {rr.SequentialPair_}
 */
rr.Save = function(key, saveChild, matchChild) {
  var saver = new rr.Save_(key, saveChild);
  return new rr.SequentialPair_(saver, matchChild);
};
/**
* @constructor
@@ -785,6 +774,18 @@ rr.OneOrMore = function(child) {
};
/**
 * Builds a sequential pair: a rr.SaveAndDiscard_ wrapping saveChild under
 * key, followed by matchChild.
 *
 * @param {string} key
 * @param {rr.typeMatcher} saveChild
 * @param {rr.typeMatcher} matchChild
 * @return {rr.SequentialPair_}
 */
rr.Save = function(key, saveChild, matchChild) {
  // Declared with `var` so the matcher stays local to this call instead of
  // leaking into (and being overwritten through) an implicit global.
  var save = new rr.SaveAndDiscard_(key, saveChild);
  return new rr.SequentialPair_(save, matchChild);
};
/**
* @return {rr.SequentialPair_|rr.typeMatcher}
*/

16
test.py
View File

@@ -2,7 +2,7 @@
import recentrunes
parser = recentrunes.Parser.fromFile('static/grammars/mediawiki.js')
mediawiki = recentrunes.Parser.fromFile('static/grammars/mediawiki.js')
teststring = \
"""This is a paragraph with many text styles. This is ''italic'' and this
is '''bold'''; this is '''''both'''''. This is <u>underline</u> as is
@@ -38,7 +38,7 @@ shouldn't be visible --><blockquote>This is a blockquote</blockquote>
This line is pre-formatted and <del>not interpolated</del>
This line is also pre-formatted"""
result = str(parser.parseFromString(teststring))
result = str(mediawiki.parseFromString(teststring))
assert result == \
"""<wikidoc><p>This is a paragraph with many text styles. This is <i>italic</i> and this
is <b>bold</b>; this is <b><i>both</i></b>. This is <u>underline</u> as is
@@ -53,4 +53,14 @@ This line is also pre-formatted
</pre></p></wikidoc>""", result
parser = recentrunes.Parser.fromFile('static/grammars/badpenny.js')
badpenny = recentrunes.Parser.fromFile('static/grammars/badpenny.js')
teststring = \
"""foo{{value1}}bar
foo{{(container1}}contents{{)nottheend}}more contents{{)container1}}bar
foo{{[repeated1}}testing{{]notthis}}{{)repeated1}}zig{{]repeated1}}bar"""
result = str(badpenny.parseFromString(teststring))
assert result == \
"""<badpenny>foo<value name="value1"></value>bar
foo<container name="container1">contents{{)nottheend}}more contents</container>bar
foo<repeated name="repeated1">testing{{]notthis}}{{)repeated1}}zig</repeated>bar</badpenny>"""