First parse through a file; still have bugs
This commit is contained in:
433
recentrunes.py
Normal file
433
recentrunes.py
Normal file
@@ -0,0 +1,433 @@
|
||||
#!/usr/bin/python2.7
|
||||
|
||||
import collections
|
||||
|
||||
class MatchResult(collections.namedtuple('MatchResult', 'context nodes')):
    """Outcome of one successful match attempt.

    Fields:
        context: the Context to continue matching from.
        nodes: list of nodes produced by the match (may be empty).
    """
|
||||
|
||||
|
||||
class TextNode(object):
    """Leaf node holding a run of text.

    Carries the same structural attributes as Element (nodeName, parentNode,
    previousSibling, childNodes) so that tree walkers and name-based filters
    can visit text nodes without special-casing them.
    """

    # Name reported by every text node. Filters compare nodeName against
    # element names; '#text' follows the DOM convention for text nodes.
    nodeName = '#text'

    def __init__(self, textContent):
        """Create a detached text node containing textContent."""
        self.textContent = textContent
        self.parentNode = None
        self.previousSibling = None
        # Always empty; present so recursive walkers can iterate it.
        self.childNodes = []

    def cloneNode(self, deep=False):
        """Return a detached copy of this node.

        Args:
            deep: accepted for signature parity with Element.cloneNode; a
                text node has no children, so it has no effect.
        """
        return TextNode(self.textContent)
|
||||
|
||||
|
||||
class Element(object):
    """Minimal DOM-style element: a named node with attributes and children.

    Attributes:
        nodeName: the element's name.
        parentNode: the Element this node was last attached to, or None.
        previousSibling: the node immediately before this one in its
            parent's childNodes, or None.
        childNodes: ordered list of child nodes.
        attributes: dict mapping attribute name to value.
    """

    def __init__(self, nodeName):
        """Create a detached, empty element called nodeName."""
        self.nodeName = nodeName
        self.parentNode = None
        self.previousSibling = None
        self.childNodes = []
        self.attributes = {}

    @property
    def textContent(self):
        """Concatenated textContent of all children, in order (read-only)."""
        return ''.join(child.textContent for child in self.childNodes)

    def appendChild(self, child):
        """Attach child as the last child of this element.

        Note: child is NOT removed from the childNodes of any element it was
        previously attached to; callers that move a node must detach it first.
        """
        child.parentNode = self
        child.previousSibling = self.childNodes[-1] if self.childNodes else None
        self.childNodes.append(child)

    def removeChild(self, child):
        """Detach child from this element.

        Raises:
            ValueError: if child is not one of this element's children.
        """
        index = self.childNodes.index(child)
        del self.childNodes[index]
        # Re-link the node that followed child so it does not keep pointing
        # at a node that is no longer in the tree.
        if index < len(self.childNodes):
            self.childNodes[index].previousSibling = (
                self.childNodes[index - 1] if index else None)
        child.parentNode = None
        child.previousSibling = None

    def replaceChild(self, newNode, oldNode):
        """Put newNode in the position currently occupied by oldNode.

        Raises:
            ValueError: if oldNode is not one of this element's children.
        """
        index = self.childNodes.index(oldNode)
        # Reset oldNode first so that replacing a node with itself still
        # leaves it correctly linked.
        oldNode.parentNode = None
        oldNode.previousSibling = None
        self.childNodes[index] = newNode
        newNode.parentNode = self
        newNode.previousSibling = self.childNodes[index - 1] if index else None
        if index + 1 < len(self.childNodes):
            self.childNodes[index + 1].previousSibling = newNode

    def normalize(self):
        """Merge each run of adjacent TextNode children into its first node.

        The text of the later nodes is appended to the first node of the run,
        the later nodes are detached, and previousSibling links of the
        remaining children are rebuilt. Only direct children are processed.
        """
        kept = []
        for child in self.childNodes:
            if (kept and isinstance(child, TextNode) and
                    isinstance(kept[-1], TextNode)):
                kept[-1].textContent += child.textContent
                child.parentNode = None
                child.previousSibling = None
            else:
                child.previousSibling = kept[-1] if kept else None
                kept.append(child)
        # Slice-assign so existing references to the list stay valid.
        self.childNodes[:] = kept

    def cloneNode(self, deep=False):
        """Return a detached copy of this element.

        Args:
            deep: when true, children are cloned recursively (each child must
                itself provide cloneNode); otherwise the copy has no children.
        """
        clone = Element(self.nodeName)
        clone.attributes = dict(self.attributes)
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(True))
        return clone

    def renameNode(self, nodeName):
        """Change this element's name to nodeName."""
        self.nodeName = nodeName

    def setAttribute(self, key, value):
        """Set attribute key to value, replacing any existing value."""
        self.attributes[key] = value

    def getAttribute(self, key):
        """Return the value of attribute key.

        Raises:
            KeyError: if the attribute has not been set.
        """
        return self.attributes[key]
|
||||
|
||||
|
||||
|
||||
# ============ Matchers ============
|
||||
|
||||
|
||||
class Matcher(object):
    """Common base class for grammar matchers.

    Subclasses in this file implement match(context), which produces
    MatchResult objects for each way the matcher can succeed at context.
    """
|
||||
|
||||
|
||||
class CharExcept(Matcher):
    """Match any single character that is NOT in an excluded set."""

    def __init__(self, chars):
        """Args:
            chars: string (or other container) of characters to reject.
        """
        self._chars = chars

    def match(self, context):
        """Yield one result consuming the next character, if it is allowed.

        Yields nothing at end of input or when the next character is one of
        the excluded characters. The result carries the character as a
        TextNode.
        """
        c = context.stringAfter(1)
        # `not in`: this matcher accepts everything EXCEPT the given chars,
        # so CharExcept('') accepts any character.
        if c and c not in self._chars:
            yield MatchResult(context.advance(1), [TextNode(c)])
|
||||
|
||||
|
||||
class EndOfLine(Matcher):
    """Zero-node matcher for line endings."""

    def match(self, context):
        """Yield a node-less result for each end-of-line condition that holds.

        Conditions are checked independently, in this order, so more than one
        result may be produced for the same position:
          1. the input is exhausted (context unchanged);
          2. the next character is a newline (the newline is consumed);
          3. the previous character is a newline (context unchanged).
        """
        # 1. End of input.
        if context.atEnd():
            yield MatchResult(context, [])
        # 2. Newline ahead: consume it.
        if '\n' == context.stringAfter(1):
            yield MatchResult(context.advance(1), [])
        # 3. Newline just behind the cursor.
        if '\n' == context.stringBefore(1):
            yield MatchResult(context, [])
|
||||
|
||||
|
||||
class EndOfText(Matcher):
    """Zero-width matcher that succeeds only when no input remains."""

    def match(self, context):
        """Yield one node-less result with the unchanged context at end of
        input; yield nothing otherwise."""
        if not context.atEnd():
            return
        yield MatchResult(context, [])
|
||||
|
||||
|
||||
class Hidden(Matcher):
    """Wrap a matcher so its input is consumed but its nodes are discarded."""

    def __init__(self, child):
        """Args:
            child: the matcher whose output nodes should be suppressed.
        """
        self._child = child

    def match(self, context):
        """Yield one node-less result per result of the wrapped matcher,
        keeping each result's context."""
        for inner in self._child.match(context):
            yield MatchResult(inner.context, [])
|
||||
|
||||
|
||||
class Insert(Matcher):
    """Zero-width matcher that injects a fixed piece of text into the output."""

    def __init__(self, value):
        """Args:
            value: the text to emit each time this matcher is evaluated.
        """
        self._value = value

    def match(self, context):
        """Yield one result that consumes nothing and carries a new TextNode
        holding the configured value."""
        # nodes must be a list: consumers concatenate and iterate it.
        yield MatchResult(context, [TextNode(self._value)])
|
||||
|
||||
|
||||
class Literal(Matcher):
    """Match an exact string and produce no nodes."""

    def __init__(self, value):
        """Args:
            value: the exact text that must appear at the current position.
        """
        self._value = value

    def match(self, context):
        """Yield one node-less result consuming the literal when the upcoming
        input equals it; yield nothing otherwise."""
        expected = self._value
        width = len(expected)
        if context.stringAfter(width) != expected:
            return
        yield MatchResult(context.advance(width), [])
|
||||
|
||||
|
||||
class Node(Matcher):
    """Wrap the nodes produced by a child matcher in a new named Element."""

    def __init__(self, name, child):
        """Args:
            name: nodeName given to each Element created.
            child: the matcher whose output becomes the Element's children.
        """
        self._name = name
        self._child = child

    def match(self, context):
        """Yield one result per child result, each carrying a single new
        Element.

        Every produced node is copied with cloneNode(True) before being
        attached, then the new Element is normalized.
        """
        for inner in self._child.match(context):
            wrapper = Element(self._name)
            for produced in inner.nodes:
                wrapper.appendChild(produced.cloneNode(True))
            wrapper.normalize()
            yield MatchResult(inner.context, [wrapper])
|
||||
|
||||
|
||||
class Or(Matcher):
    """Ordered choice: try each alternative from the same starting context."""

    def __init__(self, *options):
        """Args:
            *options: the alternative matchers, in the order to be tried.
        """
        self._options = options

    def match(self, context):
        """Yield every result of every alternative, alternatives in order."""
        # Generator expression keeps evaluation lazy: an alternative is only
        # asked to match once all earlier ones have been exhausted.
        attempts = (option.match(context) for option in self._options)
        for attempt in attempts:
            for outcome in attempt:
                yield outcome
|
||||
|
||||
|
||||
class Ref(Matcher):
    """Late-bound reference to a named rule, looked up at match time."""

    def __init__(self, key):
        """Args:
            key: the name of the rule in context.rules to delegate to.
        """
        self._key = key

    def match(self, context):
        """Return the result iterator of the referenced rule.

        Raises:
            KeyError: if context.rules has no rule under the stored key.
        """
        target = context.rules[self._key]
        return target.match(context)
|
||||
|
||||
|
||||
class SequentialPair(Matcher):
    """Match one matcher and then another, immediately after it."""

    def __init__(self, child1, child2):
        """Args:
            child1: the matcher applied first.
            child2: the matcher applied where child1 left off.
        """
        self._child1 = child1
        self._child2 = child2

    def match(self, context):
        """Yield a combined result for every (first, second) pairing.

        For each result of child1, child2 is tried from that result's
        context; the yielded nodes are child1's followed by child2's.
        """
        for head in self._child1.match(context):
            for tail in self._child2.match(head.context):
                combined = head.nodes + tail.nodes
                yield MatchResult(tail.context, combined)
|
||||
|
||||
|
||||
class StartOfLine(Matcher):
    """Zero-node matcher for line starts."""

    def match(self, context):
        """Yield a node-less result for each start-of-line condition that
        holds.

        Conditions are checked independently, in this order, so more than one
        result may be produced for the same position:
          1. the cursor is at the very start of the input (context unchanged);
          2. the next character is a newline (the newline is consumed);
          3. the previous character is a newline (context unchanged).
        """
        # 1. Start of input.
        if context.atStart():
            yield MatchResult(context, [])
        # 2. Newline ahead: consume it.
        if '\n' == context.stringAfter(1):
            yield MatchResult(context.advance(1), [])
        # 3. Newline just behind the cursor.
        if '\n' == context.stringBefore(1):
            yield MatchResult(context, [])
|
||||
|
||||
|
||||
class ZeroOrMore(Matcher):
    """Repeat a matcher any number of times, shortest repetition first."""

    def __init__(self, child):
        """Args:
            child: the matcher to repeat.
        """
        # Repetition is expressed recursively: one child followed by this
        # same matcher again.
        self._pair = SequentialPair(child, self)

    def match(self, context):
        """Yield the empty match first, then every longer repetition.

        Raises:
            Exception: if a repetition result has the same amount of input
                remaining as the starting context (no input was consumed).
        """
        yield MatchResult(context, [])
        for longer in self._pair.match(context):
            if context.remaining() == longer.context.remaining():
                raise Exception(
                    "Child or ZeroOrMore didn't consume input; grammar bug?")
            yield longer
|
||||
|
||||
|
||||
# ============ Convenience factories ============
|
||||
|
||||
|
||||
def Char():
    """Return a CharExcept matcher built with an empty exclusion string."""
    nothingExcluded = ''
    return CharExcept(nothingExcluded)
|
||||
|
||||
|
||||
def MultiLineText():
    """Return a matcher for one or more Char() matches."""
    anyChar = Char()
    return OneOrMore(anyChar)
|
||||
|
||||
|
||||
def OneOrMore(child):
    """Return a matcher for child followed by zero or more further child
    matches."""
    repeated = ZeroOrMore(child)
    return SequentialPair(child, repeated)
|
||||
|
||||
|
||||
def Sequence(*children):
    """Chain matchers so they must match one after another.

    A single child is returned unchanged. Otherwise the children are nested
    into right-leaning SequentialPair objects: (a, (b, (c, d))).

    Raises:
        IndexError: if called with no children.
    """
    chain = children[-1]
    # Fold from the right so the last two children form the innermost pair.
    for earlier in reversed(children[:-1]):
        chain = SequentialPair(earlier, chain)
    return chain
|
||||
|
||||
|
||||
def SingleLineText():
    """Return a matcher for one or more CharExcept('\\n') matches."""
    notNewline = CharExcept('\n')
    return OneOrMore(notNewline)
|
||||
|
||||
|
||||
|
||||
# ============ Filter factories ============
|
||||
|
||||
|
||||
def ChildToAttribute(parentName, childName):
    """Build a filter that turns a child node into an attribute of its parent.

    The returned filter acts on nodes named parentName: the first child named
    childName has its textContent stored as attribute childName on the parent
    and is then removed. Other nodes are left untouched.
    """
    def Filter(node):
        if node.nodeName == parentName:
            for candidate in node.childNodes:
                if candidate.nodeName != childName:
                    continue
                node.setAttribute(childName, candidate.textContent)
                node.removeChild(candidate)
                # Only the first matching child is converted; stopping here
                # also avoids iterating the list after it was modified.
                break
    return Filter
|
||||
|
||||
|
||||
def ExtractElement(nodeName):
    """Build a filter that unwraps elements named nodeName.

    The returned filter replaces a matching element with its own children,
    in place: the children take the element's position among its siblings,
    the element is detached, and the parent is normalized. A matching node
    without a parent is left untouched because there is nothing to splice
    its children into.
    """
    def Filter(node):
        if node.nodeName != nodeName:
            return
        parentNode = node.parentNode
        if parentNode is None:
            return
        siblings = parentNode.childNodes
        index = siblings.index(node)
        following = siblings[index + 1:]
        # Drop the element and everything after it, then re-append the
        # element's children followed by the old trailing siblings. Going
        # through appendChild keeps parentNode/previousSibling links correct
        # and preserves document order.
        del siblings[index:]
        for moved in list(node.childNodes) + following:
            parentNode.appendChild(moved)
        # node.childNodes is deliberately left intact so a tree walk that is
        # currently visiting node can still descend into the moved children.
        node.parentNode = None
        node.previousSibling = None
        parentNode.normalize()
    return Filter
|
||||
|
||||
|
||||
def GroupSiblings(parentName, childNames):
    """Build a filter that gathers matching siblings under a group element.

    The returned filter acts on nodes whose nodeName is in childNames:
      * if the node's previousSibling is already a parentName element, the
        node is moved into that element;
      * otherwise a new parentName element takes the node's place and the
        node becomes its first child.
    """
    def Filter(node):
        if node.nodeName not in childNames:
            return
        group = node.previousSibling
        if (group is not None and
                getattr(group, 'nodeName', None) == parentName):
            # appendChild does not detach a node from its current parent, so
            # remove it explicitly; otherwise it would appear in the tree
            # twice.
            node.parentNode.removeChild(node)
            group.appendChild(node)
            return
        newNode = Element(parentName)
        node.parentNode.replaceChild(newNode, node)
        newNode.appendChild(node)
    return Filter
|
||||
|
||||
|
||||
def RenameElement(oldName, newName):
    """Build a filter that renames every node called oldName to newName."""
    def Filter(node):
        if node.nodeName == oldName:
            node.renameNode(newName)
    return Filter
|
||||
|
||||
|
||||
def SplitElementAndNest(originalName, newNames):
    """Build a filter that replaces one element with a chain of nested ones.

    The returned filter acts on nodes named originalName: it creates one
    element per entry of newNames, nested first-outermost, attaches the
    original node's children to the innermost new element, and puts the
    outermost new element where the original node was in its parent.

    Args:
        originalName: nodeName of the elements to replace.
        newNames: non-empty iterable of element names, outermost first.

    Raises:
        ValueError: if newNames is empty.
    """
    # Materialize once so one-shot iterables work for every filter call.
    newNames = tuple(newNames)
    if not newNames:
        raise ValueError('SplitElementAndNest requires at least one new name')

    def Filter(node):
        if node.nodeName != originalName:
            return
        outerNode = innerNode = None
        for newName in newNames:
            newNode = Element(newName)
            if outerNode is None:
                outerNode = newNode
            else:
                innerNode.appendChild(newNode)
            innerNode = newNode
        # Iterate a snapshot: the children are being re-parented.
        for childNode in list(node.childNodes):
            innerNode.appendChild(childNode)
        node.parentNode.replaceChild(outerNode, node)

    return Filter
|
||||
|
||||
|
||||
|
||||
# ============ Scaffolding ============
|
||||
|
||||
|
||||
def ApplyFilter(node, callback):
    """Call callback on node and then, recursively, on its descendants.

    The callback runs on a node before its children are visited. Children
    are taken from a snapshot made after the callback has run on their
    parent, so callbacks may add, remove or replace nodes in the tree without
    siblings being skipped or visited twice. Nodes without a childNodes
    attribute are treated as leaves.

    Args:
        node: root of the subtree to process.
        callback: one-argument callable invoked with each visited node.
    """
    callback(node)
    for childNode in list(getattr(node, 'childNodes', ())):
        ApplyFilter(childNode, callback)
|
||||
|
||||
|
||||
def ApplyFilters(node, filters):
    """Run each filter over the whole tree rooted at node, one after another.

    Each filter completes a full ApplyFilter pass before the next one starts.
    """
    for treeFilter in filters:
        ApplyFilter(node, treeFilter)
|
||||
|
||||
|
||||
class Context(object):
    """Cursor into the input string, plus the rule table used while matching.

    advance() returns a new Context instead of moving this one, so a context
    can be shared between alternative match attempts.

    Attributes:
        rules: mapping of rule name to matcher.
        string: the complete input text.
        inputIndex: offset of the next unread character.
    """

    def __init__(self, rules, string, inputIndex=0):
        self.rules, self.string, self.inputIndex = rules, string, inputIndex

    def copy(self):
        """Return a new Context with the same rules, string and position."""
        return Context(self.rules, self.string, self.inputIndex)

    def stringAfter(self, numChars=None):
        """Return up to numChars characters starting at the cursor.

        With numChars omitted (None), return everything that remains.
        """
        begin = self.inputIndex
        if numChars is None:
            return self.string[begin:begin + self.remaining()]
        return self.string[begin:begin + numChars]

    def stringBefore(self, numChars):
        """Return up to numChars characters ending just before the cursor.

        Fewer characters are returned when the cursor is closer than numChars
        to the start of the input.
        """
        end = self.inputIndex
        return self.string[max(0, end - numChars):end]

    def atStart(self):
        """Return True when nothing has been consumed yet."""
        return 0 == self.inputIndex

    def atEnd(self):
        """Return True when no input remains."""
        return 0 == self.remaining()

    def remaining(self):
        """Return the number of characters from the cursor to the end."""
        return len(self.string) - self.inputIndex

    def advance(self, numChars):
        """Return a copy of this context moved forward by numChars.

        Raises:
            Exception: if numChars is zero (or otherwise falsy).
        """
        if not numChars:
            raise Exception('Context.advance(0) called')
        moved = self.copy()
        moved.inputIndex = moved.inputIndex + numChars
        return moved
|
||||
|
||||
|
||||
class Parser(object):
    """A grammar (named rules) plus the filters applied to each parse tree."""

    @classmethod
    def fromFile(cls, filename):
        """Load a Parser from a grammar file written in Python.

        The file is executed with a single predefined global, `rr`, and must
        define exactly one new global whose value is an instance of this
        class; that value is returned.

        Raises:
            AssertionError: if the file defines zero or several new globals,
                or the one it defines is not an instance of cls.
        """
        # Context manager so the handle is closed even if reading fails.
        with open(filename, 'r') as fh:
            grammar = fh.read()
        compiled = compile(grammar, filename, 'exec')
        glbls = {
            'rr': rr(),
        }
        # NOTE(security): this executes the grammar file as arbitrary Python
        # code. Only load grammar files from trusted sources.
        eval(compiled, glbls)
        newKeys = (set(glbls) - {'__builtins__', 'rr'})
        assert len(newKeys) == 1, newKeys
        value = glbls[newKeys.pop()]
        assert isinstance(value, cls), value
        return value

    def __init__(self, rules, filters):
        """Args:
            rules: mapping of rule name to matcher; parsing starts at 'main'.
            filters: iterable of callables applied to the parsed tree, in
                order.
        """
        self.rules = rules
        self.filters = filters

    def parseFromString(self, string):
        """Parse string with the 'main' rule and return the filtered tree.

        Only the first result produced by the 'main' rule is used: its first
        node becomes the root, the filters are applied to it, and it is
        returned. Returns None when the rule produces no result.
        """
        context = Context(self.rules, string)
        for result in context.rules['main'].match(context):
            rootNode = result.nodes[0]
            ApplyFilters(rootNode, self.filters)
            return rootNode
        return None
|
||||
|
||||
|
||||
class rr(object):
    """Namespace object handed to grammar files as the global `rr`.

    Attribute access on an instance (e.g. rr.Literal) resolves to the
    matcher, factory or scaffolding object registered under that name.
    """

    _SYMBOLS = {
        # Matchers
        'CharExcept': CharExcept,
        'EndOfLine': EndOfLine,
        'EndOfText': EndOfText,
        'Hidden': Hidden,
        'Insert': Insert,
        'Literal': Literal,
        'Node': Node,
        'Or': Or,
        'Ref': Ref,
        'SequentialPair': SequentialPair,
        'StartOfLine': StartOfLine,
        'ZeroOrMore': ZeroOrMore,

        # Convenience factories
        'Char': Char,
        'MultiLineText': MultiLineText,
        'OneOrMore': OneOrMore,
        'Sequence': Sequence,
        'SingleLineText': SingleLineText,

        # Filter factories
        'ChildToAttribute': ChildToAttribute,
        'ExtractElement': ExtractElement,
        'GroupSiblings': GroupSiblings,
        'RenameElement': RenameElement,
        'SplitElementAndNest': SplitElementAndNest,

        # Scaffolding
        'Parser': Parser,
    }

    def __getattr__(self, key):
        """Return the symbol registered as key.

        Raises:
            AttributeError: if no symbol of that name is registered.
        """
        try:
            return self._SYMBOLS[key]
        except KeyError:
            # The attribute protocol (hasattr, getattr with a default, copy,
            # pickle) expects AttributeError for a missing name, not KeyError.
            raise AttributeError(
                "'rr' object has no attribute %r" % (key,))
|
||||
Reference in New Issue
Block a user