From 42f1547dd7aa8914634314987d405b47788eb15f Mon Sep 17 00:00:00 2001
From: Ian Gulliver
Date: Wed, 2 Jul 2014 13:09:24 -0700
Subject: [PATCH] First parse through a file; still have bugs

---
 recentrunes.py | 570 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 570 insertions(+)
 create mode 100644 recentrunes.py

diff --git a/recentrunes.py b/recentrunes.py
new file mode 100644
index 0000000..43af543
--- /dev/null
+++ b/recentrunes.py
@@ -0,0 +1,570 @@
+#!/usr/bin/python2.7
+"""Backtracking parser-combinator toolkit that builds a small DOM-like tree.
+
+A grammar is a dict of named Matcher objects. Parser.parseFromString() runs
+the 'main' rule over a string and post-processes the resulting Element tree
+with a list of filters.
+"""
+
+import collections
+
+
+class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
+    """One successful match: the Context after it and the nodes produced."""
+
+    __slots__ = ()
+
+
+class TextNode(object):
+    """Leaf node holding a run of text."""
+
+    # DOM-style name so filters can test nodeName on every node they visit.
+    nodeName = '#text'
+
+    def __init__(self, textContent):
+        self.textContent = textContent
+        self.parentNode = None
+        self.previousSibling = None
+        # Always empty; lets tree walkers treat text and elements alike.
+        self.childNodes = []
+
+    def cloneNode(self, deep=False):
+        """Return a detached copy; deep is accepted for Element parity."""
+        return TextNode(self.textContent)
+
+
+class Element(object):
+    """Named tree node with ordered children and an attribute dict."""
+
+    def __init__(self, nodeName):
+        self.nodeName = nodeName
+        self.parentNode = None
+        self.previousSibling = None
+        self.childNodes = []
+        self.attributes = {}
+
+    @property
+    def textContent(self):
+        """Concatenated text of all descendants, in document order."""
+        return ''.join(child.textContent for child in self.childNodes)
+
+    def appendChild(self, child):
+        """Add child as the last child, detaching it from any old parent."""
+        if child.parentNode is not None:
+            child.parentNode.removeChild(child)
+        child.parentNode = self
+        child.previousSibling = (
+            self.childNodes[-1] if self.childNodes else None)
+        self.childNodes.append(child)
+
+    def insertBefore(self, newNode, refNode):
+        """Insert newNode directly before refNode, an existing child."""
+        if newNode.parentNode is not None:
+            newNode.parentNode.removeChild(newNode)
+        index = self.childNodes.index(refNode)
+        self.childNodes.insert(index, newNode)
+        newNode.parentNode = self
+        newNode.previousSibling = refNode.previousSibling
+        refNode.previousSibling = newNode
+
+    def removeChild(self, child):
+        """Detach child; raises ValueError if it is not a child."""
+        index = self.childNodes.index(child)
+        del self.childNodes[index]
+        if index < len(self.childNodes):
+            # The node that followed child now follows child's predecessor.
+            self.childNodes[index].previousSibling = child.previousSibling
+        child.parentNode = None
+        child.previousSibling = None
+
+    def replaceChild(self, newNode, oldNode):
+        """Put newNode in oldNode's position and detach oldNode."""
+        if newNode is oldNode:
+            return
+        if newNode.parentNode is not None:
+            newNode.parentNode.removeChild(newNode)
+        index = self.childNodes.index(oldNode)
+        self.childNodes[index] = newNode
+        newNode.parentNode = self
+        newNode.previousSibling = oldNode.previousSibling
+        if index + 1 < len(self.childNodes):
+            self.childNodes[index + 1].previousSibling = newNode
+        oldNode.parentNode = None
+        oldNode.previousSibling = None
+
+    def normalize(self):
+        """Merge each run of adjacent text children into its first node.
+
+        Only direct children are touched; previousSibling links are rebuilt.
+        """
+        merged = []
+        for child in self.childNodes:
+            previous = merged[-1] if merged else None
+            if isinstance(child, TextNode) and isinstance(previous, TextNode):
+                previous.textContent += child.textContent
+                child.parentNode = None
+                child.previousSibling = None
+            else:
+                child.previousSibling = previous
+                merged.append(child)
+        # Slice-assign so existing references to the list stay valid.
+        self.childNodes[:] = merged
+
+    def cloneNode(self, deep=False):
+        """Return a detached copy; deep=True also copies the subtree."""
+        clone = Element(self.nodeName)
+        clone.attributes = dict(self.attributes)
+        if deep:
+            for child in self.childNodes:
+                clone.appendChild(child.cloneNode(True))
+        return clone
+
+    def renameNode(self, nodeName):
+        self.nodeName = nodeName
+
+    def setAttribute(self, key, value):
+        self.attributes[key] = value
+
+    def getAttribute(self, key):
+        """Return the attribute value; raises KeyError if it is unset."""
+        return self.attributes[key]
+
+
+# ============ Matchers ============
+
+
+class Matcher(object):
+    """Base class. Subclasses implement match(context), returning an iterator of
+    MatchResult objects, one per way the input at context can be matched.
+    """
+
+
+class CharExcept(Matcher):
+    """Match any single character that is not in chars."""
+
+    def __init__(self, chars):
+        self._chars = chars
+
+    def match(self, context):
+        c = context.stringAfter(1)
+        # c is '' at end of input; '' is "in" every string, so test it first.
+        if c and c not in self._chars:
+            yield MatchResult(context.advance(1), [TextNode(c)])
+
+
+class EndOfLine(Matcher):
+    """Match at end of input, consume one newline, or match right after one.
+
+    Produces no nodes. Each distinct resulting position is yielded once.
+    """
+
+    def match(self, context):
+        if context.atEnd():
+            yield MatchResult(context, [])
+            # Nothing left to consume, and the zero-width case below would
+            # only repeat this same result.
+            return
+        if context.stringAfter(1) == '\n':
+            yield MatchResult(context.advance(1), [])
+        if context.stringBefore(1) == '\n':
+            yield MatchResult(context, [])
+
+
+class EndOfText(Matcher):
+    """Zero-width match that succeeds only at end of input."""
+
+    def match(self, context):
+        if context.atEnd():
+            yield MatchResult(context, [])
+
+
+class Hidden(Matcher):
+    """Match whatever child matches but discard the nodes it produced."""
+
+    def __init__(self, child):
+        self._child = child
+
+    def match(self, context):
+        for result in self._child.match(context):
+            yield MatchResult(result.context, [])
+
+
+class Insert(Matcher):
+    """Zero-width match that emits a text node containing value."""
+
+    def __init__(self, value):
+        self._value = value
+
+    def match(self, context):
+        # nodes must be a list so SequentialPair can concatenate it.
+        yield MatchResult(context, [TextNode(self._value)])
+
+
+class Literal(Matcher):
+    """Consume the exact string value, producing no nodes."""
+
+    def __init__(self, value):
+        self._value = value
+
+    def match(self, context):
+        length = len(self._value)
+        if context.stringAfter(length) == self._value:
+            yield MatchResult(context.advance(length), [])
+
+
+class Node(Matcher):
+    """Wrap the nodes produced by child in a new Element called name."""
+
+    def __init__(self, name, child):
+        self._name = name
+        self._child = child
+
+    def match(self, context):
+        for result in self._child.match(context):
+            element = Element(self._name)
+            for node in result.nodes:
+                # Clone: SequentialPair shares node objects between the
+                # alternative results it yields, so never re-parent them.
+                element.appendChild(node.cloneNode(True))
+            element.normalize()
+            yield MatchResult(result.context, [element])
+
+
+class Or(Matcher):
+    """Try each option in order, yielding every match of every option."""
+
+    def __init__(self, *options):
+        self._options = options
+
+    def match(self, context):
+        for option in self._options:
+            for result in option.match(context):
+                yield result
+
+
+class Ref(Matcher):
+    """Defer to the rule stored under key in context.rules at match time."""
+
+    def __init__(self, key):
+        self._key = key
+
+    def match(self, context):
+        return context.rules[self._key].match(context)
+
+
+class SequentialPair(Matcher):
+    """Match child1 and then child2, concatenating their nodes."""
+
+    def __init__(self, child1, child2):
+        self._child1 = child1
+        self._child2 = child2
+
+    def match(self, context):
+        for result1 in self._child1.match(context):
+            for result2 in self._child2.match(result1.context):
+                yield MatchResult(
+                    result2.context,
+                    result1.nodes + result2.nodes)
+
+
+class StartOfLine(Matcher):
+    """Match at start of input, consume one newline, or match right after one.
+
+    Produces no nodes.
+    """
+
+    def match(self, context):
+        if context.atStart():
+            yield MatchResult(context, [])
+        if context.stringAfter(1) == '\n':
+            yield MatchResult(context.advance(1), [])
+        if context.stringBefore(1) == '\n':
+            yield MatchResult(context, [])
+
+
+class ZeroOrMore(Matcher):
+    """Match child any number of times; the shortest match is yielded first.
+
+    Raises Exception if a repetition consumes no input, since that would
+    otherwise recurse forever.
+    """
+
+    def __init__(self, child):
+        # Recursive definition: zero-or-more is "nothing" or child + self.
+        self._pair = SequentialPair(child, self)
+
+    def match(self, context):
+        yield MatchResult(context, [])
+        for result in self._pair.match(context):
+            if result.context.remaining() == context.remaining():
+                raise Exception(
+                    "Child or ZeroOrMore didn't consume input; grammar bug?")
+            yield result
+
+
+# ============ Convenience factories ============
+
+
+def Char():
+    """Matcher for any single character."""
+    return CharExcept('')
+
+
+def MultiLineText():
+    """Matcher for one or more characters, newlines included."""
+    return OneOrMore(Char())
+
+
+def OneOrMore(child):
+    """Matcher for child repeated at least once."""
+    return SequentialPair(child, ZeroOrMore(child))
+
+
+def Sequence(*children):
+    """Matcher for all children in order, built from nested pairs."""
+    if not children:
+        raise ValueError('Sequence() requires at least one child')
+    if len(children) == 1:
+        return children[0]
+    return SequentialPair(children[0], Sequence(*children[1:]))
+
+
+def SingleLineText():
+    """Matcher for one or more characters other than newline."""
+    return OneOrMore(CharExcept('\n'))
+
+
+# ============ Filter factories ============
+
+
+def ChildToAttribute(parentName, childName):
+    """Filter: move the text of the first childName child of each
+    parentName element into an attribute named childName.
+    """
+    def Filter(node):
+        if node.nodeName != parentName:
+            return
+        for childNode in node.childNodes:
+            if childNode.nodeName == childName:
+                node.setAttribute(childName, childNode.textContent)
+                node.removeChild(childNode)
+                # Stop at once: the list being iterated was just modified.
+                break
+    return Filter
+
+
+def ExtractElement(nodeName):
+    """Filter: replace each nodeName element with its own children."""
+    def Filter(node):
+        parentNode = node.parentNode
+        # A parentless (root) node has nowhere to put its children.
+        if node.nodeName != nodeName or parentNode is None:
+            return
+        # Copy the list: insertBefore() removes each child from node.
+        for childNode in list(node.childNodes):
+            parentNode.insertBefore(childNode, node)
+        parentNode.removeChild(node)
+        parentNode.normalize()
+    return Filter
+
+
+def GroupSiblings(parentName, childNames):
+    """Filter: gather consecutive childNames nodes under a shared new
+    parentName element.
+    """
+    def Filter(node):
+        if node.nodeName not in childNames:
+            return
+        previous = node.previousSibling
+        if previous is not None and previous.nodeName == parentName:
+            # Join the group started by an earlier sibling.
+            previous.appendChild(node)
+            return
+        if node.parentNode is None:
+            return
+        newNode = Element(parentName)
+        node.parentNode.replaceChild(newNode, node)
+        newNode.appendChild(node)
+    return Filter
+
+
+def RenameElement(oldName, newName):
+    """Filter: rename every oldName element to newName."""
+    def Filter(node):
+        if node.nodeName == oldName:
+            node.renameNode(newName)
+    return Filter
+
+
+def SplitElementAndNest(originalName, newNames):
+    """Filter: replace each originalName element with a chain of nested
+    elements named by newNames (outermost first); the original children
+    move into the innermost one.
+    """
+    if not newNames:
+        raise ValueError('SplitElementAndNest() requires a new name')
+
+    def Filter(node):
+        if node.nodeName != originalName or node.parentNode is None:
+            return
+        outerNode = innerNode = None
+        for newName in newNames:
+            newNode = Element(newName)
+            if outerNode is None:
+                outerNode = newNode
+            else:
+                innerNode.appendChild(newNode)
+            innerNode = newNode
+        # Copy the list: appendChild() removes each child from node.
+        for childNode in list(node.childNodes):
+            innerNode.appendChild(childNode)
+        node.parentNode.replaceChild(outerNode, node)
+    return Filter
+
+
+# ============ Scaffolding ============
+
+
+def ApplyFilter(node, callback):
+    """Call callback on node and then on each of its descendants."""
+    # Snapshot first: callback may move node or re-parent its children.
+    childNodes = list(node.childNodes)
+    callback(node)
+    for childNode in childNodes:
+        ApplyFilter(childNode, callback)
+
+
+def ApplyFilters(node, filters):
+    """Run each filter, in order, over the whole tree rooted at node."""
+    for callback in filters:
+        ApplyFilter(node, callback)
+
+
+class Context(object):
+    """Parse position: the rule table, the input string, and an offset."""
+
+    def __init__(self, rules, string, inputIndex=0):
+        self.rules = rules
+        self.string = string
+        self.inputIndex = inputIndex
+
+    def copy(self):
+        return Context(self.rules, self.string, self.inputIndex)
+
+    def stringAfter(self, numChars=None):
+        """Return up to numChars characters ahead (all of them if None)."""
+        if numChars is None:
+            numChars = self.remaining()
+        return self.string[self.inputIndex:self.inputIndex + numChars]
+
+    def stringBefore(self, numChars):
+        """Return up to numChars characters just behind the position."""
+        start = max(0, self.inputIndex - numChars)
+        return self.string[start:self.inputIndex]
+
+    def atStart(self):
+        return self.inputIndex == 0
+
+    def atEnd(self):
+        return self.remaining() == 0
+
+    def remaining(self):
+        """Return the number of characters not yet consumed."""
+        return len(self.string) - self.inputIndex
+
+    def advance(self, numChars):
+        """Return a copy advanced by numChars, which must be non-zero."""
+        if not numChars:
+            raise Exception('Context.advance(0) called')
+        context = self.copy()
+        context.inputIndex += numChars
+        return context
+
+
+class Parser(object):
+    """A compiled grammar: a dict of rules plus tree filters to apply."""
+
+    @classmethod
+    def fromFile(cls, filename):
+        """Load a Parser from a Python grammar file.
+
+        The file is executed with only 'rr' predefined and must bind
+        exactly one new global name, whose value is a Parser.
+
+        Raises ValueError or TypeError if the file breaks that contract.
+        """
+        with open(filename, 'r') as fh:
+            grammar = fh.read()
+        compiled = compile(grammar, filename, 'exec')
+        glbls = {
+            'rr': rr(),
+        }
+        # NOTE: this runs arbitrary code; only load trusted grammar files.
+        eval(compiled, glbls)
+        newKeys = set(glbls) - {'__builtins__', 'rr'}
+        if len(newKeys) != 1:
+            raise ValueError(
+                'Grammar must define exactly one name, found: %r'
+                % sorted(newKeys))
+        value = glbls[newKeys.pop()]
+        if not isinstance(value, cls):
+            raise TypeError(
+                'Grammar must define a %s, found: %r' % (cls.__name__, value))
+        return value
+
+    def __init__(self, rules, filters):
+        self.rules = rules
+        self.filters = filters
+
+    def parseFromString(self, string):
+        """Return the filtered root node of the first parse, or None."""
+        context = Context(self.rules, string)
+        for result in context.rules['main'].match(context):
+            rootNode = result.nodes[0]
+            ApplyFilters(rootNode, self.filters)
+            return rootNode
+        return None
+
+
+class rr(object):
+    """Namespace handed to grammar files; exposes the public symbols."""
+
+    _SYMBOLS = {
+        # Matchers
+        'CharExcept': CharExcept,
+        'EndOfLine': EndOfLine,
+        'EndOfText': EndOfText,
+        'Hidden': Hidden,
+        'Insert': Insert,
+        'Literal': Literal,
+        'Node': Node,
+        'Or': Or,
+        'Ref': Ref,
+        'SequentialPair': SequentialPair,
+        'StartOfLine': StartOfLine,
+        'ZeroOrMore': ZeroOrMore,
+
+        # Convenience factories
+        'Char': Char,
+        'MultiLineText': MultiLineText,
+        'OneOrMore': OneOrMore,
+        'Sequence': Sequence,
+        'SingleLineText': SingleLineText,
+
+        # Filter factories
+        'ChildToAttribute': ChildToAttribute,
+        'ExtractElement': ExtractElement,
+        'GroupSiblings': GroupSiblings,
+        'RenameElement': RenameElement,
+        'SplitElementAndNest': SplitElementAndNest,
+
+        # Scaffolding
+        'Parser': Parser,
+    }
+
+    def __getattr__(self, key):
+        try:
+            return self._SYMBOLS[key]
+        except KeyError:
+            # AttributeError keeps hasattr()/getattr(..., default) working.
+            raise AttributeError(key)