First parse through a file; still have bugs

2014-07-02 13:09:24 -07:00
parent 09a3b9c2ec
commit 42f1547dd7
1 changed files with 433 additions and 0 deletions
--- a/recentrunes.py
+++ b/recentrunes.py
@@ -0,0 +1,433 @@
+#!/usr/bin/python2.7
+
+import collections
+
+class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
+  """Outcome of one successful match attempt.
+
+  Fields:
+    context: the Context to continue matching from (matchers that consume
+      input pass an advanced context here).
+    nodes: list of nodes produced by the match; may be empty.
+  """
+
+  # Without empty __slots__ every instance of this subclass would carry its
+  # own __dict__ on top of the tuple storage.
+  __slots__ = ()
+
+
+class TextNode(object):
+  """Leaf node holding a run of text.
+
+  Carries the same structural attributes as Element (nodeName, parentNode,
+  previousSibling, childNodes) so that code walking the tree can treat both
+  node kinds uniformly.
+  """
+
+  # Fixed name shared by all text nodes; lets name-based checks run on any
+  # node without special-casing text.
+  nodeName = '#text'
+
+  def __init__(self, textContent):
+    """Creates a detached text node holding `textContent`."""
+    self.textContent = textContent
+    self.parentNode = None
+    self.previousSibling = None
+    # Always empty; present because tree walkers read it on every node.
+    self.childNodes = []
+
+  def cloneNode(self, deep=False):
+    """Returns a detached copy of this node.
+
+    Args:
+      deep: accepted for parity with Element-style cloning; a text node has
+        no children, so it has no effect.
+    """
+    return TextNode(self.textContent)
+
+
+class Element(object):
+  """Minimal DOM-style element: a named node with attributes and children.
+
+  Attributes:
+    nodeName: name of the element.
+    parentNode: the Element this node was last appended to, or None.
+    previousSibling: the node just before this one in its parent's
+      childNodes, or None.
+    childNodes: ordered list of child nodes.
+    attributes: dict mapping attribute name to value.
+  """
+
+  def __init__(self, nodeName):
+    """Creates a detached element with no children and no attributes."""
+    self.nodeName = nodeName
+    self.parentNode = None
+    self.previousSibling = None
+    self.childNodes = []
+    self.attributes = {}
+
+  @property
+  def textContent(self):
+    """Concatenated textContent of all children, in order (read-only)."""
+    return ''.join(child.textContent for child in self.childNodes)
+
+  def _relinkAt(self, index):
+    """Points the child at `index`, if there is one, at its left neighbour."""
+    if 0 <= index < len(self.childNodes):
+      self.childNodes[index].previousSibling = (
+          self.childNodes[index - 1] if index > 0 else None)
+
+  def appendChild(self, child):
+    """Adds `child` as the last child of this element.
+
+    The child is not removed from any childNodes list it is already in.
+    """
+    child.parentNode = self
+    child.previousSibling = self.childNodes[-1] if self.childNodes else None
+    self.childNodes.append(child)
+
+  def removeChild(self, child):
+    """Detaches `child` from this element.
+
+    Raises:
+      ValueError: if `child` is not in childNodes.
+    """
+    index = self.childNodes.index(child)
+    del self.childNodes[index]
+    child.parentNode = None
+    child.previousSibling = None
+    # The node that slid into the gap must no longer point at `child`.
+    self._relinkAt(index)
+
+  def replaceChild(self, newNode, oldNode):
+    """Puts `newNode` where `oldNode` was and detaches `oldNode`.
+
+    Raises:
+      ValueError: if `oldNode` is not in childNodes.
+    """
+    index = self.childNodes.index(oldNode)
+    self.childNodes[index] = newNode
+    newNode.parentNode = self
+    oldNode.parentNode = None
+    oldNode.previousSibling = None
+    self._relinkAt(index)
+    # The following sibling used to point at oldNode.
+    self._relinkAt(index + 1)
+
+  def normalize(self):
+    """Merges each run of adjacent TextNode children into its first node.
+
+    Only direct children are considered. previousSibling is recomputed for
+    every surviving child; merged-away text nodes are detached.
+    """
+    merged = []
+    for child in self.childNodes:
+      previous = merged[-1] if merged else None
+      if isinstance(child, TextNode) and isinstance(previous, TextNode):
+        previous.textContent += child.textContent
+        child.parentNode = None
+        child.previousSibling = None
+        continue
+      child.previousSibling = previous
+      merged.append(child)
+    # Assign through a slice so existing references to the list stay valid.
+    self.childNodes[:] = merged
+
+  def cloneNode(self, deep=False):
+    """Returns a detached copy of this element.
+
+    Args:
+      deep: when true, children are copied recursively; otherwise the copy
+        has no children.
+    """
+    clone = Element(self.nodeName)
+    clone.attributes = dict(self.attributes)
+    if deep:
+      for child in self.childNodes:
+        if isinstance(child, Element):
+          clone.appendChild(child.cloneNode(True))
+        else:
+          # Text nodes are rebuilt directly from their text.
+          clone.appendChild(TextNode(child.textContent))
+    return clone
+
+  def renameNode(self, nodeName):
+    """Changes this element's name in place."""
+    self.nodeName = nodeName
+
+  def setAttribute(self, key, value):
+    """Sets attribute `key` to `value`, overwriting any previous value."""
+    self.attributes[key] = value
+
+  def getAttribute(self, key):
+    """Returns the value of attribute `key`.
+
+    Raises:
+      KeyError: if the attribute has not been set.
+    """
+    return self.attributes[key]
+
+
+
+# ============ Matchers ============
+
+
+class Matcher(object):
+  """Common base class of all matchers; adds no behaviour of its own."""
+
+
+class CharExcept(Matcher):
+  """Matches any single character that is NOT in a given set."""
+
+  def __init__(self, chars):
+    """Args:
+      chars: string (or other container) of characters to reject.
+    """
+    self._chars = chars
+
+  def match(self, context):
+    """Yields one result consuming the next character, if it is allowed.
+
+    The consumed character is returned as a single TextNode. Nothing is
+    yielded at end of input or when the next character is excluded.
+    """
+    c = context.stringAfter(1)
+    # stringAfter() returns '' at end of input, hence the truthiness check.
+    if c and c not in self._chars:
+      yield MatchResult(
+          context.advance(1),
+          [TextNode(c)])
+
+
+class EndOfLine(Matcher):
+  """Matches a line ending; never produces nodes.
+
+  Up to three results are yielded, in this order:
+    * at end of input, without consuming anything;
+    * when the next character is a newline, consuming it;
+    * when the previous character is a newline, without consuming anything.
+  """
+
+  def match(self, context):
+    if context.atEnd():
+      yield MatchResult(context=context, nodes=[])
+
+    newlineAhead = context.stringAfter(1) == '\n'
+    if newlineAhead:
+      yield MatchResult(context=context.advance(1), nodes=[])
+
+    newlineBehind = context.stringBefore(1) == '\n'
+    if newlineBehind:
+      yield MatchResult(context=context, nodes=[])
+
+
+class EndOfText(Matcher):
+  """Matches only at the very end of the input; consumes and emits nothing."""
+
+  def match(self, context):
+    if not context.atEnd():
+      return
+    yield MatchResult(context=context, nodes=[])
+
+
+class Hidden(Matcher):
+  """Runs a child matcher but discards the nodes it produces."""
+
+  def __init__(self, child):
+    self._child = child
+
+  def match(self, context):
+    """Yields one node-less result per child result, keeping its context."""
+    for inner in self._child.match(context):
+      yield MatchResult(context=inner.context, nodes=[])
+
+
+class Insert(Matcher):
+  """Always matches without consuming input, emitting a fixed text node."""
+
+  def __init__(self, value):
+    """Args:
+      value: text to emit on every match.
+    """
+    self._value = value
+
+  def match(self, context):
+    """Yields a single result at the unchanged position.
+
+    The result's nodes is a one-element list, because other matchers
+    concatenate and iterate `nodes` as a list.
+    """
+    yield MatchResult(
+        context,
+        [TextNode(self._value)])
+
+
+class Literal(Matcher):
+  """Matches an exact string and consumes it without emitting any node."""
+
+  def __init__(self, value):
+    self._value = value
+
+  def match(self, context):
+    """Yields one node-less result past the literal, if it is next in input."""
+    expected = self._value
+    width = len(expected)
+    if context.stringAfter(width) != expected:
+      return
+    yield MatchResult(context=context.advance(width), nodes=[])
+
+
+class Node(Matcher):
+  """Wraps whatever a child matcher produces in a new Element."""
+
+  def __init__(self, name, child):
+    """Args:
+      name: nodeName of the Element to create for each match.
+      child: matcher whose result nodes become the element's children.
+    """
+    self._name = name
+    self._child = child
+
+  @staticmethod
+  def _clone(node):
+    """Returns a detached deep copy of a TextNode or Element."""
+    if isinstance(node, TextNode):
+      return TextNode(node.textContent)
+    copy = Element(node.nodeName)
+    copy.attributes = dict(node.attributes)
+    for child in node.childNodes:
+      copy.appendChild(Node._clone(child))
+    return copy
+
+  def match(self, context):
+    """Yields one single-element result per result of the child matcher."""
+    for result in self._child.match(context):
+      element = Element(self._name)
+      for node in result.nodes:
+        # appendChild() rewrites parentNode/previousSibling on its argument,
+        # and the same node objects can be part of several results, so
+        # only copies are attached.
+        element.appendChild(self._clone(node))
+      element.normalize()
+      yield MatchResult(
+          result.context,
+          [element])
+
+
+class Or(Matcher):
+  """Tries several alternative matchers at the same position."""
+
+  def __init__(self, *options):
+    self._options = options
+
+  def match(self, context):
+    """Yields every result of every option, in the order options were given."""
+    for alternative in self._options:
+      found = alternative.match(context)
+      for outcome in found:
+        yield outcome
+
+
+class Ref(Matcher):
+  """Defers to a named rule, looked up in context.rules at match time."""
+
+  def __init__(self, key):
+    self._key = key
+
+  def match(self, context):
+    """Returns the match results of the rule stored under this key."""
+    target = context.rules[self._key]
+    return target.match(context)
+
+
+class SequentialPair(Matcher):
+  """Matches one matcher immediately followed by another."""
+
+  def __init__(self, child1, child2):
+    self._child1 = child1
+    self._child2 = child2
+
+  def match(self, context):
+    """Yields every combination of a first match and a following second match.
+
+    The nodes of a combined result are the first match's nodes followed by
+    the second match's nodes.
+    """
+    for head in self._child1.match(context):
+      for tail in self._child2.match(head.context):
+        combined = head.nodes + tail.nodes
+        yield MatchResult(context=tail.context, nodes=combined)
+
+
+class StartOfLine(Matcher):
+  """Matches the beginning of a line; never produces nodes.
+
+  Up to three results are yielded, in this order:
+    * at the start of input, without consuming anything;
+    * when the next character is a newline, consuming it;
+    * when the previous character is a newline, without consuming anything.
+  """
+
+  def match(self, context):
+    if context.atStart():
+      yield MatchResult(context=context, nodes=[])
+
+    newlineAhead = context.stringAfter(1) == '\n'
+    if newlineAhead:
+      yield MatchResult(context=context.advance(1), nodes=[])
+
+    newlineBehind = context.stringBefore(1) == '\n'
+    if newlineBehind:
+      yield MatchResult(context=context, nodes=[])
+
+
+class ZeroOrMore(Matcher):
+  """Matches any number of repetitions of a child, shortest match first."""
+
+  def __init__(self, child):
+    # One repetition is "child, then this same matcher again".
+    self._pair = SequentialPair(child, self)
+
+  def match(self, context):
+    """Yields the empty match, then every match of one or more repetitions.
+
+    Raises:
+      Exception: if a repetition finishes without consuming any input.
+    """
+    yield MatchResult(context=context, nodes=[])
+    for repeated in self._pair.match(context):
+      consumedNothing = (
+          result_remaining(repeated) == context.remaining()
+          if False else
+          repeated.context.remaining() == context.remaining())
+      if consumedNothing:
+        raise Exception(
+            "Child or ZeroOrMore didn't consume input; grammar bug?")
+      yield repeated
+
+
+# ============ Convenience factories ============
+
+
+def Char():
+  """Returns a CharExcept built with an empty set of excluded characters."""
+  nothingExcluded = ''
+  return CharExcept(nothingExcluded)
+
+
+def MultiLineText():
+  """Returns a matcher for one or more Char() matches."""
+  anyChar = Char()
+  return OneOrMore(anyChar)
+
+
+def OneOrMore(child):
+  """Returns a matcher for `child` followed by zero or more further `child`."""
+  rest = ZeroOrMore(child)
+  return SequentialPair(child, rest)
+
+
+def Sequence(*children):
+  """Chains matchers so that each must match right after the previous one.
+
+  A single child is returned unchanged; several children become a
+  right-nested chain of SequentialPair objects.
+
+  Raises:
+    IndexError: if called without any children.
+  """
+  chain = children[-1]
+  # Build from the right so the first child ends up outermost.
+  for child in reversed(children[:-1]):
+    chain = SequentialPair(child, chain)
+  return chain
+
+
+def SingleLineText():
+  """Returns a matcher for one or more CharExcept matches excluding newline."""
+  notNewline = CharExcept('\n')
+  return OneOrMore(notNewline)
+
+
+
+# ============ Filter factories ============
+
+
+def ChildToAttribute(parentName, childName):
+  """Builds a filter that folds a child element into a parent attribute.
+
+  On every node named `parentName`, the first child named `childName` is
+  removed and its text is stored as attribute `childName` of the parent.
+
+  Args:
+    parentName: nodeName of the elements to process.
+    childName: nodeName of the child to convert; also the attribute name.
+
+  Returns:
+    A function taking a node; it modifies the node in place.
+  """
+  def TextOf(node):
+    """Returns the text of a TextNode, or all text beneath an element."""
+    if isinstance(node, TextNode):
+      return node.textContent
+    return ''.join(TextOf(child) for child in node.childNodes)
+
+  def Filter(node):
+    # getattr: the filter is handed every node, including ones that may not
+    # define nodeName.
+    if getattr(node, 'nodeName', None) != parentName:
+      return
+    for childNode in node.childNodes:
+      if getattr(childNode, 'nodeName', None) == childName:
+        node.setAttribute(childName, TextOf(childNode))
+        # Safe although we are iterating childNodes: we stop right after.
+        node.removeChild(childNode)
+        break
+  return Filter
+
+
+def ExtractElement(nodeName):
+  """Builds a filter that replaces an element with its own children.
+
+  Every node named `nodeName` is removed from its parent and its children
+  take its place, in order. A node without a parent is left untouched.
+
+  Args:
+    nodeName: nodeName of the elements to unwrap.
+
+  Returns:
+    A function taking a node; it modifies the node's parent in place.
+  """
+  def Filter(node):
+    if getattr(node, 'nodeName', None) != nodeName:
+      return
+    parentNode = node.parentNode
+    if parentNode is None:
+      # Nothing to splice the children into.
+      return
+    index = parentNode.childNodes.index(node)
+    children = list(node.childNodes)
+    # Put the children exactly where the element was, keeping their order.
+    parentNode.childNodes[index:index + 1] = children
+    for childNode in children:
+      childNode.parentNode = parentNode
+    node.parentNode = None
+    node.previousSibling = None
+    # node.childNodes is deliberately left as is, so a traversal that is
+    # currently descending through `node` still reaches the moved children.
+    previous = None
+    for sibling in parentNode.childNodes:
+      sibling.previousSibling = previous
+      previous = sibling
+    parentNode.normalize()
+  return Filter
+
+
+def GroupSiblings(parentName, childNames):
+  """Builds a filter that gathers adjacent nodes under a shared wrapper.
+
+  A node whose name is in `childNames` is moved into the element directly
+  before it when that element is named `parentName`; otherwise a new
+  `parentName` element is created in its place and the node is moved inside.
+  A node without a parent is left untouched.
+
+  The filter changes the parent's child list, so a caller walking that list
+  should iterate over a copy.
+
+  Args:
+    parentName: nodeName of the wrapper element.
+    childNames: container of nodeNames to be grouped.
+
+  Returns:
+    A function taking a node; it modifies the node's parent in place.
+  """
+  def Filter(node):
+    if getattr(node, 'nodeName', None) not in childNames:
+      return
+    parentNode = node.parentNode
+    if parentNode is None:
+      return
+    # Look the neighbour up by position rather than trusting
+    # previousSibling, which earlier edits may have left out of date.
+    index = parentNode.childNodes.index(node)
+    previous = parentNode.childNodes[index - 1] if index > 0 else None
+    if getattr(previous, 'nodeName', None) == parentName:
+      # Detach first so the node does not end up listed under two parents.
+      parentNode.removeChild(node)
+      previous.appendChild(node)
+      return
+    newNode = Element(parentName)
+    parentNode.replaceChild(newNode, node)
+    newNode.appendChild(node)
+  return Filter
+
+
+def RenameElement(oldName, newName):
+  """Builds a filter that renames every node called `oldName` to `newName`."""
+  def Filter(node):
+    if node.nodeName == oldName:
+      node.renameNode(newName)
+  return Filter
+
+
+def SplitElementAndNest(originalName, newNames):
+  """Builds a filter that replaces one element with a chain of nested ones.
+
+  Every node named `originalName` is replaced in its parent by an element
+  named newNames[0], containing one named newNames[1], and so on; the
+  original node's children are appended to the innermost new element.
+
+  Args:
+    originalName: nodeName of the elements to replace.
+    newNames: non-empty sequence of nodeNames, outermost first.
+
+  Returns:
+    A function taking a node; it modifies the node's parent in place.
+  """
+  def Filter(node):
+    if getattr(node, 'nodeName', None) != originalName:
+      return
+    outerNode = innerNode = None
+    for newName in newNames:
+      newNode = Element(newName)
+      if outerNode is None:
+        outerNode = innerNode = newNode
+      else:
+        innerNode.appendChild(newNode)
+        innerNode = newNode
+    # Iterate over a copy: the children are being handed to another parent.
+    for childNode in list(node.childNodes):
+      innerNode.appendChild(childNode)
+    node.parentNode.replaceChild(outerNode, node)
+  return Filter
+
+
+
+# ============ Scaffolding ============
+
+
+def ApplyFilter(node, callback):
+  """Calls `callback` on `node` and then on all of its descendants.
+
+  Nodes are visited parent first, children in order.
+
+  Args:
+    node: root of the subtree to process.
+    callback: function taking a single node.
+  """
+  callback(node)
+  # Callbacks may add, remove or re-parent nodes. Walking a snapshot keeps
+  # such edits from making the loop skip siblings.
+  for childNode in list(node.childNodes):
+    ApplyFilter(childNode, callback)
+
+
+def ApplyFilters(node, filters):
+  """Runs each filter over the whole subtree at `node`, one after another.
+
+  Each filter finishes its full pass before the next one starts.
+  """
+  for treeFilter in filters:
+    ApplyFilter(node, treeFilter)
+
+
+class Context(object):
+  """A position within the input string, together with the grammar rules.
+
+  Attributes:
+    rules: mapping of rule name to matcher.
+    string: the complete input.
+    inputIndex: offset of the next unread character.
+  """
+
+  def __init__(self, rules, string, inputIndex=0):
+    self.rules = rules
+    self.string = string
+    self.inputIndex = inputIndex
+
+  def copy(self):
+    """Returns a new Context with the same rules, string and position."""
+    return Context(self.rules, self.string, self.inputIndex)
+
+  def stringAfter(self, numChars=None):
+    """Returns up to `numChars` characters starting at the current position.
+
+    With numChars omitted, everything up to the end of input is returned.
+    """
+    width = self.remaining() if numChars is None else numChars
+    begin = self.inputIndex
+    return self.string[begin:begin + width]
+
+  def stringBefore(self, numChars):
+    """Returns up to `numChars` characters just before the current position.
+
+    Fewer characters are returned when the start of input is closer.
+    """
+    begin = self.inputIndex - numChars
+    width = numChars
+    if begin < 0:
+      # Shrink the window by the amount it overshoots the start.
+      width = numChars + begin
+      begin = 0
+    return self.string[begin:begin + width]
+
+  def atStart(self):
+    """True when no input has been consumed."""
+    return self.inputIndex == 0
+
+  def atEnd(self):
+    """True when no input remains."""
+    return self.remaining() == 0
+
+  def remaining(self):
+    """Number of characters from the current position to the end of input."""
+    return len(self.string) - self.inputIndex
+
+  def advance(self, numChars):
+    """Returns a copy moved forward by `numChars`; self is left unchanged.
+
+    Raises:
+      Exception: if numChars is zero (or otherwise falsy).
+    """
+    if not numChars:
+      raise Exception('Context.advance(0) called')
+    moved = self.copy()
+    moved.inputIndex = moved.inputIndex + numChars
+    return moved
+
+
+class Parser(object):
+  """A grammar (named rules) plus the filters applied to its parse tree.
+
+  Attributes:
+    rules: mapping of rule name to matcher; parsing starts at 'main'.
+    filters: sequence of filter functions run over the parsed tree.
+  """
+
+  @classmethod
+  def fromFile(cls, filename):
+    """Loads a Parser from a grammar file written in Python.
+
+    The file is executed with a single predefined global, `rr`, and must
+    define exactly one new global, which must be an instance of this class.
+
+    Args:
+      filename: path of the grammar file.
+
+    Returns:
+      The Parser instance defined by the file.
+
+    Raises:
+      AssertionError: if the file does not define exactly one new global,
+        or that global is not an instance of this class.
+    """
+    # `with` guarantees the handle is closed even if read() fails.
+    with open(filename, 'r') as fh:
+      grammar = fh.read()
+    compiled = compile(grammar, filename, 'exec')
+    glbls = {
+        'rr': rr(),
+    }
+    # SECURITY: this runs the grammar file as arbitrary Python code. Only
+    # load grammar files from trusted sources.
+    eval(compiled, glbls)
+    newKeys = (set(glbls) - {'__builtins__', 'rr'})
+    assert len(newKeys) == 1, newKeys
+    value = glbls[newKeys.pop()]
+    assert isinstance(value, cls), value
+    return value
+
+  def __init__(self, rules, filters):
+    self.rules = rules
+    self.filters = filters
+
+  def parseFromString(self, string):
+    """Parses `string` with the 'main' rule and filters the result.
+
+    Returns:
+      The first node of the first successful match, after all filters have
+      been applied to it, or None if 'main' does not match.
+    """
+    context = Context(self.rules, string)
+    for result in context.rules['main'].match(context):
+      rootNode = result.nodes[0]
+      ApplyFilters(rootNode, self.filters)
+      return rootNode
+    return None
+
+
+class rr(object):
+  """Namespace object exposed to grammar files as `rr`.
+
+  Attribute access (e.g. rr.Literal) is resolved through _SYMBOLS.
+  """
+
+  # Every name a grammar file may reference through `rr`.
+  _SYMBOLS = {
+      # Matchers
+      'CharExcept': CharExcept,
+      'EndOfLine': EndOfLine,
+      'EndOfText': EndOfText,
+      'Hidden': Hidden,
+      'Insert': Insert,
+      'Literal': Literal,
+      'Node': Node,
+      'Or': Or,
+      'Ref': Ref,
+      'SequentialPair': SequentialPair,
+      'StartOfLine': StartOfLine,
+      'ZeroOrMore': ZeroOrMore,
+
+      # Convenience factories
+      'Char': Char,
+      'MultiLineText': MultiLineText,
+      'OneOrMore': OneOrMore,
+      'Sequence': Sequence,
+      'SingleLineText': SingleLineText,
+
+      # Filter factories
+      'ChildToAttribute': ChildToAttribute,
+      'ExtractElement': ExtractElement,
+      'GroupSiblings': GroupSiblings,
+      'RenameElement': RenameElement,
+      'SplitElementAndNest': SplitElementAndNest,
+
+      # Scaffolding
+      'Parser': Parser,
+  }
+
+  def __getattr__(self, key):
+    """Returns the symbol registered under `key`.
+
+    Raises:
+      AttributeError: if `key` is not a known symbol.
+    """
+    try:
+      return self._SYMBOLS[key]
+    except KeyError:
+      # The attribute protocol (hasattr, getattr with a default, copy,
+      # pickle) expects AttributeError for a missing name, not KeyError.
+      raise AttributeError(key)