Working python parsing, some working stringification

This commit is contained in:
Ian Gulliver
2014-07-02 13:24:59 -07:00
parent 42f1547dd7
commit d6222c374c

View File

@@ -8,8 +8,17 @@ class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
class TextNode(object):
    """Leaf node that holds a run of text.

    Instances expose the attributes assigned in ``__init__``: ``nodeName``
    (always ``'#text'``), ``textContent``, ``previousSibling`` and an empty
    ``childNodes`` list.
    """

    def __init__(self, textContent):
        """Create a text node wrapping the string ``textContent``."""
        self.nodeName = '#text'
        self.textContent = textContent
        self.previousSibling = None
        # Always empty here; presumably present so callers can treat text
        # nodes and elements uniformly -- confirm against the tree walkers.
        self.childNodes = []

    def cloneNode(self, deep):
        """Return a new TextNode carrying the same text.

        ``deep`` is accepted but not used. The copy is built through the
        constructor, so its ``previousSibling`` is ``None``.
        """
        return TextNode(self.textContent)

    def __str__(self):
        """Return the raw text content, unescaped."""
        # TODO: HTML escaping
        return self.textContent
class Element(object): class Element(object):
@@ -42,18 +51,16 @@ class Element(object):
oldNode.previousSibling = None oldNode.previousSibling = None
def normalize(self):
    """Merge every run of adjacent TextNode children into one node.

    The first text node of a run absorbs the ``textContent`` of the text
    nodes that follow it; each absorbed node is detached with
    ``self.removeChild``. Any non-text child ends the current run.
    """
    lastTextNode = None
    # Walk a snapshot of the child list, since removeChild() is called
    # while looping (presumably it mutates self.childNodes -- confirm).
    for childNode in list(self.childNodes):
        if not isinstance(childNode, TextNode):
            lastTextNode = None
            continue
        if lastTextNode is None:
            # First text node of a new run: later siblings fold into it.
            lastTextNode = childNode
            continue
        lastTextNode.textContent += childNode.textContent
        self.removeChild(childNode)
def renameNode(self, nodeName): def renameNode(self, nodeName):
self.nodeName = nodeName self.nodeName = nodeName
@@ -64,6 +71,20 @@ class Element(object):
def getAttribute(self, key): def getAttribute(self, key):
return self.attributes[key] return self.attributes[key]
def cloneNode(self, deep):
    """Return a copy of this element.

    The copy has the same ``nodeName`` and the same attributes. When
    ``deep`` is true, every child is cloned recursively and appended to
    the copy; otherwise the copy has no children added.
    """
    element = Element(self.nodeName)
    # Attributes belong to the element itself, so they are copied for
    # shallow clones too (this matches DOM cloneNode semantics).
    # items() is used instead of iteritems() so it runs on Python 2 and 3.
    for key, value in self.attributes.items():
        element.setAttribute(key, value)
    if deep:
        for childNode in self.childNodes:
            element.appendChild(childNode.cloneNode(True))
    return element
def __str__(self):
# TODO: attributes
values = map(str, self.childNodes)
return '<%s>%s</%s>' % (self.nodeName, ''.join(values), self.nodeName)
# ============ Matchers ============ # ============ Matchers ============
@@ -79,7 +100,7 @@ class CharExcept(Matcher):
def match(self, context):
    """Yield a single match for the next character unless it is excluded.

    Reads one character with ``context.stringAfter(1)``. If that value
    is truthy and is not in ``self._chars``, yields a ``MatchResult``
    holding the context advanced by one and a one-element node list
    containing a ``TextNode`` for the character. Otherwise yields nothing.
    """
    c = context.stringAfter(1)
    # A falsy `c` presumably means no input is left -- confirm against
    # stringAfter(). Excluded characters also produce no match.
    if not c or c in self._chars:
        return
    yield MatchResult(
        context.advance(1),
        [TextNode(c)])
@@ -311,6 +332,7 @@ def SplitElementAndNest(originalName, newNames):
for childNode in node.childNodes: for childNode in node.childNodes:
innerNode.appendChild(childNode) innerNode.appendChild(childNode)
node.parentNode.replaceChild(outerNode, node) node.parentNode.replaceChild(outerNode, node)
return Filter