Working Python parsing, some working stringification
This commit is contained in:
@@ -8,8 +8,17 @@ class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
|
|||||||
|
|
||||||
class TextNode(object):
    """A leaf node holding a run of character data.

    Attributes:
        nodeName: always the string '#text'.
        textContent: the raw, unescaped text passed to the constructor.
        previousSibling: initialised to None.
        childNodes: initialised to an empty list, so a text node can be
            inspected through the same attribute as a node with children.
    """

    def __init__(self, textContent):
        self.nodeName = '#text'
        self.textContent = textContent
        self.previousSibling = None
        self.childNodes = []

    def cloneNode(self, deep):
        """Return a new TextNode carrying the same text.

        `deep` is accepted but has no effect: the clone is built from
        `textContent` alone.
        """
        return TextNode(self.textContent)

    def __str__(self):
        """Return the text with HTML-significant characters escaped.

        `textContent` itself is left untouched; only the serialised form
        is escaped so that markup characters in the text cannot be read
        back as tags or entities.
        """
        # '&' must be replaced first, otherwise the ampersands introduced
        # by the other two replacements would be escaped a second time.
        return (self.textContent
                .replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;'))
|
||||||
|
|
||||||
|
|
||||||
class Element(object):
|
class Element(object):
|
||||||
@@ -42,18 +51,16 @@ class Element(object):
|
|||||||
oldNode.previousSibling = None
|
oldNode.previousSibling = None
|
||||||
|
|
||||||
def normalize(self):
    """Merge each run of adjacent TextNode children into one node.

    The first TextNode of a run absorbs the `textContent` of every
    TextNode that directly follows it; the absorbed nodes are detached
    with `removeChild`. Any non-text child ends the current run.
    """
    run_head = None
    # Walk a snapshot: removeChild() is called on self during the loop.
    for node in tuple(self.childNodes):
        if not isinstance(node, TextNode):
            run_head = None
            continue
        if not run_head:
            run_head = node
            continue
        run_head.textContent += node.textContent
        self.removeChild(node)
|
|
||||||
|
|
||||||
def renameNode(self, nodeName):
|
def renameNode(self, nodeName):
|
||||||
self.nodeName = nodeName
|
self.nodeName = nodeName
|
||||||
@@ -64,6 +71,20 @@ class Element(object):
|
|||||||
def getAttribute(self, key):
|
def getAttribute(self, key):
|
||||||
return self.attributes[key]
|
return self.attributes[key]
|
||||||
|
|
||||||
|
def cloneNode(self, deep):
    """Return a copy of this element.

    Args:
        deep: when true, every child is cloned recursively and appended
            to the copy; when false, the copy has no children.

    Returns:
        A new Element with the same `nodeName` and the same attributes.
    """
    clone = Element(self.nodeName)
    # Attributes belong to the element itself, so they are copied for
    # shallow clones as well as deep ones.
    # items() rather than iteritems() so the loop does not depend on the
    # Python 2-only dict API (assumes self.attributes is dict-like).
    for key, value in self.attributes.items():
        clone.setAttribute(key, value)
    if deep:
        for childNode in self.childNodes:
            clone.appendChild(childNode.cloneNode(True))
    return clone
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
# TODO: attributes
|
||||||
|
values = map(str, self.childNodes)
|
||||||
|
return '<%s>%s</%s>' % (self.nodeName, ''.join(values), self.nodeName)
|
||||||
|
|
||||||
|
|
||||||
# ============ Matchers ============
|
# ============ Matchers ============
|
||||||
@@ -79,7 +100,7 @@ class CharExcept(Matcher):
|
|||||||
|
|
||||||
def match(self, context):
|
def match(self, context):
|
||||||
c = context.stringAfter(1)
|
c = context.stringAfter(1)
|
||||||
if c and c in self._chars:
|
if c and c not in self._chars:
|
||||||
yield MatchResult(
|
yield MatchResult(
|
||||||
context.advance(1),
|
context.advance(1),
|
||||||
[TextNode(c)])
|
[TextNode(c)])
|
||||||
@@ -311,6 +332,7 @@ def SplitElementAndNest(originalName, newNames):
|
|||||||
for childNode in node.childNodes:
|
for childNode in node.childNodes:
|
||||||
innerNode.appendChild(childNode)
|
innerNode.appendChild(childNode)
|
||||||
node.parentNode.replaceChild(outerNode, node)
|
node.parentNode.replaceChild(outerNode, node)
|
||||||
|
return Filter
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user