Python: fix DOM bugs that were breaking grouping filter
This commit is contained in:
@@ -9,8 +9,10 @@ class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
|
||||
class TextNode(object):
|
||||
def __init__(self, textContent):
    """Create a detached text node.

    Args:
        textContent: The text carried by this node.
    """
    self.nodeName = '#text'
    self.textContent = textContent
    # A freshly created node is not attached to any tree yet, so it has
    # neither a parent nor siblings.
    self.parentNode = None
    self.previousSibling = None
    self.nextSibling = None
    # Starts out empty; having the attribute lets code that walks
    # ``node.childNodes`` treat text nodes like any other node.
    self.childNodes = []
|
||||
|
||||
def cloneNode(self, deep):
|
||||
@@ -31,29 +33,45 @@ class Element(object):
|
||||
self.nodeName = nodeName
|
||||
self.parentNode = None
|
||||
self.previousSibling = None
|
||||
self.nextSibling = None
|
||||
self.childNodes = []
|
||||
self.attributes = {}
|
||||
|
||||
def appendChild(self, child):
    """Append ``child`` as the last child of this element.

    If ``child`` already has a parent, it is first removed from that parent
    by calling the parent's ``removeChild``. Afterwards ``child`` is the
    last entry of ``self.childNodes`` and the sibling links between it and
    the former last child are updated in both directions.

    Args:
        child: The node to append.
    """
    if child.parentNode is not None:
        child.parentNode.removeChild(child)

    # Remember the current tail *before* appending so that the forward link
    # (tail -> child) can be set as well as the backward one.
    beforeChild = self.childNodes[-1] if self.childNodes else None

    self.childNodes.append(child)
    child.parentNode = self
    child.previousSibling = beforeChild
    # The appended node is always last, so it never has a next sibling.
    child.nextSibling = None
    if beforeChild is not None:
        beforeChild.nextSibling = child
|
||||
|
||||
def removeChild(self, child):
    """Detach ``child`` from this element.

    The neighbours of ``child`` are linked to each other, and ``child`` is
    left with no parent and no siblings.

    Args:
        child: The node to remove; must be in ``self.childNodes``.

    Raises:
        ValueError: If ``child`` is not in ``self.childNodes``. This is
            raised by ``list.remove`` before any link is modified.
    """
    self.childNodes.remove(child)

    before = child.previousSibling
    after = child.nextSibling
    # Bridge the gap left by the removed node.
    if before is not None:
        before.nextSibling = after
    if after is not None:
        after.previousSibling = before

    child.parentNode = None
    child.previousSibling = None
    child.nextSibling = None
|
||||
|
||||
def replaceChild(self, newNode, oldNode):
    """Put ``newNode`` in the position currently held by ``oldNode``.

    ``newNode`` takes over the parent and sibling links of ``oldNode``;
    ``oldNode`` is left with no parent and no siblings. If ``newNode`` is
    attached to a parent, it is removed from that parent first (the same
    rule ``appendChild`` applies). Replacing a node with itself is a no-op.

    Args:
        newNode: The node to insert.
        oldNode: The node to replace; must be in ``self.childNodes``.

    Raises:
        ValueError: If ``oldNode`` is not in ``self.childNodes``. This is
            raised by ``list.index`` before anything is modified.
    """
    # Look the position up first so a missing oldNode fails before any
    # mutation happens.
    index = self.childNodes.index(oldNode)

    if newNode is oldNode:
        # Without this guard the clean-up of oldNode at the end would wipe
        # the links of the node that was just inserted.
        return

    if newNode.parentNode is not None:
        newNode.parentNode.removeChild(newNode)
        # newNode may have been an earlier sibling of oldNode, in which
        # case the removal shifted oldNode's position.
        index = self.childNodes.index(oldNode)

    # Read the neighbours only now: detaching newNode above may have
    # changed them.
    before = oldNode.previousSibling
    after = oldNode.nextSibling

    self.childNodes[index] = newNode
    newNode.parentNode = self
    newNode.previousSibling = before
    newNode.nextSibling = after
    if before is not None:
        before.nextSibling = newNode
    if after is not None:
        after.previousSibling = newNode

    oldNode.parentNode = None
    oldNode.previousSibling = None
    oldNode.nextSibling = None
|
||||
|
||||
def normalize(self):
|
||||
lastTextNode = None
|
||||
@@ -354,7 +372,7 @@ def SplitElementAndNest(originalName, newNames):
|
||||
|
||||
def ApplyFilter(node, callback):
    """Call ``callback`` on ``node`` and then on each of its descendants.

    Nodes are visited parent-first, children in list order.

    Args:
        node: Root of the subtree to walk; must expose ``childNodes``.
        callback: Callable invoked with each visited node.
    """
    callback(node)
    # Walk a snapshot of the child list: callbacks may add, remove or
    # replace children while the walk is in progress, and mutating a list
    # during iteration would make the loop skip or repeat entries.
    for childNode in list(node.childNodes):
        ApplyFilter(childNode, callback)
|
||||
|
||||
|
||||
|
||||
4
test.py
4
test.py
@@ -48,6 +48,6 @@ code looks like <code>this</code>. Fixed width text looks like
|
||||
<tt>this</tt>. <pre>This sentence is inline pre-formatted, which stops
|
||||
'''''this from being bold and italic.'''''</pre> We can also
|
||||
stop <u>this from being underlined</u>, or just try
|
||||
<pre>interrupting cow style.</pre><blockquote>This is a blockquote</blockquote></p><p><h2>Header 2</h2><h3>Header 3 <i>with italics</i></h3><h4>Header 4</h4><h5>Header 5</h5><h6>Header 6</h6><hr></hr><ul><li>Item 1a</li></ul><ul><li>Item 1b</li></ul><ul><ul><li>Item 2</li></ul></ul><ul><ul><ul><li>Item 3</li></ul></ul></ul><ul><li>Item 1c</li></ul><ol><li>Item 1a</li></ol><ol><li>Item 1b</li></ol><ol><ol><li>Item 2</li></ol></ol><ol><ol><ol><li>Item 3</li></ol></ol></ol><ol><li>Item 1c</li></ol><def>I don't really understand what a definition is</def><blockquote>But blockquotes are easy</blockquote><blockquote2>Even larger ones</blockquote2><blockquote5>And really huge ones</blockquote5><pre>This line is pre-formatted and <del>not interpolated</del>
|
||||
</pre><pre>This line is also pre-formatted
|
||||
<pre>interrupting cow style.</pre><blockquote>This is a blockquote</blockquote></p><p><h2>Header 2</h2><h3>Header 3 <i>with italics</i></h3><h4>Header 4</h4><h5>Header 5</h5><h6>Header 6</h6><hr></hr><ul><li>Item 1a</li><li>Item 1b</li><ul><li>Item 2</li><ul><li>Item 3</li></ul></ul><li>Item 1c</li></ul><ol><li>Item 1a</li><li>Item 1b</li><ol><li>Item 2</li><ol><li>Item 3</li></ol></ol><li>Item 1c</li></ol><def>I don't really understand what a definition is</def><blockquote>But blockquotes are easy</blockquote><blockquote2>Even larger ones</blockquote2><blockquote5>And really huge ones</blockquote5><pre>This line is pre-formatted and <del>not interpolated</del>
|
||||
This line is also pre-formatted
|
||||
</pre></p></wikidoc>"""
|
||||
|
||||
Reference in New Issue
Block a user