Python: fix DOM bugs that were breaking grouping filter

This commit is contained in:
Ian Gulliver
2014-07-02 14:01:19 -07:00
parent 8a9f9acbaa
commit ae2828b2c2
2 changed files with 23 additions and 5 deletions

View File

@@ -9,8 +9,10 @@ class MatchResult(collections.namedtuple('MatchResult', ['context', 'nodes'])):
class TextNode(object): class TextNode(object):
def __init__(self, textContent): def __init__(self, textContent):
self.nodeName = '#text' self.nodeName = '#text'
self.parentNode = None
self.textContent = textContent self.textContent = textContent
self.previousSibling = None self.previousSibling = None
self.nextSibling = None
self.childNodes = [] self.childNodes = []
def cloneNode(self, deep): def cloneNode(self, deep):
@@ -31,29 +33,45 @@ class Element(object):
self.nodeName = nodeName self.nodeName = nodeName
self.parentNode = None self.parentNode = None
self.previousSibling = None self.previousSibling = None
self.nextSibling = None
self.childNodes = [] self.childNodes = []
self.attributes = {} self.attributes = {}
def appendChild(self, child): def appendChild(self, child):
if child.parentNode:
child.parentNode.removeChild(child)
child.parentNode = self child.parentNode = self
self.childNodes.append(child) self.childNodes.append(child)
if len(self.childNodes) == 1: if len(self.childNodes) == 1:
child.previousSibling = None child.previousSibling = None
else: else:
child.previousSibling = self.childNodes[len(self.childNodes) - 2] beforeChild = self.childNodes[len(self.childNodes) - 2]
child.previousSibling = beforeChild
beforeChild.nextSibling = child
def removeChild(self, child): def removeChild(self, child):
self.childNodes.remove(child) self.childNodes.remove(child)
if child.previousSibling:
child.previousSibling.nextSibling = child.nextSibling
if child.nextSibling:
child.nextSibling.previousSibling = child.previousSibling
child.parentNode = None child.parentNode = None
child.previousSibling = None child.previousSibling = None
child.nextSibling = None
def replaceChild(self, newNode, oldNode): def replaceChild(self, newNode, oldNode):
index = self.childNodes.index(oldNode) index = self.childNodes.index(oldNode)
self.childNodes[index] = newNode self.childNodes[index] = newNode
newNode.parentNode = self newNode.parentNode = self
oldNode.parentNode = None
newNode.previousSibling = oldNode.previousSibling newNode.previousSibling = oldNode.previousSibling
newNode.nextSibling = oldNode.nextSibling
if newNode.previousSibling:
newNode.previousSibling.nextSibling = newNode
if newNode.nextSibling:
newNode.nextSibling.previousSibling = newNode
oldNode.parentNode = None
oldNode.previousSibling = None oldNode.previousSibling = None
oldNode.nextSibling = None
def normalize(self): def normalize(self):
lastTextNode = None lastTextNode = None
@@ -354,7 +372,7 @@ def SplitElementAndNest(originalName, newNames):
def ApplyFilter(node, callback): def ApplyFilter(node, callback):
callback(node) callback(node)
for childNode in node.childNodes: for childNode in list(node.childNodes):
ApplyFilter(childNode, callback) ApplyFilter(childNode, callback)

View File

@@ -48,6 +48,6 @@ code looks like <code>this</code>. Fixed width text looks like
<tt>this</tt>. <pre>This sentence is inline pre-formatted, which stops <tt>this</tt>. <pre>This sentence is inline pre-formatted, which stops
'''''this from being bold and italic.'''''</pre> We can also '''''this from being bold and italic.'''''</pre> We can also
stop &lt;u&gt;this from being underlined&lt;/u&gt;, or just try stop &lt;u&gt;this from being underlined&lt;/u&gt;, or just try
&lt;pre&gt;interrupting cow style.&lt;/pre&gt;<blockquote>This is a blockquote</blockquote></p><p><h2>Header 2</h2><h3>Header 3 <i>with italics</i></h3><h4>Header 4</h4><h5>Header 5</h5><h6>Header 6</h6><hr></hr><ul><li>Item 1a</li></ul><ul><li>Item 1b</li></ul><ul><ul><li>Item 2</li></ul></ul><ul><ul><ul><li>Item 3</li></ul></ul></ul><ul><li>Item 1c</li></ul><ol><li>Item 1a</li></ol><ol><li>Item 1b</li></ol><ol><ol><li>Item 2</li></ol></ol><ol><ol><ol><li>Item 3</li></ol></ol></ol><ol><li>Item 1c</li></ol><def>I don't really understand what a definition is</def><blockquote>But blockquotes are easy</blockquote><blockquote2>Even larger ones</blockquote2><blockquote5>And really huge ones</blockquote5><pre>This line is pre-formatted and &lt;del&gt;not interpolated&lt;/del&gt; &lt;pre&gt;interrupting cow style.&lt;/pre&gt;<blockquote>This is a blockquote</blockquote></p><p><h2>Header 2</h2><h3>Header 3 <i>with italics</i></h3><h4>Header 4</h4><h5>Header 5</h5><h6>Header 6</h6><hr></hr><ul><li>Item 1a</li><li>Item 1b</li><ul><li>Item 2</li><ul><li>Item 3</li></ul></ul><li>Item 1c</li></ul><ol><li>Item 1a</li><li>Item 1b</li><ol><li>Item 2</li><ol><li>Item 3</li></ol></ol><li>Item 1c</li></ol><def>I don't really understand what a definition is</def><blockquote>But blockquotes are easy</blockquote><blockquote2>Even larger ones</blockquote2><blockquote5>And really huge ones</blockquote5><pre>This line is pre-formatted and &lt;del&gt;not interpolated&lt;/del&gt;
</pre><pre>This line is also pre-formatted This line is also pre-formatted
</pre></p></wikidoc>""" </pre></p></wikidoc>"""