本文通过实例讲述了使用 Python 解析 XML 文件的实现方法,分享给大家供大家参考。具体方法如下:
xml文件内容如下:
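<?xml version="1.0" encoding="UTF-8"?>
<book>
  <title>sample xml thing</title>
  <author>
    <name>
      <first>ma</first>
      <last>xiaoju</last>
    </name>
    <affiliation>Springs Widgets, Inc.</affiliation>
  </author>
  <chapter number="1">
    <title>First</title>
    <para>I think widgets are greate.You should buy lots of them forom <company>Spirngy Widgts, Inc</company></para>
  </chapter>
</book>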
python代码:
from xml.dom import minidom, Node
import re, textwrap
class SampleScanner:
    """Walk a parsed XML document and print the contents of its ``book`` elements.

    Constructing an instance performs the whole scan: every top-level
    ``book`` element of the document is visited and its title, author and
    chapter details are written to standard output.

    The elements and attributes looked up are ``book``, ``title``,
    ``author`` (with ``name``/``first``/``last`` and ``affiliation``) and
    ``chapter`` (with a ``number`` attribute, ``title`` and ``para``/``company``).
    """

    def __init__(self, doc):
        """Scan *doc* and print a report for each top-level ``book`` element.

        Args:
            doc: A ``xml.dom.minidom.Document``.

        Raises:
            TypeError: If *doc* is not a ``minidom.Document``.
        """
        # Explicit check instead of ``assert``: asserts vanish under ``python -O``.
        if not isinstance(doc, minidom.Document):
            raise TypeError(
                "doc must be a minidom.Document, got %s" % type(doc).__name__
            )
        for child in self._child_elements(doc):
            if child.tagName == "book":
                self.handle_book(child)

    def _child_elements(self, node):
        """Return the direct children of *node* that are element nodes."""
        return [
            child for child in node.childNodes
            if child.nodeType == Node.ELEMENT_NODE
        ]

    def handle_book(self, node):
        """Print the book title and dispatch ``author``/``chapter`` children."""
        for child in self._child_elements(node):
            tag = child.tagName
            if tag == "title":
                # Single-argument print() produces the same output on
                # Python 2 and Python 3.
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif tag == "author":
                self.handle_author(child)
            elif tag == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's ``number`` attribute and title, then its paragraphs."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName searches all descendants, not just direct children.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))
        for child in self._child_elements(node):
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of every ``company`` element found inside a ``para``."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name parts and affiliation."""
        for child in self._child_elements(node):
            tag = child.tagName
            if tag == "name":
                self.handle_author_name(child)
            elif tag == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``first`` and ``last`` children of a ``name`` element.

        A missing part is printed as an empty string.
        """
        first = ""
        last = ""
        for child in self._child_elements(node):
            tag = child.tagName
            if tag == "first":
                first = self.gettext(child.childNodes)
            elif tag == "last":
                last = self.gettext(child.childNodes)
        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text contained in *nodelist* with whitespace collapsed.

        Text nodes contribute their ``wholeText``; any other node that has
        children is descended into recursively. Every run of whitespace in
        the joined result is replaced by a single space (leading and
        trailing whitespace is collapsed, not stripped).

        Args:
            nodelist: An iterable of DOM nodes.

        Returns:
            The concatenated, whitespace-normalised text.
        """
        parts = []
        for node in nodelist:
            if node.nodeType == Node.TEXT_NODE:
                parts.append(node.wholeText)
            elif node.hasChildNodes():
                # hasChildNodes is a method; it must be called to be a real test.
                parts.append(self.gettext(node.childNodes))
        return re.sub(r"\s+", " ", "".join(parts))
if __name__ == "__main__":
    # Script entry point: parse "simple.xml" (path relative to the current
    # working directory) and hand the document to SampleScanner, whose
    # constructor performs the scan and prints the results.
    document = minidom.parse("simple.xml")
    scanner = SampleScanner(document)
希望本文所述对大家的Python程序设计有所帮助。