有如下的xml文件:

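复制代码 代码如下:
<?xml version="1.0" encoding="utf-8"?>
<root>
<childs>
<child name="first">1</child>
<child value="2">2</child>
</childs>
</root>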

下面介绍python解析xml文件的几种方法,使用python模块实现。

方式1,python模块实现自动遍历所有节点:

复制代码 代码如下:
#!/usr/bin/env python 
# -*- coding: utf-8 -*- 
from xml.sax.handler import ContentHandler 
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX handler that traces every event and collects character data.

    Each callback prints what it received, so running a parse shows the
    order in which SAX visits the document.  Every chunk of character data
    (including whitespace between tags) is also appended to ``inlist``.
    """

    def __init__(self, inlist):
        """Store the caller-owned list that receives character data.

        Args:
            inlist: list that is mutated in place; one item is appended for
                every ``characters`` callback.
        """
        # Explicit base-class call (not super()) because ContentHandler is
        # an old-style class on Python 2; it initialises ``_locator``.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that was just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)

             
if __name__ == '__main__':
    # Parse test.xml from the current directory; TestHandle prints every
    # SAX event and fills ``lt`` with the character data it received.
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)

结果:
[html] view plaincopy
name: root attrs: [] 
chars  

name: childs attrs: [] 
chars  

name: child attrs: [u'name'] 
chars 1 
endname child 
chars  

name: child attrs: [u'value'] 
chars 2 
endname child 
chars  

endname childs 
chars  

endname root 
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2,python模块实现获取根节点,按需查找指定节点:

复制代码 代码如下:
#!/usr/bin/env python   
# -*- coding: utf-8 -*-   
from xml.dom import minidom   
# Sample XML document fed to doxml().
# NOTE(review): the markup was lost when the listing was extracted.  The
# <request> element and its name="first" attribute are confirmed by the
# program output shown below; the root and the other two tag names are a
# best-effort reconstruction -- confirm against the original article.
# The literal starts directly with the XML declaration because a parser
# rejects a declaration that is preceded by whitespace.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
'''
def doxml(xmlstr):
    """Parse an XML string with minidom and print a walk of its root.

    Prints the whole document, then the root element, then every direct
    child of the root.  For element children it also prints the ``name``
    attribute, the tag name, the number of child nodes, the data of the
    first child node and the public member names of the node object.

    Args:
        xmlstr: XML document as a string.

    Raises:
        xml.parsers.expat.ExpatError: if ``xmlstr`` is not well-formed.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is always the root element; firstChild can be a
    # comment or doctype node when one precedes the root.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; text, comment and CDATA nodes
        # would raise AttributeError on getAttribute().
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # firstChild is None for an empty element, and a nested element has
        # no ``data`` attribute; print an empty value in both cases.
        print('child data: %s' % getattr(child.firstChild, 'data', ''))
        print('=======================================')
        print('more help info to see:')
        # List the member names themselves.  Passing each *name string* to
        # help() makes pydoc look it up as a topic/module, not as a member;
        # use help(child) interactively for the full documentation.
        print(', '.join(
            member for member in dir(child) if not member.startswith('_')))

               
if __name__ == '__main__':
    # Run the minidom walk-through on the sample document defined above.
    doxml(xmlstr)

结果:
[html] view plaincopy
Dom: 
 
    /2/photos/square/type.xml 
    21301 
    auth faild! 
 
root: 
 
    /2/photos/square/type.xml 
    21301 
    auth faild! 
 

/2/photos/square/type.xml 
child node attribute name: first 
child node name: request 
child node len: 1 
child data: /2/photos/square/type.xml 
======================================= 
more help info to see: 
两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件的头部声明也必须是utf-8,为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码申明,然后转换编码为utf-8再用minidom解码,实际测试为可行,不过有点累赘的感觉。

本节是 python解析xml模块封装代码 的第二部分。
====写xml内容的分割线=========

复制代码 代码如下:
#!/usr/bin/env python 
#encoding: utf-8 
from xml.dom import minidom 

class xmlwrite:
    """Build an XML result report with minidom and save it to a file.

    The document root is always ``<api>``.  Nodes are added below a parent
    that is addressed with a minimal path syntax: ``/`` is the root and
    ``/Case[1]/`` is the second ``Case`` child of the root (indexes are
    zero-based; a segment without ``[n]`` means index 0).  Attribute
    predicates are not supported.
    """

    def __init__(self, resultfile):
        """Create an empty document.

        Args:
            resultfile: path that ``save_xml`` writes the document to.
        """
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Resolve a path such as ``/Case[1]/`` to a node below the root.

        Args:
            xpath: slash-separated path; empty segments are ignored, so
                ``/`` resolves to the root itself.

        Returns:
            The addressed node, or None when any segment has no match.

        Raises:
            ValueError: if the text inside ``[...]`` is not an integer.
        """
        current = self.root
        for segment in xpath.split('/'):
            segment = segment.strip()
            if not segment:
                continue
            bracket = segment.find('[')
            if bracket > -1:
                tagname = segment[:bracket]
                index = int(segment[bracket + 1:-1])
            else:
                # No index given: use the whole segment as the tag name.
                # Slicing with the -1 returned by find() would drop the
                # last character of the name.
                tagname = segment
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tagname]
            if index < 0 or index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element below the node addressed by ``parent``.

        Args:
            parent: path of the parent node (see the class docstring).
            nodename: tag name of the new element.
            value: text content; a falsy value creates an empty element.
            attribute: optional dict of attribute names to values.
            CDATA: when true, ``value`` is stored in a CDATA section
                instead of a text node.

        If the parent cannot be resolved, a notice is printed and the
        element is appended to the document root instead.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        # Attributes are independent of the text content, so they are set
        # for empty elements as well.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Malformed path (non-string parent or non-integer index).
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<StartTime>`` to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append ``<EndTime>`` to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append ``<PassCount>`` to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append ``<FailCount>`` to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append an empty ``<Case>`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append ``<No>`` to the ``index``-th ``Case`` (zero-based)."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append ``<URL>`` to the ``index``-th ``Case`` (zero-based)."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append ``<DBData>`` to the ``index``-th ``Case`` (zero-based)."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append ``<APIData>`` to the ``index``-th ``Case`` (zero-based)."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append ``<DBSQL>`` as CDATA to the ``index``-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append ``<APIXPath>`` to the ``index``-th ``Case`` (zero-based)."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``self.resultfile`` encoded as UTF-8."""
        # Serialise to bytes first: the bytes match the declared encoding
        # regardless of locale, and a serialisation error cannot leave a
        # truncated file behind.
        payload = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(payload)

if __name__ == '__main__':
    # Demo: build a report with two <Case> entries and save it.
    # Case indexes are zero-based, so both write_case() calls must come
    # before the write_case_*(0, ...) / write_case_*(1, ...) calls.
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()

以上封装了minidom,支持通过xpath来写节点,不支持xpath带属性的匹配,但支持带索引的匹配。
比如:/root/child[1], 表示root的第2个child节点。