python解析xml模块封装代码_程序人生

python解析xml模块封装代码

admin

2023-08-02 22:29:49

0次

有如下的xml文件：

复制代码代码如下:

<root>
<childs>
<child>1</child>
<child>2</child>
</childs>
</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：

复制代码代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX content handler that logs every event and collects character data.

    Every chunk of character data reported by the parser (including the
    whitespace between tags) is appended to the list supplied by the caller.
    """

    def __init__(self, inlist):
        """Remember *inlist*, the list that ``characters`` appends to."""
        # Call the base initializer explicitly; this form works on both
        # Python 2 and Python 3.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, list(attrs.keys())))

    def endElement(self, name):
        """Print the name of the element that was just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and append it to ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)

if __name__ == '__main__':
    # Parse test.xml, logging every SAX event, then show the collected text.
    lt = []
    parse('test.xml', TestHandle(lt))
    print(lt)

结果：
[html] view plaincopy
name: root attrs: []
chars

endname childs
chars

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：

复制代码代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.dom import minidom
# NOTE(review): the XML markup of this literal was stripped when the page was
# extracted. Only <request name="first"> is confirmed by the sample output;
# the other element names are reconstructed -- verify against the original.
xmlstr = '''<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
'''
def doxml(xmlstr, show_help=True):
    """Parse *xmlstr* with minidom and print the document, root and children.

    For every element directly under the root this prints its ``name``
    attribute, node name, number of child nodes and, when its first child is
    a text or CDATA node, that node's data.

    Args:
        xmlstr: XML document as a string.
        show_help: When true (the default), also print the interactive help
            for each child element so its available members can be browsed.

    Returns:
        None. All results are printed.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is the root element even when a doctype or comment
    # precedes it, which firstChild would not guarantee.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; skip text, comments and the like.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        first = child.firstChild
        # Empty elements have no first child, and an element child has no
        # ``data`` attribute, so only report real character data.
        if first is not None and first.nodeType in (first.TEXT_NODE,
                                                    first.CDATA_SECTION_NODE):
            print('child data: %s' % first.data)
        print('=======================================')
        if show_help:
            print('more help info to see:')
            # help() must receive the object itself; passing each attribute
            # *name* as a string only yields "no documentation found".
            help(child)

if __name__ == '__main__':
    # Run the minidom walkthrough on the sample document defined above.
    doxml(xmlstr)

结果：
[html] view plaincopy
Dom:

    /2/photos/square/type.xml
    21301
    auth faild!

root:

    /2/photos/square/type.xml
    21301
    auth faild!

/2/photos/square/type.xml
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而且xml文件的头部声明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码声明，然后转换编码为utf-8再用minidom解码，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========

复制代码代码如下:
#!/usr/bin/env python
#encoding: utf-8
from xml.dom import minidom

class xmlwrite:
    """Build a small XML report with minidom and save it to a file.

    The document root is ``<api>``. Nodes are added through ``write_node``,
    whose parent is addressed by a slash-separated path relative to the root.
    Each path step is a child element name with an optional zero-based index,
    e.g. ``/Case[1]/`` is the second ``Case`` child of the root. Matching on
    attributes is not supported.
    """

    def __init__(self, resultfile):
        """Create an empty document that ``save_xml`` writes to *resultfile*."""
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by *xpath*, or None if it does not exist.

        Empty steps are skipped, so ``/`` addresses the root itself. A step
        without an index selects the first child with that name.

        Raises:
            ValueError: If a bracketed index is not an integer.
        """
        node = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                continue
            bracket = step.find('[')
            if bracket > -1:
                name = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: use the whole step as the name. Slicing
                # with the -1 returned by find() would drop its last char.
                name = step
                index = 0
            matches = [child for child in node.childNodes
                       if child.nodeName == name]
            if index < 0 or index >= len(matches):
                return None
            node = matches[index]
        return node

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new ``nodename`` element under the node at path *parent*.

        Args:
            parent: Path of the parent node, relative to the root.
            nodename: Tag name of the element to create.
            value: Text content; a falsy value creates an empty element.
            attribute: Optional dict of attribute names to values.
            CDATA: When true, store *value* in a CDATA section.

        If the parent path cannot be resolved, a message is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                nodedata = self.dom.createCDATASection(value)
            else:
                nodedata = self.dom.createTextNode(value)
            node.appendChild(nodedata)
        # Attributes apply whether or not the element has text content.
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append an empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a CDATA ``DBSQL`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the ``Case`` at *index*."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Write the document to ``resultfile`` as UTF-8."""
        # toxml(encoding=...) returns the already-encoded document including
        # the XML declaration, so it is written in binary mode.
        data = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(data)

if __name__ == '__main__':
    # Demo: build a report with two cases and save it.
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()

以上封装了minidom，支持通过xpath来写节点，不支持xpath带属性的匹配，但支持带索引的匹配。
比如：/root/child[1], 表示root的第2个child节点。

python 解析xml

上一篇：把大数据数字口语化（python与js）两种实现

下一篇：python实现代码行数统计示例分享

python解析xml模块封装代码

相关内容

热门资讯