码迷,mamicode.com
首页 > 编程语言 > 详细

Python解析生成XML-ElementTree VS minidom

时间:2015-05-05 21:40:02      阅读:261      评论:0      收藏:0      [点我收藏+]

标签:

OS:Windows 7

关键字:Python3.4,XML,ElementTree,minidom

 

本文介绍用Python解析生成以下XML:

<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language="English"><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language="Chinese"><![CDATA[cdata text]]></Description>
    </Person>
</Persons>

1.创建一个xml文件名为src.xml,内容如上,放到c:\temp

2.使用ElementTree读取src.xml,并创建一个内容相同的xml名为target-tree.xml。

ElementTreeSample.py如下:

# -*- coding: utf-8 -*-
"""
Sample of xml.etree.ElementTree

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.etree.ElementTree as ET
import time

def copy_node(src_node, target_node):
    """Recursively copy attributes, child elements and leaf text.

    Every attribute of ``src_node`` is set on ``target_node``.  When
    ``src_node`` has child elements, a new element with the same tag is
    appended to ``target_node`` for each child and filled in recursively;
    in that case ``src_node.text`` is not copied.  When ``src_node`` has no
    children, its ``text`` is assigned to ``target_node.text``.  The
    ``tail`` of the source elements is never copied.

    Args:
        src_node: Element to read from.
        target_node: Element to populate in place.
    """
    # Copy attr
    for attr_name, attr_value in src_node.items():
        target_node.set(attr_name, attr_value)

    children = list(src_node)
    if not children:
        # Leaf element: only the text content is left to carry over.
        target_node.text = src_node.text
        return

    for child in children:
        clone = ET.SubElement(target_node, child.tag)
        copy_node(child, clone)
    
def read_write_xml(src, target):
    """Parse ``src`` with ElementTree and write a copy of it to ``target``.

    The copy is built element by element with ``copy_node``; the time spent
    in that call is printed in milliseconds.  After writing, the target
    path is logged at INFO level.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create or overwrite.
    """
    tree = ET.parse(src)
    root = tree.getroot()

    target_root = ET.Element(root.tag)
    start_time = time.time() * 1000
    copy_node(root, target_root)
    end_time = time.time() * 1000
    # The label must be a string literal; the unquoted form is a syntax error.
    print('copy_node:' + str(end_time - start_time))

    target_tree = ET.ElementTree(target_root)
    target_tree.write(target)
    logging.info(target)

def main():
    """Run the ElementTree sample and report how long it took.

    Sets up logging to a ``.log`` file next to this script and to the
    console, copies ``C:/temp/src.xml`` to ``C:/temp/target-tree.xml`` via
    ``read_write_xml``, prints the elapsed time in milliseconds, and then
    waits for the user to press Enter.  Any exception is logged with its
    traceback instead of being propagated.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger.  Swap only the extension; str.replace would
        # also rewrite a '.py' occurring elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        #src = sys.argv[1]
        #target = sys.argv[2]

        # For debugging
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-tree.xml'

        # Generate results
        start_time = time.time() * 1000
        read_write_xml(src, target)
        end_time = time.time() * 1000
        print('read_write_xml:' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback itself, so only a
        # short message is needed here.
        logging.exception('Unhandled error')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()

 3.使用minidom读取src.xml,并创建一个内容相同的xml名为target-dom.xml。

MinidomSample.py如下:

# -*- coding: utf-8 -*-
"""
Sample of xml.dom.minidom

@author: ldlchina
"""

import os
import sys
import logging
import traceback
import xml.dom.minidom as MD
import time

def get_text(n):
    """Return the concatenated data of the direct text/CDATA children of ``n``.

    Only immediate children are inspected; text nested inside child
    elements is not included.  An element without text or CDATA children
    yields an empty string.
    """
    return "".join(
        child.data
        for child in n.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )

def copy_node(target_doc, src_node, target_node):
    """Recursively copy ``src_node``'s attributes and children into ``target_node``.

    Attributes are copied unless ``src_node`` is a ``Document``.  Of the
    child nodes, only text, CDATA-section and element nodes are copied;
    any other node type is skipped.  New nodes are created through
    ``target_doc`` and appended to ``target_node`` in source order.

    Args:
        target_doc: Document used as the factory for the new nodes.
        src_node: Document or node to read from.
        target_node: Document or node that receives the copies.
    """
    is_document = isinstance(src_node, MD.Document)
    if not is_document and src_node.hasAttributes():
        for attr_name, attr_value in src_node.attributes.items():
            target_node.setAttribute(attr_name, attr_value)

    for child in src_node.childNodes:
        kind = child.nodeType
        if kind == child.ELEMENT_NODE:
            clone = target_doc.createElement(child.nodeName)
            target_node.appendChild(clone)
            copy_node(target_doc, child, clone)
        elif kind == child.CDATA_SECTION_NODE:
            target_node.appendChild(
                target_doc.createCDATASection(child.nodeValue))
        elif kind == child.TEXT_NODE:
            target_node.appendChild(
                target_doc.createTextNode(child.nodeValue))
    
def read_write_xml(src, target):
    """Parse ``src`` with minidom and write a copy of it to ``target``.

    The copy is built node by node with ``copy_node``; the time spent in
    that call is printed in milliseconds.  Only the document element is
    serialized, so the output carries no XML declaration.  After writing,
    the target path is logged at INFO level.

    Args:
        src: Path of the XML file to read.
        target: Path of the XML file to create or overwrite.
    """
    doc = MD.parse(src)
    target_doc = MD.Document()

    start_time = time.time() * 1000
    copy_node(target_doc, doc, target_doc)
    end_time = time.time() * 1000
    # The label must be a string literal; the unquoted form is a syntax error.
    print('copy_node: ' + str(end_time - start_time))

    # Write to file.  Without an XML declaration a parser assumes UTF-8, so
    # encode explicitly instead of relying on the platform default; the
    # context manager closes the file even if the write fails.
    with open(target, 'w', encoding='utf-8') as f:
        f.write(target_doc.documentElement.toxml())
    logging.info(target)

def main():
    """Run the minidom sample and report how long it took.

    Sets up logging to a ``.log`` file next to this script and to the
    console, copies ``C:/temp/src.xml`` to ``C:/temp/target-dom.xml`` via
    ``read_write_xml``, prints the elapsed time in milliseconds, and then
    waits for the user to press Enter.  Any exception is logged with its
    traceback instead of being propagated.
    """
    try:
        current_file = os.path.realpath(__file__)

        # Configure logger.  Swap only the extension; str.replace would
        # also rewrite a '.py' occurring elsewhere in the path.
        log_file = os.path.splitext(current_file)[0] + '.log'
        logging.basicConfig(filename=log_file, filemode='w', level=logging.INFO)

        # Create console handler
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)

        logger = logging.getLogger('')
        logger.addHandler(ch)

        #src = sys.argv[1]
        #target = sys.argv[2]

        # For debugging
        src = 'C:/temp/src.xml'
        target = 'C:/temp/target-dom.xml'

        # Generate results
        start_time = time.time() * 1000
        read_write_xml(src, target)
        end_time = time.time() * 1000
        print('read_write_xml: ' + str(end_time - start_time))
    except Exception:
        # logging.exception appends the active traceback itself, so only a
        # short message is needed here.
        logging.exception('Unhandled error')

    input('Press any key to exit...')


if __name__ == '__main__':
    main()

4.运行ElementTreeSample.py,得到XML如下:

<Persons><Person><Name>LDL</Name><Description Language="English">cdata text</Description></Person><Person><Name>China</Name><Description Language="Chinese">cdata text</Description></Person></Persons>

5.运行MinidomSample.py,得到XML如下:

<Persons>
    <Person>
        <Name>LDL</Name>
        <Description Language="English"><![CDATA[cdata text]]></Description>
    </Person>
    <Person>
        <Name>China</Name>
        <Description Language="Chinese"><![CDATA[cdata text]]></Description>
    </Person>
</Persons>

 

ElementTree VS minidom:

1.ElementTree执行速度会比minidom快一些。

2.ElementTree不能分析XML的换行和缩进。minidom可以。

3.ElementTree不支持CDATA,minidom可以。

Python解析生成XML-ElementTree VS minidom

标签:

原文地址:http://www.cnblogs.com/ldlchina/p/4469026.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!