日期:2014-05-20  浏览次数:20875 次

求XML达人帮忙!我有一个860MB左右的XML文件,要怎么分割它?
新手刚刚接触这方面的问题,请教一下大侠们,帮帮忙。这个XML文件太大了,我用notepad打开后就是未响应,现在不是很清楚里面的具体结构,所以想把XML文件先分成几个小文件,一个一个打开看结构,不然太卡了。SAX和DOM到底怎么用?网上搜了一篇讲4种解析方法区别的文章,看了以后还是不太懂。我用的是DOM4J解析器,也可以使用SAX方法么?好多疑问,求教!!

------解决方案--------------------
我这里可以通过 Python 把你这个文件分割成一份一份的小文件,方便你查看 XML 的内容

Python code
#!C:\Python27 python
# -*-  coding:UTF-8 -*-
# Split one large file into consecutive fixed-size binary pieces so that each
# piece is small enough to open in an editor.
#
# NOTE: the split is done on raw bytes, so a piece boundary can fall anywhere
# (in the middle of a tag, or of a multi-byte character). The pieces are meant
# for inspection only; they are not complete documents on their own.
import os,sys
size = 80*1024*1024   # bytes per piece (80 MiB)
partnum = 0           # number of pieces written so far
outdir = "c:\\ttt"

# Create the output directory up front instead of failing on the first write.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# `with` closes both files even if a read or write raises part-way through.
with open("c:\\test.zip",'rb') as source:
    text = source.read(size)
    while text:
        partnum = partnum + 1
        with open("c:\\ttt\\test%04d"%partnum+".zip",'wb') as fileoutput:
            fileoutput.write(text)
        text = source.read(size)

------解决方案--------------------
Java code

package com.huawei.hdm.util;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 * Parses struts.xml with SAX and flattens selected parts of it into one map.
 *
 * <p>The resulting map contains:
 * <ul>
 *   <li>one entry per {@code <constant name="..." value="..."/>}, keyed by the
 *       constant name;</li>
 *   <li>one entry per action that declares a {@code <result name="error">},
 *       keyed by the simple (unqualified) name of the action's {@code class}
 *       attribute, whose value is the trimmed text of that result element.</li>
 * </ul>
 *
 * <p>Files referenced by {@code <include file="..."/>} are parsed recursively
 * into the same map. Instances keep that map as mutable state, so one instance
 * accumulates the entries of every file it has parsed.
 */
public class StrutsXMLParser {
   /** Accumulated result, shared by the top-level file and every included file. */
   Map<String, Object> map = new HashMap<String, Object>();

   /**
    * SAX content handler that records constants, error-result URLs and
    * include references for a single document.
    */
   class StrutsXMLHandler extends SAXHandler {
      /** Value of the {@code class} attribute of the action being read, if any. */
      private String cls;
      /** True while inside a {@code <result name="error">} element. */
      private boolean isUrl;
      /** Text collected so far for the current error result. */
      private final StringBuilder urlText = new StringBuilder();
      /** Values of the {@code file} attribute of every include seen, in document order. */
      private List<String> fileList = new ArrayList<String>();

      @Override
      public void startDocument() throws SAXException {
      }

      @Override
      public void startElement(String namespaceURI, String localName,
            String qName, Attributes atts) throws SAXException {
         if("include".equals(qName)) {
            String file = atts.getValue("file");

            // An include without a file attribute has nothing to resolve.
            if(file != null) {
               fileList.add(file);
            }
         }

         if("constant".equals(qName)) {
            String name = atts.getValue("name");

            // Skip nameless constants rather than storing them under a null key.
            if(name != null) {
               map.put(name, atts.getValue("value"));
            }
         }

         if("action".equals(qName)) {
            cls = atts.getValue("class");
         }

         if("result".equals(qName) && "error".equals(atts.getValue("name"))) {
            isUrl = true;
            urlText.setLength(0);
         }
      }

      @Override
      public void characters(char[] ch, int start, int length) throws SAXException {
         // A parser may deliver one text node in several chunks, so only
         // collect here; the value is committed when the element closes.
         if(isUrl) {
            urlText.append(ch, start, length);
         }
      }

      @Override
      public void endElement(String namespaceURI, String localName, String qName)
            throws SAXException {
         if("result".equals(qName) && isUrl) {
            // Always leave capture mode here, so an empty result cannot make
            // later, unrelated text look like a URL.
            isUrl = false;
            String url = urlText.toString().trim();
            urlText.setLength(0);

            if(cls != null && url.length() > 0) {
               // Test the same key that is stored, so the first error result
               // recorded for a class name is kept.
               String simpleName = cls.substring(cls.lastIndexOf(".") + 1);

               if(!map.containsKey(simpleName)) {
                  map.put(simpleName, url);
               }
            }
         }
         else if("action".equals(qName)) {
            // Results outside any action must not be attributed to this one.
            cls = null;
         }
      }

      @Override
      public void endDocument() throws SAXException {
         // Nothing to do: parser() reads the include list from the handler.
      }
   }

   /**
    * Parses the given file and, recursively, every file it includes.
    *
    * <p>Include paths are resolved against the directory part of {@code xml},
    * i.e. everything up to and including its last {@code '/'}. Circular
    * includes are not detected and lead to unbounded recursion.
    *
    * @param xml system identifier (path or URI) of the file to parse
    * @return the map shared by this instance, holding the entries of all files
    *         parsed so far
    * @throws Exception if the parser cannot be created or a file cannot be
    *         read or parsed
    */
   public Map<String, Object> parser(String xml) throws Exception {
      String path = xml.substring(0, xml.lastIndexOf("/") + 1);
      SAXParserFactory spf = SAXParserFactory.newInstance();
      SAXParser sp = spf.newSAXParser();
      XMLReader xr = sp.getXMLReader();
      StrutsXMLHandler handler = new StrutsXMLHandler();
      xr.setContentHandler(handler);
      xr.parse(xml);

      // Each parse has its own handler, so its include list is read directly
      // from it instead of travelling through the shared result map.
      for(String include : handler.fileList) {
         parser(path + include);
      }

      return map;
   }

   /**
    * Parses {@code struts.xml} from the working directory and prints every
    * collected entry as {@code key = value} on standard error.
    *
    * @param args ignored
    */
   public static void main(String[] args) {
      try {
         Map<String, Object> map = 
            new StrutsXMLParser().parser("struts.xml");

         for(Map.Entry<String, Object> entry : map.entrySet()) {
            String val = String.valueOf(entry.getValue());

            System.err.println(entry.getKey() + " = " + val);
         }
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
}