日期:2014-05-17  浏览次数:20734 次

使用 Java 框架 Apache POI 将 Word 文档转换成 HTML 格式
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

/**
 * Command-line example that converts a binary Word document ({@code .doc})
 * into an HTML file with Apache POI's {@link WordToHtmlConverter}.
 *
 * <p>The source document, the generated HTML file and every picture extracted
 * from the document all live in the same working directory.
 */
public class WordToHtml {

	/** Working directory (with trailing slash) for the input document and all generated files. */
	private static final String WORK_DIR = "d:/temp/";

	/** File name of the Word document to convert, relative to {@link #WORK_DIR}. */
	private static final String DOC_NAME = "1.doc";

	/** File name of the HTML file to produce, relative to {@link #WORK_DIR}. */
	private static final String HTML_NAME = "1.html";

	/**
	 * Reads {@code d:/temp/1.doc}, converts it to an HTML DOM, saves embedded
	 * pictures next to it and serializes the DOM to {@code d:/temp/1.html}
	 * (UTF-8, indented, HTML output method).
	 *
	 * @param args ignored
	 * @throws IOException if the Word document cannot be read or the HTML file cannot be written
	 * @throws ParserConfigurationException if no DOM document builder can be created
	 * @throws TransformerException if the HTML DOM cannot be serialized
	 */
	public static void main(String[] args) throws IOException, ParserConfigurationException, TransformerException {
		// Load the document and release the file handle immediately, even when parsing fails.
		final HWPFDocument wordDocument;
		try (InputStream input = new FileInputStream(WORK_DIR + DOC_NAME)) {
			wordDocument = new HWPFDocument(input);
		}

		WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
				DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
		wordToHtmlConverter.setPicturesManager(new PicturesManager() {
			/**
			 * Writes one embedded picture into the working directory.
			 *
			 * @return the path of the picture file, as passed back to the converter
			 */
			public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
					float widthInches, float heightInches) {
				File pictureFile = new File(WORK_DIR + suggestedName);
				try (OutputStream os = new FileOutputStream(pictureFile)) {
					os.write(content);
				} catch (IOException e) {
					// Best effort: a picture that cannot be saved must not abort the whole
					// conversion, but the failure is reported together with the affected file.
					System.err.println("Failed to save picture " + pictureFile + ": " + e.getMessage());
					e.printStackTrace();
				}
				// NOTE(review): this is an absolute local path; since the HTML file is written
				// to the same directory, returning just suggestedName may be preferable - confirm.
				return WORK_DIR + suggestedName;
			}
		});
		wordToHtmlConverter.processDocument(wordDocument);
		Document htmlDocument = wordToHtmlConverter.getDocument();

		// Configure the serializer before opening the output file so that a setup
		// failure does not leave an empty HTML file behind.
		Transformer serializer = TransformerFactory.newInstance().newTransformer();
		serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
		serializer.setOutputProperty(OutputKeys.INDENT, "yes");
		serializer.setOutputProperty(OutputKeys.METHOD, "html");

		File htmlFile = new File(WORK_DIR + HTML_NAME);
		try (OutputStream outStream = new FileOutputStream(htmlFile)) {
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
		}
	}

}