日期:2014-05-20  浏览次数:20894 次

如何提取文本文档中的数据
有一个文本文档,内容如下:
# Well known service port numbers -*- mode: fundamental; -*-
# From the Nmap security scanner ( http://www.insecure.org/nmap/ )
#
# $Id: nmap-services 6563 2007-12-19 01:26:32Z doug $
# For a HUGE list of services (including these and others), 
# see http://www.graffiti.com/services
tcpmux 1/tcp # TCP Port Service Multiplexer [rfc-1078]
tcpmux 1/udp # TCP Port Service Multiplexer
compressnet 2/tcp # Management Utility
compressnet 2/udp # Management Utility
compressnet 3/tcp # Compression Process
compressnet 3/udp # Compression Process
rje 5/tcp # Remote Job Entry
echo 7/tcp # 
echo 7/tcp # 


我想把每个服务的名称、端口和注释提取出来(同一服务的同一端口只输出一次),输出格式如下:
服务:tcpmux  
端口:1
注释:TCP Port Service Multiplexer [rfc-1078]  

服务:compressnet
端口:2
注释:Management Utility

服务:compressnet
端口:3
注释:Compression Process

服务:rje  
端口:5
注释:Remote Job Entry

服务:echo  
端口:7
注释:

请问如何编程实现,希望能有源代码看一下,谢谢

------解决方案--------------------

------解决方案--------------------
Java code

/**
 * Prints the service name, port and comment of every TCP entry found in
 * {@code f:/3.txt}.
 *
 * <p>Each data line is expected to look like {@code name port/proto # comment},
 * with the fields separated by spaces or tabs. Blank lines, lines starting with
 * {@code #}, lines without a {@code port/proto} field and lines identical to the
 * line directly before them are skipped. A missing comment is printed as empty.
 *
 * @param args ignored
 * @throws Exception if the file cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    BufferedReader br = new BufferedReader(new FileReader("f:/3.txt"));
    try {
        String readline;
        String temp = null; // previous raw line, used to drop consecutive duplicates
        while ((readline = br.readLine()) != null) {
            boolean duplicate = readline.equals(temp);
            temp = readline;

            String line = readline.trim();
            if (duplicate || line.length() == 0 || line.startsWith("#")) {
                continue;
            }

            // Everything after the first '#' is the comment; it may be absent.
            String[] str = line.split("#", 2);
            String comment = str.length > 1 ? str[1].trim() : "";

            // Fields may be separated by any run of spaces or tabs.
            String[] fields = str[0].trim().split("\\s+");
            if (fields.length < 2) {
                continue;
            }
            int slash = fields[1].indexOf('/');
            if (slash < 0) {
                continue;
            }
            String port = fields[1].substring(0, slash);
            String protocol = fields[1].substring(slash + 1);

            if (protocol.equals("tcp")) {
                System.out.println("服务:" + fields[0]);
                System.out.println("端口:" + port);
                System.out.println("注释:" + comment);
                System.out.println();
            }
        }
    } finally {
        // Close the reader even when reading fails part-way through.
        br.close();
    }
}

------解决方案--------------------
根据8楼的改了一下,这个结果应该是楼主想要的:

Java code

import java.io.*;

/**
 * Prints the service name, port and comment of every entry in a services file
 * whose data lines look like {@code name port/proto # comment}.
 */
public class GetFile1 {
    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_FILE = "f:/source.txt";

    /**
     * Reads the services file and prints one record (service, port, comment,
     * blank line) per data line, for every protocol.
     *
     * <p>Blank lines, lines starting with {@code #} and lines that have no
     * second field are skipped. Fields may be separated by spaces or tabs, and
     * a missing comment is printed as empty.
     *
     * @param args optional; {@code args[0]} is the file to read, otherwise
     *             {@code f:/source.txt} is used
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        BufferedReader br = new BufferedReader(new FileReader(fileName));
        try {
            String readline;
            while ((readline = br.readLine()) != null) {
                String line = readline.trim();
                if (line.length() == 0 || line.startsWith("#")) {
                    continue;
                }

                // Everything after the first '#' is the comment; it may be absent.
                String[] str = line.split("#", 2);
                String comment = str.length > 1 ? str[1].trim() : "";

                // Fields may be separated by any run of spaces or tabs.
                String[] fields = str[0].trim().split("\\s+");
                if (fields.length < 2) {
                    continue;
                }
                // The port is the part of "port/proto" before the slash.
                int slash = fields[1].indexOf('/');
                String port = slash < 0 ? fields[1] : fields[1].substring(0, slash);

                System.out.println("服务:" + fields[0]);
                System.out.println("端口:" + port);
                System.out.println("注释:" + comment);
                System.out.println();
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            br.close();
        }
    }
}

------解决方案--------------------
学习正则表达式的实现

Java code

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the TCP entries from a services file using a regular expression.
 * Written for the forum thread "How to extract data from a text document":
 * http://topic.csdn.net/u/20081229/15/df3b854c-ab74-405c-8830-bc6a5c016b23.html
 */
public class TestTcp {
    /**
     * Matches one TCP data line, {@code name port/tcp [# comment]}.
     * Group 1 is the service name (any run of characters other than whitespace
     * and '#', so names such as {@code sql-net} stay whole), group 2 the port
     * number and group 3 the raw text after '#' (null when there is no '#').
     * Anchoring at the start of the line keeps commented-out entries from matching.
     */
    private static final Pattern TCP_ENTRY =
            Pattern.compile("^\\s*([^\\s#]+)\\s+(\\d+)/tcp\\b[^#]*(?:#(.*))?$");

    /**
     * Prints service, port and comment for every TCP entry in the given file,
     * each record followed by a blank line. Lines that are not TCP entries
     * (comments, blank lines, other protocols) are ignored; an entry without a
     * comment is printed with an empty comment.
     *
     * <p>I/O failures are reported with a stack trace on standard error and do
     * not propagate to the caller.
     *
     * @param fileName path of the services file to read
     */
    public static void doFile(String fileName) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(fileName));
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher m = TCP_ENTRY.matcher(line);
                if (!m.matches()) {
                    continue;
                }
                String rawComment = m.group(3);
                String comment = rawComment == null ? "" : rawComment.trim();

                System.out.println("服务:" + m.group(1));
                System.out.println("端口:" + m.group(2));
                System.out.println("注释:" + comment);
                System.out.println();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Runs {@link #doFile(String)} on the sample file {@code d:/temp/testreg.txt}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        doFile("d:/temp/testreg.txt");
    }

}