日期:2014-05-20  浏览次数:20736 次

java读取txt文件
程序从txt中读取数据,txt文件较大,大概400W行。怎么读取最后10W行到一个数组中?要很快就能得到结果。

谁能有一个比较好的方法?最好有代码给提示,io的readline肯定不行。请给别的提示

------解决方案--------------------
刚看了帖子,也是定位读取文件的行,不过你这个文件High大发了
http://topic.csdn.net/u/20100608/14/dd144f29-5df5-41d2-be2e-420a330fd965.html?20636


------解决方案--------------------
写了个分线程读的,但是无法精确只读10W行
看看行不,行的话,你就自己改改吧(很多小细节没写)
把下面两个变量值改成
LAST_LINE_COUNT = 10W
MAX_LINE_COUNT = 400W

finish函数里面随便写点你的东西

Java code

package to.shin.sai;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.RandomAccessFile;

/**
 * Reads approximately the last {@code LAST_LINE_COUNT} lines of a large text file using several
 * threads, then writes them to {@code c:\result.txt}.
 *
 * <p>The start of the tail is only estimated from the byte size of the file (the last
 * {@code LAST_LINE_COUNT / MAX_LINE_COUNT} fraction of the bytes), so the number of lines
 * returned is exact only when all lines have the same length.
 *
 * <p>The tail is cut into {@code MAX_THREAD_COUNT} byte ranges. Every line belongs to exactly one
 * range: the one that contains the first byte of the line. That rule guarantees that adjacent
 * workers neither drop nor duplicate a line, whatever byte the range boundary falls on.
 *
 * <p>Lines are delimited by the byte {@code '\n'}; {@code '\r'} characters are stripped.
 * NOTE(review): this assumes an encoding in which the byte 0x0A can only mean a line feed
 * (true for ASCII-compatible encodings such as UTF-8 or Shift_JIS, not for UTF-16).
 *
 * <p>An instance keeps the result of the current run in fields, so one instance must not run
 * {@link #read(String, String)} from several threads at once.
 */
public class Test4ReadLargeFile {
    /** Number of lines wanted from the end of the file. */
    private static final long LAST_LINE_COUNT = 10;
    /** Estimated total number of lines in the file. */
    private static final long MAX_LINE_COUNT = 400;
    /** Number of worker threads (and of byte ranges). */
    private static final int MAX_THREAD_COUNT = 5;
    /** Block size used while scanning for a line feed. */
    private static final int SCAN_BUFFER_SIZE = 8192;

    /** One array of lines per worker, in file order; an entry stays null if its worker failed. */
    private String[][] resArr = null;
    /** Charset name used to decode the input and encode the output. */
    private String fileEncode = "";

    /**
     * @param args not used
     */
    public static void main(String[] args) {
        String filePath = "c:\\largeFile.txt";
        Test4ReadLargeFile trlf = new Test4ReadLargeFile();
        trlf.read(filePath, "SJIS");
    }

    /**
     * Reads the tail of {@code filePath} in parallel and writes the collected lines to the
     * result file. I/O errors are printed to standard error; if the input file cannot be opened
     * no result file is written.
     *
     * @param filePath path of the file to read
     * @param charset  name of the charset of the input file (also used for the output file)
     */
    public void read(String filePath, String charset) {
        long beginTime = System.currentTimeMillis();
        System.out.println("开始时间:" + beginTime);
        this.fileEncode = charset;
        resArr = new String[MAX_THREAD_COUNT][];

        ReadFileThread[] workers = new ReadFileThread[MAX_THREAD_COUNT];
        try {
            long fileSize = fileLength(filePath);
            long beginPos = (MAX_LINE_COUNT - LAST_LINE_COUNT) * fileSize / MAX_LINE_COUNT;
            long perLength = (fileSize - beginPos) / MAX_THREAD_COUNT;
            for (int i = 0; i < MAX_THREAD_COUNT; i++) {
                long chunkBegin = beginPos + i * perLength;
                // The last range absorbs the remainder of the integer division.
                long chunkEnd = (i == MAX_THREAD_COUNT - 1) ? fileSize : chunkBegin + perLength;
                workers[i] = new ReadFileThread(chunkBegin, chunkEnd, filePath, i);
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        for (int i = 0; i < workers.length; i++) {
            workers[i].start();
        }

        // Wait for every worker. join() cannot miss a completion (unlike a bare wait/notify) and
        // makes the workers' writes to resArr visible to this thread.
        try {
            for (int i = 0; i < workers.length; i++) {
                workers[i].join();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return;
        }

        finish();
        System.out.println("all data is readed");

        long endTime = System.currentTimeMillis();
        System.out.println("结束时间:" + endTime);
        System.out.println("耗时:" + (endTime - beginTime) + "ms");
    }

    /** Returns the size of the file in bytes. */
    private static long fileLength(String filePath) throws IOException {
        RandomAccessFile raf = new RandomAccessFile(filePath, "r");
        try {
            return raf.length();
        } finally {
            raf.close();
        }
    }

    /**
     * Returns the offset of the first line that starts at or after {@code pos}: {@code pos}
     * itself when it is 0 or directly follows a line feed, otherwise the offset just behind the
     * next line feed, or {@code fileSize} when there is none.
     */
    private static long findLineStart(RandomAccessFile raf, long pos, long fileSize)
            throws IOException {
        if (pos <= 0) {
            return 0;
        }
        if (pos >= fileSize) {
            return fileSize;
        }
        // Start one byte early so that a line feed directly before pos is recognised.
        long cur = pos - 1;
        raf.seek(cur);
        byte[] buf = new byte[SCAN_BUFFER_SIZE];
        int n;
        while ((n = raf.read(buf)) > 0) {
            for (int i = 0; i < n; i++) {
                if (buf[i] == '\n') {
                    return cur + i + 1;
                }
            }
            cur += n;
        }
        return fileSize;
    }

    /**
     * Reads the bytes {@code [from, to)} and splits them into lines. Blank lines are kept; the
     * line feed that terminates the last line does not produce an extra empty entry.
     */
    private String[] readLines(RandomAccessFile raf, long from, long to) throws IOException {
        long size = to - from;
        if (size > Integer.MAX_VALUE - 8) {
            throw new IOException("It's too large size to read for this job");
        }
        byte[] bytes = new byte[(int) size];
        raf.seek(from);
        raf.readFully(bytes);

        String text = new String(bytes, fileEncode).replace("\r", "");
        if (text.endsWith("\n")) {
            text = text.substring(0, text.length() - 1);
        }
        return text.split("\n", -1);
    }

    /** Worker that collects every line whose first byte lies in {@code [beginPos, endPos)}. */
    private class ReadFileThread extends Thread {
        // the begin position (inclusive)
        private final long beginPos;
        // the end position (exclusive)
        private final long endPos;
        // the file
        private final String filePath;
        // slot of resArr this worker fills
        private final int resIndex;

        ReadFileThread(long beginPos, long endPos, String filePath, int resIndex) {
            super("ReadFileThread: " + resIndex);
            this.beginPos = beginPos;
            this.endPos = endPos;
            this.filePath = filePath;
            this.resIndex = resIndex;
        }

        @Override
        public void run() {
            RandomAccessFile raf = null;
            try {
                raf = new RandomAccessFile(filePath, "r");
                long fileSize = raf.length();
                long from = findLineStart(raf, beginPos, fileSize);
                if (from >= endPos) {
                    // No line starts inside this range, so there is nothing to read.
                    resArr[resIndex] = new String[0];
                } else {
                    // Read up to the start of the first line owned by the next range, which
                    // completes the line that straddles the range boundary.
                    long to = findLineStart(raf, endPos, fileSize);
                    resArr[resIndex] = readLines(raf, from, to);
                }
                System.out.println("Thread " + resIndex + " is over.");
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                if (null != raf) {
                    try {
                        raf.close();
                    } catch (IOException ioe) {
                        ioe.printStackTrace();
                    }
                }
            }
        }
    }

    /**
     * Writes all collected lines, in file order, to {@code c:\result.txt} using the input
     * charset. Replace the body with whatever should happen to the lines.
     */
    private void finish() {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream("c:\\result.txt");
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(fos, fileEncode));
            for (int i = 0; i < resArr.length; i++) {
                if (null != resArr[i]) {
                    for (int j = 0; j < resArr[i].length; j++) {
                        pw.println(resArr[i][j]);
                    }
                }
            }
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (pw.checkError()) {
                throw new IOException("failed to write the result file");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (null != fos) {
                try {
                    fos.close();
                } catch (IOException ioe) {
                    ioe.printStackTrace();
                }
            }
        }
    }
}