日期:2014-05-20  浏览次数:20752 次

如何利用java程序获取视频的评论
http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation
有一个优酷的视频,想要获取该视频的评论,但是在页面源码中找不到这些评论,评论内容是放在一个 frame 框架里单独加载的。

------解决方案--------------------
评论是由页面另外发起的 Ajax 请求加载的,直接请求下面这个接口即可(主机为 comments.youku.com):
GET /comments/~ajax/vpcommentContent.html?
参数(拼接到查询字符串时需要做 URL 编码):
__ap={"id":"XMzcyNjAwNzM2","sid":354909363,"page":4,"last_modify":1333031704}
__callback=displayComments
__ai=
------解决方案--------------------
Java code

package other;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;

public class 获取视频评论 {
    public static String  escape (String src) {  int i;
    char j;
    StringBuffer tmp = new StringBuffer();
    tmp.ensureCapacity(src.length()*6);
    for (i=0;i<src.length() ;i++ )  
    {   
        j = src.charAt(i);
        if (Character.isDigit(j) || Character.isLowerCase(j) || Character.isUpperCase(j))    
            tmp.append(j);
        else    if (j<256)    { 
            tmp.append( "%" );
            if (j<16)  
                tmp.append( "0" );
            tmp.append( Integer.toString(j,16) );
        }    else    {
            tmp.append( "%u" );
            tmp.append( Integer.toString(j,16) );
        }  
    }  
    return tmp.toString();
    } 
    public static String  unescape (String src) { 
        StringBuffer tmp = new StringBuffer();
        tmp.ensureCapacity(src.length());
        int  lastPos=0,pos=0;
        char ch;
        while (lastPos<src.length())  {   pos = src.indexOf("%",lastPos);
        if (pos == lastPos)    {    if (src.charAt(pos+1)=='u')     {     ch = (char)Integer.parseInt(src.substring(pos+2,pos+6),16);
        tmp.append(ch);
        lastPos = pos+6;
        }    else     {     ch = (char)Integer.parseInt(src.substring(pos+1,pos+3),16);
        tmp.append(ch);
        lastPos = pos+3;
        }    }   else    {    if (pos == -1)     {     tmp.append(src.substring(lastPos));
        lastPos=src.length();
        }    else     {     tmp.append(src.substring(lastPos,pos));
        lastPos=pos;
        }    }  }  return tmp.toString();
    }
    public static void main(String[]args){
        HttpClient client = new HttpClient();
        GetMethod get = new GetMethod("http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation");
        GetMethod get2 = new GetMethod("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap=%7B%22id%22%3A%22XMzcyOTQ0MTY4%22%2C%22sid%22%3A355016202%2C%22page%22%3A2%2C%22last_modify%22%3A1333080602%7D&__ai=&__callback=displayComments");
        Pattern pp = Pattern.compile("<p id=\\\"content_.*?>(.*?)<");
        try {
            System.out.println(client.executeMethod(get));
            System.out.println(client.executeMethod(get2));
            String rsult=get2.getResponseBodyAsString();
            rsult=rsult.replaceAll("\\\\\"","\"");
            Matcher mm = pp.matcher(rsult);
            while(mm.find()){
                String yy=mm.group(1).replaceAll("\\\\","%");
                System.out.println(unescape(yy));
            }
        } catch (HttpException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        System.out.println();
    }
}