怎么获取 <p id="nv"> 下 <a> 标签里面的内容？
/**
 * Matches a complete anchor element: an opening {@code <a>} tag (optionally
 * with attributes), its content, and the nearest closing {@code </a>}.
 * The opening tag must be exactly {@code a} -- i.e. followed by whitespace or
 * {@code >} -- so that tags such as {@code <abbr>} or {@code <address>} are
 * not mistaken for anchors. DOTALL lets the content span line breaks;
 * CASE_INSENSITIVE accepts {@code <A HREF=...>} as well.
 */
private static final Pattern ANCHOR_PATTERN = Pattern.compile(
        "<a(?:\\s[^>]*)?>.*?</a>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

/**
 * Extracts every anchor element ({@code <a ...>...</a>}) from an HTML string.
 *
 * @param s the HTML text to scan; may be {@code null}
 * @return the matched anchor elements (tags included) in document order;
 *         an empty list when {@code s} is {@code null} or contains no anchors
 */
public List<String> getNewsurl(String s) {
    List<String> list = new ArrayList<String>();
    if (s == null) {
        // Nothing to scan; return an empty list instead of failing in matcher().
        return list;
    }
    Matcher ma = ANCHOR_PATTERN.matcher(s);
    while (ma.find()) {
        list.add(ma.group());
    }
    return list;
}
public String save(){
NewsAction t = new NewsAction();
String content = t.getHtmlContent("http://www.baidu.com");
HttpServletRequest request = ServletActionContext.getRequest();
String id=request.getParameter("news_id");
n=newsService.findById(Integer.valueOf(id));
// 分页显示时添加的代码
//content = content.replaceAll("(<br>)+?", "\n");//转化换行
//content = content.replaceAll("<p><em>.*?</em></p>", "");// 去图片注释
List<String> a = t.getNewsurl(content);
List<String> news = new ArrayList<String>();
for (String s : a) {
news.add(s.replaceAll("<.*?>", "")); //正则表达式
n.setNews_Title(s);
String str=n.getNews_Title();
Pattern p=Pattern.compile("<a.*?>(.+?)</a>");
Matcher m=p.matcher(str);
while(m.find()){
n.setNews_Title(m.group(1));
n.setNews_Id(n.getNews_Id()+1);
}
System.out.println(n.getNews_Title());
System.out.println(n.getNews_Content());
&n