日期:2014-05-19  浏览次数:20741 次

C# 代码转为 Java 代码
哪位高人能帮我把下面这段解析网页资源的 C# 代码转换为 Java 代码?Java 解析网页可以使用 HTMLParser 包。



/// <summary>
/// Selects the "区块" (block) element with the highest content score and stores
/// its inner text in <c>text</c>.
/// </summary>
/// <remarks>
/// Works in two passes over <c>pageBroswer.Document.All</c>:
/// 1. Every block element for which <c>CountSentences</c> reports zero sentences
///    has its inner and outer HTML cleared.
/// 2. Every block element is scored as
///    SentenceCount^2 * client width * client height / (linked descendants + 1),
///    where "linked descendants" are descendants named "超链接" (hyperlink).
///    The inner text of the best-scoring block is kept.
/// If no block scores above zero, <c>text</c> is left untouched.
/// The method relies on the Name values "区块" and "超链接" that
/// HtmlElementRecognize assigns to elements.
/// </remarks>
void OntologyElementRecognize()
{
    // Pass 1: strip block elements that contain no sentences at all.
    foreach (HtmlElement candidate in pageBroswer.Document.All)
    {
        if (candidate.Name != "区块")
        {
            continue;
        }

        CountSentences(candidate.InnerText);
        if (SentenceCount != 0)
        {
            continue;
        }

        try
        {
            candidate.InnerHtml = null;
            candidate.OuterHtml = null;
        }
        catch (System.Exception ex)
        {
            // Removal is best effort: a failure here must not abort the scan,
            // but it is traced so it does not vanish silently.
            System.Diagnostics.Debug.WriteLine(
                "OntologyElementRecognize: could not clear empty block: " + ex.Message);
        }
    }

    // Pass 2: score the blocks and remember the text of the best one.
    double maxmark = 0;
    foreach (HtmlElement block in pageBroswer.Document.All)
    {
        if (block.Name != "区块")
        {
            continue;
        }

        double mark = 0;
        string blockText = block.InnerText;
        if (blockText != null)
        {
            CountSentences(blockText);

            // Square in double so a large sentence count cannot overflow
            // integer arithmetic before the widening conversion.
            mark = (double)SentenceCount * SentenceCount;

            // ClientRectangle gives the bounds of the element's client area;
            // its width * height weights the score by the block's size.
            Rectangle r = block.ClientRectangle;
            mark = mark * r.Width * r.Height;
        }

        // Blocks dominated by links (navigation bars, link lists) are penalised.
        int naviCount = 0;
        foreach (HtmlElement descendant in block.All)
        {
            if (descendant.Name == "超链接")
            {
                naviCount++;
            }
        }

        // +1 keeps the divisor non-zero for blocks without any links.
        mark = mark / (naviCount + 1);
        if (mark > maxmark)
        {
            maxmark = mark;
            text = blockText;
        }
    }
}







  void HtmlElementRecognize()
  {
  foreach (HtmlElement i in pageBroswer.Document.All)
  {
  if (i.TagName.ToUpper() == "A")
  {
  i.Name = "超链接";
  }
  if (i.TagName.ToUpper() == "DIV" || i.TagName.ToUpper() == "TABLE" || i.TagName.ToUpper() == "TD")
  {
  i.Name = "区块";
  }
  if (i.TagName.ToUpper() == "H1")
  {
  i.Name = "标题";
  }
  if (i.TagName.ToUpper() == "P" && i.Children.Count == 0)
  {
  i.Name = "段落";
  }
  }
  } 






 
// Counts the number of sentences:
 void CountSentences(string txt)
  {
  ParagraphCount = 0;
  SentenceCount = 0;
  CurrentSentenceCount = 0;
  if (txt == null) return;