日期:2014-05-16  浏览次数:20397 次

正则截取网页的一部分
在网上看到一题,写了两种方法都不行,故在此贴出,望高手解惑
HTML code

<div class="song-list song-list-hook">
<ul>
<li class="bb-dotimg clearfix song-item-hook {"songItem":{"sid":"245423"}} ">

<div class="song-item">    
<span class="checkbox-item">
<input type="checkbox" class="checkbox-item-hook" />
</span>
<span class="song-title" style="width: 148px;">
<a href="/song/245423" title="左边"><em>左边</em></a>
</span>
<span class="singer" style="width: 95px;">
<a href="artist/82366"><em>杨丞琳</em></a>
</span>
<span class="album-title" style="width: 120px;">
<a href="/album/7273576" title="遇上爱">《遇上爱》</a></span>
............
<span class="fun-icon">                    
<span class="music-icon-hook { 'musicIcon': { id: '245423' , type:'song', iconStr:' play add collect songword download' } }">
<a class="icon-play"
title="播放"
href="#"></a><a class="icon-add"
title="添加"
href="#"></a><a class="icon-collect"
title="收藏"
href="#"></a><a class="icon-songword"
title="歌词"
href="#"></a><a class="icon-download"
title="下载"
href="#"></a></span>                    
</span>
</div>
</li>


原网页内容如上,截取后的内容为
HTML code

<div class="song-item">    
<span class="song-title" style="width: 148px;">
<a href="/song/245423" title="左边"><em>左边</em></a>
</span>
<span class="singer" style="width: 95px;">
<a href="artist/82366"><em>杨丞琳</em></a>
</span>
<span class="album-title" style="width: 120px;">
<a href="/album/7273576" title="遇上爱">《遇上爱》</a>
</span>
</div> 


我首先用的方法是,分两次截取,然后用replace替换
JScript code

/**
 * Extracts the `<div class="song-item">…</div>` fragment from the text held in
 * the element with id "cont", strips the `<span class="fun-icon">…</span>`
 * sub-fragment from it, and shows each intermediate step with alert().
 *
 * Alerts, in order: the first match (or null), the fun-icon match, and the
 * song-item fragment with the fun-icon part removed. Nothing is returned.
 */
function test() {
    var cont = document.getElementById('cont').value;
    var reg = /<div class="song-item">[^<>]*<span[\s]*[^>]*>[\S\s]*[^<>]*<\/a>[\S\s]*[^<>]*<\/span>[\S\s]*[^<>]*<\/div>/gm;
    var g1 = reg.exec(cont);
    alert(g1);
    if (g1 !== null) {
        // exec() returns a match array, not a string; index 0 is the matched text.
        var songItem = g1[0];
        reg = /<span class="fun-icon">[\S\s]*[^<>]*<\/span>[^<>]*<\/span>/gm;
        var g2 = reg.exec(songItem);
        if (g2 !== null) {
            alert(g2);
            // Remove the matched fun-icon text from the song-item fragment.
            // (Arrays have no replace() method, so calling it on g2 throws.)
            var result = songItem.replace(g2[0], '');
            alert(result);
        }
    }
}


不过运行到 g2 = g2.replace(g2,''); 时报错了(exec 返回的是匹配结果数组,数组没有 replace 方法)。
后来便直接用正则替换
JScript code

function test() {
    var cont = document.getElementById('cont').value;
    var reg = /<div class="song-item">[^<>]*<span[\s]*[^>]*>[\S\s]*[^<>]*<\/a>[\S\s]*[^<>]*<\/span>[\S\s]*[^<>]*(<span class="fun-icon">[\S\s]*[^<>]*<\/span>[^<>]*<\/span>)[\S\s]*[^<>]*<\/div>/gm;
    var g1 = reg.exec(cont);
    alert(g1);
    al