日期:2014-05-16  浏览次数:20895 次

关于正则表达式“(?=pattern)”型匹配
在 Linux C 下使用正则 "Windows (?=95|98|NT|2000)" 匹配 "Windows 95" 中的 "Windows " 不成功,为什么?
程序运行时,regcomp返回“REG_BADRPT”,查了下手册,这样描述:
Invalid use of repetition operators such as using '*' as the first character.
也就是说,“?”号用错了,系统把它识别成了重复操作符(repetition operator)。但网上标准的正则都是这么写的呀,难道 regex.h 不支持“(?=pattern)”这种匹配形式?

请各位大大们悉心指导下。

代码如下:
C/C++ code

#include<stdio.h>
#include<string.h>
#include<sys/types.h>
#include<regex.h>
/*
 * Extract a substring: copy the half-open byte range [start, end) of str
 * into dest and NUL-terminate it.
 *
 * dest  - destination buffer; needs room for (end - start) + 1 bytes.
 * str   - source string; only read, never modified.
 * start - offset of the first byte to copy.
 * end   - offset one past the last byte to copy.
 *
 * A range with end <= start yields an empty string. Without that guard the
 * unsigned subtraction would wrap to a huge length and strncpy() would
 * overrun dest. The computed length is printed to stdout for debugging.
 */
void substr (char *dest, const char *str, unsigned start, unsigned end)
{
    unsigned n = 0;

    if (end > start)
    {
        n = end - start;
    }
    printf("\n%u \n", n);
    if (n > 0)
    {
        /* strncpy stops reading at str's terminator; dest is terminated below. */
        strncpy(dest, str + start, n);
    }
    dest[n] = '\0';
}
/*
 * Compile pattern as a POSIX extended regular expression, run it against
 * str, and copy the text of the overall match into result.
 *
 * str     - subject string to search.
 * pattern - regular expression source, compiled with REG_EXTENDED.
 * result  - output buffer; receives the matched text on success and is left
 *           as an empty string on failure. The caller must size it for the
 *           longest possible match plus the terminating NUL.
 *
 * Returns 0 on success, otherwise the non-zero code returned by regcomp()
 * or regexec(). Both return codes are printed to stdout.
 */
int Pattern(char *str,char *pattern,char *result)
{
    regex_t reg;
    regmatch_t pm[10];
    const size_t nmatch = sizeof pm / sizeof pm[0];
    int z;

    /* Give the caller a defined (empty) string on every failure path. */
    result[0] = '\0';

    z = regcomp(&reg, pattern, REG_EXTENDED);
    printf("%d\n", z);
    if (z != 0)
    {
        /* Compilation failed: reg holds nothing that needs regfree(). */
        return z;
    }

    z = regexec(&reg, str, nmatch, pm, 0);
    printf("%d\n", z);
    if (z == 0)
    {
        /* pm[0] describes the whole match; offsets are non-negative here. */
        substr(result, str, (unsigned)pm[0].rm_so, (unsigned)pm[0].rm_eo);
    }

    /* Release the compiled pattern on both the match and no-match paths. */
    regfree(&reg);
    return z;
}

/*
 * Demo driver: tries to match "Windows 95" against a pattern that uses the
 * Perl-style lookahead "(?=...)" and prints the matched text.
 *
 * Returns 0 when the match succeeds and 1 when Pattern() reports an error.
 */
int main(int argc, char **argv)
{
    /* Zero-initialised so the buffer is a valid string even if never written. */
    char result[100] = {0};
    int rc;

    (void)argc;
    (void)argv;

    rc = Pattern("Windows 95","Windows (?=95|98|NT|2000)",result);
    if (rc != 0)
    {
        /* Do not print result after a failure; report the regex error code instead. */
        fprintf(stderr, "Pattern failed with regex error code %d\n", rc);
        return 1;
    }
    printf("%s\n",result);
    return 0;
}



------解决方案--------------------
在正则表达式测试器中测试了下你的正则,是对的。
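但是在 C 中就是不对。错误提示为:Invalid preceding regular expression,大概意思是:接在 Windows 后的这个表达式无效。原因在于 regex.h 实现的是 POSIX 正则(BRE/ERE),而“(?=pattern)”这种前瞻断言属于 Perl/PCRE 的扩展语法,POSIX 正则并不支持;“(”后面紧跟的“?”前面没有可重复的表达式,所以 regcomp 返回 REG_BADRPT。如果确实需要前瞻断言,可以改用 PCRE 库。供你参考下。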

C/C++ code

#include<stdio.h>
#include<string.h>
#include<sys/types.h>
#include<regex.h>

/*
 * Extract a substring: copy the half-open byte range [start, end) of str
 * into dest and NUL-terminate it.
 *
 * dest  - destination buffer; needs room for (end - start) + 1 bytes.
 * str   - source string; only read, never modified.
 * start - offset of the first byte to copy.
 * end   - offset one past the last byte to copy.
 *
 * A range with end <= start yields an empty string. Without that guard the
 * unsigned subtraction would wrap to a huge length and strncpy() would
 * overrun dest. The range and its length are printed to stdout for debugging.
 */
void substr (char *dest, const char *str, unsigned start, unsigned end)
{
    unsigned n = 0;

    if (end > start)
    {
        n = end - start;
    }
    printf("[%u, %u)=%u\n", start, end, n);
    if (n > 0)
    {
        /* strncpy stops reading at str's terminator; dest is terminated below. */
        strncpy(dest, str + start, n);
    }
    dest[n] = '\0';
}

/*
 * Compile pattern as a POSIX extended regular expression, run it against
 * str, and copy the text of the overall match into result.
 *
 * str     - subject string to search.
 * pattern - regular expression source, compiled with REG_EXTENDED.
 * result  - output buffer; receives the matched text on success and is left
 *           as an empty string on failure. The caller must size it for the
 *           longest possible match plus the terminating NUL.
 *
 * Returns 0 on success, otherwise the non-zero code returned by regcomp()
 * or regexec(). Each return code, and the regerror() text for a failure,
 * is printed to stdout.
 */
int Pattern(char *str,char *pattern,char *result)
{
    regex_t reg;
    regmatch_t pm[10];
    const size_t nmatch = sizeof pm / sizeof pm[0];
    char errmsg[100] = {0};
    int z;

    /* Give the caller a defined (empty) string on every failure path. */
    result[0] = '\0';

    z = regcomp(&reg, pattern, REG_EXTENDED);
    printf("1. z=%d\n", z);
    if (z != 0)
    {
        regerror(z, &reg, errmsg, sizeof errmsg);
        printf("1. errmsg=[%s]\n", errmsg);
        /* Compilation failed: reg holds nothing that needs regfree(). */
        return z;
    }

    z = regexec(&reg, str, nmatch, pm, 0);
    printf("2. z=%d\n", z);
    if (z != 0)
    {
        /* Fetch the message before the compiled pattern is released. */
        regerror(z, &reg, errmsg, sizeof errmsg);
        printf("2. errmsg=[%s]\n", errmsg);
    }
    else
    {
        /* pm[0] describes the whole match; offsets are non-negative here. */
        substr(result, str, (unsigned)pm[0].rm_so, (unsigned)pm[0].rm_eo);
    }

    /* Release the compiled pattern on both the match and no-match paths. */
    regfree(&reg);
    return z;
}

/*
 * Demo driver: runs Pattern() on the subject "Windows 95" with a pattern
 * containing the "(?=...)" construct, then prints the returned status code
 * together with whatever text ended up in the output buffer.
 */
int main(int argc, char **argv)
{
    char lookahead_pat[] = "Windows (?=95|98|NT|2000)";
    char matched[100] = {0};    /* stays empty if nothing is copied into it */
    int status;

    status = Pattern("Windows 95", lookahead_pat, matched);
    printf("ret=%d, result=[%s]\n", status, matched);

    return 0;
}