C/C++正则表达式regex - JoXrays's Blog

最近接触了C++的正则表达式，感觉还不错，在此记录一下使用过程。

正文

关于C++的正则表达式头文件,可以参考这个网站的教程http://www.cplusplus.com/reference/regex/

C正则表达式头文件regex.h
http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html

代码如下

#include <iostream>
#include <regex.h>
#include <regex>
#include <sys/types.h>
#include <string.h>
#include <string>
using namespace std;
// C-style (POSIX <regex.h>) regular expression demo.
// Compiles a case-insensitive extended regular expression, runs it against a
// fixed subject string and prints the whole match followed by each capture
// group, one per line, prefixed with "--->".
// On a compile or match failure the regerror() text is printed instead.
void cRegex(){
    // POSIX ERE has no lazy quantifiers ("*?" is two adjacent duplication
    // symbols, whose result is undefined), so the "up to the first '-'"
    // intent is spelled with a negated bracket expression instead.
    static const char kPattern[] = "abc([^-]*)-(.*)xyz";
    const char src[] = "abcQQQQQQ-5555555xyz";
    constexpr std::size_t kMaxGroups = 10;
    constexpr std::size_t kErrBufSize = 250;

    regex_t r;
    // regcomp() returns 0 on success and an error code otherwise.
    int ret = regcomp(&r, kPattern, REG_EXTENDED | REG_ICASE);
    if (ret != 0) {
        char errmsg[kErrBufSize] = {0};
        regerror(ret, &r, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
        return; // nothing was compiled, so there is nothing to regfree()
    }

    regmatch_t pm[kMaxGroups];
    ret = regexec(&r, src, kMaxGroups, pm, 0);
    if (ret == 0) {
        for (const regmatch_t& m : pm) {
            // Unused slots are reported with rm_so == -1.
            if (m.rm_so == -1) {
                break;
            }
            // rm_so: start offset of the matched substring
            // rm_eo: offset one past its end
            // rm_eo - rm_so: length of the matched substring
            // Reference: http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
            // Building a std::string sizes the copy to the match, so a longer
            // subject can never overflow a fixed-size buffer.
            const std::string matchvalue(src + m.rm_so,
                                         static_cast<std::size_t>(m.rm_eo - m.rm_so));
            std::cout << "--->" << matchvalue << '\n';
        }
    } else {
        char errmsg[kErrBufSize] = {0};
        regerror(ret, &r, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
    }
    // Release the memory owned by the compiled expression.
    regfree(&r);
}
// C++ <regex> replacement demo.
// Prints the original URL, replaces the whole word "baidu" with "google"
// and prints the result. If the regex library throws, the error code and
// message are printed instead.
void cppRegex_replace(){
    std::string tx("https://www.baidu.com");
    std::cout << "old text:" << tx << std::endl;
    try {
        // \b anchors the match on word boundaries.
        const std::regex wordPattern("\\bbaidu\\b");
        const std::string replaced = std::regex_replace(tx, wordPattern, "google");
        tx = replaced;
        std::cout << "new text:" << tx << std::endl;
    } catch (const std::regex_error& err) {
        const auto code = err.code();
        if (code == std::regex_constants::error_badrepeat) {
            // Placeholder: handle a specific error code here.
        }
        std::cout << "code:" << code << std::endl;
        std::cout << "msg:" << err.what() << std::endl;
    }
}
// C++ <regex> whole-string match demo.
// Checks a fixed e-mail address against a simple "<name>@<host>.com" pattern
// and prints whether it matched.
void cppRegex(){
    // The dot before "com" is escaped so that it matches a literal '.'
    // rather than any character.
    const std::string pattern = "^\\w{2,20}@\\w{2,}\\.com$";
    const std::string text = "josephxrays@163.com";
    // Exactly one grammar flag may be set. The pattern uses ECMAScript syntax
    // (\w), so combining it with `extended` is invalid and makes current
    // standard libraries throw from the constructor.
    const std::regex re(pattern,
                        std::regex_constants::icase |
                        std::regex_constants::ECMAScript);
    // regex_match succeeds only if the entire text matches the pattern.
    if (std::regex_match(text, re)) {
        std::cout << "ok ,this is a true email format :)\n";
    } else {
        std::cout << "error email format :(\n";
    }
}
// C++ <regex> capture-group demo.
// Splits a fixed e-mail address into "<name>@<host>.<tld>" and prints the
// whole match followed by each capture group, one per line, prefixed "-->".
// Any regex_error (from compiling or matching) is reported on one line.
void cppRegex_matchGroup(){
    const std::string pattern = "(.*?)@(.*?)\\.(.*?)";
    const std::string text = "josephxrays@163.com";
    try {
        // The regex is built inside the try block so that a compile error is
        // caught as well. Exactly one grammar flag may be set: the lazy
        // quantifiers need ECMAScript, so `extended` must not be combined.
        const std::regex re(pattern,
                            std::regex_constants::icase | // ignore case
                            std::regex_constants::ECMAScript);
        std::smatch res;
        // regex_match requires the whole text to match, which forces the
        // last lazy group to extend to the end of the string.
        if (std::regex_match(text, res, re)) {
            std::cout << "ok :)\n";
            // Element 0 is the whole match; the rest are the capture groups.
            for (const auto& group : res) {
                std::cout << "-->" << group.str() << " \n";
            }
        } else {
            std::cout << "no match :(\n";
        }
    } catch (const std::regex_error& e) {
        std::cout << "exception:( \n \t " << e.code() << " \t " << e.what() << "\n";
    }
}
// C++ <regex> iteration demo.
// Walks every <a href="...">...</a> element in a fixed HTML snippet and
// prints "--><href> \t<link text> " for each one, one per line.
void cppRegex_matchGroup_iterator(){
    const std::string pattern = "<a href=\"(.*?)\">(.*?)</a>";
    const std::string text =
            "<a href=\"https://www.baidu.com\">百度</a>"
            "<a href=\"https://www.google.com\">谷歌</a>"
            "<a href=\"http://www.csdn.net\">CSDN</a>";
    // Exactly one grammar flag may be set: the lazy quantifiers need
    // ECMAScript, so `extended` must not be combined with it.
    const std::regex re(pattern,
                        std::regex_constants::icase |
                        std::regex_constants::ECMAScript);
    // regex_iterator visits every non-overlapping match in the text;
    // a default-constructed iterator marks the end of the sequence.
    const std::sregex_iterator ends;
    for (std::sregex_iterator its(text.begin(), text.end(), re); its != ends; ++its) {
        // Group 1 is the href value, group 2 is the link text.
        std::cout << "-->" << its->str(1) << " \t"
                  << its->str(2) << " \n";
    }
}
// Entry point: runs every demo once, in the order used by the article's
// sample output, and exits with status 0.
int main()
{
    using Demo = void (*)();
    const Demo demos[] = {
        cRegex,
        cppRegex,
        cppRegex_matchGroup,
        cppRegex_replace,
        cppRegex_matchGroup_iterator,
    };
    for (const Demo demo : demos) {
        demo();
    }
    return 0;
}

编译运行: g++ main.cpp -o regex_test && ./regex_test

输出结果:

--->abcQQQQQQ-5555555xyz
--->QQQQQQ
--->5555555
ok ,this is a true email format :)
ok :)
-->josephxrays@163.com 
-->josephxrays 
-->163 
-->com 
old text:https://www.baidu.com
new text:https://www.google.com
-->https://www.baidu.com 	百度 
-->https://www.google.com 	谷歌 
-->http://www.csdn.net 	CSDN

bye~

Programming

C++ C

本博客所有文章除特别声明外，均采用 CC BY-SA 4.0 协议，转载请注明出处！

上一篇：安装Linux系统到U盘

下一篇：Linux lshw和dmidecode查看硬件信息