最近接触了 C++ 的正则表达式,感觉还不错,在此记录一下使用过程。
正文
关于C++的正则表达式头文件,可以参考这个网站的教程http://www.cplusplus.com/reference/regex/
C 语言(POSIX)的正则表达式头文件 regex.h 可以参考:
http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
代码如下
// Demo program contrasting the POSIX <regex.h> C API with the C++11 <regex>
// library: capture groups, whole-string validation, replacement and iteration
// over successive matches. Every demo prints its result to stdout.

#include <regex.h>      // POSIX: regcomp / regexec / regerror / regfree
#include <string.h>
#include <sys/types.h>

#include <cstddef>
#include <cstdio>
#include <iostream>
#include <regex>
#include <string>

namespace {

// std::regex accepts exactly ONE grammar flag. Combining `extended` with
// `ECMAScript` is invalid and makes some standard libraries throw
// std::regex_error, so every demo uses ECMAScript plus case-insensitivity.
const std::regex::flag_type kEcmaIgnoreCase =
    std::regex_constants::ECMAScript | std::regex_constants::icase;

}  // namespace

/// Matches a fixed sample string with the POSIX C API and prints the whole
/// match followed by each capture group, one per line, prefixed with "--->".
/// On a compile or match failure the text produced by regerror() is printed
/// after "Message:".
void cRegex() {
    // POSIX extended regular expressions have no lazy quantifier; "*?" is
    // undefined there, so the groups use plain greedy ".*".
    regex_t compiled;
    const int compile_rc =
        regcomp(&compiled, "abc(.*)-(.*)xyz", REG_EXTENDED | REG_ICASE);
    if (compile_rc != 0) {
        char errmsg[250] = {0};
        regerror(compile_rc, &compiled, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
        return;  // nothing was compiled, so there is nothing to regfree()
    }

    constexpr std::size_t kMaxGroups = 10;
    const char src[] = "abcQQQQQQ-5555555xyz";
    regmatch_t groups[kMaxGroups];

    // regexec() returns 0 on a match; comparing against 0 avoids the
    // glibc-only REG_NOERROR constant.
    const int exec_rc = regexec(&compiled, src, kMaxGroups, groups, 0);
    if (exec_rc == 0) {
        for (std::size_t i = 0; i < kMaxGroups; ++i) {
            if (groups[i].rm_so == -1) {
                break;  // unused slots are filled with -1
            }
            // std::string sizes itself to the match, so a long match cannot
            // overflow a fixed-size buffer.
            const std::size_t length =
                static_cast<std::size_t>(groups[i].rm_eo - groups[i].rm_so);
            const std::string value(src + groups[i].rm_so, length);
            std::printf("--->%s\n", value.c_str());
        }
    } else {
        char errmsg[250] = {0};
        regerror(exec_rc, &compiled, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
    }
    regfree(&compiled);
}

/// Replaces the whole word "baidu" in a sample URL with "google" and prints
/// the text before and after the replacement.
void cppRegex_replace() {
    std::string tx = "https://www.baidu.com";
    std::cout << "old text:" << tx << std::endl;
    try {
        const std::regex re("\\bbaidu\\b");
        tx = std::regex_replace(tx, re, "google");
        std::cout << "new text:" << tx << std::endl;
    } catch (const std::regex_error& e) {
        std::cout << "code:" << static_cast<int>(e.code()) << std::endl
                  << "msg:" << e.what() << std::endl;
    }
}

/// Checks whether a sample address has the shape "<2-20 word chars>@<2+ word
/// chars>.com" and prints the verdict.
void cppRegex() {
    // The dot before "com" is escaped so that it only matches a literal '.'.
    const std::string pattern = "^\\w{2,20}@\\w{2,}\\.com$";
    const std::string text = "someone@example.com";
    try {
        const std::regex re(pattern, kEcmaIgnoreCase);
        std::smatch res;
        if (std::regex_match(text, res, re)) {
            std::printf("ok ,this is a true email format :)\n");
        } else {
            std::printf("error email format :(\n");
        }
    } catch (const std::regex_error& e) {
        std::printf("exception:( \n \t %d \t %s\n",
                    static_cast<int>(e.code()), e.what());
    }
}

/// Splits a sample address into "local@domain.tld" capture groups and prints
/// the whole match followed by every group.
void cppRegex_matchGroup() {
    const std::string pattern = "(.*?)@(.*?)\\.(.*?)";
    const std::string text = "someone@example.com";
    try {
        const std::regex re(pattern, kEcmaIgnoreCase);
        std::smatch res;
        if (std::regex_match(text, res, re)) {
            std::printf("ok :)\n");
            for (std::size_t i = 0; i < res.size(); ++i) {
                std::printf("-->%s \n", res.str(i).c_str());
            }
        } else {
            std::printf("no match :(\n");
        }
    } catch (const std::regex_error& e) {
        std::printf("exception:( \n \t %d \t %s\n",
                    static_cast<int>(e.code()), e.what());
    }
}

/// Walks over every <a href="...">...</a> element of a sample HTML snippet
/// and prints its URL and link text, separated by a tab.
void cppRegex_matchGroup_iterator() {
    const std::string pattern = "<a href=\"(.*?)\">(.*?)</a>";
    const std::string text =
        "<a href=\"https://www.baidu.com\">百度</a>"
        "<a href=\"https://www.google.com\">谷歌</a>"
        "<a href=\"http://www.csdn.net\">CSDN</a>";
    try {
        const std::regex re(pattern, kEcmaIgnoreCase);
        const std::sregex_iterator last;  // default-constructed == end
        for (std::sregex_iterator it(text.begin(), text.end(), re);
             it != last; ++it) {
            std::printf("-->%s \t", it->str(1).c_str());
            std::printf("%s \n", it->str(2).c_str());
        }
    } catch (const std::regex_error& e) {
        std::printf("exception:( \n \t %d \t %s\n",
                    static_cast<int>(e.code()), e.what());
    }
}

/// Runs every demo in turn.
int main() {
    cRegex();
    cppRegex();
    cppRegex_matchGroup();
    cppRegex_replace();
    cppRegex_matchGroup_iterator();
    return 0;
}
编译运行(<regex> 需要 C++11 及以上): g++ -std=c++11 main.cpp -o regex_test && ./regex_test
输出结果:
ok ,this is a true email format :)
ok :)
old text:https://www.baidu.com
new text:https://www.google.com
bye~