C/C++正则表达式regex

最近接触了C++的正则表达式,感觉还不错,在此记录一下使用过程。

正文

关于C++的正则表达式头文件,可以参考这个网站的教程http://www.cplusplus.com/reference/regex/

C正则表达式头文件regex.h
http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html

代码如下:

#include <iostream>
#include <regex.h>
#include <regex>
#include <sys/types.h>
#include <string.h>
#include <string>
using namespace std;
/**
 * Demonstrates the C-style (POSIX <regex.h>) regular-expression API.
 *
 * Compiles a case-insensitive extended regular expression, runs it against a
 * fixed sample string, and prints the whole match followed by each capture
 * group, one per line, each prefixed with "--->". If compiling or matching
 * fails (a failed match includes "no match"), the text produced by regerror()
 * is printed after "Message:" instead.
 */
void cRegex(){
    regex_t compiled;
    // POSIX extended syntax has no lazy "*?" quantifier (stacked repetition
    // operators are undefined there), so plain greedy groups are used.
    // regcomp() returns 0 on success.
    const int compileStatus =
        regcomp(&compiled, "abc(.*)-(.*)xyz", REG_EXTENDED | REG_ICASE);
    if (compileStatus != 0) {
        // Report the failure instead of returning silently. regfree() is
        // skipped: a failed regcomp() leaves the regex_t contents undefined.
        char errmsg[250] = {0};
        regerror(compileStatus, &compiled, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
        return;
    }

    const char src[] = "abcQQQQQQ-5555555xyz";
    regmatch_t groups[10];
    const std::size_t groupCapacity = sizeof(groups) / sizeof(groups[0]);
    const int execStatus = regexec(&compiled, src, groupCapacity, groups, 0);
    if (execStatus == 0) {
        for (std::size_t i = 0; i < groupCapacity; ++i) {
            // Slots that did not take part in the match have rm_so == -1.
            if (groups[i].rm_so == -1) {
                break;
            }
            // rm_so is the start offset of the sub-match and rm_eo the offset
            // just past its end, so rm_eo - rm_so is its length. Building a
            // std::string sized from that length avoids a fixed-size buffer
            // that a longer match could overflow.
            const std::size_t length =
                static_cast<std::size_t>(groups[i].rm_eo - groups[i].rm_so);
            const std::string matched(src + groups[i].rm_so, length);
            std::cout << "--->" << matched << '\n';
        }
    } else {
        char errmsg[250] = {0};
        regerror(execStatus, &compiled, errmsg, sizeof(errmsg));
        std::cout << "Message:" << errmsg << std::endl;
    }
    // Release the memory owned by the compiled expression.
    regfree(&compiled);
}
/**
 * Demonstrates text replacement with std::regex_replace.
 *
 * Prints a fixed URL, replaces every whole-word occurrence of "baidu" in it
 * with "google", and prints the result. If the regex library throws a
 * std::regex_error, its error code and message are printed instead.
 */
void cppRegex_replace(){
    const std::string original = "https://www.baidu.com";
    std::cout << "old text:" << original << std::endl;
    try {
        // \b anchors the match at word boundaries on both sides.
        const std::regex wordPattern("\\bbaidu\\b");
        const std::string replaced =
            std::regex_replace(original, wordPattern, "google");
        std::cout << "new text:" << replaced << std::endl;
    } catch (const std::regex_error& err) {
        // Specific codes (for example regex_constants::error_badrepeat) could
        // be handled separately here; currently every code is just reported.
        std::cout << "code:" << err.code() << std::endl
                  << "msg:" << err.what() << std::endl;
    }
}
/**
 * Demonstrates whole-string validation with std::regex_match.
 *
 * Checks a fixed sample address against a simple e-mail pattern (2-20 word
 * characters, '@', at least 2 word characters, then a literal ".com") and
 * prints whether the address matched. Only ".com" addresses are accepted by
 * this pattern.
 */
void cppRegex(){
    // The dot before "com" is escaped so it matches only a literal '.';
    // unescaped it would accept any character in that position.
    const std::string pattern = "^\\w{2,20}@\\w{2,}\\.com$";
    const std::string text = "josephxrays@163.com";
    // At most one grammar flag may be set. The pattern relies on ECMAScript
    // syntax (\w), so ECMAScript is selected together with icase.
    const std::regex re(pattern,
                        std::regex_constants::icase |
                        std::regex_constants::ECMAScript);
    // regex_match requires the entire text to match; regex_search would
    // accept a match anywhere inside it.
    if (std::regex_match(text, re)) {
        std::cout << "ok ,this is a true email format :)\n";
    } else {
        std::cout << "error email format :(\n";
    }
}
/**
 * Demonstrates capture groups with std::regex_match.
 *
 * Splits a fixed sample e-mail address into the parts before '@', between
 * '@' and the first '.', and after that '.'. On a match it prints "ok :)"
 * followed by the whole match and each group, one per line, prefixed with
 * "-->". If the regex library throws a std::regex_error, its code and message
 * are printed instead.
 */
void cppRegex_matchGroup(){
    const std::string pattern = "(.*?)@(.*?)\\.(.*?)";
    const std::string text = "josephxrays@163.com";
    try {
        // Constructed inside the try block so that an invalid pattern is
        // reported by the handler below. At most one grammar flag may be set;
        // the lazy ".*?" groups need ECMAScript. icase ignores letter case.
        const std::regex re(pattern,
                            std::regex_constants::icase |
                            std::regex_constants::ECMAScript);
        std::smatch res;
        // regex_match requires the entire text to match the pattern.
        if (std::regex_match(text, res, re)) {
            std::cout << "ok :)\n";
            // Index 0 is the whole match; 1..size()-1 are the capture groups.
            for (std::size_t i = 0; i < res.size(); ++i) {
                std::cout << "-->" << res.str(i) << " \n";
            }
        } else {
            std::cout << "no match :(\n";
        }
    } catch (const std::regex_error& e) {
        std::cout << "exception:( \n \t " << static_cast<int>(e.code())
                  << " \t " << e.what() << "\n";
    }
}
/**
 * Demonstrates iterating over every match with std::sregex_iterator.
 *
 * Scans a fixed HTML snippet for anchor tags and, for each one found, prints
 * the href value and the link text on one line in the form
 * "-->HREF \tTEXT \n".
 */
void cppRegex_matchGroup_iterator(){
    const std::string pattern = "<a href=\"(.*?)\">(.*?)</a>";
    // Adjacent string literals are concatenated by the compiler.
    const std::string text =
        "<a href=\"https://www.baidu.com\">百度</a>"
        "<a href=\"https://www.google.com\">谷歌</a>"
        "<a href=\"http://www.csdn.net\">CSDN</a>";
    // At most one grammar flag may be set; the lazy ".*?" groups need
    // ECMAScript.
    const std::regex re(pattern,
                        std::regex_constants::icase |
                        std::regex_constants::ECMAScript);
    // A default-constructed sregex_iterator is the end-of-sequence marker.
    const std::sregex_iterator ends;
    for (std::sregex_iterator its(text.begin(), text.end(), re); its != ends; ++its) {
        // Each element is a match_results: group 1 is the href value and
        // group 2 is the link text (group 0 would be the whole tag).
        std::cout << "-->" << its->str(1) << " \t";
        std::cout << its->str(2) << " \n";
    }
}
/**
 * Program entry point: runs each regex demo once, in a fixed order, and
 * returns 0.
 */
int main()
{
    using Demo = void (*)();
    // The order of this table is the order in which the demos print.
    const Demo demos[] = {
        cRegex,
        cppRegex,
        cppRegex_matchGroup,
        cppRegex_replace,
        cppRegex_matchGroup_iterator,
    };
    for (const Demo demo : demos) {
        demo();
    }
    return 0;
}

编译运行(<regex> 需要 C++11 及以上标准): g++ -std=c++11 main.cpp -o regex_test && ./regex_test

输出结果:

--->abcQQQQQQ-5555555xyz
--->QQQQQQ
--->5555555
ok ,this is a true email format :)
ok :)
-->josephxrays@163.com
-->josephxrays
-->163
-->com
old text:https://www.baidu.com
new text:https://www.google.com
-->https://www.baidu.com 百度
-->https://www.google.com 谷歌
-->http://www.csdn.net CSDN

bye~


本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!