按照一定的规则,从某个字符串中匹配出想要的数据,这个规则就是正则表达式
匹配某个字符串(普通字符按字面逐个匹配,例如 abc)
import re
# Match a literal string: re.match only succeeds if the pattern matches
# at the very beginning of the text.
text = "abc aAbc aaaABC"
reg = r"abc"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
匹配任意一个字符 .
不能匹配换行符\n
import re
# "." matches any single character except a newline.
text = "abc aAbc aaaABC"
reg = r"."
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
匹配0-9 任意数字 \d
import re
# "\d" matches exactly one digit, so only the first "1" is returned.
text = "1112abc aAbc aaaABC"
reg = r"\d"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
匹配任意非数字 \D
import re
# "\D" matches exactly one character that is not a digit.
text = "abc aAbc aaaABC"
reg = r"\D"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
匹配空白字符 \r \n \t 空格
\s
import re
# "\s" matches one whitespace character; here it consumes the leading tab.
text = "\tabc aAbc aaaABC"
reg = r"\s"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
\w
匹配的是a-z 和 A-Z 以及数字和下划线
import re
# "\w" matches one word character (letter, digit or underscore);
# the leading "_" qualifies.
text = "_abc aAbc aaaABC"
reg = r"\w"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
\W
匹配的是与 \w 正好相反的字符(即除字母、数字、下划线之外的字符)
import re
# "\W" is the complement of "\w"; the leading "+" is not a word character.
text = "+abc aAbc aaaABC"
reg = r"\W"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
\S
匹配非空字符
import re
# "\S" matches one non-whitespace character. The text starts with a tab,
# so re.match (which only tries position 0) finds nothing.
text = "\tabc aAbc aaaABC"
reg = r"\S"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
[] 组合的方式:只要满足中括号中的某一项就算匹配成功
可以配合 + (重复一次或多次)和 ^ (在中括号内表示取反)使用
import re
# A character class matches any single character listed inside the brackets.
# [\W\w] combines a class with its complement, so it accepts every character.
text = "abc aAbc aaaABC"
reg = r"[\W\w]"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")

# One or more characters that are each a digit or a hyphen.
text = "028-88888888"
reg = r"[\d-]+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")

# The positive class r"[a-zA-Z0-9_]" would match the leading "0" here.
# A leading "^" inside the brackets negates the class, so the pattern below
# rejects "0" and re.match reports no match.
text = "028-88888888"
reg = r"[^a-zA-Z0-9_]"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
*
:匹配 0 个或者任意多个前面的字符(前面的表达式可以重复 0 次或多次)
import re
# "*" repeats the preceding element zero or more times:
# any number of digits, a hyphen, then any number of digits.
text = "028-88888888"
reg = r"\d*-\d*"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
+
: 匹配 1 个或者多个前面的字符(最少必须有一个满足条件,否则匹配不成功)
import re
# "+" repeats the preceding element one or more times:
# at least one word character, a hyphen, then at least one digit.
text = "028-88888888"
reg = r"\w+-\d+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
?
:匹配1个或者0个(要么没有要么只有1个)
import re
# "?" makes the preceding element optional (zero or one occurrence).
# The text starts with "-", so "\d?" matches zero digits: the match succeeds
# with an empty string. A Match object is truthy even when it is empty, so
# the success branch runs and prints an empty group.
text = "-88888888"
reg = r"\d?"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
{m}: 匹配m个字符
import re
# "{m}" repeats the preceding element exactly m times:
# only the first five digits are consumed.
text = "88888888"
reg = r"\d{5}"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
{m,n}:匹配m-n个字符
import re
# "{m,n}" repeats the preceding element between m and n times, taking as
# many as possible: all eight digits are consumed here.
text = "88888888"
reg = r"\d{1,15}"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
验证手机号码
import re
# Validate a mobile number: "1", a second digit from 3/4/5/7/8, then nine
# more digits (eleven digits in total).
text = "18209985826"
reg = r"1[34578]\d{9}"
# Validation must consume the whole string; re.match would also accept a
# valid prefix followed by arbitrary trailing characters.
ret = re.fullmatch(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
验证邮箱
import re
# Validate an e-mail address of the form <word chars>@<domain>.<suffix>.
text = "3456546747@qq.com"
# The dot is escaped so it matches a literal "." only; an unescaped "."
# would accept any character between the domain and the suffix.
reg = r"\w+@[a-z0-9]+\.[a-z]+"
# Validation must consume the whole string, so fullmatch is used instead of
# match (which would accept trailing garbage after a valid prefix).
ret = re.fullmatch(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
验证url
import re
# Validate a URL: one of the listed schemes, "://", then one or more
# non-whitespace characters.
text = "https://search.bilibili.com/all"
reg = r"(http|https|ftp)://\S+"
# fullmatch rejects input that contains whitespace after a valid prefix,
# which re.match would silently accept.
ret = re.fullmatch(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
^
以...开始
import re
# "^" anchors the pattern at the start of the text. The text begins with
# "a" rather than a scheme, so nothing matches.
# Note: re.match already starts at position 0, so "^" is redundant with it;
# the anchor matters with re.search.
text = "ahttps://search.bilibili.com/all"
reg = r"^(http|https|ftp)://\S+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
在中括号[]中^
取反操作
import re
# Inside brackets a leading "^" negates the class:
# "[^\d]+" matches one or more characters that are not digits.
text = "hello"
reg = r"[^\d]+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
$
以... 结尾
import re
# "$" anchors the pattern at the end of the text.
text = "hello@163.com"
# The dot in "163.com" is escaped so that it matches a literal "." only;
# unescaped it would accept any character in that position.
reg = r"\w+@163\.com$"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
|
多个选项之间匹配
import re
# "|" chooses between alternatives; the group limits the alternation to
# the scheme part of the URL.
text = "http://127.0.0.1"
reg = r"(http|https|ftp)://\S+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
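贪婪模式(默认):量词会尽量多地去匹配
非贪婪模式:在量词(*、+、?、{m,n})后面加上 ?,会尽量少地去匹配
例如对 "<h1>标题</h1>":<.+> 匹配整个字符串,而 <.+?> 只匹配 <h1>
下面的例子演示转义:$ 和 . 是特殊字符,匹配它们本身时需要在前面加反斜杠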
import re
# "$" and "." are metacharacters; a preceding backslash makes them match
# literally. Pattern: a dollar sign, digits, a dot, digits.
text = "$69658.369"
reg = r"\$\d+\.\d+"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
import re
# The text is two characters: a backslash followed by "n" (not a newline).
text = "\\n"
print(text)
# To match a literal backslash the regex needs "\\". Written as an ordinary
# string literal that takes four backslashes ("\\\\n"); the raw string below
# is the same pattern without the extra layer of escaping.
reg = r"\\n"
ret = re.match(reg, text)
if ret:
    print("匹配到了:", ret.group())
else:
    print("没有匹配到")
match # 从头开始匹配 没有匹配成功返回None 匹配成功返回Match对象
search # 只要找到就返回,并且只返回一个
sub # 替换
split # 分隔
findall # 找出所有满足条件的 以列表的方式返回
compile # 编译
原文:https://www.cnblogs.com/huameixiao/p/14687722.html