首页 > 编程语言 > 详细

PythonStudy——re 模块

时间:2019-05-20 21:18:39      阅读:116      评论:0      收藏:0      [点我收藏+]

# A regular expression is a string with its own syntax, used to match text
# inside a target string.
import re

# Find every digit in the target string.
data = '123abc呵呵'

res = re.findall(r'\d', data)  # \d stands for a single digit
print(res)  # ['1', '2', '3']

单个字符

import re

# re.I makes the match case-insensitive.
print(re.findall(r'a', 'abc123嘿嘿abcABC', flags=re.I))  # ['a', 'a', 'A']

# a|b matches the single character a or b.
print(re.findall(r'a|b', 'abc123嘿嘿abcABC', flags=re.I))  # ['a', 'b', 'a', 'b', 'A', 'B']

# [a,b] matches a single a, comma, or b: the comma inside the brackets is a
# literal member of the set, not a separator.
print(re.findall(r'[a,b]', 'abc,123嘿嘿abcABC', flags=re.I))  # ['a', 'b', ',', 'a', 'b', 'A', 'B']

# [^ab] matches any single character that is neither a nor b.
print(re.findall(r'[^ab]', 'abc,123嘿嘿abcABC'))  # ['c', ',', '1', '2', '3', '嘿', '嘿', 'c', 'A', 'B', 'C']

# [a-z] any single lowercase letter, [A-Z] any single uppercase letter,
# [0-9] any single digit.
print(re.findall(r'[a-z]', 'abc,123嘿嘿abcABC'))  # ['a', 'b', 'c', 'a', 'b', 'c']
print(re.findall(r'[0-9]', 'abc,123嘿嘿abcABC'))  # ['1', '2', '3']

# Any single lowercase letter, uppercase letter, or digit.
print(re.findall(r'[a-z]|[A-Z]|[0-9]', 'abc,123嘿嘿abcABC'))  # ['a', 'b', 'c', '1', '2', '3', 'a', 'b', 'c', 'A', 'B', 'C']

# The same set written as one character class.
print(re.findall(r'[A-Za-z0-9]', 'abc,123嘿嘿[abcABC'))  # ['a', 'b', 'c', '1', '2', '3', 'a', 'b', 'c', 'A', 'B', 'C']

# . matches any single character except \n.
print(re.findall(r'.', '*\\_+=\n \r\t'))  # ['*', '\\', '_', '+', '=', ' ', '\r', '\t']

# re.S lets . match every single character, including \n.
print(re.findall(r'.', '*\\_+=\n \r\t', flags=re.S))  # ['*', '\\', '_', '+', '=', '\n', ' ', '\r', '\t']

# \d is a single digit, like [0-9].
print(re.findall(r'\d', 'abc,123嘿嘿[abcABC'))  # ['1', '2', '3']

# \w covers [A-Za-z0-9_]; for str patterns it also matches other word
# characters such as common Chinese characters (note the '嘿' below).
print(re.findall(r'\w', 'abc,123嘿[_'))  # ['a', 'b', 'c', '1', '2', '3', '嘿', '_']

# \s is a single whitespace character, like [\f\n\r\t\v ]: space, tab,
# form feed and so on.
print(re.findall(r'\s', '\f\n\r\t\v '))  # ['\x0c', '\n', '\r', '\t', '\x0b', ' ']

# \D is the opposite of \d (any single non-digit character); likewise \W is
# the opposite of \w and \S is the opposite of \s.
print(re.findall(r'\D', 'abc,123嘿[_'))  # ['a', 'b', 'c', ',', '嘿', '[', '_']


# A single Chinese character: [\u4e00-\u9fa5]
print(re.findall(r'[\u4e00-\u9fa5]', 'abc,123嘿[_'))  # ['嘿']

# Author's recommendation: prefer the explicit classes
#   [0-9]  [A-Za-z0-9_]  [\f\n\r\t\v ]  [^0-9]  [\u4e00-\u9fa5]
# over the shorthands
#   \d  \w  \s  \D  \w
# because the explicit form states exactly which characters are accepted.

 

 

正则匹配步骤

import re
# 1. The regex source string r'\\' is compiled into a regex object that
#    matches one literal backslash character.
# 2. The compiled regex object is then applied to the target string.
print(re.findall(r'\\', r'a\d\p\\'))  # ['\\', '\\', '\\', '\\']


re_obj = re.compile(r'\n')  # regex object that matches a newline character
res = re_obj.findall('\n')
print(res)  # ['\n']

re_obj = re.compile(r'\\d')  # regex object that matches the two characters \d
res = re_obj.findall(r'\d')
print(res)  # ['\\d']

re_obj = re.compile(r'\d')  # regex object that matches a digit
res = re_obj.findall(r'\d')  # the text \d is not a digit
print(res)  # []

re_obj = re.compile(r'\\n')  # regex object that matches the two characters \n
res = re_obj.findall('\n')  # a real newline character, so nothing matches
print(res)  # []
res = re_obj.findall(r'\n')  # a backslash followed by n, so it matches
print(res)  # ['\\n']

 

 

多个字符

# Repetition with an explicit count
# {n}: exactly n repetitions of the preceding item
print(re.findall(r'a', 'aaabbb'))  # ['a', 'a', 'a']
print(re.findall(r'a{2}', 'aaabbb'))  # ['aa']
print(re.findall(r'ab', 'aabbababab'))  # ['ab', 'ab', 'ab', 'ab']
print(re.findall(r'a{2}b{2}', 'aabbababab'))  # ['aabb']
print(re.findall(r'ab{2}', 'aabbababab'))  # ['abb']

# {n,}: n or more repetitions. Here the shortest possible match is abb.
# Matching is greedy: abbb could match as abb or abbb, and the longer one wins.
print(re.findall(r'ab{2,}', 'ababbabbbabbbb'))  # ['abb', 'abbb', 'abbbb']

# {,n}: 0 to n repetitions. ab{,2} prefers abb, falls back to ab, and still
# accepts a bare a when no b follows.
print(re.findall(r'ab{,2}', 'aababbabbbabbbb'))  # ['a', 'ab', 'abb', 'abb', 'abb']

# {n,m}: n to m repetitions. ab{1,3} prefers abbb, then abb, then ab.
print(re.findall(r'ab{1,3}', 'aababbabbbabbbb'))  # ['ab', 'abb', 'abbb', 'abbb']

# Repetition with special symbols
# *: 0 or more repetitions
print(re.findall(r'ab*', 'aababbabbbabbbb'))  # ['a', 'ab', 'abb', 'abbb', 'abbbb']
# +: 1 or more repetitions
print(re.findall(r'ab+', 'aababbabbbabbbb'))  # ['ab', 'abb', 'abbb', 'abbbb']
# ?: 0 or 1 repetition
print(re.findall(r'ab?', 'aababbabbbabbbb'))  # ['a', 'ab', 'ab', 'ab', 'ab']


# Goal: match every word.
print(re.findall(r'[a-z]+', 'abc def hello print'))  # ['abc', 'def', 'hello', 'print']
print(re.findall(r'[a-z]+\b', 'abc def hello print'))  # ['abc', 'def', 'hello', 'print']

# \b is a zero-width word boundary: the position between a word character
# and a non-word character (such as a space) or the start/end of the string.
# Without it, 'ac' is found inside 'acb'; with it, only words ending in c match.
print(re.findall(r'[a-z]*c', 'abc def hello print acb zc'))  # ['abc', 'ac', 'zc']
print(re.findall(r'[a-z]*c\b', 'abc def hello print acb zc'))  # ['abc', 'zc']

 

 

多行匹配

import re
s = """http://www.baidu.com
https://sina.com.cn
https://youku.com
haam
abchttp://www.oldboy.com
"""
# ^ anchors the match at a start and $ anchors it at an end. They only apply
# to every line (rather than to the whole string) when flags=re.M is given.
print(re.findall(r'^http.+com$', s, re.M))  # ['http://www.baidu.com', 'https://youku.com']

 

 

分组

import re

url = 'https://www.baidu.com, http://www.youku.com'
# Goal: extract the domain names baidu and youku from url.
# The dots are escaped so they match a literal '.' instead of any character.
print(re.findall(r'www\.([a-z]+)\.com', url))  # ['baidu', 'youku']

# () denotes a group.
# When the pattern contains groups, findall stores only the group results.
print(re.findall(r'(www)\.([a-z]+)\.com', url))  # [('www', 'baidu'), ('www', 'youku')]

# Group numbering follows the order in which the opening parentheses appear.
print(re.findall(r'(((w)ww)\.([a-z]+)\.com)', url))  # [('www.baidu.com', 'www', 'w', 'baidu'), ('www.youku.com', 'www', 'w', 'youku')]


# findall searches the whole text: it may start anywhere and match many times.
# match does not: it must match from the very beginning and matches only once.

# Group handling: groups, group numbers, named groups, non-capturing groups.
# Non-capturing group: the parentheses are still needed to treat some text as
# one unit, but that unit should not be stored as a group result.
# (?:...) is a non-capturing group; (?P<name>...) is a named group.
url = 'www.baidu.com,www.youku.com'
res = re.match(r'((?:www)\.(?P<name>[a-z]+)\.com)', url)
# res is a match object with span=(0, 13), match='www.baidu.com'
print(res.group(1))  # www.baidu.com
print(res.group(2))  # baidu
print(res.group('name'))  # baidu

 

拆分与替换

import re

s = 'a b ac def'
print(s.split(' '))  # ['a', 'b', 'ac', 'def']

# Splitting with a regular expression: split on a space, comma, or @.
s = 'a b,ac@def'
print(re.split(r'[ ,@]', s))  # ['a', 'b', 'ac', 'def']


# re.sub replaces every match; count limits how many are replaced.
s = 'python abc python'
print(re.sub('python', 'Python', s))  # Python abc Python
print(re.sub('python', 'Python', s, count=1))  # Python abc python


# Combined with groups, re.sub can rearrange the matched pieces.
# \1, \2, \3 in the replacement refer to the captured groups; the unmatched
# tail '!!!' is left in place.
s = 'day a good!!!'
print(re.sub('(day) (a) (good)', r'today is \2 \3 \3 \1', s))  # today is a good good day!!!

 

PythonStudy——re 模块

原文:https://www.cnblogs.com/tingguoguoyo/p/10896456.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!