import re


def _report(match):
    """Print the matched text, or the fixed 'ret_list = None' notice if there is no match."""
    if match is not None:
        print(match.group())
    else:
        print('ret_list = None')


# re.match() only matches at the very start of the string.
ret = re.match(r"H", "Hello Python")  # ret.group() -> 'H'

# Capturing groups: group(1) is the text before the dash.
ret = re.match(r"([^-]*)-(\d+)", "010-12345678")  # ret.group(1) -> '010'

# Backreference: \1 re-matches whatever group 1 captured. The pattern must be a
# raw string (r"...") so that \1 reaches the regex engine unchanged.
ret = re.match(r"<([a-zA-Z]*)>\w*</\1>", "<html>hh</html>")  # ret.group(0) -> '<html>hh</html>'

# re.search() scans the whole string instead of only the beginning.
ret = re.search(r"\d+", "阅读次数为 9999")  # ret.group() -> '9999'

# re.findall() returns every match as a list of strings.
ret_list = re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")  # ['9999', '7890', '12345']

# re.sub() replaces every match and returns the resulting *string* (not a list);
# the name ret_list is kept only because the printed label uses it.
ret_list = re.sub(r"\d+", '998', "python = 997 python = 997")
print('ret_list=====: %s' % ret_list)

# re.split() cuts the string on either a colon or a space.
ret_list = re.split(r":| ", "info:xiaoZhang 33 shandong")  # ['info', 'xiaoZhang', '33', 'shandong']

# Non-greedy mode: appending ? to *, ?, + or {m,n} makes the quantifier match
# as little as possible, so (\d+?) captures a single digit here.
s = "aa2343ddd"
r = re.match(r"aa(\d+?)", s)  # r.group(1) -> '2'

print('111111111111111111')

# URL under test. Other samples worth trying against the patterns below:
#   http://www.freebuf.com
#   https://freebuf.com/articles/es/123%e7%b1%b3%e9%9b%aa%e5%84%bf
#   http://www.freebuf.com/157843sdf.html
url = 'http://www.freebuf.com/author/%e7%b1%b3%e9%9b%aa%e5%84%bf'

# Bare domain, e.g. https://www.freebuf.com
ret_list = re.search(r'https?://(\w+?\.)+\w+\/?$', url)
_report(ret_list)

# Directory path, e.g. https://www.freebuf.com/articles/es
ret_list = re.search(r'https?://(\w+?\.)+\w+(\/\w+)*(\/\w+\/?)$', url)
_report(ret_list)

# File path, e.g. http://www.freebuf.com/news/157843.html
ret_list = re.search(r'https?://(\w+?\.)+\w+(\/\w+)*(\/\w+\.\w+)$', url)
_report(ret_list)
用 re.compile() 预先把正则表达式编译一次，之后重复使用编译好的 pattern 对象，无需每次调用时再编译。
import re

# Sample URL. Named url_text rather than `str` so the builtin str() stays usable.
url_text = 'https://www.freebuf.com/page/357'

# Compile the pattern once; the compiled object can then be reused for any
# number of searches. Group 1 is the last letter of the scheme ('p' or 's'),
# group 2 is the host part that follows "www.".
pattern = re.compile(r'(p|s)\:\/\/www\.(.+?\..+?)\/+?', re.DOTALL)

# With two groups in the pattern, findall() returns a list of 2-tuples.
match = pattern.findall(url_text)
print(type(match))
print(match)
忽略大小写
在正则表达式的最前面加上内联标志 (?i)，或者在 re.compile() 中传入 re.IGNORECASE 标志。
import re

# Sample access-log text.
# NOTE(review): this snippet came from a whitespace-collapsed paste, so the
# record boundaries below are reconstructed. The result does not depend on
# them: with re.DOTALL both `.` and `[^\d]` match newlines as well as spaces.
str1 = """
201.158.69.116 - - [03/Jan/2013:21:17:20 -0600] fwf[-] tip[-] 127.0.0.1:9000 0.007 0.007 MX pythontab.com GET /html/test.html http/1.1 " 200" 2426 "http://a.com" "es-ES,es;q=0.8" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE HTtP/1.1" 201 649
172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/vcc/default HTTP/1.1" 207 401
172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/bln/31 HTTP/1.1" 207 454
172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE HTTP/1.1" 207 649
172.16.119.8 - admin [15/Aug/2011:18:17:50 +0800] "PROPFIND /svn/EAGLE/!svn/vcc/default HTTP/1.1" 207 454
"""

# (?i) at the start of the pattern makes the whole match case-insensitive, so
# "http/", "HTtP/" and "HTTP/" are all accepted. The single capturing group is
# the first three-digit number starting with 2-5 found after the protocol
# version, i.e. the status code in these records.
pattern = re.compile(r'(?i)HTTP/.+?\b[^\d]+?([2345]\d{2})', re.DOTALL)

# One captured status code per matching record.
ret = pattern.findall(str1)
print(ret)
点号（.）默认不匹配换行符；如果要让它也匹配换行符，需要传入 re.DOTALL 标志。
# Case-insensitive (inline (?i)) match of "HTTP/..." followed by a captured
# three-digit code starting with 2-5. re.DOTALL lets `.` match newlines too,
# so the protocol token and the code may be separated by line breaks.
pattern = re.compile(r'(?i)HTTP/.+?\b[^\d]+?([2345]\d{2})', re.DOTALL)