>>> print(html)
<div id=1>
 my <br>
 name <br>
 is JAY <br>
</div>
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup(html, 'html.parser')
>>> soup.string
>>>
>>> soup.get_text()
' \n \xa0\xa0my \n \xa0 name \n is \xa0 JAY \n'
>>> soup.get_text('|')  # 所有tag文本内容的分隔符
' \n \xa0\xa0my | \n \xa0 name |\n is \xa0 JAY |\n'
>>> soup.get_text('|', strip=True)  # 去掉文本内容前后的空白
'my|name|is \xa0 JAY'
>>> content_soup = soup.div.contents
>>> content_soup
[' \n \xa0\xa0my ', <br/>, ' \n \xa0 name ', <br/>, '\n is \xa0 JAY ', <br/>, '\n']
>>> content_soup = [str(i) for i in content_soup]  # 列表中的所有值改换为字符串类型
>>> content_text = ''.join(content_soup)  # 合并列表到一个字符串中
>>> content_text
' \n \xa0\xa0my <br/> \n \xa0 name <br/>\n is \xa0 JAY <br/>\n'
>>> print(content_text)
 my <br/>
 name <br/>
 is JAY <br/>
原文:http://www.cnblogs.com/stuqx/p/7291940.html