content是根据网址获得的网页源码字符串
// Splits the HTML source held in `content` into its plain-text fragments.
//
// Every HTML tag (`<...>`) and every line-break character (CR or LF) acts as
// a separator. The non-empty runs of text between separators are returned,
// in document order, in a mutable array.
//
// The fragments are sliced straight out of `content` rather than being joined
// with a "-" placeholder and split again, so hyphens that belong to the page
// text (e.g. "e-mail") stay inside their fragment.
if (content.length == 0) {
    // Covers both nil and @"": there is nothing to split.
    return [NSMutableArray array];
}

// The pattern is a fixed, valid expression, so the error out-parameter is
// not needed here.
NSRegularExpression *separatorExpression =
    [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|[\r\n]"
                                              options:0
                                                error:nil];

// Plain matching needs no options; progress reporting is only meaningful for
// block-based enumeration.
NSArray *separatorMatches =
    [separatorExpression matchesInString:content
                                 options:0
                                   range:NSMakeRange(0, content.length)];

NSMutableArray *marr = [NSMutableArray array];
NSUInteger fragmentStart = 0;
for (NSTextCheckingResult *separatorMatch in separatorMatches) {
    NSRange separatorRange = separatorMatch.range;
    // Adjacent separators (e.g. "</p><p>") have no text between them; skip
    // the empty gap instead of adding @"".
    if (separatorRange.location > fragmentStart) {
        NSRange fragmentRange = NSMakeRange(fragmentStart,
                                            separatorRange.location - fragmentStart);
        [marr addObject:[content substringWithRange:fragmentRange]];
    }
    fragmentStart = NSMaxRange(separatorRange);
}

// Text after the last separator (or the whole string when nothing matched).
if (fragmentStart < content.length) {
    [marr addObject:[content substringFromIndex:fragmentStart]];
}
return marr;
iOS 去掉 HTML 代码中的标签元素,获得纯文本,布布扣,bubuko.com
原文:http://www.cnblogs.com/luoyubuku/p/3896712.html