首页 > Web开发 > 详细

Node.js抓取新浪新闻标题

时间:2018-04-10 22:14:16      阅读:194      评论:0      收藏:0      [点我收藏+]
"use strict";

let cheerio = require("cheerio");
let http = require("http");
let iconv = require("iconv-lite");

// Sina world-news landing page that is scraped for headline links.
const mainUrl = "http://news.sina.com.cn/world/";

// Download the page, then print one "title|url" line to stdout for every
// anchor under an element with class "content" that looks like a headline.
http.get(mainUrl, function(sres) {
    // A non-200 reply (redirect, error page, ...) is not the headline page.
    // Report it and drain the response so the socket can be released.
    if (sres.statusCode !== 200) {
        console.error(`Request to ${mainUrl} failed with HTTP status ${sres.statusCode}`);
        sres.resume();
        return;
    }

    // Keep the raw Buffers and decode only once at the end, so a multi-byte
    // character that straddles two chunks is decoded correctly.
    const chunks = [];
    sres.on('data', function(chunk) {
        chunks.push(chunk);
    });

    sres.on('end', function() {
        const html = iconv.decode(Buffer.concat(chunks), 'utf8');
        const $ = cheerio.load(html, {decodeEntities: false});

        $('.content a').each(function(idx, element) {
            const ele = $(element);
            // Missing text / href is normalised to the empty string.
            const title = (ele.text() || '').trim();
            const url = (ele.attr('href') || '').trim();

            // Keep only links whose text is longer than 4 characters and whose
            // href is non-empty and does not contain "javascript"
            // (e.g. javascript: pseudo-links).
            if (title.length > 4 && url.length > 0 && !url.includes('javascript')) {
                console.log(`${title}|${url}`);
            }
        });
    });
}).on('error', function(err) {
    // Without this listener a DNS or connection failure would surface as an
    // unhandled 'error' event and terminate the process with a stack trace.
    console.error(`Request to ${mainUrl} failed: ${err.message}`);
});

 

Node.js抓取新浪新闻标题

原文:https://www.cnblogs.com/sheryee/p/8783446.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!