爬取网站表情包

时间：2021-06-15 23:54:33 阅读：27 评论：0 收藏：0 [点我收藏+]

// Third-party: HTML parsing (cheerio) and HTTP client (axios).
const cheerio = require('cheerio');
const axios = require('axios');
// Node.js built-ins used for writing the downloaded images to disk.
const fs = require('fs');
const path = require('path');

// First page of the article listing; geteNum() reads the pagination bar from it.
const httpUrL = 'https://www.doutula.com/article/list/?page=1';

/**
 * Synchronously block the current thread for about `number` seconds.
 *
 * The call does not return until the deadline has passed, so nothing else on
 * this thread (timers, pending network I/O) makes progress while it waits.
 * The wait is done with `Atomics.wait` on a private buffer instead of a
 * busy loop, so the CPU is idle while blocked.
 *
 * @param {number} number - Seconds to block. Zero, negative and NaN values
 *   return immediately.
 * @returns {void}
 */
function sleep(number) {
  const durationMs = number * 1000;

  // `!(x > 0)` also catches NaN, which would otherwise never reach a deadline.
  if (!(durationMs > 0)) {
    return;
  }

  const deadline = Date.now() + durationMs;
  // Nobody else holds this buffer, so the wait can only end by timing out.
  const cell = new Int32Array(new SharedArrayBuffer(4));

  let remainingMs = durationMs;
  while (remainingMs > 0) {
    Atomics.wait(cell, 0, 0, remainingMs);
    // Re-check against the wall clock in case the wait returned early.
    remainingMs = deadline - Date.now();
  }
}

/**
 * Get the total number of list pages.
 *
 * Downloads the first listing page (`httpUrL`) and reads the link text of the
 * second-to-last `<li>` in the `.pagination` bar.
 * NOTE(review): presumably the last `<li>` is the "next page" control, which
 * would make the one before it the highest page number; verify against the
 * live markup.
 *
 * @returns {Promise<string>} The link text as found in the page (not parsed
 *   to a number); an empty string when no such element exists.
 */
async function geteNum() {
  const res = await axios.get(httpUrL);
  const $ = cheerio.load(res.data);

  // Query the pagination items once and reuse the selection.
  const pageItems = $('.pagination li');

  return pageItems.eq(pageItems.length - 2).find('a').text();
}

/**
 * Crawl the article list pages one after another.
 *
 * Pages are numbered from 1. Each page is fully processed before the next
 * one is requested, with a 3 second pause in between. A page that fails is
 * logged and skipped so the remaining pages are still crawled.
 *
 * @param {number} [maxPages=3] - Upper bound on the number of list pages to
 *   crawl (the original code hard-coded this limit to 3).
 * @returns {Promise<void>} Resolves once every page has been attempted.
 *   Rejects if the page total cannot be fetched.
 */
async function spider(maxPages = 3) {
  // Total number of pages as advertised by the site's pagination bar.
  const reportedTotal = Number.parseInt(await geteNum(), 10);

  // Fall back to the cap when the pagination text is not a usable number.
  const lastPage =
    Number.isNaN(reportedTotal) || reportedTotal < 1
      ? maxPages
      : Math.min(reportedTotal, maxPages);

  for (let page = 1; page <= lastPage; page++) {
    try {
      await getListPage(page);
    } catch (err) {
      console.error(`Failed to crawl list page ${page}:`, err);
    }

    if (page < lastPage) {
      // Non-blocking pause: downloads already in flight keep running, which
      // a synchronous sleep would freeze.
      await new Promise((resolve) => setTimeout(resolve, 3000));
    }
  }
}

/**
 * Crawl one article list page: for every article link on it, create a folder
 * under ./img named after the article and download that article's images.
 *
 * Articles are handled sequentially; an article that fails is logged and
 * skipped.
 *
 * @param {number|string} pageNum - Value for the `page` query parameter.
 * @returns {Promise<void>} Resolves once every article on the page has been
 *   attempted. Rejects if the list page itself cannot be fetched.
 */
async function getListPage(pageNum) {
  const listUrl = `https://www.doutula.com/article/list/?page=${pageNum}`;
  const res = await axios.get(listUrl);

  // Parse the HTML document with cheerio.
  const $ = cheerio.load(res.data);
  const links = $('#home > div > div.col-sm-9>a').toArray();

  for (const element of links) {
    const href = $(element).attr('href');
    if (!href) {
      continue;
    }

    const rawTitle = $(element).find('.random_title').text();

    // Keep only the text before the first digit.
    // NOTE(review): presumably this strips a trailing date from the title
    // element; it also truncates titles that themselves contain a digit.
    const match = /(.*?)\d/s.exec(rawTitle);
    let title = (match ? match[1] : rawTitle)
      .trim()
      // The title comes from the remote page and is used as a directory
      // name, so neutralise path separators and other reserved characters.
      .replace(/[\\/:*?"<>|]/g, '_');
    if (title === '' || title === '.' || title === '..') {
      title = 'untitled';
    }

    try {
      // Create the target folder (and ./img itself when it does not exist).
      await fs.promises.mkdir(path.join('./img', title), { recursive: true });
      // Resolve against the list URL so relative links work as well.
      await parsePage(new URL(href, listUrl).href, title);
    } catch (err) {
      console.error(`Failed to process article "${title}" (${href}):`, err);
    }
  }
}

/**
 * Stream one image from `imgUrl` into the file at `imgPath`.
 * The file is only created after the HTTP request has succeeded.
 *
 * @param {string} imgUrl - Absolute image URL.
 * @param {string} imgPath - Destination file path.
 * @returns {Promise<void>} Resolves when the file has been fully written.
 */
async function downloadImage(imgUrl, imgPath) {
  const res = await axios.get(imgUrl, { responseType: 'stream' });

  await new Promise((resolve, reject) => {
    const ws = fs.createWriteStream(imgPath);
    // On failure tear down both ends so no file descriptor or socket leaks.
    const fail = (err) => {
      res.data.destroy();
      ws.destroy();
      reject(err);
    };

    res.data.once('error', fail);
    ws.once('error', fail);
    ws.once('finish', resolve);
    // pipe() ends the write stream itself when the response ends.
    res.data.pipe(ws);
  });
}

/**
 * Download every image of one article page into ./img/<title>/.
 *
 * Images without a usable `src` are skipped. A failed image download is
 * logged and does not affect the others.
 *
 * @param {string} url - Absolute URL of the article page.
 * @param {string} title - Folder name under ./img for this article.
 * @returns {Promise<void>} Resolves once every image has been attempted.
 *   Rejects if the article page itself cannot be fetched.
 */
async function parsePage(url, title) {
  const res = await axios.get(url);
  const $ = cheerio.load(res.data);

  // Make sure the target folder exists before any file is opened in it.
  const dir = path.join('./img', title);
  await fs.promises.mkdir(dir, { recursive: true });

  const downloads = [];
  $('.pic-content img').each((i, element) => {
    const src = $(element).attr('src');
    if (!src) {
      return;
    }

    let imgUrl;
    try {
      // Resolve against the page URL so relative sources work as well.
      imgUrl = new URL(src, url);
    } catch (err) {
      console.error(`Skipping image with invalid src "${src}" on ${url}`);
      return;
    }
    if (imgUrl.protocol !== 'http:' && imgUrl.protocol !== 'https:') {
      return;
    }

    // File name taken from the URL path only, so a query string never ends
    // up in the name on disk.
    const fileName = path.basename(imgUrl.pathname);
    if (!fileName) {
      return;
    }

    // Path the image is written to.
    const imgPath = path.join(dir, fileName);

    downloads.push(
      downloadImage(imgUrl.href, imgPath).catch((err) => {
        console.error(`Failed to download ${imgUrl.href}:`, err);
      }),
    );
  });

  await Promise.all(downloads);
}

// Start the crawl. A failure is reported and reflected in the exit code
// instead of surfacing as an unhandled promise rejection.
spider().catch((err) => {
  console.error('Crawl aborted:', err);
  process.exitCode = 1;
});

爬取网站表情包

原文：https://www.cnblogs.com/eric-share/p/14886582.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)