首页 > 其他 > 详细

油猴脚本爬虫

时间:2019-12-10 15:50:48      阅读:272      评论:0      收藏:0      [点我收藏+]

脚本内容

// ==UserScript==
// @name         大众点评评论爬虫
// @namespace    http://tampermonkey.net/
// @version      0.1
// @description  crawl is great
// @author       陈祥安
// @include      http://www.dianping.com/shop*
// @match        http://www.dianping.com/ajax/json/shopDynamic/allReview*
// @require      http://cdn.bootcss.com/jquery/1.11.2/jquery.js
// @grant        GM_xmlhttpRequest


// ==/UserScript==

(function() {
    /**
     * Evaluate an XPath expression and collect every node it yields.
     *
     * @param {string} xpath - XPath expression to evaluate.
     * @param {Node} [context] - Node to evaluate against. Its owner document
     *     performs the evaluation; when omitted (or when it has no owner
     *     document) the page's document is used instead.
     * @returns {Array} The nodes, in the order the result iterator returns them.
     * @throws Any error raised by the evaluation or the iteration propagates unchanged.
     */
    var $x = function (xpath, context) {
        const ownerDoc = (context && context.ownerDocument) || window.document;
        const root = context || ownerDoc;
        const iterator = ownerDoc.evaluate(xpath, root, null, XPathResult.ANY_TYPE, null);
        const matches = [];
        // iterateNext() yields a falsy value once the result set is exhausted.
        for (let current = iterator.iterateNext(); current; current = iterator.iterateNext()) {
            matches.push(current);
        }
        return matches;
    };

    // Local endpoint that receives the scraped comments.
    // NOTE(review): Tampermonkey may prompt for (or block) this cross-origin
    // request unless the metadata block declares `// @connect 127.0.0.1` — confirm.
    var server_url = 'http://127.0.0.1:9090/comment/';

    // Once the page has finished loading: dismiss the bonus pop-up if it is
    // present, collect the text of every comment paragraph, and POST the
    // result to the local server as JSON.
    window.addEventListener('load', () => {
        // Close the pop-up. A jQuery collection is truthy even when empty,
        // so test its length rather than the object itself.
        let close_btn = $(".J-bonus-close");
        console.log("准备关闭", close_btn);
        if (close_btn.length > 0) {
            close_btn.click();
        }

        let li_item_list = $x("//ul[@class='comment-list J-list']/li[@class='comment-item']/div[@class='content']//p[@class='desc']");
        // One {"data": <text>} entry per matched comment paragraph.
        let dataList = li_item_list.map((node) => ({"data": node.innerText}));

        GM_xmlhttpRequest({
            method: "POST",
            url: server_url,
            // Declare the body format explicitly instead of relying on the
            // userscript manager's default content type.
            headers: {"Content-Type": "application/json"},
            data: JSON.stringify({'name': "爬虫", "dataList": dataList}),
            onload: function (response) {
                if (response.status < 200 || response.status >= 300) {
                    console.error("comment upload rejected", response.status, response.responseText);
                    return;
                }
                console.log(response);
                console.log("dataList", dataList);
            },
            // Without this handler a refused connection (e.g. the server is
            // not running) fails silently.
            onerror: function (response) {
                console.error("comment upload failed", response);
            }
        });
    });
})();

python代码

# @Author : cxa
# @File : server.py
# @Software: PyCharm
import json

from flask import Flask, jsonify, render_template, request

app = Flask(__name__)


@app.route('/')
def index():
    """Serve the landing page: a single HTML heading naming the API."""
    heading = "<h1>大众点评API</h1>"
    return heading


@app.route('/comment/', methods=['GET', 'POST'])
def login():
    """Receive scraped comments as a JSON POST body and echo them back.

    The raw request body is decoded as UTF-8 JSON, printed to the server
    console, and returned to the caller as a JSON response.

    Returns:
        The decoded payload as JSON on success. A JSON error body with
        status 405 when the request is not a POST, or with status 400 when
        the body is not valid UTF-8 encoded JSON.
    """
    if request.method != 'POST':
        # A GET used to fall through to an unbound `result` and fail with a
        # 500; answer with an explicit client error instead.
        return jsonify({'error': 'POST a JSON body to this endpoint'}), 405

    form_data = request.get_data()
    try:
        result = json.loads(form_data.decode("utf-8"))
    except ValueError:
        # UnicodeDecodeError and json.JSONDecodeError both derive from ValueError.
        return jsonify({'error': 'request body must be UTF-8 encoded JSON'}), 400

    print(result)
    # jsonify also handles non-dict payloads such as lists, which a bare
    # `return result` does not accept on every Flask version.
    return jsonify(result)


@app.errorhandler(404)
def miss(e):
    """Render the ``404.html`` template for not-found errors.

    Args:
        e: The error handed to the handler; not used.

    Returns:
        A ``(body, status)`` tuple with status 404.
    """
    body = render_template('404.html')
    return body, 404


@app.errorhandler(500)
def error(e):
    """Render the ``500.html`` template for internal server errors.

    Args:
        e: The error handed to the handler; not used.

    Returns:
        A ``(body, status)`` tuple with status 500.
    """
    body = render_template('500.html')
    return body, 500


if __name__ == '__main__':
    # Bind to loopback only. The userscript posts to 127.0.0.1, and running
    # with debug=True on 0.0.0.0 would expose the interactive debugger
    # (which can execute arbitrary code) to every host on the network.
    app.run(host='127.0.0.1', port=9090, debug=True)

油猴脚本爬虫

原文:https://www.cnblogs.com/c-x-a/p/12016854.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!