PhantomJS is a headless WebKit scriptable with a JavaScript API. It has fast and native support for various web standards: DOM handling, CSS selector, JSON, Canvas, and SVG.
Full web stack. No browser required.
PhantomJS是一个服务器端的 JavaScript API 的WebKit(开源的浏览器引擎)。其支持各种Web标准: DOM 处理, CSS 选择器, JSON, Canvas 和 SVG。PhantomJS可以用于页面自动化,网络监测,网页截屏,以及无界面测试等。
console.log('Hello world!');
phantom.exit();
通过Win+R打开CMD,调用phantomjs.exe执行该程序,输出如下图所示:
// arguments.js - echo every command-line argument passed to the script.
// Usage: phantomjs arguments.js foo bar
// Kept in ES5 syntax (var, function expressions): PhantomJS's bundled
// JavaScript engine has little or no ES2015 support.
var system = require('system');

// Per the PhantomJS system-module docs, args[0] is the script name itself,
// so a length of 1 means no user arguments were supplied.
if (system.args.length === 1) {
    console.log('Try to pass some args when invoking this script!');
} else {
    system.args.forEach(function (arg, i) {
        console.log(i + ': ' + arg);
    });
}

// Without an explicit exit the PhantomJS process keeps running.
phantom.exit();
// Load a page and save a screenshot of it as example.png.
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create();

page.open('http://www.baidu.com', function (status) {
    // Only render when the load succeeded; otherwise the capture would be
    // of a blank page with no indication that anything went wrong.
    if (status !== 'success') {
        console.log('FAIL to load the address');
    } else {
        page.render('example.png');
    }
    // Exit inside the callback so the process ends only after the load finished.
    phantom.exit();
});
// loadspeed.js - report the title of a page and how long it took to load.
// Usage: phantomjs loadspeed.js <some URL>
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create(),
    system = require('system'),
    t, address;

if (system.args.length === 1) {
    // No URL given: print usage and exit with a non-zero status.
    console.log('Usage: loadspeed.js <some URL>');
    phantom.exit(1);
} else {
    t = Date.now();              // start timestamp (ms)
    address = system.args[1];
    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
        } else {
            t = Date.now() - t;  // elapsed time (ms) from before open() to callback
            // evaluate() runs the function inside the page and returns its result.
            console.log('Page title is ' + page.evaluate(function () {
                return document.title;
            }));
            console.log('Loading time ' + t + ' msec');
        }
        phantom.exit();
    });
}
4.代码运算-Code Evaluation
// Print the title of a page by evaluating code inside the page context.
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create();

page.open('http://www.csdn.net', function (status) {
    // The function passed to evaluate() executes inside the loaded page;
    // its return value is handed back to this script.
    var title = page.evaluate(function () {
        return document.title;
    });
    // Switch console output to GBK before printing the title
    // (presumably to match a Chinese-locale Windows console - confirm for your terminal).
    phantom.outputEncoding = "gbk";
    console.log('Page title is ' + title);
    phantom.exit();
});
// Print the title of a page by forwarding the page's own console output.
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create();

// Emit console output as GBK
// (presumably to match a Chinese-locale Windows console - confirm for your terminal).
phantom.outputEncoding = "gbk";

// console.log calls made inside the page are not shown by default;
// this handler relays them to the script's console.
page.onConsoleMessage = function (msg) {
    console.log('Page title is ' + msg);
};

page.open('http://www.csdn.net', function (status) {
    page.evaluate(function () {
        // Runs inside the page; the message reaches onConsoleMessage above.
        console.log(document.title);
    });
    phantom.exit();
});
调用phantomjs gettile2.js即可。
5.DOM操作-DOM Manipulation
参考page automation tasks
下面的 useragent.js(examples 目录中的示例文件)将读取 id 为 myagent 的元素的 innerText 属性:
// useragent.js - override the user agent and read back what the site reports.
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create();

console.log('The default user agent is ' + page.settings.userAgent);
// Must be set before open() so the request is sent with the custom value.
page.settings.userAgent = 'SpecialAgent';

page.open('http://www.httpuseragent.org', function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        // Runs inside the page: read the text of the element with id "myagent".
        var ua = page.evaluate(function () {
            return document.getElementById('myagent').innerText;
        });
        console.log(ua);
    }
    phantom.exit();
});
phantomjs examples/useragent.js
// Open a page, inject jQuery into it, then click every <button> on the page.
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create();

page.open('http://www.sample.com', function () {
    // includeJs loads the external script into the page and invokes the
    // callback once it is available.
    page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
        page.evaluate(function () {
            // Runs inside the page, where the injected jQuery ($) is defined.
            $("button").click();
        });
        // Exit only inside the includeJs callback; exiting earlier could end
        // the process before the script has been included.
        phantom.exit();
    });
});
The above snippet will open up a web page, include the jQuery library into the page, and then click on all buttons using jQuery. It will then exit from the web page. Make sure to put the exit statement within the page.includeJs or else it may exit prematurely before the javascript code is included.
即需要确保将 phantom.exit() 语句放在 page.includeJs 的回调函数内,否则可能在引用的 JavaScript 代码加载完成之前就提前退出。The Webpage instance具体用法参考前面官方文档。
6.网络请求及响应 – Network Requests and Responses
当一个页面从一台远程服务器请求一个资源的时候,请求和响应均可以通过 onResourceRequested 和 onResourceReceived 回调方法追踪到。文档示例 netlog.js:
// netlog.js - log every network request and response made while loading a page.
// Usage: phantomjs netlog.js <some URL>
// Kept in ES5 syntax for compatibility with PhantomJS's JavaScript engine.
var page = require('webpage').create(),
    system = require('system'),
    address;

if (system.args.length === 1) {
    // No URL given: print usage and exit with a non-zero status.
    console.log('Usage: netlog.js <some URL>');
    phantom.exit(1);
} else {
    address = system.args[1];

    // Called for each resource the page requests.
    page.onResourceRequested = function (req) {
        console.log('requested: ' + JSON.stringify(req, undefined, 4));
    };

    // Called as response data for a resource arrives.
    page.onResourceReceived = function (res) {
        console.log('received: ' + JSON.stringify(res, undefined, 4));
    };

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
        }
        phantom.exit();
    });
}
phantomjs examples/netlog.js http://www.baidu.com
received: {
    "contentType": "text/javascript; charset=gbk",
    "headers": [
        {
            "name": "Server",
            "value": "bfe/"
        },
        {
            "name": "Date",
            "value": "Tue, 18 Aug 2015 20:10:03 GMT"
        },
        {
            "name": "Content-Type",
            "value": "text/javascript; charset=gbk"
        },
        {
            "name": "Content-Length",
            "value": "88"
        },
        {
            "name": "Connection",
            "value": "keep-alive"
        },
        {
            "name": "Cache-Control",
            "value": "private"
        }
    ],
    "id": 13,
    "redirectURL": null,
    "stage": "end",
    "status": 200,
    "statusText": "OK",
    "time": "2015-08-18T20:09:38.085Z",
    "url": "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=&json=1&p=3&sid=16486_16222_1421_16896_16738_12825_12868_16800_16659_16424_16514_15936_12073_13932_16866&csor=0&cb=jQuery110208203572703059763_1439928574608&_=1439928574609"
}
获取如何把该特性用于HAR 输出以及基于YSlow的性能分析的更多信息,请参阅网络监控页面:network monitoring
下面显示了从英国广播公司网站获得典范的瀑布图(waterfall diagram):
用CasperJs自动浏览页面-by:kiwi小白 CSDN
Windows中Phantomjs + Casperjs安装使用方法
CasperJS 的安装和快速入门-oschina
使用 CasperJS 对 Web 网站进行功能测试-oschina
(By:Eastmount 2015-8-19 深夜4点半 http://blog.csdn.net/eastmount/)
[Python爬虫] 在Windows下安装PhantomJS和CasperJS及入门介绍(上)