想用python模拟浏览器访问web的方法测试些东西,有哪几种方法呢?
一类:单纯的访问web,不解析其js,css等。
1. urllib2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 |
# -*- coding: utf-8 -*-
"""Fetch a URL through an HTTP proxy with urllib2 and dump the response."""

try:
    import urllib2
except ImportError:
    # Python 3 exposes the same opener/handler API as urllib.request.
    import urllib.request as urllib2


def Furllib2(ip, port, url, timeout):
    """Open *url* through the HTTP proxy ``ip:port`` and print the response.

    Args:
        ip: Proxy host. When empty, no proxy is configured.
        port: Proxy port (string or integer).
        url: Address to fetch.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        True when the request succeeded and the response was printed,
        0 when any error occurred.

    Note:
        ``install_opener`` replaces the process-wide default opener, so
        later ``urlopen`` calls keep using this proxy and User-agent.
    """
    # The proxy mapping used to be left empty, which silently ignored
    # ip/port and sent the request directly.
    proxydict = {}
    if ip:
        proxydict['http'] = 'http://%s:%s' % (ip, port)
    print(proxydict)

    proxy_handler = urllib2.ProxyHandler(proxydict)
    opener = urllib2.build_opener(proxy_handler)
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib2.install_opener(opener)

    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        finally:
            # Release the underlying connection/file even if printing fails.
            response.close()
        return True
    except Exception:
        print('some errors occored' + '-' * 50)
        return 0


def main():
    """Run a single proxied request with sample settings and print the result."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    # `url` was never defined here, which raised NameError; use a sample page.
    url = 'http://example.com/'
    timeout = 4
    print(Furllib2(proxyip, proxyport, url, timeout))


if __name__ == "__main__":
    main()
2. mechanize(与网站的自动化交互)
http://wwwsearch.sourceforge.net/mechanize/doc.html
1
2
3
4
5
6
7
8
9
10
11
12 |
def Fmechanize(url):
    """Fetch *url* with a cookie-aware mechanize opener and print the result.

    Args:
        url: Address to request with GET.

    Returns:
        True when the request succeeded, 0 when any error occurred.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    try:
        r = opener.open(url)  # GET
        # POST variant: r = opener.open("http://example.com/", data)
        print(r.geturl())
        print(r.info())
        return True
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return 0
二类:模拟浏览器,使用firefox等的浏览器引擎,支持js,css等。
1. selenium 的 firefox 或者 chrome 等驱动,但是由于要打开一个浏览器,所以会比较慢(浏览器驱动可以到 selenium 官网上下载,也可以到 firefox 插件处搜索)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 |
def Fselenium_firefox(ip, port, url, timeout):
    """Load *url* in Firefox through an HTTP proxy and print its cookies.

    Args:
        ip: Proxy host written to ``network.proxy.http``.
        port: Proxy port; converted to int before being stored.
        url: Page to open.
        timeout: Passed to ``webdriver.Firefox`` as its ``timeout`` argument.

    Returns:
        1 when the page was loaded and the cookies were printed,
        0 when the browser could not be started or the page load failed.
    """
    try:
        profile = webdriver.FirefoxProfile()
        # NOTE(review): 1 is understood to select manual proxy settings.
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', ip)
        # NOTE(review): Firefox is understood to expect an integer port;
        # callers in this file pass it as a string.
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print.
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        # presumably gives page scripts time to run before cookies are read
        time.sleep(5)
        cookies = driver.get_cookies()
        print(cookies)
        return 1
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # Close the browser on both the success and the failure path.
        driver.quit()
2. selenium:headless test,使用 selenium + phantomjs 驱动,无需打开浏览器,但是支持 js 的模拟浏览器动作,也就是说和你手工打开是没有区别的。
http://selenium.googlecode.com/git/docs/api/py/api.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 |
def Fselenium_phantomjs(ip, port, url, timeout):
    """Load *url* in headless PhantomJS through an HTTP proxy.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: Page to open.
        timeout: Page-load timeout given to ``set_page_load_timeout``.

    Returns:
        True when the driver's current URL could be read and printed,
        0 when the driver could not be started or the URL could not be read.
        A failed page load is only logged; the current URL is still read.
    """
    service_args = [
        '--proxy=%s:%s' % (ip, port),
        '--proxy-type=http',
    ]
    print(service_args)

    driver = None
    try:
        driver = webdriver.PhantomJS(service_args=service_args)
        driver.set_page_load_timeout(timeout)
        driver.get(url)
        # presumably gives page scripts time to run before the URL is read
        time.sleep(4)
    except Exception:
        traceback.print_exc()

    if driver is None:
        # Driver creation failed; previously this surfaced as a NameError
        # swallowed by the second try block.
        return 0

    try:
        print(driver.current_url)
        return True
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # The driver was never quit before, leaking a PhantomJS process.
        driver.quit()
3. qt,网上抢来的代码
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 |
"""Minimal tabbed PyQt4 WebKit browser whose tabs use a settings-backed cookie jar."""

from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork


class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar that is pre-populated from the parent window's settings."""

    def __init__(self, cookiesKey, parent=None):
        """Load cookies stored under *cookiesKey* in ``parent.settings``.

        Args:
            cookiesKey: Settings key that holds the raw cookie data.
            parent: Object exposing a ``settings`` attribute. Although the
                default is None, ``parent.settings`` is read unconditionally,
                so a real parent is required.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
        if cookiesValue:
            cookiesList = QtNetwork.QNetworkCookie.parseCookies(cookiesValue)
            self.setAllCookies(cookiesList)

    # Disabled in the original listing. Without it nothing in this file
    # writes cookies back to the settings.
    # def setCookiesFromUrl(self, cookieList, url):
    #     cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
    #     cookiesArray = cookiesValue if cookiesValue else QtCore.QByteArray()
    #     for cookie in cookieList:
    #         cookiesArray.append(cookie.toRawForm() + "\n")
    #     self.mainWindow.settings.setValue(self.cookiesKey, cookiesArray)
    #     return super(cookieJar, self).setCookiesFromUrl(cookieList, url)

    def deleteCookie(self, cookieList):
        """Placeholder kept from the original listing.

        NOTE(review): *cookieList* is unused and an empty list is passed to
        ``settings.value`` as the key; this looks unfinished. Confirm the
        intended behaviour before relying on it.
        """
        cookie = []
        self.mainWindow.settings.value(cookie)


class webView(QtWebKit.QWebView):
    """Web view whose network access manager uses a ``cookieJar``."""

    def __init__(self, cookiesKey, url, parent=None):
        """Create the view and install a cookie jar bound to *parent*.

        Args:
            cookiesKey: Settings key forwarded to ``cookieJar``.
            url: Accepted but not used here; the caller loads the page.
            parent: Parent widget, also forwarded to ``cookieJar``.
        """
        super(webView, self).__init__(parent)
        self.cookieJar = cookieJar(cookiesKey, parent)
        self.page().networkAccessManager().setCookieJar(self.cookieJar)


class myWindow(QtGui.QMainWindow):
    """Main window with a URL field, an "Add Tab" action and a tab widget."""

    def __init__(self, parent=None):
        """Build the tool bar and tab widget and create the settings object."""
        super(myWindow, self).__init__(parent)
        self.cookiesKey = "cookies"

        self.centralwidget = QtGui.QWidget(self)

        self.tabWidget = QtGui.QTabWidget(self.centralwidget)
        self.tabWidget.setTabsClosable(True)

        self.verticalLayout = QtGui.QVBoxLayout(self.centralwidget)
        self.verticalLayout.addWidget(self.tabWidget)

        self.actionTabAdd = QtGui.QAction(self)
        self.actionTabAdd.setText("Add Tab")
        self.actionTabAdd.triggered.connect(self.on_actionTabAdd_triggered)

        self.lineEdit = QtGui.QLineEdit(self)

        self.toolBar = QtGui.QToolBar(self)
        self.toolBar.addAction(self.actionTabAdd)
        self.toolBar.addWidget(self.lineEdit)

        self.addToolBar(
            QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea), self.toolBar
        )
        self.setCentralWidget(self.tabWidget)

        # Read later by cookieJar through its parent when a tab is created.
        self.settings = QtCore.QSettings()

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print every cookie held by the current tab's cookie jar.

        Not connected to any action in this listing.
        """
        webView = self.tabWidget.currentWidget()
        listCookies = webView.page().networkAccessManager().cookieJar().allCookies()
        for cookie in listCookies:
            print(cookie.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open a new tab for the URL in the line edit, or a blank page."""
        url = self.lineEdit.text()
        self.addNewTab(url if url else 'about:blank')

    def addNewTab(self, url):
        """Create a tab named after the current tab count and load *url* in it."""
        tabName = u"Tab {0}".format(str(self.tabWidget.count()))

        tabWidget = webView(self.cookiesKey, url, self)
        tabWidget.loadFinished.connect(self.on_tabWidget_loadFinished)
        tabWidget.load(QtCore.QUrl(url))

        tabIndex = self.tabWidget.addTab(tabWidget, tabName)
        self.tabWidget.setCurrentIndex(tabIndex)

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookies after a page finishes loading.

        NOTE(review): the value is read and then discarded; kept as in the
        original listing.
        """
        cookies2 = self.settings.value(self.cookiesKey)


if __name__ == "__main__":
    import sys

    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('myWindow')

    main = myWindow()
    main.resize(666, 333)
    main.show()

    sys.exit(app.exec_())
4. qt-headless
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 |
"""Render a page headlessly with PyQt4 WebKit and print the resulting HTML."""

import sys

from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage


class Render(QWebPage):
    """Web page that loads a URL and blocks until loading has finished.

    After construction, ``frame`` holds the page's main frame.
    """

    def __init__(self, url):
        """Start a QApplication, load *url* and run the event loop.

        The call returns once ``_loadFinished`` has quit the event loop.
        """
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        self.app.exec_()

    def _loadFinished(self, result):
        """Store the main frame and stop the event loop.

        *result* is the flag delivered by ``loadFinished``; it is not checked.
        """
        self.frame = self.mainFrame()
        self.app.quit()


if __name__ == "__main__":
    # `url` was undefined at module level before; take it from the command
    # line and fall back to a sample page.
    url = sys.argv[1] if len(sys.argv) > 1 else 'http://example.com/'
    r = Render(url)
    html = r.frame.toHtml()
    print(html)
5. splinter :打开浏览器,模拟操作,python的
http://splinter.cobrateam.info/docs/tutorial.html
1
2
3
4 |
from splinter import Browser

# Sample address; `url` was left undefined in the original snippet.
url = 'http://example.com/'

browser = Browser()
browser.visit(url)
具体用哪个要看你有什么具体的需求了
原文:http://www.cnblogs.com/maseng/p/3578553.html