想用python模拟浏览器访问web的方法测试些东西,有哪几种方法呢?
一类:单纯的访问web,不解析其js,css等。
1. urllib2
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 |
# -*- coding: utf-8 -*-
# Plain HTTP fetch through urllib2 (no JavaScript/CSS processing),
# optionally routed through an HTTP proxy.
try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved the same opener/handler API into urllib.request.
    import urllib.request as urllib2


def Furllib2(ip, port, url, timeout):
    """Fetch *url* and print its final URL, status code, headers and body.

    Args:
        ip: Proxy host. When either *ip* or *port* is empty, no proxy is
            configured and the request goes out directly.
        port: Proxy port (string or int).
        url: Address to open.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``True`` when the page was fetched and printed, ``0`` on any error.

    Note:
        The opener is installed process-wide via ``install_opener``, so
        later ``urlopen`` calls in the same process reuse this proxy and
        User-agent.
    """
    proxydict = {}
    if ip and port:
        # The original left this dict empty, so ip/port were silently ignored.
        proxydict['http'] = 'http://%s:%s' % (ip, port)
    print(proxydict)

    proxy_handler = urllib2.ProxyHandler(proxydict)
    opener = urllib2.build_opener(proxy_handler)
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib2.install_opener(opener)

    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            print(response.geturl())
            print(response.getcode())
            print(response.info())
            print(response.read())
        finally:
            response.close()
        return True
    except Exception as exc:
        # Best-effort probe: report the failure instead of raising, but do
        # not trap KeyboardInterrupt/SystemExit the way a bare except would.
        print('some errors occored' + '-' * 50)
        print(exc)
        return 0


def main():
    """Run one sample fetch through the hard-coded proxy."""
    proxyip = '14.18.16.69'
    proxyport = '80'
    url = 'http://example.com/'  # was undefined in the original (NameError)
    timeout = 4
    print(Furllib2(proxyip, proxyport, url, timeout))


if __name__ == "__main__":
    main()
2. mechanize(与网站的自动化交互)
http://wwwsearch.sourceforge.net/mechanize/doc.html
|
1
2
3
4
5
6
7
8
9
10
11
12 |
def Fmechanize(url):
    """Open *url* with a cookie-aware mechanize opener and print basic info.

    Assumes the ``mechanize`` package has been imported by the surrounding
    script; the snippet itself does not show the import.

    Args:
        url: Address to request with GET.

    Returns:
        ``True`` when the request succeeded, ``0`` on any error.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    try:
        r = opener.open(url)  # GET
        # r = opener.open("http://example.com/", data)  # POST
        try:
            print(r.geturl())
            print(r.info())
        finally:
            r.close()
        return True
    except Exception as exc:
        # Still best-effort, but show why it failed and let
        # KeyboardInterrupt/SystemExit through (a bare except hid both).
        print(exc)
        return 0
二类:模拟浏览器,使用firefox等的浏览器引擎,支持js,css等。
1. selenium 的firefox或者chrome等驱动,但是由于要打开一个浏览器,所以会比较慢(浏览器驱动可以到selenium官网上下载,也可以到firefox插件处搜索)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 |
def Fselenium_firefox(ip, port, url, timeout):
    """Load *url* in a real Firefox window through an HTTP proxy.

    Assumes ``webdriver`` (from selenium), ``traceback`` and ``time`` are
    imported by the surrounding script; the snippet does not show them.

    Args:
        ip: Proxy host.
        port: Proxy port; converted with ``int()`` because the value may
            arrive as a string such as ``'80'``.
        url: Page to open.
        timeout: Passed to ``webdriver.Firefox`` as its ``timeout`` argument.

    Returns:
        ``1`` when the page loaded and its cookies were printed, ``0`` when
        the browser could not be started or the page load failed.
    """
    try:
        profile = webdriver.FirefoxProfile()
        # 1 selects manual proxy configuration in Firefox.
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.http', ip)
        # Firefox stores the port as an integer preference.
        profile.set_preference('network.proxy.http_port', int(port))
        profile.update_preferences()
        driver = webdriver.Firefox(profile, timeout=timeout)
    except Exception:
        # print_exc() already writes the traceback; printing its return
        # value (as the original did) only added a stray "None".
        traceback.print_exc()
        return 0

    try:
        driver.get(url)
        time.sleep(5)  # give scripts on the page time to run
        cookies = driver.get_cookies()
        print(cookies)
        return 1
    except Exception:
        traceback.print_exc()
        return 0
    finally:
        # Always close the browser, on success and on failure alike.
        driver.quit()
2. selenium:headless test,使用 selenium + phantomjs 驱动,无需打开浏览器,但是支持js的模拟浏览器动作,也就是说和你手工打开是没有区别的。
http://selenium.googlecode.com/git/docs/api/py/api.html
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 |
def Fselenium_phantomjs(ip, port, url, timeout):
    """Load *url* headlessly with PhantomJS through an HTTP proxy.

    Assumes ``webdriver`` (from selenium), ``traceback`` and ``time`` are
    imported by the surrounding script; the snippet does not show them.

    Args:
        ip: Proxy host.
        port: Proxy port.
        url: Page to open.
        timeout: Page-load timeout handed to ``set_page_load_timeout``.

    Returns:
        ``True`` when the driver started and its current URL could be read
        (even if the page load itself raised, as in the original), ``0``
        otherwise.
    """
    service_args = [
        '--proxy=%s:%s' % (ip, port),
        '--proxy-type=http',
    ]
    print(service_args)

    driver = None
    try:
        try:
            driver = webdriver.PhantomJS(service_args=service_args)
            driver.set_page_load_timeout(timeout)
            driver.get(url)
            time.sleep(4)  # let scripts on the page settle
        except Exception:
            traceback.print_exc()

        if driver is None:
            # PhantomJS never started; the original reached the same result
            # through an UnboundLocalError caught further down.
            return 0

        try:
            print(driver.current_url)
            return True
        except Exception:
            traceback.print_exc()
            return 0
    finally:
        # The original never quit the driver, leaving a phantomjs process
        # behind on every call.
        if driver is not None:
            driver.quit()
3. qt,网上抢来的代码
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 |
from PyQt4 import QtCore, QtGui, QtWebKit, QtNetwork


class cookieJar(QtNetwork.QNetworkCookieJar):
    """Cookie jar seeded from the owning window's QSettings."""

    def __init__(self, cookiesKey, parent=None):
        """Load any cookies stored under *cookiesKey* in ``parent.settings``.

        *parent* must expose a ``settings`` attribute; it is read here
        unconditionally.
        """
        super(cookieJar, self).__init__(parent)
        self.mainWindow = parent
        self.cookiesKey = cookiesKey

        stored = self.mainWindow.settings.value(self.cookiesKey)
        if stored:
            parsed = QtNetwork.QNetworkCookie.parseCookies(stored)
            self.setAllCookies(parsed)

    # Disabled in the original: this override would write cookies back to
    # QSettings. With it commented out, nothing here stores cookies.
    #
    # def setCookiesFromUrl(self, cookieList, url):
    #     cookiesValue = self.mainWindow.settings.value(self.cookiesKey)
    #     cookiesArray = cookiesValue if cookiesValue else QtCore.QByteArray()
    #     for cookie in cookieList:
    #         cookiesArray.append(cookie.toRawForm() + "\n")
    #     self.mainWindow.settings.setValue(self.cookiesKey, cookiesArray)
    #     return super(cookieJar, self).setCookiesFromUrl(cookieList, url)

    def deleteCookie(self, cookieList):
        """Look up an empty list in the window settings; *cookieList* is unused.

        NOTE(review): this deletes nothing and passes a list where a settings
        key is expected -- looks unfinished; confirm intent before calling.
        """
        empty_key = []
        self.mainWindow.settings.value(empty_key)


class webView(QtWebKit.QWebView):
    """QWebView whose network manager uses a settings-backed cookieJar."""

    def __init__(self, cookiesKey, url, parent=None):
        """Attach a fresh cookieJar to this view's page; *url* is not used here."""
        super(webView, self).__init__(parent)
        self.cookieJar = cookieJar(cookiesKey, parent)
        manager = self.page().networkAccessManager()
        manager.setCookieJar(self.cookieJar)


class myWindow(QtGui.QMainWindow):
    """Main window: a URL line edit, an "Add Tab" action and a tab widget."""

    def __init__(self, parent=None):
        """Build the widgets, toolbar and settings object.

        Statement order is kept exactly as in the original.
        """
        super(myWindow, self).__init__(parent)

        self.cookiesKey = "cookies"

        # Tab area, first placed inside a layout on a helper widget.
        self.centralwidget = QtGui.QWidget(self)
        self.tabWidget = QtGui.QTabWidget(self.centralwidget)
        self.tabWidget.setTabsClosable(True)
        self.verticalLayout = QtGui.QVBoxLayout(self.centralwidget)
        self.verticalLayout.addWidget(self.tabWidget)

        # "Add Tab" action wired to its slot.
        self.actionTabAdd = QtGui.QAction(self)
        self.actionTabAdd.setText("Add Tab")
        self.actionTabAdd.triggered.connect(self.on_actionTabAdd_triggered)

        # Toolbar holding the action and the URL entry field.
        self.lineEdit = QtGui.QLineEdit(self)
        self.toolBar = QtGui.QToolBar(self)
        self.toolBar.addAction(self.actionTabAdd)
        self.toolBar.addWidget(self.lineEdit)
        top_area = QtCore.Qt.ToolBarArea(QtCore.Qt.TopToolBarArea)
        self.addToolBar(top_area, self.toolBar)

        # The tab widget itself (not self.centralwidget) becomes the central widget.
        self.setCentralWidget(self.tabWidget)

        # Read later by cookieJar through parent.settings when tabs are added.
        self.settings = QtCore.QSettings()

    @QtCore.pyqtSlot()
    def on_actionShowCookies_triggered(self):
        """Print every cookie held by the current tab in raw form."""
        current_view = self.tabWidget.currentWidget()
        jar = current_view.page().networkAccessManager().cookieJar()
        for entry in jar.allCookies():
            print(entry.toRawForm())

    @QtCore.pyqtSlot()
    def on_actionTabAdd_triggered(self):
        """Open a tab for the typed URL, or a blank page when the field is empty."""
        typed = self.lineEdit.text()
        self.addNewTab(typed or 'about:blank')

    def addNewTab(self, url):
        """Create a webView for *url*, add it as a tab and select it."""
        label = u"Tab {0}".format(str(self.tabWidget.count()))

        view = webView(self.cookiesKey, url, self)
        view.loadFinished.connect(self.on_tabWidget_loadFinished)
        view.load(QtCore.QUrl(url))

        index = self.tabWidget.addTab(view, label)
        self.tabWidget.setCurrentIndex(index)

    @QtCore.pyqtSlot()
    def on_tabWidget_loadFinished(self):
        """Read the stored cookies value after a page load; the result is discarded."""
        self.settings.value(self.cookiesKey)


if __name__ == "__main__":
    import sys

    app = QtGui.QApplication(sys.argv)
    app.setApplicationName('myWindow')

    main = myWindow()
    main.resize(666, 333)
    main.show()

    sys.exit(app.exec_())
4. qt-headless
http://qt-project.org/wiki/PySide#PySide.QtWebKit.PySide.QtWebKit.QWebView.url
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 |
import sys

from PyQt4.QtCore import QUrl
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage


class Render(QWebPage):
    """Load one URL in an off-screen QWebPage and keep its main frame.

    After construction returns, ``self.frame`` holds the page's main frame,
    set by the ``loadFinished`` handler.
    """

    def __init__(self, url):
        """Start a Qt application, load *url* and block until loading ends."""
        # Order kept from the original: application first, then the page.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Runs the event loop; _loadFinished() ends it via app.quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Store the main frame and stop the event loop; *result* is not inspected."""
        self.frame = self.mainFrame()
        self.app.quit()


# The original used an undefined ``url`` (NameError). Take it from the
# command line, falling back to a placeholder address.
url = sys.argv[1] if len(sys.argv) > 1 else 'http://example.com/'
r = Render(url)
html = r.frame.toHtml()
print(html)
5. splinter :打开浏览器,模拟操作,python的
http://splinter.cobrateam.info/docs/tutorial.html
|
1
2
3
4 |
>>> from splinter import Browser
>>> browser = Browser()
>>> browser.visit(url)
具体用哪个要看你有什么具体的需求了
原文:http://www.cnblogs.com/maseng/p/3578553.html